% Coetzee and Jarvis, "CRUCIBLE" -- workshop paper in the proceedings of
% DISCS 2013 (held in conjunction with SC 2013), published by ACM.
% The full proceedings title is stored in booktitle; acronyms are
% brace-protected so sentence-casing styles keep their capitals.
@inproceedings{d254e63a85be498aab0c63f304dbcbad,
  author    = {Coetzee, Peter and Jarvis, Stephen},
  title     = {{CRUCIBLE}: Towards unified secure on- and off-line analytics at scale},
  booktitle = {Proceedings of {DISCS} 2013: The 2013 International Workshop on Data-Intensive Scalable Computing Systems, Held in conjunction with {SC} 2013: The International Conference for High Performance Computing, Networking, Storage and Analysis},
  pages     = {43--48},
  publisher = {Association for Computing Machinery},
  year      = {2013},
  month     = nov,
  day       = {18},
  doi       = {10.1145/2534645.2534649},
  language  = {English},
  keywords  = {Algorithms, Languages, Security},
  abstract  = {The burgeoning field of data science benefits from the application of a variety of analytic models and techniques to the oft-cited problems of large volume, high velocity data rates, and significant variety in data structure and semantics. Many approaches make use of common analytic techniques in either a streaming or batch processing paradigm. This paper presents progress in developing a framework for the analysis of large-scale datasets using both of these pools of techniques in a unified manner. This includes: (1) a Domain Specific Language (DSL) for describing analyses as a set of Communicating Sequential Processes, fully integrated with the Java type system, including an Integrated Development Environment (IDE) and a compiler which builds idiomatic Java; (2) a runtime model for execution of an analytic in both streaming and batch environments; and (3) a novel approach to automated management of cell-level security labels, applied uniformly across all runtimes. The paper concludes with a demonstration of the successful use of this system with a sample workload developed in (1), and an analysis of the performance characteristics of each of the runtimes described in (2).},
  note      = {2013 International Workshop on Data-Intensive Scalable Computing Systems, DISCS 2013; Conference date: 18-11-2013},
}