% Helios (Potharaju et al., 2020): conference-paper record for the Helios
% indexing system paper, Proceedings of the VLDB Endowment, pages 3231--3244.
% The month uses the predefined jun macro so styles can abbreviate/localise it.
% NOTE(review): the url host looks like a mirror/rewritten address rather than
% the publisher's own site -- confirm and replace with the canonical URL or a DOI.
@inproceedings{potharaju2020helios,
  author    = {Potharaju, Rahul and Kim, Terry and Wu, Wentao and Acharya, Vidip and Suh, Steve and Fogarty, Andrew and Dave, Apoorve and Ramanujam, Sinduja and Talius, Tomas and Novik, Lev and Ramakrishnan, Raghu},
  title     = {{Helios}: Hyperscale Indexing for the Cloud \& Edge},
  booktitle = {Proceedings of the {VLDB} Endowment ({VLDB} 2020)},
  year      = {2020},
  month     = jun,
  pages     = {3231--3244},
  url       = {http://approjects.co.za/?big=en-us/research/publication/helios-hyperscale-indexing-for-the-cloud-edge-2/},
  abstract  = {Helios is a distributed, highly-scalable system used at Microsoft for flexible ingestion, indexing, and aggregation of large streams of real-time data that is designed to plug into relational engines. The system collects close to a quadrillion events indexing approximately 16 trillion search keys per day from hundreds of thousands of machines across tens of data centers around the world. Helios use cases within Microsoft include debugging/diagnostics in both public and government clouds, workload characterization, cluster health monitoring, deriving business insights and performing impact analysis of incidents in other large-scale systems such as Azure Data Lake and Cosmos. Helios also serves as a reference blueprint for other large-scale systems within Microsoft. We present the simple data model behind Helios, which offers great flexibility and control over costs, and enables the system to asynchronously index massive streams of data. We also present our experiences in building and operating Helios over the last five years at Microsoft.},
}