% FishStore demo paper (Chandramouli, Xie, Li, Kossmann), published in
% PVLDB volume 12, issue 12, August 2019.
% PVLDB is a volume/issue serial, so the record is stored as an article with
% separate journal/volume/number fields rather than a free-text booktitle.
% The citation key is unchanged, so existing citations keep resolving.
% NOTE(review): the url host looks like a mirror rather than the publisher's
% own site -- confirm the canonical link (or add a doi) before relying on it.
% NOTE(review): pages and doi are not present in the original record; add
% them once verified against the published version.
@article{chandramouli2019fishstore,
  author       = {Chandramouli, Badrish and Xie, Dong and Li, Yinan and Kossmann, Donald},
  title        = {{FishStore}: Fast Ingestion and Indexing of Raw Data},
  journal      = {Proceedings of the {VLDB} Endowment},
  volume       = {12},
  number       = {12},
  year         = {2019},
  month        = aug,
  organization = {VLDB},
  note         = {Demo paper},
  url          = {http://approjects.co.za/?big=en-us/research/publication/fishstore-fast-ingestion-and-indexing-of-raw-data/},
  abstract     = {The last decade has witnessed a huge increase in data being ingested into the cloud from a variety of data sources. The ingested data takes various forms such as JSON, CSV, and binary formats. Traditionally, data is either ingested into storage in raw form, indexed ad-hoc using range indices, or cooked into analytics-friendly columnar formats. None of these solutions is able to handle modern requirements on storage: making the data available immediately for ad-hoc and streaming queries while ingesting at extremely high throughputs. We demonstrate FishStore, our opensource concurrent latch-free storage layer for data with flexible schema. FishStore builds on recent advances in parsing and indexing techniques, and is based on multi-chain hash indexing of dynamically registered predicated subsets of data. We find predicated subset hashing to be a powerful primitive that supports a broad range of queries on ingested data and admits a higher performance (by up to an order of magnitude) implementation than current alternatives.},
}