% Gankidi et al., "Indexing HDFS data in PDW" (2014 conference paper, VLDB Endowment).
% Acronyms in the title are brace-protected so sentence-casing styles keep them uppercase.
@inproceedings{gankidi2014indexing,
  author    = {Gankidi, Vinitha Reddy and Teletia, Nikhil and Patel, Jignesh M. and Halverson, Alan and DeWitt, David J.},
  title     = {Indexing {HDFS} data in {PDW}: splitting the data from the index},
  booktitle = {Very Large Data Bases},
  year      = {2014},
  month     = jul,
  abstract  = {There is a growing interest in making relational DBMSs work synergistically with MapReduce systems. However, there are interesting technical challenges associated with figuring out the right balance between the use and co-deployment of these systems. This paper focuses on one specific aspect of this balance, namely how to leverage the superior indexing and query processing power of a relational DBMS for data that is often more cost-effectively stored in Hadoop/HDFS. We present a method to use conventional B+-tree indices in an RDBMS for data stored in HDFS and demonstrate that our approach is especially effective for highly selective queries.},
  publisher = {VLDB Endowment},
  url       = {http://approjects.co.za/?big=en-us/research/publication/indexing-hdfs-data-in-pdw-splitting-the-data-from-the-index/},
  pages     = {1520--1528},
}