% Conference paper on using RDBMS B+-tree indices over data stored in HDFS.
% NOTE(review): the url host below does not look like the publisher's own
% domain (possibly a mirror/proxy) -- confirm, and prefer a DOI or the
% canonical landing page if one exists.
@inproceedings{gankidi2014indexing,
  author    = {Gankidi, Vinitha Reddy and Teletia, Nikhil and Patel, Jignesh M. and Halverson, Alan and DeWitt, David J.},
  title     = {Indexing {HDFS} Data in {PDW}: Splitting the Data from the Index},
  booktitle = {Very Large Data Bases},
  publisher = {VLDB Endowment},
  year      = {2014},
  month     = jul,
  pages     = {1520--1528},
  url       = {http://approjects.co.za/?big=en-us/research/publication/indexing-hdfs-data-in-pdw-splitting-the-data-from-the-index/},
  abstract  = {There is a growing interest in making relational DBMSs work synergistically with MapReduce systems. However, there are interesting technical challenges associated with figuring out the right balance between the use and co-deployment of these systems. This paper focuses on one specific aspect of this balance, namely how to leverage the superior indexing and query processing power of a relational DBMS for data that is often more cost-effectively stored in Hadoop/HDFS. We present a method to use conventional B+-tree indices in an RDBMS for data stored in HDFS and demonstrate that our approach is especially effective for highly selective queries.},
}