@inproceedings{chaudhuri2007heavy-tailed, author = {Chaudhuri, Surajit and Church, Kenneth and Sui, Liying and König, Arnd Christian and König, Arnd Christian and Church, Ken}, title = {Heavy-Tailed Distributions and Multi-Keyword Queries}, booktitle = {30th ACM SIGIR International Conference on Research & Developement on Information Retreival}, year = {2007}, month = {July}, abstract = {Intersecting inverted indexes is a fundamental operation for many applications in information retrieval and databases. Efficient indexing for this operation is known to be a hard problem for arbitrary data distributions. However, text corpora used in Information Retrieval applications often have convenient power-law constraints (also known as Zipf’s Law and long tails) that allow us to materialize carefully chosen combinations of multi-keyword indexes, which significantly improve worst-case performance without requiring excessive storage. These multi-keyword indexes limit the number of postings accessed when computing arbitrary index intersections. Our evaluation on an e-commerce collection of 20 million products shows that the indexes of up to four arbitrary keywords can be intersected while accessing less than 20% of the postings in the largest single-keyword index.}, publisher = {Association for Computing Machinery, Inc.}, url = {http://approjects.co.za/?big=en-us/research/publication/heavy-tailed-distributions-and-multi-keyword-queries/}, edition = {30th ACM SIGIR International Conference on Research & Developement on Information Retreival}, }