% NIPS 2010 conference paper on b-bit minwise hashing for estimating
% 3-way resemblance (see the abstract field for the summary).
% NOTE(review): the url host below does not look like the publisher's
% canonical domain (possibly a mirror) -- confirm and replace if so;
% prefer a doi field when one is available.
@inproceedings{li2010b-bit,
  author    = {Li, Ping and K{\"o}nig, Arnd Christian and Gui, Wenhao},
  title     = {{b}-Bit Minwise Hashing for Estimating Three-Way Similarities},
  booktitle = {Twenty-Fourth Annual Conference on Neural Information Processing Systems ({NIPS})},
  year      = {2010},
  month     = dec,
  abstract  = {Computing two-way and multi-way set similarities is a fundamental problem. This study focuses on estimating 3-way resemblance (Jaccard similarity) using b-bit minwise hashing. While traditional minwise hashing methods store each hashed value using 64 bits, b-bit minwise hashing only stores the lowest b bits (where b <= 2 for 3-way). The extension to 3-way similarity from the prior work on 2-way similarity is technically non-trivial. We develop the precise estimator which is accurate and very complicated; and we recommend a much simplified estimator suitable for sparse data. Our analysis shows that b-bit minwise hashing can normally achieve a 10 to 25-fold improvement in the storage space required for a given estimator accuracy of the 3-way resemblance.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/b-bit-minwise-hashing-for-estimating-three-way-similarities/},
}