@inproceedings{svore2010learning, author = {Svore, Krysta M. and Burges, Chris J.C.}, title = {Learning to Rank on a Cluster using Boosted Decision Trees}, booktitle = {Learning to Rank on Cores, Clusters, and Clouds Workshop at NIPS 2010}, year = {2010}, month = {December}, abstract = {We investigate the problem of learning to rank on a cluster using Web search data composed of 140,000 queries and approximately fourteen million URLs, and a boosted tree ranking algorithm called LambdaMART. We compare to a baseline algorithm that has been carefully engineered to allow training on the full dataset using a single machine, in order to evaluate the loss or gain incurred by the distributed algorithms we consider. Our contributions are two-fold: (1) we implement a method for improving the speed of training when the training data fits in main memory on a single machine; (2) we develop a training method for the case where the training data size exceeds the main memory of a single machine that easily scales to far larger datasets, i.e., billions of examples, and is based on data distribution. Results of our methods on a real-world Web dataset indicate significant improvements in training speed.}, url = {http://approjects.co.za/?big=en-us/research/publication/learning-to-rank-on-a-cluster-using-boosted-decision-trees/}, edition = {Learning to Rank on Cores, Clusters, and Clouds Workshop at NIPS 2010}, }