% Workshop paper on document-selection bias in the LETOR learning-to-rank datasets.
% NOTE(review): the second author, the booktitle, and the canonical URL host were
% restored from the published citation, not from the original export -- confirm
% against the publisher's page before relying on them.
@inproceedings{minka2008selection,
  author    = {Minka, Tom and Robertson, Stephen},
  title     = {Selection Bias in the {LETOR} Datasets},
  booktitle = {Proceedings of the {SIGIR} 2008 Workshop on Learning to Rank for Information Retrieval},
  year      = {2008},
  month     = aug,
  abstract  = {The LETOR datasets consist of data extracted from traditional IR test corpora. For each of a number of test topics, a set of documents has been extracted, in the form of features of each document-query pair, for use by a ranker. An examination of the ways in which documents were selected for each topic shows that the selection has (for each of the three corpora) a particular bias or skewness. This has some unexpected effects which may considerably influence any learning-to-rank exercise conducted on these datasets. The problems may be resolvable by modifying the datasets.},
  url       = {https://www.microsoft.com/en-us/research/publication/selection-bias-letor-datasets/},
}