% Conference-paper record for "Selection Bias in the LETOR Datasets" (2008).
% NOTE(review): inproceedings entries require a booktitle; none is recorded
%   here and the venue is not recoverable from this record -- TODO supply it.
% NOTE(review): the url host looks like a mirror/proxy rather than the
%   publisher's own site -- verify the link and prefer the canonical URL or DOI.
@inproceedings{minka2008selection,
  author   = {Minka, Tom},
  title    = {Selection Bias in the {LETOR} Datasets},
  year     = {2008},
  month    = aug,
  abstract = {The LETOR datasets consist of data extracted from traditional IR test corpora. For each of a number of test topics, a set of documents has been extracted, in the form of features of each document-query pair, for use by a ranker. An examination of the ways in which documents were selected for each topic shows that the selection has (for each of the three corpora) a particular bias or skewness. This has some unexpected effects which may considerably influence any learning-to-rank exercise conducted on these datasets. The problems may be resolvable by modifying the datasets.},
  url      = {http://approjects.co.za/?big=en-us/research/publication/selection-bias-letor-datasets/},
}