% Langford and Zhang: the Epoch-Greedy algorithm for contextual bandits (NIPS 2007).
@inproceedings{langford2007the,
  author    = {Langford, John and Zhang, Tong},
  title     = {The {Epoch-Greedy} Algorithm for Contextual Multi-Armed Bandits},
  booktitle = {Advances in Neural Information Processing Systems ({NIPS} 2007)},
  year      = {2007},
  month     = dec,
  abstract  = {We present Epoch-Greedy, an algorithm for multi-armed bandits with observable side information. Epoch-Greedy has the following properties: No knowledge of a time horizon $T$ is necessary. The regret incurred by Epoch-Greedy is controlled by a sample complexity bound for a hypothesis class. The regret scales as $O(T^{2/3} S^{1/3})$ or better (sometimes, much better). Here $S$ is the complexity term in a sample complexity bound for standard supervised learning.},
  url       = {https://www.microsoft.com/en-us/research/publication/the-epoch-greedy-algorithm-for-contextual-multi-armed-bandits/},
}