% COLT 2017 open-problem note on first-order regret bounds for contextual bandits.
% NOTE(review): the abstract's math and the url host were restored from garbled
% input (stripped formulas, proxy-rewritten domain) -- confirm against the publisher page.
@inproceedings{agarwal2017open,
  author    = {Agarwal, Alekh and Krishnamurthy, Akshay and Langford, John and Luo, Haipeng and Schapire, Robert E.},
  title     = {Open Problem: First-Order Regret Bounds for Contextual Bandits},
  booktitle = {Conference on Learning Theory},
  publisher = {PMLR},
  pages     = {4--7},
  year      = {2017},
  month     = jul,
  url       = {https://www.microsoft.com/en-us/research/publication/open-problem-first-order-regret-bounds-contextual-bandits/},
  abstract  = {We describe two open problems related to first order regret bounds for contextual bandits. The first asks for an algorithm with a regret bound of $\tilde{\mathcal{O}}(\sqrt{L_\star K \ln N})$ where there are $K$ actions, $N$ policies, and $L_\star$ is the cumulative loss of the best policy. The second asks for an optimization-oracle-efficient algorithm with regret $\tilde{\mathcal{O}}(L_\star^{2/3} \mathrm{poly}(K, \ln(N/\delta)))$. We describe some positive results, such as an inefficient algorithm for the second problem, and some partial negative results.},
}