% Open-problem note from the 2017 Conference on Learning Theory.
% NOTE(review): the inline math in the abstract was missing from the export and
% has been restored as LaTeX; the url host was rewritten from a mirror domain to
% the Microsoft Research site. Confirm both against the published record.
@inproceedings{agarwal2017open,
  author    = {Agarwal, Alekh and Krishnamurthy, Akshay and Langford, John and Luo, Haipeng and Schapire, Robert E.},
  title     = {Open Problem: First-Order Regret Bounds for Contextual Bandits},
  booktitle = {Conference on Learning Theory},
  publisher = {PMLR},
  pages     = {4--7},
  year      = {2017},
  month     = jul,
  url       = {https://www.microsoft.com/en-us/research/publication/open-problem-first-order-regret-bounds-contextual-bandits/},
  abstract  = {We describe two open problems related to first order regret bounds for contextual bandits. The first asks for an algorithm with a regret bound of $\tilde{O}(\sqrt{L_\star K \ln N})$ where there are $K$ actions, $N$ policies, and $L_\star$ is the cumulative loss of the best policy. The second asks for an optimization-oracle-efficient algorithm with regret $\tilde{O}(L_\star^{2/3} \mathrm{poly}(K, \ln(N/\delta)))$. We describe some positive results, such as an inefficient algorithm for the second problem, and some partial negative results.},
}