% Conference paper (AISTATS 2011) on contextual bandits with linear payoff
% functions. The math in the abstract is written as LaTeX so the entry stays
% 7-bit clean under classic BibTeX.
% NOTE(review): the url host looks like a third-party mirror, not a
% publisher site, and its path carries a percent-encoded "ff" ligature
% (%ef%ac%80). Left unchanged here; confirm the canonical link (or add a doi).
@inproceedings{chu2011contextual,
  author       = {Chu, Wei and Li, Lihong and Reyzin, Lev and Schapire, Robert E.},
  title        = {Contextual Bandits with Linear Payoff Functions},
  organization = {Artificial Intelligence and Statistics},
  booktitle    = {{AISTATS} 2011},
  year         = {2011},
  month        = apr,
  abstract     = {In this paper we study the contextual bandit problem (also known as the multi-armed bandit problem with expert advice) for linear payoff functions. For $T$ rounds, $K$ actions, and $d$-dimensional feature vectors, we prove an $O\bigl(\sqrt{T d \ln^{3}(K T \ln(T)/\delta)}\bigr)$ regret bound that holds with probability $1-\delta$ for the simplest known (both conceptually and computationally) efficient upper confidence bound algorithm for this problem. We also prove a lower bound of $\Omega(\sqrt{Td})$ for this setting, matching the upper bound up to logarithmic factors.},
  url          = {http://approjects.co.za/?big=en-us/research/publication/contextual-bandits-with-linear-payo%ef%ac%80-functions/},
}