@inproceedings{luo2018efficient,
  author    = {Luo, Haipeng and Wei, Chen-Yu and Agarwal, Alekh and Langford, John},
  title     = {Efficient Contextual Bandits in Non-stationary Worlds},
  booktitle = {Conference on Learning Theory},
  year      = {2018},
  month     = {June},
  abstract  = {Most contextual bandit algorithms minimize regret against the best fixed policy, a questionable benchmark for non-stationary environments that are ubiquitous in applications. In this work, we develop several efficient contextual bandit algorithms for non-stationary environments by equipping existing methods for i.i.d. problems with sophisticated statistical tests so as to dynamically adapt to a change in distribution. We analyze various standard notions of regret suited to non-stationary environments for these algorithms, including interval regret, switching regret, and dynamic regret. When competing with the best policy at each time, one of our algorithms achieves regret $\widetilde{O}(\sqrt{ST})$ if there are $T$ rounds with $S$ stationary periods, or more generally $\widetilde{O}(\Delta^{1/3}T^{2/3})$ where $\Delta$ is some non-stationarity measure. These results almost match the optimal guarantees achieved by an inefficient baseline that is a variant of the classic Exp4 algorithm. The dynamic regret result is also the first one for efficient and fully adversarial contextual bandits. Furthermore, while the results above require tuning a parameter based on the unknown quantity $S$ or $\Delta$, we also develop a parameter-free algorithm achieving regret $\min\{\widetilde{O}(S^{1/4}T^{3/4}), \widetilde{O}(\Delta^{1/5}T^{4/5})\}$. This improves and generalizes the best existing result by Karnin and Anava (2016), which only holds for the two-armed bandit problem.},
  url       = {https://www.microsoft.com/en-us/research/publication/efficient-contextual-bandits-non-stationary-worlds/},
}