@inproceedings{chen2021combinatorial, author = {Chen, Wei and Wang, Liwei and Zhao, Haoyu and Zheng, Kai}, title = {Combinatorial Semi-Bandit in the Non-Stationary Environment}, booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI) 2021}, year = {2021}, month = {June}, abstract = {In this paper, we investigate the non-stationary combinatorial semi-bandit problem, both in the switching case and in the dynamic case. In the general case where (a) the reward function is non-linear, (b) arms may be probabilistically triggered, and (c) only approximate offline oracle exists \cite[wang2017improving], our algorithm achieves Õ(√ST) distribution-dependent regret in the switching case, and Õ(V1/3T2/3) in the dynamic case, where S is the number of switchings and V is the sum of the total ''distribution changes''. The regret bounds in both scenarios are nearly optimal, but our algorithm needs to know the parameter S or V in advance. We further show that by employing another technique, our algorithm no longer needs to know the parameters S or V but the regret bounds could become suboptimal. In a special case where the reward function is linear and we have an exact oracle, we design a parameter-free algorithm that achieves nearly optimal regret both in the switching case and in the dynamic case without knowing the parameters in advance.}, url = {http://approjects.co.za/?big=en-us/research/publication/combinatorial-semi-bandit-in-the-non-stationary-environment/}, }