@inproceedings{wang2018thompson,
author = {Wang, Siwei and Chen, Wei},
title = {Thompson Sampling for Combinatorial Semi-Bandits},
booktitle = {Proceedings of the 35th International Conference on Machine Learning (ICML'2018)},
year = {2018},
month = {July},
abstract = {We study the application of the Thompson sampling (TS) methodology to the stochastic combinatorial multi-armed bandit (CMAB) framework. We analyze the standard TS algorithm for the general CMAB, and obtain the first distribution-dependent regret bound of  for TS under general CMAB, where m is the number of arms, T is the time horizon, and  is the minimum gap between the expected reward of the optimal solution and any non-optimal solution. We also show that one cannot use an approximate oracle in TS algorithm for even MAB problems. Then we expand the analysis to matroid bandit, a special case of CMAB and for which we could remove the independence assumption across arms and achieve a better regret bound. Finally, we use some experiments to show the comparison of regrets of CUCB and CTS algorithms.},
url = {http://approjects.co.za/?big=en-us/research/publication/thompson-sampling-for-combinatorial-semi-bandits/},
}