@inproceedings{li2016contextual,
author = {Li, Shuai and Wang, Baoxiang and Zhang, Shengyu and Chen, Wei},
title = {Contextual Combinatorial Cascading Bandits},
booktitle = {Proceedings of the 33rd International Conference on Machine Learning (ICML'2016)},
year = {2016},
month = {June},
abstract = {We propose the contextual combinatorial cascading bandits, a combinatorial online learning game, where at each time step a learning agent is given a set of contextual information, then selects a list of items, and observes stochastic outcomes of a preﬁx in the selected items by some stopping criterion. In online recommendation, the stopping criterion might be the ﬁrst item a user selects; in network routing, the stopping criterion might be the ﬁrst edge blocked in a path. We consider position discounts in the list order, so that the agent’s reward is discounted depending on the position where the stopping criterion is met. We design a UCB-type algorithm, C3-UCB, for this problem, prove an n-step regret bound ˜ O(√n) in the general setting, and give ﬁner analysis for two special cases. Our work generalizes existing studies in several directions, including contextual information, position discounts, and a more general cascading bandit model. Experiments on synthetic and real datasets demonstrate the advantage of involving contextual information and position discounts.},
url = {http://approjects.co.za/?big=en-us/research/publication/contextual-combinatorial-cascading-bandits/},
volume = {48},
edition = {Proceedings of the 33rd International Conference on Machine Learning (ICML'2016), New York, NY, USA, June 2016},
}