@inproceedings{goyal2026contextual,
author = {Goyal, Tanmay and Midigeshi, Sukruta and Sinha, Gaurav},
title = {Contextual Slate GLM Bandits with Limited Adaptivity},
booktitle = {ICML 2026},
year = {2026},
month = {June},
abstract = {We investigate the contextual slate bandit problem with generalized linear rewards under limited adaptivity. At each round, the learner is presented with  sets of items and constructs a slate by selecting one item per set; the resulting slate yields a scalar reward sampled from a Generalized Linear Model (GLM). We propose algorithms under two limited-adaptivity paradigms: (a) batched and (b) rarely-switching settings. For the batched setting, we introduce B-SlateGLinCB, which partitions the time horizon into  batches such that each batch's policy relies only on data from previous batches. For the rarely-switching setting, we propose RS-SlateGLinCB, which adaptively performs only  parameter updates. Under a diversity assumption on the item sequences, we prove that B-SlateGLinCB and RS-SlateGLinCB achieve regret bounds of  and , respectively. Notably, both bounds are independent of the non-linearity parameter  that is typically found to scale the regret of GLM bandit algorithms. Our algorithms are computationally efficient, requiring only  time per round despite  possible slates. Simulations show our algorithms outperform existing batched baselines and remain competitive with Slate-GLM-OFU, a fully adaptive state-of-the-art algorithm. Notably, a slightly modified B-SlateGLinCB empirically matches this baseline. Finally, we demonstrate strong performance in a practical in-context example selection task for language models.},
url = {http://approjects.co.za/?big=en-us/research/publication/contextual-slate-glm-bandits-with-limited-adaptivity-2/},
}