@inproceedings{sawarni2024generalized,
  author    = {Sawarni, Ayush and Das, Nirjhar and Barman, Siddharth and Sinha, Gaurav},
  title     = {Generalized Linear Bandits with Limited Adaptivity},
  booktitle = {NeurIPS 2024},
  year      = {2024},
  month     = {April},
  abstract  = {We study the generalized linear contextual bandit problem under the constraints of limited adaptivity. We present two algorithms, $\texttt{B-GLinCB}$ and $\texttt{RS-GLinCB}$, that address, respectively, two prevalent limited-adaptivity settings. Given a budget $M$ on the number of policy updates, in the first setting the algorithm must decide upfront the $M$ rounds at which it will update its policy, while in the second setting it can adaptively perform $M$ policy updates during its course. For the first setting, we design an algorithm $\texttt{B-GLinCB}$ that incurs $\tilde{O}(\sqrt{T})$ regret when $M = \Omega\left(\log \log T\right)$ and the arm feature vectors are generated stochastically. For the second setting, we design an algorithm $\texttt{RS-GLinCB}$ that updates its policy $\tilde{O}(\log^2 T)$ times and achieves a regret of $\tilde{O}(\sqrt{T})$ even when the arm feature vectors are adversarially generated. Notably, in these bounds we eliminate the dependence on a key instance-dependent parameter $\kappa$ that captures the non-linearity of the underlying reward model. Our novel approach for removing this dependence for generalized linear contextual bandits might be of independent interest.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/generalized-linear-bandits-with-limited-adaptivity/},
}