@inproceedings{saha2021optimal,
  author    = {Saha, Aadirupa and Natarajan, Nagarajan and Netrapalli, Praneeth and Jain, Prateek},
  title     = {Optimal Regret Algorithm for Pseudo-1d Bandit Convex Optimization},
  booktitle = {Proceedings of the 38th International Conference on Machine Learning (ICML)},
  year      = {2021},
  month     = {July},
  abstract  = {We study online learning with bandit feedback (i.e., the learner has access only to a zeroth-order oracle) where the cost/reward functions $f_t$ admit a ``pseudo-1d'' structure, i.e., $f_t(w) = \ell_t(\hat{y}_t(w))$, where the output of $\hat{y}_t$ is one-dimensional. At each round, the learner observes context $x_t$, plays prediction $\hat{y}_t(w_t; x_t)$ (e.g., $\hat{y}_t(\cdot) = \langle x_t, \cdot \rangle$) for some $w_t \in \mathbb{R}^d$, and observes loss $\ell_t(\hat{y}_t(w_t))$, where $\ell_t$ is a convex, Lipschitz-continuous function. The goal is to minimize the standard regret metric. This pseudo-1d bandit convex optimization problem (SBCO) arises frequently in domains such as online decision-making and parameter tuning in large systems. For this problem, we first show a lower bound of $\min(\sqrt{dT}, T^{3/4})$ on the regret of any algorithm, where $T$ is the number of rounds. We propose a new algorithm that combines randomized online gradient descent with a kernelized exponential weights method to exploit the pseudo-1d structure effectively, guaranteeing the \emph{optimal} regret bound mentioned above, up to additional logarithmic factors. In contrast, applying state-of-the-art online convex optimization methods leads to $\tilde{O}\left(\min\left(d^{9.5}\sqrt{T}, \sqrt{d}\,T^{3/4}\right)\right)$ regret, which is significantly suboptimal in $d$.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/optimal-regret-algorithm-for-pseudo-1d-bandit-convex-optimization-2/},
}
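
For orientation, the following is a minimal, illustrative Python sketch of the pseudo-1d bandit feedback protocol the abstract describes: at each round the learner sees a context, plays a linear prediction, and receives only the scalar loss value. This is not the paper's algorithm; the one-point gradient estimator and every name and parameter here (T, d, delta, eta, w_star) are assumptions made for this sketch only.

import numpy as np

rng = np.random.default_rng(0)
T, d = 1000, 5               # rounds and dimension (illustrative)
delta, eta = 0.1, 0.05       # exploration radius and step size (illustrative)
w = np.zeros(d)              # learner's iterate w_t
w_star = rng.normal(size=d)  # hypothetical target used only to synthesize losses

for t in range(T):
    x_t = rng.normal(size=d)            # context revealed to the learner
    u = rng.normal(size=d)
    u /= np.linalg.norm(u)              # random unit direction for exploration
    w_play = w + delta * u              # randomized play around the iterate
    y_t = x_t @ w_play                  # pseudo-1d prediction <x_t, w>
    loss = abs(y_t - x_t @ w_star)      # scalar bandit feedback l_t(y_t)
    g = (d / delta) * loss * u          # one-point gradient estimate
    w -= eta * g
    w /= max(1.0, np.linalg.norm(w))    # keep the iterate in the unit ball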