@inproceedings{kash2020combining,
  author    = {Kash, Ian A. and Sullins, Michael and Hofmann, Katja},
  title     = {Combining No-regret and Q-learning},
  booktitle = {Autonomous Agents and Multi-Agent Systems (AAMAS 2020)},
  year      = {2020},
  month     = {May},
  pages     = {593--601},
  abstract  = {Counterfactual Regret Minimization (CFR) has found success in settings like poker which have both terminal states and perfect recall. We seek to understand how to relax these requirements. As a first step, we introduce a simple algorithm, local no-regret learning (LONR), which uses a Q-learning-like update rule to allow learning without terminal states or perfect recall. We prove its convergence for the basic case of MDPs (where Q-learning already suffices), as well as limited extensions of them. With a straightforward modification, we extend the basic premise of LONR to work in multi-agent settings and present empirical results showing that it achieves last iterate convergence in a number of settings. Most notably, we show this for NoSDE games, a class of Markov games specifically designed to be impossible for Q-value-based methods to learn and where no prior algorithm is known to achieve convergence to a stationary equilibrium even on average. Furthermore, by leveraging last iterate converging no-regret algorithms (one of which we introduce), we show empirical last iterate convergence in all domains tested with LONR.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/combining-no-regret-and-q-learning/},
}