@inproceedings{dauparas2018depth,
  author    = {Dauparas, Justas and Tomioka, Ryota and Hofmann, Katja},
  title     = {Depth and nonlinearity induce implicit exploration for RL},
  booktitle = {ICML workshop on Exploration in RL},
  year      = {2018},
  month     = {July},
  abstract  = {The question of how to explore, i.e., take actions with uncertain outcomes to learn about possible future rewards, is a key question in reinforcement learning (RL). Here, we show a surprising result: Q-learning with a nonlinear Q-function and no explicit exploration (i.e., a purely greedy policy) can learn several standard benchmark tasks, including mountain car, as well as, or better than, the most commonly used $\epsilon$-greedy exploration. We carefully examine this result and show that both the depth of the Q-network and the type of nonlinearity are important to induce such deterministic exploration.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/depth-and-nonlinearity-induce-implicit-exploration-for-rl/},
}