@inproceedings{dauparas2018depth,
  author    = {Dauparas, Justas and Tomioka, Ryota and Hofmann, Katja},
  title     = {Depth and nonlinearity induce implicit exploration for RL},
  booktitle = {ICML workshop on Exploration in RL},
  year      = {2018},
  month     = {July},
  abstract  = {The question of how to explore, i.e., take actions with uncertain outcomes to learn about possible future rewards, is a key question in reinforcement learning (RL). Here, we show a surprising result: Q-learning with a nonlinear Q-function and no explicit exploration (i.e., a purely greedy policy) can learn several standard benchmark tasks, including mountain car, as well as, or better than, the most commonly used $\epsilon$-greedy exploration. We carefully examine this result and show that both the depth of the Q-network and the type of nonlinearity are important to induce such deterministic exploration.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/depth-and-nonlinearity-induce-implicit-exploration-for-rl/},
}