% Conference paper entry (booktitle: ICASSP 2022, April 2022).
% The month field uses the predefined three-letter macro so that the
% bibliography style decides how the month is abbreviated and localised.
% NOTE(review): the url host does not look like a publisher or author site;
% confirm the link (and prefer adding a doi field) before relying on it.
@inproceedings{chen2022efficient,
  author    = {Chen, Mingzhe and Xiao, Xi and Zhang, Wanpeng and Gao, Xiaotian},
  title     = {Efficient and Stable Information Directed Exploration for Continuous Reinforcement Learning},
  booktitle = {{ICASSP} 2022},
  year      = {2022},
  month     = apr,
  abstract  = {In this paper, we investigate the exploration-exploitation dilemma of reinforcement learning algorithms. We adapt the information directed sampling, an exploration framework that measures the information gain of a policy, to the continuous reinforcement learning. To stabilize the off-policy learning process and further improve the sample efficiency, we propose to use a randomized learning target and to dynamically adjust the update-to-data ratio for different parts of the neural network model. Experiments show that our approach significantly improves over existing methods and successfully completes tasks with highly sparse reward signals.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/efficient-and-stable-information-directed-exploration-for-continuous-reinforcement-learning/},
}