% Conference paper introducing BBQ-Networks (Thompson-sampling exploration for
% deep Q-learning dialogue agents), as described in the abstract field below.
% The former edition field only repeated the venue string held in booktitle and
% was removed: edition is reserved for the ordinal edition of a book.
% NOTE(review): the url host does not look like the publisher's own domain;
% confirm the canonical link (or add a DOI) before relying on it.
@inproceedings{lipton2018bbq-networks,
  author    = {Lipton, Zachary and Li, Xiujun and Gao, Jianfeng and Li, Lihong and Ahmed, Faisal and Deng, Li},
  title     = {{BBQ-Networks}: Efficient Exploration in Deep Reinforcement Learning for Task-Oriented Dialogue Systems},
  booktitle = {{AAAI} 2018},
  year      = {2018},
  month     = feb,
  abstract  = {We present a new algorithm that significantly improves the efficiency of exploration for deep Q-learning agents in dialogue systems. Our agents explore via Thompson sampling, drawing Monte Carlo samples from a Bayes-by-Backprop neural network. Our algorithm learns much faster than common exploration strategies such as $\epsilon$-greedy, Boltzmann exploration, and bootstrapping-based approaches. Additionally, we show that spiking the replay buffer with experiences from just a few successful episodes can make Q-learning feasible when it might otherwise fail.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/efficient-task-completion-dialogue-policy-learning-bbq-networks/},
}