% Conference paper (AAAI 2018) introducing BBQ-Networks for dialogue-policy exploration.
% NOTE(review): the url host below looks like a third-party mirror rather than the
% publisher page; confirm and replace with the canonical link (or a DOI) if available.
@inproceedings{lipton2018bbq-networks,
  author    = {Lipton, Zachary and Li, Xiujun and Gao, Jianfeng and Li, Lihong and Ahmed, Faisal and Deng, Li},
  title     = {{BBQ-Networks}: Efficient Exploration in Deep Reinforcement Learning for Task-Oriented Dialogue Systems},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence},
  year      = {2018},
  month     = feb,
  abstract  = {We present a new algorithm that significantly improves the efficiency of exploration for deep Q-learning agents in dialogue systems. Our agents explore via Thompson sampling, drawing Monte Carlo samples from a Bayes-by-Backprop neural network. Our algorithm learns much faster than common exploration strategies such as $\epsilon$-greedy, Boltzmann exploration, and bootstrapping-based approaches. Additionally, we show that spiking the replay buffer with experiences from just a few successful episodes can make Q-learning feasible when it might otherwise fail.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/efficient-task-completion-dialogue-policy-learning-bbq-networks/},
}