@inproceedings{daumiii2018residual,
  author    = {Daum{\'e}, III, Hal and Langford, John and Mineiro, Paul and Sharaf, Amr},
  title     = {Residual Loss Prediction: Reinforcement Learning with no Incremental Feedback},
  booktitle = {{ICLR} 2018 Conference},
  year      = {2018},
  month     = feb,
  abstract  = {We consider reinforcement learning and bandit structured prediction problems with very sparse loss feedback: only at the end of an episode. We introduce a novel algorithm, RESIDUAL LOSS PREDICTION (RESLOPE), that solves such problems by automatically learning an internal representation of a denser reward function. RESLOPE operates as a reduction to contextual bandits, using its learned loss representation to solve the credit assignment problem, and a contextual bandit oracle to trade-off exploration and exploitation. RESLOPE enjoys a no-regret reduction-style theoretical guarantee and outperforms state of the art reinforcement learning algorithms in both MDP environments and bandit structured prediction settings. TL;DR: We present a novel algorithm for solving reinforcement learning and bandit structured prediction problems with very sparse loss feedback.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/residual-loss-prediction-reinforcement-learning-no-incremental-feedback/},
}