@inproceedings{laroche2018safe,
  author    = {Laroche, Romain and Trichelair, Paul},
  title     = {Safe Policy Improvement with Baseline Bootstrapping},
  booktitle = {European Workshop on Reinforcement Learning (EWRL)},
  year      = {2018},
  month     = {October},
  abstract  = {In this paper, we consider the Batch Reinforcement Learning task and adopt the safe policy improvement (SPI) approach: we compute a target policy that is guaranteed to perform at least as well as a given baseline policy, approximately and with high probability. Our SPI strategy, inspired by the knows-what-it-knows paradigm, consists in bootstrapping the target policy with the baseline policy when the target does not know. We develop a policy-based, computationally efficient bootstrapping algorithm, accompanied by theoretical SPI bounds for the tabular case. We empirically show the limits of existing algorithms on a small stochastic gridworld problem, and then demonstrate that our algorithm improves not only the worst-case performance but also the mean performance.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/safe-policy-improvement-with-baseline-bootstrapping/},
}