% Conference paper: "Provably Efficient RL with Rich Observations via Latent
% State Decoding" (Du, Krishnamurthy, Jiang, Agarwal, Dudik, Langford; 2019).
%
% Notes for maintainers:
%   - Accented characters are written as BibTeX special characters
%     (e.g. {\'\i}) so the entry sorts and labels correctly under classic
%     8-bit BibTeX as well as Biber.
%   - {RL} is brace-protected so sentence-casing styles do not lowercase it.
%   - month uses the predefined, unquoted macro so styles can format/localise it.
%   - NOTE(review): the url previously pointed at "approjects.co.za/?big=...",
%     which looks like a proxy-rewritten copy of the microsoft.com publication
%     page; it has been restored to the presumed canonical host. Confirm the
%     link resolves, and prefer adding a doi if one exists.
@inproceedings{du2019provably,
  author    = {Du, Simon and Krishnamurthy, Akshay and Jiang, Nan and Agarwal, Alekh and Dud{\'\i}k, Miro and Langford, John},
  title     = {Provably Efficient {RL} with Rich Observations via Latent State Decoding},
  booktitle = {International Conference on Machine Learning},
  year      = {2019},
  month     = jun,
  abstract  = {We study the exploration problem in episodic MDPs with rich observations generated from a small number of latent states. Under certain identifiability assumptions, we demonstrate how to estimate a mapping from the observations to latent states inductively through a sequence of regression and clustering steps---where previously decoded latent states provide labels for later regression problems---and use it to construct good exploration policies. We provide finite-sample guarantees on the quality of the learned state decoding function and exploration policies, and complement our theory with an empirical evaluation on a class of hard exploration problems. Our method exponentially improves over Q-learning with na{\"\i}ve exploration, even when Q-learning has cheating access to latent states.},
  url       = {https://www.microsoft.com/en-us/research/publication/provably-efficient-rl-with-rich-observations-via-latent-state-decoding/},
}