% Du et al. (2019), conference paper on latent state decoding for
% rich-observation RL. Field notes:
%   - month uses the predefined `jun` macro so styles can format/localize it.
%   - {RL} is brace-protected so sentence-casing styles keep the acronym.
%   - Accented letters use BibTeX special-character escapes so the entry
%     sorts and renders correctly under classic BibTeX as well as Biber.
% NOTE(review): the url host does not look like the publisher's own domain;
% confirm it and replace with the canonical landing page or a DOI if available.
@inproceedings{du2019provably,
  author    = {Du, Simon and Krishnamurthy, Akshay and Jiang, Nan and Agarwal, Alekh and Dud{\'\i}k, Miro and Langford, John},
  title     = {Provably efficient {RL} with Rich Observations via Latent State Decoding},
  booktitle = {International Conference on Machine Learning},
  year      = {2019},
  month     = jun,
  abstract  = {We study the exploration problem in episodic MDPs with rich observations generated from a small number of latent states. Under certain identifiability assumptions, we demonstrate how to estimate a mapping from the observations to latent states inductively through a sequence of regression and clustering steps---where previously decoded latent states provide labels for later regression problems---and use it to construct good exploration policies. We provide finite-sample guarantees on the quality of the learned state decoding function and exploration policies, and complement our theory with an empirical evaluation on a class of hard exploration problems. Our method exponentially improves over Q-learning with na{\"\i}ve exploration, even when Q-learning has cheating access to latent states.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/provably-efficient-rl-with-rich-observations-via-latent-state-decoding/},
}