% NeurIPS 2018 conference paper (Dann et al.) on oracle-efficient PAC RL.
% NOTE(review): the url previously routed through a third-party proxy host
% (approjects.co.za/?big=...); it has been restored to what appears to be the
% original Microsoft Research publication page -- confirm the link resolves.
@inproceedings{dann2018on,
  author    = {Dann, Christoph and Jiang, Nan and Krishnamurthy, Akshay and Agarwal, Alekh and Langford, John and Schapire, Robert E.},
  title     = {On Oracle-Efficient {PAC} Reinforcement Learning with Rich Observations},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2018},
  month     = dec,
  abstract  = {We study the computational tractability of provably sample-efficient (PAC) reinforcement learning in episodic environments with rich observations. We present new sample-efficient algorithms for environments with deterministic hidden state dynamics and stochastic rich observations. These methods operate in an oracle model of computation -- accessing policy and value function classes exclusively through standard optimization primitives -- and therefore represent computationally efficient alternatives to prior algorithms that require enumeration. In the more general stochastic transition setting, we prove that the only known sample-efficient algorithm, Olive [1], cannot be implemented in our oracle model. We also present several examples that illustrate fundamental challenges of tractable PAC reinforcement learning in such general settings.},
  url       = {https://www.microsoft.com/en-us/research/publication/on-oracle-efficient-pac-reinforcement-learning-with-rich-observations/},
}