@inproceedings{mhammedi2020learning,
  author    = {Mhammedi, Zakaria and Foster, Dylan J. and Simchowitz, Max and Misra, Dipendra and Sun, Wen and Krishnamurthy, Akshay and Rakhlin, Alexander and Langford, John},
  title     = {Learning the Linear Quadratic Regulator from Nonlinear Observations},
  booktitle = {Thirty-fourth Conference on Neural Information Processing Systems (NeurIPS 2020)},
  year      = {2020},
  month     = {December},
  abstract  = {We introduce a new problem setting for continuous control called the LQR with Rich Observations, or RichLQR. In our setting, the environment is summarized by a low-dimensional continuous latent state with linear dynamics and quadratic costs, but the agent operates on high-dimensional, nonlinear observations such as images from a camera. To enable sample-efficient learning, we assume that the learner has access to a class of decoder functions (e.g., neural networks) that is flexible enough to capture the mapping from observations to latent states. We introduce a new algorithm, RichID, which learns a near-optimal policy for the RichLQR with sample complexity scaling only with the dimension of the latent state space and the capacity of the decoder function class. RichID is oracle-efficient and accesses the decoder class only through calls to a least-squares regression oracle. Our results constitute the first provable sample complexity guarantee for continuous control with an unknown nonlinearity in the system model and general function approximation.},
  url       = {https://www.microsoft.com/en-us/research/publication/learning-the-linear-quadratic-regulator-from-nonlinear-observations/},
}