@article{vanseijen2015an,
  author   = {van Seijen, Harm and Mahmood, Ashique Rupam and Pilarski, Patrick M. and Sutton, Richard S.},
  title    = {An Empirical Evaluation of True Online TD($\lambda$)},
  year     = {2015},
  month    = {July},
  abstract = {The true online TD($\lambda$) algorithm has recently been proposed (van Seijen and Sutton, 2014) as a universal replacement for the popular TD($\lambda$) algorithm in temporal-difference learning and reinforcement learning. True online TD($\lambda$) has better theoretical properties than conventional TD($\lambda$), and the expectation is that it also results in faster learning. In this paper, we put this hypothesis to the test. Specifically, we compare the performance of true online TD($\lambda$) with that of TD($\lambda$) on challenging examples, random Markov reward processes, and a real-world myoelectric prosthetic arm. We use linear function approximation with tabular, binary, and non-binary features. We assess the algorithms along three dimensions: computational cost, learning speed, and ease of use. Our results confirm the strength of true online TD($\lambda$): 1) for sparse feature vectors, the computational overhead with respect to TD($\lambda$) is minimal, and for non-sparse features the computation time is at most twice that of TD($\lambda$); 2) across all domains/representations the learning speed of true online TD($\lambda$) is often better than, and never worse than, that of TD($\lambda$); and 3) true online TD($\lambda$) is easier to use, because it does not require choosing between trace types, and it is generally more stable with respect to the step-size. Overall, our results suggest that true online TD($\lambda$) should be the first choice when looking for an efficient, general-purpose TD method.},
  url      = {http://approjects.co.za/?big=en-us/research/publication/empirical-evaluation-true-online-tdlambda/},
  journal  = {arXiv},
}