@inproceedings{loynd2020working,
  author    = {Loynd, Ricky and Fernandez, Roland and Celikyilmaz, Asli and Swaminathan, Adith and Hausknecht, Matthew},
  title     = {Working Memory Graphs},
  booktitle = {Proceedings of the 37th International Conference on Machine Learning (ICML)},
  year      = {2020},
  month     = {July},
  abstract  = {Transformers have increasingly outperformed gated RNNs in obtaining new state-of-the-art results on supervised tasks involving text sequences. Inspired by this trend, we study the question of how Transformer-based models can improve the performance of sequential decision-making agents. We present the Working Memory Graph (WMG), an agent that employs multi-head self-attention to reason over a dynamic set of vectors representing observed and recurrent state. We evaluate WMG in three environments featuring factored observation spaces: a Pathfinding environment that requires complex reasoning over past observations, BabyAI gridworld levels that involve variable goals, and Sokoban, which emphasizes future planning. We find that the combination of WMG's Transformer-based architecture with factored observation spaces leads to significant gains in learning efficiency compared to baseline architectures across all tasks. WMG demonstrates how Transformer-based models can dramatically boost sample efficiency in RL environments for which observations can be factored.},
  url       = {https://www.microsoft.com/en-us/research/publication/working-memory-graphs/},
}
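
The abstract describes WMG as applying multi-head self-attention to a dynamic set of vectors representing observed and recurrent state. Below is a minimal PyTorch sketch of that general idea, not the authors' implementation: the class name, the use of a learned core token, the memo-buffer handling, and all hyperparameters are illustrative assumptions.

import torch
import torch.nn as nn

class WMGSketch(nn.Module):
    # Hypothetical sketch: a Transformer core attends over a learned core token,
    # embedded observation factors, and a bounded buffer of recurrent memo vectors.
    def __init__(self, factor_dim, d_model=128, n_heads=4, n_layers=2, n_actions=6, max_memos=8):
        super().__init__()
        self.max_memos = max_memos
        self.core_token = nn.Parameter(torch.zeros(1, d_model))        # learned core vector
        self.factor_embed = nn.Linear(factor_dim, d_model)             # embed each observation factor
        layer = nn.TransformerEncoderLayer(d_model, n_heads, dim_feedforward=4 * d_model, batch_first=True)
        self.encoder = nn.TransformerEncoder(layer, num_layers=n_layers)
        self.policy_head = nn.Linear(d_model, n_actions)               # actor logits
        self.value_head = nn.Linear(d_model, 1)                        # critic value estimate
        self.memo_head = nn.Linear(d_model, d_model)                   # produces the new recurrent memo

    def forward(self, factors, memos):
        # factors: (num_factors, factor_dim) for the current observation
        # memos:   (num_memos, d_model) recurrent vectors carried over from earlier steps
        tokens = torch.cat([self.core_token, self.factor_embed(factors), memos], dim=0)
        hidden = self.encoder(tokens.unsqueeze(0))[0, 0]               # output at the core position
        new_memo = torch.tanh(self.memo_head(hidden))
        memos = torch.cat([new_memo.unsqueeze(0), memos], dim=0)[: self.max_memos]
        return self.policy_head(hidden), self.value_head(hidden), memos

# Example step: start with an empty memo buffer and a 5-factor observation.
agent = WMGSketch(factor_dim=16)
logits, value, memos = agent(torch.randn(5, 16), torch.zeros(0, 128))

At each step the newest memo is prepended and the oldest is dropped once the buffer is full, so the attention set stays a fixed, bounded size while still carrying state across time.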