% ScienceWorld benchmark paper (Wang, Jansen, Cote, Ammanabrolu; 2022).
% NOTE(review): the journal value "Science World" looks like an export
% artifact derived from the title rather than a real venue. The paper
% appears to be an arXiv preprint that was later published at EMNLP 2022;
% confirm against the publisher record, then switch the entry type and
% venue fields accordingly (keep the citation key unchanged).
% NOTE(review): the url host was restored from what looked like a
% mirror-rewritten form of the Microsoft Research address; confirm it resolves.
@article{wang2022scienceworld,
  author   = {Wang, Ruoyao and Jansen, Peter and C{\^o}t{\'e}, Marc-Alexandre and Ammanabrolu, Prithviraj},
  title    = {{ScienceWorld}: Is your Agent Smarter than a 5th Grader?},
  journal  = {Science World},
  year     = {2022},
  month    = mar,
  url      = {https://www.microsoft.com/en-us/research/publication/scienceworld-is-your-agent-smarter-than-a-5th-grader/},
  abstract = {No, it is not. Yet. This new benchmark aims at testing the scientific reasoning abilities of contemporary interactive and grounded text agents at the level of a standard North American elementary school curriculum. Despite the recent transformer-based progress seen in adjacent fields such as question-answering, scientific text processing, and the wider area of natural language processing, we find that current state-of-the-art models are unable to reason about or explain learned science concepts in novel contexts --- e.g. models can easily answer what the conductivity of a previously seen material is but struggle when asked how they would conduct an experiment in a grounded, interactive environment to find the conductivity of an unknown material. This begs the question of whether current models are simply retrieving answers by way of seeing a large number of similar input examples or if they have learned to reason about concepts in a reusable manner. We hypothesize that agents need to be grounded in interactive environments to achieve such reasoning capabilities. Our experiments provide empirical evidence supporting this hypothesis --- finding that an 1.5 million parameter grounded agent trained interactively for 100k steps can outperform a 11 billion parameter model statically trained for scientific question-answering and reasoning via millions of expert demonstrations.},
}