% Preprint with no journal venue: typed as misc, with the publication
% status carried in the note field rather than in a journal field.
% NOTE(review): the url host was normalized to www.microsoft.com on the
% assumption that the earlier approjects.co.za/?big= form was a mirror
% rewrite of the same publication page -- confirm the link resolves.
@misc{singh2023assessing,
  author   = {Singh, Mukul and Cambronero, Jos{\'e} and Gulwani, Sumit and Le, Vu and Verbruggen, Gust},
  title    = {Assessing {GPT4-V} on Structured Reasoning Tasks},
  year     = {2023},
  month    = dec,
  note     = {Preprint},
  url      = {https://www.microsoft.com/en-us/research/publication/assessing-gpt4-v-on-structured-reasoning-tasks/},
  abstract = {Multi-modality promises to unlock further uses for large language models. Recently, the state-of-the-art language model GPT-4 was enhanced with vision capabilities. We carry out a prompting evaluation of GPT-4V and five other baselines on structured reasoning tasks, such as mathematical reasoning, visual data analysis, and code generation. We show that visual Chain-of-Thought, an extension of Chain-of-Thought to multi-modal LLMs, yields significant improvements over the vanilla model. We also present a categorized analysis of scenarios where these models perform well and where they struggle, highlighting challenges associated with coherent multimodal reasoning.},
}