@misc{gunasekar2023textbooks,
  author   = {Gunasekar, Suriya and Zhang, Yi and Aneja, Jyoti and Teodoro Mendes, Caio César and Del Giorno, Allie and Gopi, Sivakanth and Javaheripi, Mojan and Kauffmann, Piero and de Rosa, Gustavo and Saarikivi, Olli and Salim, Adil and Shah, Shital and Behl, Harkirat Singh and Wang, Xin and Bubeck, Sébastien and Eldan, Ronen and Kalai, Adam Tauman and Lee, Yin Tat and Li, Yuanzhi},
  title    = {Textbooks Are All You Need},
  year     = {2023},
  month    = {June},
  abstract = {We introduce phi-1, a new large language model for code, with significantly smaller size than competing models: phi-1 is a Transformer-based model with 1.3B parameters, trained for 4 days on 8 A100s, using a selection of ``textbook quality'' data from the web (6B tokens) and synthetically generated textbooks and exercises with GPT-3.5 (1B tokens). Despite this small scale, phi-1 attains pass@1 accuracy 50.6\% on HumanEval and 55.5\% on MBPP. It also displays surprising emergent properties compared to phi-1-base, our model before our finetuning stage on a dataset of coding exercises, and phi-1-small, a smaller model with 350M parameters trained with the same pipeline as phi-1 that still achieves 45\% on HumanEval.},
  url      = {https://www.microsoft.com/en-us/research/publication/textbooks-are-all-you-need/},
}