@article{boiarsky2024deeper,
  author   = {Boiarsky, Rebecca and Singh, Nalini M. and Buendia, Alejandro and Amini, Ava P. and Getz, Gad and Sontag, David},
  title    = {Deeper evaluation of a single-cell foundation model},
  year     = {2024},
  month    = {December},
  abstract = {Large-scale foundation models, which are pre-trained on massive, unlabelled datasets and subsequently fine-tuned on specific tasks, have recently achieved unparalleled success on a wide array of applications, including in healthcare and biology. The success of these models has showcased the power of leveraging generalizable features and contextual understanding to improve a model’s performance. Single-cell bidirectional encoder representations from transformers (scBERT) by Yang et al. is one of several recently developed foundation models to learn representations of single-cell RNA-sequencing data. Yang et al. pre-trained their model on 1.12 million cells to impute masked gene-expression values and characterize the performance of their model on a fine-tuning task to annotate cell types. We reproduce their results, and provide additional baselines and ablation studies (that is, remove components of the model’s architecture or training process) to develop a deeper understanding of their results and the potential benefits and limitations of single-cell foundation models.},
  url      = {http://approjects.co.za/?big=en-us/research/publication/deeper-evaluation-of-a-single-cell-foundation-model/},
  journal  = {Nature Machine Intelligence},
}