% Conference paper entry. The citation key is kept unchanged so existing \cite commands keep working.
% NOTE(review): the url host does not look like the publisher's own domain
% (possible proxy/mirror rewrite) -- confirm against the canonical publication page
% and add a doi if one exists.
@inproceedings{suboti2022a,
  author    = {Suboti{\'c}, Pavle and Miliki{\'c}, Lazar and Stoji{\'c}, Milan},
  title     = {A Static Analysis Framework for Data Science Notebooks},
  booktitle = {2022 International Conference on Software Engineering},
  year      = {2022},
  month     = may,
  abstract  = {Notebooks provide an interactive environment for programmers to develop code, analyse data and inject interleaved visualizations in a single environment. Despite their flexibility, a major pitfall that data scientists encounter is unexpected behaviour caused by the unique out-of-order execution model of notebooks. As a result, data scientists face various challenges ranging from notebook correctness, reproducibility and cleaning. In this paper, we propose a framework that performs static analysis on notebooks, incorporating their unique execution semantics. Our framework is general in the sense that it accommodate for a wide range of analyses, useful for various notebook use cases. We have instantiated our framework on a diverse set of analyses and have evaluated them on 2211 real world notebooks. Our evaluation demonstrates that the vast majority (98.7\%) of notebooks can be analysed in less than a second, well within the time frame required by interactive notebook clients},
  url       = {http://approjects.co.za/?big=en-us/research/publication/a-static-analysis-framework-for-data-science-notebooks/},
}