@inproceedings{chakraborty2023ranking,
  author    = {Chakraborty, Saikat and Lahiri, Shuvendu and Fakhoury, Sarah and Musuvathi, Madan and Lal, Akash and Rastogi, Aseem and Swamy, Nikhil and Sharma, Rahul},
  title     = {Ranking LLM-Generated Loop Invariants for Program Verification},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  year      = {2023},
  month     = {December},
  abstract  = {Synthesizing inductive loop invariants is fundamental to automating program verification. In this work, we observe that Large Language Models (such as gpt-3.5 or gpt-4) are capable of synthesizing loop invariants for a class of programs in a 0-shot setting, yet require several samples to generate the correct invariants. This can lead to a large number of calls to a program verifier to establish an invariant. To address this issue, we propose a {\it re-ranking} approach for the generated results of LLMs. We have designed a ranker that can distinguish between correct inductive invariants and incorrect attempts based on the problem definition. The ranker is optimized as a contrastive ranker. Experimental results demonstrate that this re-ranking mechanism significantly improves the ranking of correct invariants among the generated candidates, leading to a notable reduction in the number of calls to a verifier.},
  url       = {https://www.microsoft.com/en-us/research/publication/ranking-llm-generated-loop-invariants-for-program-verification/},
  note      = {EMNLP-Findings 2023},
}