@inproceedings{singh2023format, author = {Singh, Mukul and Cambronero, José and Gulwani, Sumit and Le, Vu and Negreanu, Carina and Nouri, Elnaz and Raza, Mohammad and Verbruggen, Gust}, title = {FormaT5: Abstention and Examples for Conditional Table Formatting with Natural Language}, booktitle = {50th International Conference on Very Large Data Bases (VLDB 24)}, year = {2023}, month = {October}, abstract = {Formatting is an important property in tables for visualization, presentation, and analysis. Spreadsheet software allows users to automatically format their tables by writing data-dependent conditional formatting (CF) rules. Writing such rules is often challenging for users as it requires them to understand and implement the underlying logic. We present FormaT5, a transformer-based model that can generate a CF rule given the target table and a natural language description of the desired formatting logic. We find that user descriptions for these tasks are often under-specified or ambiguous, making it harder for code generation systems to accurately learn the desired rule in a single step. To tackle this problem of under-specification and minimize argument errors, FormaT5 learns to predict placeholders though an abstention objective. These placeholders can then be filled by a second model or, when examples of rows that should be formatted are available, by a programming-by-example system. To evaluate FormaT5 on diverse and real scenarios, we create an extensive benchmark of 1053 CF tasks, containing real-world descriptions collected from four different sources. We release our benchmarks to encourage research in this area. Abstention and filling allow FormaT5 to outperform 8 different neural approaches on our benchmarks, both with and without examples. Our results illustrate the value of building domain-specific learning systems.}, url = {http://approjects.co.za/?big=en-us/research/publication/format5-abstention-and-examples-for-conditional-table-formatting-with-natural-language/}, }