% Conference paper: differentially private n-gram extraction (NeurIPS 2021).
% NOTE(review): the url field is the presumed canonical Microsoft Research
% publication page for this paper -- confirm that it resolves.
@inproceedings{kim2021differentially,
  author    = {Kim, Kunho and Gopi, Sivakanth and Kulkarni, Janardhan and Yekhanin, Sergey},
  title     = {Differentially Private {n-gram} Extraction},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  year      = {2021},
  month     = dec,
  abstract  = {We revisit the problem of n-gram extraction in the differential privacy setting. In this problem, given a corpus of private text data, the goal is to release as many n-grams as possible while preserving user level privacy. Extracting n-grams is a fundamental subroutine in many NLP applications such as sentence completion, response generation for emails etc. The problem also arises in other applications such as sequence mining, and is a generalization of recently studied differentially private set union (DPSU). In this paper, we develop a new differentially private algorithm for this problem which, in our experiments, significantly outperforms the state-of-the-art. Our improvements stem from combining recent advances in DPSU, privacy accounting, and new heuristics for pruning in the tree-based approach initiated by Chen et al. (2012).},
  url       = {https://www.microsoft.com/en-us/research/publication/differentially-private-n-gram-extraction/},
}