% Conference paper: Kim and Sarikaya (2015), "Compact Lexicon Selection with Spectral Methods".
% NOTE(review): booktitle holds the venue name rather than a full proceedings title -- confirm against the published proceedings.
% NOTE(review): the url host looks like a mirror rather than the publisher's own site -- confirm the canonical link (or add a doi).
@inproceedings{kim2015compact,
  author    = {Kim, Young-Bum and Sarikaya, Ruhi},
  title     = {Compact Lexicon Selection with Spectral Methods},
  booktitle = {Association for Computational Linguistics ({ACL})},
  publisher = {Association for Computational Linguistics},
  year      = {2015},
  month     = aug,
  url       = {http://approjects.co.za/?big=en-us/research/publication/compact-lexicon-selection-with-spectral-methods/},
  abstract  = {In this paper, we introduce the task of selecting compact lexicon from large, noisy gazetteers. This scenario arises often in practice, in particular spoken language understanding (SLU). We propose a simple and effective solution based on matrix decomposition techniques: canonical correlation analysis (CCA) and rank-revealing QR (RRQR) factorization. CCA is first used to derive low-dimensional gazetteer embeddings from domain-specific search logs. Then RRQR is used to find a subset of these embeddings whose span approximates the entire lexicon space. Experiments on slot tagging show that our method yields a small set of lexicon entities with average relative error reduction of $>$ 50\% over randomly selected lexicon.},
}