% Conference paper (inproceedings) by Suzuki, Li and Gao, 2009.
% - month uses the predefined three-letter macro so styles can localise it.
% - Proper nouns/acronyms in title and booktitle are brace-protected against
%   sentence-casing by bibliography styles.
% - Percent signs in the abstract are escaped so the text is safe to typeset.
% - No edition field: the venue name belongs in booktitle only.
% NOTE(review): the url host looks like a mirror/proxy rather than the
%   publisher's own site -- confirm and replace with the canonical link or a DOI.
@inproceedings{suzuki2009discovery,
  author    = {Suzuki, Hisami and Li, Xiao and Gao, Jianfeng},
  title     = {Discovery of Term Variation in {Japanese} Web Search Queries},
  booktitle = {Proceedings of {EMNLP}},
  publisher = {Association for Computational Linguistics},
  year      = {2009},
  month     = aug,
  url       = {http://approjects.co.za/?big=en-us/research/publication/discovery-of-term-variation-in-japanese-web-search-queries/},
  abstract  = {In this paper we address the problem of identifying a broad range of term variations in Japanese web search queries, where these variations pose a particularly thorny problem due to the multiple character types employed in its writing system. Our method extends the techniques proposed for English spelling correction of web queries to handle a wider range of term variants including spelling mistakes, valid alternative spellings using multiple character types, transliterations and abbreviations. The core of our method is a statistical model built on the MART algorithm (Friedman, 2001). We show that both string and semantic similarity features contribute to identifying term variation in web search queries; specifically, the semantic similarity features used in our system are learned by mining user session and click-through logs, and are useful not only as model features but also in generating term variation candidates efficiently. The proposed method achieves 70\% precision on the term variation identification task with the recall slightly higher than 60\%, reducing the error rate of a na{\"\i}ve baseline by 38\%.},
}