% Brill, Kacmarcik & Brockett (2001): mining katakana-English term pairs from query logs.
% NOTE(review): the url host below looks like a proxy/mirror rewrite rather than the
% publisher's own domain -- confirm and replace with the canonical link (or a DOI) if one exists.
% NOTE(review): booktitle reads like an organisation name rather than a proceedings title -- verify.
@inproceedings{brill2001automatically,
  author    = {Brill, Eric and Kacmarcik, Gary and Brockett, Chris},
  title     = {Automatically Harvesting {Katakana-English} Term Pairs from Search Engine Query Logs},
  booktitle = {Asia Federation of Natural Language Processing},
  year      = {2001},
  month     = nov,
  abstract  = {This paper describes a method of extracting katakana words and phrases, along with their English counterparts from non-aligned monolingual web search engine query logs. The method employs a trainable edit distance function to find pairs that have a high probability of being equivalent. These pairs can then be used to further bootstrap training of the edit distance function, resulting in improved back-transliteration from katakana to English. In addition, this is an effective method for mining large numbers of katakana strings to enhance a bilingual lexicon. The improved edit distance function and enhanced lexicon can be used for more accurate alignment of bitexts, and for application during runtime MT and multilingual IR.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/automatically-harvesting-katakana-english-term-pairs-from-search-engine-query-logs/},
}