% Conference paper in the COLING 2014 proceedings (Technical Papers volume).
% NOTE(review): `publisher` currently holds the conference name rather than a
% publishing body, and the `url` host looks like a proxy/mirror rewrite rather
% than the original site -- confirm both against the official proceedings
% record before relying on them.
@inproceedings{roy2014automatic,
  author    = {Roy, Rishiraj Saha and Katare, Rahul and Ganguly, Niloy and Choudhury, Monojit},
  title     = {Automatic Discovery of Adposition Typology},
  booktitle = {Proceedings of {COLING} 2014, the 25th International Conference on Computational Linguistics: Technical Papers},
  year      = {2014},
  month     = aug,
  pages     = {1037--1046},
  publisher = {Coling 2014},
  url       = {http://approjects.co.za/?big=en-us/research/publication/automatic-discovery-of-adposition-typology/},
  abstract  = {Natural languages (NL) can be classified as prepositional or postpositional based on the order of the noun phrase and the adposition. Categorizing a language by its adposition typology helps in addressing several challenges in linguistics and natural language processing (NLP). Understanding the adposition typologies for less-studied languages by manual analysis of large text corpora can be quite expensive, yet automatic discovery of the same has received very little attention till date. This research presents a simple unsupervised technique to automatically predict the adposition typology for a language. Most of the function words of a language are adpositions, and we show that function words can be effectively separated from content words by leveraging differences in their distributional properties in a corpus. Using this principle, we show that languages can be classified as prepositional or postpositional based on the rank correlations derived from entropies of word co-occurrence distributions. Our claims are substantiated through experiments on 23 languages from ten diverse families, 19 of which are correctly classified by our technique.},
}