% Handler, Denny, Wallach, and O'Connor (2016): workshop paper introducing NPFST,
% a part-of-speech/finite-state noun phrase extractor for bag-of-words text analysis.
@inproceedings{handler2016bag,
  author    = {Handler, Abram and Denny, Matthew and Wallach, Hanna and O'Connor, Brendan T.},
  title     = {Bag of What? Simple Noun Phrase Extraction for Text Analysis},
  booktitle = {{NLP+CSS@EMNLP}},
  year      = {2016},
  month     = apr,
  abstract  = {Social scientists who do not have specialized natural language processing training often use a unigram bag-of-words (BOW) representation when analyzing text corpora. We offer a new phrase-based method, NPFST, for enriching a unigram BOW. NPFST uses a part-of-speech tagger and a finite state transducer to extract multiword phrases to be added to a unigram BOW. We compare NPFST to both ngram and parsing methods in terms of yield, recall, and efficiency. We then demonstrate how to use NPFST for exploratory analyses; it performs well, without configuration, on many different kinds of English text. Finally, we present a case study using NPFST to analyze a new corpus of U.S. congressional bills.},
  url       = {https://www.microsoft.com/en-us/research/publication/bag-of-what-simple-noun-phrase-extraction-for-text-analysis/},
}