% Workshop paper on SVM-based paraphrase identification and corpus construction.
% NOTE(review): the url host below looks like a mirror/proxy rather than the
% canonical publication page, and it uses plain http -- confirm and replace.
@inproceedings{brockett2005support,
  author    = {Brockett, Chris and Dolan, Bill},
  title     = {Support Vector Machines for Paraphrase Identification and Corpus Construction},
  booktitle = {Third International Workshop on Paraphrasing ({IWP2005})},
  publisher = {Asia Federation of Natural Language Processing},
  year      = {2005},
  month     = jan,
  url       = {http://approjects.co.za/?big=en-us/research/publication/support-vector-machines-for-paraphrase-identification-and-corpus-construction/},
  abstract  = {The lack of readily-available large corpora of aligned monolingual sentence pairs is a major obstacle to the development of Statistical Machine Translation-based paraphrase models. In this paper, we describe the use of annotated datasets and Support Vector Machines to induce larger monolingual paraphrase corpora from a comparable corpus of news clusters found on the World Wide Web. Features include: morphological variants; WordNet synonyms and hypernyms; log-likelihood-based word pairings dynamically obtained from baseline sentence alignments; and formal string features such as word-based edit distance. Use of this technique dramatically reduces the Alignment Error Rate of the extracted corpora over heuristic methods based on position of the sentences in the text.},
}