% Conference paper (ACL-AFNLP 2009) on an entailment-feature-based metric for
% machine translation evaluation.
% NOTE(review): the url host looks like a proxy/mirror rewrite rather than the
% original publication page -- verify and replace with a DOI or canonical URL.
% NOTE(review): the first author's surname may carry a diacritic that was lost
% on export -- confirm against the published paper.
@inproceedings{pado2009robust,
  author    = {Pado, Sebastian and Galley, Michel and Jurafsky, Dan and Manning, Christopher D.},
  title     = {Robust Machine Translation Evaluation with Entailment Features},
  booktitle = {Proceedings of the Joint Conference of the 47th Annual Meeting of the {ACL} and the 4th International Joint Conference on Natural Language Processing of the {AFNLP} ({ACL-AFNLP})},
  year      = {2009},
  month     = aug,
  pages     = {297--305},
  url       = {http://approjects.co.za/?big=en-us/research/publication/robust-machine-translation-evaluation-with-entailment-features/},
  abstract  = {Existing evaluation metrics for machine translation lack crucial robustness: their correlations with human quality judgments vary considerably across languages and genres. We believe that the main reason is their inability to properly capture meaning: A good translation candidate means the same thing as the reference translation, regardless of formulation. We propose a metric that evaluates MT output based on a rich set of features motivated by textual entailment, such as lexical-semantic (in-)compatibility and argument structure overlap. We compare this metric against a combination metric of four state-of-the-art scores (BLEU, NIST, TER, and METEOR) in two different settings. The combination metric outperforms the individual scores, but is bested by the entailment-based metric. Combining the entailment and traditional features yields further improvements.},
}