% Galley et al. (2015), ACL paper introducing the Discriminative BLEU metric.
% The leading Delta in the title is written as TeX math so it renders under
% classic BibTeX and keeps its case under sentence-casing styles.
% NOTE(review): the url host looks like a third-party mirror rather than the
% publisher's own site -- confirm before relying on it.
@inproceedings{galley2015bleu,
  author    = {Galley, Michel and Brockett, Chris and Sordoni, Alessandro and Ji, Yangfeng and Auli, Michael and Quirk, Chris and Mitchell, Margaret and Gao, Jianfeng and Dolan, Bill},
  title     = {{$\Delta$BLEU}: A Discriminative Metric for Generation Tasks with Intrinsically Diverse Targets},
  booktitle = {Proc. of ACL},
  year      = {2015},
  month     = jul,
  pages     = {445--450},
  url       = {http://approjects.co.za/?big=en-us/research/publication/deltableu-a-discriminative-metric-for-generation-tasks-with-intrinsically-diverse-targets/},
  abstract  = {We introduce Discriminative BLEU (∆BLEU), a novel metric for intrinsic evaluation of generated text in tasks that admit a diverse range of possible outputs. Reference strings are scored for quality by human raters on a scale of [−1, +1] to weight multi-reference BLEU. In tasks involving generation of conversational responses, ∆BLEU correlates reasonably with human judgments and outperforms sentence-level and IBM BLEU in terms of both Spearman’s ρ and Kendall’s τ.},
}