% Conference paper introducing Discriminative BLEU (Delta-BLEU), a metric for
% evaluating generated text in tasks that admit many valid outputs.
% Non-ASCII symbols are written as LaTeX so the entry also works with classic
% 8-bit BibTeX; the leading symbol of the title was mis-encoded in the export
% and is restored to match the spelling used in the abstract.
% NOTE(review): the exported URL pointed at a proxy-looking host
% (approjects.co.za/?big=<path>); it is rewritten to the microsoft.com path it
% appears to mirror -- confirm the link resolves.
@inproceedings{galley2015bleu,
  author    = {Galley, Michel and Brockett, Chris and Sordoni, Alessandro and Ji, Yangfeng and Auli, Michael and Quirk, Chris and Mitchell, Margaret and Gao, Jianfeng and Dolan, Bill},
  title     = {{$\Delta$BLEU}: A Discriminative Metric for Generation Tasks with Intrinsically Diverse Targets},
  booktitle = {Proc.\ of {ACL}},
  year      = {2015},
  month     = jul,
  pages     = {445--450},
  url       = {https://www.microsoft.com/en-us/research/publication/deltableu-a-discriminative-metric-for-generation-tasks-with-intrinsically-diverse-targets/},
  abstract  = {We introduce Discriminative BLEU ({$\Delta$BLEU}), a novel metric for intrinsic evaluation of generated text in tasks that admit a diverse range of possible outputs. Reference strings are scored for quality by human raters on a scale of $[-1, +1]$ to weight multi-reference BLEU. In tasks involving generation of conversational responses, {$\Delta$BLEU} correlates reasonably with human judgments and outperforms sentence-level and IBM BLEU in terms of both Spearman's $\rho$ and Kendall's $\tau$.},
}