@comment{levit2017dont: the venue given is Interspeech, a conference, so the entry is typed as inproceedings with the venue in booktitle. NOTE(review): the url host was restored to www.microsoft.com from what looks like a mirror/proxy rewrite of the original link; confirm that it resolves.}
@inproceedings{levit2017dont,
  author    = {Levit, Michael and Huang, Yan and Chang, Shawn and Gong, Yifan},
  title     = {Don't Count on {ASR} to Transcribe for You: Breaking Bias with Two Crowds},
  booktitle = {Interspeech},
  year      = {2017},
  month     = aug,
  url       = {https://www.microsoft.com/en-us/research/publication/dont-count-asr-transcribe-breaking-bias-two-crowds/},
  abstract  = {A crowdsourcing approach for collecting high-quality speech transcriptions is presented. The approach addresses typical weakness of traditional semi-supervised transcription strategies that show ASR hypotheses to transcribers to help them cope with unclear or ambiguous audio and speed up transcriptions. We explain how the traditional methods introduce bias into transcriptions that make it difficult to objectively measure system improvements against existing baselines, and suggest a two-stage crowdsourcing alternative that, first, iteratively collects transcription hypotheses and, then, asks a different crowd to pick the best of them. We show that this alternative not only outperforms the traditional method in a side-by-side comparison, but it also leads to ASR improvements due to superior quality of acoustic and language models trained on the transcribed data.},
}