% Conference paper: Auto-Dialabel (unsupervised intent/slot labeling of dialogue data).
% NOTE(review): the url host below does not look like a publisher or institutional
%   domain; it may be a mirror/proxy of the original publication page -- confirm
%   the canonical URL (or add a doi) before relying on it.
% NOTE(review): third author is recorded as "Shan, Lei"; verify the surname
%   spelling against the published paper.
@inproceedings{shi2018auto-dialabel,
  author    = {Shi, Chen and Chen, Qi and Shan, Lei and Li, Sujian and Sun, Xu and Wang, Houfeng and Zhang, Lintao},
  title     = {{Auto-Dialabel}: Labeling Dialogue Data with Unsupervised Learning},
  booktitle = {2018 Conference on Empirical Methods in Natural Language Processing},
  year      = {2018},
  month     = nov,
  abstract  = {The lack of labeled data is one of the main challenges when building a task-oriented dialogue system. Existing dialogue datasets usually rely on human labeling, which is expensive, limited in size, and in low coverage. In this paper, we instead propose our framework auto-dialabel to automatically cluster the dialogue intents and slots. In this framework, we collect a set of context features, leverage an autoencoder for feature assembly, and adapt a dynamic hierarchical clustering method for intent and slot labeling. Experimental results show that our framework can promote human labeling cost to a great extent, achieve good intent clustering accuracy (84.1\%), and provide reasonable and instructive slot labeling results.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/auto-dialabel-labeling-dialogue-data-with-unsupervised-learning/},
}