% AnoFusion: unsupervised multimodal (metrics, logs, traces) failure
% detection for microservice systems. Venue string is kept as exported.
@inproceedings{zhao2023robust,
  author    = {Zhao, Chenyu and
               Ma, Minghua and
               Zhong, Zhenyu and
               Zhang, Shenglin and
               Tan, Zhiyuan Simon and
               Xiong, Xiao and
               Yu, Lulu and
               Feng, Jiayi and
               Sun, Yongqian and
               Zhang, Yuzhi and
               Pei, Dan and
               Lin, Qingwei and
               Zhang, Dongmei},
  title     = {Robust Multimodal Failure Detection for Microservice Systems},
  booktitle = {{KDD}'23 {ADS}},
  year      = {2023},
  month     = may,
  abstract  = {Proactive failure detection of instances is vitally essential to microservice systems because an instance failure can propagate to the whole system and degrade the system's performance. Over the years, many single-modal (i.e., metrics, logs, or traces) data-based anomaly detection methods have been proposed. However, they tend to miss a large number of failures and generate numerous false alarms because they ignore the correlation of multimodal data. In this work, we propose AnoFusion, an unsupervised failure detection approach, to proactively detect instance failures through multimodal data for microservice systems. It applies a Graph Transformer Network (GTN) to learn the correlation of the heterogeneous multimodal data and integrates a Graph Attention Network (GAT) with Gated Recurrent Unit (GRU) to address the challenges introduced by dynamically changing multimodal data. We evaluate the performance of AnoFusion through two datasets, demonstrating that it achieves the F1-score of 0.857 and 0.922, respectively, outperforming the state-of-the-art failure detection approaches.},
  url       = {https://www.microsoft.com/en-us/research/publication/robust-multimodal-failure-detection-for-microservice-systems/},
}