% Journal article: experience report on the Service Analysis Studio (SAS).
% The second author's name was exported as native script fused with the
% romanised form (林庆维 + "Qingwei Lin"); the author field below uses the
% unambiguous "Last, First" form so the surname is parsed as "Lin".
% NOTE(review): the url host looks like a mirror/proxy rather than the
% publisher's own site -- confirm it, and prefer adding a doi if one exists.
@article{lou2017experience,
  author   = {Lou, Jian-Guang and Lin, Qingwei and Ding, Justin},
  title    = {Experience report on applying software analytics in incident management of online service},
  journal  = {Automated Software Engineering},
  volume   = {24},
  pages    = {905--941},
  year     = {2017},
  month    = dec,
  url      = {http://approjects.co.za/?big=en-us/research/publication/experience-report-on-applying-software-analytics-in-incident-management-of-online-service/},
  abstract = {As online services become more and more popular, incident management has become a critical task that aims to minimize the service downtime and to ensure high quality of the provided services. In practice, incident management is conducted through analyzing a huge amount of monitoring data collected at runtime of a service. Such data-driven incident management faces several significant challenges such as the large data scale, complex problem space, and incomplete knowledge. To address these challenges, we carried out 2-year software-analytics research where we designed a set of novel data-driven techniques and developed an industrial system called the Service Analysis Studio (SAS) targeting real scenarios in a large-scale online service of Microsoft. SAS has been deployed to worldwide product datacenters and widely used by on-call engineers for incident management. This paper shares our experience about using software analytics to solve engineers' pain points in incident management, the developed data-analysis techniques, and the lessons learned from the process of research development and technology transfer.},
}