% Seagull (Poppe et al.): infrastructure for load prediction and optimized
% resource allocation; conference paper published by the VLDB Endowment.
% NOTE(review): booktitle says "VLDB 2021" while year is 2020 -- presumably the
%   proceedings-volume year vs. the conference year; confirm before changing.
% NOTE(review): the url host does not look like the publisher's own site --
%   verify the link (and prefer a doi field if one is available).
@inproceedings{poppe2020seagull,
  author    = {Poppe, Olga and Amuneke, Tayo and Banda, Dalitso and De, Aritra
               and Green, Ari and Knoertzer, Manon and Nosakhare, Ehi
               and Rajendran, Karthik and Shankargouda, Deepak and Wang, Meina
               and Au, Alan and Curino, Carlo and Guo, Qun and Jindal, Alekh
               and Kalhan, Ajay and Oslake, Morgan and Parchani, Sonia
               and Ramani, Vijay and Sellappan, Raj and Sen, Saikat
               and Shrotri, Sheetal and Srinivasan, Soundararajan and Xia, Ping
               and Xu, Shize and Yang, Alicia and Zhu, Yiwen},
  title     = {Seagull: An Infrastructure for Load Prediction and Optimized Resource Allocation},
  booktitle = {VLDB 2021},
  publisher = {VLDB Endowment},
  year      = {2020},
  month     = sep,
  pages     = {154--162},
  url       = {http://approjects.co.za/?big=en-us/research/publication/seagull/},
  note      = {Top project in Azure All Hands 2021},
  abstract  = {Microsoft Azure is dedicated to guarantee high quality of service to its customers, in particular, during periods of high customer activity, while controlling cost. We employ a Data Science (DS) driven solution to predict user load and leverage these predictions to optimize resource allocation. To this end, we built the Seagull infrastructure that processes per-server telemetry, validates the data, trains and deploys ML models. The models are used to predict customer load per server (24h into the future), and optimize service operations. Seagull continually re-evaluates accuracy of predictions, fallback to previously known good models and triggers alerts as appropriate. We deployed this infrastructure in production for PostgreSQL and MySQL servers across all Azure regions, and applied it to the problem of scheduling server backups during low-load time. This minimizes interference with user-induced load and improves customer experience.},
}