@comment{Microsoft Research technical report MSR-TR-2021-31 on BlastShield, a decentralized WAN traffic engineering system.}
@techreport{krishnaswamy2021decentralized,
  author      = {Krishnaswamy, Umesh and Singh, Rachee and Bj{\o}rner, Nikolaj and Raj, Himanshu},
  title       = {Decentralized cloud wide-area network traffic engineering with {BlastShield}},
  institution = {Microsoft},
  number      = {MSR-TR-2021-31},
  year        = {2021},
  month       = nov,
  url         = {https://www.microsoft.com/en-us/research/publication/decentralized-cloud-wide-area-network-traffic-engineering-with-blastshield/},
  abstract    = {Cloud networks are increasingly managed by centralized software defined controllers. Centralized traffic engineering controllers achieve higher network throughput than decentralized implementations, but are a single point of failure in the network. Large scale networks require controllers with isolated fault domains to contain the blast radius of faults. In this work, we present BlastShield, Microsoft's SDN-based decentralized WAN traffic engineering system. BlastShield slices the WAN into smaller fault domains, each managed by its own slice controller. Slice controllers independently engineer traffic in their slices to maximize global network throughput without relying on hierarchical or central coordination. Despite the lack of central coordination, BlastShield achieves similar network throughput as state-of-the-art centralized deployments. Moreover, BlastShield reduces throughput loss from the failure of a single controller by over 65\%. BlastShield is deployed in Microsoft's WAN today and carries a majority of the backbone traffic.},
}