% roy2018cloud -- IMC 2018 paper on fault monitoring of virtualized (SDN overlay)
% networks in multi-tenant cloud datacenters.
@inproceedings{roy2018cloud,
  author    = {Roy, Arjun and Bansal, Deepak and Brumley, David and Chandrappa, Harish Kumar and Sharma, Parag and Tewari, Rishabh and Arzani, Behnaz and Snoeren, Alex C.},
  title     = {Cloud Datacenter {SDN} Monitoring: Experiences and Challenges},
  booktitle = {Internet Measurement Conference ({IMC})},
  year      = {2018},
  month     = oct,
  publisher = {ACM},
  url       = {https://www.microsoft.com/en-us/research/publication/cloud-datacenter-sdn-monitoring-experiences-and-challenges/},
  abstract  = {Cloud customers require highly reliable and performant leased datacenter infrastructure to deliver quality service for their users. It is thus critical for cloud providers to quickly detect and mitigate infrastructure faults. While much is known about managing faults that arise in the datacenter physical infrastructure (i.e., network and server equipment), comparatively little has been published regarding management of the logical overlay networks frequently employed to provide strong isolation in multi-tenant datacenters. We present a first look into the nuances of monitoring these ``virtualized'' networks through the lens of a large cloud provider. We describe challenges to building cloud-based fault monitoring systems, and use the output of a production system to illuminate how virtualization impacts multi-tenant datacenter fault management. We show that interactions between the virtualization, tenant software, and lower layers of the network fabric both simplify and complicate different aspects of fault detection and diagnosis efforts.},
}