@techreport{sapio2019scaling,
  author      = {Sapio, Amedeo and Canini, Marco and Ho, Chen-Yu and Nelson, Jacob and Kalnis, Panos and Kim, Changhoon and Krishnamurthy, Arvind and Moshref, Masoud and Ports, Dan R. K. and Richt{\'a}rik, Peter},
  title       = {Scaling Distributed Machine Learning with In-Network Aggregation},
  institution = {Microsoft Research},
  number      = {MSR-TR-2019-9},
  year        = {2019},
  month       = {February},
  url         = {https://www.microsoft.com/en-us/research/publication/scaling-distributed-machine-learning-with-in-network-aggregation/},
  abstract    = {Training complex machine learning models in parallel is an increasingly important workload. We accelerate distributed parallel training by designing a communication primitive that uses a programmable switch dataplane to execute a key step of the training process. Our approach, SwitchML, reduces the volume of exchanged data by aggregating the model updates from multiple workers in the network. We co-design the switch processing with the end-host protocols and ML frameworks to provide a robust, efficient solution that speeds up training by up to 300%, and at least by 20%, for a number of real-world benchmark models.},
}