% Journal article: C-Laius, a runtime system for QoS-aware co-location on
% spatial multitasking GPUs (IEEE Transactions on Computers, 2021).
%
% Notes for maintainers:
%   - month uses the standard BibTeX macro (mar) so styles can localise it.
%   - Acronyms in the title are brace-protected against sentence-casing styles.
%   - Percent signs in the abstract are escaped so the field is safe to typeset.
%   - NOTE(review): pages 1--1 with no volume/number looks like an early-access
%     placeholder -- confirm against the final published record and update.
%   - NOTE(review): the url host looks like a mirror/proxy rather than the
%     publisher's own site -- verify the link and consider adding a doi.
@article{zhang2021towards,
  author   = {Zhang, Wei and Chen, Quan and Zheng, Ningxin and Cui, Weihao and Fu, Kaihua and Guo, Minyi},
  title    = {Towards {QoS}-awareness and Improved Utilization of Spatial Multitasking {GPUs}},
  journal  = {IEEE Transactions on Computers},
  year     = {2021},
  month    = mar,
  pages    = {1--1},
  url      = {http://approjects.co.za/?big=en-us/research/publication/towards-qos-awareness-and-improved-utilization-of-spatial-multitasking-gpus/},
  abstract = {Datacenters use GPUs to provide the significant computing throughput required by emerging user-facing services. The diurnal user access pattern of user-facing services provides a strong incentive to co-located applications for better GPU utilization, and prior work has focused on enabling co-location on multicore processors and traditional non-preemptive accelerators. However, current GPUs are evolving towards spatial multitasking and introduce a new set of challenges to eliminate QoS violations. We propose C-Laius, a runtime system that carefully allocates the computation resource to co-located applications for maximizing the throughput of batch applications while guaranteeing the required QoS of user-facing services. C-Laius not only allows co-locating one user-facing application with multiple batch applications, but also supports the condition of multiple user-facing applications with batch applications. In the case of a single co-located user-facing application, our evaluation on an Nvidia RTX 2080Ti GPU shows that C-Laius improves the utilization of spatial multitasking accelerators by 20.8\%, while achieving the 99\%-ile latency target for user-facing services. As to the case of multiple co-located user-facing applications, C-Laius ensures no violation of QoS while improving the accelerator utilization by 35.9\% on average.},
}