@inproceedings{li2020adatune,
  author    = {Li, Menghao and Zhang, Minjia and Wang, Chi and Li, Mingqin},
  title     = {AdaTune: Adaptive Tensor Program Compilation Made Efficient},
  booktitle = {34th Conference on Neural Information Processing Systems (NeurIPS 2020)},
  year      = {2020},
  month     = {December},
  abstract  = {Deep learning models are computationally intensive, and implementations often have to be highly optimized by experts or hardware vendors to be usable in practice. The DL compiler, together with Learning-to-Compile, has proven to be a powerful technique for optimizing tensor programs. However, a limitation of this approach is that it still suffers from unbearably long overall optimization time. In this paper, we present a new method, called AdaTune, that significantly reduces the optimization time of tensor programs for high-performance deep learning inference. In particular, we propose an adaptive evaluation method that statistically terminates costly hardware measurements early without losing much accuracy. We further devise a surrogate model with uncertainty quantification that allows the optimization to better adapt to hardware and model heterogeneity. Finally, we introduce a contextual optimizer that provides adaptive control of exploration and exploitation to improve the effectiveness of searching the transformation space. We evaluate and compare the levels of optimization obtained by AutoTVM, a state-of-the-art Learning-to-Compile technique on top of TVM, and AdaTune. The experimental results show that AdaTune obtains up to 115% higher GFLOPS than the baseline under the same optimization time budget. Furthermore, AdaTune provides a 1.3–3.9× speedup in optimization time over the baseline to reach the same optimization quality for a range of models across different hardware architectures.},
  url       = {https://www.microsoft.com/en-us/research/publication/adatune-adaptive-tensor-program-compilation-made-efficient/},
}