@inproceedings{darvishrouhani2023with,
  author    = {Darvish Rouhani, Bita and Zhao, Ritchie and Elango, Venmugil and Shafipour, Rasoul and Hall, Mathew and Mesmakhosroshahi, Maral and More, Ankit and Melnick, Levi and Golub, Maximilian and Varatkar, Girish and Shao, Lai and Kolhe, Gaurav and Melts, Dimitry and Klar, Jasmine and L'Heureux, Renee and Perry, Matt and Burger, Doug and Chung, Eric and Deng, Zhaoxia (Summer) and Naghshineh, Sam and Park, Jongsoo and Naumov, Maxim},
  title     = {With Shared Microexponents, A Little Shifting Goes a Long Way},
  booktitle = {ISCA '23: Proceedings of the 50th Annual International Symposium on Computer Architecture},
  year      = {2023},
  month     = {June},
  abstract  = {This paper introduces Block Data Representations (BDR), a framework for exploring and evaluating a wide spectrum of narrow-precision formats for deep learning. It enables comparison of popular quantization standards, and through BDR, new formats based on shared microexponents (MX) are identified, which outperform other state-of-the-art quantization approaches, including narrow-precision floating-point and block floating-point. MX utilizes multiple levels of quantization scaling with ultra-fine scaling factors based on shared microexponents in the hardware. The effectiveness of MX is demonstrated on real-world models including large-scale generative pretraining and inferencing, and production-scale recommendation systems.},
  url       = {https://www.microsoft.com/en-us/research/publication/with-shared-microexponents-a-little-shifting-goes-a-long-way/},
  pages     = {1--13},
}