@inproceedings{fromm2018heterogeneous,
  author    = {Fromm, Josh and Patel, Shwetak and Philipose, Matthai},
  title     = {Heterogeneous Bitwidth Binarization in Convolutional Neural Networks},
  booktitle = {NIPS 2018},
  year      = {2018},
  month     = {December},
  abstract  = {Recent work has shown that fast, compact low-bitwidth neural networks can be surprisingly accurate. These networks use homogeneous binarization: all parameters in each layer or (more commonly) the whole model have the same low bitwidth (e.g., 2 bits). However, modern hardware allows efficient designs where each arithmetic instruction can have a custom bitwidth, motivating heterogeneous binarization, in which every parameter in the network may have a different bitwidth. In this paper, we show that it is feasible and useful to select bitwidths at the parameter granularity during training. For instance, heterogeneously quantized versions of modern networks such as AlexNet and MobileNet, with the right mix of 1-, 2-, and 3-bit parameters averaging just 1.4 bits, can equal the accuracy of homogeneous 2-bit versions of these networks. Further, we provide analyses showing that heterogeneously binarized systems yield FPGA- and ASIC-based implementations that are correspondingly more efficient in both circuit area and energy than their homogeneous counterparts.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/heterogeneous-bitwidth-binarization-in-convolutional-neural-networks/},
}
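The abstract's core idea (a mix of 1-, 2-, and 3-bit parameters whose bitwidths average to a fractional value such as 1.4) can be illustrated with a small NumPy sketch. This is not the paper's algorithm: it uses residual sign/scale binarization passes and a simple magnitude-based bit-allocation heuristic purely as an assumed stand-in for the paper's trained, per-parameter bitwidth selection.

```python
import numpy as np

def residual_binarize(w, bits):
    """Approximate each weight with bits[i] sign-and-scale terms.

    w    : 1-D float array of parameters
    bits : 1-D int array, per-parameter bitwidth (e.g. 1, 2, or 3)
    Returns the heterogeneous low-bitwidth approximation of w.
    """
    approx = np.zeros_like(w)
    residual = w.copy()
    for b in range(1, int(bits.max()) + 1):
        active = bits >= b                       # parameters still receiving bits
        if not active.any():
            break
        scale = np.abs(residual[active]).mean()  # one shared scale per pass (illustrative choice)
        step = np.where(active, scale * np.sign(residual), 0.0)
        approx += step
        residual -= step
    return approx

# Toy bit allocation (assumed heuristic, not the paper's method): give more
# bits to larger-magnitude weights so the mix averages to roughly 1.4 bits.
rng = np.random.default_rng(0)
w = rng.standard_normal(10_000).astype(np.float32)
order = np.argsort(np.abs(w))
bits = np.ones(w.shape, dtype=int)
bits[order[-3000:]] = 2          # top 30% by magnitude get 2 bits
bits[order[-1000:]] = 3          # top 10% by magnitude get 3 bits
print("average bitwidth:", bits.mean())                                   # ~1.4
print("quantization MSE:", np.mean((w - residual_binarize(w, bits)) ** 2))
```

Running the sketch shows that most parameters get away with a single bit while a small fraction keep 2 or 3 bits, which is what lets the average land below 2 bits while keeping the approximation error low.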