% MetaFuse (Xie, Wang, Wang), conference paper; record includes the full abstract.
% NOTE(review): month is kept as February as given in the original record;
% confirm against the actual proceedings date before relying on it.
% NOTE(review): url host restored to www.microsoft.com (the original pointed at
% a proxy mirror); confirm that the link resolves.
@inproceedings{xie2020metafuse,
  author    = {Xie, Rongchang and Wang, Chunyu and Wang, Yizhou},
  title     = {{MetaFuse}: A Pre-trained Fusion Model for Human Pose Estimation},
  booktitle = {{IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2020},
  month     = feb,
  abstract  = {Cross view feature fusion is the key to address the occlusion problem in human pose estimation. The current fusion methods need to train a separate model for every pair of cameras making them difficult to scale. In this work, we introduce MetaFuse, a pre-trained fusion model learned from a large number of cameras in the Panoptic dataset. The model can be efficiently adapted or finetuned for a new pair of cameras using a small number of labeled images. The strong adaptation power of MetaFuse is due in large part to the proposed factorization of the original fusion model into two parts (1) a generic fusion model shared by all cameras, and (2) lightweight camera-dependent transformations. Furthermore, the generic model is learned from many cameras by a meta-learning style algorithm to maximize its adaptation capability to various camera poses. We observe in experiments that MetaFuse finetuned on the public datasets outperforms the state-of-the-arts by a large margin which validates its value in practice.},
  url       = {https://www.microsoft.com/en-us/research/publication/metafuse-a-pre-trained-fusion-model-for-human-pose-estimation/},
}