% NUWA-Infinity conference paper by Wu et al. (booktitle recorded as NeurIPS 2022).
% NOTE(review): the url host below (approjects.co.za/?big=...) does not look like
%   the publisher's own domain; presumably a mirror of the original publication
%   page -- confirm and replace with the canonical link (or a DOI) if available.
% NOTE(review): the abstract ends with two "this https URL" placeholders where the
%   exporter dropped the hyperlinks; the targets cannot be recovered from this file.
@inproceedings{wu2022nuwa-infinity,
  author    = {Wu, Chenfei and Liang, Jian and Hu, Xiaowei and Gan, Zhe and Wang, Jianfeng and Wang, Lijuan and Liu, Zicheng and Fang, Yuejian and Duan, Nan},
  title     = {{NUWA-Infinity}: Autoregressive over Autoregressive Generation for Infinite Visual Synthesis},
  booktitle = {{NeurIPS} 2022},
  year      = {2022},
  month     = jul,
  abstract  = {In this paper, we present NUWA-Infinity, a generative model for infinite visual synthesis, which is defined as the task of generating arbitrarily-sized high-resolution images or long-duration videos. An autoregressive over autoregressive generation mechanism is proposed to deal with this variable-size generation task, where a global patch-level autoregressive model considers the dependencies between patches, and a local token-level autoregressive model considers dependencies between visual tokens within each patch. A Nearby Context Pool (NCP) is introduced to cache-related patches already generated as the context for the current patch being generated, which can significantly save computation costs without sacrificing patch-level dependency modeling. An Arbitrary Direction Controller (ADC) is used to decide suitable generation orders for different visual synthesis tasks and learn order-aware positional embeddings. Compared to DALL-E, Imagen and Parti, NUWA-Infinity can generate high-resolution images with arbitrary sizes and support long-duration video generation additionally. Compared to NUWA, which also covers images and videos, NUWA-Infinity has superior visual synthesis capabilities in terms of resolution and variable-size generation. The GitHub link is this https URL. The homepage link is this https URL.},
  url       = {http://approjects.co.za/?big=en-us/research/publication/nuwa-infinity-autoregressive-over-autoregressive-generation-for-infinite-visual-synthesis/},
}