% Sun et al., AAMAS 2023: trust region bounds for decentralized PPO in cooperative MARL.
% Acronyms in title/booktitle are brace-protected so sentence-casing styles keep them uppercase;
% month uses the predefined BibTeX macro so styles can abbreviate/localise it.
@inproceedings{sun2023trust,
  author    = {Sun, Mingfei and Devlin, Sam and Beck, Jacob and Hofmann, Katja and Whiteson, Shimon},
  title     = {Trust Region Bounds for Decentralized {PPO} Under Non-stationarity},
  booktitle = {International Conference on Autonomous Agents and Multiagent Systems ({AAMAS})},
  year      = {2023},
  month     = may,
  abstract  = {We present trust region bounds for optimizing decentralized policies in cooperative Multi-Agent Reinforcement Learning (MARL), which holds even when the transition dynamics are non-stationary. This new analysis provides a theoretical understanding of the strong performance of two recent actor-critic methods for MARL, which both rely on independent ratios, i.e., computing probability ratios separately for each agent's policy. We show that, despite the non-stationarity that independent ratios cause, a monotonic improvement guarantee still arises as a result of enforcing the trust region constraint over all decentralized policies. We also show this trust region constraint can be effectively enforced in a principled way by bounding independent ratios based on the number of agents in training, providing a theoretical foundation for proximal ratio clipping. Finally, our empirical results support the hypothesis that the strong performance of IPPO and MAPPO is a direct result of enforcing such a trust region constraint via clipping in centralized training, and tuning the hyperparameters with regards to the number of agents, as predicted by our theoretical analysis.},
  url       = {https://www.microsoft.com/en-us/research/publication/trust-region-bounds-for-decentralized-ppo-under-non-stationarity/},
}