@inproceedings{194633d34e3543929ac27e2ab50d3073,
title = "Advantage Policy Update Based on Proximal Policy Optimization",
abstract = "In this paper, a novel policy network update approach based on Proximal Policy Optimization (PPO), Advantageous Update Policy Proximal Policy Optimization (AUP-PPO), is proposed to alleviate the overfitting caused by using shared layers for the policy and value functions. Extending the sample-efficient reinforcement learning method PPO, which learns the policy and value functions with separate networks so that their optimization is decoupled, AUP-PPO uses the value function to calculate the advantage and updates the policy with the loss between the current and target advantage functions as a penalty term, instead of the value function. Evaluated on multiple benchmark control tasks in OpenAI Gym, AUP-PPO exhibits better generalization to the environment and achieves faster convergence and better robustness than the original PPO.",
keywords = "deep reinforcement learning, policy gradient, proximal policy optimization, reinforcement learning",
author = "Zilin Zeng and Junwei Wang and Zhigang Hu and Dongnan Su and Peng Shang",
note = "Publisher Copyright: {\textcopyright} 2023 SPIE.; 3rd International Seminar on Artificial Intelligence, Networking, and Information Technology, AINIT 2022; Conference date: 23-09-2022 through 25-09-2022",
year = "2023",
doi = "10.1117/12.2667235",
language = "English",
series = "Proceedings of SPIE - The International Society for Optical Engineering",
publisher = "SPIE",
editor = "Naijing Hu and Guanglin Zhang",
booktitle = "Third International Seminar on Artificial Intelligence, Networking, and Information Technology, AINIT 2022",
}