% Journal article on reinforcement-learning methods (TRPO, PPO, DPO) for LLMs.
% Authors are separated with " and " so BibTeX parses two distinct names;
% acronyms in the title are braced so sentence-casing styles keep them uppercase.
% NOTE(review): volume/number/pages are not recorded; the DOI suffix (50.5.790)
% hints at vol. 50, no. 5, p. 790 -- confirm against the publisher before adding.
@article{MC39A44AA,
  author   = {Kim, Taehyun and Park, Soohyun},
  title    = {Research on Reinforcement Learning Methodologies for Large Language Models Using {TRPO}, {PPO}, and {DPO}},
  journal  = {The Journal of Korean Institute of Communications and Information Sciences},
  year     = {2025},
  issn     = {1226-4717},
  doi      = {10.7840/kics.2025.50.5.790},
  keywords = {RLHF, LLMs},
  abstract = {As the utilization of reinforcement learning (RL) in training large language models (LLMs) becomes more prevalent, the necessity to identify optimal RL methodologies tailored for LLMs has emerged. The fields of LLMs and RL are continually evolving through the development of novel techniques that contribute to their mutual advancement. This paper addresses the current trends in reinforcement learning algorithms aimed at enhancing the performance of large language models.},
}