% Workshop paper on Reward Copilot (ML for Systems workshop, Advances in
% Neural Information Processing Systems 2024).
% NOTE(review): url was normalized to the www.microsoft.com publication page;
% the previously exported host looked like a proxy rewrite -- confirm it resolves.
@inproceedings{tandon2024reward,
  author    = {Tandon, Karan and Mishra, Manav and Somashekar, Gagan and Das, Mayukh and Natarajan, Nagarajan},
  title     = {Reward Copilot for {RL}-driven Systems Optimization},
  booktitle = {Advances in Neural Information Processing Systems 2024 - Workshop on {ML} For Systems},
  year      = {2024},
  month     = nov,
  abstract  = {Systems optimization problems such as workload auto-scaling, kernel parameter tuning, and cluster management arising in large-scale enterprise infrastructure are becoming increasingly RL-driven. While effective, it is difficult to set up the RL framework for such real-world problems -- designing correct and useful reward functions or state spaces is highly challenging and needs a lot of domain expertise. Our proposed novel REWARD COPILOT solution can help design suitable and interpretable reward functions guided by client-provided specifications for any RL framework. Using experiments on standard benchmarks as well as systems-specific optimization problems, we show that our solution can return reward functions with a certain (informal) feasibility certificate, in addition to Pareto-optimality.},
  url       = {https://www.microsoft.com/en-us/research/publication/reward-copilot-for-rl-driven-systems-optimization/},
}