% Conference paper entry (ICLR 2026).
% Accented author names use BibTeX special-character escapes so the entry
% sorts and labels correctly under classic 8-bit BibTeX as well as biber.
% NOTE(review): the url previously pointed at a mirror/proxy-style address
% (approjects.co.za/?big=<path>); the host was restored to www.microsoft.com
% based on the path layout -- confirm the link resolves before publishing.
@inproceedings{kolluri2026optimizing,
  author    = {Kolluri, Aashish and Sharma, Rishi and Costa, Manuel and K{\"o}pf, Boris and Niessen, Tobias and Russinovich, Mark and Tople, Shruti and Zanella-B{\'e}guelin, Santiago},
  title     = {Optimizing Agent Planning for Security and Autonomy},
  booktitle = {{ICLR} 2026},
  publisher = {OpenReview.net},
  year      = {2026},
  month     = feb,
  url       = {https://www.microsoft.com/en-us/research/publication/optimizing-agent-planning-for-security-and-autonomy/},
  abstract  = {Indirect prompt injection attacks threaten AI agents that execute consequential actions, motivating deterministic system-level defenses. Such defenses can provably block unsafe actions by enforcing confidentiality and integrity policies, but currently appear costly: they reduce task completion rates and increase token usage compared to probabilistic defenses. We argue that existing evaluations miss a key benefit of system-level defenses: reduced reliance on human oversight. We introduce autonomy metrics to quantify this benefit: the fraction of consequential actions an agent can execute without human-in-the-loop (HITL) approval while preserving security. To increase autonomy, we design a security-aware agent that (i) introduces richer HITL interactions, and (ii) explicitly plans for both task progress and policy compliance. We implement this agent design atop an existing information-flow control defense against prompt injection and evaluate it on the AgentDojo and WASP benchmarks. Experiments show that this approach yields higher autonomy without sacrificing utility.},
}