Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions run_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -5622,6 +5622,11 @@ def _run_tool(index, tool_call, function_name, function_args):
f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]"
)

# Redact secrets before entering LLM context (concurrent path)
if isinstance(function_result, str):
from agent.redact import redact_sensitive_text
function_result = redact_sensitive_text(function_result)

# Append tool result message in order
tool_msg = {
"role": "tool",
Expand Down Expand Up @@ -5877,6 +5882,14 @@ def _execute_tool_calls_sequential(self, assistant_message, messages: list, effe
f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]"
)

# Redact secrets from tool output before it enters LLM context.
# Defense-in-depth: individual tools (terminal, file_tools) also
# redact, but this catches any tool that doesn't (e.g. execute_code,
# browser, MCP tools). Prevents secret exfiltration via prompt injection.
if isinstance(function_result, str):
from agent.redact import redact_sensitive_text
function_result = redact_sensitive_text(function_result)

tool_msg = {
"role": "tool",
"content": function_result,
Expand Down
74 changes: 74 additions & 0 deletions tests/agent/test_redact.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,77 @@ def test_all_three_in_env_dump(self):
assert "XYZ789abcdef" not in result
assert "HOME=/home/user" in result
assert "SHELL=/bin/bash" in result


class TestCodeExecutionRedaction:
    """Exercise redact_sensitive_text on output shaped like execute_code stdout."""

    def test_env_var_in_stdout_is_redacted(self):
        """A KEY=value line keeps the variable name but loses the secret value."""
        raw_output = "ANTHROPIC_API_KEY=sk-ant-api03-abc123def456ghi789jkl012mno345"
        masked = redact_sensitive_text(raw_output)
        assert "sk-ant-api03-abc123def456ghi789jkl012mno345" not in masked
        assert "ANTHROPIC_API_KEY=" in masked
        # Either masking marker is accepted.
        assert any(marker in masked for marker in ("***", "..."))

    def test_openrouter_key_in_stdout_is_redacted(self):
        """A bare OpenRouter-style key must not survive in the output."""
        raw_output = "sk-or-v1-abc123def456ghi789jkl012mno345pqr678stu901"
        masked = redact_sensitive_text(raw_output)
        assert "abc123def456" not in masked

    def test_multiple_keys_in_environ_dump(self):
        """A multi-line environment dump has its key and password values masked."""
        env_lines = (
            "OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl\n",
            "GROQ_API_KEY=gsk_abc123def456ghi789jkl012\n",
            "DISCORD_BOT_TOKEN=MTIzNDU2Nzg5MDEyMzQ1Njc4.abc.def\n",
            "DATABASE_URL=postgres://user:secretpass123@host/db\n",
        )
        masked = redact_sensitive_text("".join(env_lines))
        for leaked_fragment in ("sk-proj-abc123", "secretpass123"):
            assert leaked_fragment not in masked

    def test_non_secret_output_unchanged(self):
        """Output without any secret-looking content is returned as-is."""
        harmless = "Hello world\nResult: 42\nProcessed 100 items"
        assert redact_sensitive_text(harmless) == harmless


class TestMemoryAndSkillSecretBlocking:
    """Check that the memory and skill scanners reject secret-bearing content.

    Each scanner is expected to return ``None`` for acceptable content and
    an error string for content that must be blocked.
    """

    # --- memory scanner -------------------------------------------------

    def test_memory_blocks_api_key(self):
        from tools.memory_tool import _scan_memory_content as scan

        verdict = scan("Found key: sk-ant-api03-abc123def456ghi789jkl012mno345")
        assert verdict is not None
        assert any(word in verdict for word in ("API key", "Blocked"))

    def test_memory_blocks_env_assignment(self):
        from tools.memory_tool import _scan_memory_content as scan

        verdict = scan("OPENAI_API_KEY=sk-proj-abc123def456ghi789")
        assert verdict is not None
        assert "Blocked" in verdict

    def test_memory_allows_normal_content(self):
        from tools.memory_tool import _scan_memory_content as scan

        assert scan("User prefers dark mode and uses TypeScript.") is None

    # --- skill scanner --------------------------------------------------

    def test_skill_blocks_api_key(self):
        from tools.skill_manager_tool import _scan_skill_for_secrets as scan

        verdict = scan("Use this key: sk-ant-api03-abc123def456ghi789jkl012mno345")
        assert verdict is not None
        assert "Blocked" in verdict

    def test_skill_blocks_env_assignment(self):
        from tools.skill_manager_tool import _scan_skill_for_secrets as scan

        verdict = scan("ANTHROPIC_TOKEN=sk-ant-oat01-abc123def456ghi789")
        assert verdict is not None
        assert "Blocked" in verdict

    def test_skill_allows_normal_content(self):
        from tools.skill_manager_tool import _scan_skill_for_secrets as scan

        assert scan("# My Skill\n\nThis skill helps with coding tasks.") is None
7 changes: 7 additions & 0 deletions tools/code_execution_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,13 @@ def _drain_head_tail(pipe, head_chunks, tail_chunks, head_bytes, tail_bytes, tot
stdout_text = strip_ansi(stdout_text)
stderr_text = strip_ansi(stderr_text)

# Redact secrets from script output — same as terminal_tool.
# Without this, execute_code can exfiltrate .env secrets via
# `import os; print(os.environ)` bypassing terminal redaction.
from agent.redact import redact_sensitive_text
stdout_text = redact_sensitive_text(stdout_text)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pattern matching is not enough. It is not possible to know the pattern of every secret value, and the KEY=VALUE pattern was already defeated by the sample I shared.

stderr_text = redact_sensitive_text(stderr_text)

# Build response
result: Dict[str, Any] = {
"status": status,
Expand Down
9 changes: 9 additions & 0 deletions tools/memory_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ def _scan_memory_content(content: str) -> Optional[str]:
if re.search(pattern, content, re.IGNORECASE):
return f"Blocked: content matches threat pattern '{pid}'. Memory entries are injected into the system prompt and must not contain injection or exfiltration payloads."

# Block raw secrets — memory is persisted to disk and injected into
# every future system prompt. A prompt injection that saves a secret
# to memory would exfiltrate it across sessions.
from agent.redact import _PREFIX_RE, _ENV_ASSIGN_RE
if _PREFIX_RE.search(content):
return "Blocked: content contains what appears to be an API key or token. Secrets must not be stored in memory."
if _ENV_ASSIGN_RE.search(content):
return "Blocked: content contains a secret assignment (KEY=value pattern). Secrets must not be stored in memory."

return None


Expand Down
19 changes: 19 additions & 0 deletions tools/skill_manager_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,18 @@ def _remove_file(name: str, file_path: str) -> Dict[str, Any]:
# Main entry point
# =============================================================================

def _scan_skill_for_secrets(text: str) -> Optional[str]:
"""Block raw secrets from being written into skill files."""
if not text:
return None
from agent.redact import _PREFIX_RE, _ENV_ASSIGN_RE
if _PREFIX_RE.search(text):
return "Blocked: content contains what appears to be an API key or token. Secrets must not be stored in skills."
if _ENV_ASSIGN_RE.search(text):
return "Blocked: content contains a secret assignment (KEY=value pattern). Secrets must not be stored in skills."
return None


def skill_manage(
action: str,
name: str,
Expand All @@ -541,6 +553,13 @@ def skill_manage(

Returns JSON string with results.
"""
# Block secrets from being persisted in skill files
for text in (content, file_content, new_string):
if text:
secret_err = _scan_skill_for_secrets(text)
if secret_err:
return json.dumps({"success": False, "error": secret_err}, ensure_ascii=False)

if action == "create":
if not content:
return json.dumps({"success": False, "error": "content is required for 'create'. Provide the full SKILL.md text (frontmatter + body)."}, ensure_ascii=False)
Expand Down
Loading