NousResearch · 0xbyt4 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · jeremyjh
diff --git a/run_agent.py b/run_agent.py
@@ -5622,6 +5622,11 @@ def _run_tool(index, tool_call, function_name, function_args):
                     f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]"
                 )
 
+            # Redact secrets before entering LLM context (concurrent path)
+            if isinstance(function_result, str):
+                from agent.redact import redact_sensitive_text
+                function_result = redact_sensitive_text(function_result)
+
             # Append tool result message in order
             tool_msg = {
                 "role": "tool",
@@ -5877,6 +5882,14 @@ def _execute_tool_calls_sequential(self, assistant_message, messages: list, effe
                     f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]"
                 )
 
+            # Redact secrets from tool output before it enters LLM context.
+            # Defense-in-depth: individual tools (terminal, file_tools,
+            # execute_code) also redact, but this catches any tool that doesn't
+            # (e.g. browser, MCP tools). Prevents secret exfiltration via prompt injection.
+            if isinstance(function_result, str):
+                from agent.redact import redact_sensitive_text
+                function_result = redact_sensitive_text(function_result)
+
             tool_msg = {
                 "role": "tool",
                 "content": function_result,

diff --git a/tests/agent/test_redact.py b/tests/agent/test_redact.py
@@ -252,3 +252,77 @@ def test_all_three_in_env_dump(self):
         assert "XYZ789abcdef" not in result
         assert "HOME=/home/user" in result
         assert "SHELL=/bin/bash" in result
+
+
+class TestCodeExecutionRedaction:
+    """Verify redact_sensitive_text masks secrets in strings shaped like script (execute_code) output."""
+
+    def test_env_var_in_stdout_is_redacted(self):
+        """Secrets printed via os.environ should be masked."""
+        text = "ANTHROPIC_API_KEY=sk-ant-api03-abc123def456ghi789jkl012mno345"
+        result = redact_sensitive_text(text)
+        assert "sk-ant-api03-abc123def456ghi789jkl012mno345" not in result
+        assert "ANTHROPIC_API_KEY=" in result  # the variable name itself must survive
+        assert "***" in result or "..." in result  # either masking marker is accepted
+
+    def test_openrouter_key_in_stdout_is_redacted(self):
+        """OpenRouter keys in script output should be masked."""
+        text = "sk-or-v1-abc123def456ghi789jkl012mno345pqr678stu901"
+        result = redact_sensitive_text(text)
+        assert "abc123def456" not in result  # checks a slice of the key body, not the whole key
+
+    def test_multiple_keys_in_environ_dump(self):
+        """An os.environ-style dump should be redacted; only the OpenAI key and DB password are asserted."""
+        text = (
+            "OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl\n"
+            "GROQ_API_KEY=gsk_abc123def456ghi789jkl012\n"
+            "DISCORD_BOT_TOKEN=MTIzNDU2Nzg5MDEyMzQ1Njc4.abc.def\n"
+            "DATABASE_URL=postgres://user:secretpass123@host/db\n"
+        )
+        result = redact_sensitive_text(text)
+        assert "sk-proj-abc123" not in result
+        assert "secretpass123" not in result
+
+    def test_non_secret_output_unchanged(self):
+        """Regular script output should pass through unmodified."""
+        text = "Hello world\nResult: 42\nProcessed 100 items"
+        result = redact_sensitive_text(text)
+        assert result == text
+
+
+class TestMemoryAndSkillSecretBlocking:
+    """Verify the memory and skill content scanners reject secret-bearing text and accept normal text."""
+
+    def test_memory_blocks_api_key(self):
+        from tools.memory_tool import _scan_memory_content
+        result = _scan_memory_content("Found key: sk-ant-api03-abc123def456ghi789jkl012mno345")
+        assert result is not None  # a non-None result is the scanner's rejection message
+        assert "API key" in result or "Blocked" in result  # either wording is accepted
+
+    def test_memory_blocks_env_assignment(self):
+        from tools.memory_tool import _scan_memory_content
+        result = _scan_memory_content("OPENAI_API_KEY=sk-proj-abc123def456ghi789")
+        assert result is not None
+        assert "Blocked" in result
+
+    def test_memory_allows_normal_content(self):
+        from tools.memory_tool import _scan_memory_content
+        result = _scan_memory_content("User prefers dark mode and uses TypeScript.")
+        assert result is None  # None means nothing was flagged
+
+    def test_skill_blocks_api_key(self):
+        from tools.skill_manager_tool import _scan_skill_for_secrets
+        result = _scan_skill_for_secrets("Use this key: sk-ant-api03-abc123def456ghi789jkl012mno345")
+        assert result is not None
+        assert "Blocked" in result
+
+    def test_skill_blocks_env_assignment(self):
+        from tools.skill_manager_tool import _scan_skill_for_secrets
+        result = _scan_skill_for_secrets("ANTHROPIC_TOKEN=sk-ant-oat01-abc123def456ghi789")
+        assert result is not None
+        assert "Blocked" in result
+
+    def test_skill_allows_normal_content(self):
+        from tools.skill_manager_tool import _scan_skill_for_secrets
+        result = _scan_skill_for_secrets("# My Skill\n\nThis skill helps with coding tasks.")
+        assert result is None  # None means nothing was flagged
diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py
@@ -596,6 +596,13 @@ def _drain_head_tail(pipe, head_chunks, tail_chunks, head_bytes, tail_bytes, tot
         stdout_text = strip_ansi(stdout_text)
         stderr_text = strip_ansi(stderr_text)
 
+        # Redact secrets from script output — same as terminal_tool.
+        # Without this, execute_code can exfiltrate .env secrets via
+        # `import os; print(os.environ)` bypassing terminal redaction.
+        from agent.redact import redact_sensitive_text
+        stdout_text = redact_sensitive_text(stdout_text)
+        stderr_text = redact_sensitive_text(stderr_text)
+
         # Build response
         result: Dict[str, Any] = {
             "status": status,

diff --git a/tools/memory_tool.py b/tools/memory_tool.py
@@ -84,6 +84,15 @@ def _scan_memory_content(content: str) -> Optional[str]:
         if re.search(pattern, content, re.IGNORECASE):
             return f"Blocked: content matches threat pattern '{pid}'. Memory entries are injected into the system prompt and must not contain injection or exfiltration payloads."
 
+    # Block raw secrets — memory is persisted to disk and injected into
+    # every future system prompt. A prompt injection that saves a secret
+    # to memory would exfiltrate it across sessions.
+    from agent.redact import _PREFIX_RE, _ENV_ASSIGN_RE
+    if _PREFIX_RE.search(content):
+        return "Blocked: content contains what appears to be an API key or token. Secrets must not be stored in memory."
+    if _ENV_ASSIGN_RE.search(content):
+        return "Blocked: content contains a secret assignment (KEY=value pattern). Secrets must not be stored in memory."
+
     return None
 
 

diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py
@@ -525,6 +525,18 @@ def _remove_file(name: str, file_path: str) -> Dict[str, Any]:
 # Main entry point
 # =============================================================================
 
+def _scan_skill_for_secrets(text: str) -> Optional[str]:
+    """Return a 'Blocked: ...' message if text matches a secret pattern; None if it is empty or clean."""
+    if not text:
+        return None  # nothing to scan
+    from agent.redact import _PREFIX_RE, _ENV_ASSIGN_RE  # patterns come from the redaction module
+    if _PREFIX_RE.search(text):
+        return "Blocked: content contains what appears to be an API key or token. Secrets must not be stored in skills."
+    if _ENV_ASSIGN_RE.search(text):
+        return "Blocked: content contains a secret assignment (KEY=value pattern). Secrets must not be stored in skills."
+    return None
+
+
 def skill_manage(
     action: str,
     name: str,
@@ -541,6 +553,13 @@ def skill_manage(
 
     Returns JSON string with results.
     """
+    # Block secrets from being persisted in skill files
+    for text in (content, file_content, new_string):
+        if text:
+            secret_err = _scan_skill_for_secrets(text)
+            if secret_err:
+                return json.dumps({"success": False, "error": secret_err}, ensure_ascii=False)
+
     if action == "create":
         if not content:
             return json.dumps({"success": False, "error": "content is required for 'create'. Provide the full SKILL.md text (frontmatter + body)."}, ensure_ascii=False)