From 539629923c05e98fee06258a3341af94f2dcccba Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 16:37:14 -0700 Subject: [PATCH 001/154] docs(llm-wiki): add Obsidian Headless setup for servers (#5660) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds obsidian-headless (npm) setup guide to the Obsidian Integration section — Node 22+, ob login, sync-create-remote, sync-setup, systemd service for continuous background sync. Covers the full headless workflow for agents running on servers syncing to Obsidian desktop on other devices. --- skills/research/llm-wiki/SKILL.md | 56 +++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/skills/research/llm-wiki/SKILL.md b/skills/research/llm-wiki/SKILL.md index db172438..753bc3af 100644 --- a/skills/research/llm-wiki/SKILL.md +++ b/skills/research/llm-wiki/SKILL.md @@ -380,6 +380,62 @@ For best results: If using the Obsidian skill alongside this one, set `OBSIDIAN_VAULT_PATH` to the same directory as the wiki path. +### Obsidian Headless (servers and headless machines) + +On machines without a display, use `obsidian-headless` instead of the desktop app. +It syncs vaults via Obsidian Sync without a GUI — perfect for agents running on +servers that write to the wiki while Obsidian desktop reads it on another device. 
+ +**Setup:** +```bash +# Requires Node.js 22+ +npm install -g obsidian-headless + +# Login (requires Obsidian account with Sync subscription) +ob login --email --password '' + +# Create a remote vault for the wiki +ob sync-create-remote --name "LLM Wiki" + +# Connect the wiki directory to the vault +cd ~/wiki +ob sync-setup --vault "" + +# Initial sync +ob sync + +# Continuous sync (foreground — use systemd for background) +ob sync --continuous +``` + +**Continuous background sync via systemd:** +```ini +# ~/.config/systemd/user/obsidian-wiki-sync.service +[Unit] +Description=Obsidian LLM Wiki Sync +After=network-online.target +Wants=network-online.target + +[Service] +ExecStart=/path/to/ob sync --continuous +WorkingDirectory=/home/user/wiki +Restart=on-failure +RestartSec=10 + +[Install] +WantedBy=default.target +``` + +```bash +systemctl --user daemon-reload +systemctl --user enable --now obsidian-wiki-sync +# Enable linger so sync survives logout: +sudo loginctl enable-linger $USER +``` + +This lets the agent write to `~/wiki` on a server while you browse the same +vault in Obsidian on your laptop/phone — changes appear within seconds. + ## Pitfalls - **Never modify files in `raw/`** — sources are immutable. Corrections go in wiki pages. From 9201370c7ef54d6a3c1a582e5632567c02dd687c Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 6 Apr 2026 14:17:43 +0530 Subject: [PATCH 002/154] feat(plugins): prompt for required env vars during hermes plugins install Read requires_env from plugin.yaml after install and interactively prompt for any missing environment variables, saving them to ~/.hermes/.env. Supports two manifest formats: Simple (backwards-compatible): requires_env: - MY_API_KEY Rich (with metadata): requires_env: - name: MY_API_KEY description: "API key for Acme" url: "https://acme.com/keys" secret: true Already-set variables are skipped. Empty input skips gracefully. 
Secret values use getpass (hidden input). Ctrl+C aborts remaining prompts without error. --- hermes_cli/plugins_cmd.py | 81 +++++++++++++++++++++++++++ tests/test_plugins_cmd.py | 112 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index 68a31544..e794a3a1 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -147,6 +147,81 @@ def _copy_example_files(plugin_dir: Path, console) -> None: ) +def _prompt_plugin_env_vars(manifest: dict, console) -> None: + """Prompt for required environment variables declared in plugin.yaml. + + ``requires_env`` accepts two formats: + + Simple list (backwards-compatible):: + + requires_env: + - MY_API_KEY + + Rich list with metadata:: + + requires_env: + - name: MY_API_KEY + description: "API key for Acme service" + url: "https://acme.com/keys" + secret: true + + Already-set variables are skipped. Values are saved to ``~/.hermes/.env``. + """ + requires_env = manifest.get("requires_env") or [] + if not requires_env: + return + + from hermes_cli.config import get_env_value, save_env_value # noqa: F811 + + # Normalise to list-of-dicts + env_specs: list[dict] = [] + for entry in requires_env: + if isinstance(entry, str): + env_specs.append({"name": entry}) + elif isinstance(entry, dict) and entry.get("name"): + env_specs.append(entry) + + # Filter to only vars that aren't already set + missing = [s for s in env_specs if not get_env_value(s["name"])] + if not missing: + return + + plugin_name = manifest.get("name", "this plugin") + console.print(f"\n[bold]{plugin_name}[/bold] requires the following environment variables:\n") + + for spec in missing: + name = spec["name"] + desc = spec.get("description", "") + url = spec.get("url", "") + secret = spec.get("secret", False) + + label = f" {name}" + if desc: + label += f" — {desc}" + console.print(label) + if url: + console.print(f" [dim]Get yours at: {url}[/dim]") + + try: + if 
secret: + import getpass + value = getpass.getpass(f" {name}: ").strip() + else: + value = input(f" {name}: ").strip() + except (EOFError, KeyboardInterrupt): + console.print("\n[dim] Skipped (you can set these later in ~/.hermes/.env)[/dim]") + return + + if value: + save_env_value(name, value) + os.environ[name] = value + console.print(f" [green]✓[/green] Saved to ~/.hermes/.env") + else: + console.print(f" [dim] Skipped (set {name} in ~/.hermes/.env later)[/dim]") + + console.print() + + def _display_after_install(plugin_dir: Path, identifier: str) -> None: """Show after-install.md if it exists, otherwise a default message.""" from rich.console import Console @@ -306,6 +381,12 @@ def cmd_install(identifier: str, force: bool = False) -> None: # Copy .example files to their real names (e.g. config.yaml.example → config.yaml) _copy_example_files(target, console) + # Re-read manifest from installed location (for env var prompting) + installed_manifest = _read_manifest(target) + + # Prompt for required environment variables before showing after-install docs + _prompt_plugin_env_vars(installed_manifest, console) + _display_after_install(target, identifier) console.print("[dim]Restart the gateway for the plugin to take effect:[/dim]") diff --git a/tests/test_plugins_cmd.py b/tests/test_plugins_cmd.py index 492f94ad..b3d3eb7b 100644 --- a/tests/test_plugins_cmd.py +++ b/tests/test_plugins_cmd.py @@ -443,3 +443,115 @@ class TestCopyExampleFiles: # Should have printed a warning assert any("Warning" in str(c) for c in console.print.call_args_list) + + +class TestPromptPluginEnvVars: + """Tests for _prompt_plugin_env_vars.""" + + def test_skips_when_no_requires_env(self): + from hermes_cli.plugins_cmd import _prompt_plugin_env_vars + from unittest.mock import MagicMock + + console = MagicMock() + _prompt_plugin_env_vars({}, console) + console.print.assert_not_called() + + def test_skips_already_set_vars(self, monkeypatch): + from hermes_cli.plugins_cmd import 
_prompt_plugin_env_vars + from unittest.mock import MagicMock, patch + + console = MagicMock() + with patch("hermes_cli.config.get_env_value", return_value="already-set"): + _prompt_plugin_env_vars({"requires_env": ["MY_KEY"]}, console) + # No prompt should appear — all vars are set + console.print.assert_not_called() + + def test_prompts_for_missing_var_simple_format(self): + from hermes_cli.plugins_cmd import _prompt_plugin_env_vars + from unittest.mock import MagicMock, patch + + console = MagicMock() + manifest = { + "name": "test_plugin", + "requires_env": ["MY_API_KEY"], + } + + with patch("hermes_cli.config.get_env_value", return_value=None), \ + patch("builtins.input", return_value="sk-test-123"), \ + patch("hermes_cli.config.save_env_value") as mock_save: + _prompt_plugin_env_vars(manifest, console) + + mock_save.assert_called_once_with("MY_API_KEY", "sk-test-123") + + def test_prompts_for_missing_var_rich_format(self): + from hermes_cli.plugins_cmd import _prompt_plugin_env_vars + from unittest.mock import MagicMock, patch + + console = MagicMock() + manifest = { + "name": "langfuse_tracing", + "requires_env": [ + { + "name": "LANGFUSE_PUBLIC_KEY", + "description": "Public key", + "url": "https://langfuse.com", + "secret": False, + }, + ], + } + + with patch("hermes_cli.config.get_env_value", return_value=None), \ + patch("builtins.input", return_value="pk-lf-123"), \ + patch("hermes_cli.config.save_env_value") as mock_save: + _prompt_plugin_env_vars(manifest, console) + + mock_save.assert_called_once_with("LANGFUSE_PUBLIC_KEY", "pk-lf-123") + # Should show url hint + printed = " ".join(str(c) for c in console.print.call_args_list) + assert "langfuse.com" in printed + + def test_secret_uses_getpass(self): + from hermes_cli.plugins_cmd import _prompt_plugin_env_vars + from unittest.mock import MagicMock, patch + + console = MagicMock() + manifest = { + "name": "test", + "requires_env": [{"name": "SECRET_KEY", "secret": True}], + } + + with 
patch("hermes_cli.config.get_env_value", return_value=None), \ + patch("getpass.getpass", return_value="s3cret") as mock_gp, \ + patch("hermes_cli.config.save_env_value"): + _prompt_plugin_env_vars(manifest, console) + + mock_gp.assert_called_once() + + def test_empty_input_skips(self): + from hermes_cli.plugins_cmd import _prompt_plugin_env_vars + from unittest.mock import MagicMock, patch + + console = MagicMock() + manifest = {"name": "test", "requires_env": ["OPTIONAL_VAR"]} + + with patch("hermes_cli.config.get_env_value", return_value=None), \ + patch("builtins.input", return_value=""), \ + patch("hermes_cli.config.save_env_value") as mock_save: + _prompt_plugin_env_vars(manifest, console) + + mock_save.assert_not_called() + + def test_keyboard_interrupt_skips_gracefully(self): + from hermes_cli.plugins_cmd import _prompt_plugin_env_vars + from unittest.mock import MagicMock, patch + + console = MagicMock() + manifest = {"name": "test", "requires_env": ["KEY1", "KEY2"]} + + with patch("hermes_cli.config.get_env_value", return_value=None), \ + patch("builtins.input", side_effect=KeyboardInterrupt), \ + patch("hermes_cli.config.save_env_value") as mock_save: + _prompt_plugin_env_vars(manifest, console) + + # Should not crash, and not save anything + mock_save.assert_not_called() From 631d1598646d021a600cae56e5b3366b28d813c4 Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 6 Apr 2026 16:40:15 -0700 Subject: [PATCH 003/154] fix: use display_hermes_home() for profile-aware paths in plugin env prompts Follow-up to PR #5470. Replaces hardcoded ~/.hermes/.env references with display_hermes_home() for correct behavior under profiles. Also updates PluginManifest.requires_env type hint to List[Union[str, Dict[str, Any]]] to document the rich format introduced in #5470. 
--- hermes_cli/plugins.py | 4 ++-- hermes_cli/plugins_cmd.py | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 73591443..ce57695f 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -36,7 +36,7 @@ import sys import types from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Set +from typing import Any, Callable, Dict, List, Optional, Set, Union from utils import env_var_enabled @@ -95,7 +95,7 @@ class PluginManifest: version: str = "" description: str = "" author: str = "" - requires_env: List[str] = field(default_factory=list) + requires_env: List[Union[str, Dict[str, Any]]] = field(default_factory=list) provides_tools: List[str] = field(default_factory=list) provides_hooks: List[str] = field(default_factory=list) source: str = "" # "user", "project", or "entrypoint" diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index e794a3a1..bd6d7fab 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -165,13 +165,14 @@ def _prompt_plugin_env_vars(manifest: dict, console) -> None: url: "https://acme.com/keys" secret: true - Already-set variables are skipped. Values are saved to ``~/.hermes/.env``. + Already-set variables are skipped. Values are saved to the user's ``.env``. 
""" requires_env = manifest.get("requires_env") or [] if not requires_env: return from hermes_cli.config import get_env_value, save_env_value # noqa: F811 + from hermes_constants import display_hermes_home # Normalise to list-of-dicts env_specs: list[dict] = [] @@ -209,15 +210,15 @@ def _prompt_plugin_env_vars(manifest: dict, console) -> None: else: value = input(f" {name}: ").strip() except (EOFError, KeyboardInterrupt): - console.print("\n[dim] Skipped (you can set these later in ~/.hermes/.env)[/dim]") + console.print(f"\n[dim] Skipped (you can set these later in {display_hermes_home()}/.env)[/dim]") return if value: save_env_value(name, value) os.environ[name] = value - console.print(f" [green]✓[/green] Saved to ~/.hermes/.env") + console.print(f" [green]✓[/green] Saved to {display_hermes_home()}/.env") else: - console.print(f" [dim] Skipped (set {name} in ~/.hermes/.env later)[/dim]") + console.print(f" [dim] Skipped (set {name} in {display_hermes_home()}/.env later)[/dim]") console.print() From 4f03b9a419bcba023bcdb2fd5e9ef9ee57f46f9a Mon Sep 17 00:00:00 2001 From: Nick Date: Mon, 6 Apr 2026 08:26:39 +1200 Subject: [PATCH 004/154] feat(webhook): add {__raw__} template token and thread_id passthrough for forum topics - {__raw__} in webhook prompt templates dumps the full JSON payload (truncated at 4000 chars) - _deliver_cross_platform now passes thread_id/message_thread_id from deliver_extra as metadata, enabling Telegram forum topic delivery - Tests for both features --- gateway/platforms/webhook.py | 15 +++- tests/gateway/test_webhook_adapter.py | 104 ++++++++++++++++++++++ tests/gateway/test_webhook_integration.py | 2 +- 3 files changed, 119 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index 5f7c78cf..ae2e7f27 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -484,6 +484,10 @@ class WebhookAdapter(BasePlatformAdapter): Supports dot-notation access into nested dicts: 
``{pull_request.title}`` → ``payload["pull_request"]["title"]`` + + Special token ``{__raw__}`` dumps the entire payload as indented + JSON (truncated to 4000 chars). Useful for monitoring alerts or + any webhook where the agent needs to see the full payload. """ if not template: truncated = json.dumps(payload, indent=2)[:4000] @@ -494,6 +498,9 @@ class WebhookAdapter(BasePlatformAdapter): def _resolve(match: re.Match) -> str: key = match.group(1) + # Special token: dump the entire payload as JSON + if key == "__raw__": + return json.dumps(payload, indent=2)[:4000] value: Any = payload for part in key.split("."): if isinstance(value, dict): @@ -613,4 +620,10 @@ class WebhookAdapter(BasePlatformAdapter): error=f"No chat_id or home channel for {platform_name}", ) - return await adapter.send(chat_id, content) + # Pass thread_id from deliver_extra so Telegram forum topics work + metadata = None + thread_id = extra.get("message_thread_id") or extra.get("thread_id") + if thread_id: + metadata = {"thread_id": thread_id} + + return await adapter.send(chat_id, content, metadata=metadata) diff --git a/tests/gateway/test_webhook_adapter.py b/tests/gateway/test_webhook_adapter.py index 9b8a9131..f323b95a 100644 --- a/tests/gateway/test_webhook_adapter.py +++ b/tests/gateway/test_webhook_adapter.py @@ -617,3 +617,107 @@ class TestCheckRequirements: @patch("gateway.platforms.webhook.AIOHTTP_AVAILABLE", False) def test_returns_false_without_aiohttp(self): assert check_webhook_requirements() is False + + +# =================================================================== +# __raw__ template token +# =================================================================== + + +class TestRawTemplateToken: + """Tests for the {__raw__} special token in _render_prompt.""" + + def test_raw_resolves_to_full_json_payload(self): + """{__raw__} in a template dumps the entire payload as JSON.""" + adapter = _make_adapter() + payload = {"action": "opened", "number": 42} + result = 
adapter._render_prompt( + "Payload: {__raw__}", payload, "push", "test" + ) + expected_json = json.dumps(payload, indent=2) + assert result == f"Payload: {expected_json}" + + def test_raw_truncated_at_4000_chars(self): + """{__raw__} output is truncated at 4000 characters for large payloads.""" + adapter = _make_adapter() + # Build a payload whose JSON repr exceeds 4000 chars + payload = {"data": "x" * 5000} + result = adapter._render_prompt("{__raw__}", payload, "push", "test") + assert len(result) <= 4000 + + def test_raw_mixed_with_other_variables(self): + """{__raw__} can be mixed with regular template variables.""" + adapter = _make_adapter() + payload = {"action": "closed", "number": 7} + result = adapter._render_prompt( + "Action={action} Raw={__raw__}", payload, "push", "test" + ) + assert result.startswith("Action=closed Raw=") + assert '"action": "closed"' in result + assert '"number": 7' in result + + +# =================================================================== +# Cross-platform delivery thread_id passthrough +# =================================================================== + + +class TestDeliverCrossPlatformThreadId: + """Tests for thread_id passthrough in _deliver_cross_platform.""" + + def _setup_adapter_with_mock_target(self): + """Set up a webhook adapter with a mocked gateway_runner and target adapter.""" + adapter = _make_adapter() + mock_target = AsyncMock() + mock_target.send = AsyncMock(return_value=SendResult(success=True)) + + mock_runner = MagicMock() + mock_runner.adapters = {Platform("telegram"): mock_target} + mock_runner.config.get_home_channel.return_value = None + + adapter.gateway_runner = mock_runner + return adapter, mock_target + + @pytest.mark.asyncio + async def test_thread_id_passed_as_metadata(self): + """thread_id from deliver_extra is passed as metadata to adapter.send().""" + adapter, mock_target = self._setup_adapter_with_mock_target() + delivery = { + "deliver_extra": { + "chat_id": "12345", + "thread_id": 
"999", + } + } + await adapter._deliver_cross_platform("telegram", "hello", delivery) + mock_target.send.assert_awaited_once_with( + "12345", "hello", metadata={"thread_id": "999"} + ) + + @pytest.mark.asyncio + async def test_message_thread_id_passed_as_thread_id(self): + """message_thread_id from deliver_extra is mapped to thread_id in metadata.""" + adapter, mock_target = self._setup_adapter_with_mock_target() + delivery = { + "deliver_extra": { + "chat_id": "12345", + "message_thread_id": "888", + } + } + await adapter._deliver_cross_platform("telegram", "hello", delivery) + mock_target.send.assert_awaited_once_with( + "12345", "hello", metadata={"thread_id": "888"} + ) + + @pytest.mark.asyncio + async def test_no_thread_id_sends_no_metadata(self): + """When no thread_id is present, metadata is None.""" + adapter, mock_target = self._setup_adapter_with_mock_target() + delivery = { + "deliver_extra": { + "chat_id": "12345", + } + } + await adapter._deliver_cross_platform("telegram", "hello", delivery) + mock_target.send.assert_awaited_once_with( + "12345", "hello", metadata=None + ) diff --git a/tests/gateway/test_webhook_integration.py b/tests/gateway/test_webhook_integration.py index 14b9b697..89998981 100644 --- a/tests/gateway/test_webhook_integration.py +++ b/tests/gateway/test_webhook_integration.py @@ -257,7 +257,7 @@ class TestCrossPlatformDelivery: assert result.success is True mock_tg_adapter.send.assert_awaited_once_with( - "12345", "I've acknowledged the alert." + "12345", "I've acknowledged the alert.", metadata=None ) # Delivery info should be cleaned up assert chat_id not in adapter._delivery_info From f071b1832a446f09137fea625045bb4be3a0d781 Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 6 Apr 2026 16:43:31 -0700 Subject: [PATCH 005/154] docs: document rich requires_env format and install-time prompting Updates the plugin build guide and features page to reflect the interactive env var prompting added in PR #5470. 
Documents the rich manifest format (name/description/url/secret) alongside the simple string format. --- website/docs/guides/build-a-hermes-plugin.md | 32 ++++++++++++++++++-- website/docs/user-guide/features/plugins.md | 2 +- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md index 7a63ac04..e0a7f662 100644 --- a/website/docs/guides/build-a-hermes-plugin.md +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -44,8 +44,12 @@ This tells Hermes: "I'm a plugin called calculator, I provide tools and hooks." Optional fields you could add: ```yaml author: Your Name -requires_env: # gate loading on env vars - - SOME_API_KEY # plugin disabled if missing +requires_env: # gate loading on env vars; prompted during install + - SOME_API_KEY # simple format — plugin disabled if missing + - name: OTHER_KEY # rich format — shows description/url during install + description: "Key for the Other service" + url: "https://other.com/keys" + secret: true ``` ## Step 3: Write the tool schemas @@ -336,13 +340,35 @@ def register(ctx): If your plugin needs an API key: ```yaml -# plugin.yaml +# plugin.yaml — simple format (backwards-compatible) requires_env: - WEATHER_API_KEY ``` If `WEATHER_API_KEY` isn't set, the plugin is disabled with a clear message. No crash, no error in the agent — just "Plugin weather disabled (missing: WEATHER_API_KEY)". +When users run `hermes plugins install`, they're **prompted interactively** for any missing `requires_env` variables. Values are saved to `.env` automatically. 
+ +For a better install experience, use the rich format with descriptions and signup URLs: + +```yaml +# plugin.yaml — rich format +requires_env: + - name: WEATHER_API_KEY + description: "API key for OpenWeather" + url: "https://openweathermap.org/api" + secret: true +``` + +| Field | Required | Description | +|-------|----------|-------------| +| `name` | Yes | Environment variable name | +| `description` | No | Shown to user during install prompt | +| `url` | No | Where to get the credential | +| `secret` | No | If `true`, input is hidden (like a password field) | + +Both formats can be mixed in the same list. Already-set variables are skipped silently. + ### Conditional tool availability For tools that depend on optional libraries: diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index 18191cb7..a8f984fe 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -87,7 +87,7 @@ Project-local plugins under `./.hermes/plugins/` are disabled by default. Enable | Inject messages | `ctx.inject_message(content, role="user")` — see [Injecting Messages](#injecting-messages) | | Ship data files | `Path(__file__).parent / "data" / "file.yaml"` | | Bundle skills | Copy `skill.md` to `~/.hermes/skills/` at load time | -| Gate on env vars | `requires_env: [API_KEY]` in plugin.yaml | +| Gate on env vars | `requires_env: [API_KEY]` in plugin.yaml — prompted during `hermes plugins install` | | Distribute via pip | `[project.entry-points."hermes_agent.plugins"]` | ## Plugin discovery From e7698521e7ff98b8344c3c683b7cd3235d9cf07a Mon Sep 17 00:00:00 2001 From: dagbs Date: Mon, 6 Apr 2026 16:45:13 -0700 Subject: [PATCH 006/154] fix(openviking): add atexit safety net for session commit Ensures pending sessions are committed on process exit even if shutdown_memory_provider is never called (gateway crash, SIGKILL, or exception in _async_flush_memories preventing shutdown). 
Also reorders on_session_end to wait for the pending sync thread before checking turn_count, so the last turn's messages are flushed. Based on PR #4919 by dagbs. --- plugins/memory/openviking/__init__.py | 42 +++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/plugins/memory/openviking/__init__.py b/plugins/memory/openviking/__init__.py index 410979a0..596080c3 100644 --- a/plugins/memory/openviking/__init__.py +++ b/plugins/memory/openviking/__init__.py @@ -23,6 +23,7 @@ Capabilities: from __future__ import annotations +import atexit import json import logging import os @@ -37,6 +38,30 @@ _DEFAULT_ENDPOINT = "http://127.0.0.1:1933" _TIMEOUT = 30.0 +# --------------------------------------------------------------------------- +# Process-level atexit safety net — ensures pending sessions are committed +# even if shutdown_memory_provider is never called (e.g. gateway crash, +# SIGKILL, or exception in _async_flush_memories preventing shutdown). +# --------------------------------------------------------------------------- +_last_active_provider: Optional["OpenVikingMemoryProvider"] = None + + +def _atexit_commit_sessions(): + """Fire on_session_end for the last active provider on process exit.""" + global _last_active_provider + provider = _last_active_provider + if provider is None: + return + _last_active_provider = None + try: + provider.on_session_end([]) + except Exception: + pass # best-effort at shutdown time + + +atexit.register(_atexit_commit_sessions) + + # --------------------------------------------------------------------------- # HTTP helper — uses httpx to avoid requiring the openviking SDK # --------------------------------------------------------------------------- @@ -277,6 +302,10 @@ class OpenVikingMemoryProvider(MemoryProvider): logger.warning("httpx not installed — OpenViking plugin disabled") self._client = None + # Register as the last active provider for atexit safety net + global _last_active_provider + 
_last_active_provider = self + def system_prompt_block(self) -> str: if not self._client: return "" @@ -387,13 +416,18 @@ class OpenVikingMemoryProvider(MemoryProvider): OpenViking automatically extracts 6 categories of memories: profile, preferences, entities, events, cases, and patterns. """ - if not self._client or self._turn_count == 0: + if not self._client: return - # Wait for any pending sync to finish first + # Wait for any pending sync to finish first — do this before the + # turn_count check so the last turn's messages are flushed even if + # the count hasn't been incremented yet. if self._sync_thread and self._sync_thread.is_alive(): self._sync_thread.join(timeout=10.0) + if self._turn_count == 0: + return + try: self._client.post(f"/api/v1/sessions/{self._session_id}/commit") logger.info("OpenViking session %s committed (%d turns)", self._session_id, self._turn_count) @@ -449,6 +483,10 @@ class OpenVikingMemoryProvider(MemoryProvider): for t in (self._sync_thread, self._prefetch_thread): if t and t.is_alive(): t.join(timeout=5.0) + # Clear atexit reference so it doesn't double-commit + global _last_active_provider + if _last_active_provider is self: + _last_active_provider = None # -- Tool implementations ------------------------------------------------ From e131f13662d46b266dec1d666d5ac3ad59953725 Mon Sep 17 00:00:00 2001 From: Simon Brumfield Date: Mon, 6 Apr 2026 17:39:52 -0400 Subject: [PATCH 007/154] fix(doctor): use recall_mode instead of memory_mode on HonchoClientConfig --- hermes_cli/doctor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 40cbfe20..08cf7233 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -836,7 +836,7 @@ def run_doctor(args): get_honcho_client(hcfg) check_ok( "Honcho connected", - f"workspace={hcfg.workspace_id} mode={hcfg.memory_mode} freq={hcfg.write_frequency}", + f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} 
freq={hcfg.write_frequency}", ) except Exception as _e: check_fail("Honcho connection failed", str(_e)) From 77610961be12c369d6b33a4948149b703b46e773 Mon Sep 17 00:00:00 2001 From: KangYu Date: Tue, 7 Apr 2026 04:48:15 +0900 Subject: [PATCH 008/154] Lower Telegram fallback activation log to info --- gateway/platforms/telegram.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index b4638703..7575c10f 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -518,7 +518,7 @@ class TelegramAdapter(BasePlatformAdapter): ", ".join(fallback_ips), ) if fallback_ips: - logger.warning( + logger.info( "[%s] Telegram fallback IPs active: %s", self.name, ", ".join(fallback_ips), From 3b4dfc8e226cb7d703cd669073a1af1101ceca27 Mon Sep 17 00:00:00 2001 From: charliekerfoot Date: Mon, 6 Apr 2026 18:25:54 -0500 Subject: [PATCH 009/154] fix(tools): portable base64 encoding for image reading on macOS --- tools/file_operations.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/file_operations.py b/tools/file_operations.py index 4202e797..8305eb9c 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -580,8 +580,10 @@ class ShellFileOperations(FileOperations): ), ) - # Get base64 content - b64_cmd = f"base64 -w 0 {self._escape_shell_arg(path)} 2>/dev/null" + # Get base64 content — pipe through tr to strip newlines portably. + # GNU base64 supports -w 0 but macOS base64 does not; both wrap by + # default, so stripping with tr is portable across all backends. 
+ b64_cmd = f"base64 {self._escape_shell_arg(path)} 2>/dev/null | tr -d '\\n'" b64_result = self._exec(b64_cmd, timeout=30) if b64_result.exit_code != 0: From ea16949422768d759941272f630c0e25e40f9689 Mon Sep 17 00:00:00 2001 From: Myeongwon Choi Date: Tue, 7 Apr 2026 07:43:30 +0900 Subject: [PATCH 010/154] fix(cron): suppress delivery when [SILENT] appears anywhere in response Previously the scheduler checked startswith('[SILENT]'), so agents that appended [SILENT] after an explanation (e.g. 'N items filtered.\n\n[SILENT]') would still trigger delivery. Change the check to 'in' so the marker is caught regardless of position. Add test_silent_trailing_suppresses_delivery to cover this case. --- cron/scheduler.py | 2 +- tests/cron/test_scheduler.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index 63018d6f..5f3feba0 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -817,7 +817,7 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: # output is already saved above). Failed jobs always deliver. 
deliver_content = final_response if success else f"⚠️ Cron job '{job.get('name', job['id'])}' failed:\n{error}" should_deliver = bool(deliver_content) - if should_deliver and success and deliver_content.strip().upper().startswith(SILENT_MARKER): + if should_deliver and success and SILENT_MARKER in deliver_content.strip().upper(): logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER) should_deliver = False diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 33f265de..c1282897 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -709,6 +709,18 @@ class TestSilentDelivery: tick(verbose=False) deliver_mock.assert_not_called() + def test_silent_trailing_suppresses_delivery(self): + """Agent appended [SILENT] after explanation text — must still suppress.""" + response = "2 deals filtered out (like<10, reply<15).\n\n[SILENT]" + with patch("cron.scheduler.get_due_jobs", return_value=[self._make_job()]), \ + patch("cron.scheduler.run_job", return_value=(True, "# output", response, None)), \ + patch("cron.scheduler.save_job_output", return_value="/tmp/out.md"), \ + patch("cron.scheduler._deliver_result") as deliver_mock, \ + patch("cron.scheduler.mark_job_run"): + from cron.scheduler import tick + tick(verbose=False) + deliver_mock.assert_not_called() + def test_silent_is_case_insensitive(self): with patch("cron.scheduler.get_due_jobs", return_value=[self._make_job()]), \ patch("cron.scheduler.run_job", return_value=(True, "# output", "[silent] nothing new", None)), \ From 0f9aa570695df978d8256b93a462c1c443e4769f Mon Sep 17 00:00:00 2001 From: ryanautomated Date: Mon, 6 Apr 2026 22:10:47 +0100 Subject: [PATCH 011/154] fix: silent memory flush failure on /new and /resume commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _async_flush_memories() helper accepts (session_id) but both the /new and /resume handlers passed two arguments 
(session_id, session_key). The TypeError was silently swallowed at DEBUG level, so memory extraction never ran when users typed /new or /resume. One call site (the session expiry watcher) was already fixed in 9c96f669, but /new and /resume were missed. - gateway/run.py:3247 — remove stray session_key from /new handler - gateway/run.py:4989 — remove stray session_key from /resume handler - tests/gateway/test_resume_command.py:222 — update test assertion --- gateway/run.py | 4 ++-- tests/gateway/test_resume_command.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 731bc8c0..82cb10b4 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3244,7 +3244,7 @@ class GatewayRunner: old_entry = self.session_store._entries.get(session_key) if old_entry: _flush_task = asyncio.create_task( - self._async_flush_memories(old_entry.session_id, session_key) + self._async_flush_memories(old_entry.session_id) ) self._background_tasks.add(_flush_task) _flush_task.add_done_callback(self._background_tasks.discard) @@ -4990,7 +4990,7 @@ class GatewayRunner: # Flush memories for current session before switching try: _flush_task = asyncio.create_task( - self._async_flush_memories(current_entry.session_id, session_key) + self._async_flush_memories(current_entry.session_id) ) self._background_tasks.add(_flush_task) _flush_task.add_done_callback(self._background_tasks.discard) diff --git a/tests/gateway/test_resume_command.py b/tests/gateway/test_resume_command.py index 739bc149..dc788f74 100644 --- a/tests/gateway/test_resume_command.py +++ b/tests/gateway/test_resume_command.py @@ -201,8 +201,8 @@ class TestHandleResumeCommand: db.close() @pytest.mark.asyncio - async def test_resume_flushes_memories_with_gateway_session_key(self, tmp_path): - """Resume should preserve the gateway session key for Honcho flushes.""" + async def test_resume_flushes_memories(self, tmp_path): + """Resume should flush memories from the current session 
before switching.""" from hermes_state import SessionDB db = SessionDB(db_path=tmp_path / "state.db") @@ -221,6 +221,5 @@ class TestHandleResumeCommand: runner._async_flush_memories.assert_called_once_with( "current_session_001", - _session_key_for_event(event), ) db.close() From 3282b7066c7c49e8be4d8679d0148aef6baf5483 Mon Sep 17 00:00:00 2001 From: Neri Cervin Date: Mon, 6 Apr 2026 16:32:56 -0300 Subject: [PATCH 012/154] fix(mattermost): set message type to DOCUMENT when post has file attachments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Mattermost adapter downloads file attachments correctly but never updates msg_type from TEXT to DOCUMENT. This means the document enrichment block in gateway/run.py (which requires MessageType.DOCUMENT) never executes — text files are not inlined, and the agent is never notified about attached files. The user sends a file, the adapter downloads it to the local cache, but the agent sees an empty message and responds with 'I didn't receive any file'. Set msg_type to DOCUMENT when file_ids is non-empty, matching the behavior of the Telegram and Discord adapters. --- gateway/platforms/mattermost.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 7c9c2d29..0e689bed 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -661,6 +661,8 @@ class MattermostAdapter(BasePlatformAdapter): msg_type = MessageType.TEXT if message_text.startswith("/"): msg_type = MessageType.COMMAND + elif file_ids: + msg_type = MessageType.DOCUMENT # Download file attachments immediately (URLs require auth headers # that downstream tools won't have). 
From f3ae2491a3f9b315d0dd5bdffe58aae6783ae529 Mon Sep 17 00:00:00 2001 From: Neri Cervin Date: Mon, 6 Apr 2026 17:03:50 -0300 Subject: [PATCH 013/154] fix: detect correct message type from file mime instead of blanket DOCUMENT Images need PHOTO for vision, audio needs VOICE for STT, and other files get DOCUMENT for text inlining. --- gateway/platforms/mattermost.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 0e689bed..95702603 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -661,8 +661,6 @@ class MattermostAdapter(BasePlatformAdapter): msg_type = MessageType.TEXT if message_text.startswith("/"): msg_type = MessageType.COMMAND - elif file_ids: - msg_type = MessageType.DOCUMENT # Download file attachments immediately (URLs require auth headers # that downstream tools won't have). @@ -703,6 +701,15 @@ class MattermostAdapter(BasePlatformAdapter): except Exception as exc: logger.warning("Mattermost: error downloading file %s: %s", fid, exc) + # Set message type based on downloaded media types. 
+ if media_types and msg_type == MessageType.TEXT: + if any(m.startswith("image/") for m in media_types): + msg_type = MessageType.PHOTO + elif any(m.startswith("audio/") for m in media_types): + msg_type = MessageType.VOICE + elif media_types: + msg_type = MessageType.DOCUMENT + source = self.build_source( chat_id=channel_id, chat_type=chat_type, From c1818b7e9ee2cf03d2ea7ddfa7a74d658ce98faa Mon Sep 17 00:00:00 2001 From: WAXLYY Date: Tue, 7 Apr 2026 01:02:56 +0300 Subject: [PATCH 014/154] fix(tools): redact query secrets in send_message errors --- .../test_send_message_missing_platforms.py | 23 +++++ tests/tools/test_send_message_tool.py | 27 ++++++ tools/send_message_tool.py | 93 ++++++++++++------- 3 files changed, 111 insertions(+), 32 deletions(-) diff --git a/tests/tools/test_send_message_missing_platforms.py b/tests/tools/test_send_message_missing_platforms.py index 8943109e..881ae33d 100644 --- a/tests/tools/test_send_message_missing_platforms.py +++ b/tests/tools/test_send_message_missing_platforms.py @@ -314,6 +314,29 @@ class TestSendDingtalk: assert "error" in result assert "DingTalk send failed" in result["error"] + def test_http_error_redacts_access_token_in_exception_text(self): + token = "supersecret-access-token-123456789" + resp = self._make_httpx_resp(status_code=401) + resp.raise_for_status = MagicMock( + side_effect=Exception( + f"POST https://oapi.dingtalk.com/robot/send?access_token={token} returned 401" + ) + ) + client_ctx, _ = self._make_httpx_client(resp) + + with patch("httpx.AsyncClient", return_value=client_ctx): + result = asyncio.run( + _send_dingtalk( + {"webhook_url": f"https://oapi.dingtalk.com/robot/send?access_token={token}"}, + "ch", + "hi", + ) + ) + + assert "error" in result + assert token not in result["error"] + assert "access_token=***" in result["error"] + def test_missing_config(self): with patch.dict(os.environ, {"DINGTALK_WEBHOOK_URL": ""}, clear=False): result = asyncio.run(_send_dingtalk({}, "ch", "hi")) diff --git 
a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py index 7b4643af..34cea278 100644 --- a/tests/tools/test_send_message_tool.py +++ b/tests/tools/test_send_message_tool.py @@ -276,6 +276,33 @@ class TestSendMessageTool: thread_id=None, ) + def test_top_level_send_failure_redacts_query_token(self): + config, _telegram_cfg = _make_config() + leaked = "very-secret-query-token-123456" + + def _raise_and_close(coro): + coro.close() + raise RuntimeError( + f"transport error: https://api.example.com/send?access_token={leaked}" + ) + + with patch("gateway.config.load_gateway_config", return_value=config), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch("model_tools._run_async", side_effect=_raise_and_close): + result = json.loads( + send_message_tool( + { + "action": "send", + "target": "telegram:-1001", + "message": "hello", + } + ) + ) + + assert "error" in result + assert leaked not in result["error"] + assert "access_token=***" in result["error"] + class TestSendTelegramMediaDelivery: def test_sends_text_then_photo_for_media_tag(self, tmp_path, monkeypatch): diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 32741f08..eff0e7b5 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -12,6 +12,8 @@ import re import ssl import time +from agent.redact import redact_sensitive_text + logger = logging.getLogger(__name__) _TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$") @@ -20,6 +22,27 @@ _IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"} _VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".3gp"} _AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"} _VOICE_EXTS = {".ogg", ".opus"} +_URL_SECRET_QUERY_RE = re.compile( + r"([?&](?:access_token|api[_-]?key|auth[_-]?token|token|signature|sig)=)([^&#\s]+)", + re.IGNORECASE, +) +_GENERIC_SECRET_ASSIGN_RE = re.compile( + r"\b(access_token|api[_-]?key|auth[_-]?token|signature|sig)\s*=\s*([^\s,;]+)", + 
re.IGNORECASE, +) + + +def _sanitize_error_text(text) -> str: + """Redact secrets from error text before surfacing it to users/models.""" + redacted = redact_sensitive_text(text) + redacted = _URL_SECRET_QUERY_RE.sub(lambda m: f"{m.group(1)}***", redacted) + redacted = _GENERIC_SECRET_ASSIGN_RE.sub(lambda m: f"{m.group(1)}=***", redacted) + return redacted + + +def _error(message: str) -> dict: + """Build a standardized error payload with redacted content.""" + return {"error": _sanitize_error_text(message)} SEND_MESSAGE_SCHEMA = { @@ -70,7 +93,7 @@ def _handle_list(): from gateway.channel_directory import format_directory_for_display return json.dumps({"targets": format_directory_for_display()}) except Exception as e: - return json.dumps({"error": f"Failed to load channel directory: {e}"}) + return json.dumps(_error(f"Failed to load channel directory: {e}")) def _handle_send(args): @@ -117,7 +140,7 @@ def _handle_send(args): from gateway.config import load_gateway_config, Platform config = load_gateway_config() except Exception as e: - return json.dumps({"error": f"Failed to load gateway config: {e}"}) + return json.dumps(_error(f"Failed to load gateway config: {e}")) platform_map = { "telegram": Platform.TELEGRAM, @@ -190,9 +213,11 @@ def _handle_send(args): except Exception: pass + if isinstance(result, dict) and "error" in result: + result["error"] = _sanitize_error_text(result["error"]) return json.dumps(result) except Exception as e: - return json.dumps({"error": f"Send failed: {e}"}) + return json.dumps(_error(f"Send failed: {e}")) def _parse_target_ref(platform_name: str, target_ref: str): @@ -434,7 +459,11 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No except Exception as md_error: # Parse failed, fall back to plain text if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower() or "html" in str(md_error).lower(): - logger.warning("Parse mode %s failed in _send_telegram, falling back to plain text: %s", 
send_parse_mode, md_error) + logger.warning( + "Parse mode %s failed in _send_telegram, falling back to plain text: %s", + send_parse_mode, + _sanitize_error_text(md_error), + ) if not _has_html: try: from gateway.platforms.telegram import _strip_mdv2 @@ -481,7 +510,7 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No chat_id=int_chat_id, document=f, **thread_kwargs ) except Exception as e: - warning = f"Failed to send media {media_path}: {e}" + warning = _sanitize_error_text(f"Failed to send media {media_path}: {e}") logger.error(warning) warnings.append(warning) @@ -503,7 +532,7 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No except ImportError: return {"error": "python-telegram-bot not installed. Run: pip install python-telegram-bot"} except Exception as e: - return {"error": f"Telegram send failed: {e}"} + return _error(f"Telegram send failed: {e}") async def _send_discord(token, chat_id, message): @@ -522,11 +551,11 @@ async def _send_discord(token, chat_id, message): async with session.post(url, headers=headers, json={"content": message}) as resp: if resp.status not in (200, 201): body = await resp.text() - return {"error": f"Discord API error ({resp.status}): {body}"} + return _error(f"Discord API error ({resp.status}): {body}") data = await resp.json() return {"success": True, "platform": "discord", "chat_id": chat_id, "message_id": data.get("id")} except Exception as e: - return {"error": f"Discord send failed: {e}"} + return _error(f"Discord send failed: {e}") async def _send_slack(token, chat_id, message): @@ -543,9 +572,9 @@ async def _send_slack(token, chat_id, message): data = await resp.json() if data.get("ok"): return {"success": True, "platform": "slack", "chat_id": chat_id, "message_id": data.get("ts")} - return {"error": f"Slack API error: {data.get('error', 'unknown')}"} + return _error(f"Slack API error: {data.get('error', 'unknown')}") except Exception as e: - return 
{"error": f"Slack send failed: {e}"} + return _error(f"Slack send failed: {e}") async def _send_whatsapp(extra, chat_id, message): @@ -571,9 +600,9 @@ async def _send_whatsapp(extra, chat_id, message): "message_id": data.get("messageId"), } body = await resp.text() - return {"error": f"WhatsApp bridge error ({resp.status}): {body}"} + return _error(f"WhatsApp bridge error ({resp.status}): {body}") except Exception as e: - return {"error": f"WhatsApp send failed: {e}"} + return _error(f"WhatsApp send failed: {e}") async def _send_signal(extra, chat_id, message): @@ -606,10 +635,10 @@ async def _send_signal(extra, chat_id, message): resp.raise_for_status() data = resp.json() if "error" in data: - return {"error": f"Signal RPC error: {data['error']}"} + return _error(f"Signal RPC error: {data['error']}") return {"success": True, "platform": "signal", "chat_id": chat_id} except Exception as e: - return {"error": f"Signal send failed: {e}"} + return _error(f"Signal send failed: {e}") async def _send_email(extra, chat_id, message): @@ -638,7 +667,7 @@ async def _send_email(extra, chat_id, message): server.quit() return {"success": True, "platform": "email", "chat_id": chat_id} except Exception as e: - return {"error": f"Email send failed: {e}"} + return _error(f"Email send failed: {e}") async def _send_sms(auth_token, chat_id, message): @@ -687,11 +716,11 @@ async def _send_sms(auth_token, chat_id, message): body = await resp.json() if resp.status >= 400: error_msg = body.get("message", str(body)) - return {"error": f"Twilio API error ({resp.status}): {error_msg}"} + return _error(f"Twilio API error ({resp.status}): {error_msg}") msg_sid = body.get("sid", "") return {"success": True, "platform": "sms", "chat_id": chat_id, "message_id": msg_sid} except Exception as e: - return {"error": f"SMS send failed: {e}"} + return _error(f"SMS send failed: {e}") async def _send_mattermost(token, extra, chat_id, message): @@ -711,11 +740,11 @@ async def _send_mattermost(token, extra, 
chat_id, message): async with session.post(url, headers=headers, json={"channel_id": chat_id, "message": message}) as resp: if resp.status not in (200, 201): body = await resp.text() - return {"error": f"Mattermost API error ({resp.status}): {body}"} + return _error(f"Mattermost API error ({resp.status}): {body}") data = await resp.json() return {"success": True, "platform": "mattermost", "chat_id": chat_id, "message_id": data.get("id")} except Exception as e: - return {"error": f"Mattermost send failed: {e}"} + return _error(f"Mattermost send failed: {e}") async def _send_matrix(token, extra, chat_id, message): @@ -753,11 +782,11 @@ async def _send_matrix(token, extra, chat_id, message): async with session.put(url, headers=headers, json=payload) as resp: if resp.status not in (200, 201): body = await resp.text() - return {"error": f"Matrix API error ({resp.status}): {body}"} + return _error(f"Matrix API error ({resp.status}): {body}") data = await resp.json() return {"success": True, "platform": "matrix", "chat_id": chat_id, "message_id": data.get("event_id")} except Exception as e: - return {"error": f"Matrix send failed: {e}"} + return _error(f"Matrix send failed: {e}") async def _send_homeassistant(token, extra, chat_id, message): @@ -777,10 +806,10 @@ async def _send_homeassistant(token, extra, chat_id, message): async with session.post(url, headers=headers, json={"message": message, "target": chat_id}) as resp: if resp.status not in (200, 201): body = await resp.text() - return {"error": f"Home Assistant API error ({resp.status}): {body}"} + return _error(f"Home Assistant API error ({resp.status}): {body}") return {"success": True, "platform": "homeassistant", "chat_id": chat_id} except Exception as e: - return {"error": f"Home Assistant send failed: {e}"} + return _error(f"Home Assistant send failed: {e}") async def _send_dingtalk(extra, chat_id, message): @@ -808,10 +837,10 @@ async def _send_dingtalk(extra, chat_id, message): resp.raise_for_status() data = 
resp.json() if data.get("errcode", 0) != 0: - return {"error": f"DingTalk API error: {data.get('errmsg', 'unknown')}"} + return _error(f"DingTalk API error: {data.get('errmsg', 'unknown')}") return {"success": True, "platform": "dingtalk", "chat_id": chat_id} except Exception as e: - return {"error": f"DingTalk send failed: {e}"} + return _error(f"DingTalk send failed: {e}") async def _send_wecom(extra, chat_id, message): @@ -829,16 +858,16 @@ async def _send_wecom(extra, chat_id, message): adapter = WeComAdapter(pconfig) connected = await adapter.connect() if not connected: - return {"error": f"WeCom: failed to connect — {adapter.fatal_error_message or 'unknown error'}"} + return _error(f"WeCom: failed to connect - {adapter.fatal_error_message or 'unknown error'}") try: result = await adapter.send(chat_id, message) if not result.success: - return {"error": f"WeCom send failed: {result.error}"} + return _error(f"WeCom send failed: {result.error}") return {"success": True, "platform": "wecom", "chat_id": chat_id, "message_id": result.message_id} finally: await adapter.disconnect() except Exception as e: - return {"error": f"WeCom send failed: {e}"} + return _error(f"WeCom send failed: {e}") async def _send_feishu(pconfig, chat_id, message, media_files=None, thread_id=None): @@ -864,11 +893,11 @@ async def _send_feishu(pconfig, chat_id, message, media_files=None, thread_id=No if message.strip(): last_result = await adapter.send(chat_id, message, metadata=metadata) if not last_result.success: - return {"error": f"Feishu send failed: {last_result.error}"} + return _error(f"Feishu send failed: {last_result.error}") for media_path, is_voice in media_files: if not os.path.exists(media_path): - return {"error": f"Media file not found: {media_path}"} + return _error(f"Media file not found: {media_path}") ext = os.path.splitext(media_path)[1].lower() if ext in _IMAGE_EXTS: @@ -883,7 +912,7 @@ async def _send_feishu(pconfig, chat_id, message, media_files=None, thread_id=No 
last_result = await adapter.send_document(chat_id, media_path, metadata=metadata) if not last_result.success: - return {"error": f"Feishu media send failed: {last_result.error}"} + return _error(f"Feishu media send failed: {last_result.error}") if last_result is None: return {"error": "No deliverable text or media remained after processing MEDIA tags"} @@ -895,7 +924,7 @@ async def _send_feishu(pconfig, chat_id, message, media_files=None, thread_id=No "message_id": last_result.message_id, } except Exception as e: - return {"error": f"Feishu send failed: {e}"} + return _error(f"Feishu send failed: {e}") def _check_send_message(): From e9b5864b3f059d111b031f3506db739d81122315 Mon Sep 17 00:00:00 2001 From: charliekerfoot Date: Mon, 6 Apr 2026 18:13:54 -0500 Subject: [PATCH 015/154] fix: multiple platform adaptors concurrency --- gateway/pairing.py | 133 +++++++++++++++++++++++++++------------------ 1 file changed, 79 insertions(+), 54 deletions(-) diff --git a/gateway/pairing.py b/gateway/pairing.py index 34b3d902..09b61fef 100644 --- a/gateway/pairing.py +++ b/gateway/pairing.py @@ -21,6 +21,8 @@ Storage: ~/.hermes/pairing/ import json import os import secrets +import tempfile +import threading import time from pathlib import Path from typing import Optional @@ -45,13 +47,29 @@ PAIRING_DIR = get_hermes_dir("platforms/pairing", "pairing") def _secure_write(path: Path, data: str) -> None: - """Write data to file with restrictive permissions (owner read/write only).""" + """Write data to file with restrictive permissions (owner read/write only). + + Uses a temp-file + atomic rename so readers always see either the old + complete file or the new one — never a partial write. 
+ """ path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(data, encoding="utf-8") + fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), suffix=".tmp") try: - os.chmod(path, 0o600) - except OSError: - pass # Windows doesn't support chmod the same way + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(data) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, str(path)) + try: + os.chmod(path, 0o600) + except OSError: + pass # Windows doesn't support chmod the same way + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise class PairingStore: @@ -66,6 +84,9 @@ class PairingStore: def __init__(self): PAIRING_DIR.mkdir(parents=True, exist_ok=True) + # Protects all read-modify-write cycles. The gateway runs multiple + # platform adapters concurrently in threads sharing one PairingStore. + self._lock = threading.RLock() def _pending_path(self, platform: str) -> Path: return PAIRING_DIR / f"{platform}-pending.json" @@ -105,7 +126,7 @@ class PairingStore: return results def _approve_user(self, platform: str, user_id: str, user_name: str = "") -> None: - """Add a user to the approved list.""" + """Add a user to the approved list. Must be called under self._lock.""" approved = self._load_json(self._approved_path(platform)) approved[user_id] = { "user_name": user_name, @@ -116,11 +137,12 @@ class PairingStore: def revoke(self, platform: str, user_id: str) -> bool: """Remove a user from the approved list. 
Returns True if found.""" path = self._approved_path(platform) - approved = self._load_json(path) - if user_id in approved: - del approved[user_id] - self._save_json(path, approved) - return True + with self._lock: + approved = self._load_json(path) + if user_id in approved: + del approved[user_id] + self._save_json(path, approved) + return True return False # ----- Pending codes ----- @@ -136,36 +158,37 @@ class PairingStore: - Max pending codes reached for this platform - User/platform is in lockout due to failed attempts """ - self._cleanup_expired(platform) + with self._lock: + self._cleanup_expired(platform) - # Check lockout - if self._is_locked_out(platform): - return None + # Check lockout + if self._is_locked_out(platform): + return None - # Check rate limit for this specific user - if self._is_rate_limited(platform, user_id): - return None + # Check rate limit for this specific user + if self._is_rate_limited(platform, user_id): + return None - # Check max pending - pending = self._load_json(self._pending_path(platform)) - if len(pending) >= MAX_PENDING_PER_PLATFORM: - return None + # Check max pending + pending = self._load_json(self._pending_path(platform)) + if len(pending) >= MAX_PENDING_PER_PLATFORM: + return None - # Generate cryptographically random code - code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH)) + # Generate cryptographically random code + code = "".join(secrets.choice(ALPHABET) for _ in range(CODE_LENGTH)) - # Store pending request - pending[code] = { - "user_id": user_id, - "user_name": user_name, - "created_at": time.time(), - } - self._save_json(self._pending_path(platform), pending) + # Store pending request + pending[code] = { + "user_id": user_id, + "user_name": user_name, + "created_at": time.time(), + } + self._save_json(self._pending_path(platform), pending) - # Record rate limit - self._record_rate_limit(platform, user_id) + # Record rate limit + self._record_rate_limit(platform, user_id) - return code + 
return code def approve_code(self, platform: str, code: str) -> Optional[dict]: """ @@ -173,24 +196,25 @@ class PairingStore: Returns {user_id, user_name} on success, None if code is invalid/expired. """ - self._cleanup_expired(platform) - code = code.upper().strip() + with self._lock: + self._cleanup_expired(platform) + code = code.upper().strip() - pending = self._load_json(self._pending_path(platform)) - if code not in pending: - self._record_failed_attempt(platform) - return None + pending = self._load_json(self._pending_path(platform)) + if code not in pending: + self._record_failed_attempt(platform) + return None - entry = pending.pop(code) - self._save_json(self._pending_path(platform), pending) + entry = pending.pop(code) + self._save_json(self._pending_path(platform), pending) - # Add to approved list - self._approve_user(platform, entry["user_id"], entry.get("user_name", "")) + # Add to approved list + self._approve_user(platform, entry["user_id"], entry.get("user_name", "")) - return { - "user_id": entry["user_id"], - "user_name": entry.get("user_name", ""), - } + return { + "user_id": entry["user_id"], + "user_name": entry.get("user_name", ""), + } def list_pending(self, platform: str = None) -> list: """List pending pairing requests, optionally filtered by platform.""" @@ -212,12 +236,13 @@ class PairingStore: def clear_pending(self, platform: str = None) -> int: """Clear all pending requests. 
Returns count removed.""" - count = 0 - platforms = [platform] if platform else self._all_platforms("pending") - for p in platforms: - pending = self._load_json(self._pending_path(p)) - count += len(pending) - self._save_json(self._pending_path(p), {}) + with self._lock: + count = 0 + platforms = [platform] if platform else self._all_platforms("pending") + for p in platforms: + pending = self._load_json(self._pending_path(p)) + count += len(pending) + self._save_json(self._pending_path(p), {}) return count # ----- Rate limiting and lockout ----- From b26e85bf9d6266c3691f152ed677085b3eeaf04c Mon Sep 17 00:00:00 2001 From: KangYu Date: Tue, 7 Apr 2026 04:28:11 +0900 Subject: [PATCH 016/154] Fix compaction summary retries for temperature-restricted models --- agent/context_compressor.py | 28 ++++++++++++++++--- tests/agent/test_context_compressor.py | 38 ++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 6fdb38b2..0d971e4b 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -14,6 +14,7 @@ Improvements over v1: """ import logging +import time from typing import Any, Dict, List, Optional from agent.auxiliary_client import call_llm @@ -46,6 +47,7 @@ _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]" # Chars per token rough estimate _CHARS_PER_TOKEN = 4 +_SUMMARY_FAILURE_COOLDOWN_SECONDS = 600 class ContextCompressor: @@ -118,6 +120,7 @@ class ContextCompressor: # Stores the previous compaction summary for iterative updates self._previous_summary: Optional[str] = None + self._summary_failure_cooldown_until: float = 0.0 def update_from_response(self, usage: Dict[str, Any]): """Update tracked token usage from API response.""" @@ -258,6 +261,14 @@ class ContextCompressor: the middle turns without a summary rather than inject a useless placeholder. 
""" + now = time.monotonic() + if now < self._summary_failure_cooldown_until: + logger.debug( + "Skipping context summary during cooldown (%.0fs remaining)", + self._summary_failure_cooldown_until - now, + ) + return None + summary_budget = self._compute_summary_budget(turns_to_summarize) content_to_summarize = self._serialize_for_summary(turns_to_summarize) @@ -345,7 +356,6 @@ Write only the summary body. Do not include any preamble or prefix.""" call_kwargs = { "task": "compression", "messages": [{"role": "user", "content": prompt}], - "temperature": 0.3, "max_tokens": summary_budget * 2, # timeout resolved from auxiliary.compression.timeout config by call_llm } @@ -359,13 +369,23 @@ Write only the summary body. Do not include any preamble or prefix.""" summary = content.strip() # Store for iterative updates on next compaction self._previous_summary = summary + self._summary_failure_cooldown_until = 0.0 return self._with_summary_prefix(summary) except RuntimeError: + self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS logging.warning("Context compression: no provider available for " - "summary. Middle turns will be dropped without summary.") + "summary. Middle turns will be dropped without summary " + "for %d seconds.", + _SUMMARY_FAILURE_COOLDOWN_SECONDS) return None except Exception as e: - logging.warning("Failed to generate context summary: %s", e) + self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS + logging.warning( + "Failed to generate context summary: %s. " + "Further summary attempts paused for %d seconds.", + e, + _SUMMARY_FAILURE_COOLDOWN_SECONDS, + ) return None @staticmethod @@ -648,7 +668,7 @@ Write only the summary body. 
Do not include any preamble or prefix.""" compressed.append({"role": summary_role, "content": summary}) else: if not self.quiet_mode: - logger.warning("No summary model available — middle turns dropped without summary") + logger.debug("No summary model available — middle turns dropped without summary") for i in range(compress_end, n_messages): msg = messages[i].copy() diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 45c832df..257cf903 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -197,6 +197,44 @@ class TestNonStringContent: assert summary is not None assert summary == SUMMARY_PREFIX + def test_summary_call_does_not_force_temperature(self): + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "ok" + + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test", quiet_mode=True) + + messages = [ + {"role": "user", "content": "do something"}, + {"role": "assistant", "content": "ok"}, + ] + + with patch("agent.context_compressor.call_llm", return_value=mock_response) as mock_call: + c._generate_summary(messages) + + kwargs = mock_call.call_args.kwargs + assert "temperature" not in kwargs + + +class TestSummaryFailureCooldown: + def test_summary_failure_enters_cooldown_and_skips_retry(self): + with patch("agent.context_compressor.get_model_context_length", return_value=100000): + c = ContextCompressor(model="test", quiet_mode=True) + + messages = [ + {"role": "user", "content": "do something"}, + {"role": "assistant", "content": "ok"}, + ] + + with patch("agent.context_compressor.call_llm", side_effect=Exception("boom")) as mock_call: + first = c._generate_summary(messages) + second = c._generate_summary(messages) + + assert first is None + assert second is None + assert mock_call.call_count == 1 + class TestSummaryPrefixNormalization: def 
test_legacy_prefix_is_replaced(self): From 1c0183ec71b10c3fcf2bf502aeb6ed11f9fa630b Mon Sep 17 00:00:00 2001 From: WAXLYY Date: Mon, 6 Apr 2026 23:27:54 +0300 Subject: [PATCH 017/154] fix(gateway): sanitize media URLs in base platform logs --- gateway/platforms/base.py | 65 ++++++++++++++++++++++++++--- tests/gateway/test_platform_base.py | 26 ++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 5261acee..0ba00d89 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -12,6 +12,7 @@ import random import re import uuid from abc import ABC, abstractmethod +from urllib.parse import urlsplit logger = logging.getLogger(__name__) from dataclasses import dataclass, field @@ -36,6 +37,43 @@ GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = ( ) +def _safe_url_for_log(url: str, max_len: int = 80) -> str: + """Return a URL string safe for logs (no query/fragment/userinfo).""" + if max_len <= 0: + return "" + + if url is None: + return "" + + raw = str(url) + if not raw: + return "" + + try: + parsed = urlsplit(raw) + except Exception: + return raw[:max_len] + + if parsed.scheme and parsed.netloc: + # Strip potential embedded credentials (user:pass@host). + netloc = parsed.netloc.rsplit("@", 1)[-1] + base = f"{parsed.scheme}://{netloc}" + path = parsed.path or "" + if path and path != "/": + basename = path.rsplit("/", 1)[-1] + safe = f"{base}/.../{basename}" if basename else f"{base}/..." + else: + safe = base + else: + safe = raw + + if len(safe) <= max_len: + return safe + if max_len <= 3: + return "." * max_len + return f"{safe[:max_len - 3]}..." 
+ + # --------------------------------------------------------------------------- # Image cache utilities # @@ -112,8 +150,14 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> raise if attempt < retries: wait = 1.5 * (attempt + 1) - _log.debug("Media cache retry %d/%d for %s (%.1fs): %s", - attempt + 1, retries, url[:80], wait, exc) + _log.debug( + "Media cache retry %d/%d for %s (%.1fs): %s", + attempt + 1, + retries, + _safe_url_for_log(url), + wait, + exc, + ) await asyncio.sleep(wait) continue raise @@ -214,8 +258,14 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> raise if attempt < retries: wait = 1.5 * (attempt + 1) - _log.debug("Audio cache retry %d/%d for %s (%.1fs): %s", - attempt + 1, retries, url[:80], wait, exc) + _log.debug( + "Audio cache retry %d/%d for %s (%.1fs): %s", + attempt + 1, + retries, + _safe_url_for_log(url), + wait, + exc, + ) await asyncio.sleep(wait) continue raise @@ -1266,7 +1316,12 @@ class BasePlatformAdapter(ABC): if human_delay > 0: await asyncio.sleep(human_delay) try: - logger.info("[%s] Sending image: %s (alt=%s)", self.name, image_url[:80], alt_text[:30] if alt_text else "") + logger.info( + "[%s] Sending image: %s (alt=%s)", + self.name, + _safe_url_for_log(image_url), + alt_text[:30] if alt_text else "", + ) # Route animated GIFs through send_animation for proper playback if self._is_animation_url(image_url): img_result = await self.send_animation( diff --git a/tests/gateway/test_platform_base.py b/tests/gateway/test_platform_base.py index 13b52f24..43dd17bd 100644 --- a/tests/gateway/test_platform_base.py +++ b/tests/gateway/test_platform_base.py @@ -8,6 +8,7 @@ from gateway.platforms.base import ( GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE, MessageEvent, MessageType, + _safe_url_for_log, ) @@ -18,6 +19,31 @@ class TestSecretCaptureGuidance: assert "~/.hermes/.env" in message +class TestSafeUrlForLog: + def test_strips_query_fragment_and_userinfo(self): 
+ url = ( + "https://user:pass@example.com/private/path/image.png" + "?X-Amz-Signature=supersecret&token=abc#frag" + ) + result = _safe_url_for_log(url) + assert result == "https://example.com/.../image.png" + assert "supersecret" not in result + assert "token=abc" not in result + assert "user:pass@" not in result + + def test_truncates_long_values(self): + long_url = "https://example.com/" + ("a" * 300) + result = _safe_url_for_log(long_url, max_len=40) + assert len(result) == 40 + assert result.endswith("...") + + def test_handles_small_and_non_positive_max_len(self): + url = "https://example.com/very/long/path/file.png?token=secret" + assert _safe_url_for_log(url, max_len=3) == "..." + assert _safe_url_for_log(url, max_len=2) == ".." + assert _safe_url_for_log(url, max_len=0) == "" + + # --------------------------------------------------------------------------- # MessageEvent — command parsing # --------------------------------------------------------------------------- From 83df001d01e0de7fa84640ccd87c6b38777d8730 Mon Sep 17 00:00:00 2001 From: Zainan Victor Zhou Date: Mon, 6 Apr 2026 13:01:14 -0700 Subject: [PATCH 018/154] fix: allow google-workspace skill scripts to run directly - fall back to adding the repo root to sys.path when hermes_constants is not importable - fixes direct execution of setup.py and google_api.py from the repo checkout - keeps the upstream PR scoped to the google-workspace compatibility fix --- .../productivity/google-workspace/scripts/google_api.py | 8 +++++++- skills/productivity/google-workspace/scripts/setup.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/skills/productivity/google-workspace/scripts/google_api.py b/skills/productivity/google-workspace/scripts/google_api.py index 2a5c662a..ece0c3ea 100644 --- a/skills/productivity/google-workspace/scripts/google_api.py +++ b/skills/productivity/google-workspace/scripts/google_api.py @@ -27,7 +27,13 @@ from datetime import datetime, timedelta, timezone 
from email.mime.text import MIMEText from pathlib import Path -from hermes_constants import display_hermes_home, get_hermes_home +try: + from hermes_constants import display_hermes_home, get_hermes_home +except ModuleNotFoundError: + HERMES_AGENT_ROOT = Path(__file__).resolve().parents[4] + if HERMES_AGENT_ROOT.exists(): + sys.path.insert(0, str(HERMES_AGENT_ROOT)) + from hermes_constants import display_hermes_home, get_hermes_home HERMES_HOME = get_hermes_home() TOKEN_PATH = HERMES_HOME / "google_token.json" diff --git a/skills/productivity/google-workspace/scripts/setup.py b/skills/productivity/google-workspace/scripts/setup.py index 52a07427..5e4924f9 100644 --- a/skills/productivity/google-workspace/scripts/setup.py +++ b/skills/productivity/google-workspace/scripts/setup.py @@ -27,7 +27,13 @@ import subprocess import sys from pathlib import Path -from hermes_constants import display_hermes_home, get_hermes_home +try: + from hermes_constants import display_hermes_home, get_hermes_home +except ModuleNotFoundError: + HERMES_AGENT_ROOT = Path(__file__).resolve().parents[4] + if HERMES_AGENT_ROOT.exists(): + sys.path.insert(0, str(HERMES_AGENT_ROOT)) + from hermes_constants import display_hermes_home, get_hermes_home HERMES_HOME = get_hermes_home() TOKEN_PATH = HERMES_HOME / "google_token.json" From 190471fdc0d07d79dec23daa14c50d6c71eb1da2 Mon Sep 17 00:00:00 2001 From: Zainan Victor Zhou Date: Mon, 6 Apr 2026 13:09:10 -0700 Subject: [PATCH 019/154] docs: use HERMES_HOME in google-workspace skill examples - avoid hard-coded ~/.hermes paths in the setup and API shorthands - prefer HERMES_HOME with a sane default to /Users/peteradams/.hermes - keep the examples aligned with profile-aware Hermes installs --- skills/productivity/google-workspace/SKILL.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/skills/productivity/google-workspace/SKILL.md b/skills/productivity/google-workspace/SKILL.md index 6252c671..60b9693d 100644 --- 
a/skills/productivity/google-workspace/SKILL.md +++ b/skills/productivity/google-workspace/SKILL.md @@ -37,7 +37,13 @@ on CLI, Telegram, Discord, or any platform. Define a shorthand first: ```bash -GSETUP="python ~/.hermes/skills/productivity/google-workspace/scripts/setup.py" +HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}" +GWORKSPACE_SKILL_DIR="$HERMES_HOME/skills/productivity/google-workspace" +PYTHON_BIN="${HERMES_PYTHON:-python3}" +if [ -x "$HERMES_HOME/hermes-agent/venv/bin/python" ]; then + PYTHON_BIN="$HERMES_HOME/hermes-agent/venv/bin/python" +fi +GSETUP="$PYTHON_BIN $GWORKSPACE_SKILL_DIR/scripts/setup.py" ``` ### Step 0: Check if already set up @@ -135,7 +141,13 @@ Should print `AUTHENTICATED`. Setup is complete — token refreshes automaticall All commands go through the API script. Set `GAPI` as a shorthand: ```bash -GAPI="python ~/.hermes/skills/productivity/google-workspace/scripts/google_api.py" +HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}" +GWORKSPACE_SKILL_DIR="$HERMES_HOME/skills/productivity/google-workspace" +PYTHON_BIN="${HERMES_PYTHON:-python3}" +if [ -x "$HERMES_HOME/hermes-agent/venv/bin/python" ]; then + PYTHON_BIN="$HERMES_HOME/hermes-agent/venv/bin/python" +fi +GAPI="$PYTHON_BIN $GWORKSPACE_SKILL_DIR/scripts/google_api.py" ``` ### Gmail From 40527ff5e35db7fc6f501d8aa5997caa4a382a79 Mon Sep 17 00:00:00 2001 From: tymrtn Date: Mon, 6 Apr 2026 21:12:57 +0200 Subject: [PATCH 020/154] fix(auth): actionable error message when Codex refresh token is reused When the Codex CLI (or VS Code extension) consumes a refresh token before Hermes can use it, Hermes previously surfaced a generic 401 error with no actionable guidance. - In `refresh_codex_oauth_pure`: detect `refresh_token_reused` from the OAuth endpoint and raise an AuthError explaining the cause and the exact steps to recover (run `codex` to refresh, then `hermes login`). 
- In `run_agent.py`: when provider is `openai-codex` and HTTP 401 is received, show Codex-specific recovery steps instead of the generic "check your API key" message. Co-Authored-By: Claude Sonnet 4.6 --- hermes_cli/auth.py | 8 ++++++++ run_agent.py | 16 +++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 5a02c923..bfbeb818 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -1030,6 +1030,14 @@ def refresh_codex_oauth_pure( pass if code in {"invalid_grant", "invalid_token", "invalid_request"}: relogin_required = True + if code == "refresh_token_reused": + message = ( + "Codex refresh token was already consumed by another client " + "(e.g. Codex CLI or VS Code extension). " + "Run `codex` in your terminal to generate fresh tokens, " + "then run `hermes login --provider openai-codex` to re-authenticate." + ) + relogin_required = True raise AuthError( message, provider="openai-codex", diff --git a/run_agent.py b/run_agent.py index d85682a1..d97e08ad 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8187,11 +8187,17 @@ class AIAgent: self._vprint(f"{self.log_prefix} 🌐 Endpoint: {_base}", force=True) # Actionable guidance for common auth errors if status_code in (401, 403) or "unauthorized" in error_msg or "forbidden" in error_msg or "permission" in error_msg: - self._vprint(f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", force=True) - self._vprint(f"{self.log_prefix} • Is the key valid? Run: hermes setup", force=True) - self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True) - if "openrouter" in str(_base).lower(): - self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True) + if _provider == "openai-codex" and status_code == 401: + self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). 
Your token may have been", force=True) + self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True) + self._vprint(f"{self.log_prefix} 1. Run `codex` in your terminal to generate fresh tokens.", force=True) + self._vprint(f"{self.log_prefix} 2. Then run `hermes login --provider openai-codex` to re-authenticate.", force=True) + else: + self._vprint(f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", force=True) + self._vprint(f"{self.log_prefix} • Is the key valid? Run: hermes setup", force=True) + self._vprint(f"{self.log_prefix} • Does your account have access to {_model}?", force=True) + if "openrouter" in str(_base).lower(): + self._vprint(f"{self.log_prefix} • Check credits: https://openrouter.ai/settings/credits", force=True) else: self._vprint(f"{self.log_prefix} 💡 This type of error won't be fixed by retrying.", force=True) logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}") From 05f9267938b9701e16eb5c4ae1d8b4a2c62a12cc Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 6 Apr 2026 17:07:10 +0530 Subject: [PATCH 021/154] fix(matrix): hard-fail E2EE when python-olm missing + stable MATRIX_DEVICE_ID Two issues caused Matrix E2EE to silently not work in encrypted rooms: 1. When matrix-nio is installed without the [e2e] extra (no python-olm / libolm), nio.crypto.ENCRYPTION_ENABLED is False and client.olm is never initialized. The adapter logged warnings but returned True from connect(), so the bot appeared online but could never decrypt messages. Now: check_matrix_requirements() and connect() both hard-fail with a clear error message when MATRIX_ENCRYPTION=true but E2EE deps are missing. 2. Without a stable device_id, the bot gets a new device identity on each restart. Other clients see it as "unknown device" and refuse to share Megolm session keys. 
Now: MATRIX_DEVICE_ID env var lets users pin a stable device identity that persists across restarts and is passed to nio.AsyncClient constructor + restore_login(). Changes: - gateway/platforms/matrix.py: add _check_e2ee_deps(), hard-fail in connect() and check_matrix_requirements(), MATRIX_DEVICE_ID support in constructor + restore_login - gateway/config.py: plumb MATRIX_DEVICE_ID into platform extras - hermes_cli/config.py: add MATRIX_DEVICE_ID to OPTIONAL_ENV_VARS Closes #3521 --- gateway/config.py | 3 + gateway/platforms/matrix.py | 100 +++++++-- hermes_cli/config.py | 10 +- tests/gateway/test_matrix.py | 380 ++++++++++++++++++++++++++++++++++- 4 files changed, 465 insertions(+), 28 deletions(-) diff --git a/gateway/config.py b/gateway/config.py index 0ff3127c..470eee7f 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -779,6 +779,9 @@ def _apply_env_overrides(config: GatewayConfig) -> None: config.platforms[Platform.MATRIX].extra["password"] = matrix_password matrix_e2ee = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes") config.platforms[Platform.MATRIX].extra["encryption"] = matrix_e2ee + matrix_device_id = os.getenv("MATRIX_DEVICE_ID", "") + if matrix_device_id: + config.platforms[Platform.MATRIX].extra["device_id"] = matrix_device_id matrix_home = os.getenv("MATRIX_HOME_ROOM") if matrix_home and Platform.MATRIX in config.platforms: config.platforms[Platform.MATRIX].home_channel = HomeChannel( diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index 35cf72ad..2dc0c5a9 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -10,6 +10,7 @@ Environment variables: MATRIX_USER_ID Full user ID (@bot:server) — required for password login MATRIX_PASSWORD Password (alternative to access token) MATRIX_ENCRYPTION Set "true" to enable E2EE + MATRIX_DEVICE_ID Stable device ID for E2EE persistence across restarts MATRIX_ALLOWED_USERS Comma-separated Matrix user IDs (@user:server) MATRIX_HOME_ROOM Room ID 
for cron/notification delivery MATRIX_REACTIONS Set "false" to disable processing lifecycle reactions @@ -65,6 +66,21 @@ _MAX_PENDING_EVENTS = 100 _PENDING_EVENT_TTL = 300 # seconds — stop retrying after 5 min +_E2EE_INSTALL_HINT = ( + "Install with: pip install 'matrix-nio[e2e]' " + "(requires libolm C library)" +) + + +def _check_e2ee_deps() -> bool: + """Return True if matrix-nio E2EE dependencies (python-olm) are available.""" + try: + from nio.crypto import ENCRYPTION_ENABLED + return bool(ENCRYPTION_ENABLED) + except (ImportError, AttributeError): + return False + + def check_matrix_requirements() -> bool: """Return True if the Matrix adapter can be used.""" token = os.getenv("MATRIX_ACCESS_TOKEN", "") @@ -79,7 +95,6 @@ def check_matrix_requirements() -> bool: return False try: import nio # noqa: F401 - return True except ImportError: logger.warning( "Matrix: matrix-nio not installed. " @@ -87,6 +102,20 @@ def check_matrix_requirements() -> bool: ) return False + # If encryption is requested, verify E2EE deps are available at startup + # rather than silently degrading to plaintext-only at connect time. + encryption_requested = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes") + if encryption_requested and not _check_e2ee_deps(): + logger.error( + "Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. " + "Without this, encrypted rooms will not work. 
" + "Set MATRIX_ENCRYPTION=false to disable E2EE.", + _E2EE_INSTALL_HINT, + ) + return False + + return True + class MatrixAdapter(BasePlatformAdapter): """Gateway adapter for Matrix (any homeserver).""" @@ -111,6 +140,10 @@ class MatrixAdapter(BasePlatformAdapter): "encryption", os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes"), ) + self._device_id: str = ( + config.extra.get("device_id", "") + or os.getenv("MATRIX_DEVICE_ID", "") + ) self._client: Any = None # nio.AsyncClient self._sync_task: Optional[asyncio.Task] = None @@ -169,24 +202,42 @@ class MatrixAdapter(BasePlatformAdapter): _STORE_DIR.mkdir(parents=True, exist_ok=True) # Create the client. + # When a stable device_id is configured, pass it to the constructor + # so matrix-nio binds to it from the start (important for E2EE + # crypto-store persistence across restarts). + ctor_device_id = self._device_id or None if self._encryption: + if not _check_e2ee_deps(): + logger.error( + "Matrix: MATRIX_ENCRYPTION=true but E2EE dependencies are missing. %s. " + "Refusing to connect — encrypted rooms would silently fail.", + _E2EE_INSTALL_HINT, + ) + return False try: client = nio.AsyncClient( self._homeserver, self._user_id or "", + device_id=ctor_device_id, store_path=store_path, ) - logger.info("Matrix: E2EE enabled (store: %s)", store_path) - except Exception as exc: - logger.warning( - "Matrix: failed to create E2EE client (%s), " - "falling back to plain client. Install: " - "pip install 'matrix-nio[e2e]'", - exc, + logger.info( + "Matrix: E2EE enabled (store: %s%s)", + store_path, + f", device_id={self._device_id}" if self._device_id else "", ) - client = nio.AsyncClient(self._homeserver, self._user_id or "") + except Exception as exc: + logger.error( + "Matrix: failed to create E2EE client: %s. 
%s", + exc, _E2EE_INSTALL_HINT, + ) + return False else: - client = nio.AsyncClient(self._homeserver, self._user_id or "") + client = nio.AsyncClient( + self._homeserver, + self._user_id or "", + device_id=ctor_device_id, + ) self._client = client @@ -205,30 +256,36 @@ class MatrixAdapter(BasePlatformAdapter): if resolved_user_id: self._user_id = resolved_user_id + # Prefer the user-configured device_id (MATRIX_DEVICE_ID) so + # the bot reuses a stable identity across restarts. Fall back + # to whatever whoami returned. + effective_device_id = self._device_id or resolved_device_id + # restore_login() is the matrix-nio path that binds the access # token to a specific device and loads the crypto store. - if resolved_device_id and hasattr(client, "restore_login"): + if effective_device_id and hasattr(client, "restore_login"): client.restore_login( self._user_id or resolved_user_id, - resolved_device_id, + effective_device_id, self._access_token, ) else: if self._user_id: client.user_id = self._user_id - if resolved_device_id: - client.device_id = resolved_device_id + if effective_device_id: + client.device_id = effective_device_id client.access_token = self._access_token if self._encryption: logger.warning( "Matrix: access-token login did not restore E2EE state; " - "encrypted rooms may fail until a device_id is available" + "encrypted rooms may fail until a device_id is available. " + "Set MATRIX_DEVICE_ID to a stable value." 
) logger.info( "Matrix: using access token for %s%s", self._user_id or "(unknown user)", - f" (device {resolved_device_id})" if resolved_device_id else "", + f" (device {effective_device_id})" if effective_device_id else "", ) else: logger.error( @@ -271,10 +328,15 @@ class MatrixAdapter(BasePlatformAdapter): except Exception as exc: logger.debug("Matrix: could not import keys: %s", exc) elif self._encryption: - logger.warning( - "Matrix: E2EE requested but crypto store is not loaded; " - "encrypted rooms may fail" + # E2EE was requested but the crypto store failed to load — + # this means encrypted rooms will silently not work. Hard-fail. + logger.error( + "Matrix: E2EE requested but crypto store is not loaded — " + "cannot decrypt or encrypt messages. %s", + _E2EE_INSTALL_HINT, ) + await client.close() + return False # Register event callbacks. client.add_event_callback(self._on_room_message, nio.RoomMessageText) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 369fe7ac..bf0b27c2 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -42,7 +42,7 @@ _EXTRA_ENV_KEYS = frozenset({ "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT", "WHATSAPP_MODE", "WHATSAPP_ENABLED", "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE", - "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_HOME_ROOM", + "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_DEVICE_ID", "MATRIX_HOME_ROOM", "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD", }) import yaml @@ -1079,6 +1079,14 @@ OPTIONAL_ENV_VARS = { "category": "messaging", "advanced": True, }, + "MATRIX_DEVICE_ID": { + "description": "Stable Matrix device ID for E2EE persistence across restarts (e.g. HERMES_BOT)", + "prompt": "Matrix device ID (stable across restarts)", + "url": None, + "password": False, + "category": "messaging", + "advanced": True, + }, "GATEWAY_ALLOW_ALL_USERS": { "description": "Allow all users to interact with messaging bots (true/false). 
Default: false.", "prompt": "Allow all users (true/false)", diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py index fb2d47f4..09f0ab95 100644 --- a/tests/gateway/test_matrix.py +++ b/tests/gateway/test_matrix.py @@ -428,6 +428,7 @@ class TestMatrixRequirements: def test_check_requirements_with_token(self, monkeypatch): monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_test") monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") + monkeypatch.delenv("MATRIX_ENCRYPTION", raising=False) from gateway.platforms.matrix import check_matrix_requirements try: import nio # noqa: F401 @@ -448,6 +449,45 @@ class TestMatrixRequirements: from gateway.platforms.matrix import check_matrix_requirements assert check_matrix_requirements() is False + def test_check_requirements_encryption_true_no_e2ee_deps(self, monkeypatch): + """MATRIX_ENCRYPTION=true should fail if python-olm is not installed.""" + monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_test") + monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") + monkeypatch.setenv("MATRIX_ENCRYPTION", "true") + + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=False): + assert matrix_mod.check_matrix_requirements() is False + + def test_check_requirements_encryption_false_no_e2ee_deps_ok(self, monkeypatch): + """Without encryption, missing E2EE deps should not block startup.""" + monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_test") + monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") + monkeypatch.delenv("MATRIX_ENCRYPTION", raising=False) + + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=False): + # Still needs nio itself to be importable + try: + import nio # noqa: F401 + assert matrix_mod.check_matrix_requirements() is True + except ImportError: + assert matrix_mod.check_matrix_requirements() is False + + def 
test_check_requirements_encryption_true_with_e2ee_deps(self, monkeypatch): + """MATRIX_ENCRYPTION=true should pass if E2EE deps are available.""" + monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_test") + monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") + monkeypatch.setenv("MATRIX_ENCRYPTION", "true") + + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True): + try: + import nio # noqa: F401 + assert matrix_mod.check_matrix_requirements() is True + except ImportError: + assert matrix_mod.check_matrix_requirements() is False + # --------------------------------------------------------------------------- # Access-token auth / E2EE bootstrap @@ -516,10 +556,12 @@ class TestMatrixAccessTokenAuth: fake_nio.InviteMemberEvent = type("InviteMemberEvent", (), {}) fake_nio.MegolmEvent = type("MegolmEvent", (), {}) - with patch.dict("sys.modules", {"nio": fake_nio}): - with patch.object(adapter, "_refresh_dm_cache", AsyncMock()): - with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)): - assert await adapter.connect() is True + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True): + with patch.dict("sys.modules", {"nio": fake_nio}): + with patch.object(adapter, "_refresh_dm_cache", AsyncMock()): + with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)): + assert await adapter.connect() is True fake_client.restore_login.assert_called_once_with( "@bot:example.org", "DEV123", "syt_test_access_token" @@ -532,6 +574,326 @@ class TestMatrixAccessTokenAuth: await adapter.disconnect() +class TestMatrixE2EEHardFail: + """connect() must refuse to start when E2EE is requested but deps are missing.""" + + @pytest.mark.asyncio + async def test_connect_fails_when_encryption_true_but_no_e2ee_deps(self): + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + 
token="syt_test_access_token", + extra={ + "homeserver": "https://matrix.example.org", + "user_id": "@bot:example.org", + "encryption": True, + }, + ) + adapter = MatrixAdapter(config) + + fake_nio = MagicMock() + fake_nio.AsyncClient = MagicMock() + + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=False): + with patch.dict("sys.modules", {"nio": fake_nio}): + result = await adapter.connect() + + assert result is False + + @pytest.mark.asyncio + async def test_connect_fails_when_olm_not_loaded_after_login(self): + """Even if _check_e2ee_deps passes, if olm is None after auth, hard-fail.""" + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + token="syt_test_access_token", + extra={ + "homeserver": "https://matrix.example.org", + "user_id": "@bot:example.org", + "encryption": True, + }, + ) + adapter = MatrixAdapter(config) + + class FakeWhoamiResponse: + def __init__(self, user_id, device_id): + self.user_id = user_id + self.device_id = device_id + + fake_client = MagicMock() + fake_client.whoami = AsyncMock(return_value=FakeWhoamiResponse("@bot:example.org", "DEV123")) + fake_client.close = AsyncMock() + # olm is None — crypto store not loaded + fake_client.olm = None + fake_client.should_upload_keys = False + + def _restore_login(user_id, device_id, access_token): + fake_client.user_id = user_id + fake_client.device_id = device_id + fake_client.access_token = access_token + + fake_client.restore_login = MagicMock(side_effect=_restore_login) + + fake_nio = MagicMock() + fake_nio.AsyncClient = MagicMock(return_value=fake_client) + fake_nio.WhoamiResponse = FakeWhoamiResponse + + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True): + with patch.dict("sys.modules", {"nio": fake_nio}): + result = await adapter.connect() + + assert result is False + 
fake_client.close.assert_awaited_once() + + +class TestMatrixDeviceId: + """MATRIX_DEVICE_ID should be used for stable device identity.""" + + def test_device_id_from_config_extra(self): + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + token="syt_test", + extra={ + "homeserver": "https://matrix.example.org", + "device_id": "HERMES_BOT_STABLE", + }, + ) + adapter = MatrixAdapter(config) + assert adapter._device_id == "HERMES_BOT_STABLE" + + def test_device_id_from_env(self, monkeypatch): + monkeypatch.setenv("MATRIX_DEVICE_ID", "FROM_ENV") + + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + token="syt_test", + extra={ + "homeserver": "https://matrix.example.org", + }, + ) + adapter = MatrixAdapter(config) + assert adapter._device_id == "FROM_ENV" + + def test_device_id_config_takes_precedence_over_env(self, monkeypatch): + monkeypatch.setenv("MATRIX_DEVICE_ID", "FROM_ENV") + + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + token="syt_test", + extra={ + "homeserver": "https://matrix.example.org", + "device_id": "FROM_CONFIG", + }, + ) + adapter = MatrixAdapter(config) + assert adapter._device_id == "FROM_CONFIG" + + @pytest.mark.asyncio + async def test_connect_uses_configured_device_id_over_whoami(self): + """When MATRIX_DEVICE_ID is set, it should be used instead of whoami device_id.""" + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + token="syt_test_access_token", + extra={ + "homeserver": "https://matrix.example.org", + "user_id": "@bot:example.org", + "encryption": True, + "device_id": "MY_STABLE_DEVICE", + }, + ) + adapter = MatrixAdapter(config) + + class FakeWhoamiResponse: + def __init__(self, user_id, device_id): + self.user_id = user_id + self.device_id = device_id + + class FakeSyncResponse: + def __init__(self): + self.rooms = MagicMock(join={}) + + 
fake_client = MagicMock() + fake_client.whoami = AsyncMock(return_value=FakeWhoamiResponse("@bot:example.org", "WHOAMI_DEV")) + fake_client.sync = AsyncMock(return_value=FakeSyncResponse()) + fake_client.keys_upload = AsyncMock() + fake_client.keys_query = AsyncMock() + fake_client.keys_claim = AsyncMock() + fake_client.send_to_device_messages = AsyncMock(return_value=[]) + fake_client.get_users_for_key_claiming = MagicMock(return_value={}) + fake_client.close = AsyncMock() + fake_client.add_event_callback = MagicMock() + fake_client.rooms = {} + fake_client.account_data = {} + fake_client.olm = object() + fake_client.should_upload_keys = False + fake_client.should_query_keys = False + fake_client.should_claim_keys = False + + def _restore_login(user_id, device_id, access_token): + fake_client.user_id = user_id + fake_client.device_id = device_id + fake_client.access_token = access_token + + fake_client.restore_login = MagicMock(side_effect=_restore_login) + + fake_nio = MagicMock() + fake_nio.AsyncClient = MagicMock(return_value=fake_client) + fake_nio.WhoamiResponse = FakeWhoamiResponse + fake_nio.SyncResponse = FakeSyncResponse + fake_nio.LoginResponse = type("LoginResponse", (), {}) + fake_nio.RoomMessageText = type("RoomMessageText", (), {}) + fake_nio.RoomMessageImage = type("RoomMessageImage", (), {}) + fake_nio.RoomMessageAudio = type("RoomMessageAudio", (), {}) + fake_nio.RoomMessageVideo = type("RoomMessageVideo", (), {}) + fake_nio.RoomMessageFile = type("RoomMessageFile", (), {}) + fake_nio.InviteMemberEvent = type("InviteMemberEvent", (), {}) + fake_nio.MegolmEvent = type("MegolmEvent", (), {}) + + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True): + with patch.dict("sys.modules", {"nio": fake_nio}): + with patch.object(adapter, "_refresh_dm_cache", AsyncMock()): + with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)): + assert await adapter.connect() is True + + # 
The configured device_id should override the whoami device_id + fake_client.restore_login.assert_called_once_with( + "@bot:example.org", "MY_STABLE_DEVICE", "syt_test_access_token" + ) + assert fake_client.device_id == "MY_STABLE_DEVICE" + + # Verify device_id was passed to nio.AsyncClient constructor + ctor_call = fake_nio.AsyncClient.call_args + assert ctor_call.kwargs.get("device_id") == "MY_STABLE_DEVICE" + + await adapter.disconnect() + + +class TestMatrixE2EEClientConstructorFailure: + """connect() should hard-fail if nio.AsyncClient() raises when encryption is on.""" + + @pytest.mark.asyncio + async def test_connect_fails_when_e2ee_client_constructor_raises(self): + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + token="syt_test_access_token", + extra={ + "homeserver": "https://matrix.example.org", + "user_id": "@bot:example.org", + "encryption": True, + }, + ) + adapter = MatrixAdapter(config) + + fake_nio = MagicMock() + fake_nio.AsyncClient = MagicMock(side_effect=Exception("olm init failed")) + + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True): + with patch.dict("sys.modules", {"nio": fake_nio}): + result = await adapter.connect() + + assert result is False + + +class TestMatrixPasswordLoginDeviceId: + """MATRIX_DEVICE_ID should be passed to nio.AsyncClient even with password login.""" + + @pytest.mark.asyncio + async def test_password_login_passes_device_id_to_constructor(self): + from gateway.platforms.matrix import MatrixAdapter + + config = PlatformConfig( + enabled=True, + extra={ + "homeserver": "https://matrix.example.org", + "user_id": "@bot:example.org", + "password": "secret", + "device_id": "STABLE_PW_DEVICE", + }, + ) + adapter = MatrixAdapter(config) + + class FakeLoginResponse: + pass + + class FakeSyncResponse: + def __init__(self): + self.rooms = MagicMock(join={}) + + fake_client = MagicMock() + fake_client.login = 
AsyncMock(return_value=FakeLoginResponse()) + fake_client.sync = AsyncMock(return_value=FakeSyncResponse()) + fake_client.close = AsyncMock() + fake_client.add_event_callback = MagicMock() + fake_client.rooms = {} + fake_client.account_data = {} + + fake_nio = MagicMock() + fake_nio.AsyncClient = MagicMock(return_value=fake_client) + fake_nio.LoginResponse = FakeLoginResponse + fake_nio.SyncResponse = FakeSyncResponse + fake_nio.RoomMessageText = type("RoomMessageText", (), {}) + fake_nio.RoomMessageImage = type("RoomMessageImage", (), {}) + fake_nio.RoomMessageAudio = type("RoomMessageAudio", (), {}) + fake_nio.RoomMessageVideo = type("RoomMessageVideo", (), {}) + fake_nio.RoomMessageFile = type("RoomMessageFile", (), {}) + fake_nio.InviteMemberEvent = type("InviteMemberEvent", (), {}) + + with patch.dict("sys.modules", {"nio": fake_nio}): + with patch.object(adapter, "_refresh_dm_cache", AsyncMock()): + with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)): + assert await adapter.connect() is True + + # Verify device_id was passed to the nio.AsyncClient constructor + ctor_call = fake_nio.AsyncClient.call_args + assert ctor_call.kwargs.get("device_id") == "STABLE_PW_DEVICE" + + await adapter.disconnect() + + +class TestMatrixDeviceIdConfig: + """MATRIX_DEVICE_ID should be plumbed through gateway config.""" + + def test_device_id_in_config_extra(self, monkeypatch): + monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123") + monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org") + monkeypatch.setenv("MATRIX_DEVICE_ID", "HERMES_BOT") + + from gateway.config import GatewayConfig, _apply_env_overrides + config = GatewayConfig() + _apply_env_overrides(config) + + mc = config.platforms[Platform.MATRIX] + assert mc.extra.get("device_id") == "HERMES_BOT" + + def test_device_id_not_set_when_env_empty(self, monkeypatch): + monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123") + monkeypatch.setenv("MATRIX_HOMESERVER", 
"https://matrix.example.org") + monkeypatch.delenv("MATRIX_DEVICE_ID", raising=False) + + from gateway.config import GatewayConfig, _apply_env_overrides + config = GatewayConfig() + _apply_env_overrides(config) + + mc = config.platforms[Platform.MATRIX] + assert "device_id" not in mc.extra + + class TestMatrixE2EEMaintenance: @pytest.mark.asyncio async def test_sync_loop_runs_e2ee_maintenance_requests(self): @@ -1071,10 +1433,12 @@ class TestMatrixEncryptedMedia: fake_nio.InviteMemberEvent = FakeInviteMemberEvent fake_nio.MegolmEvent = FakeMegolmEvent - with patch.dict("sys.modules", {"nio": fake_nio}): - with patch.object(adapter, "_refresh_dm_cache", AsyncMock()): - with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)): - assert await adapter.connect() is True + from gateway.platforms import matrix as matrix_mod + with patch.object(matrix_mod, "_check_e2ee_deps", return_value=True): + with patch.dict("sys.modules", {"nio": fake_nio}): + with patch.object(adapter, "_refresh_dm_cache", AsyncMock()): + with patch.object(adapter, "_sync_loop", AsyncMock(return_value=None)): + assert await adapter.connect() is True callback_classes = [call.args[1] for call in fake_client.add_event_callback.call_args_list] assert FakeRoomEncryptedImage in callback_classes From 8a29b4903619c7a6dcb79995dbc1909d095dbe27 Mon Sep 17 00:00:00 2001 From: Tianxiao Date: Mon, 6 Apr 2026 16:30:58 +0200 Subject: [PATCH 022/154] fix(cli): handle CJK wide chars in TUI input height --- cli.py | 18 +++++--- tests/test_cli_status_bar.py | 87 ++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/cli.py b/cli.py index ff097532..ed9e0813 100644 --- a/cli.py +++ b/cli.py @@ -7469,18 +7469,26 @@ class HermesCLI: # wrapping of long lines so the input area always fits its content. 
def _input_height(): try: + from prompt_toolkit.application import get_app + from prompt_toolkit.utils import get_cwidth + doc = input_area.buffer.document - prompt_width = max(2, len(self._get_tui_prompt_text())) - available_width = shutil.get_terminal_size().columns - prompt_width + prompt_width = max(2, get_cwidth(self._get_tui_prompt_text())) + try: + available_width = get_app().output.get_size().columns - prompt_width + except Exception: + available_width = shutil.get_terminal_size((80, 24)).columns - prompt_width if available_width < 10: available_width = 40 visual_lines = 0 for line in doc.lines: - # Each logical line takes at least 1 visual row; long lines wrap - if len(line) == 0: + # Each logical line takes at least 1 visual row; long lines wrap. + # Use prompt_toolkit's cell width so CJK wide characters count as 2. + line_width = get_cwidth(line) + if line_width <= 0: visual_lines += 1 else: - visual_lines += max(1, -(-len(line) // available_width)) # ceil division + visual_lines += max(1, -(-line_width // available_width)) # ceil division return min(max(visual_lines, 1), 8) except Exception: return 1 diff --git a/tests/test_cli_status_bar.py b/tests/test_cli_status_bar.py index 104c58b1..e728328b 100644 --- a/tests/test_cli_status_bar.py +++ b/tests/test_cli_status_bar.py @@ -1,5 +1,6 @@ from datetime import datetime, timedelta from types import SimpleNamespace +from unittest.mock import MagicMock, patch from cli import HermesCLI @@ -78,6 +79,92 @@ class TestCLIStatusBar: assert "$0.06" not in text # cost hidden by default assert "15m" in text + def test_input_height_counts_wide_characters_using_cell_width(self): + cli_obj = _make_cli() + + class _Doc: + lines = ["你" * 10] + + class _Buffer: + document = _Doc() + + input_area = SimpleNamespace(buffer=_Buffer()) + + def _input_height(): + try: + from prompt_toolkit.application import get_app + from prompt_toolkit.utils import get_cwidth + + doc = input_area.buffer.document + prompt_width = max(2, 
get_cwidth(cli_obj._get_tui_prompt_text())) + try: + available_width = get_app().output.get_size().columns - prompt_width + except Exception: + import shutil + available_width = shutil.get_terminal_size((80, 24)).columns - prompt_width + if available_width < 10: + available_width = 40 + visual_lines = 0 + for line in doc.lines: + line_width = get_cwidth(line) + if line_width <= 0: + visual_lines += 1 + else: + visual_lines += max(1, -(-line_width // available_width)) + return min(max(visual_lines, 1), 8) + except Exception: + return 1 + + mock_app = MagicMock() + mock_app.output.get_size.return_value = MagicMock(columns=14) + with patch.object(HermesCLI, "_get_tui_prompt_text", return_value="❯ "), \ + patch("prompt_toolkit.application.get_app", return_value=mock_app): + assert _input_height() == 2 + + def test_input_height_uses_prompt_toolkit_width_over_shutil(self): + cli_obj = _make_cli() + + class _Doc: + lines = ["你" * 10] + + class _Buffer: + document = _Doc() + + input_area = SimpleNamespace(buffer=_Buffer()) + + def _input_height(): + try: + from prompt_toolkit.application import get_app + from prompt_toolkit.utils import get_cwidth + + doc = input_area.buffer.document + prompt_width = max(2, get_cwidth(cli_obj._get_tui_prompt_text())) + try: + available_width = get_app().output.get_size().columns - prompt_width + except Exception: + import shutil + available_width = shutil.get_terminal_size((80, 24)).columns - prompt_width + if available_width < 10: + available_width = 40 + visual_lines = 0 + for line in doc.lines: + line_width = get_cwidth(line) + if line_width <= 0: + visual_lines += 1 + else: + visual_lines += max(1, -(-line_width // available_width)) + return min(max(visual_lines, 1), 8) + except Exception: + return 1 + + mock_app = MagicMock() + mock_app.output.get_size.return_value = MagicMock(columns=14) + with patch.object(HermesCLI, "_get_tui_prompt_text", return_value="❯ "), \ + patch("prompt_toolkit.application.get_app", return_value=mock_app), \ 
+ patch("shutil.get_terminal_size") as mock_shutil: + assert _input_height() == 2 + mock_shutil.assert_not_called() + def test_build_status_bar_text_no_cost_in_status_bar(self): cli_obj = _attach_agent( _make_cli(), From abd24d381bcb0f1dc5a91b76d10c51998c8cbbd6 Mon Sep 17 00:00:00 2001 From: Ruzzgar Date: Mon, 6 Apr 2026 20:50:38 +0300 Subject: [PATCH 023/154] Implement comprehensive browser path discovery for Windows --- cli.py | 77 +++++++++++++++++++++++-------- tests/test_cli_browser_connect.py | 43 +++++++++++++++++ 2 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 tests/test_cli_browser_connect.py diff --git a/cli.py b/cli.py index ed9e0813..29e6257d 100644 --- a/cli.py +++ b/cli.py @@ -120,6 +120,63 @@ def _parse_reasoning_config(effort: str) -> dict | None: return result +def _get_chrome_debug_candidates(system: str) -> list[str]: + """Return likely browser executables for local CDP auto-launch.""" + candidates: list[str] = [] + seen: set[str] = set() + + def _add_candidate(path: str | None) -> None: + if not path: + return + normalized = os.path.normcase(os.path.normpath(path)) + if normalized in seen: + return + if os.path.isfile(path): + candidates.append(path) + seen.add(normalized) + + def _add_from_path(*names: str) -> None: + for name in names: + _add_candidate(shutil.which(name)) + + if system == "Darwin": + for app in ( + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", + "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", + "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", + ): + _add_candidate(app) + elif system == "Windows": + _add_from_path( + "chrome.exe", "msedge.exe", "brave.exe", "chromium.exe", + "chrome", "msedge", "brave", "chromium", + ) + + for base in ( + os.environ.get("ProgramFiles"), + os.environ.get("ProgramFiles(x86)"), + os.environ.get("LOCALAPPDATA"), + ): + if not base: + continue + for parts in ( + 
("Google", "Chrome", "Application", "chrome.exe"), + ("Chromium", "Application", "chrome.exe"), + ("Chromium", "Application", "chromium.exe"), + ("BraveSoftware", "Brave-Browser", "Application", "brave.exe"), + ("Microsoft", "Edge", "Application", "msedge.exe"), + ): + _add_candidate(os.path.join(base, *parts)) + else: + _add_from_path( + "google-chrome", "google-chrome-stable", "chromium-browser", + "chromium", "brave-browser", "microsoft-edge", + ) + + return candidates + + def load_cli_config() -> Dict[str, Any]: """ Load CLI configuration from config files. @@ -4838,27 +4895,9 @@ class HermesCLI: Returns True if a launch command was executed (doesn't guarantee success). """ - import shutil import subprocess as _sp - candidates = [] - if system == "Darwin": - # macOS: try common app bundle locations - for app in ( - "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", - "/Applications/Chromium.app/Contents/MacOS/Chromium", - "/Applications/Brave Browser.app/Contents/MacOS/Brave Browser", - "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", - ): - if os.path.isfile(app): - candidates.append(app) - else: - # Linux: try common binary names - for name in ("google-chrome", "google-chrome-stable", "chromium-browser", - "chromium", "brave-browser", "microsoft-edge"): - path = shutil.which(name) - if path: - candidates.append(path) + candidates = _get_chrome_debug_candidates(system) if not candidates: return False diff --git a/tests/test_cli_browser_connect.py b/tests/test_cli_browser_connect.py new file mode 100644 index 00000000..a913d96f --- /dev/null +++ b/tests/test_cli_browser_connect.py @@ -0,0 +1,43 @@ +"""Tests for CLI browser CDP auto-launch helpers.""" + +from unittest.mock import patch + +from cli import HermesCLI + + +class TestChromeDebugLaunch: + def test_windows_launch_uses_browser_found_on_path(self): + captured = {} + + def fake_popen(cmd, **kwargs): + captured["cmd"] = cmd + captured["kwargs"] = kwargs + return object() + + 
with patch("cli.shutil.which", side_effect=lambda name: r"C:\Chrome\chrome.exe" if name == "chrome.exe" else None), \ + patch("cli.os.path.isfile", side_effect=lambda path: path == r"C:\Chrome\chrome.exe"), \ + patch("subprocess.Popen", side_effect=fake_popen): + assert HermesCLI._try_launch_chrome_debug(9333, "Windows") is True + + assert captured["cmd"] == [r"C:\Chrome\chrome.exe", "--remote-debugging-port=9333"] + assert captured["kwargs"]["start_new_session"] is True + + def test_windows_launch_falls_back_to_common_install_dirs(self, monkeypatch): + captured = {} + installed = r"C:\Program Files\Google\Chrome\Application\chrome.exe" + + def fake_popen(cmd, **kwargs): + captured["cmd"] = cmd + captured["kwargs"] = kwargs + return object() + + monkeypatch.setenv("ProgramFiles", r"C:\Program Files") + monkeypatch.delenv("ProgramFiles(x86)", raising=False) + monkeypatch.delenv("LOCALAPPDATA", raising=False) + + with patch("cli.shutil.which", return_value=None), \ + patch("cli.os.path.isfile", side_effect=lambda path: path == installed), \ + patch("subprocess.Popen", side_effect=fake_popen): + assert HermesCLI._try_launch_chrome_debug(9222, "Windows") is True + + assert captured["cmd"] == [installed, "--remote-debugging-port=9222"] From 7cf4bd06bfad5a51ea1e381fd8994defcb7632a0 Mon Sep 17 00:00:00 2001 From: jtuki Date: Sun, 5 Apr 2026 21:56:58 +0800 Subject: [PATCH 024/154] fix(gateway): fix Feishu reconnect message drops and shutdown hang This commit fixes two critical bugs in the Feishu adapter that affect message reliability and process lifecycle. **Bug Fix 1: Intermittent Message Drops** Root cause: Event handler was created once in __init__ and reused across reconnects, causing callbacks to capture stale loop references. 
When the adapter disconnected and reconnected, old callbacks continued firing with invalid loop references, resulting in dropped messages with warnings: "[Feishu] Dropping inbound message before adapter loop is ready" Fix: - Rebuild event handler on each connect (websocket/webhook) - Clear handler on disconnect - Ensure callbacks always capture current valid loop - Add defensive loop.is_closed() checks with getattr for test compatibility - Unify webhook dispatch path to use same loop checks as websocket mode **Bug Fix 2: Process Hangs on Ctrl+C / SIGTERM** Root cause: Feishu SDK's websocket client runs in a background thread with an infinite _select() loop that never exits naturally. The thread was never properly joined on disconnect, causing processes to hang indefinitely after Ctrl+C or gateway stop commands. Fix: - Store reference to thread-local event loop (_ws_thread_loop) - On disconnect, cancel all tasks in thread loop and stop it gracefully via call_soon_threadsafe() - Await thread future with 10s timeout - Clean up pending tasks in thread's finally block before closing loop - Add detailed debug logging for disconnect flow **Additional Improvements:** - Add regression tests for disconnect cleanup and webhook dispatch - Ensure all event callbacks check loop readiness before dispatching Tested on Linux with websocket mode. All Feishu tests pass. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- gateway/platforms/feishu.py | 87 +++++++++++++++++++++++++++++++----- tests/gateway/test_feishu.py | 63 ++++++++++++++++++++++++-- 2 files changed, 134 insertions(+), 16 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index bee8b01d..de4d97e6 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -913,14 +913,33 @@ def _unique_lines(lines: List[str]) -> List[str]: return unique -def _run_official_feishu_ws_client(ws_client: Any) -> None: +def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None: """Run the official Lark WS client in its own thread-local event loop.""" import lark_oapi.ws.client as ws_client_module loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) ws_client_module.loop = loop - ws_client.start() + adapter._ws_thread_loop = loop + try: + ws_client.start() + except Exception: + pass + finally: + pending = [t for t in asyncio.all_tasks(loop) if not t.done()] + for task in pending: + task.cancel() + if pending: + loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) + try: + loop.stop() + except Exception: + pass + try: + loop.close() + except Exception: + pass + adapter._ws_thread_loop = None def check_feishu_requirements() -> bool: @@ -945,10 +964,11 @@ class FeishuAdapter(BasePlatformAdapter): self._client: Optional[Any] = None self._ws_client: Optional[Any] = None self._ws_future: Optional[asyncio.Future] = None + self._ws_thread_loop: Optional[asyncio.AbstractEventLoop] = None self._loop: Optional[asyncio.AbstractEventLoop] = None self._webhook_runner: Optional[Any] = None self._webhook_site: Optional[Any] = None - self._event_handler = self._build_event_handler() + self._event_handler: Optional[Any] = None self._seen_message_ids: Dict[str, float] = {} # message_id → seen_at (time.time()) self._seen_message_order: List[str] = [] self._dedup_state_path = get_hermes_home() / 
"feishu_seen_message_ids.json" @@ -1116,8 +1136,37 @@ class FeishuAdapter(BasePlatformAdapter): self._reset_batch_buffers() self._disable_websocket_auto_reconnect() await self._stop_webhook_server() + + ws_thread_loop = self._ws_thread_loop + if ws_thread_loop is not None and not ws_thread_loop.is_closed(): + logger.debug("[Feishu] Cancelling websocket thread tasks and stopping loop") + + def cancel_all_tasks() -> None: + tasks = [t for t in asyncio.all_tasks(ws_thread_loop) if not t.done()] + logger.debug("[Feishu] Found %d pending tasks in websocket thread", len(tasks)) + for task in tasks: + task.cancel() + ws_thread_loop.call_later(0.1, ws_thread_loop.stop) + + ws_thread_loop.call_soon_threadsafe(cancel_all_tasks) + + ws_future = self._ws_future + if ws_future is not None: + try: + logger.debug("[Feishu] Waiting for websocket thread to exit (timeout=10s)") + await asyncio.wait_for(asyncio.shield(ws_future), timeout=10.0) + logger.debug("[Feishu] Websocket thread exited cleanly") + except asyncio.TimeoutError: + logger.warning("[Feishu] Websocket thread did not exit within 10s - may be stuck") + except asyncio.CancelledError: + logger.debug("[Feishu] Websocket thread cancelled during disconnect") + except Exception as exc: + logger.debug("[Feishu] Websocket thread exited with error: %s", exc, exc_info=True) + self._ws_future = None + self._ws_thread_loop = None self._loop = None + self._event_handler = None self._persist_seen_message_ids() await self._release_app_lock() @@ -1476,12 +1525,13 @@ class FeishuAdapter(BasePlatformAdapter): def _on_message_event(self, data: Any) -> None: """Normalize Feishu inbound events into MessageEvent.""" - if self._loop is None: + loop = self._loop + if loop is None or bool(getattr(loop, "is_closed", lambda: False)()): logger.warning("[Feishu] Dropping inbound message before adapter loop is ready") return future = asyncio.run_coroutine_threadsafe( self._handle_message_event_data(data), - self._loop, + loop, ) 
future.add_done_callback(self._log_background_failure) @@ -1553,27 +1603,30 @@ class FeishuAdapter(BasePlatformAdapter): ) # Only process reactions from real users. Ignore app/bot-generated reactions # and Hermes' own ACK emoji to avoid feedback loops. + loop = self._loop if ( operator_type in {"bot", "app"} or emoji_type == _FEISHU_ACK_EMOJI or not message_id - or self._loop is None + or loop is None + or bool(getattr(loop, "is_closed", lambda: False)()) ): return future = asyncio.run_coroutine_threadsafe( self._handle_reaction_event(event_type, data), - self._loop, + loop, ) future.add_done_callback(self._log_background_failure) def _on_card_action_trigger(self, data: Any) -> Any: """Schedule Feishu card actions on the adapter loop and acknowledge immediately.""" - if self._loop is None: + loop = self._loop + if loop is None or bool(getattr(loop, "is_closed", lambda: False)()): logger.warning("[Feishu] Dropping card action before adapter loop is ready") else: future = asyncio.run_coroutine_threadsafe( self._handle_card_action_event(data), - self._loop, + loop, ) future.add_done_callback(self._log_background_failure) if P2CardActionTriggerResponse is None: @@ -2083,7 +2136,7 @@ class FeishuAdapter(BasePlatformAdapter): event_type = str((payload.get("header") or {}).get("event_type") or "") data = self._namespace_from_mapping(payload) if event_type == "im.message.receive_v1": - await self._handle_message_event_data(data) + self._on_message_event(data) elif event_type == "im.message.message_read_v1": self._on_message_read_event(data) elif event_type == "im.chat.member.bot.added_v1": @@ -2093,7 +2146,7 @@ class FeishuAdapter(BasePlatformAdapter): elif event_type in ("im.message.reaction.created_v1", "im.message.reaction.deleted_v1"): self._on_reaction_event(event_type, data) elif event_type == "card.action.trigger": - asyncio.ensure_future(self._handle_card_action_event(data)) + self._on_card_action_trigger(data) else: logger.debug("[Feishu] Ignoring webhook event 
type: %s", event_type or "unknown") return web.json_response({"code": 0, "msg": "ok"}) @@ -2965,6 +3018,12 @@ class FeishuAdapter(BasePlatformAdapter): raise RuntimeError("websockets not installed; websocket mode unavailable") domain = FEISHU_DOMAIN if self._domain_name != "lark" else LARK_DOMAIN self._client = self._build_lark_client(domain) + self._event_handler = self._build_event_handler() + if self._event_handler is None: + raise RuntimeError("failed to build Feishu event handler") + loop = self._loop + if loop is None or loop.is_closed(): + raise RuntimeError("adapter loop is not ready") await self._hydrate_bot_identity() self._ws_client = FeishuWSClient( app_id=self._app_id, @@ -2973,10 +3032,11 @@ class FeishuAdapter(BasePlatformAdapter): event_handler=self._event_handler, domain=domain, ) - self._ws_future = self._loop.run_in_executor( + self._ws_future = loop.run_in_executor( None, _run_official_feishu_ws_client, self._ws_client, + self, ) async def _connect_webhook(self) -> None: @@ -2984,6 +3044,9 @@ class FeishuAdapter(BasePlatformAdapter): raise RuntimeError("aiohttp not installed; webhook mode unavailable") domain = FEISHU_DOMAIN if self._domain_name != "lark" else LARK_DOMAIN self._client = self._build_lark_client(domain) + self._event_handler = self._build_event_handler() + if self._event_handler is None: + raise RuntimeError("failed to build Feishu event handler") await self._hydrate_bot_identity() app = web.Application() app.router.add_post(self._webhook_path, self._handle_webhook_request) diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index 5344cda5..41f92d60 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -8,7 +8,7 @@ import time import unittest from pathlib import Path from types import SimpleNamespace -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, Mock, patch try: import lark_oapi @@ -289,7 +289,7 @@ class 
TestFeishuAdapterMessaging(unittest.TestCase): patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True), patch("gateway.platforms.feishu.FEISHU_WEBSOCKET_AVAILABLE", True), patch("gateway.platforms.feishu.lark", SimpleNamespace(LogLevel=SimpleNamespace(INFO="INFO", WARNING="WARNING"))), - patch("gateway.platforms.feishu.EventDispatcherHandler", object()), + patch("gateway.platforms.feishu.EventDispatcherHandler") as mock_handler_class, patch("gateway.platforms.feishu.FeishuWSClient", return_value=ws_client), patch("gateway.platforms.feishu._run_official_feishu_ws_client"), patch("gateway.platforms.feishu.acquire_scoped_lock", return_value=(True, None)) as acquire_lock, @@ -297,6 +297,15 @@ class TestFeishuAdapterMessaging(unittest.TestCase): patch.object(adapter, "_hydrate_bot_identity", new=AsyncMock()), patch.object(adapter, "_build_lark_client", return_value=SimpleNamespace()), ): + mock_builder = Mock() + mock_builder.register_p2_im_message_message_read_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_receive_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_created_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_deleted_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_card_action_trigger = Mock(return_value=mock_builder) + mock_builder.build = Mock(return_value=object()) + mock_handler_class.builder = Mock(return_value=mock_builder) + loop = asyncio.new_event_loop() future = loop.create_future() future.set_result(None) @@ -305,6 +314,9 @@ class TestFeishuAdapterMessaging(unittest.TestCase): def run_in_executor(self, *_args, **_kwargs): return future + def is_closed(self): + return False + try: with patch("gateway.platforms.feishu.asyncio.get_running_loop", return_value=_Loop()): connected = asyncio.run(adapter.connect()) @@ -313,6 +325,7 @@ class TestFeishuAdapterMessaging(unittest.TestCase): loop.close() self.assertTrue(connected) + 
self.assertIsNone(adapter._event_handler) acquire_lock.assert_called_once_with( "feishu-app-id", "cli_app", @@ -361,7 +374,7 @@ class TestFeishuAdapterMessaging(unittest.TestCase): patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True), patch("gateway.platforms.feishu.FEISHU_WEBSOCKET_AVAILABLE", True), patch("gateway.platforms.feishu.lark", SimpleNamespace(LogLevel=SimpleNamespace(INFO="INFO", WARNING="WARNING"))), - patch("gateway.platforms.feishu.EventDispatcherHandler", object()), + patch("gateway.platforms.feishu.EventDispatcherHandler") as mock_handler_class, patch("gateway.platforms.feishu.FeishuWSClient", return_value=ws_client), patch("gateway.platforms.feishu.acquire_scoped_lock", return_value=(True, None)), patch("gateway.platforms.feishu.release_scoped_lock"), @@ -369,6 +382,15 @@ class TestFeishuAdapterMessaging(unittest.TestCase): patch("gateway.platforms.feishu.asyncio.sleep", side_effect=lambda delay: sleeps.append(delay)), patch.object(adapter, "_build_lark_client", return_value=SimpleNamespace()), ): + mock_builder = Mock() + mock_builder.register_p2_im_message_message_read_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_receive_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_created_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_deleted_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_card_action_trigger = Mock(return_value=mock_builder) + mock_builder.build = Mock(return_value=object()) + mock_handler_class.builder = Mock(return_value=mock_builder) + loop = asyncio.new_event_loop() future = loop.create_future() future.set_result(None) @@ -383,6 +405,9 @@ class TestFeishuAdapterMessaging(unittest.TestCase): raise OSError("temporary websocket failure") return future + def is_closed(self): + return False + fake_loop = _Loop() try: with patch("gateway.platforms.feishu.asyncio.get_running_loop", return_value=fake_loop): @@ -1196,7 
+1221,12 @@ class TestAdapterBehavior(unittest.TestCase): from gateway.platforms.feishu import FeishuAdapter adapter = FeishuAdapter(PlatformConfig()) - adapter._loop = object() + + class _Loop: + def is_closed(self): + return False + + adapter._loop = _Loop() message = SimpleNamespace( message_id="om_text", @@ -1210,6 +1240,7 @@ class TestAdapterBehavior(unittest.TestCase): data = SimpleNamespace(event=SimpleNamespace(message=message, sender=sender)) future = SimpleNamespace(add_done_callback=lambda *_args, **_kwargs: None) + def _submit(coro, _loop): coro.close() return future @@ -1219,6 +1250,30 @@ class TestAdapterBehavior(unittest.TestCase): self.assertTrue(submit.called) + @patch.dict(os.environ, {}, clear=True) + def test_webhook_request_uses_same_message_dispatch_path(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + adapter = FeishuAdapter(PlatformConfig()) + adapter._on_message_event = Mock() + + body = json.dumps({ + "header": {"event_type": "im.message.receive_v1"}, + "event": {"message": {"message_id": "om_test"}}, + }).encode("utf-8") + request = SimpleNamespace( + remote="127.0.0.1", + content_length=None, + headers={}, + read=AsyncMock(return_value=body), + ) + + response = asyncio.run(adapter._handle_webhook_request(request)) + + self.assertEqual(response.status, 200) + adapter._on_message_event.assert_called_once() + @patch.dict(os.environ, {}, clear=True) def test_process_inbound_message_uses_event_sender_identity_only(self): from gateway.config import PlatformConfig From 7d0bf151213a09d65f6bbece5f741955c2f6519d Mon Sep 17 00:00:00 2001 From: jtuki Date: Sun, 5 Apr 2026 22:45:49 +0800 Subject: [PATCH 025/154] feat(gateway): add configurable Feishu websocket reconnect timing Allow users to configure websocket reconnect behavior via platform extra config to reduce reconnect latency in production environments. 
The official Feishu SDK defaults to: - First reconnect: random jitter 0-30 seconds - Subsequent retries: 120 second intervals This can cause 20-30 second delays before reconnection after network interruptions. This commit makes these values configurable while keeping the SDK defaults for backward compatibility. Configuration via ~/.hermes/config.yaml: ```yaml platforms: feishu: extra: ws_reconnect_nonce: 0 # Disable first-reconnect jitter (default: 30) ws_reconnect_interval: 3 # Retry every 3 seconds (default: 120) ``` Invalid values (negative numbers, non-integers) fall back to SDK defaults. Co-Authored-By: Claude Opus 4.6 (1M context) --- gateway/platforms/feishu.py | 27 +++++++++++++++++++++++++++ tests/gateway/test_feishu.py | 30 ++++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index de4d97e6..30483932 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -270,6 +270,8 @@ class FeishuAdapterSettings: webhook_host: str webhook_port: int webhook_path: str + ws_reconnect_nonce: int = 30 + ws_reconnect_interval: int = 120 @dataclass @@ -358,6 +360,22 @@ def _strip_markdown_to_plain_text(text: str) -> str: return plain.strip() +def _coerce_non_negative_int(value: Any, default: int) -> int: + try: + parsed = int(value) + except (TypeError, ValueError): + return default + return parsed if parsed >= 0 else default + + +def _coerce_positive_int(value: Any, default: int) -> int: + try: + parsed = int(value) + except (TypeError, ValueError): + return default + return parsed if parsed >= 1 else default + + # --------------------------------------------------------------------------- # Post payload builders and parsers # --------------------------------------------------------------------------- @@ -1040,6 +1058,8 @@ class FeishuAdapter(BasePlatformAdapter): str(extra.get("webhook_path") or os.getenv("FEISHU_WEBHOOK_PATH", 
_DEFAULT_WEBHOOK_PATH)).strip() or _DEFAULT_WEBHOOK_PATH ), + ws_reconnect_nonce=_coerce_non_negative_int(extra.get("ws_reconnect_nonce"), 30), + ws_reconnect_interval=_coerce_positive_int(extra.get("ws_reconnect_interval"), 120), ) def _apply_settings(self, settings: FeishuAdapterSettings) -> None: @@ -1062,6 +1082,8 @@ class FeishuAdapter(BasePlatformAdapter): self._webhook_host = settings.webhook_host self._webhook_port = settings.webhook_port self._webhook_path = settings.webhook_path + self._ws_reconnect_nonce = settings.ws_reconnect_nonce + self._ws_reconnect_interval = settings.ws_reconnect_interval def _build_event_handler(self) -> Any: if EventDispatcherHandler is None: @@ -3032,6 +3054,11 @@ class FeishuAdapter(BasePlatformAdapter): event_handler=self._event_handler, domain=domain, ) + try: + setattr(self._ws_client, "_reconnect_nonce", self._ws_reconnect_nonce) + setattr(self._ws_client, "_reconnect_interval", self._ws_reconnect_interval) + except Exception: + logger.debug("[Feishu] Failed to override websocket reconnect settings", exc_info=True) self._ws_future = loop.run_in_executor( None, _run_official_feishu_ws_client, diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index 41f92d60..db532ba7 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -283,7 +283,7 @@ class TestFeishuAdapterMessaging(unittest.TestCase): from gateway.platforms.feishu import FeishuAdapter adapter = FeishuAdapter(PlatformConfig()) - ws_client = object() + ws_client = SimpleNamespace() with ( patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True), @@ -367,7 +367,7 @@ class TestFeishuAdapterMessaging(unittest.TestCase): from gateway.platforms.feishu import FeishuAdapter adapter = FeishuAdapter(PlatformConfig()) - ws_client = object() + ws_client = SimpleNamespace() sleeps = [] with ( @@ -561,6 +561,32 @@ class TestAdapterModule(unittest.TestCase): self.assertIn("register_p2_im_message_reaction_deleted_v1", source) 
self.assertIn("register_p2_card_action_trigger", source) + def test_load_settings_uses_sdk_defaults_for_invalid_ws_reconnect_values(self): + from gateway.platforms.feishu import FeishuAdapter + + settings = FeishuAdapter._load_settings( + { + "ws_reconnect_nonce": -1, + "ws_reconnect_interval": "bad", + } + ) + + self.assertEqual(settings.ws_reconnect_nonce, 30) + self.assertEqual(settings.ws_reconnect_interval, 120) + + def test_load_settings_accepts_custom_ws_reconnect_values(self): + from gateway.platforms.feishu import FeishuAdapter + + settings = FeishuAdapter._load_settings( + { + "ws_reconnect_nonce": 0, + "ws_reconnect_interval": 3, + } + ) + + self.assertEqual(settings.ws_reconnect_nonce, 0) + self.assertEqual(settings.ws_reconnect_interval, 3) + class TestAdapterBehavior(unittest.TestCase): @patch.dict(os.environ, {}, clear=True) From ea31d9077c1109f94fde5b97cd92e7920f5a3d18 Mon Sep 17 00:00:00 2001 From: jtuki Date: Sun, 5 Apr 2026 23:23:07 +0800 Subject: [PATCH 026/154] feat(gateway): add Feishu websocket ping timing overrides Allow Feishu websocket keepalive timing to be configured via platform extra config so disconnects can be detected faster in unstable networks. New optional extra settings: - ws_ping_interval - ws_ping_timeout These values are applied only when explicitly configured. Invalid values fall back to the websocket library defaults by leaving the options unset. This complements the reconnect timing settings added previously and helps reduce total recovery time after network interruptions. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- gateway/platforms/feishu.py | 26 ++++++++++++++++++++++++++ tests/gateway/test_feishu.py | 26 ++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 30483932..7fa6e3a9 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -272,6 +272,8 @@ class FeishuAdapterSettings: webhook_path: str ws_reconnect_nonce: int = 30 ws_reconnect_interval: int = 120 + ws_ping_interval: Optional[int] = None + ws_ping_timeout: Optional[int] = None @dataclass @@ -376,6 +378,14 @@ def _coerce_positive_int(value: Any, default: int) -> int: return parsed if parsed >= 1 else default +def _coerce_optional_positive_int(value: Any) -> Optional[int]: + try: + parsed = int(value) + except (TypeError, ValueError): + return None + return parsed if parsed >= 1 else None + + # --------------------------------------------------------------------------- # Post payload builders and parsers # --------------------------------------------------------------------------- @@ -939,11 +949,23 @@ def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None: asyncio.set_event_loop(loop) ws_client_module.loop = loop adapter._ws_thread_loop = loop + + original_connect = ws_client_module.websockets.connect + + async def _connect_with_overrides(*args: Any, **kwargs: Any) -> Any: + if adapter._ws_ping_interval is not None and "ping_interval" not in kwargs: + kwargs["ping_interval"] = adapter._ws_ping_interval + if adapter._ws_ping_timeout is not None and "ping_timeout" not in kwargs: + kwargs["ping_timeout"] = adapter._ws_ping_timeout + return await original_connect(*args, **kwargs) + + ws_client_module.websockets.connect = _connect_with_overrides try: ws_client.start() except Exception: pass finally: + ws_client_module.websockets.connect = original_connect pending = [t for t in asyncio.all_tasks(loop) if not t.done()] for task in pending: task.cancel() 
@@ -1060,6 +1082,8 @@ class FeishuAdapter(BasePlatformAdapter): ), ws_reconnect_nonce=_coerce_non_negative_int(extra.get("ws_reconnect_nonce"), 30), ws_reconnect_interval=_coerce_positive_int(extra.get("ws_reconnect_interval"), 120), + ws_ping_interval=_coerce_optional_positive_int(extra.get("ws_ping_interval")), + ws_ping_timeout=_coerce_optional_positive_int(extra.get("ws_ping_timeout")), ) def _apply_settings(self, settings: FeishuAdapterSettings) -> None: @@ -1084,6 +1108,8 @@ class FeishuAdapter(BasePlatformAdapter): self._webhook_path = settings.webhook_path self._ws_reconnect_nonce = settings.ws_reconnect_nonce self._ws_reconnect_interval = settings.ws_reconnect_interval + self._ws_ping_interval = settings.ws_ping_interval + self._ws_ping_timeout = settings.ws_ping_timeout def _build_event_handler(self) -> Any: if EventDispatcherHandler is None: diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index db532ba7..64ee3697 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -587,6 +587,32 @@ class TestAdapterModule(unittest.TestCase): self.assertEqual(settings.ws_reconnect_nonce, 0) self.assertEqual(settings.ws_reconnect_interval, 3) + def test_load_settings_accepts_custom_ws_ping_values(self): + from gateway.platforms.feishu import FeishuAdapter + + settings = FeishuAdapter._load_settings( + { + "ws_ping_interval": 10, + "ws_ping_timeout": 8, + } + ) + + self.assertEqual(settings.ws_ping_interval, 10) + self.assertEqual(settings.ws_ping_timeout, 8) + + def test_load_settings_ignores_invalid_ws_ping_values(self): + from gateway.platforms.feishu import FeishuAdapter + + settings = FeishuAdapter._load_settings( + { + "ws_ping_interval": 0, + "ws_ping_timeout": -1, + } + ) + + self.assertIsNone(settings.ws_ping_interval) + self.assertIsNone(settings.ws_ping_timeout) + class TestAdapterBehavior(unittest.TestCase): @patch.dict(os.environ, {}, clear=True) From 157d6184e3ac51463401725d92216a9a68040a49 Mon Sep 17 
00:00:00 2001 From: jtuki Date: Mon, 6 Apr 2026 00:40:10 +0800 Subject: [PATCH 027/154] fix(gateway): make Feishu websocket overrides effective at runtime Reapply local reconnect and ping settings after the Feishu SDK refreshes its client config so user-provided websocket tuning actually takes effect. Co-Authored-By: Claude Opus 4.6 (1M context) --- gateway/platforms/feishu.py | 25 +++++++++++--- tests/gateway/test_feishu.py | 64 ++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 7fa6e3a9..6b43ea55 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -951,6 +951,16 @@ def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None: adapter._ws_thread_loop = loop original_connect = ws_client_module.websockets.connect + original_configure = getattr(ws_client, "_configure", None) + + def _apply_runtime_ws_overrides() -> None: + try: + setattr(ws_client, "_reconnect_nonce", adapter._ws_reconnect_nonce) + setattr(ws_client, "_reconnect_interval", adapter._ws_reconnect_interval) + if adapter._ws_ping_interval is not None: + setattr(ws_client, "_ping_interval", adapter._ws_ping_interval) + except Exception: + logger.debug("[Feishu] Failed to apply websocket runtime overrides", exc_info=True) async def _connect_with_overrides(*args: Any, **kwargs: Any) -> Any: if adapter._ws_ping_interval is not None and "ping_interval" not in kwargs: @@ -959,13 +969,23 @@ def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None: kwargs["ping_timeout"] = adapter._ws_ping_timeout return await original_connect(*args, **kwargs) + def _configure_with_overrides(conf: Any) -> Any: + result = original_configure(conf) + _apply_runtime_ws_overrides() + return result + ws_client_module.websockets.connect = _connect_with_overrides + if callable(original_configure): + setattr(ws_client, "_configure", _configure_with_overrides) + 
_apply_runtime_ws_overrides() try: ws_client.start() except Exception: pass finally: ws_client_module.websockets.connect = original_connect + if callable(original_configure): + setattr(ws_client, "_configure", original_configure) pending = [t for t in asyncio.all_tasks(loop) if not t.done()] for task in pending: task.cancel() @@ -3080,11 +3100,6 @@ class FeishuAdapter(BasePlatformAdapter): event_handler=self._event_handler, domain=domain, ) - try: - setattr(self._ws_client, "_reconnect_nonce", self._ws_reconnect_nonce) - setattr(self._ws_client, "_reconnect_interval", self._ws_reconnect_interval) - except Exception: - logger.debug("[Feishu] Failed to override websocket reconnect settings", exc_info=True) self._ws_future = loop.run_in_executor( None, _run_official_feishu_ws_client, diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index 64ee3697..33ed55d3 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -262,12 +262,21 @@ class TestFeishuAdapterMessaging(unittest.TestCase): with ( patch("gateway.platforms.feishu.FEISHU_AVAILABLE", True), patch("gateway.platforms.feishu.FEISHU_WEBHOOK_AVAILABLE", True), + patch("gateway.platforms.feishu.EventDispatcherHandler") as mock_handler_class, patch("gateway.platforms.feishu.acquire_scoped_lock", return_value=(True, None)), patch("gateway.platforms.feishu.release_scoped_lock"), patch.object(adapter, "_hydrate_bot_identity", new=AsyncMock()), patch.object(adapter, "_build_lark_client", return_value=SimpleNamespace()), patch("gateway.platforms.feishu.web", web_module), ): + mock_builder = Mock() + mock_builder.register_p2_im_message_message_read_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_receive_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_created_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_deleted_v1 = Mock(return_value=mock_builder) + 
mock_builder.register_p2_card_action_trigger = Mock(return_value=mock_builder) + mock_builder.build = Mock(return_value=object()) + mock_handler_class.builder = Mock(return_value=mock_builder) connected = asyncio.run(adapter.connect()) self.assertTrue(connected) @@ -613,6 +622,61 @@ class TestAdapterModule(unittest.TestCase): self.assertIsNone(settings.ws_ping_interval) self.assertIsNone(settings.ws_ping_timeout) + def test_runtime_ws_overrides_reapply_after_sdk_configure(self): + import sys + from types import ModuleType + + class _FakeWSClient: + def __init__(self): + self._reconnect_nonce = 30 + self._reconnect_interval = 120 + self._ping_interval = 120 + self.configure_calls = [] + + def _configure(self, conf): + self.configure_calls.append(conf) + self._reconnect_nonce = conf.ReconnectNonce + self._reconnect_interval = conf.ReconnectInterval + self._ping_interval = conf.PingInterval + + def start(self): + conf = SimpleNamespace(ReconnectNonce=99, ReconnectInterval=88, PingInterval=77) + self._configure(conf) + raise RuntimeError("stop test client") + + fake_client = _FakeWSClient() + fake_adapter = SimpleNamespace( + _ws_thread_loop=None, + _ws_reconnect_nonce=2, + _ws_reconnect_interval=3, + _ws_ping_interval=4, + _ws_ping_timeout=5, + ) + fake_client_module = ModuleType("lark_oapi.ws.client") + fake_client_module.loop = None + fake_client_module.websockets = SimpleNamespace(connect=AsyncMock()) + fake_ws_module = ModuleType("lark_oapi.ws") + fake_ws_module.client = fake_client_module + fake_root_module = ModuleType("lark_oapi") + fake_root_module.ws = fake_ws_module + + original_modules = sys.modules.copy() + sys.modules["lark_oapi"] = fake_root_module + sys.modules["lark_oapi.ws"] = fake_ws_module + sys.modules["lark_oapi.ws.client"] = fake_client_module + try: + from gateway.platforms.feishu import _run_official_feishu_ws_client + + _run_official_feishu_ws_client(fake_client, fake_adapter) + finally: + sys.modules.clear() + 
sys.modules.update(original_modules) + + self.assertEqual(len(fake_client.configure_calls), 1) + self.assertEqual(fake_client._reconnect_nonce, 2) + self.assertEqual(fake_client._reconnect_interval, 3) + self.assertEqual(fake_client._ping_interval, 4) + class TestAdapterBehavior(unittest.TestCase): @patch.dict(os.environ, {}, clear=True) From 18727ca9aae48c5a4caeb1ab113bd7413ad81c53 Mon Sep 17 00:00:00 2001 From: jtuki Date: Mon, 6 Apr 2026 01:04:39 +0800 Subject: [PATCH 028/154] refactor(gateway): simplify Feishu websocket config helpers Consolidate coercion functions, extract loop readiness check, and deduplicate test mock setup to improve maintainability without changing behavior. Co-Authored-By: Claude Opus 4.6 (1M context) --- gateway/platforms/feishu.py | 35 ++++++++++++++------------------ tests/gateway/test_feishu.py | 39 ++++++++++++++---------------------- 2 files changed, 30 insertions(+), 44 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 6b43ea55..e39290b7 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -362,28 +362,22 @@ def _strip_markdown_to_plain_text(text: str) -> str: return plain.strip() -def _coerce_non_negative_int(value: Any, default: int) -> int: +def _coerce_int(value: Any, default: Optional[int] = None, min_value: int = 0) -> Optional[int]: + """Coerce value to int with optional default and minimum constraint.""" try: parsed = int(value) except (TypeError, ValueError): return default - return parsed if parsed >= 0 else default + return parsed if parsed >= min_value else default -def _coerce_positive_int(value: Any, default: int) -> int: - try: - parsed = int(value) - except (TypeError, ValueError): - return default - return parsed if parsed >= 1 else default +def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int: + parsed = _coerce_int(value, default=default, min_value=min_value) + return default if parsed is None else parsed -def 
_coerce_optional_positive_int(value: Any) -> Optional[int]: - try: - parsed = int(value) - except (TypeError, ValueError): - return None - return parsed if parsed >= 1 else None +def _is_loop_ready(loop: Optional[asyncio.AbstractEventLoop]) -> bool: + return loop is not None and not bool(getattr(loop, "is_closed", lambda: False)()) # --------------------------------------------------------------------------- @@ -970,12 +964,13 @@ def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None: return await original_connect(*args, **kwargs) def _configure_with_overrides(conf: Any) -> Any: + assert original_configure is not None result = original_configure(conf) _apply_runtime_ws_overrides() return result ws_client_module.websockets.connect = _connect_with_overrides - if callable(original_configure): + if original_configure is not None: setattr(ws_client, "_configure", _configure_with_overrides) _apply_runtime_ws_overrides() try: @@ -984,7 +979,7 @@ def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None: pass finally: ws_client_module.websockets.connect = original_connect - if callable(original_configure): + if original_configure is not None: setattr(ws_client, "_configure", original_configure) pending = [t for t in asyncio.all_tasks(loop) if not t.done()] for task in pending: @@ -1100,10 +1095,10 @@ class FeishuAdapter(BasePlatformAdapter): str(extra.get("webhook_path") or os.getenv("FEISHU_WEBHOOK_PATH", _DEFAULT_WEBHOOK_PATH)).strip() or _DEFAULT_WEBHOOK_PATH ), - ws_reconnect_nonce=_coerce_non_negative_int(extra.get("ws_reconnect_nonce"), 30), - ws_reconnect_interval=_coerce_positive_int(extra.get("ws_reconnect_interval"), 120), - ws_ping_interval=_coerce_optional_positive_int(extra.get("ws_ping_interval")), - ws_ping_timeout=_coerce_optional_positive_int(extra.get("ws_ping_timeout")), + ws_reconnect_nonce=_coerce_required_int(extra.get("ws_reconnect_nonce"), default=30, min_value=0), + 
ws_reconnect_interval=_coerce_required_int(extra.get("ws_reconnect_interval"), default=120, min_value=1), + ws_ping_interval=_coerce_int(extra.get("ws_ping_interval"), default=None, min_value=1), + ws_ping_timeout=_coerce_int(extra.get("ws_ping_timeout"), default=None, min_value=1), ) def _apply_settings(self, settings: FeishuAdapterSettings) -> None: diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index 33ed55d3..4e8d18e3 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -17,6 +17,18 @@ except ImportError: _HAS_LARK_OAPI = False +def _mock_event_dispatcher_builder(mock_handler_class): + mock_builder = Mock() + mock_builder.register_p2_im_message_message_read_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_receive_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_created_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_im_message_reaction_deleted_v1 = Mock(return_value=mock_builder) + mock_builder.register_p2_card_action_trigger = Mock(return_value=mock_builder) + mock_builder.build = Mock(return_value=object()) + mock_handler_class.builder = Mock(return_value=mock_builder) + return mock_builder + + class TestPlatformEnum(unittest.TestCase): def test_feishu_in_platform_enum(self): from gateway.config import Platform @@ -269,14 +281,7 @@ class TestFeishuAdapterMessaging(unittest.TestCase): patch.object(adapter, "_build_lark_client", return_value=SimpleNamespace()), patch("gateway.platforms.feishu.web", web_module), ): - mock_builder = Mock() - mock_builder.register_p2_im_message_message_read_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_receive_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_reaction_created_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_reaction_deleted_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_card_action_trigger = 
Mock(return_value=mock_builder) - mock_builder.build = Mock(return_value=object()) - mock_handler_class.builder = Mock(return_value=mock_builder) + _mock_event_dispatcher_builder(mock_handler_class) connected = asyncio.run(adapter.connect()) self.assertTrue(connected) @@ -306,14 +311,7 @@ class TestFeishuAdapterMessaging(unittest.TestCase): patch.object(adapter, "_hydrate_bot_identity", new=AsyncMock()), patch.object(adapter, "_build_lark_client", return_value=SimpleNamespace()), ): - mock_builder = Mock() - mock_builder.register_p2_im_message_message_read_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_receive_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_reaction_created_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_reaction_deleted_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_card_action_trigger = Mock(return_value=mock_builder) - mock_builder.build = Mock(return_value=object()) - mock_handler_class.builder = Mock(return_value=mock_builder) + _mock_event_dispatcher_builder(mock_handler_class) loop = asyncio.new_event_loop() future = loop.create_future() @@ -391,14 +389,7 @@ class TestFeishuAdapterMessaging(unittest.TestCase): patch("gateway.platforms.feishu.asyncio.sleep", side_effect=lambda delay: sleeps.append(delay)), patch.object(adapter, "_build_lark_client", return_value=SimpleNamespace()), ): - mock_builder = Mock() - mock_builder.register_p2_im_message_message_read_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_receive_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_reaction_created_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_im_message_reaction_deleted_v1 = Mock(return_value=mock_builder) - mock_builder.register_p2_card_action_trigger = Mock(return_value=mock_builder) - mock_builder.build = Mock(return_value=object()) - mock_handler_class.builder = 
Mock(return_value=mock_builder) + _mock_event_dispatcher_builder(mock_handler_class) loop = asyncio.new_event_loop() future = loop.create_future() From 57abc9931509dd0da9e7faee6855d20b8cb18830 Mon Sep 17 00:00:00 2001 From: jtuki Date: Mon, 6 Apr 2026 22:18:41 +0800 Subject: [PATCH 029/154] feat(gateway): add per-group access control for Feishu Add fine-grained authorization policies per Feishu group chat via platforms.feishu.extra configuration. - Add global bot-level admins that bypass all group restrictions - Add per-group policies: open, allowlist, blacklist, admin_only, disabled - Add default_group_policy fallback for chats without explicit rules - Thread chat_id through group message gate for per-chat rule selection - Match both open_id and user_id for backward compatibility - Preserve existing FEISHU_ALLOWED_USERS / FEISHU_GROUP_POLICY behavior - Add focused regression tests for all policy modes Co-Authored-By: Claude Opus 4.6 (1M context) --- gateway/platforms/feishu.py | 84 ++++++++++-- tests/gateway/test_feishu.py | 252 ++++++++++++++++++++++++++++++----- 2 files changed, 290 insertions(+), 46 deletions(-) diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index e39290b7..9bbf2f62 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -274,6 +274,18 @@ class FeishuAdapterSettings: ws_reconnect_interval: int = 120 ws_ping_interval: Optional[int] = None ws_ping_timeout: Optional[int] = None + admins: frozenset[str] = frozenset() + default_group_policy: str = "" + group_rules: Dict[str, FeishuGroupRule] = field(default_factory=dict) + + +@dataclass +class FeishuGroupRule: + """Per-group policy rule for controlling which users may interact with the bot.""" + + policy: str # "open" | "allowlist" | "blacklist" | "admin_only" | "disabled" + allowlist: set[str] = field(default_factory=set) + blacklist: set[str] = field(default_factory=set) @dataclass @@ -1049,6 +1061,26 @@ class FeishuAdapter(BasePlatformAdapter): 
@staticmethod def _load_settings(extra: Dict[str, Any]) -> FeishuAdapterSettings: + # Parse per-group rules from config + raw_group_rules = extra.get("group_rules", {}) + group_rules: Dict[str, FeishuGroupRule] = {} + if isinstance(raw_group_rules, dict): + for chat_id, rule_cfg in raw_group_rules.items(): + if not isinstance(rule_cfg, dict): + continue + group_rules[str(chat_id)] = FeishuGroupRule( + policy=str(rule_cfg.get("policy", "open")).strip().lower(), + allowlist=set(str(u).strip() for u in rule_cfg.get("allowlist", []) if str(u).strip()), + blacklist=set(str(u).strip() for u in rule_cfg.get("blacklist", []) if str(u).strip()), + ) + + # Bot-level admins + raw_admins = extra.get("admins", []) + admins = frozenset(str(u).strip() for u in raw_admins if str(u).strip()) + + # Default group policy (for groups not in group_rules) + default_group_policy = str(extra.get("default_group_policy", "")).strip().lower() + return FeishuAdapterSettings( app_id=str(extra.get("app_id") or os.getenv("FEISHU_APP_ID", "")).strip(), app_secret=str(extra.get("app_secret") or os.getenv("FEISHU_APP_SECRET", "")).strip(), @@ -1099,6 +1131,9 @@ class FeishuAdapter(BasePlatformAdapter): ws_reconnect_interval=_coerce_required_int(extra.get("ws_reconnect_interval"), default=120, min_value=1), ws_ping_interval=_coerce_int(extra.get("ws_ping_interval"), default=None, min_value=1), ws_ping_timeout=_coerce_int(extra.get("ws_ping_timeout"), default=None, min_value=1), + admins=admins, + default_group_policy=default_group_policy, + group_rules=group_rules, ) def _apply_settings(self, settings: FeishuAdapterSettings) -> None: @@ -1110,6 +1145,9 @@ class FeishuAdapter(BasePlatformAdapter): self._verification_token = settings.verification_token self._group_policy = settings.group_policy self._allowed_group_users = set(settings.allowed_group_users) + self._admins = set(settings.admins) + self._default_group_policy = settings.default_group_policy or settings.group_policy + self._group_rules = 
settings.group_rules self._bot_open_id = settings.bot_open_id self._bot_user_id = settings.bot_user_id self._bot_name = settings.bot_name @@ -1617,7 +1655,8 @@ class FeishuAdapter(BasePlatformAdapter): return chat_type = getattr(message, "chat_type", "p2p") - if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id): + chat_id = getattr(message, "chat_id", "") or "" + if chat_type != "p2p" and not self._should_accept_group_message(message, sender_id, chat_id): logger.debug("[Feishu] Dropping group message that failed mention/policy gate: %s", message_id) return await self._process_inbound_message( @@ -2773,18 +2812,41 @@ class FeishuAdapter(BasePlatformAdapter): # Group policy and mention gating # ========================================================================= - def _allow_group_message(self, sender_id: Any) -> bool: - """Current group policy gate for non-DM traffic.""" - if self._group_policy == "disabled": - return False - sender_open_id = getattr(sender_id, "open_id", None) or getattr(sender_id, "user_id", None) - if self._group_policy == "open": - return True - return bool(sender_open_id and sender_open_id in self._allowed_group_users) + def _allow_group_message(self, sender_id: Any, chat_id: str = "") -> bool: + """Per-group policy gate for non-DM traffic.""" + sender_open_id = getattr(sender_id, "open_id", None) + sender_user_id = getattr(sender_id, "user_id", None) + sender_ids = {sender_open_id, sender_user_id} - {None} - def _should_accept_group_message(self, message: Any, sender_id: Any) -> bool: + if sender_ids and self._admins and (sender_ids & self._admins): + return True + + rule = self._group_rules.get(chat_id) if chat_id else None + if rule: + policy = rule.policy + allowlist = rule.allowlist + blacklist = rule.blacklist + else: + policy = self._default_group_policy or self._group_policy + allowlist = self._allowed_group_users + blacklist = set() + + if policy == "disabled": + return False + if policy == "open": + 
return True + if policy == "admin_only": + return False + if policy == "allowlist": + return bool(sender_ids and (sender_ids & allowlist)) + if policy == "blacklist": + return bool(sender_ids and not (sender_ids & blacklist)) + + return bool(sender_ids and (sender_ids & self._allowed_group_users)) + + def _should_accept_group_message(self, message: Any, sender_id: Any, chat_id: str = "") -> bool: """Require an explicit @mention before group messages enter the agent.""" - if not self._allow_group_message(sender_id): + if not self._allow_group_message(sender_id, chat_id): return False # @_all is Feishu's @everyone placeholder — always route to the bot. raw_content = getattr(message, "content", "") or "" diff --git a/tests/gateway/test_feishu.py b/tests/gateway/test_feishu.py index 4e8d18e3..47f274d1 100644 --- a/tests/gateway/test_feishu.py +++ b/tests/gateway/test_feishu.py @@ -822,10 +822,10 @@ class TestAdapterBehavior(unittest.TestCase): adapter = FeishuAdapter(PlatformConfig()) message = SimpleNamespace(mentions=[]) sender_id = SimpleNamespace(open_id="ou_any", user_id=None) - self.assertFalse(adapter._should_accept_group_message(message, sender_id)) + self.assertFalse(adapter._should_accept_group_message(message, sender_id, "")) message_with_mention = SimpleNamespace(mentions=[SimpleNamespace(key="@_user_1")]) - self.assertFalse(adapter._should_accept_group_message(message_with_mention, sender_id)) + self.assertFalse(adapter._should_accept_group_message(message_with_mention, sender_id, "")) @patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True) def test_group_message_with_other_user_mention_is_rejected_when_bot_identity_unknown(self): @@ -839,7 +839,7 @@ class TestAdapterBehavior(unittest.TestCase): id=SimpleNamespace(open_id="ou_other", user_id="u_other"), ) - self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[other_mention]), sender_id)) + 
self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[other_mention]), sender_id, "")) @patch.dict( os.environ, @@ -868,28 +868,222 @@ class TestAdapterBehavior(unittest.TestCase): adapter._should_accept_group_message( mentioned, SimpleNamespace(open_id="ou_allowed", user_id=None), + "", ) ) self.assertFalse( adapter._should_accept_group_message( mentioned, SimpleNamespace(open_id="ou_blocked", user_id=None), + "", ) ) - @patch.dict( - os.environ, - { - "FEISHU_GROUP_POLICY": "open", - "FEISHU_BOT_OPEN_ID": "ou_bot", - }, - clear=True, - ) + def test_per_group_allowlist_policy_gates_by_sender(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + config = PlatformConfig( + extra={ + "group_rules": { + "oc_chat_a": { + "policy": "allowlist", + "allowlist": ["ou_alice", "ou_bob"], + } + } + } + ) + adapter = FeishuAdapter(config) + adapter._bot_open_id = "ou_bot" + + message = SimpleNamespace( + mentions=[SimpleNamespace(name="Bot", id=SimpleNamespace(open_id="ou_bot", user_id=None))] + ) + + self.assertTrue( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_alice", user_id=None), + "oc_chat_a", + ) + ) + self.assertFalse( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_charlie", user_id=None), + "oc_chat_a", + ) + ) + + def test_per_group_blacklist_policy_blocks_specific_users(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + config = PlatformConfig( + extra={ + "group_rules": { + "oc_chat_b": { + "policy": "blacklist", + "blacklist": ["ou_blocked"], + } + } + } + ) + adapter = FeishuAdapter(config) + adapter._bot_open_id = "ou_bot" + + message = SimpleNamespace( + mentions=[SimpleNamespace(name="Bot", id=SimpleNamespace(open_id="ou_bot", user_id=None))] + ) + + self.assertTrue( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_alice", 
user_id=None), + "oc_chat_b", + ) + ) + self.assertFalse( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_blocked", user_id=None), + "oc_chat_b", + ) + ) + + def test_per_group_admin_only_policy_requires_admin(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + config = PlatformConfig( + extra={ + "admins": ["ou_admin"], + "group_rules": { + "oc_chat_c": { + "policy": "admin_only", + } + }, + } + ) + adapter = FeishuAdapter(config) + adapter._bot_open_id = "ou_bot" + + message = SimpleNamespace( + mentions=[SimpleNamespace(name="Bot", id=SimpleNamespace(open_id="ou_bot", user_id=None))] + ) + + self.assertTrue( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_admin", user_id=None), + "oc_chat_c", + ) + ) + self.assertFalse( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_regular", user_id=None), + "oc_chat_c", + ) + ) + + def test_per_group_disabled_policy_blocks_all(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + config = PlatformConfig( + extra={ + "admins": ["ou_admin"], + "group_rules": { + "oc_chat_d": { + "policy": "disabled", + } + }, + } + ) + adapter = FeishuAdapter(config) + adapter._bot_open_id = "ou_bot" + + message = SimpleNamespace( + mentions=[SimpleNamespace(name="Bot", id=SimpleNamespace(open_id="ou_bot", user_id=None))] + ) + + self.assertTrue( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_admin", user_id=None), + "oc_chat_d", + ) + ) + self.assertFalse( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_regular", user_id=None), + "oc_chat_d", + ) + ) + + def test_global_admins_bypass_all_group_rules(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + config = PlatformConfig( + extra={ + "admins": ["ou_admin"], + 
"group_rules": { + "oc_chat_e": { + "policy": "allowlist", + "allowlist": ["ou_alice"], + } + }, + } + ) + adapter = FeishuAdapter(config) + adapter._bot_open_id = "ou_bot" + + message = SimpleNamespace( + mentions=[SimpleNamespace(name="Bot", id=SimpleNamespace(open_id="ou_bot", user_id=None))] + ) + + self.assertTrue( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_admin", user_id=None), + "oc_chat_e", + ) + ) + + def test_default_group_policy_fallback_for_chats_without_explicit_rule(self): + from gateway.config import PlatformConfig + from gateway.platforms.feishu import FeishuAdapter + + config = PlatformConfig( + extra={ + "default_group_policy": "open", + } + ) + adapter = FeishuAdapter(config) + adapter._bot_open_id = "ou_bot" + + message = SimpleNamespace( + mentions=[SimpleNamespace(name="Bot", id=SimpleNamespace(open_id="ou_bot", user_id=None))] + ) + + self.assertTrue( + adapter._should_accept_group_message( + message, + SimpleNamespace(open_id="ou_anyone", user_id=None), + "oc_chat_unknown", + ) + ) + + @patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True) def test_group_message_matches_bot_open_id_when_configured(self): from gateway.config import PlatformConfig from gateway.platforms.feishu import FeishuAdapter adapter = FeishuAdapter(PlatformConfig()) + adapter._bot_open_id = "ou_bot" sender_id = SimpleNamespace(open_id="ou_any", user_id=None) bot_mention = SimpleNamespace( @@ -901,22 +1095,16 @@ class TestAdapterBehavior(unittest.TestCase): id=SimpleNamespace(open_id="ou_other", user_id="u_other"), ) - self.assertTrue(adapter._should_accept_group_message(SimpleNamespace(mentions=[bot_mention]), sender_id)) - self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[other_mention]), sender_id)) + self.assertTrue(adapter._should_accept_group_message(SimpleNamespace(mentions=[bot_mention]), sender_id, "")) + 
self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[other_mention]), sender_id, "")) - @patch.dict( - os.environ, - { - "FEISHU_GROUP_POLICY": "open", - "FEISHU_BOT_NAME": "Hermes Bot", - }, - clear=True, - ) + @patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True) def test_group_message_matches_bot_name_when_only_name_available(self): from gateway.config import PlatformConfig from gateway.platforms.feishu import FeishuAdapter adapter = FeishuAdapter(PlatformConfig()) + adapter._bot_name = "Hermes Bot" sender_id = SimpleNamespace(open_id="ou_any", user_id=None) named_mention = SimpleNamespace( @@ -928,22 +1116,16 @@ class TestAdapterBehavior(unittest.TestCase): id=SimpleNamespace(open_id="ou_other", user_id="u_other"), ) - self.assertTrue(adapter._should_accept_group_message(SimpleNamespace(mentions=[named_mention]), sender_id)) - self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[different_mention]), sender_id)) + self.assertTrue(adapter._should_accept_group_message(SimpleNamespace(mentions=[named_mention]), sender_id, "")) + self.assertFalse(adapter._should_accept_group_message(SimpleNamespace(mentions=[different_mention]), sender_id, "")) - @patch.dict( - os.environ, - { - "FEISHU_GROUP_POLICY": "open", - "FEISHU_BOT_OPEN_ID": "ou_bot", - }, - clear=True, - ) + @patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True) def test_group_post_message_uses_parsed_mentions_when_sdk_mentions_missing(self): from gateway.config import PlatformConfig from gateway.platforms.feishu import FeishuAdapter adapter = FeishuAdapter(PlatformConfig()) + adapter._bot_open_id = "ou_bot" sender_id = SimpleNamespace(open_id="ou_any", user_id=None) message = SimpleNamespace( message_type="post", @@ -951,7 +1133,7 @@ class TestAdapterBehavior(unittest.TestCase): content='{"en_us":{"content":[[{"tag":"at","user_name":"Hermes","open_id":"ou_bot"}]]}}', ) - 
self.assertTrue(adapter._should_accept_group_message(message, sender_id)) + self.assertTrue(adapter._should_accept_group_message(message, sender_id, "")) @patch.dict(os.environ, {}, clear=True) def test_extract_post_message_as_text(self): @@ -2618,7 +2800,7 @@ class TestGroupMentionAtAll(unittest.TestCase): mentions=[], ) sender_id = SimpleNamespace(open_id="ou_any", user_id=None) - self.assertTrue(adapter._should_accept_group_message(message, sender_id)) + self.assertTrue(adapter._should_accept_group_message(message, sender_id, "")) @patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "allowlist", "FEISHU_ALLOWED_USERS": "ou_allowed"}, clear=True) def test_at_all_still_requires_policy_gate(self): @@ -2630,10 +2812,10 @@ class TestGroupMentionAtAll(unittest.TestCase): message = SimpleNamespace(content='{"text":"@_all attention"}', mentions=[]) # Non-allowlisted user — should be blocked even with @_all. blocked_sender = SimpleNamespace(open_id="ou_blocked", user_id=None) - self.assertFalse(adapter._should_accept_group_message(message, blocked_sender)) + self.assertFalse(adapter._should_accept_group_message(message, blocked_sender, "")) # Allowlisted user — should pass. allowed_sender = SimpleNamespace(open_id="ou_allowed", user_id=None) - self.assertTrue(adapter._should_accept_group_message(message, allowed_sender)) + self.assertTrue(adapter._should_accept_group_message(message, allowed_sender, "")) @unittest.skipUnless(_HAS_LARK_OAPI, "lark-oapi not installed") From adb418fb5390d77b4516a7413cbc53b09717f1d5 Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 6 Apr 2026 16:53:24 -0700 Subject: [PATCH 030/154] fix: cross-platform browser test path separators Use os.path.join for Windows install path so test passes on Linux (os.path.join uses / on Linux, \ on Windows). 
--- tests/test_cli_browser_connect.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/test_cli_browser_connect.py b/tests/test_cli_browser_connect.py index a913d96f..f01475bf 100644 --- a/tests/test_cli_browser_connect.py +++ b/tests/test_cli_browser_connect.py @@ -1,5 +1,6 @@ """Tests for CLI browser CDP auto-launch helpers.""" +import os from unittest.mock import patch from cli import HermesCLI @@ -24,14 +25,16 @@ class TestChromeDebugLaunch: def test_windows_launch_falls_back_to_common_install_dirs(self, monkeypatch): captured = {} - installed = r"C:\Program Files\Google\Chrome\Application\chrome.exe" + program_files = r"C:\Program Files" + # Use os.path.join so path separators match cross-platform + installed = os.path.join(program_files, "Google", "Chrome", "Application", "chrome.exe") def fake_popen(cmd, **kwargs): captured["cmd"] = cmd captured["kwargs"] = kwargs return object() - monkeypatch.setenv("ProgramFiles", r"C:\Program Files") + monkeypatch.setenv("ProgramFiles", program_files) monkeypatch.delenv("ProgramFiles(x86)", raising=False) monkeypatch.delenv("LOCALAPPDATA", raising=False) From 8cf013ecd9c00c0113171ceb2dedb0aeec9010d3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 17:17:57 -0700 Subject: [PATCH 031/154] fix: replace stale 'hermes login' refs with 'hermes auth' + fix credential removal re-seeding (#5670) Two fixes: 1. Replace all stale 'hermes login' references with 'hermes auth' across auth.py, auxiliary_client.py, delegate_tool.py, config.py, run_agent.py, and documentation. The 'hermes login' command was deprecated; 'hermes auth' now handles OAuth credential management. 2. Fix credential removal not persisting for singleton-sourced credentials (device_code for openai-codex/nous, hermes_pkce for anthropic). 
auth_remove_command already cleared env vars for env-sourced credentials, but singleton credentials stored in the auth store were re-seeded by _seed_from_singletons() on the next load_pool() call. Now clears the underlying auth store entry when removing singleton-sourced credentials. --- agent/auxiliary_client.py | 4 +-- hermes_cli/auth.py | 20 +++++++------- hermes_cli/auth_commands.py | 26 +++++++++++++++++++ hermes_cli/config.py | 8 +++--- run_agent.py | 2 +- tools/delegate_tool.py | 2 +- website/docs/reference/cli-commands.md | 23 +++++----------- website/docs/user-guide/configuration.md | 2 +- .../user-guide/features/fallback-providers.md | 4 +-- 9 files changed, 53 insertions(+), 38 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 95d5def0..7cb8f9f5 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -849,7 +849,7 @@ def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[st if forced == "nous": client, model = _try_nous() if client is None: - logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)") + logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes auth)") return client, model if forced == "codex": @@ -1119,7 +1119,7 @@ def resolve_provider_client( client, default = _try_nous() if client is None: logger.warning("resolve_provider_client: nous requested " - "but Nous Portal not configured (run: hermes login)") + "but Nous Portal not configured (run: hermes auth)") return None, None final_model = model or default return (_to_async_client(client, final_model) if async_mode diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index bfbeb818..fd65246a 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -936,7 +936,7 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]: state = _load_provider_state(auth_store, "openai-codex") if not state: raise AuthError( - "No Codex credentials stored. 
Run `hermes login` to authenticate.", + "No Codex credentials stored. Run `hermes auth` to authenticate.", provider="openai-codex", code="codex_auth_missing", relogin_required=True, @@ -944,7 +944,7 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]: tokens = state.get("tokens") if not isinstance(tokens, dict): raise AuthError( - "Codex auth state is missing tokens. Run `hermes login` to re-authenticate.", + "Codex auth state is missing tokens. Run `hermes auth` to re-authenticate.", provider="openai-codex", code="codex_auth_invalid_shape", relogin_required=True, @@ -953,14 +953,14 @@ def _read_codex_tokens(*, _lock: bool = True) -> Dict[str, Any]: refresh_token = tokens.get("refresh_token") if not isinstance(access_token, str) or not access_token.strip(): raise AuthError( - "Codex auth is missing access_token. Run `hermes login` to re-authenticate.", + "Codex auth is missing access_token. Run `hermes auth` to re-authenticate.", provider="openai-codex", code="codex_auth_missing_access_token", relogin_required=True, ) if not isinstance(refresh_token, str) or not refresh_token.strip(): raise AuthError( - "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.", + "Codex auth is missing refresh_token. Run `hermes auth` to re-authenticate.", provider="openai-codex", code="codex_auth_missing_refresh_token", relogin_required=True, @@ -995,7 +995,7 @@ def refresh_codex_oauth_pure( del access_token # Access token is only used by callers to decide whether to refresh. if not isinstance(refresh_token, str) or not refresh_token.strip(): raise AuthError( - "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.", + "Codex auth is missing refresh_token. Run `hermes auth` to re-authenticate.", provider="openai-codex", code="codex_auth_missing_refresh_token", relogin_required=True, @@ -1035,7 +1035,7 @@ def refresh_codex_oauth_pure( "Codex refresh token was already consumed by another client " "(e.g. 
Codex CLI or VS Code extension). " "Run `codex` in your terminal to generate fresh tokens, " - "then run `hermes login --provider openai-codex` to re-authenticate." + "then run `hermes auth` to re-authenticate." ) relogin_required = True raise AuthError( @@ -1140,7 +1140,7 @@ def resolve_codex_runtime_credentials( logger.info("Migrating Codex credentials from ~/.codex/ to Hermes auth store") print("⚠️ Migrating Codex credentials to Hermes's own auth store.") print(" This avoids conflicts with Codex CLI and VS Code.") - print(" Run `hermes login` to create a fully independent session.\n") + print(" Run `hermes auth` to create a fully independent session.\n") _save_codex_tokens(cli_tokens) data = _read_codex_tokens() else: @@ -2096,7 +2096,7 @@ def detect_external_credentials() -> List[Dict[str, Any]]: found.append({ "provider": "openai-codex", "path": str(codex_path), - "label": f"Codex CLI credentials found ({codex_path}) — run `hermes login` to create a separate session", + "label": f"Codex CLI credentials found ({codex_path}) — run `hermes auth` to create a separate session", }) return found @@ -2345,8 +2345,8 @@ def _save_model_choice(model_id: str) -> None: def login_command(args) -> None: """Deprecated: use 'hermes model' or 'hermes setup' instead.""" print("The 'hermes login' command has been removed.") - print("Use 'hermes model' to select a provider and model,") - print("or 'hermes setup' for full interactive setup.") + print("Use 'hermes auth' to manage credentials,") + print("'hermes model' to select a provider, or 'hermes setup' for full setup.") raise SystemExit(0) diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 1564c100..395dbb76 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -305,6 +305,32 @@ def auth_remove_command(args) -> None: if cleared: print(f"Cleared {env_var} from .env") + # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), + # clear the underlying auth 
store / credential file so it doesn't get + # re-seeded on the next load_pool() call. + elif removed.source == "device_code" and provider in ("openai-codex", "nous"): + from hermes_cli.auth import ( + _load_auth_store, _save_auth_store, _auth_store_lock, + ) + with _auth_store_lock(): + auth_store = _load_auth_store() + providers_dict = auth_store.get("providers") + if isinstance(providers_dict, dict) and provider in providers_dict: + del providers_dict[provider] + _save_auth_store(auth_store) + print(f"Cleared {provider} OAuth tokens from auth store") + + elif removed.source == "hermes_pkce" and provider == "anthropic": + from hermes_constants import get_hermes_home + oauth_file = get_hermes_home() / ".anthropic_oauth.json" + if oauth_file.exists(): + oauth_file.unlink() + print("Cleared Hermes Anthropic OAuth credentials") + + elif removed.source == "claude_code" and provider == "anthropic": + print("Note: Claude Code credentials live in ~/.claude/.credentials.json") + print(" Remove them manually if you want to deauthorize Claude Code.") + def auth_reset_command(args) -> None: provider = _normalize_provider(getattr(args, "provider", "")) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index bf0b27c2..8863bda5 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1932,8 +1932,8 @@ _FALLBACK_COMMENT = """ # # Supported providers: # openrouter (OPENROUTER_API_KEY) — routes to any model -# openai-codex (OAuth — hermes login) — OpenAI Codex -# nous (OAuth — hermes login) — Nous Portal +# openai-codex (OAuth — hermes auth) — OpenAI Codex +# nous (OAuth — hermes auth) — Nous Portal # zai (ZAI_API_KEY) — Z.AI / GLM # kimi-coding (KIMI_API_KEY) — Kimi / Moonshot # minimax (MINIMAX_API_KEY) — MiniMax @@ -1975,8 +1975,8 @@ _COMMENTED_SECTIONS = """ # # Supported providers: # openrouter (OPENROUTER_API_KEY) — routes to any model -# openai-codex (OAuth — hermes login) — OpenAI Codex -# nous (OAuth — hermes login) — Nous Portal +# openai-codex (OAuth — 
hermes auth) — OpenAI Codex +# nous (OAuth — hermes auth) — Nous Portal # zai (ZAI_API_KEY) — Z.AI / GLM # kimi-coding (KIMI_API_KEY) — Kimi / Moonshot # minimax (MINIMAX_API_KEY) — MiniMax diff --git a/run_agent.py b/run_agent.py index d97e08ad..ffe94774 100644 --- a/run_agent.py +++ b/run_agent.py @@ -8191,7 +8191,7 @@ class AIAgent: self._vprint(f"{self.log_prefix} 💡 Codex OAuth token was rejected (HTTP 401). Your token may have been", force=True) self._vprint(f"{self.log_prefix} refreshed by another client (Codex CLI, VS Code). To fix:", force=True) self._vprint(f"{self.log_prefix} 1. Run `codex` in your terminal to generate fresh tokens.", force=True) - self._vprint(f"{self.log_prefix} 2. Then run `hermes login --provider openai-codex` to re-authenticate.", force=True) + self._vprint(f"{self.log_prefix} 2. Then run `hermes auth` to re-authenticate.", force=True) else: self._vprint(f"{self.log_prefix} 💡 Your API key was rejected by the provider. Check:", force=True) self._vprint(f"{self.log_prefix} • Is the key valid? Run: hermes setup", force=True) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 71a78ea6..28ffc795 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -701,7 +701,7 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: if not api_key: raise ValueError( f"Delegation provider '{configured_provider}' resolved but has no API key. " - f"Set the appropriate environment variable or run 'hermes login'." + f"Set the appropriate environment variable or run 'hermes auth'." ) return { diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index b2df9bca..5fbe921b 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -37,8 +37,8 @@ hermes [global-options] [subcommand/options] | `hermes gateway` | Run or manage the messaging gateway service. | | `hermes setup` | Interactive setup wizard for all or part of the configuration. 
| | `hermes whatsapp` | Configure and pair the WhatsApp bridge. | -| `hermes login` / `logout` | Authenticate with OAuth-backed providers. | -| `hermes auth` | Manage credential pools — add, list, remove, reset, set strategy. | +| `hermes auth` | Manage credentials — add, list, remove, reset, set strategy. Handles OAuth flows for Codex/Nous/Anthropic. | +| `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. | | `hermes status` | Show agent, auth, and platform status. | | `hermes cron` | Inspect and tick the cron scheduler. | | `hermes webhook` | Manage dynamic webhook subscriptions for event-driven activation. | @@ -178,22 +178,11 @@ hermes whatsapp Runs the WhatsApp pairing/setup flow, including mode selection and QR-code pairing. -## `hermes login` / `hermes logout` +## `hermes login` / `hermes logout` *(Deprecated)* -```bash -hermes login [--provider nous|openai-codex] [--portal-url ...] [--inference-url ...] -hermes logout [--provider nous|openai-codex] -``` - -`login` supports: -- Nous Portal OAuth/device flow -- OpenAI Codex OAuth/device flow - -Useful options for `login`: -- `--no-browser` -- `--timeout ` -- `--ca-bundle ` -- `--insecure` +:::caution +`hermes login` has been removed. Use `hermes auth` to manage OAuth credentials, `hermes model` to select a provider, or `hermes setup` for full interactive setup. +::: ## `hermes auth` diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 7148b423..06332908 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -651,7 +651,7 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o |----------|-------------|-------------| | `"auto"` | Best available (default). Vision tries OpenRouter → Nous → Codex. | — | | `"openrouter"` | Force OpenRouter — routes to any model (Gemini, GPT-4o, Claude, etc.) 
| `OPENROUTER_API_KEY` | -| `"nous"` | Force Nous Portal | `hermes login` | +| `"nous"` | Force Nous Portal | `hermes auth` | | `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex | | `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. | Custom endpoint credentials + base URL | diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index a5cdc5ba..8868162e 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -37,7 +37,7 @@ Both `provider` and `model` are **required**. If either is missing, the fallback |----------|-------|-------------| | AI Gateway | `ai-gateway` | `AI_GATEWAY_API_KEY` | | OpenRouter | `openrouter` | `OPENROUTER_API_KEY` | -| Nous Portal | `nous` | `hermes login` (OAuth) | +| Nous Portal | `nous` | `hermes auth` (OAuth) | | OpenAI Codex | `openai-codex` | `hermes model` (ChatGPT OAuth) | | GitHub Copilot | `copilot` | `COPILOT_GITHUB_TOKEN`, `GH_TOKEN`, or `GITHUB_TOKEN` | | GitHub Copilot ACP | `copilot-acp` | External process (editor integration) | @@ -244,7 +244,7 @@ All three — auxiliary, compression, fallback — work the same way: set `provi |----------|-------------|-------------| | `"auto"` | Try providers in order until one works (default) | At least one provider configured | | `"openrouter"` | Force OpenRouter | `OPENROUTER_API_KEY` | -| `"nous"` | Force Nous Portal | `hermes login` | +| `"nous"` | Force Nous Portal | `hermes auth` | | `"codex"` | Force Codex OAuth | `hermes model` → Codex | | `"main"` | Use whatever provider the main agent uses | Active main provider configured | | `"anthropic"` | Force Anthropic native | `ANTHROPIC_API_KEY` or Claude Code 
credentials | From dc4c07ed9d1653919bd8f6201662326de062196a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 18:10:33 -0700 Subject: [PATCH 032/154] fix: codex OAuth credential pool disconnect + expired token import (#5681) Three bugs causing OpenAI Codex sessions to fail silently: 1. Credential pool vs legacy store disconnect: hermes auth and hermes model store device_code tokens in the credential pool, but get_codex_auth_status(), resolve_codex_runtime_credentials(), and _model_flow_openai_codex() only read from the legacy provider state. Fresh pool tokens were invisible to the auth status checks and model selection flow. 2. _import_codex_cli_tokens() imported expired tokens from ~/.codex/ without checking JWT expiry. Combined with _login_openai_codex() saying 'Login successful!' for expired credentials, users got stuck in a loop of dead tokens being recycled. 3. _login_openai_codex() accepted expired tokens from resolve_codex_runtime_credentials() without validating expiry before telling the user login succeeded. 
Fixes: - get_codex_auth_status() now checks credential pool first, falls back to legacy provider state - _model_flow_openai_codex() uses pool-aware auth status for token retrieval when fetching model lists - _import_codex_cli_tokens() validates JWT exp claim, rejects expired - _login_openai_codex() verifies resolved token isn't expiring before accepting existing credentials - _run_codex_stream() logs response.incomplete/failed terminal events with status and incomplete_details for diagnostics - Codex empty output recovery: captures streamed text during streaming and synthesizes a response when get_final_response() returns empty output (handles chatgpt.com backend-api edge cases) --- hermes_cli/auth.py | 77 +++++++++++++++++++++++++++++++++++++--------- hermes_cli/main.py | 15 +++++++-- run_agent.py | 63 +++++++++++++++++++++++++++++++++++-- 3 files changed, 136 insertions(+), 19 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index fd65246a..588d06d4 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -1099,7 +1099,8 @@ def _refresh_codex_auth_tokens( def _import_codex_cli_tokens() -> Optional[Dict[str, str]]: """Try to read tokens from ~/.codex/auth.json (Codex CLI shared file). - Returns tokens dict if valid, None otherwise. Does NOT write to the shared file. + Returns tokens dict if valid and not expired, None otherwise. + Does NOT write to the shared file. 
""" codex_home = os.getenv("CODEX_HOME", "").strip() if not codex_home: @@ -1112,7 +1113,17 @@ def _import_codex_cli_tokens() -> Optional[Dict[str, str]]: tokens = payload.get("tokens") if not isinstance(tokens, dict): return None - if not tokens.get("access_token") or not tokens.get("refresh_token"): + access_token = tokens.get("access_token") + refresh_token = tokens.get("refresh_token") + if not access_token or not refresh_token: + return None + # Reject expired tokens — importing stale tokens from ~/.codex/ + # that can't be refreshed leaves the user stuck with "Login successful!" + # but no working credentials. + if _codex_access_token_is_expiring(access_token, 0): + logger.debug( + "Codex CLI tokens at %s are expired — skipping import.", auth_path, + ) return None return dict(tokens) except Exception: @@ -1904,7 +1915,36 @@ def get_nous_auth_status() -> Dict[str, Any]: def get_codex_auth_status() -> Dict[str, Any]: - """Status snapshot for Codex auth.""" + """Status snapshot for Codex auth. + + Checks the credential pool first (where `hermes auth` stores credentials), + then falls back to the legacy provider state. + """ + # Check credential pool first — this is where `hermes auth` and + # `hermes model` store device_code tokens. 
+ try: + from agent.credential_pool import load_pool + pool = load_pool("openai-codex") + if pool and pool.has_credentials(): + entry = pool.select() + if entry is not None: + api_key = ( + getattr(entry, "runtime_api_key", None) + or getattr(entry, "access_token", "") + ) + if api_key and not _codex_access_token_is_expiring(api_key, 0): + return { + "logged_in": True, + "auth_store": str(_auth_file_path()), + "last_refresh": getattr(entry, "last_refresh", None), + "auth_mode": "chatgpt", + "source": f"pool:{getattr(entry, 'label', 'unknown')}", + "api_key": api_key, + } + except Exception: + pass + + # Fall back to legacy provider state try: creds = resolve_codex_runtime_credentials() return { @@ -1913,6 +1953,7 @@ def get_codex_auth_status() -> Dict[str, Any]: "last_refresh": creds.get("last_refresh"), "auth_mode": creds.get("auth_mode"), "source": creds.get("source"), + "api_key": creds.get("api_key"), } except AuthError as exc: return { @@ -2356,17 +2397,25 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None: # Check for existing Hermes-owned credentials try: existing = resolve_codex_runtime_credentials() - print("Existing Codex credentials found in Hermes auth store.") - try: - reuse = input("Use existing credentials? [Y/n]: ").strip().lower() - except (EOFError, KeyboardInterrupt): - reuse = "y" - if reuse in ("", "y", "yes"): - config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL)) - print() - print("Login successful!") - print(f" Config updated: {config_path} (model.provider=openai-codex)") - return + # Verify the resolved token is actually usable (not expired). + # resolve_codex_runtime_credentials attempts refresh, so if we get + # here the token should be valid — but double-check before telling + # the user "Login successful!". 
+ _resolved_key = existing.get("api_key", "") + if isinstance(_resolved_key, str) and _resolved_key and not _codex_access_token_is_expiring(_resolved_key, 60): + print("Existing Codex credentials found in Hermes auth store.") + try: + reuse = input("Use existing credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + config_path = _update_config_for_provider("openai-codex", existing.get("base_url", DEFAULT_CODEX_BASE_URL)) + print() + print("Login successful!") + print(f" Config updated: {config_path} (model.provider=openai-codex)") + return + else: + print("Existing Codex credentials are expired. Starting fresh login...") except AuthError: pass diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 205cef8d..1a968952 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1294,12 +1294,21 @@ def _model_flow_openai_codex(config, current_model=""): return _codex_token = None + # Prefer credential pool (where `hermes auth` stores device_code tokens), + # fall back to legacy provider state. 
try: - from hermes_cli.auth import resolve_codex_runtime_credentials - _codex_creds = resolve_codex_runtime_credentials() - _codex_token = _codex_creds.get("api_key") + _codex_status = get_codex_auth_status() + if _codex_status.get("logged_in"): + _codex_token = _codex_status.get("api_key") except Exception: pass + if not _codex_token: + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass codex_models = get_codex_model_ids(access_token=_codex_token) diff --git a/run_agent.py b/run_agent.py index ffe94774..95926ff8 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3868,6 +3868,10 @@ class AIAgent: has_tool_calls = False first_delta_fired = False self._reasoning_deltas_fired = False + # Accumulate streamed text so we can recover if get_final_response() + # returns empty output (e.g. chatgpt.com backend-api sends + # response.incomplete instead of response.completed). 
+ self._codex_streamed_text_parts: list = [] for attempt in range(max_stream_retries + 1): try: with active_client.responses.stream(**api_kwargs) as stream: @@ -3887,6 +3891,7 @@ class AIAgent: except Exception: pass self._fire_stream_delta(delta_text) + self._codex_streamed_text_parts.append(delta_text) # Track tool calls to suppress text streaming elif "function_call" in event_type: has_tool_calls = True @@ -3895,6 +3900,18 @@ class AIAgent: reasoning_text = getattr(event, "delta", "") if reasoning_text: self._fire_reasoning_delta(reasoning_text) + # Log non-completed terminal events for diagnostics + elif event_type in ("response.incomplete", "response.failed"): + resp_obj = getattr(event, "response", None) + status = getattr(resp_obj, "status", None) if resp_obj else None + incomplete_details = getattr(resp_obj, "incomplete_details", None) if resp_obj else None + logger.warning( + "Codex Responses stream received terminal event %s " + "(status=%s, incomplete_details=%s, streamed_chars=%d). %s", + event_type, status, incomplete_details, + sum(len(p) for p in self._codex_streamed_text_parts), + self._client_log_context(), + ) return stream.get_final_response() except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc: if attempt < max_stream_retries: @@ -7366,8 +7383,50 @@ class AIAgent: response_invalid = True error_details.append("response.output is not a list") elif len(output_items) == 0: - response_invalid = True - error_details.append("response.output is empty") + # Log diagnostics for empty output + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + _streamed_parts = getattr(self, "_codex_streamed_text_parts", []) + _streamed_text = "".join(_streamed_parts).strip() if _streamed_parts else "" + logging.warning( + "Codex response.output is empty " + "(status=%s, incomplete_details=%s, streamed_chars=%d, " + "output_text=%r, model=%s). 
%s", + _resp_status, _resp_incomplete, len(_streamed_text), + getattr(response, "output_text", None), + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + # Recovery: if we streamed text but the final response + # lost it (e.g. response.incomplete from chatgpt backend-api), + # synthesize a minimal response so the user gets the answer + # the model already delivered. + if _streamed_text: + logging.info( + "Recovering %d chars of streamed text as response " + "(status was %s).", len(_streamed_text), _resp_status, + ) + response = SimpleNamespace( + output=[SimpleNamespace( + type="message", + role="assistant", + status="completed", + content=[SimpleNamespace( + type="output_text", + text=_streamed_text, + )], + )], + status=_resp_status or "completed", + model=getattr(response, "model", self.model), + usage=getattr(response, "usage", None), + id=getattr(response, "id", None), + output_text=_streamed_text, + ) + # Clear the accumulated parts so we don't double-recover + self._codex_streamed_text_parts = [] + else: + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": content_blocks = getattr(response, "content", None) if response is not None else None if response is None: From e5aaa38ca7295ca02309c926e3b6896ecd17e138 Mon Sep 17 00:00:00 2001 From: Grateful Dave Date: Mon, 6 Apr 2026 21:16:56 -0400 Subject: [PATCH 033/154] fix: sync openai-codex pool entry from ~/.codex/auth.json on exhaustion (#5610) OpenAI OAuth refresh tokens are single-use and rotate on every refresh. When the Codex CLI (or another Hermes profile) refreshes its token, the pool entry's refresh_token becomes stale. Subsequent refresh attempts fail with invalid_grant, and the entry enters a 24-hour exhaustion cooldown with no recovery path. 
This mirrors the existing _sync_anthropic_entry_from_credentials_file() pattern: when an openai-codex entry is exhausted, compare its refresh_token against ~/.codex/auth.json and sync the fresh pair if they differ. Fixes the common scenario where users run 'codex login' to refresh their token externally and Hermes never picks it up. Co-authored-by: David Andrews (LexGenius.ai) --- agent/credential_pool.py | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 740fc59d..472f65f2 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -23,6 +23,7 @@ from hermes_cli.auth import ( _agent_key_is_usable, _codex_access_token_is_expiring, _decode_jwt_claims, + _import_codex_cli_tokens, _is_expiring, _load_auth_store, _load_provider_state, @@ -440,6 +441,39 @@ class CredentialPool: logger.debug("Failed to sync from credentials file: %s", exc) return entry + def _sync_codex_entry_from_cli(self, entry: PooledCredential) -> PooledCredential: + """Sync an openai-codex pool entry from ~/.codex/auth.json if tokens differ. + + OpenAI OAuth refresh tokens are single-use and rotate on every refresh. + When the Codex CLI (or another Hermes profile) refreshes its token, + the pool entry's refresh_token becomes stale. This method detects that + by comparing against ~/.codex/auth.json and syncing the fresh pair. 
+ """ + if self.provider != "openai-codex": + return entry + try: + cli_tokens = _import_codex_cli_tokens() + if not cli_tokens: + return entry + cli_refresh = cli_tokens.get("refresh_token", "") + cli_access = cli_tokens.get("access_token", "") + if cli_refresh and cli_refresh != entry.refresh_token: + logger.debug("Pool entry %s: syncing tokens from ~/.codex/auth.json (refresh token changed)", entry.id) + updated = replace( + entry, + access_token=cli_access, + refresh_token=cli_refresh, + last_status=None, + last_status_at=None, + last_error_code=None, + ) + self._replace_entry(entry, updated) + self._persist() + return updated + except Exception as exc: + logger.debug("Failed to sync from ~/.codex/auth.json: %s", exc) + return entry + def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]: if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token: if force: @@ -629,6 +663,16 @@ class CredentialPool: if synced is not entry: entry = synced cleared_any = True + # For openai-codex entries, sync from ~/.codex/auth.json before + # any status/refresh checks. This picks up tokens refreshed by + # the Codex CLI or another Hermes profile. + if (self.provider == "openai-codex" + and entry.last_status == STATUS_EXHAUSTED + and entry.refresh_token): + synced = self._sync_codex_entry_from_cli(entry) + if synced is not entry: + entry = synced + cleared_any = True if entry.last_status == STATUS_EXHAUSTED: exhausted_until = _exhausted_until(entry) if exhausted_until is not None and now < exhausted_until: From 0e336b0e717027cbb81fcb5816246b7aec2d4a47 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 18:19:30 -0700 Subject: [PATCH 034/154] fix: backfill codex stream output from output_item.done events (#5689) Salvages the core fix from PR #5673 (egerev) onto current main. 
The chatgpt.com/backend-api/codex endpoint streams valid output items via response.output_item.done events, but the OpenAI SDK's get_final_response() returns an empty output list. This caused every Codex response to be rejected as invalid. Fix: collect output_item.done events during streaming and backfill response.output when get_final_response() returns empty. Falls back to synthesizing from text deltas when no done events were received. Also moves the synthesis logic from the validation loop (too late, from #5681) into _run_codex_stream() (before the response leaves the streaming function), and simplifies the validation to just log diagnostics since recovery now happens upstream. Co-authored-by: Egor --- run_agent.py | 84 +++++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/run_agent.py b/run_agent.py index 95926ff8..f5af556b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3873,6 +3873,7 @@ class AIAgent: # response.incomplete instead of response.completed). 
self._codex_streamed_text_parts: list = [] for attempt in range(max_stream_retries + 1): + collected_output_items: list = [] try: with active_client.responses.stream(**api_kwargs) as stream: for event in stream: @@ -3882,6 +3883,8 @@ class AIAgent: # Fire callbacks on text content deltas (suppress during tool calls) if "output_text.delta" in event_type or event_type == "response.output_text.delta": delta_text = getattr(event, "delta", "") + if delta_text: + self._codex_streamed_text_parts.append(delta_text) if delta_text and not has_tool_calls: if not first_delta_fired: first_delta_fired = True @@ -3891,7 +3894,6 @@ class AIAgent: except Exception: pass self._fire_stream_delta(delta_text) - self._codex_streamed_text_parts.append(delta_text) # Track tool calls to suppress text streaming elif "function_call" in event_type: has_tool_calls = True @@ -3900,6 +3902,14 @@ class AIAgent: reasoning_text = getattr(event, "delta", "") if reasoning_text: self._fire_reasoning_delta(reasoning_text) + # Collect completed output items — some backends + # (chatgpt.com/backend-api/codex) stream valid items + # via response.output_item.done but the SDK's + # get_final_response() returns an empty output list. + elif event_type == "response.output_item.done": + done_item = getattr(event, "item", None) + if done_item is not None: + collected_output_items.append(done_item) # Log non-completed terminal events for diagnostics elif event_type in ("response.incomplete", "response.failed"): resp_obj = getattr(event, "response", None) @@ -3912,7 +3922,31 @@ class AIAgent: sum(len(p) for p in self._codex_streamed_text_parts), self._client_log_context(), ) - return stream.get_final_response() + final_response = stream.get_final_response() + # PATCH: ChatGPT Codex backend streams valid output items + # but get_final_response() can return an empty output list. + # Backfill from collected items or synthesize from deltas. 
+ _out = getattr(final_response, "output", None) + if isinstance(_out, list) and not _out: + if collected_output_items: + final_response.output = list(collected_output_items) + logger.debug( + "Codex stream: backfilled %d output items from stream events", + len(collected_output_items), + ) + elif self._codex_streamed_text_parts and not has_tool_calls: + assembled = "".join(self._codex_streamed_text_parts) + final_response.output = [SimpleNamespace( + type="message", + role="assistant", + status="completed", + content=[SimpleNamespace(type="output_text", text=assembled)], + )] + logger.debug( + "Codex stream: synthesized output from %d text deltas (%d chars)", + len(self._codex_streamed_text_parts), len(assembled), + ) + return final_response except (_httpx.RemoteProtocolError, _httpx.ReadTimeout, _httpx.ConnectError, ConnectionError) as exc: if attempt < max_stream_retries: logger.debug( @@ -7383,50 +7417,20 @@ class AIAgent: response_invalid = True error_details.append("response.output is not a list") elif len(output_items) == 0: - # Log diagnostics for empty output + # If we reach here, _run_codex_stream's backfill + # from output_item.done events and text-delta + # synthesis both failed to populate output. _resp_status = getattr(response, "status", None) _resp_incomplete = getattr(response, "incomplete_details", None) - _streamed_parts = getattr(self, "_codex_streamed_text_parts", []) - _streamed_text = "".join(_streamed_parts).strip() if _streamed_parts else "" logging.warning( - "Codex response.output is empty " - "(status=%s, incomplete_details=%s, streamed_chars=%d, " - "output_text=%r, model=%s). %s", - _resp_status, _resp_incomplete, len(_streamed_text), - getattr(response, "output_text", None), + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, getattr(response, "model", None), f"api_mode={self.api_mode} provider={self.provider}", ) - # Recovery: if we streamed text but the final response - # lost it (e.g. response.incomplete from chatgpt backend-api), - # synthesize a minimal response so the user gets the answer - # the model already delivered. - if _streamed_text: - logging.info( - "Recovering %d chars of streamed text as response " - "(status was %s).", len(_streamed_text), _resp_status, - ) - response = SimpleNamespace( - output=[SimpleNamespace( - type="message", - role="assistant", - status="completed", - content=[SimpleNamespace( - type="output_text", - text=_streamed_text, - )], - )], - status=_resp_status or "completed", - model=getattr(response, "model", self.model), - usage=getattr(response, "usage", None), - id=getattr(response, "id", None), - output_text=_streamed_text, - ) - # Clear the accumulated parts so we don't double-recover - self._codex_streamed_text_parts = [] - else: - response_invalid = True - error_details.append("response.output is empty") + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": content_blocks = getattr(response, "content", None) if response is not None else None if response is None: From 2021442c8a5cc982cf787e829de064c00ccc8a3d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 20:58:47 -0700 Subject: [PATCH 035/154] fix: cover remaining codex empty-output gaps in fallback + normalizer (#5724) Two gaps in the codex empty-output handling: 1. _run_codex_create_stream_fallback() skipped all non-terminal events, so when the fallback path was used and the terminal response had empty output, there was no recovery. Now collects output_item.done and text deltas during the fallback stream, backfills on empty output. 2. 
_normalize_codex_response() hard-crashed with RuntimeError when output was empty, even when the response had output_text set. The function already had fallback logic at line 3562 to use output_text, but the guard at line 3446 killed it first. Now checks output_text before raising and synthesizes a minimal output item. --- run_agent.py | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index f5af556b..cc0e06bd 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3444,7 +3444,22 @@ class AIAgent: """Normalize a Responses API object to an assistant_message-like object.""" output = getattr(response, "output", None) if not isinstance(output, list) or not output: - raise RuntimeError("Responses API returned no output items") + # The Codex backend can return empty output when the answer was + # delivered entirely via stream events. Check output_text as a + # last-resort fallback before raising. + out_text = getattr(response, "output_text", None) + if isinstance(out_text, str) and out_text.strip(): + logger.debug( + "Codex response has empty output but output_text is present (%d chars); " + "synthesizing output item.", len(out_text.strip()), + ) + output = [SimpleNamespace( + type="message", role="assistant", status="completed", + content=[SimpleNamespace(type="output_text", text=out_text.strip())], + )] + response.output = output + else: + raise RuntimeError("Responses API returned no output items") response_status = getattr(response, "status", None) if isinstance(response_status, str): @@ -3997,11 +4012,28 @@ class AIAgent: return stream_or_response terminal_response = None + collected_output_items: list = [] + collected_text_deltas: list = [] try: for event in stream_or_response: event_type = getattr(event, "type", None) if not event_type and isinstance(event, dict): event_type = event.get("type") + + # Collect output items and text deltas for backfill + if event_type == 
"response.output_item.done": + done_item = getattr(event, "item", None) + if done_item is None and isinstance(event, dict): + done_item = event.get("item") + if done_item is not None: + collected_output_items.append(done_item) + elif event_type in ("response.output_text.delta",): + delta = getattr(event, "delta", "") + if not delta and isinstance(event, dict): + delta = event.get("delta", "") + if delta: + collected_text_deltas.append(delta) + if event_type not in {"response.completed", "response.incomplete", "response.failed"}: continue @@ -4009,6 +4041,26 @@ class AIAgent: if terminal_response is None and isinstance(event, dict): terminal_response = event.get("response") if terminal_response is not None: + # Backfill empty output from collected stream events + _out = getattr(terminal_response, "output", None) + if isinstance(_out, list) and not _out: + if collected_output_items: + terminal_response.output = list(collected_output_items) + logger.debug( + "Codex fallback stream: backfilled %d output items", + len(collected_output_items), + ) + elif collected_text_deltas: + assembled = "".join(collected_text_deltas) + terminal_response.output = [SimpleNamespace( + type="message", role="assistant", + status="completed", + content=[SimpleNamespace(type="output_text", text=assembled)], + )] + logger.debug( + "Codex fallback stream: synthesized from %d deltas (%d chars)", + len(collected_text_deltas), len(assembled), + ) return terminal_response finally: close_fn = getattr(stream_or_response, "close", None) From 21b48b2ff552b42d8df11272b8c7436bcf6e0b7f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 21:13:22 -0700 Subject: [PATCH 036/154] fix: backfill empty codex output in auxiliary client (#5730) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _CodexCompletionsAdapter (used for compression, vision, web_extract, session_search, and memory flush when on the codex 
provider) streamed responses but discarded all events with 'for _event in stream: pass'. When get_final_response() returned empty output (the same chatgpt.com backend-api shape change), auxiliary calls silently returned None content. Now collects response.output_item.done and text deltas during streaming and backfills empty output — same pattern as _run_codex_stream(). Tested live against chatgpt.com/backend-api/codex with OAuth. --- agent/auxiliary_client.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 7cb8f9f5..8052d020 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -260,11 +260,44 @@ class _CodexCompletionsAdapter: usage = None try: + # Collect output items and text deltas during streaming — + # the Codex backend can return empty response.output from + # get_final_response() even when items were streamed. + collected_output_items: List[Any] = [] + collected_text_deltas: List[str] = [] with self._client.responses.stream(**resp_kwargs) as stream: for _event in stream: - pass + _etype = getattr(_event, "type", "") + if _etype == "response.output_item.done": + _done = getattr(_event, "item", None) + if _done is not None: + collected_output_items.append(_done) + elif "output_text.delta" in _etype: + _delta = getattr(_event, "delta", "") + if _delta: + collected_text_deltas.append(_delta) final = stream.get_final_response() + # Backfill empty output from collected stream events + _output = getattr(final, "output", None) + if isinstance(_output, list) and not _output: + if collected_output_items: + final.output = list(collected_output_items) + logger.debug( + "Codex auxiliary: backfilled %d output items from stream events", + len(collected_output_items), + ) + elif collected_text_deltas: + assembled = "".join(collected_text_deltas) + final.output = [SimpleNamespace( + type="message", role="assistant", status="completed", + 
content=[SimpleNamespace(type="output_text", text=assembled)], + )] + logger.debug( + "Codex auxiliary: synthesized from %d deltas (%d chars)", + len(collected_text_deltas), len(assembled), + ) + # Extract text and tool calls from the Responses output for item in getattr(final, "output", []): item_type = getattr(item, "type", None) From a23fcae943ca0c022dd626acefb1a84184aba20b Mon Sep 17 00:00:00 2001 From: Matthew Hardwick Date: Mon, 6 Apr 2026 21:04:06 -0700 Subject: [PATCH 037/154] docs: add 'setup' command to docker run example The docker container needs the explicit 'setup' subcommand to launch the setup wizard. Without it, the container starts in default mode. Co-authored-by: Omar Cherry-picked from PR #4896 (also submitted independently as PR #5532). --- website/docs/user-guide/docker.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md index 2940b867..c98f4abb 100644 --- a/website/docs/user-guide/docker.md +++ b/website/docs/user-guide/docker.md @@ -21,7 +21,7 @@ If this is your first time running Hermes Agent, create a data directory on the mkdir -p ~/.hermes docker run -it --rm \ -v ~/.hermes:/opt/data \ - nousresearch/hermes-agent + nousresearch/hermes-agent setup ``` This drops you into the setup wizard, which will prompt you for your API keys and write them to `~/.hermes/.env`. You only need to do this once. It is highly recommended to set up a chat system for the gateway to work with at this point. From bccd7d098c81730714aa004ed8ff8684463cee6a Mon Sep 17 00:00:00 2001 From: Jay Weeldreyer Date: Mon, 6 Apr 2026 21:04:43 -0700 Subject: [PATCH 038/154] docs: add post-update validation guidance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a concise post-update validation checklist (git status, hermes doctor, version check, gateway status). 
Adapted from PR #3050 with corrections — removed inaccurate submodule claim (hermes update already handles submodules) and tightened the checklist. Cherry-picked and adapted from PR #3050. --- website/docs/getting-started/updating.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md index 04abcc40..16bb0ce4 100644 --- a/website/docs/getting-started/updating.md +++ b/website/docs/getting-started/updating.md @@ -45,6 +45,20 @@ Already up to date. (or: Updating abc1234..def5678) ✅ Hermes Agent updated successfully! ``` +### Recommended Post-Update Validation + +`hermes update` handles the main update path, but a quick validation confirms everything landed cleanly: + +1. `git status --short` — if the tree is unexpectedly dirty, inspect before continuing +2. `hermes doctor` — checks config, dependencies, and service health +3. `hermes --version` — confirm the version bumped as expected +4. If you use the gateway: `hermes gateway status` +5. If `doctor` reports npm audit issues: run `npm audit fix` in the flagged directory + +:::warning Dirty working tree after update +If `git status --short` shows unexpected changes after `hermes update`, stop and inspect them before continuing. This usually means local modifications were reapplied on top of the updated code, or a dependency step refreshed lockfiles. +::: + ### Checking your current version ```bash From 9ce8d59470b63556714d97ccfabc674f2c36b645 Mon Sep 17 00:00:00 2001 From: OmniWired Date: Mon, 6 Apr 2026 21:05:32 -0700 Subject: [PATCH 039/154] docs: add local LLM on Mac guide (llama.cpp + MLX) Comprehensive guide covering: - llama.cpp and MLX (omlx) setup on Apple Silicon - Model selection and memory optimization (quantized KV cache) - Real benchmarks on M5 Max comparing both backends - Hermes connection instructions Cherry-picked from PR #2590. 
--- website/docs/guides/local-llm-on-mac.md | 219 ++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 website/docs/guides/local-llm-on-mac.md diff --git a/website/docs/guides/local-llm-on-mac.md b/website/docs/guides/local-llm-on-mac.md new file mode 100644 index 00000000..eaa8b238 --- /dev/null +++ b/website/docs/guides/local-llm-on-mac.md @@ -0,0 +1,219 @@ +--- +sidebar_position: 8 +title: "Run Local LLMs on Mac" +description: "Set up a local OpenAI-compatible LLM server on macOS with llama.cpp or MLX, including model selection, memory optimization, and real benchmarks on Apple Silicon" +--- + +# Run Local LLMs on Mac + +This guide walks you through running a local LLM server on macOS with an OpenAI-compatible API. You get full privacy, zero API costs, and surprisingly good performance on Apple Silicon. + +We cover two backends: + +| Backend | Install | Best at | Format | +|---------|---------|---------|--------| +| **llama.cpp** | `brew install llama.cpp` | Fastest time-to-first-token, quantized KV cache for low memory | GGUF | +| **omlx** | [omlx.ai](https://omlx.ai) | Fastest token generation, native Metal optimization | MLX (safetensors) | + +Both expose an OpenAI-compatible `/v1/chat/completions` endpoint. Hermes works with either one — just point it at `http://localhost:8080` or `http://localhost:8000`. + +:::info Apple Silicon only +This guide targets Macs with Apple Silicon (M1 and later). Intel Macs will work with llama.cpp but without GPU acceleration — expect significantly slower performance. +::: + +--- + +## Choosing a model + +For getting started, we recommend **Qwen3.5-9B** — it's a strong reasoning model that fits comfortably in 8GB+ of unified memory with quantization. 
+ +| Variant | Size on disk | RAM needed (128K context) | Backend | +|---------|-------------|---------------------------|---------| +| Qwen3.5-9B-Q4_K_M (GGUF) | 5.3 GB | ~10 GB with quantized KV cache | llama.cpp | +| Qwen3.5-9B-mlx-lm-mxfp4 (MLX) | ~5 GB | ~12 GB | omlx | + +**Memory rule of thumb:** model size + KV cache. A 9B Q4 model is ~5 GB. The KV cache at 128K context with Q4 quantization adds ~4-5 GB. With default (f16) KV cache, that balloons to ~16 GB. The quantized KV cache flags in llama.cpp are the key trick for memory-constrained systems. + +For larger models (27B, 35B), you'll need 32 GB+ of unified memory. The 9B is the sweet spot for 8-16 GB machines. + +--- + +## Option A: llama.cpp + +llama.cpp is the most portable local LLM runtime. On macOS it uses Metal for GPU acceleration out of the box. + +### Install + +```bash +brew install llama.cpp +``` + +This gives you the `llama-server` command globally. + +### Download the model + +You need a GGUF-format model. The easiest source is Hugging Face via the `huggingface-cli`: + +```bash +brew install huggingface-cli +``` + +Then download: + +```bash +huggingface-cli download unsloth/Qwen3.5-9B-GGUF Qwen3.5-9B-Q4_K_M.gguf --local-dir ~/models +``` + +:::tip Gated models +Some models on Hugging Face require authentication. Run `huggingface-cli login` first if you get a 401 or 404 error. +::: + +### Start the server + +```bash +llama-server -m ~/models/Qwen3.5-9B-Q4_K_M.gguf \ + -ngl 99 \ + -c 131072 \ + -np 1 \ + -fa on \ + --cache-type-k q4_0 \ + --cache-type-v q4_0 \ + --host 0.0.0.0 +``` + +Here's what each flag does: + +| Flag | Purpose | +|------|---------| +| `-ngl 99` | Offload all layers to GPU (Metal). Use a high number to ensure nothing stays on CPU. | +| `-c 131072` | Context window size (128K tokens). Reduce this if you're low on memory. | +| `-np 1` | Number of parallel slots. Keep at 1 for single-user use — more slots split your memory budget. | +| `-fa on` | Flash attention. 
Reduces memory usage and speeds up long-context inference. | +| `--cache-type-k q4_0` | Quantize the key cache to 4-bit. **This is the big memory saver.** | +| `--cache-type-v q4_0` | Quantize the value cache to 4-bit. Together with the above, this cuts KV cache memory by ~75% vs f16. | +| `--host 0.0.0.0` | Listen on all interfaces. Use `127.0.0.1` if you don't need network access. | + +The server is ready when you see: + +``` +main: server is listening on http://0.0.0.0:8080 +srv update_slots: all slots are idle +``` + +### Memory optimization for constrained systems + +The `--cache-type-k q4_0 --cache-type-v q4_0` flags are the most important optimization for systems with limited memory. Here's the impact at 128K context: + +| KV cache type | KV cache memory (128K ctx, 9B model) | +|---------------|--------------------------------------| +| f16 (default) | ~16 GB | +| q8_0 | ~8 GB | +| **q4_0** | **~4 GB** | + +On an 8 GB Mac, use `q4_0` KV cache and reduce context to `-c 32768` (32K). On 16 GB, you can comfortably do 128K context. On 32 GB+, you can run larger models or multiple parallel slots. + +If you're still running out of memory, reduce context size first (`-c`), then try a smaller quantization (Q3_K_M instead of Q4_K_M). + +### Test it + +```bash +curl -s http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen3.5-9B-Q4_K_M.gguf", + "messages": [{"role": "user", "content": "Hello!"}], + "max_tokens": 50 + }' | jq .choices[0].message.content +``` + +### Get the model name + +If you forget the model name, query the models endpoint: + +```bash +curl -s http://localhost:8080/v1/models | jq '.data[].id' +``` + +--- + +## Option B: MLX via omlx + +[omlx](https://omlx.ai) is a macOS-native app that manages and serves MLX models. MLX is Apple's own machine learning framework, optimized specifically for Apple Silicon's unified memory architecture. 
+ +### Install + +Download and install from [omlx.ai](https://omlx.ai). It provides a GUI for model management and a built-in server. + +### Download the model + +Use the omlx app to browse and download models. Search for `Qwen3.5-9B-mlx-lm-mxfp4` and download it. Models are stored locally (typically in `~/.omlx/models/`). + +### Start the server + +omlx serves models on `http://127.0.0.1:8000` by default. Start serving from the app UI, or use the CLI if available. + +### Test it + +```bash +curl -s http://127.0.0.1:8000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen3.5-9B-mlx-lm-mxfp4", + "messages": [{"role": "user", "content": "Hello!"}], + "max_tokens": 50 + }' | jq .choices[0].message.content +``` + +### List available models + +omlx can serve multiple models simultaneously: + +```bash +curl -s http://127.0.0.1:8000/v1/models | jq '.data[].id' +``` + +--- + +## Benchmarks: llama.cpp vs MLX + +Both backends tested on the same machine (Apple M5 Max, 128 GB unified memory) running the same model (Qwen3.5-9B) at comparable quantization levels (Q4_K_M for GGUF, mxfp4 for MLX). Five diverse prompts, three runs each, backends tested sequentially to avoid resource contention. + +### Results + +| Metric | llama.cpp (Q4_K_M) | MLX (mxfp4) | Winner | +|--------|-------------------|-------------|--------| +| **TTFT (avg)** | **67 ms** | 289 ms | llama.cpp (4.3x faster) | +| **TTFT (p50)** | **66 ms** | 286 ms | llama.cpp (4.3x faster) | +| **Generation (avg)** | 70 tok/s | **96 tok/s** | MLX (37% faster) | +| **Generation (p50)** | 70 tok/s | **96 tok/s** | MLX (37% faster) | +| **Total time (512 tokens)** | 7.3s | **5.5s** | MLX (25% faster) | + +### What this means + +- **llama.cpp** excels at prompt processing — its flash attention + quantized KV cache pipeline gets you the first token in ~66ms. 
If you're building interactive applications where perceived responsiveness matters (chatbots, autocomplete), this is a meaningful advantage. + +- **MLX** generates tokens ~37% faster once it gets going. For batch workloads, long-form generation, or any task where total completion time matters more than initial latency, MLX finishes sooner. + +- Both backends are **extremely consistent** — variance across runs was negligible. You can rely on these numbers. + +### Which one should you pick? + +| Use case | Recommendation | +|----------|---------------| +| Interactive chat, low-latency tools | llama.cpp | +| Long-form generation, bulk processing | MLX (omlx) | +| Memory-constrained (8-16 GB) | llama.cpp (quantized KV cache is unmatched) | +| Serving multiple models simultaneously | omlx (built-in multi-model support) | +| Maximum compatibility (Linux too) | llama.cpp | + +--- + +## Connect to Hermes + +Once your local server is running: + +```bash +hermes model +``` + +Select **Custom endpoint** and follow the prompts. It will ask for the base URL and model name — use the values from whichever backend you set up above. From 43cf68055b688fcfe3b4d7c8689b3abf57de588a Mon Sep 17 00:00:00 2001 From: Andrian Date: Mon, 6 Apr 2026 21:08:28 -0700 Subject: [PATCH 040/154] docs: fix signal-cli install instructions signal-cli is not available via apt or snap. Replace the incorrect 'sudo apt install signal-cli' with the official install method: downloading from GitHub releases (Linux) or brew (macOS). Updated both signal.md docs and the gateway.py setup hint. Inspired by PR #4225 (which proposed snap, also incorrect). 
--- hermes_cli/gateway.py | 3 +-- website/docs/user-guide/messaging/signal.md | 16 ++++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index c99761d5..4a12a34b 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -1803,8 +1803,7 @@ def _setup_signal(): print_warning("signal-cli not found on PATH.") print_info(" Signal requires signal-cli running as an HTTP daemon.") print_info(" Install options:") - print_info(" Linux: sudo apt install signal-cli") - print_info(" or download from https://github.com/AsamK/signal-cli") + print_info(" Linux: download from https://github.com/AsamK/signal-cli/releases") print_info(" macOS: brew install signal-cli") print_info(" Docker: bbernhard/signal-cli-rest-api") print() diff --git a/website/docs/user-guide/messaging/signal.md b/website/docs/user-guide/messaging/signal.md index d47b7ca0..3fc9eba6 100644 --- a/website/docs/user-guide/messaging/signal.md +++ b/website/docs/user-guide/messaging/signal.md @@ -25,17 +25,21 @@ The Signal adapter uses `httpx` (already a core Hermes dependency) for all commu ### Installing signal-cli ```bash -# Linux (Debian/Ubuntu) -sudo apt install signal-cli - # macOS brew install signal-cli -# Manual install (any platform) -# Download from https://github.com/AsamK/signal-cli/releases -# Extract and add to PATH +# Linux (download latest release) +VERSION=$(curl -Ls -o /dev/null -w %{url_effective} \ + https://github.com/AsamK/signal-cli/releases/latest | sed 's/^.*\/v//') +curl -L -O "https://github.com/AsamK/signal-cli/releases/download/v${VERSION}/signal-cli-${VERSION}.tar.gz" +sudo tar xf "signal-cli-${VERSION}.tar.gz" -C /opt +sudo ln -sf "/opt/signal-cli-${VERSION}/bin/signal-cli" /usr/local/bin/ ``` +:::caution +signal-cli is **not** in apt or snap repositories. The Linux install above downloads directly from [GitHub releases](https://github.com/AsamK/signal-cli/releases). 
+::: + --- ## Step 1: Link Your Signal Account From 9b6e5f6a04996a936d08ad0b3f3ccfd6cc3e9384 Mon Sep 17 00:00:00 2001 From: eizus Date: Mon, 6 Apr 2026 11:59:54 -0400 Subject: [PATCH 041/154] fix(gateway): Apply markdown-to-mrkdwn conversion in edit_message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The edit_message method was sending raw content directly to Slack's chat_update API without converting standard markdown to Slack's mrkdwn format. This caused broken formatting and malformed URLs (e.g., trailing ** from bold syntax became part of clickable links → 404 errors). The send() method already calls format_message() to handle this conversion, but edit_message() was bypassing it. This change ensures edited messages receive the same markdown → mrkdwn transformation as new messages. Closes: PR #5558 formatting issue where links had trailing markdown syntax. --- gateway/platforms/slack.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 2e7bbee7..b4bf5ffd 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -276,10 +276,13 @@ class SlackAdapter(BasePlatformAdapter): if not self._app: return SendResult(success=False, error="Not connected") try: + # Convert standard markdown → Slack mrkdwn + formatted = self.format_message(content) + await self._get_client(chat_id).chat_update( channel=chat_id, ts=message_id, - text=content, + text=formatted, ) return SendResult(success=True, message_id=message_id) except Exception as e: # pragma: no cover - defensive logging From 4ec615b0c245d5dc4011cac6490ada5916efcdd9 Mon Sep 17 00:00:00 2001 From: eizus Date: Mon, 6 Apr 2026 11:46:17 -0400 Subject: [PATCH 042/154] feat(gateway): Enable Slack thread replies without explicit @mentions When a user replies in a Slack thread where the bot has an active conversation session, the bot now processes the message even without an explicit 
@mention. This improves UX for ongoing threaded discussions. Changes: - Added set_session_store() to BasePlatformAdapter for adapters to check active sessions - Modified SlackAdapter to detect thread replies and check if a session exists for that thread before requiring @mentions - Updated GatewayRunner to inject the session store into adapters - Added comprehensive tests for the new behavior Fixes: Thread replies without @jarvis are now processed if there is an active session, matching user expectations for conversation flow --- gateway/platforms/base.py | 10 +++ gateway/platforms/slack.py | 85 +++++++++++++++++++++- gateway/run.py | 2 + tests/gateway/test_slack.py | 141 ++++++++++++++++++++++++++++++++++++ 4 files changed, 235 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 0ba00d89..4335a51f 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -569,6 +569,16 @@ class BasePlatformAdapter(ABC): """ self._message_handler = handler + def set_session_store(self, session_store: Any) -> None: + """ + Set the session store for checking active sessions. + + Used by adapters that need to check if a thread/conversation + has an active session before processing messages (e.g., Slack + thread replies without explicit mentions). + """ + self._session_store = session_store + @abstractmethod async def connect(self) -> bool: """ diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index b4bf5ffd..384f379d 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -766,11 +766,28 @@ class SlackAdapter(BasePlatformAdapter): else: thread_ts = event.get("thread_ts") or ts # ts fallback for channels - # In channels, only respond if bot is mentioned + # In channels, only respond if bot is mentioned OR if this is a + # reply in a thread where the bot has an active session. 
bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id) - if not is_dm and bot_uid: - if f"<@{bot_uid}>" not in text: + is_mentioned = bot_uid and f"<@{bot_uid}>" in text + + if not is_dm and bot_uid and not is_mentioned: + # Check if this is a thread reply (thread_ts exists and differs from ts) + event_thread_ts = event.get("thread_ts") + is_thread_reply = event_thread_ts and event_thread_ts != ts + + if is_thread_reply and self._has_active_session_for_thread( + channel_id=channel_id, + thread_ts=event_thread_ts, + user_id=user_id, + ): + # Allow thread replies without mention if there's an active session + pass + else: + # Not a thread reply or no active session - ignore return + + if is_mentioned: # Strip the bot mention from the text text = text.replace(f"<@{bot_uid}>", "").strip() @@ -936,6 +953,68 @@ class SlackAdapter(BasePlatformAdapter): await self.handle_message(event) + def _has_active_session_for_thread( + self, + channel_id: str, + thread_ts: str, + user_id: str, + ) -> bool: + """Check if there's an active session for a thread. + + Used to determine if thread replies without @mentions should be + processed (they should if there's an active session). 
+ + Args: + channel_id: The Slack channel ID + thread_ts: The thread timestamp (parent message ts) + user_id: The user ID of the sender + + Returns: + True if there's an active session for this thread + """ + session_store = getattr(self, "_session_store", None) + if not session_store: + return False + + try: + # Build a SessionSource for this thread + from gateway.session import SessionSource + from gateway.config import Platform + + source = SessionSource( + platform=Platform.SLACK, + chat_id=channel_id, + chat_type="group", + user_id=user_id, + thread_id=thread_ts, + ) + + # Generate the session key using the same logic as SessionStore + # This mirrors the logic in build_session_key for group sessions + key_parts = ["agent:main", "slack", "group", channel_id, thread_ts] + + # Include user_id if group_sessions_per_user is enabled + # We check the session store config if available + group_sessions_per_user = getattr( + session_store, "config", {} + ) + if hasattr(group_sessions_per_user, "group_sessions_per_user"): + group_sessions_per_user = group_sessions_per_user.group_sessions_per_user + else: + group_sessions_per_user = True # Default + + if group_sessions_per_user and user_id: + key_parts.append(str(user_id)) + + session_key = ":".join(key_parts) + + # Check if the session exists in the store + session_store._ensure_loaded() + return session_key in session_store._entries + except Exception: + # If anything goes wrong, default to False (require mention) + return False + async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str: """Download a Slack file using the bot token for auth, with retry.""" import asyncio diff --git a/gateway/run.py b/gateway/run.py index 82cb10b4..9d5ac5aa 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1127,6 +1127,7 @@ class GatewayRunner: # Set up message + fatal error handlers adapter.set_message_handler(self._handle_message) 
adapter.set_fatal_error_handler(self._handle_adapter_fatal_error) + adapter.set_session_store(self.session_store) # Try to connect logger.info("Connecting to %s...", platform.value) @@ -1424,6 +1425,7 @@ class GatewayRunner: adapter.set_message_handler(self._handle_message) adapter.set_fatal_error_handler(self._handle_adapter_fatal_error) + adapter.set_session_store(self.session_store) success = await adapter.connect() if success: diff --git a/tests/gateway/test_slack.py b/tests/gateway/test_slack.py index 81f8077a..89b44718 100644 --- a/tests/gateway/test_slack.py +++ b/tests/gateway/test_slack.py @@ -699,6 +699,147 @@ class TestReactions: assert remove_calls[0].kwargs["name"] == "eyes" +# --------------------------------------------------------------------------- +# TestThreadReplyHandling +# --------------------------------------------------------------------------- + + +class TestThreadReplyHandling: + """Test thread reply processing without explicit bot mentions.""" + + @pytest.fixture() + def mock_session_store(self): + """Create a mock session store with entries dict.""" + store = MagicMock() + store._entries = {} + store._ensure_loaded = MagicMock() + store.config = MagicMock() + store.config.group_sessions_per_user = True + return store + + @pytest.fixture() + def adapter_with_session_store(self, mock_session_store): + """Create an adapter with a mock session store attached.""" + config = PlatformConfig(enabled=True, token="***") + a = SlackAdapter(config) + a._app = MagicMock() + a._app.client = AsyncMock() + a._bot_user_id = "U_BOT" + a._team_bot_user_ids = {"T_TEAM": "U_BOT"} + a._running = True + a.handle_message = AsyncMock() + a.set_session_store(mock_session_store) + return a + + @pytest.mark.asyncio + async def test_thread_reply_without_mention_no_session_ignored( + self, adapter_with_session_store, mock_session_store + ): + """Thread replies without mention should be ignored if no active session.""" + mock_session_store._entries = {} # No active 
sessions + + event = { + "text": "Just replying in the thread", + "user": "U_USER", + "channel": "C123", + "ts": "123.456", + "thread_ts": "123.000", # Different from ts - this is a reply + "channel_type": "channel", + "team": "T_TEAM", + } + await adapter_with_session_store._handle_slack_message(event) + adapter_with_session_store.handle_message.assert_not_called() + + @pytest.mark.asyncio + async def test_thread_reply_without_mention_with_session_processed( + self, adapter_with_session_store, mock_session_store + ): + """Thread replies without mention should be processed if there's an active session.""" + # Simulate an active session for this thread + session_key = "agent:main:slack:group:C123:123.000:U_USER" + mock_session_store._entries = {session_key: MagicMock()} + + event = { + "text": "Follow-up question", + "user": "U_USER", + "channel": "C123", + "ts": "123.456", + "thread_ts": "123.000", # Reply in thread 123.000 + "channel_type": "channel", + "team": "T_TEAM", + } + await adapter_with_session_store._handle_slack_message(event) + adapter_with_session_store.handle_message.assert_called_once() + + # Verify the text is passed through unchanged (no mention stripping needed) + msg_event = adapter_with_session_store.handle_message.call_args[0][0] + assert msg_event.text == "Follow-up question" + + @pytest.mark.asyncio + async def test_thread_reply_with_mention_strips_bot_id( + self, adapter_with_session_store, mock_session_store + ): + """Thread replies with @mention should still strip the bot ID.""" + # Even with a session, mentions should be stripped + session_key = "agent:main:slack:group:C123:123.000:U_USER" + mock_session_store._entries = {session_key: MagicMock()} + + event = { + "text": "<@U_BOT> thanks for the help", + "user": "U_USER", + "channel": "C123", + "ts": "123.456", + "thread_ts": "123.000", + "channel_type": "channel", + "team": "T_TEAM", + } + await adapter_with_session_store._handle_slack_message(event) + 
adapter_with_session_store.handle_message.assert_called_once() + + msg_event = adapter_with_session_store.handle_message.call_args[0][0] + assert "<@U_BOT>" not in msg_event.text + assert msg_event.text == "thanks for the help" + + @pytest.mark.asyncio + async def test_top_level_message_requires_mention_even_with_session( + self, adapter_with_session_store, mock_session_store + ): + """Top-level channel messages should require mention even if session exists.""" + # Session exists but this is a top-level message (no thread_ts) + session_key = "agent:main:slack:group:C123:123.000:U_USER" + mock_session_store._entries = {session_key: MagicMock()} + + event = { + "text": "New question without mention", + "user": "U_USER", + "channel": "C123", + "ts": "456.789", + # No thread_ts - this is a top-level message + "channel_type": "channel", + "team": "T_TEAM", + } + await adapter_with_session_store._handle_slack_message(event) + adapter_with_session_store.handle_message.assert_not_called() + + @pytest.mark.asyncio + async def test_no_session_store_ignores_thread_replies( + self, adapter + ): + """If no session store is attached, thread replies without mention should be ignored.""" + # adapter fixture has no session store attached + event = { + "text": "Thread reply without mention", + "user": "U_USER", + "channel": "C123", + "ts": "123.456", + "thread_ts": "123.000", + "channel_type": "channel", + "team": "T_TEAM", + } + await adapter._handle_slack_message(event) + adapter.handle_message.assert_not_called() + + # --------------------------------------------------------------------------- # TestUserNameResolution # --------------------------------------------------------------------------- From 888dc1e68079e50ca8ee148a29623decb85c99b3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 21:35:33 -0700 Subject: [PATCH 043/154] =?UTF-8?q?fix:=20harden=20auxiliary=20codex=20ada?= 
=?UTF-8?q?pter=20=E2=80=94=20dict-shaped=20items=20+=20tool=20call=20guar?= =?UTF-8?q?d=20(#5734)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two remaining gaps from the codex empty-output spec: 1. Normalize dict-shaped streamed items: output_item.done events may yield dicts (raw/fallback paths) instead of SDK objects. The extraction loop now uses _item_get() that handles both getattr and dict .get() access. 2. Avoid plain-text synthesis when function_call events were streamed: tracks has_function_calls during streaming and skips text-delta synthesis when tool calls are present — prevents collapsing a tool-call response into a fake text message. --- agent/auxiliary_client.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 8052d020..9edc505e 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -265,6 +265,7 @@ class _CodexCompletionsAdapter: # get_final_response() even when items were streamed. 
collected_output_items: List[Any] = [] collected_text_deltas: List[str] = [] + has_function_calls = False with self._client.responses.stream(**resp_kwargs) as stream: for _event in stream: _etype = getattr(_event, "type", "") @@ -276,6 +277,8 @@ class _CodexCompletionsAdapter: _delta = getattr(_event, "delta", "") if _delta: collected_text_deltas.append(_delta) + elif "function_call" in _etype: + has_function_calls = True final = stream.get_final_response() # Backfill empty output from collected stream events @@ -287,7 +290,10 @@ class _CodexCompletionsAdapter: "Codex auxiliary: backfilled %d output items from stream events", len(collected_output_items), ) - elif collected_text_deltas: + elif collected_text_deltas and not has_function_calls: + # Only synthesize text when no tool calls were streamed — + # a function_call response with incidental text should not + # be collapsed into a plain-text message. assembled = "".join(collected_text_deltas) final.output = [SimpleNamespace( type="message", role="assistant", status="completed", @@ -298,21 +304,29 @@ class _CodexCompletionsAdapter: len(collected_text_deltas), len(assembled), ) - # Extract text and tool calls from the Responses output + # Extract text and tool calls from the Responses output. + # Items may be SDK objects (attrs) or dicts (raw/fallback paths), + # so use a helper that handles both shapes. 
+ def _item_get(obj: Any, key: str, default: Any = None) -> Any: + val = getattr(obj, key, None) + if val is None and isinstance(obj, dict): + val = obj.get(key, default) + return val if val is not None else default + for item in getattr(final, "output", []): - item_type = getattr(item, "type", None) + item_type = _item_get(item, "type") if item_type == "message": - for part in getattr(item, "content", []): - ptype = getattr(part, "type", None) + for part in (_item_get(item, "content") or []): + ptype = _item_get(part, "type") if ptype in ("output_text", "text"): - text_parts.append(getattr(part, "text", "")) + text_parts.append(_item_get(part, "text", "")) elif item_type == "function_call": tool_calls_raw.append(SimpleNamespace( - id=getattr(item, "call_id", ""), + id=_item_get(item, "call_id", ""), type="function", function=SimpleNamespace( - name=getattr(item, "name", ""), - arguments=getattr(item, "arguments", "{}"), + name=_item_get(item, "name", ""), + arguments=_item_get(item, "arguments", "{}"), ), )) From 972482e28e36d81ed84026538c9a882d5fc218c5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 22:02:47 -0700 Subject: [PATCH 044/154] =?UTF-8?q?docs:=20guides=20section=20overhaul=20?= =?UTF-8?q?=E2=80=94=20fix=20existing=20+=20add=203=20new=20tutorials=20(#?= =?UTF-8?q?5735)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * docs: fix guides section — sidebar ordering, broken links, position conflicts - Add local-llm-on-mac.md to sidebars.ts (was missing after salvage PR) - Reorder sidebar: tips first, then local LLM guide, then tutorials - Fix 10 broken links in team-telegram-assistant.md (missing /docs/ prefix) - Fix relative link in migrate-from-openclaw.md - Fix installation link pointing to learning-path instead of installation - Renumber all sidebar_position values to eliminate conflicts and match the explicit sidebars.ts ordering * docs: add 3 new 
guides — cron automation, skills, delegation New tutorial-style guides covering core features: - automate-with-cron.md (261 lines): 5 real-world patterns — website monitoring with scripts, weekly reports, GitHub watchers, data collection pipelines, multi-skill workflows. Covers [SILENT] trick, delivery targets, job management. - work-with-skills.md (268 lines): End-to-end skill workflow — finding, installing from Hub, configuring, creating from scratch with reference files, per-platform management, skills vs memory comparison. - delegation-patterns.md (239 lines): 5 patterns — parallel research, code review, alternative comparison, multi-file refactoring, gather-then-analyze (execute_code + delegate). Covers the context problem, toolset selection, constraints. Added all three to sidebars.ts in the Guides & Tutorials section. --- website/docs/guides/automate-with-cron.md | 261 +++++++++++++++++ website/docs/guides/build-a-hermes-plugin.md | 2 +- website/docs/guides/daily-briefing-bot.md | 2 +- website/docs/guides/delegation-patterns.md | 239 ++++++++++++++++ website/docs/guides/local-llm-on-mac.md | 2 +- website/docs/guides/migrate-from-openclaw.md | 4 +- website/docs/guides/python-library.md | 2 +- .../docs/guides/team-telegram-assistant.md | 18 +- website/docs/guides/use-mcp-with-hermes.md | 2 +- website/docs/guides/use-soul-with-hermes.md | 2 +- .../docs/guides/use-voice-mode-with-hermes.md | 2 +- website/docs/guides/work-with-skills.md | 268 ++++++++++++++++++ website/sidebars.ts | 6 +- 13 files changed, 791 insertions(+), 19 deletions(-) create mode 100644 website/docs/guides/automate-with-cron.md create mode 100644 website/docs/guides/delegation-patterns.md create mode 100644 website/docs/guides/work-with-skills.md diff --git a/website/docs/guides/automate-with-cron.md b/website/docs/guides/automate-with-cron.md new file mode 100644 index 00000000..fba8a082 --- /dev/null +++ b/website/docs/guides/automate-with-cron.md @@ -0,0 +1,261 @@ +--- +sidebar_position: 
11 +title: "Automate Anything with Cron" +description: "Real-world automation patterns using Hermes cron — monitoring, reports, pipelines, and multi-skill workflows" +--- + +# Automate Anything with Cron + +The [daily briefing bot tutorial](/docs/guides/daily-briefing-bot) covers the basics. This guide goes further — five real-world automation patterns you can adapt for your own workflows. + +For the full feature reference, see [Scheduled Tasks (Cron)](/docs/user-guide/features/cron). + +:::info Key Concept +Cron jobs run in fresh agent sessions with no memory of your current chat. Prompts must be **completely self-contained** — include everything the agent needs to know. +::: + +--- + +## Pattern 1: Website Change Monitor + +Watch a URL for changes and get notified only when something is different. + +The `script` parameter is the secret weapon here. A Python script runs before each execution, and its stdout becomes context for the agent. The script handles the mechanical work (fetching, diffing); the agent handles the reasoning (is this change interesting?). 
+ +Create the monitoring script: + +```bash +mkdir -p ~/.hermes/scripts +``` + +```python title="~/.hermes/scripts/watch-site.py" +import hashlib, json, os, urllib.request + +URL = "https://example.com/pricing" +STATE_FILE = os.path.expanduser("~/.hermes/scripts/.watch-site-state.json") + +# Fetch current content +req = urllib.request.Request(URL, headers={"User-Agent": "Hermes-Monitor/1.0"}) +content = urllib.request.urlopen(req, timeout=30).read().decode() +current_hash = hashlib.sha256(content.encode()).hexdigest() + +# Load previous state +prev_hash = None +if os.path.exists(STATE_FILE): + with open(STATE_FILE) as f: + prev_hash = json.load(f).get("hash") + +# Save current state +with open(STATE_FILE, "w") as f: + json.dump({"hash": current_hash, "url": URL}, f) + +# Output for the agent +if prev_hash and prev_hash != current_hash: + print(f"CHANGE DETECTED on {URL}") + print(f"Previous hash: {prev_hash}") + print(f"Current hash: {current_hash}") + print(f"\nCurrent content (first 2000 chars):\n{content[:2000]}") +else: + print("NO_CHANGE") +``` + +Set up the cron job: + +```bash +/cron add "every 1h" "If the script output says CHANGE DETECTED, summarize what changed on the page and why it might matter. If it says NO_CHANGE, respond with just [SILENT]." --script ~/.hermes/scripts/watch-site.py --name "Pricing monitor" --deliver telegram +``` + +:::tip The [SILENT] Trick +When the agent's final response contains `[SILENT]`, delivery is suppressed. This means you only get notified when something actually happens — no spam on quiet hours. +::: + +--- + +## Pattern 2: Weekly Report + +Compile information from multiple sources into a formatted summary. This runs once a week and delivers to your home channel. + +```bash +/cron add "0 9 * * 1" "Generate a weekly report covering: + +1. Search the web for the top 5 AI news stories from the past week +2. Search GitHub for trending repositories in the 'machine-learning' topic +3. 
Check Hacker News for the most discussed AI/ML posts + +Format as a clean summary with sections for each source. Include links. +Keep it under 500 words — highlight only what matters." --name "Weekly AI digest" --deliver telegram +``` + +From the CLI: + +```bash +hermes cron create "0 9 * * 1" \ + "Generate a weekly report covering the top AI news, trending ML GitHub repos, and most-discussed HN posts. Format with sections, include links, keep under 500 words." \ + --name "Weekly AI digest" \ + --deliver telegram +``` + +The `0 9 * * 1` is a standard cron expression: 9:00 AM every Monday. + +--- + +## Pattern 3: GitHub Repository Watcher + +Monitor a repository for new issues, PRs, or releases. + +```bash +/cron add "every 6h" "Check the GitHub repository NousResearch/hermes-agent for: +- New issues opened in the last 6 hours +- New PRs opened or merged in the last 6 hours +- Any new releases + +Use the terminal to run gh commands: + gh issue list --repo NousResearch/hermes-agent --state open --json number,title,author,createdAt --limit 10 + gh pr list --repo NousResearch/hermes-agent --state all --json number,title,author,createdAt,mergedAt --limit 10 + +Filter to only items from the last 6 hours. If nothing new, respond with [SILENT]. +Otherwise, provide a concise summary of the activity." --name "Repo watcher" --deliver discord +``` + +:::warning Self-Contained Prompts +Notice how the prompt includes the exact `gh` commands. The cron agent has no memory of previous runs or your preferences — spell everything out. +::: + +--- + +## Pattern 4: Data Collection Pipeline + +Scrape data at regular intervals, save to files, and detect trends over time. This pattern combines a script (for collection) with the agent (for analysis). 
+ +```python title="~/.hermes/scripts/collect-prices.py" +import json, os, urllib.request +from datetime import datetime + +DATA_DIR = os.path.expanduser("~/.hermes/data/prices") +os.makedirs(DATA_DIR, exist_ok=True) + +# Fetch current data (example: crypto prices) +url = "https://api.coingecko.com/api/v3/simple/price?ids=bitcoin,ethereum&vs_currencies=usd" +data = json.loads(urllib.request.urlopen(url, timeout=30).read()) + +# Append to history file +entry = {"timestamp": datetime.now().isoformat(), "prices": data} +history_file = os.path.join(DATA_DIR, "history.jsonl") +with open(history_file, "a") as f: + f.write(json.dumps(entry) + "\n") + +# Load recent history for analysis +lines = open(history_file).readlines() +recent = [json.loads(l) for l in lines[-24:]] # Last 24 data points + +# Output for the agent +print(f"Current: BTC=${data['bitcoin']['usd']}, ETH=${data['ethereum']['usd']}") +print(f"Data points collected: {len(lines)} total, showing last {len(recent)}") +print(f"\nRecent history:") +for r in recent[-6:]: + print(f" {r['timestamp']}: BTC=${r['prices']['bitcoin']['usd']}, ETH=${r['prices']['ethereum']['usd']}") +``` + +```bash +/cron add "every 1h" "Analyze the price data from the script output. Report: +1. Current prices +2. Trend direction over the last 6 data points (up/down/flat) +3. Any notable movements (>5% change) + +If prices are flat and nothing notable, respond with [SILENT]. +If there's a significant move, explain what happened." \ + --script ~/.hermes/scripts/collect-prices.py \ + --name "Price tracker" \ + --deliver telegram +``` + +The script does the mechanical collection; the agent adds the reasoning layer. + +--- + +## Pattern 5: Multi-Skill Workflow + +Chain skills together for complex scheduled tasks. Skills are loaded in order before the prompt executes. 
+```bash
+# Use the arxiv skill to find papers, then the obsidian skill to save notes
+/cron add "0 8 * * *" "Search arXiv for the 3 most interesting papers on 'language model reasoning' from the past day. For each paper, create an Obsidian note with the title, authors, abstract summary, and key contribution." \
+  --skill arxiv \
+  --skill obsidian \
+  --name "Paper digest"
+```
+
+From the tool directly:
+
+```python
+cronjob(
+    action="create",
+    skills=["arxiv", "obsidian"],
+    prompt="Search arXiv for papers on 'language model reasoning' from the past day. Save the top 3 as Obsidian notes.",
+    schedule="0 8 * * *",
+    name="Paper digest",
+    deliver="local"
+)
+```
+
+Skills are loaded in order — `arxiv` first (teaches the agent how to search papers), then `obsidian` (teaches how to write notes). The prompt ties them together.
+
+---
+
+## Managing Your Jobs
+
+```bash
+# List all active jobs
+/cron list
+
+# Trigger a job immediately (for testing)
+/cron run <job-id>
+
+# Pause a job without deleting it
+/cron pause <job-id>
+
+# Edit a running job's schedule or prompt
+/cron edit <job-id> --schedule "every 4h"
+/cron edit <job-id> --prompt "Updated task description"
+
+# Add or remove skills from an existing job
+/cron edit <job-id> --skill arxiv --skill obsidian
+/cron edit <job-id> --clear-skills
+
+# Remove a job permanently
+/cron remove <job-id>
+```
+
+---
+
+## Delivery Targets
+
+The `--deliver` flag controls where results go:
+
+| Target | Example | Use case |
+|--------|---------|----------|
+| `origin` | `--deliver origin` | Same chat that created the job (default) |
+| `local` | `--deliver local` | Save to local file only |
+| `telegram` | `--deliver telegram` | Your Telegram home channel |
+| `discord` | `--deliver discord` | Your Discord home channel |
+| `slack` | `--deliver slack` | Your Slack home channel |
+| Specific chat | `--deliver telegram:-1001234567890` | A specific Telegram group |
+| Threaded | `--deliver telegram:-1001234567890:17585` | A specific Telegram topic thread |
+
+---
+
+## Tips
+
+**Make prompts self-contained.** The agent in a cron job has no memory of your conversations. Include URLs, repo names, format preferences, and delivery instructions directly in the prompt.
+
+**Use `[SILENT]` liberally.** For monitoring jobs, always include instructions like "if nothing changed, respond with `[SILENT]`." This prevents notification noise.
+
+**Use scripts for data collection.** The `script` parameter lets a Python script handle the boring parts (HTTP requests, file I/O, state tracking). The agent only sees the script's stdout and applies reasoning to it. This is cheaper and more reliable than having the agent do the fetching itself.
+
+**Test with `/cron run`.** Before waiting for the schedule to trigger, use `/cron run <job-id>` to execute immediately and verify the output looks right.
+
+**Schedule expressions.** Human-readable formats like `every 2h`, `30m`, and `daily at 9am` all work alongside standard cron expressions like `0 9 * * *`.
+
+---
+
+*For the complete cron reference — all parameters, edge cases, and internals — see [Scheduled Tasks (Cron)](/docs/user-guide/features/cron).*
diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md
index e0a7f662..91fb62f3 100644
--- a/website/docs/guides/build-a-hermes-plugin.md
+++ b/website/docs/guides/build-a-hermes-plugin.md
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 8
+sidebar_position: 9
 sidebar_label: "Build a Plugin"
 title: "Build a Hermes Plugin"
 description: "Step-by-step guide to building a complete Hermes plugin with tools, hooks, data files, and skills"
diff --git a/website/docs/guides/daily-briefing-bot.md b/website/docs/guides/daily-briefing-bot.md
index 78bfd690..dc1ac445 100644
--- a/website/docs/guides/daily-briefing-bot.md
+++ b/website/docs/guides/daily-briefing-bot.md
@@ -1,5 +1,5 @@
 ---
-sidebar_position: 2
+sidebar_position: 3
 title: "Tutorial: Daily Briefing Bot"
 description: "Build an automated daily briefing bot that researches topics, 
summarizes findings, and delivers them to Telegram or Discord every morning" --- diff --git a/website/docs/guides/delegation-patterns.md b/website/docs/guides/delegation-patterns.md new file mode 100644 index 00000000..e2eaa05c --- /dev/null +++ b/website/docs/guides/delegation-patterns.md @@ -0,0 +1,239 @@ +--- +sidebar_position: 13 +title: "Delegation & Parallel Work" +description: "When and how to use subagent delegation — patterns for parallel research, code review, and multi-file work" +--- + +# Delegation & Parallel Work + +Hermes can spawn isolated child agents to work on tasks in parallel. Each subagent gets its own conversation, terminal session, and toolset. Only the final summary comes back — intermediate tool calls never enter your context window. + +For the full feature reference, see [Subagent Delegation](/docs/user-guide/features/delegation). + +--- + +## When to Delegate + +**Good candidates for delegation:** +- Reasoning-heavy subtasks (debugging, code review, research synthesis) +- Tasks that would flood your context with intermediate data +- Parallel independent workstreams (research A and B simultaneously) +- Fresh-context tasks where you want the agent to approach without bias + +**Use something else:** +- Single tool call → just use the tool directly +- Mechanical multi-step work with logic between steps → `execute_code` +- Tasks needing user interaction → subagents can't use `clarify` +- Quick file edits → do them directly + +--- + +## Pattern: Parallel Research + +Research three topics simultaneously and get structured summaries back: + +``` +Research these three topics in parallel: +1. Current state of WebAssembly outside the browser +2. RISC-V server chip adoption in 2025 +3. Practical quantum computing applications + +Focus on recent developments and key players. 
+``` + +Behind the scenes, Hermes uses: + +```python +delegate_task(tasks=[ + { + "goal": "Research WebAssembly outside the browser in 2025", + "context": "Focus on: runtimes (Wasmtime, Wasmer), cloud/edge use cases, WASI progress", + "toolsets": ["web"] + }, + { + "goal": "Research RISC-V server chip adoption", + "context": "Focus on: server chips shipping, cloud providers adopting, software ecosystem", + "toolsets": ["web"] + }, + { + "goal": "Research practical quantum computing applications", + "context": "Focus on: error correction breakthroughs, real-world use cases, key companies", + "toolsets": ["web"] + } +]) +``` + +All three run concurrently. Each subagent searches the web independently and returns a summary. The parent agent then synthesizes them into a coherent briefing. + +--- + +## Pattern: Code Review + +Delegate a security review to a fresh-context subagent that approaches the code without preconceptions: + +``` +Review the authentication module at src/auth/ for security issues. +Check for SQL injection, JWT validation problems, password handling, +and session management. Fix anything you find and run the tests. +``` + +The key is the `context` field — it must include everything the subagent needs: + +```python +delegate_task( + goal="Review src/auth/ for security issues and fix any found", + context="""Project at /home/user/webapp. Python 3.11, Flask, PyJWT, bcrypt. + Auth files: src/auth/login.py, src/auth/jwt.py, src/auth/middleware.py + Test command: pytest tests/auth/ -v + Focus on: SQL injection, JWT validation, password hashing, session management. + Fix issues found and verify tests pass.""", + toolsets=["terminal", "file"] +) +``` + +:::warning The Context Problem +Subagents know **absolutely nothing** about your conversation. They start completely fresh. If you delegate "fix the bug we were discussing," the subagent has no idea what bug you mean. Always pass file paths, error messages, project structure, and constraints explicitly. 
+::: + +--- + +## Pattern: Compare Alternatives + +Evaluate multiple approaches to the same problem in parallel, then pick the best: + +``` +I need to add full-text search to our Django app. Evaluate three approaches +in parallel: +1. PostgreSQL tsvector (built-in) +2. Elasticsearch via django-elasticsearch-dsl +3. Meilisearch via meilisearch-python + +For each: setup complexity, query capabilities, resource requirements, +and maintenance overhead. Compare them and recommend one. +``` + +Each subagent researches one option independently. Because they're isolated, there's no cross-contamination — each evaluation stands on its own merits. The parent agent gets all three summaries and makes the comparison. + +--- + +## Pattern: Multi-File Refactoring + +Split a large refactoring task across parallel subagents, each handling a different part of the codebase: + +```python +delegate_task(tasks=[ + { + "goal": "Refactor all API endpoint handlers to use the new response format", + "context": """Project at /home/user/api-server. + Files: src/handlers/users.py, src/handlers/auth.py, src/handlers/billing.py + Old format: return {"data": result, "status": "ok"} + New format: return APIResponse(data=result, status=200).to_dict() + Import: from src.responses import APIResponse + Run tests after: pytest tests/handlers/ -v""", + "toolsets": ["terminal", "file"] + }, + { + "goal": "Update all client SDK methods to handle the new response format", + "context": """Project at /home/user/api-server. + Files: sdk/python/client.py, sdk/python/models.py + Old parsing: result = response.json()["data"] + New parsing: result = response.json()["data"] (same key, but add status code checking) + Also update sdk/python/tests/test_client.py""", + "toolsets": ["terminal", "file"] + }, + { + "goal": "Update API documentation to reflect the new response format", + "context": """Project at /home/user/api-server. + Docs at: docs/api/. Format: Markdown with code examples. 
+ Update all response examples from old format to new format. + Add a 'Response Format' section to docs/api/overview.md explaining the schema.""", + "toolsets": ["terminal", "file"] + } +]) +``` + +:::tip +Each subagent gets its own terminal session. They can work on the same project directory without stepping on each other — as long as they're editing different files. If two subagents might touch the same file, handle that file yourself after the parallel work completes. +::: + +--- + +## Pattern: Gather Then Analyze + +Use `execute_code` for mechanical data gathering, then delegate the reasoning-heavy analysis: + +```python +# Step 1: Mechanical gathering (execute_code is better here — no reasoning needed) +execute_code(""" +from hermes_tools import web_search, web_extract + +results = [] +for query in ["AI funding Q1 2026", "AI startup acquisitions 2026", "AI IPOs 2026"]: + r = web_search(query, limit=5) + for item in r["data"]["web"]: + results.append({"title": item["title"], "url": item["url"], "desc": item["description"]}) + +# Extract full content from top 5 most relevant +urls = [r["url"] for r in results[:5]] +content = web_extract(urls) + +# Save for the analysis step +import json +with open("/tmp/ai-funding-data.json", "w") as f: + json.dump({"search_results": results, "extracted": content["results"]}, f) +print(f"Collected {len(results)} results, extracted {len(content['results'])} pages") +""") + +# Step 2: Reasoning-heavy analysis (delegation is better here) +delegate_task( + goal="Analyze AI funding data and write a market report", + context="""Raw data at /tmp/ai-funding-data.json contains search results and + extracted web pages about AI funding, acquisitions, and IPOs in Q1 2026. + Write a structured market report: key deals, trends, notable players, + and outlook. 
Focus on deals over $100M.""", + toolsets=["terminal", "file"] +) +``` + +This is often the most efficient pattern: `execute_code` handles the 10+ sequential tool calls cheaply, then a subagent does the single expensive reasoning task with a clean context. + +--- + +## Toolset Selection + +Choose toolsets based on what the subagent needs: + +| Task type | Toolsets | Why | +|-----------|----------|-----| +| Web research | `["web"]` | web_search + web_extract only | +| Code work | `["terminal", "file"]` | Shell access + file operations | +| Full-stack | `["terminal", "file", "web"]` | Everything except messaging | +| Read-only analysis | `["file"]` | Can only read files, no shell | + +Restricting toolsets keeps the subagent focused and prevents accidental side effects (like a research subagent running shell commands). + +--- + +## Constraints + +- **Max 3 parallel tasks** — batches are capped at 3 concurrent subagents +- **No nesting** — subagents cannot call `delegate_task`, `clarify`, `memory`, `send_message`, or `execute_code` +- **Separate terminals** — each subagent gets its own terminal session with separate working directory and state +- **No conversation history** — subagents see only what you put in `goal` and `context` +- **Default 50 iterations** — set `max_iterations` lower for simple tasks to save cost + +--- + +## Tips + +**Be specific in goals.** "Fix the bug" is too vague. "Fix the TypeError in api/handlers.py line 47 where process_request() receives None from parse_body()" gives the subagent enough to work with. + +**Include file paths.** Subagents don't know your project structure. Always include absolute paths to relevant files, the project root, and the test command. + +**Use delegation for context isolation.** Sometimes you want a fresh perspective. Delegating forces you to articulate the problem clearly, and the subagent approaches it without the assumptions that built up in your conversation. 
+ +**Check results.** Subagent summaries are just that — summaries. If a subagent says "fixed the bug and tests pass," verify by running the tests yourself or reading the diff. + +--- + +*For the complete delegation reference — all parameters, ACP integration, and advanced configuration — see [Subagent Delegation](/docs/user-guide/features/delegation).* diff --git a/website/docs/guides/local-llm-on-mac.md b/website/docs/guides/local-llm-on-mac.md index eaa8b238..e0a82c7f 100644 --- a/website/docs/guides/local-llm-on-mac.md +++ b/website/docs/guides/local-llm-on-mac.md @@ -1,5 +1,5 @@ --- -sidebar_position: 8 +sidebar_position: 2 title: "Run Local LLMs on Mac" description: "Set up a local OpenAI-compatible LLM server on macOS with llama.cpp or MLX, including model selection, memory optimization, and real benchmarks on Apple Silicon" --- diff --git a/website/docs/guides/migrate-from-openclaw.md b/website/docs/guides/migrate-from-openclaw.md index 6c8304a6..88dd752d 100644 --- a/website/docs/guides/migrate-from-openclaw.md +++ b/website/docs/guides/migrate-from-openclaw.md @@ -1,5 +1,5 @@ --- -sidebar_position: 7 +sidebar_position: 10 title: "Migrate from OpenClaw" description: "Complete guide to migrating your OpenClaw / Clawdbot setup to Hermes Agent — what gets migrated, how config maps, and what to check after." 
--- @@ -166,7 +166,7 @@ These are saved to `~/.hermes/migration/openclaw//archive/` for manua | `HEARTBEAT.md` | `archive/workspace/HEARTBEAT.md` | Use cron jobs for periodic tasks | | `BOOTSTRAP.md` | `archive/workspace/BOOTSTRAP.md` | Use context files or skills | | Cron jobs | `archive/cron-config.json` | Recreate with `hermes cron create` | -| Plugins | `archive/plugins-config.json` | See [plugins guide](../user-guide/features/hooks.md) | +| Plugins | `archive/plugins-config.json` | See [plugins guide](/docs/user-guide/features/hooks) | | Hooks/webhooks | `archive/hooks-config.json` | Use `hermes webhook` or gateway hooks | | Memory backend | `archive/memory-backend-config.json` | Configure via `hermes honcho` | | Skills registry | `archive/skills-registry-config.json` | Use `hermes skills config` | diff --git a/website/docs/guides/python-library.md b/website/docs/guides/python-library.md index 5f75f9a0..3e857f7d 100644 --- a/website/docs/guides/python-library.md +++ b/website/docs/guides/python-library.md @@ -1,5 +1,5 @@ --- -sidebar_position: 4 +sidebar_position: 5 title: "Using Hermes as a Python Library" description: "Embed AIAgent in your own Python scripts, web apps, or automation pipelines — no CLI required" --- diff --git a/website/docs/guides/team-telegram-assistant.md b/website/docs/guides/team-telegram-assistant.md index 04350bfa..582f2eaf 100644 --- a/website/docs/guides/team-telegram-assistant.md +++ b/website/docs/guides/team-telegram-assistant.md @@ -1,5 +1,5 @@ --- -sidebar_position: 3 +sidebar_position: 4 title: "Tutorial: Team Telegram Assistant" description: "Step-by-step guide to setting up a Telegram bot that your whole team can use for code help, research, system admin, and more" --- @@ -24,7 +24,7 @@ A Telegram bot that: Before starting, make sure you have: -- **Hermes Agent installed** on a server or VPS (not your laptop — the bot needs to stay running). Follow the [installation guide](/getting-started/learning-path) if you haven't yet. 
+- **Hermes Agent installed** on a server or VPS (not your laptop — the bot needs to stay running). Follow the [installation guide](/docs/getting-started/installation) if you haven't yet. - **A Telegram account** for yourself (the bot owner) - **An LLM provider configured** — at minimum, an API key for OpenAI, Anthropic, or another supported provider in `~/.hermes/.env` @@ -428,13 +428,13 @@ hermes gateway stop && hermes gateway start You've got a working team Telegram assistant. Here are some next steps: -- **[Security Guide](/user-guide/security)** — deep dive into authorization, container isolation, and command approval -- **[Messaging Gateway](/user-guide/messaging)** — full reference for gateway architecture, session management, and chat commands -- **[Telegram Setup](/user-guide/messaging/telegram)** — platform-specific details including voice messages and TTS -- **[Scheduled Tasks](/user-guide/features/cron)** — advanced cron scheduling with delivery options and cron expressions -- **[Context Files](/user-guide/features/context-files)** — AGENTS.md, SOUL.md, and .cursorrules for project knowledge -- **[Personality](/user-guide/features/personality)** — built-in personality presets and custom persona definitions -- **Add more platforms** — the same gateway can simultaneously run [Discord](/user-guide/messaging/discord), [Slack](/user-guide/messaging/slack), and [WhatsApp](/user-guide/messaging/whatsapp) +- **[Security Guide](/docs/user-guide/security)** — deep dive into authorization, container isolation, and command approval +- **[Messaging Gateway](/docs/user-guide/messaging)** — full reference for gateway architecture, session management, and chat commands +- **[Telegram Setup](/docs/user-guide/messaging/telegram)** — platform-specific details including voice messages and TTS +- **[Scheduled Tasks](/docs/user-guide/features/cron)** — advanced cron scheduling with delivery options and cron expressions +- **[Context 
Files](/docs/user-guide/features/context-files)** — AGENTS.md, SOUL.md, and .cursorrules for project knowledge +- **[Personality](/docs/user-guide/features/personality)** — built-in personality presets and custom persona definitions +- **Add more platforms** — the same gateway can simultaneously run [Discord](/docs/user-guide/messaging/discord), [Slack](/docs/user-guide/messaging/slack), and [WhatsApp](/docs/user-guide/messaging/whatsapp) --- diff --git a/website/docs/guides/use-mcp-with-hermes.md b/website/docs/guides/use-mcp-with-hermes.md index 9083bdae..23f38138 100644 --- a/website/docs/guides/use-mcp-with-hermes.md +++ b/website/docs/guides/use-mcp-with-hermes.md @@ -1,5 +1,5 @@ --- -sidebar_position: 5 +sidebar_position: 6 title: "Use MCP with Hermes" description: "A practical guide to connecting MCP servers to Hermes Agent, filtering their tools, and using them safely in real workflows" --- diff --git a/website/docs/guides/use-soul-with-hermes.md b/website/docs/guides/use-soul-with-hermes.md index a4cc19ef..7767faa4 100644 --- a/website/docs/guides/use-soul-with-hermes.md +++ b/website/docs/guides/use-soul-with-hermes.md @@ -1,5 +1,5 @@ --- -sidebar_position: 6 +sidebar_position: 7 title: "Use SOUL.md with Hermes" description: "How to use SOUL.md to shape Hermes Agent's default voice, what belongs there, and how it differs from AGENTS.md and /personality" --- diff --git a/website/docs/guides/use-voice-mode-with-hermes.md b/website/docs/guides/use-voice-mode-with-hermes.md index dd8b1317..8aca66bc 100644 --- a/website/docs/guides/use-voice-mode-with-hermes.md +++ b/website/docs/guides/use-voice-mode-with-hermes.md @@ -1,5 +1,5 @@ --- -sidebar_position: 7 +sidebar_position: 8 title: "Use Voice Mode with Hermes" description: "A practical guide to setting up and using Hermes voice mode across CLI, Telegram, Discord, and Discord voice channels" --- diff --git a/website/docs/guides/work-with-skills.md b/website/docs/guides/work-with-skills.md new file mode 100644 
index 00000000..18e180e4 --- /dev/null +++ b/website/docs/guides/work-with-skills.md @@ -0,0 +1,268 @@ +--- +sidebar_position: 12 +title: "Working with Skills" +description: "Find, install, use, and create skills — on-demand knowledge that teaches Hermes new workflows" +--- + +# Working with Skills + +Skills are on-demand knowledge documents that teach Hermes how to handle specific tasks — from generating ASCII art to managing GitHub PRs. This guide walks you through using them day to day. + +For the full technical reference, see [Skills System](/docs/user-guide/features/skills). + +--- + +## Finding Skills + +Every Hermes installation ships with bundled skills. See what's available: + +```bash +# In any chat session: +/skills + +# Or from the CLI: +hermes skills list +``` + +This shows a compact list with names and descriptions: + +``` +ascii-art Generate ASCII art using pyfiglet, cowsay, boxes... +arxiv Search and retrieve academic papers from arXiv... +github-pr-workflow Full PR lifecycle — create branches, commit... +plan Plan mode — inspect context, write a markdown... +excalidraw Create hand-drawn style diagrams using Excalidraw... +``` + +### Searching for a Skill + +```bash +# Search by keyword +/skills search docker +/skills search music +``` + +### The Skills Hub + +Official optional skills (heavier or niche skills not active by default) are available via the Hub: + +```bash +# Browse official optional skills +/skills browse + +# Search the hub +/skills search blockchain +``` + +--- + +## Using a Skill + +Every installed skill is automatically a slash command. 
Just type its name: + +```bash +# Load a skill and give it a task +/ascii-art Make a banner that says "HELLO WORLD" +/plan Design a REST API for a todo app +/github-pr-workflow Create a PR for the auth refactor + +# Just the skill name (no task) loads it and lets you describe what you need +/excalidraw +``` + +You can also trigger skills through natural conversation — ask Hermes to use a specific skill, and it will load it via the `skill_view` tool. + +### Progressive Disclosure + +Skills use a token-efficient loading pattern. The agent doesn't load everything at once: + +1. **`skills_list()`** — compact list of all skills (~3k tokens). Loaded at session start. +2. **`skill_view(name)`** — full SKILL.md content for one skill. Loaded when the agent decides it needs that skill. +3. **`skill_view(name, file_path)`** — a specific reference file within the skill. Only loaded if needed. + +This means skills don't cost tokens until they're actually used. + +--- + +## Installing from the Hub + +Official optional skills ship with Hermes but aren't active by default. Install them explicitly: + +```bash +# Install an official optional skill +hermes skills install official/research/arxiv + +# Install from the hub in a chat session +/skills install official/creative/songwriting-and-ai-music +``` + +What happens: +1. The skill directory is copied to `~/.hermes/skills/` +2. It appears in your `skills_list` output +3. It becomes available as a slash command + +:::tip +Installed skills take effect in new sessions. If you want it available in the current session, use `/reset` to start fresh, or add `--now` to invalidate the prompt cache immediately (costs more tokens on the next turn). 
+::: + +### Verifying Installation + +```bash +# Check it's there +hermes skills list | grep arxiv + +# Or in chat +/skills search arxiv +``` + +--- + +## Configuring Skill Settings + +Some skills declare configuration they need in their frontmatter: + +```yaml +metadata: + hermes: + config: + - key: tenor.api_key + description: "Tenor API key for GIF search" + prompt: "Enter your Tenor API key" + url: "https://developers.google.com/tenor/guides/quickstart" +``` + +When a skill with config is first loaded, Hermes prompts you for the values. They're stored in `config.yaml` under `skills.config.*`. + +Manage skill config from the CLI: + +```bash +# Interactive config for a specific skill +hermes skills config gif-search + +# View all skill config +hermes config get skills.config +``` + +--- + +## Creating Your Own Skill + +Skills are just markdown files with YAML frontmatter. Creating one takes under five minutes. + +### 1. Create the Directory + +```bash +mkdir -p ~/.hermes/skills/my-category/my-skill +``` + +### 2. Write SKILL.md + +```markdown title="~/.hermes/skills/my-category/my-skill/SKILL.md" +--- +name: my-skill +description: Brief description of what this skill does +version: 1.0.0 +metadata: + hermes: + tags: [my-tag, automation] + category: my-category +--- + +# My Skill + +## When to Use +Use this skill when the user asks about [specific topic] or needs to [specific task]. + +## Procedure +1. First, check if [prerequisite] is available +2. Run `command --with-flags` +3. Parse the output and present results + +## Pitfalls +- Common failure: [description]. Fix: [solution] +- Watch out for [edge case] + +## Verification +Run `check-command` to confirm the result is correct. +``` + +### 3. 
Add Reference Files (Optional) + +Skills can include supporting files the agent loads on demand: + +``` +my-skill/ +├── SKILL.md # Main skill document +├── references/ +│ ├── api-docs.md # API reference the agent can consult +│ └── examples.md # Example inputs/outputs +├── templates/ +│ └── config.yaml # Template files the agent can use +└── scripts/ + └── setup.sh # Scripts the agent can execute +``` + +Reference these in your SKILL.md: + +```markdown +For API details, load the reference: `skill_view("my-skill", "references/api-docs.md")` +``` + +### 4. Test It + +Start a new session and try your skill: + +```bash +hermes chat -q "/my-skill help me with the thing" +``` + +The skill appears automatically — no registration needed. Drop it in `~/.hermes/skills/` and it's live. + +:::info +The agent can also create and update skills itself using `skill_manage`. After solving a complex problem, Hermes may offer to save the approach as a skill for next time. +::: + +--- + +## Per-Platform Skill Management + +Control which skills are available on which platforms: + +```bash +hermes skills +``` + +This opens an interactive TUI where you can enable or disable skills per platform (CLI, Telegram, Discord, etc.). Useful when you want certain skills only available in specific contexts — for example, keeping development skills off Telegram. 
+ +--- + +## Skills vs Memory + +Both are persistent across sessions, but they serve different purposes: + +| | Skills | Memory | +|---|---|---| +| **What** | Procedural knowledge — how to do things | Factual knowledge — what things are | +| **When** | Loaded on demand, only when relevant | Injected into every session automatically | +| **Size** | Can be large (hundreds of lines) | Should be compact (key facts only) | +| **Cost** | Zero tokens until loaded | Small but constant token cost | +| **Examples** | "How to deploy to Kubernetes" | "User prefers dark mode, lives in PST" | +| **Who creates** | You, the agent, or installed from Hub | The agent, based on conversations | + +**Rule of thumb:** If you'd put it in a reference document, it's a skill. If you'd put it on a sticky note, it's memory. + +--- + +## Tips + +**Keep skills focused.** A skill that tries to cover "all of DevOps" will be too long and too vague. A skill that covers "deploy a Python app to Fly.io" is specific enough to be genuinely useful. + +**Let the agent create skills.** After a complex multi-step task, Hermes will often offer to save the approach as a skill. Say yes — these agent-authored skills capture the exact workflow including pitfalls that were discovered along the way. + +**Use categories.** Organize skills into subdirectories (`~/.hermes/skills/devops/`, `~/.hermes/skills/research/`, etc.). This keeps the list manageable and helps the agent find relevant skills faster. + +**Update skills when they go stale.** If you use a skill and hit issues not covered by it, tell Hermes to update the skill with what you learned. Skills that aren't maintained become liabilities. 
+ +--- + +*For the complete skills reference — frontmatter fields, conditional activation, external directories, and more — see [Skills System](/docs/user-guide/features/skills).* diff --git a/website/sidebars.ts b/website/sidebars.ts index cd227306..5e1ebf2d 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -132,13 +132,17 @@ const sidebars: SidebarsConfig = { collapsed: true, items: [ 'guides/tips', - 'guides/build-a-hermes-plugin', + 'guides/local-llm-on-mac', 'guides/daily-briefing-bot', 'guides/team-telegram-assistant', 'guides/python-library', 'guides/use-mcp-with-hermes', 'guides/use-soul-with-hermes', 'guides/use-voice-mode-with-hermes', + 'guides/build-a-hermes-plugin', + 'guides/automate-with-cron', + 'guides/work-with-skills', + 'guides/delegation-patterns', 'guides/migrate-from-openclaw', ], }, From 76f19775c3c1531591267e33d77d3dcdc7941719 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sun, 5 Apr 2026 04:37:02 +0000 Subject: [PATCH 045/154] feat(memory): add Supermemory memory provider --- plugins/memory/supermemory/README.md | 54 ++ plugins/memory/supermemory/__init__.py | 657 ++++++++++++++++++ plugins/memory/supermemory/plugin.yaml | 7 + .../memory/test_supermemory_provider.py | 212 ++++++ 4 files changed, 930 insertions(+) create mode 100644 plugins/memory/supermemory/README.md create mode 100644 plugins/memory/supermemory/__init__.py create mode 100644 plugins/memory/supermemory/plugin.yaml create mode 100644 tests/plugins/memory/test_supermemory_provider.py diff --git a/plugins/memory/supermemory/README.md b/plugins/memory/supermemory/README.md new file mode 100644 index 00000000..465d4683 --- /dev/null +++ b/plugins/memory/supermemory/README.md @@ -0,0 +1,54 @@ +# Supermemory Memory Provider + +Semantic long-term memory with profile recall, semantic search, explicit memory tools, and session-end conversation ingest. 
+ +## Requirements + +- `pip install supermemory` +- Supermemory API key from [supermemory.ai](https://supermemory.ai) + +## Setup + +```bash +hermes memory setup # select "supermemory" +``` + +Or manually: + +```bash +hermes config set memory.provider supermemory +echo "SUPERMEMORY_API_KEY=***" >> ~/.hermes/.env +``` + +## Config + +Config file: `$HERMES_HOME/supermemory.json` + +| Key | Default | Description | +|-----|---------|-------------| +| `container_tag` | `hermes` | Container tag used for search and writes | +| `auto_recall` | `true` | Inject relevant memory context before turns | +| `auto_capture` | `true` | Store cleaned user-assistant turns after each response | +| `max_recall_results` | `10` | Max recalled items to format into context | +| `profile_frequency` | `50` | Include profile facts on first turn and every N turns | +| `capture_mode` | `all` | Skip tiny or trivial turns by default | +| `entity_context` | built-in default | Extraction guidance passed to Supermemory | +| `api_timeout` | `5.0` | Timeout for SDK and ingest requests | + +## Tools + +| Tool | Description | +|------|-------------| +| `supermemory_store` | Store an explicit memory | +| `supermemory_search` | Search memories by semantic similarity | +| `supermemory_forget` | Forget a memory by ID or best-match query | +| `supermemory_profile` | Retrieve persistent profile and recent context | + +## Behavior + +When enabled, Hermes can: + +- prefetch relevant memory context before each turn +- store cleaned conversation turns after each completed response +- ingest the full session on session end for richer graph updates +- expose explicit tools for search, store, forget, and profile access diff --git a/plugins/memory/supermemory/__init__.py b/plugins/memory/supermemory/__init__.py new file mode 100644 index 00000000..05583fae --- /dev/null +++ b/plugins/memory/supermemory/__init__.py @@ -0,0 +1,657 @@ +"""Supermemory memory plugin using the MemoryProvider interface. 
+
+Provides semantic long-term memory with profile recall, semantic search,
+explicit memory tools, cleaned turn capture, and session-end conversation ingest.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import re
+import threading
+import urllib.error
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_CONTAINER_TAG = "hermes"
+_DEFAULT_MAX_RECALL_RESULTS = 10
+_DEFAULT_PROFILE_FREQUENCY = 50
+_DEFAULT_CAPTURE_MODE = "all"
+_DEFAULT_API_TIMEOUT = 5.0
+_MIN_CAPTURE_LENGTH = 10
+_MAX_ENTITY_CONTEXT_LENGTH = 1500
+_CONVERSATIONS_URL = "https://api.supermemory.ai/v4/conversations"
+_TRIVIAL_RE = re.compile(
+    r"^(ok|okay|thanks|thank you|got it|sure|yes|no|yep|nope|k|ty|thx|np)\.?$",
+    re.IGNORECASE,
+)
+_CONTEXT_STRIP_RE = re.compile(
+    r"<context>[\s\S]*?</context>\s*", re.DOTALL
+)
+_CONTAINERS_STRIP_RE = re.compile(
+    r"<containers>[\s\S]*?</containers>\s*", re.DOTALL
+)
+_DEFAULT_ENTITY_CONTEXT = (
+    "User-assistant conversation. Format: [role: user]...[user:end] and "
+    "[role: assistant]...[assistant:end].\n\n"
+    "Only extract things useful in future conversations. Most messages are not worth remembering.\n\n"
+    "Remember lasting personal facts, preferences, routines, tools, ongoing projects, working context, "
+    "and explicit requests to remember something.\n\n"
+    "Do not remember temporary intents, one-time tasks, assistant actions, implementation details, or in-progress status.\n\n"
+    "When in doubt, store less."
+) + + +def _default_config() -> dict: + return { + "container_tag": _DEFAULT_CONTAINER_TAG, + "auto_recall": True, + "auto_capture": True, + "max_recall_results": _DEFAULT_MAX_RECALL_RESULTS, + "profile_frequency": _DEFAULT_PROFILE_FREQUENCY, + "capture_mode": _DEFAULT_CAPTURE_MODE, + "entity_context": _DEFAULT_ENTITY_CONTEXT, + "api_timeout": _DEFAULT_API_TIMEOUT, + } + + +def _sanitize_tag(raw: str) -> str: + tag = re.sub(r"[^a-zA-Z0-9_]", "_", raw or "") + tag = re.sub(r"_+", "_", tag) + return tag.strip("_") or _DEFAULT_CONTAINER_TAG + + +def _clamp_entity_context(text: str) -> str: + if not text: + return _DEFAULT_ENTITY_CONTEXT + text = text.strip() + return text[:_MAX_ENTITY_CONTEXT_LENGTH] + + +def _as_bool(value: Any, default: bool) -> bool: + if isinstance(value, bool): + return value + if isinstance(value, str): + lowered = value.strip().lower() + if lowered in ("true", "1", "yes", "y", "on"): + return True + if lowered in ("false", "0", "no", "n", "off"): + return False + return default + + +def _load_supermemory_config(hermes_home: str) -> dict: + config = _default_config() + config_path = Path(hermes_home) / "supermemory.json" + if config_path.exists(): + try: + raw = json.loads(config_path.read_text(encoding="utf-8")) + if isinstance(raw, dict): + config.update({k: v for k, v in raw.items() if v is not None}) + except Exception: + logger.debug("Failed to parse %s", config_path, exc_info=True) + + config["container_tag"] = _sanitize_tag(str(config.get("container_tag", _DEFAULT_CONTAINER_TAG))) + config["auto_recall"] = _as_bool(config.get("auto_recall"), True) + config["auto_capture"] = _as_bool(config.get("auto_capture"), True) + try: + config["max_recall_results"] = max(1, min(20, int(config.get("max_recall_results", _DEFAULT_MAX_RECALL_RESULTS)))) + except Exception: + config["max_recall_results"] = _DEFAULT_MAX_RECALL_RESULTS + try: + config["profile_frequency"] = max(1, min(500, int(config.get("profile_frequency", _DEFAULT_PROFILE_FREQUENCY)))) 
+ except Exception: + config["profile_frequency"] = _DEFAULT_PROFILE_FREQUENCY + config["capture_mode"] = "everything" if config.get("capture_mode") == "everything" else "all" + config["entity_context"] = _clamp_entity_context(str(config.get("entity_context", _DEFAULT_ENTITY_CONTEXT))) + try: + config["api_timeout"] = max(0.5, min(15.0, float(config.get("api_timeout", _DEFAULT_API_TIMEOUT)))) + except Exception: + config["api_timeout"] = _DEFAULT_API_TIMEOUT + return config + + +def _save_supermemory_config(values: dict, hermes_home: str) -> None: + config_path = Path(hermes_home) / "supermemory.json" + existing = {} + if config_path.exists(): + try: + raw = json.loads(config_path.read_text(encoding="utf-8")) + if isinstance(raw, dict): + existing = raw + except Exception: + existing = {} + existing.update(values) + config_path.write_text(json.dumps(existing, indent=2, sort_keys=True) + "\n", encoding="utf-8") + + +def _detect_category(text: str) -> str: + lowered = text.lower() + if re.search(r"prefer|like|love|hate|want", lowered): + return "preference" + if re.search(r"decided|will use|going with", lowered): + return "decision" + if re.search(r"\bis\b|\bare\b|\bhas\b|\bhave\b", lowered): + return "fact" + return "other" + + +def _format_relative_time(iso_timestamp: str) -> str: + try: + dt = datetime.fromisoformat(iso_timestamp.replace("Z", "+00:00")) + now = datetime.now(timezone.utc) + seconds = (now - dt).total_seconds() + if seconds < 1800: + return "just now" + if seconds < 3600: + return f"{int(seconds / 60)}m ago" + if seconds < 86400: + return f"{int(seconds / 3600)}h ago" + if seconds < 604800: + return f"{int(seconds / 86400)}d ago" + if dt.year == now.year: + return dt.strftime("%d %b") + return dt.strftime("%d %b %Y") + except Exception: + return "" + + +def _deduplicate_recall(static_facts: list, dynamic_facts: list, search_results: list) -> tuple[list, list, list]: + seen = set() + out_static, out_dynamic, out_search = [], [], [] + for fact in 
static_facts or []: + if fact and fact not in seen: + seen.add(fact) + out_static.append(fact) + for fact in dynamic_facts or []: + if fact and fact not in seen: + seen.add(fact) + out_dynamic.append(fact) + for item in search_results or []: + memory = item.get("memory", "") + if memory and memory not in seen: + seen.add(memory) + out_search.append(item) + return out_static, out_dynamic, out_search + + +def _format_prefetch_context(static_facts: list, dynamic_facts: list, search_results: list, max_results: int) -> str: + statics, dynamics, search = _deduplicate_recall(static_facts, dynamic_facts, search_results) + statics = statics[:max_results] + dynamics = dynamics[:max_results] + search = search[:max_results] + if not statics and not dynamics and not search: + return "" + + sections = [] + if statics: + sections.append("## User Profile (Persistent)\n" + "\n".join(f"- {item}" for item in statics)) + if dynamics: + sections.append("## Recent Context\n" + "\n".join(f"- {item}" for item in dynamics)) + if search: + lines = [] + for item in search: + memory = item.get("memory", "") + if not memory: + continue + similarity = item.get("similarity") + updated = item.get("updated_at") or item.get("updatedAt") or "" + prefix_bits = [] + rel = _format_relative_time(updated) + if rel: + prefix_bits.append(f"[{rel}]") + if similarity is not None: + try: + prefix_bits.append(f"[{round(float(similarity) * 100)}%]") + except Exception: + pass + prefix = " ".join(prefix_bits) + lines.append(f"- {prefix} {memory}".strip()) + if lines: + sections.append("## Relevant Memories\n" + "\n".join(lines)) + if not sections: + return "" + + intro = ( + "The following is background context from long-term memory. Use it silently when relevant. " + "Do not force memories into the conversation." 
+ ) + body = "\n\n".join(sections) + return f"\n{intro}\n\n{body}\n" + + +def _clean_text_for_capture(text: str) -> str: + text = _CONTEXT_STRIP_RE.sub("", text or "") + text = _CONTAINERS_STRIP_RE.sub("", text) + return text.strip() + + +def _is_trivial_message(text: str) -> bool: + return bool(_TRIVIAL_RE.match((text or "").strip())) + + +class _SupermemoryClient: + def __init__(self, api_key: str, timeout: float, container_tag: str): + from supermemory import Supermemory + + self._api_key = api_key + self._container_tag = container_tag + self._timeout = timeout + self._client = Supermemory(api_key=api_key, timeout=timeout, max_retries=0) + + def add_memory(self, content: str, metadata: Optional[dict] = None, *, entity_context: str = "") -> dict: + kwargs = { + "content": content.strip(), + "container_tags": [self._container_tag], + } + if metadata: + kwargs["metadata"] = metadata + if entity_context: + kwargs["entity_context"] = _clamp_entity_context(entity_context) + result = self._client.documents.add(**kwargs) + return {"id": getattr(result, "id", "")} + + def search_memories(self, query: str, *, limit: int = 5) -> list[dict]: + response = self._client.search.memories(q=query, container_tag=self._container_tag, limit=limit) + results = [] + for item in (getattr(response, "results", None) or []): + results.append({ + "id": getattr(item, "id", ""), + "memory": getattr(item, "memory", "") or "", + "similarity": getattr(item, "similarity", None), + "updated_at": getattr(item, "updated_at", None) or getattr(item, "updatedAt", None), + "metadata": getattr(item, "metadata", None), + }) + return results + + def get_profile(self, query: Optional[str] = None) -> dict: + kwargs = {"container_tag": self._container_tag} + if query: + kwargs["q"] = query + response = self._client.profile(**kwargs) + profile_data = getattr(response, "profile", None) + search_data = getattr(response, "search_results", None) or getattr(response, "searchResults", None) + static = 
getattr(profile_data, "static", []) or [] if profile_data else [] + dynamic = getattr(profile_data, "dynamic", []) or [] if profile_data else [] + raw_results = getattr(search_data, "results", None) or search_data or [] + search_results = [] + if isinstance(raw_results, list): + for item in raw_results: + if isinstance(item, dict): + search_results.append(item) + else: + search_results.append({ + "memory": getattr(item, "memory", ""), + "updated_at": getattr(item, "updated_at", None) or getattr(item, "updatedAt", None), + "similarity": getattr(item, "similarity", None), + }) + return {"static": static, "dynamic": dynamic, "search_results": search_results} + + def forget_memory(self, memory_id: str) -> None: + self._client.memories.forget(container_tag=self._container_tag, id=memory_id) + + def forget_by_query(self, query: str) -> dict: + results = self.search_memories(query, limit=5) + if not results: + return {"success": False, "message": "No matching memory found to forget."} + target = results[0] + memory_id = target.get("id", "") + if not memory_id: + return {"success": False, "message": "Best matching memory has no id."} + self.forget_memory(memory_id) + preview = (target.get("memory") or "")[:100] + return {"success": True, "message": f'Forgot: "{preview}"', "id": memory_id} + + def ingest_conversation(self, session_id: str, messages: list[dict]) -> None: + payload = json.dumps({ + "conversationId": session_id, + "messages": messages, + "containerTags": [self._container_tag], + }).encode("utf-8") + req = urllib.request.Request( + _CONVERSATIONS_URL, + data=payload, + headers={ + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + }, + method="POST", + ) + with urllib.request.urlopen(req, timeout=self._timeout + 3): + return + + +STORE_SCHEMA = { + "name": "supermemory_store", + "description": "Store an explicit memory for future recall.", + "parameters": { + "type": "object", + "properties": { + "content": {"type": "string", 
"description": "The memory content to store."}, + "metadata": {"type": "object", "description": "Optional metadata attached to the memory."}, + }, + "required": ["content"], + }, +} + +SEARCH_SCHEMA = { + "name": "supermemory_search", + "description": "Search long-term memory by semantic similarity.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "What to search for."}, + "limit": {"type": "integer", "description": "Maximum results to return, 1 to 20."}, + }, + "required": ["query"], + }, +} + +FORGET_SCHEMA = { + "name": "supermemory_forget", + "description": "Forget a memory by exact id or by best-match query.", + "parameters": { + "type": "object", + "properties": { + "id": {"type": "string", "description": "Exact memory id to delete."}, + "query": {"type": "string", "description": "Query used to find the memory to forget."}, + }, + }, +} + +PROFILE_SCHEMA = { + "name": "supermemory_profile", + "description": "Retrieve persistent profile facts and recent memory context.", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "Optional query to focus the profile response."}, + }, + }, +} + + +class SupermemoryMemoryProvider(MemoryProvider): + def __init__(self): + self._config = _default_config() + self._api_key = "" + self._client: Optional[_SupermemoryClient] = None + self._container_tag = _DEFAULT_CONTAINER_TAG + self._session_id = "" + self._turn_count = 0 + self._prefetch_result = "" + self._prefetch_lock = threading.Lock() + self._prefetch_thread: Optional[threading.Thread] = None + self._sync_thread: Optional[threading.Thread] = None + self._auto_recall = True + self._auto_capture = True + self._max_recall_results = _DEFAULT_MAX_RECALL_RESULTS + self._profile_frequency = _DEFAULT_PROFILE_FREQUENCY + self._capture_mode = _DEFAULT_CAPTURE_MODE + self._entity_context = _DEFAULT_ENTITY_CONTEXT + self._api_timeout = _DEFAULT_API_TIMEOUT + self._hermes_home = 
os.path.expanduser("~/.hermes") + self._write_enabled = True + self._active = False + + @property + def name(self) -> str: + return "supermemory" + + def is_available(self) -> bool: + api_key = os.environ.get("SUPERMEMORY_API_KEY", "") + if not api_key: + return False + try: + __import__("supermemory") + return True + except Exception: + return False + + def get_config_schema(self): + return [ + {"key": "api_key", "description": "Supermemory API key", "secret": True, "required": True, "env_var": "SUPERMEMORY_API_KEY", "url": "https://supermemory.ai"}, + {"key": "container_tag", "description": "Container tag for reads and writes", "default": _DEFAULT_CONTAINER_TAG}, + {"key": "auto_recall", "description": "Enable automatic recall before each turn", "default": "true", "choices": ["true", "false"]}, + {"key": "auto_capture", "description": "Enable automatic capture after each completed turn", "default": "true", "choices": ["true", "false"]}, + {"key": "max_recall_results", "description": "Maximum recalled items to inject", "default": str(_DEFAULT_MAX_RECALL_RESULTS)}, + {"key": "profile_frequency", "description": "Include profile facts on first turn and every N turns", "default": str(_DEFAULT_PROFILE_FREQUENCY)}, + {"key": "capture_mode", "description": "Capture mode", "default": _DEFAULT_CAPTURE_MODE, "choices": ["all", "everything"]}, + {"key": "entity_context", "description": "Extraction guidance passed to Supermemory", "default": _DEFAULT_ENTITY_CONTEXT}, + {"key": "api_timeout", "description": "Timeout in seconds for SDK and ingest calls", "default": str(_DEFAULT_API_TIMEOUT)}, + ] + + def save_config(self, values, hermes_home): + sanitized = dict(values or {}) + if "container_tag" in sanitized: + sanitized["container_tag"] = _sanitize_tag(str(sanitized["container_tag"])) + if "entity_context" in sanitized: + sanitized["entity_context"] = _clamp_entity_context(str(sanitized["entity_context"])) + _save_supermemory_config(sanitized, hermes_home) + + def 
initialize(self, session_id: str, **kwargs) -> None: + self._hermes_home = kwargs.get("hermes_home") or os.path.expanduser("~/.hermes") + self._session_id = session_id + self._turn_count = 0 + self._config = _load_supermemory_config(self._hermes_home) + self._api_key = os.environ.get("SUPERMEMORY_API_KEY", "") + self._container_tag = self._config["container_tag"] + self._auto_recall = self._config["auto_recall"] + self._auto_capture = self._config["auto_capture"] + self._max_recall_results = self._config["max_recall_results"] + self._profile_frequency = self._config["profile_frequency"] + self._capture_mode = self._config["capture_mode"] + self._entity_context = self._config["entity_context"] + self._api_timeout = self._config["api_timeout"] + agent_context = kwargs.get("agent_context", "") + self._write_enabled = agent_context not in ("cron", "flush", "subagent") + self._active = bool(self._api_key) + self._client = None + if self._active: + try: + self._client = _SupermemoryClient( + api_key=self._api_key, + timeout=self._api_timeout, + container_tag=self._container_tag, + ) + except Exception: + logger.warning("Supermemory initialization failed", exc_info=True) + self._active = False + self._client = None + + def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None: + self._turn_count = max(turn_number, 0) + + def system_prompt_block(self) -> str: + if not self._active: + return "" + return ( + "# Supermemory\n" + f"Active. Container: {self._container_tag}.\n" + "Use supermemory_search, supermemory_store, supermemory_forget, and supermemory_profile for explicit memory operations." 
+ ) + + def prefetch(self, query: str, *, session_id: str = "") -> str: + if not self._active or not self._auto_recall or not self._client or not query.strip(): + return "" + try: + profile = self._client.get_profile(query=query[:200]) + include_profile = self._turn_count <= 1 or (self._turn_count % self._profile_frequency == 0) + context = _format_prefetch_context( + static_facts=profile["static"] if include_profile else [], + dynamic_facts=profile["dynamic"] if include_profile else [], + search_results=profile["search_results"], + max_results=self._max_recall_results, + ) + return context + except Exception: + logger.debug("Supermemory prefetch failed", exc_info=True) + return "" + + def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None: + if not self._active or not self._auto_capture or not self._write_enabled or not self._client: + return + + clean_user = _clean_text_for_capture(user_content) + clean_assistant = _clean_text_for_capture(assistant_content) + if not clean_user or not clean_assistant: + return + if self._capture_mode == "all": + if len(clean_user) < _MIN_CAPTURE_LENGTH or len(clean_assistant) < _MIN_CAPTURE_LENGTH: + return + if _is_trivial_message(clean_user): + return + + content = ( + f"[role: user]\n{clean_user}\n[user:end]\n\n" + f"[role: assistant]\n{clean_assistant}\n[assistant:end]" + ) + metadata = {"source": "hermes", "type": "conversation_turn"} + + def _run(): + try: + self._client.add_memory(content, metadata=metadata, entity_context=self._entity_context) + except Exception: + logger.debug("Supermemory sync_turn failed", exc_info=True) + + if self._sync_thread and self._sync_thread.is_alive(): + self._sync_thread.join(timeout=2.0) + self._sync_thread = threading.Thread(target=_run, daemon=True, name="supermemory-sync") + self._sync_thread.start() + + def on_session_end(self, messages: List[Dict[str, Any]]) -> None: + if not self._active or not self._write_enabled or not self._client or not 
self._session_id: + return + cleaned = [] + for message in messages or []: + role = message.get("role") + if role not in ("user", "assistant"): + continue + content = _clean_text_for_capture(str(message.get("content", ""))) + if content: + cleaned.append({"role": role, "content": content}) + if not cleaned: + return + if len(cleaned) == 1 and len(cleaned[0].get("content", "")) < 20: + return + try: + self._client.ingest_conversation(self._session_id, cleaned) + except urllib.error.HTTPError: + logger.warning("Supermemory session ingest failed", exc_info=True) + except Exception: + logger.warning("Supermemory session ingest failed", exc_info=True) + + def on_memory_write(self, action: str, target: str, content: str) -> None: + if not self._active or not self._write_enabled or not self._client: + return + if action != "add" or not (content or "").strip(): + return + + def _run(): + try: + self._client.add_memory( + content.strip(), + metadata={"source": "hermes_memory", "target": target, "type": "explicit_memory"}, + entity_context=self._entity_context, + ) + except Exception: + logger.debug("Supermemory on_memory_write failed", exc_info=True) + + threading.Thread(target=_run, daemon=True, name="supermemory-memory-write").start() + + def get_tool_schemas(self) -> List[Dict[str, Any]]: + return [STORE_SCHEMA, SEARCH_SCHEMA, FORGET_SCHEMA, PROFILE_SCHEMA] + + def _tool_store(self, args: dict) -> str: + content = str(args.get("content") or "").strip() + if not content: + return json.dumps({"error": "content is required"}) + metadata = args.get("metadata") or {} + if not isinstance(metadata, dict): + metadata = {} + metadata.setdefault("type", _detect_category(content)) + metadata["source"] = "hermes_tool" + try: + result = self._client.add_memory(content, metadata=metadata, entity_context=self._entity_context) + preview = content[:80] + ("..." 
if len(content) > 80 else "") + return json.dumps({"saved": True, "id": result.get("id", ""), "preview": preview}) + except Exception as exc: + return json.dumps({"error": f"Failed to store memory: {exc}"}) + + def _tool_search(self, args: dict) -> str: + query = str(args.get("query") or "").strip() + if not query: + return json.dumps({"error": "query is required"}) + try: + limit = max(1, min(20, int(args.get("limit", 5) or 5))) + except Exception: + limit = 5 + try: + results = self._client.search_memories(query, limit=limit) + formatted = [] + for item in results: + entry = {"id": item.get("id", ""), "content": item.get("memory", "")} + if item.get("similarity") is not None: + try: + entry["similarity"] = round(float(item["similarity"]) * 100) + except Exception: + pass + formatted.append(entry) + return json.dumps({"results": formatted, "count": len(formatted)}) + except Exception as exc: + return json.dumps({"error": f"Search failed: {exc}"}) + + def _tool_forget(self, args: dict) -> str: + memory_id = str(args.get("id") or "").strip() + query = str(args.get("query") or "").strip() + if not memory_id and not query: + return json.dumps({"error": "Provide either id or query"}) + try: + if memory_id: + self._client.forget_memory(memory_id) + return json.dumps({"forgotten": True, "id": memory_id}) + return json.dumps(self._client.forget_by_query(query)) + except Exception as exc: + return json.dumps({"error": f"Forget failed: {exc}"}) + + def _tool_profile(self, args: dict) -> str: + query = str(args.get("query") or "").strip() or None + try: + profile = self._client.get_profile(query=query) + sections = [] + if profile["static"]: + sections.append("## User Profile (Persistent)\n" + "\n".join(f"- {item}" for item in profile["static"])) + if profile["dynamic"]: + sections.append("## Recent Context\n" + "\n".join(f"- {item}" for item in profile["dynamic"])) + return json.dumps({ + "profile": "\n\n".join(sections), + "static_count": len(profile["static"]), + 
"dynamic_count": len(profile["dynamic"]), + }) + except Exception as exc: + return json.dumps({"error": f"Profile failed: {exc}"}) + + def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str: + if not self._active or not self._client: + return json.dumps({"error": "Supermemory is not configured"}) + if tool_name == "supermemory_store": + return self._tool_store(args) + if tool_name == "supermemory_search": + return self._tool_search(args) + if tool_name == "supermemory_forget": + return self._tool_forget(args) + if tool_name == "supermemory_profile": + return self._tool_profile(args) + return json.dumps({"error": f"Unknown tool: {tool_name}"}) + + +def register(ctx): + ctx.register_memory_provider(SupermemoryMemoryProvider()) diff --git a/plugins/memory/supermemory/plugin.yaml b/plugins/memory/supermemory/plugin.yaml new file mode 100644 index 00000000..372edb2b --- /dev/null +++ b/plugins/memory/supermemory/plugin.yaml @@ -0,0 +1,7 @@ +name: supermemory +version: 1.0.0 +description: "Supermemory semantic long-term memory with profile recall, semantic search, explicit memory tools, and session ingest." 
+pip_dependencies: + - supermemory +hooks: + - on_session_end diff --git a/tests/plugins/memory/test_supermemory_provider.py b/tests/plugins/memory/test_supermemory_provider.py new file mode 100644 index 00000000..f61a9041 --- /dev/null +++ b/tests/plugins/memory/test_supermemory_provider.py @@ -0,0 +1,212 @@ +import json +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from plugins.memory.supermemory import ( + SupermemoryMemoryProvider, + _clean_text_for_capture, + _format_prefetch_context, + _load_supermemory_config, + _save_supermemory_config, +) + + +class FakeClient: + def __init__(self, api_key: str, timeout: float, container_tag: str): + self.api_key = api_key + self.timeout = timeout + self.container_tag = container_tag + self.add_calls = [] + self.search_results = [] + self.profile_response = {"static": [], "dynamic": [], "search_results": []} + self.ingest_calls = [] + self.forgotten_ids = [] + self.forget_by_query_response = {"success": True, "message": "Forgot"} + + def add_memory(self, content, metadata=None, *, entity_context=""): + self.add_calls.append({ + "content": content, + "metadata": metadata, + "entity_context": entity_context, + }) + return {"id": "mem_123"} + + def search_memories(self, query, *, limit=5): + return self.search_results + + def get_profile(self, query=None): + return self.profile_response + + def forget_memory(self, memory_id): + self.forgotten_ids.append(memory_id) + + def forget_by_query(self, query): + return self.forget_by_query_response + + def ingest_conversation(self, session_id, messages): + self.ingest_calls.append({"session_id": session_id, "messages": messages}) + + +@pytest.fixture +def provider(monkeypatch, tmp_path): + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + p = SupermemoryMemoryProvider() + p.initialize("session-1", hermes_home=str(tmp_path), platform="cli") + return p + + +def 
test_is_available_false_without_api_key(monkeypatch): + monkeypatch.delenv("SUPERMEMORY_API_KEY", raising=False) + p = SupermemoryMemoryProvider() + assert p.is_available() is False + + +def test_is_available_false_when_import_missing(monkeypatch): + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + + import builtins + real_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == "supermemory": + raise ImportError("missing") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", fake_import) + p = SupermemoryMemoryProvider() + assert p.is_available() is False + + +def test_load_and_save_config_round_trip(tmp_path): + _save_supermemory_config({"container_tag": "demo-tag", "auto_capture": False}, str(tmp_path)) + cfg = _load_supermemory_config(str(tmp_path)) + assert cfg["container_tag"] == "demo_tag" + assert cfg["auto_capture"] is False + assert cfg["auto_recall"] is True + + +def test_clean_text_for_capture_strips_injected_context(): + text = "hello\nignore me\nworld" + assert _clean_text_for_capture(text) == "hello\nworld" + + +def test_format_prefetch_context_deduplicates_overlap(): + result = _format_prefetch_context( + static_facts=["Jordan prefers short answers"], + dynamic_facts=["Jordan prefers short answers", "Uses Hermes"], + search_results=[{"memory": "Uses Hermes", "similarity": 0.9}], + max_results=10, + ) + assert result.count("Jordan prefers short answers") == 1 + assert result.count("Uses Hermes") == 1 + assert "" in result + + +def test_prefetch_includes_profile_on_first_turn(provider): + provider._client.profile_response = { + "static": ["Jordan prefers short answers"], + "dynamic": ["Current project is Supermemory provider"], + "search_results": [{"memory": "Working on Hermes memory provider", "similarity": 0.88}], + } + provider.on_turn_start(1, "start") + result = provider.prefetch("what am I working on?") + assert "User Profile (Persistent)" in result + assert "Recent 
Context" in result + assert "Relevant Memories" in result + + +def test_prefetch_skips_profile_between_frequency(provider): + provider._client.profile_response = { + "static": ["Jordan prefers short answers"], + "dynamic": ["Current project is Supermemory provider"], + "search_results": [{"memory": "Working on Hermes memory provider", "similarity": 0.88}], + } + provider.on_turn_start(2, "next") + result = provider.prefetch("what am I working on?") + assert "Relevant Memories" in result + assert "User Profile (Persistent)" not in result + + +def test_sync_turn_skips_trivial_message(provider): + provider.sync_turn("ok", "sure", session_id="session-1") + assert provider._client.add_calls == [] + + +def test_sync_turn_persists_cleaned_exchange(provider): + provider.sync_turn( + "Please remember this\nignore", + "Got it, storing the context", + session_id="session-1", + ) + provider._sync_thread.join(timeout=1) + assert len(provider._client.add_calls) == 1 + content = provider._client.add_calls[0]["content"] + assert "ignore" not in content + assert "[role: user]" in content + assert "[role: assistant]" in content + + +def test_on_session_end_ingests_clean_messages(provider): + messages = [ + {"role": "system", "content": "skip"}, + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi there"}, + ] + provider.on_session_end(messages) + assert len(provider._client.ingest_calls) == 1 + payload = provider._client.ingest_calls[0] + assert payload["session_id"] == "session-1" + assert payload["messages"] == [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi there"}, + ] + + +def test_store_tool_returns_saved_payload(provider): + result = json.loads(provider.handle_tool_call("supermemory_store", {"content": "Jordan likes concise docs"})) + assert result["saved"] is True + assert result["id"] == "mem_123" + + +def test_search_tool_formats_results(provider): + provider._client.search_results = [ + {"id": "m1", "memory": 
"Jordan likes concise docs", "similarity": 0.92} + ] + result = json.loads(provider.handle_tool_call("supermemory_search", {"query": "concise docs"})) + assert result["count"] == 1 + assert result["results"][0]["similarity"] == 92 + + +def test_forget_tool_by_id(provider): + result = json.loads(provider.handle_tool_call("supermemory_forget", {"id": "m1"})) + assert result == {"forgotten": True, "id": "m1"} + assert provider._client.forgotten_ids == ["m1"] + + +def test_forget_tool_by_query(provider): + provider._client.forget_by_query_response = {"success": True, "message": "Forgot one", "id": "m7"} + result = json.loads(provider.handle_tool_call("supermemory_forget", {"query": "that thing"})) + assert result["success"] is True + assert result["id"] == "m7" + + +def test_profile_tool_formats_sections(provider): + provider._client.profile_response = { + "static": ["Jordan prefers concise docs"], + "dynamic": ["Working on Supermemory provider"], + "search_results": [], + } + result = json.loads(provider.handle_tool_call("supermemory_profile", {})) + assert result["static_count"] == 1 + assert result["dynamic_count"] == 1 + assert "User Profile (Persistent)" in result["profile"] + + +def test_handle_tool_call_returns_error_when_unconfigured(monkeypatch): + monkeypatch.delenv("SUPERMEMORY_API_KEY", raising=False) + p = SupermemoryMemoryProvider() + result = json.loads(p.handle_tool_call("supermemory_search", {"query": "x"})) + assert "error" in result From dc333388ec01afe55a47c5a4fdae377cfd000278 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sun, 5 Apr 2026 04:42:16 +0000 Subject: [PATCH 046/154] docs(memory): add Supermemory PR draft and cleanup --- SUPERMEMORY_PR_DRAFT.md | 82 +++++++++++++++++++ plugins/memory/supermemory/README.md | 2 +- .../memory/test_supermemory_provider.py | 2 - 3 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 SUPERMEMORY_PR_DRAFT.md diff --git a/SUPERMEMORY_PR_DRAFT.md b/SUPERMEMORY_PR_DRAFT.md new file mode 100644 
index 00000000..64bb7c21 --- /dev/null +++ b/SUPERMEMORY_PR_DRAFT.md @@ -0,0 +1,82 @@ +Title +feat(memory): add Supermemory memory provider + +Summary +This PR adds Supermemory as a native Hermes memory provider. + +It implements the upstream `MemoryProvider` interface rather than the general hook-based plugin system, so it works with the current memory-provider lifecycle and setup flow. The provider supports automatic recall, cleaned turn capture, session-end conversation ingest, and four explicit memory tools. + +What is included +- `plugins/memory/supermemory/plugin.yaml` +- `plugins/memory/supermemory/README.md` +- `plugins/memory/supermemory/__init__.py` +- focused tests for provider behavior and failure modes + +Behavior +When enabled, the provider can: +- prefetch relevant memory context before turns +- include profile facts on the first turn and on a configurable cadence +- store cleaned user-assistant turns after each completed response +- ingest the full session on session end via Supermemory conversations API +- expose explicit tools for memory store, search, forget, and profile access + +Tools +- `supermemory_store` +- `supermemory_search` +- `supermemory_forget` +- `supermemory_profile` + +Setup +Use the standard memory setup flow: + +```bash +hermes memory setup +``` + +Select `supermemory`, then provide: +- `SUPERMEMORY_API_KEY` in `.env` +- optional non-secret config written to `$HERMES_HOME/supermemory.json` + +Config surface +The provider currently supports: +- `container_tag` +- `auto_recall` +- `auto_capture` +- `max_recall_results` +- `profile_frequency` +- `capture_mode` +- `entity_context` +- `api_timeout` + +Design notes +- Implemented as a native memory provider so it fits Hermes's pluggable memory system and `hermes memory setup` / `status` flow. +- The implementation preserves the useful behavior of a working local Supermemory integration, but removes local-only naming and packaging assumptions. 
+- Recall context is fenced and stripped before capture to avoid recursive memory pollution. +- `on_memory_write()` is intentionally conservative in v1 and mirrors `add` writes only. Supermemory is not a simple CRUD table, so pretending replace/remove are lossless would be dishonest. +- Session-end ingest is kept in the provider because Supermemory's conversation ingestion endpoint is a meaningful part of the backend's graph-building behavior. + +Failure behavior +- `is_available()` performs no network calls +- missing API key or missing SDK leaves the provider unavailable without crashing Hermes +- recall, capture, and ingest failures degrade quietly and do not break the agent loop + +Validation +Ran: + +```bash +python3 -m pytest tests/plugins/memory/test_supermemory_provider.py tests/agent/test_memory_provider.py tests/agent/test_memory_plugin_e2e.py -q +``` + +Result: +- `76 passed` + +Reviewer-facing summary +This PR adds Supermemory as a first-class Hermes memory provider in the same structural shape as the existing built-in providers. It is setup-compatible, failure-tolerant, and intentionally conservative where backend semantics differ from simple CRUD expectations. + +The main reason for this PR is straightforward: Supermemory is a real external memory backend, and the right way to integrate it upstream is through the native `MemoryProvider` interface, not through a user-local hook plugin. 
+ +Points to review +- provider shape and config UX alignment with other memory providers +- recall formatting and profile cadence behavior +- whether the add-only `on_memory_write()` bridge is the right v1 scope +- test coverage for the provider lifecycle and failure paths diff --git a/plugins/memory/supermemory/README.md b/plugins/memory/supermemory/README.md index 465d4683..f34e4c99 100644 --- a/plugins/memory/supermemory/README.md +++ b/plugins/memory/supermemory/README.md @@ -17,7 +17,7 @@ Or manually: ```bash hermes config set memory.provider supermemory -echo "SUPERMEMORY_API_KEY=***" >> ~/.hermes/.env +echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env ``` ## Config diff --git a/tests/plugins/memory/test_supermemory_provider.py b/tests/plugins/memory/test_supermemory_provider.py index f61a9041..0bee1d21 100644 --- a/tests/plugins/memory/test_supermemory_provider.py +++ b/tests/plugins/memory/test_supermemory_provider.py @@ -1,6 +1,4 @@ import json -from pathlib import Path -from unittest.mock import MagicMock import pytest From 4fc7f3eaa59a50000bd839e2c2271e84f48843f1 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Sun, 5 Apr 2026 05:50:29 +0000 Subject: [PATCH 047/154] fix(memory): clean up supermemory provider threads --- plugins/memory/supermemory/__init__.py | 15 +++++- .../memory/test_supermemory_provider.py | 47 +++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/plugins/memory/supermemory/__init__.py b/plugins/memory/supermemory/__init__.py index 05583fae..f798b4a1 100644 --- a/plugins/memory/supermemory/__init__.py +++ b/plugins/memory/supermemory/__init__.py @@ -391,6 +391,7 @@ class SupermemoryMemoryProvider(MemoryProvider): self._prefetch_lock = threading.Lock() self._prefetch_thread: Optional[threading.Thread] = None self._sync_thread: Optional[threading.Thread] = None + self._write_thread: Optional[threading.Thread] = None self._auto_recall = True self._auto_capture = True self._max_recall_results = 
_DEFAULT_MAX_RECALL_RESULTS @@ -524,6 +525,7 @@ class SupermemoryMemoryProvider(MemoryProvider): if self._sync_thread and self._sync_thread.is_alive(): self._sync_thread.join(timeout=2.0) + self._sync_thread = None self._sync_thread = threading.Thread(target=_run, daemon=True, name="supermemory-sync") self._sync_thread.start() @@ -565,7 +567,18 @@ class SupermemoryMemoryProvider(MemoryProvider): except Exception: logger.debug("Supermemory on_memory_write failed", exc_info=True) - threading.Thread(target=_run, daemon=True, name="supermemory-memory-write").start() + if self._write_thread and self._write_thread.is_alive(): + self._write_thread.join(timeout=2.0) + self._write_thread = None + self._write_thread = threading.Thread(target=_run, daemon=False, name="supermemory-memory-write") + self._write_thread.start() + + def shutdown(self) -> None: + for attr_name in ("_prefetch_thread", "_sync_thread", "_write_thread"): + thread = getattr(self, attr_name, None) + if thread and thread.is_alive(): + thread.join(timeout=5.0) + setattr(self, attr_name, None) def get_tool_schemas(self) -> List[Dict[str, Any]]: return [STORE_SCHEMA, SEARCH_SCHEMA, FORGET_SCHEMA, PROFILE_SCHEMA] diff --git a/tests/plugins/memory/test_supermemory_provider.py b/tests/plugins/memory/test_supermemory_provider.py index 0bee1d21..689793f1 100644 --- a/tests/plugins/memory/test_supermemory_provider.py +++ b/tests/plugins/memory/test_supermemory_provider.py @@ -1,4 +1,5 @@ import json +import threading import pytest @@ -163,6 +164,52 @@ def test_on_session_end_ingests_clean_messages(provider): ] +def test_on_memory_write_tracks_thread(provider): + provider.on_memory_write("add", "memory", "Jordan likes concise docs") + assert provider._write_thread is not None + provider._write_thread.join(timeout=1) + assert len(provider._client.add_calls) == 1 + assert provider._client.add_calls[0]["metadata"]["type"] == "explicit_memory" + + +def test_shutdown_joins_and_clears_threads(provider, monkeypatch): + 
started = threading.Event() + release = threading.Event() + + def slow_add_memory(content, metadata=None, *, entity_context=""): + started.set() + release.wait(timeout=1) + provider._client.add_calls.append({ + "content": content, + "metadata": metadata, + "entity_context": entity_context, + }) + return {"id": "mem_slow"} + + monkeypatch.setattr(provider._client, "add_memory", slow_add_memory) + + provider.sync_turn( + "Please remember this request in long-term memory", + "Absolutely, I will keep that in long-term memory.", + session_id="session-1", + ) + assert started.wait(timeout=1) + assert provider._sync_thread is not None + + started.clear() + provider.on_memory_write("add", "memory", "Jordan likes concise docs") + assert started.wait(timeout=1) + assert provider._write_thread is not None + + release.set() + provider.shutdown() + + assert provider._sync_thread is None + assert provider._write_thread is None + assert provider._prefetch_thread is None + assert len(provider._client.add_calls) == 2 + + def test_store_tool_returns_saved_payload(provider): result = json.loads(provider.handle_tool_call("supermemory_store", {"content": "Jordan likes concise docs"})) assert result["saved"] is True From ac80d595cd6604fa2ab7bc0a18e880cbc0bce22d Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Mon, 6 Apr 2026 02:13:27 +0000 Subject: [PATCH 048/154] chore(memory): remove supermemory PR scaffolding --- SUPERMEMORY_PR_DRAFT.md | 82 -------------------------- plugins/memory/supermemory/plugin.yaml | 2 - 2 files changed, 84 deletions(-) delete mode 100644 SUPERMEMORY_PR_DRAFT.md diff --git a/SUPERMEMORY_PR_DRAFT.md b/SUPERMEMORY_PR_DRAFT.md deleted file mode 100644 index 64bb7c21..00000000 --- a/SUPERMEMORY_PR_DRAFT.md +++ /dev/null @@ -1,82 +0,0 @@ -Title -feat(memory): add Supermemory memory provider - -Summary -This PR adds Supermemory as a native Hermes memory provider. 
- -It implements the upstream `MemoryProvider` interface rather than the general hook-based plugin system, so it works with the current memory-provider lifecycle and setup flow. The provider supports automatic recall, cleaned turn capture, session-end conversation ingest, and four explicit memory tools. - -What is included -- `plugins/memory/supermemory/plugin.yaml` -- `plugins/memory/supermemory/README.md` -- `plugins/memory/supermemory/__init__.py` -- focused tests for provider behavior and failure modes - -Behavior -When enabled, the provider can: -- prefetch relevant memory context before turns -- include profile facts on the first turn and on a configurable cadence -- store cleaned user-assistant turns after each completed response -- ingest the full session on session end via Supermemory conversations API -- expose explicit tools for memory store, search, forget, and profile access - -Tools -- `supermemory_store` -- `supermemory_search` -- `supermemory_forget` -- `supermemory_profile` - -Setup -Use the standard memory setup flow: - -```bash -hermes memory setup -``` - -Select `supermemory`, then provide: -- `SUPERMEMORY_API_KEY` in `.env` -- optional non-secret config written to `$HERMES_HOME/supermemory.json` - -Config surface -The provider currently supports: -- `container_tag` -- `auto_recall` -- `auto_capture` -- `max_recall_results` -- `profile_frequency` -- `capture_mode` -- `entity_context` -- `api_timeout` - -Design notes -- Implemented as a native memory provider so it fits Hermes's pluggable memory system and `hermes memory setup` / `status` flow. -- The implementation preserves the useful behavior of a working local Supermemory integration, but removes local-only naming and packaging assumptions. -- Recall context is fenced and stripped before capture to avoid recursive memory pollution. -- `on_memory_write()` is intentionally conservative in v1 and mirrors `add` writes only. 
Supermemory is not a simple CRUD table, so pretending replace/remove are lossless would be dishonest. -- Session-end ingest is kept in the provider because Supermemory's conversation ingestion endpoint is a meaningful part of the backend's graph-building behavior. - -Failure behavior -- `is_available()` performs no network calls -- missing API key or missing SDK leaves the provider unavailable without crashing Hermes -- recall, capture, and ingest failures degrade quietly and do not break the agent loop - -Validation -Ran: - -```bash -python3 -m pytest tests/plugins/memory/test_supermemory_provider.py tests/agent/test_memory_provider.py tests/agent/test_memory_plugin_e2e.py -q -``` - -Result: -- `76 passed` - -Reviewer-facing summary -This PR adds Supermemory as a first-class Hermes memory provider in the same structural shape as the existing built-in providers. It is setup-compatible, failure-tolerant, and intentionally conservative where backend semantics differ from simple CRUD expectations. - -The main reason for this PR is straightforward: Supermemory is a real external memory backend, and the right way to integrate it upstream is through the native `MemoryProvider` interface, not through a user-local hook plugin. - -Points to review -- provider shape and config UX alignment with other memory providers -- recall formatting and profile cadence behavior -- whether the add-only `on_memory_write()` bridge is the right v1 scope -- test coverage for the provider lifecycle and failure paths diff --git a/plugins/memory/supermemory/plugin.yaml b/plugins/memory/supermemory/plugin.yaml index 372edb2b..23321bdb 100644 --- a/plugins/memory/supermemory/plugin.yaml +++ b/plugins/memory/supermemory/plugin.yaml @@ -3,5 +3,3 @@ version: 1.0.0 description: "Supermemory semantic long-term memory with profile recall, semantic search, explicit memory tools, and session ingest." 
pip_dependencies: - supermemory -hooks: - - on_session_end From 88bba31b7d652b9bc9a5f85fd9d24326611b8f1e Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 6 Apr 2026 21:50:37 -0700 Subject: [PATCH 049/154] fix: use get_hermes_home() for profile-scoped storage, fix README - Replace hardcoded os.path.expanduser('~/.hermes') with get_hermes_home() from hermes_constants for profile isolation - Fix README echo command quoting error --- plugins/memory/supermemory/README.md | 2 +- plugins/memory/supermemory/__init__.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/memory/supermemory/README.md b/plugins/memory/supermemory/README.md index f34e4c99..7c1310fe 100644 --- a/plugins/memory/supermemory/README.md +++ b/plugins/memory/supermemory/README.md @@ -17,7 +17,7 @@ Or manually: ```bash hermes config set memory.provider supermemory -echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env +echo 'SUPERMEMORY_API_KEY=your-key-here' >> ~/.hermes/.env ``` ## Config diff --git a/plugins/memory/supermemory/__init__.py b/plugins/memory/supermemory/__init__.py index f798b4a1..ee2c35e6 100644 --- a/plugins/memory/supermemory/__init__.py +++ b/plugins/memory/supermemory/__init__.py @@ -399,7 +399,7 @@ class SupermemoryMemoryProvider(MemoryProvider): self._capture_mode = _DEFAULT_CAPTURE_MODE self._entity_context = _DEFAULT_ENTITY_CONTEXT self._api_timeout = _DEFAULT_API_TIMEOUT - self._hermes_home = os.path.expanduser("~/.hermes") + self._hermes_home = "" self._write_enabled = True self._active = False @@ -439,7 +439,8 @@ class SupermemoryMemoryProvider(MemoryProvider): _save_supermemory_config(sanitized, hermes_home) def initialize(self, session_id: str, **kwargs) -> None: - self._hermes_home = kwargs.get("hermes_home") or os.path.expanduser("~/.hermes") + from hermes_constants import get_hermes_home + self._hermes_home = kwargs.get("hermes_home") or str(get_hermes_home()) self._session_id = session_id self._turn_count = 0 self._config = 
_load_supermemory_config(self._hermes_home) From c7768137fa058f8462a24e804c2e4b694ba9c5fb Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 6 Apr 2026 21:54:42 -0700 Subject: [PATCH 050/154] docs: add Supermemory to memory providers docs, env vars, CLI reference - Add full Supermemory section to memory-providers.md with config table, tools, setup instructions, and key features - Update provider count from 7 to 8 across memory.md and memory-providers.md - Add SUPERMEMORY_API_KEY to environment-variables.md - Add Supermemory to integrations/providers.md optional API keys table - Add supermemory to cli-commands.md provider list - Add Supermemory to profile isolation section (config file providers) --- website/docs/integrations/providers.md | 1 + website/docs/reference/cli-commands.md | 2 +- .../docs/reference/environment-variables.md | 1 + .../user-guide/features/memory-providers.md | 50 +++++++++++++++++-- website/docs/user-guide/features/memory.md | 2 +- 5 files changed, 50 insertions(+), 6 deletions(-) diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index 643cdbf5..ca6a0c51 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -846,6 +846,7 @@ You can switch between providers at any time with `hermes model` — no restart | OpenAI TTS + voice transcription | [OpenAI](https://platform.openai.com/api-keys) | `VOICE_TOOLS_OPENAI_KEY` | | RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` | | Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` | +| Semantic long-term memory | [Supermemory](https://supermemory.ai) | `SUPERMEMORY_API_KEY` | ### Self-Hosting Firecrawl diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 5fbe921b..55983b1c 100644 --- a/website/docs/reference/cli-commands.md +++ 
b/website/docs/reference/cli-commands.md @@ -383,7 +383,7 @@ Subcommands: hermes memory ``` -Set up and manage external memory provider plugins. Available providers: honcho, openviking, mem0, hindsight, holographic, retaindb, byterover. Only one external provider can be active at a time. Built-in memory (MEMORY.md/USER.md) is always active. +Set up and manage external memory provider plugins. Available providers: honcho, openviking, mem0, hindsight, holographic, retaindb, byterover, supermemory. Only one external provider can be active at a time. Built-in memory (MEMORY.md/USER.md) is always active. Subcommands: diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index fb2a6752..89934932 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -98,6 +98,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `GITHUB_TOKEN` | GitHub token for Skills Hub (higher API rate limits, skill publish) | | `HONCHO_API_KEY` | Cross-session user modeling ([honcho.dev](https://honcho.dev/)) | | `HONCHO_BASE_URL` | Base URL for self-hosted Honcho instances (default: Honcho cloud). 
No API key required for local instances | +| `SUPERMEMORY_API_KEY` | Semantic long-term memory with profile recall and session ingest ([supermemory.ai](https://supermemory.ai)) | | `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) | | `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) | | `DAYTONA_API_KEY` | Daytona cloud sandboxes ([daytona.io](https://daytona.io/)) | diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index 3c4150ff..3396c70e 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -1,12 +1,12 @@ --- sidebar_position: 4 title: "Memory Providers" -description: "External memory provider plugins — Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover" +description: "External memory provider plugins — Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover, Supermemory" --- # Memory Providers -Hermes Agent ships with 7 external memory provider plugins that give the agent persistent, cross-session knowledge beyond the built-in MEMORY.md and USER.md. Only **one** external provider can be active at a time — the built-in memory is always active alongside it. +Hermes Agent ships with 8 external memory provider plugins that give the agent persistent, cross-session knowledge beyond the built-in MEMORY.md and USER.md. Only **one** external provider can be active at a time — the built-in memory is always active alongside it. 
## Quick Start @@ -20,7 +20,7 @@ Or set manually in `~/.hermes/config.yaml`: ```yaml memory: - provider: openviking # or honcho, mem0, hindsight, holographic, retaindb, byterover + provider: openviking # or honcho, mem0, hindsight, holographic, retaindb, byterover, supermemory ``` ## How It Works @@ -382,6 +382,47 @@ hermes config set memory.provider byterover --- +### Supermemory + +Semantic long-term memory with profile recall, semantic search, explicit memory tools, and session-end conversation ingest via the Supermemory graph API. + +| | | +|---|---| +| **Best for** | Semantic recall with user profiling and session-level graph building | +| **Requires** | `pip install supermemory` + [API key](https://supermemory.ai) | +| **Data storage** | Supermemory Cloud | +| **Cost** | Supermemory pricing | + +**Tools:** `supermemory_store` (save explicit memories), `supermemory_search` (semantic similarity search), `supermemory_forget` (forget by ID or best-match query), `supermemory_profile` (persistent profile + recent context) + +**Setup:** +```bash +hermes memory setup # select "supermemory" +# Or manually: +hermes config set memory.provider supermemory +echo 'SUPERMEMORY_API_KEY=your-key-here' >> ~/.hermes/.env +``` + +**Config:** `$HERMES_HOME/supermemory.json` + +| Key | Default | Description | +|-----|---------|-------------| +| `container_tag` | `hermes` | Container tag used for search and writes | +| `auto_recall` | `true` | Inject relevant memory context before turns | +| `auto_capture` | `true` | Store cleaned user-assistant turns after each response | +| `max_recall_results` | `10` | Max recalled items to format into context | +| `profile_frequency` | `50` | Include profile facts on first turn and every N turns | +| `capture_mode` | `all` | Skip tiny or trivial turns by default | +| `api_timeout` | `5.0` | Timeout for SDK and ingest requests | + +**Key features:** +- Automatic context fencing — strips recalled memories from captured turns to prevent recursive 
memory pollution +- Session-end conversation ingest for richer graph-level knowledge building +- Profile facts injected on first turn and at configurable intervals +- Trivial message filtering (skips "ok", "thanks", etc.) + +--- + ## Provider Comparison | Provider | Storage | Cost | Tools | Dependencies | Unique Feature | @@ -393,13 +434,14 @@ hermes config set memory.provider byterover | **Holographic** | Local | Free | 2 | None | HRR algebra + trust scoring | | **RetainDB** | Cloud | $20/mo | 5 | `requests` | Delta compression | | **ByteRover** | Local/Cloud | Free/Paid | 3 | `brv` CLI | Pre-compression extraction | +| **Supermemory** | Cloud | Paid | 4 | `supermemory` | Context fencing + session graph ingest | ## Profile Isolation Each provider's data is isolated per [profile](/docs/user-guide/profiles): - **Local storage providers** (Holographic, ByteRover) use `$HERMES_HOME/` paths which differ per profile -- **Config file providers** (Honcho, Mem0, Hindsight) store config in `$HERMES_HOME/` so each profile has its own credentials +- **Config file providers** (Honcho, Mem0, Hindsight, Supermemory) store config in `$HERMES_HOME/` so each profile has its own credentials - **Cloud providers** (RetainDB) auto-derive profile-scoped project names - **Env var providers** (OpenViking) are configured via each profile's `.env` file diff --git a/website/docs/user-guide/features/memory.md b/website/docs/user-guide/features/memory.md index 8be3f748..77f74d28 100644 --- a/website/docs/user-guide/features/memory.md +++ b/website/docs/user-guide/features/memory.md @@ -209,7 +209,7 @@ memory: ## External Memory Providers -For deeper, persistent memory that goes beyond MEMORY.md and USER.md, Hermes ships with 7 external memory provider plugins — including Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, and ByteRover. 
+For deeper, persistent memory that goes beyond MEMORY.md and USER.md, Hermes ships with 8 external memory provider plugins — including Honcho, OpenViking, Mem0, Hindsight, Holographic, RetainDB, ByteRover, and Supermemory. External providers run **alongside** built-in memory (never replacing it) and add capabilities like knowledge graphs, semantic search, automatic fact extraction, and cross-session user modeling. From bff47eee486858ec00744b08962c8854cc3dd030 Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 7 Apr 2026 15:48:16 +1000 Subject: [PATCH 051/154] fix: HERMES_PORTAL_BASE_URL env var ignored during Nous login MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _login_nous() was passing pconfig.portal_base_url (hardcoded production URL) as a fallback when no --portal-url CLI flag was given. This meant _nous_device_code_login() received a truthy portal_base_url argument and never reached the env var fallback chain. Users setting HERMES_PORTAL_BASE_URL or NOUS_PORTAL_BASE_URL in .env to point at a staging portal were silently ignored — login always went to production. Fix: pass None when no CLI flag is provided, letting the downstream function properly check env vars before falling back to the default. Fallback chain is now: 1. --portal-url CLI arg 2. HERMES_PORTAL_BASE_URL env var 3. NOUS_PORTAL_BASE_URL env var 4. DEFAULT_NOUS_PORTAL_URL (production) Same fix applied to inference_base_url for consistency. 
--- hermes_cli/auth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 94cc08f2..d8c628bb 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2577,8 +2577,8 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: try: auth_state = _nous_device_code_login( - portal_base_url=getattr(args, "portal_url", None) or pconfig.portal_base_url, - inference_base_url=getattr(args, "inference_url", None) or pconfig.inference_base_url, + portal_base_url=getattr(args, "portal_url", None), + inference_base_url=getattr(args, "inference_url", None), client_id=getattr(args, "client_id", None) or pconfig.client_id, scope=getattr(args, "scope", None) or pconfig.scope, open_browser=not getattr(args, "no_browser", False), From 5a2cf280a3d652b10cfc15c4944dbc167df3ed5e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:00:04 -0700 Subject: [PATCH 052/154] feat: interactive model picker for Telegram and Discord (#5742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit /model with no args now shows an interactive UI on Telegram and Discord instead of a text list: Telegram: Inline keyboard buttons — two-step drill-down. Step 1: Provider buttons with model counts (e.g. 'OpenRouter (15)') Step 2: Model buttons within the selected provider Edits the same message in-place as the user navigates. Back/Cancel buttons for navigation. Discord: Embed + Select dropdown menus via discord.ui.View. Step 1: Provider dropdown with model counts Step 2: Model dropdown within the selected provider Back/Cancel buttons. Auth-gated to allowed users. Platforms without picker support (Slack, WhatsApp, Signal, etc.) fall back to the existing text list. /model continues to work as a direct text switch on all platforms — the interactive picker is only for bare /model. 
Implementation: - TelegramAdapter.send_model_picker() + _handle_model_picker_callback() with compact callback_data (mp:/mm:/mb/mx, all within 64-byte limit) - DiscordAdapter.send_model_picker() + ModelPickerView (discord.ui.View) with Select menus (up to 25 options per dropdown) - GatewayRunner._handle_model_command() detects adapter capability via getattr(type(adapter), 'send_model_picker', None) (safe with mocks) and sends picker with async callback closure for the switch logic - Callback performs full switch: switch_model(), cached agent update, session override, pending model note — same as /model --- gateway/platforms/discord.py | 276 ++++++++++++++++++++++++++++++++++ gateway/platforms/telegram.py | 242 ++++++++++++++++++++++++++++- gateway/run.py | 116 +++++++++++++- 3 files changed, 630 insertions(+), 4 deletions(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 75ba3d11..5e1be74a 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -2039,6 +2039,66 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: return SendResult(success=False, error=str(e)) + async def send_model_picker( + self, + chat_id: str, + providers: list, + current_model: str, + current_provider: str, + session_key: str, + on_model_selected, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Send an interactive select-menu model picker. + + Two-step drill-down: provider dropdown → model dropdown. + Uses Discord embeds + Select menus via ``ModelPickerView``. 
+ """ + if not self._client or not DISCORD_AVAILABLE: + return SendResult(success=False, error="Not connected") + + try: + # Resolve target channel (use thread_id if present) + target_id = chat_id + if metadata and metadata.get("thread_id"): + target_id = metadata["thread_id"] + + channel = self._client.get_channel(int(target_id)) + if not channel: + channel = await self._client.fetch_channel(int(target_id)) + + try: + from hermes_cli.providers import get_label + provider_label = get_label(current_provider) + except Exception: + provider_label = current_provider + + embed = discord.Embed( + title="⚙ Model Configuration", + description=( + f"Current model: `{current_model or 'unknown'}`\n" + f"Provider: {provider_label}\n\n" + f"Select a provider:" + ), + color=discord.Color.blue(), + ) + + view = ModelPickerView( + providers=providers, + current_model=current_model, + current_provider=current_provider, + session_key=session_key, + on_model_selected=on_model_selected, + allowed_user_ids=self._allowed_user_ids, + ) + + msg = await channel.send(embed=embed, view=view) + return SendResult(success=True, message_id=str(msg.id)) + + except Exception as e: + logger.warning("[%s] send_model_picker failed: %s", self.name, e) + return SendResult(success=False, error=str(e)) + def _get_parent_channel_id(self, channel: Any) -> Optional[str]: """Return the parent channel ID for a Discord thread-like channel, if present.""" parent = getattr(channel, "parent", None) @@ -2530,3 +2590,219 @@ if DISCORD_AVAILABLE: self.resolved = True for child in self.children: child.disabled = True + + class ModelPickerView(discord.ui.View): + """Interactive select-menu view for model switching. + + Two-step drill-down: provider dropdown → model dropdown. + Edits the original message in-place as the user navigates. + Times out after 2 minutes. 
+ """ + + def __init__( + self, + providers: list, + current_model: str, + current_provider: str, + session_key: str, + on_model_selected, + allowed_user_ids: set, + ): + super().__init__(timeout=120) + self.providers = providers + self.current_model = current_model + self.current_provider = current_provider + self.session_key = session_key + self.on_model_selected = on_model_selected + self.allowed_user_ids = allowed_user_ids + self.resolved = False + self._selected_provider: str = "" + + self._build_provider_select() + + def _check_auth(self, interaction: discord.Interaction) -> bool: + if not self.allowed_user_ids: + return True + return str(interaction.user.id) in self.allowed_user_ids + + def _build_provider_select(self): + """Build the provider dropdown menu.""" + self.clear_items() + options = [] + for p in self.providers: + count = p.get("total_models", len(p.get("models", []))) + label = f"{p['name']} ({count} models)" + desc = "current" if p.get("is_current") else None + options.append( + discord.SelectOption( + label=label[:100], + value=p["slug"], + default=bool(p.get("is_current")), + description=desc, + ) + ) + if not options: + return + + select = discord.ui.Select( + placeholder="Choose a provider...", + options=options[:25], + custom_id="model_provider_select", + ) + select.callback = self._on_provider_selected + self.add_item(select) + + cancel_btn = discord.ui.Button( + label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel" + ) + cancel_btn.callback = self._on_cancel + self.add_item(cancel_btn) + + def _build_model_select(self, provider_slug: str): + """Build the model dropdown for a specific provider.""" + self.clear_items() + provider = next( + (p for p in self.providers if p["slug"] == provider_slug), None + ) + if not provider: + return + + models = provider.get("models", []) + options = [] + for model_id in models[:25]: + short = model_id.split("/")[-1] if "/" in model_id else model_id + options.append( + 
discord.SelectOption( + label=short[:100], + value=model_id[:100], + ) + ) + if not options: + return + + select = discord.ui.Select( + placeholder=f"Choose a model from {provider.get('name', provider_slug)}...", + options=options, + custom_id="model_model_select", + ) + select.callback = self._on_model_selected + self.add_item(select) + + back_btn = discord.ui.Button( + label="◀ Back", style=discord.ButtonStyle.grey, custom_id="model_back" + ) + back_btn.callback = self._on_back + self.add_item(back_btn) + + cancel_btn = discord.ui.Button( + label="Cancel", style=discord.ButtonStyle.red, custom_id="model_cancel2" + ) + cancel_btn.callback = self._on_cancel + self.add_item(cancel_btn) + + async def _on_provider_selected(self, interaction: discord.Interaction): + if not self._check_auth(interaction): + await interaction.response.send_message( + "You're not authorized~", ephemeral=True + ) + return + + provider_slug = interaction.data["values"][0] + self._selected_provider = provider_slug + provider = next( + (p for p in self.providers if p["slug"] == provider_slug), None + ) + pname = provider.get("name", provider_slug) if provider else provider_slug + + self._build_model_select(provider_slug) + + total = provider.get("total_models", 0) if provider else 0 + shown = min(len(provider.get("models", [])), 25) if provider else 0 + extra = f"\n*{total - shown} more available — type `/model ` directly*" if total > shown else "" + + await interaction.response.edit_message( + embed=discord.Embed( + title="⚙ Model Configuration", + description=f"Provider: **{pname}**\nSelect a model:{extra}", + color=discord.Color.blue(), + ), + view=self, + ) + + async def _on_model_selected(self, interaction: discord.Interaction): + if self.resolved: + await interaction.response.send_message( + "Already resolved~", ephemeral=True + ) + return + if not self._check_auth(interaction): + await interaction.response.send_message( + "You're not authorized~", ephemeral=True + ) + return + + 
self.resolved = True + model_id = interaction.data["values"][0] + + try: + result_text = await self.on_model_selected( + str(interaction.channel_id), + model_id, + self._selected_provider, + ) + except Exception as exc: + result_text = f"Error switching model: {exc}" + + self.clear_items() + await interaction.response.edit_message( + embed=discord.Embed( + title="⚙ Model Switched", + description=result_text, + color=discord.Color.green(), + ), + view=self, + ) + + async def _on_back(self, interaction: discord.Interaction): + if not self._check_auth(interaction): + await interaction.response.send_message( + "You're not authorized~", ephemeral=True + ) + return + + self._build_provider_select() + + try: + from hermes_cli.providers import get_label + provider_label = get_label(self.current_provider) + except Exception: + provider_label = self.current_provider + + await interaction.response.edit_message( + embed=discord.Embed( + title="⚙ Model Configuration", + description=( + f"Current model: `{self.current_model or 'unknown'}`\n" + f"Provider: {provider_label}\n\n" + f"Select a provider:" + ), + color=discord.Color.blue(), + ), + view=self, + ) + + async def _on_cancel(self, interaction: discord.Interaction): + self.resolved = True + self.clear_items() + await interaction.response.edit_message( + embed=discord.Embed( + title="⚙ Model Configuration", + description="Model selection cancelled.", + color=discord.Color.greyple(), + ), + view=self, + ) + + async def on_timeout(self): + self.resolved = True + self.clear_items() diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 7575c10f..0362b9f9 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -151,6 +151,8 @@ class TelegramAdapter(BasePlatformAdapter): self._dm_topics: Dict[str, int] = {} # DM Topics config from extra.dm_topics self._dm_topics_config: List[Dict[str, Any]] = self.config.extra.get("dm_topics", []) + # Interactive model picker state per chat + 
self._model_picker_state: Dict[str, dict] = {} def _fallback_ips(self) -> list[str]: """Return validated fallback IPs from config (populated by _apply_env_overrides).""" @@ -1008,14 +1010,252 @@ class TelegramAdapter(BasePlatformAdapter): logger.warning("[%s] send_update_prompt failed: %s", self.name, e) return SendResult(success=False, error=str(e)) + async def send_model_picker( + self, + chat_id: str, + providers: list, + current_model: str, + current_provider: str, + session_key: str, + on_model_selected, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Send an interactive inline-keyboard model picker. + + Two-step drill-down: provider selection → model selection. + Edits the same message in-place as the user navigates. + """ + if not self._bot: + return SendResult(success=False, error="Not connected") + + try: + from hermes_cli.providers import get_label + except ImportError: + def get_label(slug): + return slug + + try: + # Build provider buttons — 2 per row + buttons: list = [] + for p in providers: + count = p.get("total_models", len(p.get("models", []))) + label = f"{p['name']} ({count})" + if p.get("is_current"): + label = f"✓ {label}" + # Compact callback data: mp: (max 64 bytes) + buttons.append( + InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}") + ) + + rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] + rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")]) + keyboard = InlineKeyboardMarkup(rows) + + provider_label = get_label(current_provider) + text = ( + f"⚙ *Model Configuration*\n\n" + f"Current model: `{current_model or 'unknown'}`\n" + f"Provider: {provider_label}\n\n" + f"Select a provider:" + ) + + thread_id = metadata.get("thread_id") if metadata else None + msg = await self._bot.send_message( + chat_id=int(chat_id), + text=text, + parse_mode=ParseMode.MARKDOWN, + reply_markup=keyboard, + message_thread_id=int(thread_id) if thread_id else None, + ) + + # Store picker state keyed by 
chat_id + self._model_picker_state[str(chat_id)] = { + "msg_id": msg.message_id, + "providers": providers, + "session_key": session_key, + "on_model_selected": on_model_selected, + "current_model": current_model, + "current_provider": current_provider, + } + + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + logger.warning("[%s] send_model_picker failed: %s", self.name, e) + return SendResult(success=False, error=str(e)) + + async def _handle_model_picker_callback( + self, query, data: str, chat_id: str + ) -> None: + """Handle model picker inline keyboard callbacks (mp:/mm:/mb:/mx:).""" + state = self._model_picker_state.get(chat_id) + if not state: + await query.answer(text="Picker expired — use /model again.") + return + + try: + from hermes_cli.providers import get_label + except ImportError: + def get_label(slug): + return slug + + if data.startswith("mp:"): + # --- Provider selected: show model buttons --- + provider_slug = data[3:] + provider = next( + (p for p in state["providers"] if p["slug"] == provider_slug), + None, + ) + if not provider: + await query.answer(text="Provider not found.") + return + + models = provider.get("models", []) + state["selected_provider"] = provider_slug + state["selected_provider_name"] = provider.get("name", provider_slug) + state["model_list"] = models + + buttons: list = [] + for i, model_id in enumerate(models): + # Short display label: strip vendor prefix + short = model_id.split("/")[-1] if "/" in model_id else model_id + # Truncate long model names for button label (max ~40 chars) + if len(short) > 38: + short = short[:35] + "..." 
+ buttons.append( + InlineKeyboardButton(short, callback_data=f"mm:{i}") + ) + + rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] + rows.append([ + InlineKeyboardButton("◀ Back", callback_data="mb"), + InlineKeyboardButton("✗ Cancel", callback_data="mx"), + ]) + keyboard = InlineKeyboardMarkup(rows) + + pname = provider.get("name", provider_slug) + total = provider.get("total_models", len(models)) + shown = len(models) + extra = f"\n_{total - shown} more available — type `/model ` directly_" if total > shown else "" + + await query.edit_message_text( + text=( + f"⚙ *Model Configuration*\n\n" + f"Provider: *{pname}*\n" + f"Select a model:{extra}" + ), + parse_mode=ParseMode.MARKDOWN, + reply_markup=keyboard, + ) + await query.answer() + + elif data.startswith("mm:"): + # --- Model selected: perform the switch --- + try: + idx = int(data[3:]) + except ValueError: + await query.answer(text="Invalid selection.") + return + + model_list = state.get("model_list", []) + if idx < 0 or idx >= len(model_list): + await query.answer(text="Invalid model index.") + return + + model_id = model_list[idx] + provider_slug = state.get("selected_provider", "") + callback = state.get("on_model_selected") + + if not callback: + await query.answer(text="Picker expired.") + return + + try: + result_text = await callback(chat_id, model_id, provider_slug) + except Exception as exc: + logger.error("Model picker switch failed: %s", exc) + result_text = f"Error switching model: {exc}" + + # Edit message to show confirmation, remove buttons + try: + await query.edit_message_text( + text=result_text, + parse_mode=ParseMode.MARKDOWN, + reply_markup=None, + ) + except Exception: + # Markdown parse failure — retry as plain text + try: + await query.edit_message_text( + text=result_text, + parse_mode=None, + reply_markup=None, + ) + except Exception: + pass + await query.answer(text="Model switched!") + + # Clean up state + self._model_picker_state.pop(chat_id, None) + + elif data == 
"mb": + # --- Back to provider list --- + buttons = [] + for p in state["providers"]: + count = p.get("total_models", len(p.get("models", []))) + label = f"{p['name']} ({count})" + if p.get("is_current"): + label = f"✓ {label}" + buttons.append( + InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}") + ) + + rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] + rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")]) + keyboard = InlineKeyboardMarkup(rows) + + try: + provider_label = get_label(state["current_provider"]) + except Exception: + provider_label = state["current_provider"] + + await query.edit_message_text( + text=( + f"⚙ *Model Configuration*\n\n" + f"Current model: `{state['current_model'] or 'unknown'}`\n" + f"Provider: {provider_label}\n\n" + f"Select a provider:" + ), + parse_mode=ParseMode.MARKDOWN, + reply_markup=keyboard, + ) + await query.answer() + + elif data == "mx": + # --- Cancel --- + self._model_picker_state.pop(chat_id, None) + await query.edit_message_text( + text="Model selection cancelled.", + reply_markup=None, + ) + await query.answer() + async def _handle_callback_query( self, update: "Update", context: "ContextTypes.DEFAULT_TYPE" ) -> None: - """Handle inline keyboard button clicks (update prompts).""" + """Handle inline keyboard button clicks.""" query = update.callback_query if not query or not query.data: return data = query.data + + # --- Model picker callbacks --- + if data.startswith(("mp:", "mm:", "mb", "mx")): + chat_id = str(query.message.chat_id) if query.message else None + if chat_id: + await self._handle_model_picker_callback(query, data, chat_id) + return + + # --- Update prompt callbacks --- if not data.startswith("update_prompt:"): return answer = data.split(":", 1)[1] # "y" or "n" diff --git a/gateway/run.py b/gateway/run.py index 9d5ac5aa..08be2b9d 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3464,11 +3464,11 @@ class GatewayRunner: lines.append(f"_(Requested page 
{requested_page} was out of range, showing page {page}.)_") return "\n".join(lines) - async def _handle_model_command(self, event: MessageEvent) -> str: + async def _handle_model_command(self, event: MessageEvent) -> Optional[str]: """Handle /model command — switch model for this session. Supports: - /model — show current model info + /model — interactive picker (Telegram/Discord) or text list /model — switch for this session only /model --global — switch and persist to config.yaml /model --provider — switch provider + model @@ -3516,8 +3516,118 @@ class GatewayRunner: current_base_url = override.get("base_url", current_base_url) current_api_key = override.get("api_key", current_api_key) - # No args: show authenticated providers with models + # No args: show interactive picker (Telegram/Discord) or text list if not model_input and not explicit_provider: + # Try interactive picker if the platform supports it + adapter = self.adapters.get(source.platform) + has_picker = ( + adapter is not None + and getattr(type(adapter), "send_model_picker", None) is not None + ) + + if has_picker: + try: + providers = list_authenticated_providers( + current_provider=current_provider, + user_providers=user_provs, + max_models=8, + ) + except Exception: + providers = [] + + if providers: + # Build a callback closure for when the user picks a model. + # Captures self + locals needed for the switch logic. 
+ _self = self + _session_key = session_key + _cur_model = current_model + _cur_provider = current_provider + _cur_base_url = current_base_url + _cur_api_key = current_api_key + + async def _on_model_selected( + _chat_id: str, model_id: str, provider_slug: str + ) -> str: + """Perform the model switch and return confirmation text.""" + result = _switch_model( + raw_input=model_id, + current_provider=_cur_provider, + current_model=_cur_model, + current_base_url=_cur_base_url, + current_api_key=_cur_api_key, + is_global=False, + explicit_provider=provider_slug, + ) + if not result.success: + return f"Error: {result.error_message}" + + # Update cached agent in-place + cached_entry = None + _cache_lock = getattr(_self, "_agent_cache_lock", None) + _cache = getattr(_self, "_agent_cache", None) + if _cache_lock and _cache is not None: + with _cache_lock: + cached_entry = _cache.get(_session_key) + if cached_entry and cached_entry[0] is not None: + try: + cached_entry[0].switch_model( + new_model=result.new_model, + new_provider=result.target_provider, + api_key=result.api_key, + base_url=result.base_url, + api_mode=result.api_mode, + ) + except Exception as exc: + logger.warning("Picker model switch failed for cached agent: %s", exc) + + # Store model note + session override + if not hasattr(_self, "_pending_model_notes"): + _self._pending_model_notes = {} + _self._pending_model_notes[_session_key] = ( + f"[Note: model was just switched from {_cur_model} to {result.new_model} " + f"via {result.provider_label or result.target_provider}. 
" + f"Adjust your self-identification accordingly.]" + ) + if not hasattr(_self, "_session_model_overrides"): + _self._session_model_overrides = {} + _self._session_model_overrides[_session_key] = { + "model": result.new_model, + "provider": result.target_provider, + "api_key": result.api_key, + "base_url": result.base_url, + "api_mode": result.api_mode, + } + + # Build confirmation text + plabel = result.provider_label or result.target_provider + lines = [f"Model switched to `{result.new_model}`"] + lines.append(f"Provider: {plabel}") + mi = result.model_info + if mi: + if mi.context_window: + lines.append(f"Context: {mi.context_window:,} tokens") + if mi.max_output: + lines.append(f"Max output: {mi.max_output:,} tokens") + if mi.has_cost_data(): + lines.append(f"Cost: {mi.format_cost()}") + lines.append(f"Capabilities: {mi.format_capabilities()}") + lines.append("_(session only — use `/model --global` to persist)_") + return "\n".join(lines) + + metadata = {"thread_id": source.thread_id} if source.thread_id else None + result = await adapter.send_model_picker( + chat_id=source.chat_id, + providers=providers, + current_model=current_model, + current_provider=current_provider, + session_key=session_key, + on_model_selected=_on_model_selected, + metadata=metadata, + ) + if result.success: + return None # Picker sent — adapter handles the response + + # Fallback: text list (for platforms without picker or if picker failed) provider_label = get_label(current_provider) lines = [f"Current: `{current_model or 'unknown'}` on {provider_label}", ""] From 8dee82ea1e1783b95b380f074386c7a9c5cf376c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:00:14 -0700 Subject: [PATCH 053/154] fix: stream consumer creates new message after tool boundaries (#5739) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When streaming was enabled on the gateway, the stream consumer created a 
single message at the start and kept editing it as tokens arrived. Tool progress messages were sent as separate messages below it. Since edits don't change message position on Telegram/Matrix/Discord, the final response ended up stuck above all tool progress messages — users had to scroll up past potentially dozens of tool call lines to read the answer. The agent already sends stream_delta_callback(None) at tool boundaries (before _execute_tool_calls). The stream consumer was ignoring this signal. Now it treats None as a segment break: finalizes the current message (removes cursor), resets _message_id, and the next text chunk creates a fresh message below the tool progress messages. Timeline before: [msg 1: 'Let me search...' → edits → 'Here is the answer'] ← top [msg 2: tool progress lines] ← bottom Timeline after: [msg 1: 'Let me search...'] ← top [msg 2: tool progress lines] [msg 3: 'Here is the answer'] ← bottom (visible) Reported by SkyLinx on Discord. --- gateway/stream_consumer.py | 29 ++++- tests/gateway/test_stream_consumer.py | 147 ++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 2 deletions(-) diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index 7f4a73d0..59e72755 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -28,6 +28,10 @@ logger = logging.getLogger("gateway.stream_consumer") # Sentinel to signal the stream is complete _DONE = object() +# Sentinel to signal a tool boundary — finalize current message and start a +# new one so that subsequent text appears below tool progress messages. +_NEW_SEGMENT = object() + @dataclass class StreamConsumerConfig: @@ -78,9 +82,16 @@ class GatewayStreamConsumer: return self._already_sent def on_delta(self, text: str) -> None: - """Thread-safe callback — called from the agent's worker thread.""" + """Thread-safe callback — called from the agent's worker thread. 
+ + When *text* is ``None``, signals a tool boundary: the current message + is finalized and subsequent text will be sent as a new message so it + appears below any tool-progress messages the gateway sent in between. + """ if text: self._queue.put(text) + elif text is None: + self._queue.put(_NEW_SEGMENT) def finish(self) -> None: """Signal that the stream is complete.""" @@ -96,12 +107,16 @@ class GatewayStreamConsumer: while True: # Drain all available items from the queue got_done = False + got_segment_break = False while True: try: item = self._queue.get_nowait() if item is _DONE: got_done = True break + if item is _NEW_SEGMENT: + got_segment_break = True + break self._accumulated += item except queue.Empty: break @@ -111,6 +126,7 @@ class GatewayStreamConsumer: elapsed = now - self._last_edit_time should_edit = ( got_done + or got_segment_break or (elapsed >= self.cfg.edit_interval and len(self._accumulated) > 0) or len(self._accumulated) >= self.cfg.buffer_threshold @@ -133,7 +149,7 @@ class GatewayStreamConsumer: self._last_sent_text = "" display_text = self._accumulated - if not got_done: + if not got_done and not got_segment_break: display_text += self.cfg.cursor await self._send_or_edit(display_text) @@ -145,6 +161,15 @@ class GatewayStreamConsumer: await self._send_or_edit(self._accumulated) return + # Tool boundary: the should_edit block above already flushed + # accumulated text without a cursor. Reset state so the next + # text chunk creates a fresh message below any tool-progress + # messages the gateway sent in between. 
+ if got_segment_break: + self._message_id = None + self._accumulated = "" + self._last_sent_text = "" + await asyncio.sleep(0.05) # Small yield to not busy-loop except asyncio.CancelledError: diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index 1234307c..6c908bbe 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -177,3 +177,150 @@ class TestStreamRunMediaStripping: assert "MEDIA:" not in sent_text, f"MEDIA: leaked into display: {sent_text!r}" assert consumer.already_sent + + +# ── Segment break (tool boundary) tests ────────────────────────────────── + + +class TestSegmentBreakOnToolBoundary: + """Verify that on_delta(None) finalizes the current message and starts a + new one so the final response appears below tool-progress messages.""" + + @pytest.mark.asyncio + async def test_segment_break_creates_new_message(self): + """After a None boundary, next text creates a fresh message.""" + adapter = MagicMock() + send_result_1 = SimpleNamespace(success=True, message_id="msg_1") + send_result_2 = SimpleNamespace(success=True, message_id="msg_2") + edit_result = SimpleNamespace(success=True) + adapter.send = AsyncMock(side_effect=[send_result_1, send_result_2]) + adapter.edit_message = AsyncMock(return_value=edit_result) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5) + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + # Phase 1: intermediate text before tool calls + consumer.on_delta("Let me search for that...") + # Tool boundary — model is about to call tools + consumer.on_delta(None) + # Phase 2: final response text after tools finished + consumer.on_delta("Here are the results.") + consumer.finish() + + await consumer.run() + + # Should have sent TWO separate messages (two adapter.send calls), + # not just edited the first one. 
+ assert adapter.send.call_count == 2 + first_text = adapter.send.call_args_list[0][1]["content"] + second_text = adapter.send.call_args_list[1][1]["content"] + assert "search" in first_text + assert "results" in second_text + + @pytest.mark.asyncio + async def test_segment_break_no_text_before(self): + """A None boundary with no preceding text is a no-op.""" + adapter = MagicMock() + send_result = SimpleNamespace(success=True, message_id="msg_1") + adapter.send = AsyncMock(return_value=send_result) + adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=True)) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5) + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + # No text before the boundary — model went straight to tool calls + consumer.on_delta(None) + consumer.on_delta("Final answer.") + consumer.finish() + + await consumer.run() + + # Only one send call (the final answer) + assert adapter.send.call_count == 1 + assert "Final answer" in adapter.send.call_args_list[0][1]["content"] + + @pytest.mark.asyncio + async def test_segment_break_removes_cursor(self): + """The finalized segment message should not have a cursor.""" + adapter = MagicMock() + send_result = SimpleNamespace(success=True, message_id="msg_1") + edit_result = SimpleNamespace(success=True) + adapter.send = AsyncMock(return_value=send_result) + adapter.edit_message = AsyncMock(return_value=edit_result) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉") + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + consumer.on_delta("Thinking...") + consumer.on_delta(None) + consumer.on_delta("Done.") + consumer.finish() + + await consumer.run() + + # The first segment should have been finalized without cursor. + # Check all edit_message calls + the initial send for the first segment. 
+ # The last state of msg_1 should NOT have the cursor. + all_texts = [] + for call in adapter.send.call_args_list: + all_texts.append(call[1].get("content", "")) + for call in adapter.edit_message.call_args_list: + all_texts.append(call[1].get("content", "")) + + # Find the text(s) that contain "Thinking" — the finalized version + # should not have the cursor. + thinking_texts = [t for t in all_texts if "Thinking" in t] + assert thinking_texts, "Expected at least one message with 'Thinking'" + # The LAST occurrence is the finalized version + assert "▉" not in thinking_texts[-1], ( + f"Cursor found in finalized segment: {thinking_texts[-1]!r}" + ) + + @pytest.mark.asyncio + async def test_multiple_segment_breaks(self): + """Multiple tool boundaries create multiple message segments.""" + adapter = MagicMock() + msg_counter = iter(["msg_1", "msg_2", "msg_3"]) + adapter.send = AsyncMock( + side_effect=lambda **kw: SimpleNamespace(success=True, message_id=next(msg_counter)) + ) + adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=True)) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5) + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + consumer.on_delta("Phase 1") + consumer.on_delta(None) # tool boundary + consumer.on_delta("Phase 2") + consumer.on_delta(None) # another tool boundary + consumer.on_delta("Phase 3") + consumer.finish() + + await consumer.run() + + # Three separate messages + assert adapter.send.call_count == 3 + + @pytest.mark.asyncio + async def test_already_sent_stays_true_after_segment(self): + """already_sent remains True after a segment break.""" + adapter = MagicMock() + send_result = SimpleNamespace(success=True, message_id="msg_1") + adapter.send = AsyncMock(return_value=send_result) + adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=True)) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, 
buffer_threshold=5) + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + consumer.on_delta("Text") + consumer.on_delta(None) + consumer.finish() + + await consumer.run() + + assert consumer.already_sent From f2c11ff30cd5601a4017cae64cbbeac0a481f5c9 Mon Sep 17 00:00:00 2001 From: Mateus Scheuer Macedo Date: Mon, 6 Apr 2026 22:59:14 -0700 Subject: [PATCH 054/154] fix(delegate): share credential pools with subagents + per-task leasing Cherry-picked from PR #5580 by MestreY0d4-Uninter. - Share parent's credential pool with child agents for key rotation - Leasing layer spreads parallel children across keys (least-loaded) - Thread-safe acquire_lease/release_lease in CredentialPool - Reverted sneaked-in tool-name restoration change (kept original getattr + isinstance guard pattern) --- agent/credential_pool.py | 50 ++++++++++++++ tests/test_credential_pool.py | 81 +++++++++++++++++++++- tests/tools/test_delegate.py | 122 ++++++++++++++++++++++++++++++++++ tools/delegate_tool.py | 58 ++++++++++++++++ 4 files changed, 308 insertions(+), 3 deletions(-) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 472f65f2..f57ae049 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -348,6 +348,9 @@ def get_pool_strategy(provider: str) -> str: return STRATEGY_FILL_FIRST +DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL = 1 + + class CredentialPool: def __init__(self, provider: str, entries: List[PooledCredential]): self.provider = provider @@ -355,6 +358,8 @@ class CredentialPool: self._current_id: Optional[str] = None self._strategy = get_pool_strategy(provider) self._lock = threading.Lock() + self._active_leases: Dict[str, int] = {} + self._max_concurrent = DEFAULT_MAX_CONCURRENT_PER_CREDENTIAL def has_credentials(self) -> bool: return bool(self._entries) @@ -760,6 +765,51 @@ class CredentialPool: logger.info("credential pool: rotated to %s", _next_label) return next_entry + def acquire_lease(self, credential_id: Optional[str] = None) -> 
Optional[str]: + """Acquire a soft lease on a credential. + + If a specific credential_id is provided, lease that entry directly. + Otherwise prefer the least-leased available credential, using priority as + a stable tie-breaker. When every credential is already at the soft cap, + still return the least-leased one instead of blocking. + """ + with self._lock: + if credential_id: + self._active_leases[credential_id] = self._active_leases.get(credential_id, 0) + 1 + self._current_id = credential_id + return credential_id + + available = self._available_entries(clear_expired=True, refresh=True) + if not available: + return None + + below_cap = [ + entry for entry in available + if self._active_leases.get(entry.id, 0) < self._max_concurrent + ] + candidates = below_cap if below_cap else available + chosen = min( + candidates, + key=lambda entry: (self._active_leases.get(entry.id, 0), entry.priority), + ) + self._active_leases[chosen.id] = self._active_leases.get(chosen.id, 0) + 1 + self._current_id = chosen.id + return chosen.id + + def release_lease(self, credential_id: str) -> None: + """Release a previously acquired credential lease.""" + with self._lock: + count = self._active_leases.get(credential_id, 0) + if count <= 1: + self._active_leases.pop(credential_id, None) + else: + self._active_leases[credential_id] = count - 1 + + def active_lease_count(self, credential_id: str) -> int: + """Return the number of active leases for a credential.""" + with self._lock: + return self._active_leases.get(credential_id, 0) + def try_refresh_current(self) -> Optional[PooledCredential]: with self._lock: return self._try_refresh_current_unlocked() diff --git a/tests/test_credential_pool.py b/tests/test_credential_pool.py index ff6e037b..891ab68a 100644 --- a/tests/test_credential_pool.py +++ b/tests/test_credential_pool.py @@ -947,7 +947,7 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch): "auth_type": "api_key", "priority": 0, "source": "manual", - "access_token": 
"sk-ant-xxx", + "access_token": "***", } ], "custom:together.ai": [ @@ -957,7 +957,7 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch): "auth_type": "api_key", "priority": 0, "source": "manual", - "access_token": "sk-tog-xxx", + "access_token": "***", } ], "custom:fireworks": [ @@ -967,7 +967,7 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch): "auth_type": "api_key", "priority": 0, "source": "manual", - "access_token": "sk-fw-xxx", + "access_token": "***", } ], "custom:empty": [], @@ -980,3 +980,78 @@ def test_list_custom_pool_providers(tmp_path, monkeypatch): result = list_custom_pool_providers() assert result == ["custom:fireworks", "custom:together.ai"] # "custom:empty" not included because it's empty + + + +def test_acquire_lease_prefers_unleased_entry(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "***", + }, + { + "id": "cred-2", + "label": "secondary", + "auth_type": "api_key", + "priority": 1, + "source": "manual", + "access_token": "***", + }, + ] + }, + }, + ) + + from agent.credential_pool import load_pool + + pool = load_pool("openrouter") + first = pool.acquire_lease() + second = pool.acquire_lease() + + assert first == "cred-1" + assert second == "cred-2" + assert pool.active_lease_count("cred-1") == 1 + assert pool.active_lease_count("cred-2") == 1 + + + +def test_release_lease_decrements_counter(tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes")) + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "openrouter": [ + { + "id": "cred-1", + "label": "primary", + "auth_type": "api_key", + "priority": 0, + "source": "manual", + "access_token": "***", + } + ] + }, + }, + ) + + from agent.credential_pool import 
load_pool + + pool = load_pool("openrouter") + leased = pool.acquire_lease() + assert leased == "cred-1" + assert pool.active_lease_count("cred-1") == 1 + + pool.release_lease("cred-1") + assert pool.active_lease_count("cred-1") == 0 diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 0e5e63a7..ebdf60d2 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -26,6 +26,7 @@ from tools.delegate_tool import ( _build_child_agent, _build_child_system_prompt, _strip_blocked_tools, + _resolve_child_credential_pool, _resolve_delegation_credentials, ) @@ -930,5 +931,126 @@ class TestDelegationProviderIntegration(unittest.TestCase): self.assertEqual(kwargs["base_url"], parent.base_url) +class TestChildCredentialPoolResolution(unittest.TestCase): + def test_same_provider_shares_parent_pool(self): + parent = _make_mock_parent() + mock_pool = MagicMock() + parent._credential_pool = mock_pool + + result = _resolve_child_credential_pool("openrouter", parent) + self.assertIs(result, mock_pool) + + def test_no_provider_inherits_parent_pool(self): + parent = _make_mock_parent() + mock_pool = MagicMock() + parent._credential_pool = mock_pool + + result = _resolve_child_credential_pool(None, parent) + self.assertIs(result, mock_pool) + + def test_different_provider_loads_own_pool(self): + parent = _make_mock_parent() + parent._credential_pool = MagicMock() + mock_pool = MagicMock() + mock_pool.has_credentials.return_value = True + + with patch("agent.credential_pool.load_pool", return_value=mock_pool): + result = _resolve_child_credential_pool("anthropic", parent) + + self.assertIs(result, mock_pool) + + def test_different_provider_empty_pool_returns_none(self): + parent = _make_mock_parent() + parent._credential_pool = MagicMock() + mock_pool = MagicMock() + mock_pool.has_credentials.return_value = False + + with patch("agent.credential_pool.load_pool", return_value=mock_pool): + result = _resolve_child_credential_pool("anthropic", 
parent) + + self.assertIsNone(result) + + def test_different_provider_load_failure_returns_none(self): + parent = _make_mock_parent() + parent._credential_pool = MagicMock() + + with patch("agent.credential_pool.load_pool", side_effect=Exception("disk error")): + result = _resolve_child_credential_pool("anthropic", parent) + + self.assertIsNone(result) + + def test_build_child_agent_assigns_parent_pool_when_shared(self): + parent = _make_mock_parent() + mock_pool = MagicMock() + parent._credential_pool = mock_pool + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + MockAgent.return_value = mock_child + + _build_child_agent( + task_index=0, + goal="Test pool assignment", + context=None, + toolsets=["terminal"], + model=None, + max_iterations=10, + parent_agent=parent, + ) + + self.assertEqual(mock_child._credential_pool, mock_pool) + + +class TestChildCredentialLeasing(unittest.TestCase): + def test_run_single_child_acquires_and_releases_lease(self): + from tools.delegate_tool import _run_single_child + + leased_entry = MagicMock() + leased_entry.id = "cred-b" + + child = MagicMock() + child._credential_pool = MagicMock() + child._credential_pool.acquire_lease.return_value = "cred-b" + child._credential_pool.current.return_value = leased_entry + child.run_conversation.return_value = { + "final_response": "done", + "completed": True, + "interrupted": False, + "api_calls": 1, + "messages": [], + } + + result = _run_single_child( + task_index=0, + goal="Investigate rate limits", + child=child, + parent_agent=_make_mock_parent(), + ) + + self.assertEqual(result["status"], "completed") + child._credential_pool.acquire_lease.assert_called_once_with() + child._swap_credential.assert_called_once_with(leased_entry) + child._credential_pool.release_lease.assert_called_once_with("cred-b") + + def test_run_single_child_releases_lease_after_failure(self): + from tools.delegate_tool import _run_single_child + + child = MagicMock() + 
child._credential_pool = MagicMock() + child._credential_pool.acquire_lease.return_value = "cred-a" + child._credential_pool.current.return_value = MagicMock(id="cred-a") + child.run_conversation.side_effect = RuntimeError("boom") + + result = _run_single_child( + task_index=1, + goal="Trigger failure", + child=child, + parent_agent=_make_mock_parent(), + ) + + self.assertEqual(result["status"], "error") + child._credential_pool.release_lease.assert_called_once_with("cred-a") + + if __name__ == "__main__": unittest.main() diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 28ffc795..9cae3ddd 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -279,6 +279,12 @@ def _build_child_agent( # Set delegation depth so children can't spawn grandchildren child._delegate_depth = getattr(parent_agent, '_delegate_depth', 0) + 1 + # Share a credential pool with the child when possible so subagents can + # rotate credentials on rate limits instead of getting pinned to one key. 
+ child_pool = _resolve_child_credential_pool(effective_provider, parent_agent) + if child_pool is not None: + child._credential_pool = child_pool + # Register child for interrupt propagation if hasattr(parent_agent, '_active_children'): lock = getattr(parent_agent, '_active_children_lock', None) @@ -312,6 +318,18 @@ def _run_single_child( _saved_tool_names = getattr(child, "_delegate_saved_tool_names", list(model_tools._last_resolved_tool_names)) + child_pool = getattr(child, '_credential_pool', None) + leased_cred_id = None + if child_pool is not None: + leased_cred_id = child_pool.acquire_lease() + if leased_cred_id is not None: + try: + leased_entry = child_pool.current() + if leased_entry is not None and hasattr(child, '_swap_credential'): + child._swap_credential(leased_entry) + except Exception as exc: + logger.debug("Failed to bind child to leased credential: %s", exc) + try: result = child.run_conversation(user_message=goal) @@ -422,6 +440,12 @@ def _run_single_child( } finally: + if child_pool is not None and leased_cred_id is not None: + try: + child_pool.release_lease(leased_cred_id) + except Exception as exc: + logger.debug("Failed to release credential lease: %s", exc) + # Restore the parent's tool names so the process-global is correct # for any subsequent execute_code calls or other consumers. import model_tools @@ -430,6 +454,8 @@ def _run_single_child( if isinstance(saved_tool_names, list): model_tools._last_resolved_tool_names = list(saved_tool_names) + # Remove child from active tracking + # Unregister child from interrupt propagation if hasattr(parent_agent, '_active_children'): try: @@ -626,6 +652,38 @@ def delegate_task( }, ensure_ascii=False) +def _resolve_child_credential_pool(effective_provider: Optional[str], parent_agent): + """Resolve a credential pool for the child agent. + + Rules: + 1. Same provider as the parent -> share the parent's pool so cooldown state + and rotation stay synchronized. + 2. 
Different provider -> try to load that provider's own pool. + 3. No pool available -> return None and let the child keep the inherited + fixed credential behavior. + """ + if not effective_provider: + return getattr(parent_agent, "_credential_pool", None) + + parent_provider = getattr(parent_agent, "provider", None) or "" + parent_pool = getattr(parent_agent, "_credential_pool", None) + if parent_pool is not None and effective_provider == parent_provider: + return parent_pool + + try: + from agent.credential_pool import load_pool + pool = load_pool(effective_provider) + if pool is not None and pool.has_credentials(): + return pool + except Exception as exc: + logger.debug( + "Could not load credential pool for child provider '%s': %s", + effective_provider, + exc, + ) + return None + + def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: """Resolve credentials for subagent delegation. From c706568993ab51d18a15acc5cb2dc8ec2b54abfc Mon Sep 17 00:00:00 2001 From: Mateus Scheuer Macedo Date: Mon, 6 Apr 2026 23:00:26 -0700 Subject: [PATCH 055/154] fix(delegate): pass workspace path hints to child agents Selectively cherry-picked from PR #5501 by MestreY0d4-Uninter. 
- Add _resolve_workspace_hint() to detect parent's working directory - Inject WORKSPACE PATH into child system prompts - Add rule: never assume /workspace/ container paths - Excludes the cli.py queue-busy-input changes from the original PR --- tools/delegate_tool.py | 43 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 9cae3ddd..ad9b54c4 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -45,7 +45,12 @@ def check_delegate_requirements() -> bool: return True -def _build_child_system_prompt(goal: str, context: Optional[str] = None) -> str: +def _build_child_system_prompt( + goal: str, + context: Optional[str] = None, + *, + workspace_path: Optional[str] = None, +) -> str: """Build a focused system prompt for a child agent.""" parts = [ "You are a focused subagent working on a specific delegated task.", @@ -54,6 +59,12 @@ def _build_child_system_prompt(goal: str, context: Optional[str] = None) -> str: ] if context and context.strip(): parts.append(f"\nCONTEXT:\n{context}") + if workspace_path and str(workspace_path).strip(): + parts.append( + "\nWORKSPACE PATH:\n" + f"{workspace_path}\n" + "Use this exact path for local repository/workdir operations unless the task explicitly says otherwise." + ) parts.append( "\nComplete this task using the tools available to you. " "When finished, provide a clear, concise summary of:\n" @@ -61,12 +72,39 @@ def _build_child_system_prompt(goal: str, context: Optional[str] = None) -> str: "- What you found or accomplished\n" "- Any files you created or modified\n" "- Any issues encountered\n\n" + "Important workspace rule: Never assume a repository lives at /workspace/... or any other container-style path unless the task/context explicitly gives that path. 
" + "If no exact local path is provided, discover it first before issuing git/workdir-specific commands.\n\n" "Be thorough but concise -- your response is returned to the " "parent agent as a summary." ) return "\n".join(parts) +def _resolve_workspace_hint(parent_agent) -> Optional[str]: + """Best-effort local workspace hint for child prompts. + + We only inject a path when we have a concrete absolute directory. This avoids + teaching subagents a fake container path while still helping them avoid + guessing `/workspace/...` for local repo tasks. + """ + candidates = [ + os.getenv("TERMINAL_CWD"), + getattr(getattr(parent_agent, "_subdirectory_hints", None), "working_dir", None), + getattr(parent_agent, "terminal_cwd", None), + getattr(parent_agent, "cwd", None), + ] + for candidate in candidates: + if not candidate: + continue + try: + text = os.path.abspath(os.path.expanduser(str(candidate))) + except Exception: + continue + if os.path.isabs(text) and os.path.isdir(text): + return text + return None + + def _strip_blocked_tools(toolsets: List[str]) -> List[str]: """Remove toolsets that contain only blocked tools.""" blocked_toolset_names = { @@ -210,7 +248,8 @@ def _build_child_agent( else: child_toolsets = _strip_blocked_tools(DEFAULT_TOOLSETS) - child_prompt = _build_child_system_prompt(goal, context) + workspace_hint = _resolve_workspace_hint(parent_agent) + child_prompt = _build_child_system_prompt(goal, context, workspace_path=workspace_hint) # Extract parent's API key so subagents inherit auth (e.g. Nous Portal). 
parent_api_key = getattr(parent_agent, "api_key", None) if (not parent_api_key) and hasattr(parent_agent, "_client_kwargs"): From 8e64f795a1d1427be93059adfd81a32de78284b7 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:01:43 -0700 Subject: [PATCH 056/154] fix: stale OAuth credentials block OpenRouter users on auto-detect (#5746) When resolve_runtime_provider is called with requested='auto' and auth.json has a stale active_provider (nous or openai-codex) whose OAuth refresh token has been revoked, the AuthError now falls through to the next provider in the chain (e.g. OpenRouter via env vars) instead of propagating to the user as a blocking error. When the user explicitly requested the OAuth provider, the error still propagates so they know to re-authenticate. Root cause: resolve_provider('auto') checks auth.json for an active OAuth provider before checking env vars. get_nous_auth_status() reports logged_in=True if any access_token exists (even expired), so the Nous path is taken. resolve_nous_runtime_credentials() then tries to refresh the token, fails with 'Refresh session has been revoked', and the AuthError bubbles up to the CLI bold-red display. Adds 3 tests: Nous fallthrough, Codex fallthrough, explicit-request still raises. 
--- hermes_cli/runtime_provider.py | 62 ++++++++++------- tests/test_runtime_provider_resolution.py | 83 +++++++++++++++++++++++ 2 files changed, 122 insertions(+), 23 deletions(-) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 8ed60191..9c82ef62 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -639,31 +639,47 @@ def resolve_runtime_provider( ) if provider == "nous": - creds = resolve_nous_runtime_credentials( - min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), - timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), - ) - return { - "provider": "nous", - "api_mode": "chat_completions", - "base_url": creds.get("base_url", "").rstrip("/"), - "api_key": creds.get("api_key", ""), - "source": creds.get("source", "portal"), - "expires_at": creds.get("expires_at"), - "requested_provider": requested_provider, - } + try: + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + ) + return { + "provider": "nous", + "api_mode": "chat_completions", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "portal"), + "expires_at": creds.get("expires_at"), + "requested_provider": requested_provider, + } + except AuthError: + if requested_provider != "auto": + raise + # Auto-detected Nous but credentials are stale/revoked — + # fall through to env-var providers (e.g. OpenRouter). 
+ logger.info("Auto-detected Nous provider but credentials failed; " + "falling through to next provider.") if provider == "openai-codex": - creds = resolve_codex_runtime_credentials() - return { - "provider": "openai-codex", - "api_mode": "codex_responses", - "base_url": creds.get("base_url", "").rstrip("/"), - "api_key": creds.get("api_key", ""), - "source": creds.get("source", "hermes-auth-store"), - "last_refresh": creds.get("last_refresh"), - "requested_provider": requested_provider, - } + try: + creds = resolve_codex_runtime_credentials() + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "hermes-auth-store"), + "last_refresh": creds.get("last_refresh"), + "requested_provider": requested_provider, + } + except AuthError: + if requested_provider != "auto": + raise + # Auto-detected Codex but credentials are stale/revoked — + # fall through to env-var providers (e.g. OpenRouter). 
+ logger.info("Auto-detected Codex provider but credentials failed; " + "falling through to next provider.") if provider == "copilot-acp": creds = resolve_external_process_provider_credentials(provider) diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index 11604704..ded0c920 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -996,6 +996,89 @@ def test_custom_provider_no_key_gets_placeholder(monkeypatch): assert resolved["base_url"] == "http://localhost:8080/v1" +def test_auto_detected_nous_auth_failure_falls_through_to_openrouter(monkeypatch): + """When auto-detect picks Nous but credentials are revoked, fall through to OpenRouter.""" + from hermes_cli.auth import AuthError + + monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.setattr(rp, "load_config", lambda: {}) + + # resolve_provider returns "nous" (stale active_provider in auth.json) + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous") + # load_pool returns empty pool so we hit the direct credential resolution + monkeypatch.setattr(rp, "load_pool", lambda p: type("P", (), { + "has_credentials": lambda self: False, + })()) + # Nous credential resolution fails with revoked token + monkeypatch.setattr( + rp, "resolve_nous_runtime_credentials", + lambda **kw: (_ for _ in ()).throw( + AuthError("Refresh session has been revoked", + provider="nous", code="invalid_grant", relogin_required=True) + ), + ) + + # With requested="auto", should fall through to OpenRouter + resolved = rp.resolve_runtime_provider(requested="auto") + assert resolved["provider"] == "openrouter" + assert resolved["api_key"] == "test-or-key" + + +def test_auto_detected_codex_auth_failure_falls_through_to_openrouter(monkeypatch): + """When 
auto-detect picks Codex but credentials are revoked, fall through to OpenRouter.""" + from hermes_cli.auth import AuthError + + monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key") + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.setattr(rp, "load_config", lambda: {}) + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") + monkeypatch.setattr(rp, "load_pool", lambda p: type("P", (), { + "has_credentials": lambda self: False, + })()) + monkeypatch.setattr( + rp, "resolve_codex_runtime_credentials", + lambda **kw: (_ for _ in ()).throw( + AuthError("Codex token refresh failed: session revoked", + provider="openai-codex", code="invalid_grant", relogin_required=True) + ), + ) + + resolved = rp.resolve_runtime_provider(requested="auto") + assert resolved["provider"] == "openrouter" + assert resolved["api_key"] == "test-or-key" + + +def test_explicit_nous_auth_failure_still_raises(monkeypatch): + """When user explicitly requests Nous and auth fails, the error should propagate.""" + from hermes_cli.auth import AuthError + import pytest + + monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key") + monkeypatch.setattr(rp, "load_config", lambda: {}) + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous") + monkeypatch.setattr(rp, "load_pool", lambda p: type("P", (), { + "has_credentials": lambda self: False, + })()) + monkeypatch.setattr( + rp, "resolve_nous_runtime_credentials", + lambda **kw: (_ for _ in ()).throw( + AuthError("Refresh session has been revoked", + provider="nous", code="invalid_grant", relogin_required=True) + ), + ) + + # With explicit "nous", should raise — don't silently switch providers + with pytest.raises(AuthError, match="Refresh session has been revoked"): + rp.resolve_runtime_provider(requested="nous") + + def 
test_openrouter_provider_not_affected_by_custom_fix(monkeypatch): """Fixing custom must not change openrouter behavior.""" monkeypatch.delenv("OPENAI_API_KEY", raising=False) From 2b79569a07aae2b0fb7a04e7a054278b51c3ac26 Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 6 Apr 2026 23:06:33 -0700 Subject: [PATCH 057/154] fix(discord): remove default selection from model picker provider dropdown Discord doesn't fire the select callback when clicking an already-selected default option (no change detected). This prevented users from selecting the current provider to browse its models. The 'current' indicator is already shown via the description field. --- gateway/platforms/discord.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 5e1be74a..83ea2694 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -2637,7 +2637,6 @@ if DISCORD_AVAILABLE: discord.SelectOption( label=label[:100], value=p["slug"], - default=bool(p.get("is_current")), description=desc, ) ) From 3bc2fe802e81a337728b48ed8d39b5c5cbc453db Mon Sep 17 00:00:00 2001 From: Teknium Date: Mon, 6 Apr 2026 23:10:40 -0700 Subject: [PATCH 058/154] feat(telegram): paginated model picker with Next/Prev navigation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Raise max_models from 8 to 50 so all curated models come through - Add _build_model_keyboard() helper with 8-per-page pagination - Next ▶ / ◀ Prev buttons with page counter (e.g. 
2/4) - mg: callback data for page navigation - Catch-all query.answer() for noop buttons --- gateway/platforms/telegram.py | 108 +++++++++++++++++++++++++++------- gateway/run.py | 2 +- 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 0362b9f9..355bf3ae 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1083,10 +1083,53 @@ class TelegramAdapter(BasePlatformAdapter): logger.warning("[%s] send_model_picker failed: %s", self.name, e) return SendResult(success=False, error=str(e)) + _MODEL_PAGE_SIZE = 8 + + def _build_model_keyboard(self, models: list, page: int) -> tuple: + """Build paginated model buttons. Returns (keyboard, page_info_text).""" + page_size = self._MODEL_PAGE_SIZE + total = len(models) + total_pages = max(1, (total + page_size - 1) // page_size) + page = max(0, min(page, total_pages - 1)) + + start = page * page_size + end = min(start + page_size, total) + page_models = models[start:end] + + buttons: list = [] + for i, model_id in enumerate(page_models): + abs_idx = start + i + short = model_id.split("/")[-1] if "/" in model_id else model_id + if len(short) > 38: + short = short[:35] + "..." 
+ buttons.append( + InlineKeyboardButton(short, callback_data=f"mm:{abs_idx}") + ) + + rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] + + # Pagination row (if needed) + if total_pages > 1: + nav: list = [] + if page > 0: + nav.append(InlineKeyboardButton("◀ Prev", callback_data=f"mg:{page - 1}")) + nav.append(InlineKeyboardButton(f"{page + 1}/{total_pages}", callback_data="mx:noop")) + if page < total_pages - 1: + nav.append(InlineKeyboardButton("Next ▶", callback_data=f"mg:{page + 1}")) + rows.append(nav) + + rows.append([ + InlineKeyboardButton("◀ Back", callback_data="mb"), + InlineKeyboardButton("✗ Cancel", callback_data="mx"), + ]) + + page_info = f" ({start + 1}–{end} of {total})" if total_pages > 1 else "" + return InlineKeyboardMarkup(rows), page_info + async def _handle_model_picker_callback( self, query, data: str, chat_id: str ) -> None: - """Handle model picker inline keyboard callbacks (mp:/mm:/mb:/mx:).""" + """Handle model picker inline keyboard callbacks (mp:/mm:/mb:/mx:/mg:).""" state = self._model_picker_state.get(chat_id) if not state: await query.answer(text="Picker expired — use /model again.") @@ -1099,7 +1142,7 @@ class TelegramAdapter(BasePlatformAdapter): return slug if data.startswith("mp:"): - # --- Provider selected: show model buttons --- + # --- Provider selected: show model buttons (page 0) --- provider_slug = data[3:] provider = next( (p for p in state["providers"] if p["slug"] == provider_slug), @@ -1113,24 +1156,9 @@ class TelegramAdapter(BasePlatformAdapter): state["selected_provider"] = provider_slug state["selected_provider_name"] = provider.get("name", provider_slug) state["model_list"] = models + state["model_page"] = 0 - buttons: list = [] - for i, model_id in enumerate(models): - # Short display label: strip vendor prefix - short = model_id.split("/")[-1] if "/" in model_id else model_id - # Truncate long model names for button label (max ~40 chars) - if len(short) > 38: - short = short[:35] + "..." 
- buttons.append( - InlineKeyboardButton(short, callback_data=f"mm:{i}") - ) - - rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] - rows.append([ - InlineKeyboardButton("◀ Back", callback_data="mb"), - InlineKeyboardButton("✗ Cancel", callback_data="mx"), - ]) - keyboard = InlineKeyboardMarkup(rows) + keyboard, page_info = self._build_model_keyboard(models, 0) pname = provider.get("name", provider_slug) total = provider.get("total_models", len(models)) @@ -1140,7 +1168,41 @@ class TelegramAdapter(BasePlatformAdapter): await query.edit_message_text( text=( f"⚙ *Model Configuration*\n\n" - f"Provider: *{pname}*\n" + f"Provider: *{pname}*{page_info}\n" + f"Select a model:{extra}" + ), + parse_mode=ParseMode.MARKDOWN, + reply_markup=keyboard, + ) + await query.answer() + + elif data.startswith("mg:"): + # --- Page navigation --- + try: + page = int(data[3:]) + except ValueError: + await query.answer(text="Invalid page.") + return + + models = state.get("model_list", []) + state["model_page"] = page + + keyboard, page_info = self._build_model_keyboard(models, page) + + pname = state.get("selected_provider_name", "") + provider_slug = state.get("selected_provider", "") + provider = next( + (p for p in state["providers"] if p["slug"] == provider_slug), + None, + ) + total = provider.get("total_models", len(models)) if provider else len(models) + shown = len(models) + extra = f"\n_{total - shown} more available — type `/model ` directly_" if total > shown else "" + + await query.edit_message_text( + text=( + f"⚙ *Model Configuration*\n\n" + f"Provider: *{pname}*{page_info}\n" f"Select a model:{extra}" ), parse_mode=ParseMode.MARKDOWN, @@ -1239,6 +1301,10 @@ class TelegramAdapter(BasePlatformAdapter): ) await query.answer() + else: + # Catch-all (e.g. 
page counter button "mx:noop") + await query.answer() + async def _handle_callback_query( self, update: "Update", context: "ContextTypes.DEFAULT_TYPE" ) -> None: @@ -1249,7 +1315,7 @@ class TelegramAdapter(BasePlatformAdapter): data = query.data # --- Model picker callbacks --- - if data.startswith(("mp:", "mm:", "mb", "mx")): + if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")): chat_id = str(query.message.chat_id) if query.message else None if chat_id: await self._handle_model_picker_callback(query, data, chat_id) diff --git a/gateway/run.py b/gateway/run.py index 08be2b9d..0030f434 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3530,7 +3530,7 @@ class GatewayRunner: providers = list_authenticated_providers( current_provider=current_provider, user_providers=user_provs, - max_models=8, + max_models=50, ) except Exception: providers = [] From f609bf277db4d174e16d9f872f5aae25461bc0a6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 6 Apr 2026 23:59:26 -0700 Subject: [PATCH 059/154] feat: update blogwatcher skill to JulienTant's fork (#5759) Replace Hyaxia/blogwatcher with JulienTant/blogwatcher-cli fork which adds: - Docker support with BLOGWATCHER_DB env var for persistent storage - SQL injection prevention - SSRF protection (blocks private IPs/metadata endpoints) - HTML scraping fallback when RSS unavailable - OPML import from Feedly/Inoreader/NewsBlur - Category filtering for articles - Direct binary downloads (no Go required) - Migration guide from original blogwatcher Binary name changed: blogwatcher -> blogwatcher-cli Community contribution by Ao (JulienTant). Closes discussion about Docker compatibility. 
--- skills/research/blogwatcher/SKILL.md | 118 ++++++++++++++++++++++----- 1 file changed, 99 insertions(+), 19 deletions(-) diff --git a/skills/research/blogwatcher/SKILL.md b/skills/research/blogwatcher/SKILL.md index c1ea4ac2..bfcc4f1d 100644 --- a/skills/research/blogwatcher/SKILL.md +++ b/skills/research/blogwatcher/SKILL.md @@ -1,48 +1,106 @@ --- name: blogwatcher -description: Monitor blogs and RSS/Atom feeds for updates using the blogwatcher CLI. Add blogs, scan for new articles, and track what you've read. -version: 1.0.0 -author: community +description: Monitor blogs and RSS/Atom feeds for updates using the blogwatcher-cli tool. Add blogs, scan for new articles, track read status, and filter by category. +version: 2.0.0 +author: JulienTant (fork of Hyaxia/blogwatcher) license: MIT metadata: hermes: tags: [RSS, Blogs, Feed-Reader, Monitoring] - homepage: https://github.com/Hyaxia/blogwatcher + homepage: https://github.com/JulienTant/blogwatcher-cli prerequisites: - commands: [blogwatcher] + commands: [blogwatcher-cli] --- # Blogwatcher -Track blog and RSS/Atom feed updates with the `blogwatcher` CLI. +Track blog and RSS/Atom feed updates with the `blogwatcher-cli` tool. Supports automatic feed discovery, HTML scraping fallback, OPML import, and read/unread article management. 
-## Prerequisites +## Installation -- Go installed (`go version` to check) -- Install: `go install github.com/Hyaxia/blogwatcher/cmd/blogwatcher@latest` +Pick one method: + +- **Go:** `go install github.com/JulienTant/blogwatcher-cli/cmd/blogwatcher-cli@latest` +- **Docker:** `docker run --rm -v blogwatcher-cli:/data ghcr.io/julientant/blogwatcher-cli` +- **Binary (Linux amd64):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_linux_amd64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli` +- **Binary (Linux arm64):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_linux_arm64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli` +- **Binary (macOS Apple Silicon):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_darwin_arm64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli` +- **Binary (macOS Intel):** `curl -sL https://github.com/JulienTant/blogwatcher-cli/releases/latest/download/blogwatcher-cli_darwin_amd64.tar.gz | tar xz -C /usr/local/bin blogwatcher-cli` + +All releases: https://github.com/JulienTant/blogwatcher-cli/releases + +### Docker with persistent storage + +By default the database lives at `~/.blogwatcher-cli/blogwatcher-cli.db`. In Docker this is lost on container restart. Use `BLOGWATCHER_DB` or a volume mount to persist it: + +```bash +# Named volume (simplest) +docker run --rm -v blogwatcher-cli:/data -e BLOGWATCHER_DB=/data/blogwatcher-cli.db ghcr.io/julientant/blogwatcher-cli scan + +# Host bind mount +docker run --rm -v /path/on/host:/data -e BLOGWATCHER_DB=/data/blogwatcher-cli.db ghcr.io/julientant/blogwatcher-cli scan +``` + +### Migrating from the original blogwatcher + +If upgrading from `Hyaxia/blogwatcher`, move your database: + +```bash +mv ~/.blogwatcher/blogwatcher.db ~/.blogwatcher-cli/blogwatcher-cli.db +``` + +The binary name changed from `blogwatcher` to `blogwatcher-cli`. 
## Common Commands -- Add a blog: `blogwatcher add "My Blog" https://example.com` -- List blogs: `blogwatcher blogs` -- Scan for updates: `blogwatcher scan` -- List articles: `blogwatcher articles` -- Mark an article read: `blogwatcher read 1` -- Mark all articles read: `blogwatcher read-all` -- Remove a blog: `blogwatcher remove "My Blog"` +### Managing blogs + +- Add a blog: `blogwatcher-cli add "My Blog" https://example.com` +- Add with explicit feed: `blogwatcher-cli add "My Blog" https://example.com --feed-url https://example.com/feed.xml` +- Add with HTML scraping: `blogwatcher-cli add "My Blog" https://example.com --scrape-selector "article h2 a"` +- List tracked blogs: `blogwatcher-cli blogs` +- Remove a blog: `blogwatcher-cli remove "My Blog" --yes` +- Import from OPML: `blogwatcher-cli import subscriptions.opml` + +### Scanning and reading + +- Scan all blogs: `blogwatcher-cli scan` +- Scan one blog: `blogwatcher-cli scan "My Blog"` +- List unread articles: `blogwatcher-cli articles` +- List all articles: `blogwatcher-cli articles --all` +- Filter by blog: `blogwatcher-cli articles --blog "My Blog"` +- Filter by category: `blogwatcher-cli articles --category "Engineering"` +- Mark article read: `blogwatcher-cli read 1` +- Mark article unread: `blogwatcher-cli unread 1` +- Mark all read: `blogwatcher-cli read-all` +- Mark all read for a blog: `blogwatcher-cli read-all --blog "My Blog" --yes` + +## Environment Variables + +All flags can be set via environment variables with the `BLOGWATCHER_` prefix: + +| Variable | Description | +|---|---| +| `BLOGWATCHER_DB` | Path to SQLite database file | +| `BLOGWATCHER_WORKERS` | Number of concurrent scan workers (default: 8) | +| `BLOGWATCHER_SILENT` | Only output "scan done" when scanning | +| `BLOGWATCHER_YES` | Skip confirmation prompts | +| `BLOGWATCHER_CATEGORY` | Default filter for articles by category | ## Example Output ``` -$ blogwatcher blogs +$ blogwatcher-cli blogs Tracked blogs (1): xkcd URL: 
https://xkcd.com + Feed: https://xkcd.com/atom.xml + Last scanned: 2026-04-03 10:30 ``` ``` -$ blogwatcher scan +$ blogwatcher-cli scan Scanning 1 blog(s)... xkcd @@ -51,6 +109,28 @@ Scanning 1 blog(s)... Found 4 new article(s) total! ``` +``` +$ blogwatcher-cli articles +Unread articles (2): + + [1] [new] Barrel - Part 13 + Blog: xkcd + URL: https://xkcd.com/3095/ + Published: 2026-04-02 + Categories: Comics, Science + + [2] [new] Volcano Fact + Blog: xkcd + URL: https://xkcd.com/3094/ + Published: 2026-04-01 + Categories: Comics +``` + ## Notes -- Use `blogwatcher --help` to discover flags and options. +- Auto-discovers RSS/Atom feeds from blog homepages when no `--feed-url` is provided. +- Falls back to HTML scraping if RSS fails and `--scrape-selector` is configured. +- Categories from RSS/Atom feeds are stored and can be used to filter articles. +- Import blogs in bulk from OPML files exported by Feedly, Inoreader, NewsBlur, etc. +- Database stored at `~/.blogwatcher-cli/blogwatcher-cli.db` by default (override with `--db` or `BLOGWATCHER_DB`). +- Use `blogwatcher-cli --help` to discover all flags and options. From 9e844160f9b6e6485abe4294cb1962d7ef7813e3 Mon Sep 17 00:00:00 2001 From: Yang Zhi Date: Tue, 7 Apr 2026 00:52:29 +0800 Subject: [PATCH 060/154] fix(credential_pool): auto-detect Z.AI endpoint via probe and cache The credential pool seeder and runtime credential resolver hardcoded api.z.ai/api/paas/v4 for all Z.AI keys. Keys on the Coding Plan (or CN endpoint) would hit the wrong endpoint, causing 401/429 errors on the first request even though a working endpoint exists. 
Add _resolve_zai_base_url() that: - Respects GLM_BASE_URL env var (no probe when explicitly set) - Probes all candidate endpoints (global, cn, coding-global, coding-cn) via detect_zai_endpoint() to find one that returns HTTP 200 - Caches the detected endpoint in provider state (auth.json) keyed on a SHA-256 hash of the API key so subsequent starts skip the probe - Falls back to the default URL if all probes fail Wire into both _seed_from_env() in the credential pool and resolve_api_key_provider_credentials() in the runtime resolver, matching the pattern from the kimi-coding fix (PR #5566). Fixes the same class of bug as #5561 but for the zai provider. --- agent/credential_pool.py | 3 ++ hermes_cli/auth.py | 43 +++++++++++++++++++++++++++ tests/test_api_key_providers.py | 52 ++++++++++++++++++++++++++++++++- 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index f57ae049..144a9101 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -27,6 +27,7 @@ from hermes_cli.auth import ( _is_expiring, _load_auth_store, _load_provider_state, + _resolve_zai_base_url, read_credential_pool, write_credential_pool, ) @@ -1086,6 +1087,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool active_sources.add(source) auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY base_url = env_url or pconfig.inference_base_url + if provider == "zai": + base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url) changed |= _upsert_entry( entries, provider, diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 588d06d4..23119c66 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -404,6 +404,47 @@ def detect_zai_endpoint(api_key: str, timeout: float = 8.0) -> Optional[Dict[str return None +def _resolve_zai_base_url(api_key: str, default_url: str, env_override: str) -> str: + """Return the 
correct Z.AI base URL by probing endpoints. + + If the user has explicitly set GLM_BASE_URL, that always wins. + Otherwise, probe the candidate endpoints to find one that accepts the + key. The detected endpoint is cached in provider state (auth.json) keyed + on a hash of the API key so subsequent starts skip the probe. + """ + if env_override: + return env_override + + # Check provider-state cache for a previously-detected endpoint. + auth_store = _load_auth_store() + state = _load_provider_state(auth_store, "zai") or {} + cached = state.get("detected_endpoint") + if isinstance(cached, dict) and cached.get("base_url"): + key_hash = cached.get("key_hash", "") + if key_hash == hashlib.sha256(api_key.encode()).hexdigest()[:16]: + logger.debug("Z.AI: using cached endpoint %s", cached["base_url"]) + return cached["base_url"] + + # Probe — may take up to ~8s per endpoint. + detected = detect_zai_endpoint(api_key) + if detected and detected.get("base_url"): + # Persist the detection result keyed on the API key hash. 
+ key_hash = hashlib.sha256(api_key.encode()).hexdigest()[:16] + state["detected_endpoint"] = { + "base_url": detected["base_url"], + "endpoint_id": detected.get("id", ""), + "model": detected.get("model", ""), + "label": detected.get("label", ""), + "key_hash": key_hash, + } + _save_provider_state(auth_store, "zai", state) + logger.info("Z.AI: auto-detected endpoint %s (%s)", detected["label"], detected["base_url"]) + return detected["base_url"] + + logger.debug("Z.AI: probe failed, falling back to default %s", default_url) + return default_url + + # ============================================================================= # Error Types # ============================================================================= @@ -2063,6 +2104,8 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]: if provider_id == "kimi-coding": base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url) + elif provider_id == "zai": + base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url) elif env_url: base_url = env_url.rstrip("/") else: diff --git a/tests/test_api_key_providers.py b/tests/test_api_key_providers.py index ddf1d972..ee86507a 100644 --- a/tests/test_api_key_providers.py +++ b/tests/test_api_key_providers.py @@ -350,6 +350,7 @@ class TestResolveApiKeyProviderCredentials: def test_resolve_zai_with_key(self, monkeypatch): monkeypatch.setenv("GLM_API_KEY", "glm-secret-key") + monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None) creds = resolve_api_key_provider_credentials("zai") assert creds["provider"] == "zai" assert creds["api_key"] == "glm-secret-key" @@ -471,6 +472,7 @@ class TestResolveApiKeyProviderCredentials: """GLM_API_KEY takes priority over ZAI_API_KEY.""" monkeypatch.setenv("GLM_API_KEY", "primary") monkeypatch.setenv("ZAI_API_KEY", "secondary") + monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None) creds = 
resolve_api_key_provider_credentials("zai") assert creds["api_key"] == "primary" assert creds["source"] == "GLM_API_KEY" @@ -478,6 +480,7 @@ class TestResolveApiKeyProviderCredentials: def test_zai_key_fallback(self, monkeypatch): """ZAI_API_KEY used when GLM_API_KEY not set.""" monkeypatch.setenv("ZAI_API_KEY", "secondary") + monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None) creds = resolve_api_key_provider_credentials("zai") assert creds["api_key"] == "secondary" assert creds["source"] == "ZAI_API_KEY" @@ -830,11 +833,58 @@ class TestKimiCodeCredentialAutoDetect: def test_non_kimi_providers_unaffected(self, monkeypatch): """Ensure the auto-detect logic doesn't leak to other providers.""" - monkeypatch.setenv("GLM_API_KEY", "sk-kimi-looks-like-kimi-but-isnt") + monkeypatch.setenv("GLM_API_KEY", "sk-kim...isnt") + monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None) creds = resolve_api_key_provider_credentials("zai") assert creds["base_url"] == "https://api.z.ai/api/paas/v4" +class TestZaiEndpointAutoDetect: + """Test that resolve_api_key_provider_credentials auto-detects Z.AI endpoints.""" + + def test_probe_success_returns_detected_url(self, monkeypatch): + monkeypatch.setenv("GLM_API_KEY", "glm-coding-key") + monkeypatch.setattr( + "hermes_cli.auth.detect_zai_endpoint", + lambda *a, **kw: { + "id": "coding-global", + "base_url": "https://api.z.ai/api/coding/paas/v4", + "model": "glm-4.7", + "label": "Global (Coding Plan)", + }, + ) + creds = resolve_api_key_provider_credentials("zai") + assert creds["base_url"] == "https://api.z.ai/api/coding/paas/v4" + + def test_probe_failure_falls_back_to_default(self, monkeypatch): + monkeypatch.setenv("GLM_API_KEY", "glm-key") + monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None) + creds = resolve_api_key_provider_credentials("zai") + assert creds["base_url"] == "https://api.z.ai/api/paas/v4" + + def 
test_env_override_skips_probe(self, monkeypatch): + """GLM_BASE_URL should always win without probing.""" + monkeypatch.setenv("GLM_API_KEY", "glm-key") + monkeypatch.setenv("GLM_BASE_URL", "https://custom.example/v4") + probe_called = False + + def _never_called(*a, **kw): + nonlocal probe_called + probe_called = True + return None + + monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", _never_called) + creds = resolve_api_key_provider_credentials("zai") + assert creds["base_url"] == "https://custom.example/v4" + assert not probe_called + + def test_no_key_skips_probe(self, monkeypatch): + """Without an API key, no probe should occur.""" + monkeypatch.setattr("hermes_cli.auth.detect_zai_endpoint", lambda *a, **kw: None) + creds = resolve_api_key_provider_credentials("zai") + assert creds["api_key"] == "" + + # ============================================================================= # Kimi / Moonshot model list isolation tests # ============================================================================= From eb7c4084451722f05a1508173b5275800c4860c9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 00:53:45 -0700 Subject: [PATCH 061/154] fix(gateway): /stop and /new bypass Level 1 active-session guard (#5765) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(gateway): /stop and /new bypass Level 1 active-session guard The base adapter's Level 1 guard intercepted ALL messages while an agent was running, including /stop and /new. These commands were queued as pending messages instead of being dispatched to the gateway runner's Level 2 handler. When the agent eventually stopped (via the interrupt mechanism), the command text leaked into the conversation as a user message — the model would receive '/stop' as input and respond to it. Fix: Add /stop, /new, and /reset to the bypass set in base.py alongside /approve, /deny, and /status. 
Consolidate the three separate bypass blocks into one. Commands in the bypass set are dispatched inline to the gateway runner, where Level 2 handles them correctly (hard-kill for /stop, session reset for /new). Also add a safety net in _run_agent's pending-message processing: if the pending text resolves to a known slash command, discard it instead of passing it to the agent. This catches edge cases where command text leaks through the interrupt_message fallback. Refs: #5244 * test: regression tests for command bypass of active-session guard 17 tests covering: - /stop, /new, /reset bypass the Level 1 guard when agent is running - /approve, /deny, /status bypass (existing behavior, now tested) - Regular text and unknown commands still queued (not bypassed) - File paths like '/path/to/file' not treated as commands - Telegram @botname suffix handled correctly - Safety net command resolution (resolve_command detects known commands) --- gateway/platforms/base.py | 44 +-- gateway/run.py | 21 ++ .../test_command_bypass_active_session.py | 313 ++++++++++++++++++ 3 files changed, 347 insertions(+), 31 deletions(-) create mode 100644 tests/gateway/test_command_bypass_active_session.py diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 4335a51f..66fc5bac 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1103,16 +1103,20 @@ class BasePlatformAdapter(ABC): # Check if there's already an active handler for this session if session_key in self._active_sessions: - # /approve and /deny must bypass the active-session guard. - # The agent thread is blocked on threading.Event.wait() inside - # tools/approval.py — queuing these commands creates a deadlock: - # the agent waits for approval, approval waits for agent to finish. - # Dispatch directly to the message handler without touching session - # lifecycle (no competing background task, no session guard removal). 
+ # Certain commands must bypass the active-session guard and be + # dispatched directly to the gateway runner. Without this, they + # are queued as pending messages and either: + # - leak into the conversation as user text (/stop, /new), or + # - deadlock (/approve, /deny — agent is blocked on Event.wait) + # + # Dispatch inline: call the message handler directly and send the + # response. Do NOT use _process_message_background — it manages + # session lifecycle and its cleanup races with the running task + # (see PR #4926). cmd = event.get_command() - if cmd in ("approve", "deny"): + if cmd in ("approve", "deny", "status", "stop", "new", "reset"): logger.debug( - "[%s] Approval command '/%s' bypassing active-session guard for %s", + "[%s] Command '/%s' bypassing active-session guard for %s", self.name, cmd, session_key, ) try: @@ -1126,29 +1130,7 @@ class BasePlatformAdapter(ABC): metadata=_thread_meta, ) except Exception as e: - logger.error("[%s] Approval dispatch failed: %s", self.name, e, exc_info=True) - return - - # /status must also bypass the active-session guard so it always - # returns a system-generated response instead of being queued as - # user text and passed to the agent (#5046). 
- if cmd == "status": - logger.debug( - "[%s] Status command bypassing active-session guard for %s", - self.name, session_key, - ) - try: - _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None - response = await self._message_handler(event) - if response: - await self._send_with_retry( - chat_id=event.source.chat_id, - content=response, - reply_to=event.message_id, - metadata=_thread_meta, - ) - except Exception as e: - logger.error("[%s] Status dispatch failed: %s", self.name, e, exc_info=True) + logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True) return # Special case: photo bursts/albums frequently arrive as multiple near- diff --git a/gateway/run.py b/gateway/run.py index 0030f434..e4a5324a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7131,6 +7131,27 @@ class GatewayRunner: if pending: logger.debug("Processing queued message after agent completion: '%s...'", pending[:40]) + # Safety net: if the pending text is a slash command (e.g. "/stop", + # "/new"), discard it — commands should never be passed to the agent + # as user input. The primary fix is in base.py (commands bypass the + # active-session guard), but this catches edge cases where command + # text leaks through the interrupt_message fallback. 
+ if pending and pending.strip().startswith("/"): + _pending_parts = pending.strip().split(None, 1) + _pending_cmd_word = _pending_parts[0][1:].lower() if _pending_parts else "" + if _pending_cmd_word: + try: + from hermes_cli.commands import resolve_command as _rc_pending + if _rc_pending(_pending_cmd_word): + logger.info( + "Discarding command '/%s' from pending queue — " + "commands must not be passed as agent input", + _pending_cmd_word, + ) + pending = None + except Exception: + pass + if pending: logger.debug("Processing pending message: '%s...'", pending[:40]) diff --git a/tests/gateway/test_command_bypass_active_session.py b/tests/gateway/test_command_bypass_active_session.py new file mode 100644 index 00000000..e90dee69 --- /dev/null +++ b/tests/gateway/test_command_bypass_active_session.py @@ -0,0 +1,313 @@ +"""Regression tests: slash commands must bypass the base adapter's active-session guard. + +When an agent is running, the base adapter's Level 1 guard in +handle_message() intercepts all incoming messages and queues them as +pending. Certain commands (/stop, /new, /reset, /approve, /deny, +/status) must bypass this guard and be dispatched directly to the gateway +runner — otherwise they are queued as user text and either: + - leak into the conversation as agent input (/stop, /new), or + - deadlock (/approve, /deny — agent blocks on Event.wait) + +These tests verify that the bypass works at the adapter level and that +the safety net in _run_agent discards leaked command text. 
+""" + +import asyncio +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType +from gateway.session import SessionSource, build_session_key + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +class _StubAdapter(BasePlatformAdapter): + """Concrete adapter with abstract methods stubbed out.""" + + async def connect(self): + pass + + async def disconnect(self): + pass + + async def send(self, chat_id, text, **kwargs): + pass + + async def get_chat_info(self, chat_id): + return {} + + +def _make_adapter(): + """Create a minimal adapter for testing the active-session guard.""" + config = PlatformConfig(enabled=True, token="test-token") + adapter = _StubAdapter(config, Platform.TELEGRAM) + adapter.sent_responses = [] + + async def _mock_handler(event): + cmd = event.get_command() + return f"handled:{cmd}" if cmd else f"handled:text:{event.text}" + + adapter._message_handler = _mock_handler + + async def _mock_send_retry(chat_id, content, **kwargs): + adapter.sent_responses.append(content) + + adapter._send_with_retry = _mock_send_retry + return adapter + + +def _make_event(text="/stop", chat_id="12345"): + source = SessionSource( + platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm" + ) + return MessageEvent(text=text, message_type=MessageType.TEXT, source=source) + + +def _session_key(chat_id="12345"): + source = SessionSource( + platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm" + ) + return build_session_key(source) + + +# --------------------------------------------------------------------------- +# Tests: commands bypass Level 1 when session is active +# --------------------------------------------------------------------------- + + +class TestCommandBypassActiveSession: + 
"""Commands that must bypass the active-session guard.""" + + @pytest.mark.asyncio + async def test_stop_bypasses_guard(self): + """/stop must be dispatched directly, not queued.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/stop")) + + assert sk not in adapter._pending_messages, ( + "/stop was queued as a pending message instead of being dispatched" + ) + assert any("handled:stop" in r for r in adapter.sent_responses), ( + "/stop response was not sent back to the user" + ) + + @pytest.mark.asyncio + async def test_new_bypasses_guard(self): + """/new must be dispatched directly, not queued.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/new")) + + assert sk not in adapter._pending_messages + assert any("handled:new" in r for r in adapter.sent_responses) + + @pytest.mark.asyncio + async def test_reset_bypasses_guard(self): + """/reset (alias for /new) must be dispatched directly.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/reset")) + + assert sk not in adapter._pending_messages + assert any("handled:reset" in r for r in adapter.sent_responses) + + @pytest.mark.asyncio + async def test_approve_bypasses_guard(self): + """/approve must bypass (deadlock prevention).""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/approve")) + + assert sk not in adapter._pending_messages + assert any("handled:approve" in r for r in adapter.sent_responses) + + @pytest.mark.asyncio + async def test_deny_bypasses_guard(self): + """/deny must bypass (deadlock prevention).""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await 
adapter.handle_message(_make_event("/deny")) + + assert sk not in adapter._pending_messages + assert any("handled:deny" in r for r in adapter.sent_responses) + + @pytest.mark.asyncio + async def test_status_bypasses_guard(self): + """/status must bypass so it returns a system response.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/status")) + + assert sk not in adapter._pending_messages + assert any("handled:status" in r for r in adapter.sent_responses) + + +# --------------------------------------------------------------------------- +# Tests: non-bypass messages still get queued +# --------------------------------------------------------------------------- + + +class TestNonBypassStillQueued: + """Regular messages and unknown commands must be queued, not dispatched.""" + + @pytest.mark.asyncio + async def test_regular_text_queued(self): + """Plain text while agent is running must be queued as pending.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("hello world")) + + assert sk in adapter._pending_messages, ( + "Regular text was not queued — it should be pending" + ) + assert len(adapter.sent_responses) == 0, ( + "Regular text should not produce a direct response" + ) + + @pytest.mark.asyncio + async def test_unknown_command_queued(self): + """Unknown /commands must be queued, not dispatched.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/foobar")) + + assert sk in adapter._pending_messages + assert len(adapter.sent_responses) == 0 + + @pytest.mark.asyncio + async def test_file_path_not_treated_as_command(self): + """A message like '/path/to/file' must not bypass the guard.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = 
asyncio.Event() + + await adapter.handle_message(_make_event("/path/to/file.py")) + + assert sk in adapter._pending_messages + assert len(adapter.sent_responses) == 0 + + +# --------------------------------------------------------------------------- +# Tests: no active session — commands go through normally +# --------------------------------------------------------------------------- + + +class TestNoActiveSessionNormalDispatch: + """When no agent is running, messages spawn a background task normally.""" + + @pytest.mark.asyncio + async def test_stop_when_no_session_active(self): + """/stop without an active session spawns a background task + (the Level 2 handler will return 'No active task').""" + adapter = _make_adapter() + sk = _session_key() + + # No active session — _active_sessions is empty + assert sk not in adapter._active_sessions + + await adapter.handle_message(_make_event("/stop")) + + # Should have gone through the normal path (background task spawned) + # and NOT be in _pending_messages (that's the queued-during-active path) + assert sk not in adapter._pending_messages + + +# --------------------------------------------------------------------------- +# Tests: safety net in _run_agent discards command text from pending queue +# --------------------------------------------------------------------------- + + +class TestPendingCommandSafetyNet: + """The safety net in gateway/run.py _run_agent must discard command text + that leaks into the pending queue via interrupt_message fallback.""" + + def test_stop_command_detected(self): + """resolve_command must recognize /stop so the safety net can + discard it.""" + from hermes_cli.commands import resolve_command + + assert resolve_command("stop") is not None + assert resolve_command("stop").name == "stop" + + def test_new_command_detected(self): + from hermes_cli.commands import resolve_command + + assert resolve_command("new") is not None + assert resolve_command("new").name == "new" + + def 
test_reset_alias_detected(self): + from hermes_cli.commands import resolve_command + + assert resolve_command("reset") is not None + assert resolve_command("reset").name == "new" # alias + + def test_unknown_command_not_detected(self): + from hermes_cli.commands import resolve_command + + assert resolve_command("foobar") is None + + def test_file_path_not_detected_as_command(self): + """'/path/to/file' should not resolve as a command.""" + from hermes_cli.commands import resolve_command + + # The safety net splits on whitespace and takes the first word + # after stripping '/'. For '/path/to/file', that's 'path/to/file'. + assert resolve_command("path/to/file") is None + + +# --------------------------------------------------------------------------- +# Tests: bypass with @botname suffix (Telegram-style) +# --------------------------------------------------------------------------- + + +class TestBypassWithBotnameSuffix: + """Telegram appends @botname to commands. The bypass must still work.""" + + @pytest.mark.asyncio + async def test_stop_with_botname(self): + """/stop@MyHermesBot must bypass the guard.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/stop@MyHermesBot")) + + assert sk not in adapter._pending_messages, ( + "/stop@MyHermesBot was queued instead of bypassing" + ) + assert any("handled:stop" in r for r in adapter.sent_responses) + + @pytest.mark.asyncio + async def test_new_with_botname(self): + """/new@MyHermesBot must bypass the guard.""" + adapter = _make_adapter() + sk = _session_key() + adapter._active_sessions[sk] = asyncio.Event() + + await adapter.handle_message(_make_event("/new@MyHermesBot")) + + assert sk not in adapter._pending_messages + assert any("handled:new" in r for r in adapter.sent_responses) From d9e7e42d0b692b91168b4d69cb5a87e6460f3100 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 
Apr 2026 01:00:02 -0700 Subject: [PATCH 062/154] fix(approval): load permanent command allowlist on startup (#5076) Co-authored-by: Timo Karp Co-authored-by: Claude Opus 4.6 (1M context) --- tools/approval.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/approval.py b/tools/approval.py index 19399836..b49e444a 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -871,3 +871,7 @@ def check_all_command_guards(command: str, env_type: str, return {"approved": True, "message": None, "user_approved": True, "description": combined_desc} + + +# Load permanent allowlist from config on module import +load_permanent_allowlist() From 1c425f219ecde160daddeec82283c735f1df9aeb Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 01:03:52 -0700 Subject: [PATCH 063/154] fix(cli): defer response content until reasoning block completes (#5773) When show_reasoning is on with streaming, content tokens could arrive while the reasoning box was still rendering (interleaved thinking mode). This caused the response box to open before reasoning finished, resulting in reasoning appearing after the response in the terminal. Fix: buffer content in _deferred_content while _reasoning_box_opened is True. Flush the buffer through _emit_stream_text when _close_reasoning_box runs, ensuring reasoning always renders before the response. --- cli.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cli.py b/cli.py index 29e6257d..09cf2094 100644 --- a/cli.py +++ b/cli.py @@ -1920,6 +1920,12 @@ class HermesCLI: _cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}") self._reasoning_box_opened = False + # Flush any content that was deferred while reasoning was rendering. + deferred = getattr(self, "_deferred_content", "") + if deferred: + self._deferred_content = "" + self._emit_stream_text(deferred) + def _stream_delta(self, text) -> None: """Line-buffered streaming callback for real-time token rendering. 
@@ -2022,6 +2028,13 @@ class HermesCLI: if not text: return + # When show_reasoning is on and reasoning is still rendering, + # defer content until the reasoning box closes. This ensures the + # reasoning block always appears BEFORE the response in the terminal. + if self.show_reasoning and getattr(self, "_reasoning_box_opened", False): + self._deferred_content = getattr(self, "_deferred_content", "") + text + return + # Close the live reasoning box before opening the response box self._close_reasoning_box() @@ -2088,6 +2101,7 @@ class HermesCLI: self._reasoning_box_opened = False self._reasoning_buf = "" self._reasoning_preview_buf = "" + self._deferred_content = "" def _slow_command_status(self, command: str) -> str: """Return a user-facing status message for slower slash commands.""" From e8f6854cabeb2d71e22b9a2b28dcdfd20dc1b787 Mon Sep 17 00:00:00 2001 From: Leo Torres Date: Tue, 7 Apr 2026 11:36:13 +0200 Subject: [PATCH 064/154] docs: expand Manim CE reference docs with additional API coverage Add geometry mobjects, movement/creation animations, and LaTeX environments to the skill's reference docs. All verified against Manim CE v0.20.1. Co-Authored-By: Claude Opus 4.6 (1M context) --- skills/creative/manim-video/SKILL.md | 2 +- .../manim-video/references/animations.md | 25 +++++++ .../manim-video/references/equations.md | 51 ++++++++++++++ .../manim-video/references/mobjects.md | 69 +++++++++++++++++++ 4 files changed, 146 insertions(+), 1 deletion(-) diff --git a/skills/creative/manim-video/SKILL.md b/skills/creative/manim-video/SKILL.md index 5c82526f..35c09bc7 100644 --- a/skills/creative/manim-video/SKILL.md +++ b/skills/creative/manim-video/SKILL.md @@ -24,7 +24,7 @@ This is educational cinema. Every frame teaches. Every animation reveals structu ## Prerequisites -Run `scripts/setup.sh` to verify all dependencies. Requires: Python 3.10+, Manim Community Edition (`pip install manim`), LaTeX (`texlive-full` on Linux, `mactex` on macOS), and ffmpeg. 
+Run `scripts/setup.sh` to verify all dependencies. Requires: Python 3.10+, Manim Community Edition v0.20+ (`pip install manim`), LaTeX (`texlive-full` on Linux, `mactex` on macOS), and ffmpeg. Reference docs tested against Manim CE v0.20.1. ## Modes diff --git a/skills/creative/manim-video/references/animations.md b/skills/creative/manim-video/references/animations.md index 84b2cb01..1bbbc034 100644 --- a/skills/creative/manim-video/references/animations.md +++ b/skills/creative/manim-video/references/animations.md @@ -50,6 +50,31 @@ self.play(circle.animate.set_color(RED)) self.play(circle.animate.shift(RIGHT * 2).scale(0.5)) # chain multiple ``` +## Additional Creation Animations + +```python +self.play(GrowFromPoint(circle, LEFT * 3)) # scale 0 -> 1 from a specific point +self.play(GrowFromEdge(rect, DOWN)) # grow from one edge +self.play(SpinInFromNothing(square)) # scale up while rotating (default PI/2) +self.play(GrowArrow(arrow)) # grows arrow from start to tip +``` + +## Movement Animations + +```python +# Move a mobject along an arbitrary path +path = Arc(radius=2, angle=PI) +self.play(MoveAlongPath(dot, path), run_time=2) + +# Rotate (as a Transform, not .animate — supports about_point) +self.play(Rotate(square, angle=PI / 2, about_point=ORIGIN), run_time=1.5) + +# Rotating (continuous rotation, updater-style — good for spinning objects) +self.play(Rotating(gear, angle=TAU, run_time=4, rate_func=linear)) +``` + +`MoveAlongPath` takes any `VMobject` as the path — use `Arc`, `CubicBezier`, `Line`, or a custom `VMobject`. Position is computed via `path.point_from_proportion()`. 
+ ## Emphasis Animations ```python diff --git a/skills/creative/manim-video/references/equations.md b/skills/creative/manim-video/references/equations.md index 78d63f2b..0a08a5dd 100644 --- a/skills/creative/manim-video/references/equations.md +++ b/skills/creative/manim-video/references/equations.md @@ -65,6 +65,57 @@ MathTex(r"\vec{v}") # vector MathTex(r"\lim_{x \to \infty} f(x)") # limit ``` +## Matrices + +`MathTex` supports standard LaTeX matrix environments via `amsmath` (loaded by default): + +```python +# Bracketed matrix +MathTex(r"\begin{bmatrix} 1 & 0 \\ 0 & 1 \end{bmatrix}") + +# Parenthesized matrix +MathTex(r"\begin{pmatrix} a & b \\ c & d \end{pmatrix}") + +# Determinant (vertical bars) +MathTex(r"\begin{vmatrix} a & b \\ c & d \end{vmatrix}") + +# Plain (no delimiters) +MathTex(r"\begin{matrix} x_1 \\ x_2 \\ x_3 \end{matrix}") +``` + +For matrices you need to animate element-by-element or color individual entries, use the `IntegerMatrix`, `DecimalMatrix`, or `MobjectMatrix` mobjects instead — see `mobjects.md`. + +## Cases and Piecewise Functions + +```python +MathTex(r""" + f(x) = \begin{cases} + x^2 & \text{if } x \geq 0 \\ + -x^2 & \text{if } x < 0 + \end{cases} +""") +``` + +## Aligned Environments + +For multi-line derivations with alignment, use `aligned` inside `MathTex`: + +```python +MathTex(r""" + \begin{aligned} + \nabla \cdot \mathbf{E} &= \frac{\rho}{\epsilon_0} \\ + \nabla \cdot \mathbf{B} &= 0 \\ + \nabla \times \mathbf{E} &= -\frac{\partial \mathbf{B}}{\partial t} \\ + \nabla \times \mathbf{B} &= \mu_0 \mathbf{J} + \mu_0 \epsilon_0 \frac{\partial \mathbf{E}}{\partial t} + \end{aligned} +""") +``` + +Note: `MathTex` wraps content in `align*` by default. 
Override with `tex_environment` if needed: +```python +MathTex(r"...", tex_environment="gather*") +``` + ## Derivation Pattern ```python diff --git a/skills/creative/manim-video/references/mobjects.md b/skills/creative/manim-video/references/mobjects.md index d9c7b50b..ec68b375 100644 --- a/skills/creative/manim-video/references/mobjects.md +++ b/skills/creative/manim-video/references/mobjects.md @@ -35,6 +35,52 @@ rrect = RoundedRectangle(corner_radius=0.3, width=4, height=2) brace = Brace(rect, DOWN, color=YELLOW) ``` +## Polygons and Arcs + +```python +# Arbitrary polygon from vertices +poly = Polygon(LEFT, UP * 2, RIGHT, color=GREEN, fill_opacity=0.3) + +# Regular n-sided polygon +hexagon = RegularPolygon(n=6, color=TEAL, fill_opacity=0.4) + +# Triangle (shorthand for RegularPolygon(n=3)) +tri = Triangle(color=YELLOW, fill_opacity=0.5) + +# Arc (portion of a circle) +arc = Arc(radius=2, start_angle=0, angle=PI / 2, color=BLUE) + +# Arc between two points +arc_between = ArcBetweenPoints(LEFT * 2, RIGHT * 2, angle=TAU / 4, color=RED) + +# Curved arrow (arc with tip) +curved_arrow = CurvedArrow(LEFT * 2, RIGHT * 2, color=ORANGE) +``` + +## Sectors and Annuli + +```python +# Sector (pie slice) +sector = Sector(outer_radius=2, start_angle=0, angle=PI / 3, fill_opacity=0.7, color=BLUE) + +# Annulus (ring) +ring = Annulus(inner_radius=1, outer_radius=2, fill_opacity=0.5, color=GREEN) + +# Annular sector (partial ring) +partial_ring = AnnularSector( + inner_radius=1, outer_radius=2, + angle=PI / 2, start_angle=0, + fill_opacity=0.7, color=TEAL +) + +# Cutout (punch holes in a shape) +background = Square(side_length=4, fill_opacity=1, color=BLUE) +hole = Circle(radius=0.5) +cutout = Cutout(background, hole, fill_opacity=1, color=BLUE) +``` + +Use cases: pie charts, ring progress indicators, Venn diagrams with arcs, geometric proofs. 
+ ## Positioning ```python @@ -99,6 +145,29 @@ class NetworkNode(Group): self.add(self.circle, self.label) ``` +## Matrix Mobjects + +Display matrices as grids of numbers or mobjects: + +```python +# Integer matrix +m = IntegerMatrix([[1, 2], [3, 4]]) + +# Decimal matrix (control decimal places) +m = DecimalMatrix([[1.5, 2.7], [3.1, 4.9]], element_to_mobject_config={"num_decimal_places": 2}) + +# Mobject matrix (any mobject in each cell) +m = MobjectMatrix([ + [MathTex(r"\pi"), MathTex(r"e")], + [MathTex(r"\phi"), MathTex(r"\tau")] +]) + +# Bracket types: "(" "[" "|" or "\\{" +m = IntegerMatrix([[1, 0], [0, 1]], left_bracket="[", right_bracket="]") +``` + +Use cases: linear algebra, transformation matrices, system-of-equations coefficient display. + ## Constants Directions: `UP, DOWN, LEFT, RIGHT, ORIGIN, UL, UR, DL, DR` From e120d2afacf90f3fec243c814da27216cba1943b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 02:40:16 -0700 Subject: [PATCH 065/154] feat: notify_on_complete for background processes (#5779) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: notify_on_complete for background processes When terminal(background=true, notify_on_complete=true), the system auto-triggers a new agent turn when the process exits — no polling needed. 
Changes: - ProcessSession: add notify_on_complete field - ProcessRegistry: add completion_queue, populate on _move_to_finished() - Terminal tool: add notify_on_complete parameter to schema + handler - CLI: drain completion_queue after agent turn AND during idle loop - Gateway: enhanced _run_process_watcher injects synthetic MessageEvent on completion, triggering a full agent turn - Checkpoint persistence includes notify_on_complete for crash recovery - code_execution_tool: block notify_on_complete in sandbox scripts - 15 new tests covering queue mechanics, checkpoint round-trip, schema * docs: update terminal tool descriptions for notify_on_complete - background: remove 'ONLY for servers' language, describe both patterns (long-lived processes AND long-running tasks with notify_on_complete) - notify_on_complete: more prescriptive about when to use it - TERMINAL_TOOL_DESCRIPTION: remove 'Do NOT use background for builds' guidance that contradicted the new feature --- cli.py | 43 ++++- gateway/run.py | 51 ++++- tests/tools/test_notify_on_complete.py | 247 +++++++++++++++++++++++++ tools/code_execution_tool.py | 2 +- tools/process_registry.py | 21 +++ tools/terminal_tool.py | 44 ++++- 6 files changed, 398 insertions(+), 10 deletions(-) create mode 100644 tests/tools/test_notify_on_complete.py diff --git a/cli.py b/cli.py index 09cf2094..6f02dc93 100644 --- a/cli.py +++ b/cli.py @@ -8134,6 +8134,25 @@ class HermesCLI: # Periodic config watcher — auto-reload MCP on mcp_servers change if not self._agent_running: self._check_config_mcp_changes() + # Check for background process completion notifications + # while the agent is idle (user hasn't typed anything yet). 
+ try: + from tools.process_registry import process_registry + if not process_registry.completion_queue.empty(): + completion = process_registry.completion_queue.get_nowait() + _exit = completion.get("exit_code", "?") + _cmd = completion.get("command", "unknown") + _sid = completion.get("session_id", "unknown") + _out = completion.get("output", "") + _synth = ( + f"[SYSTEM: Background process {_sid} completed " + f"(exit code {_exit}).\n" + f"Command: {_cmd}\n" + f"Output:\n{_out}]" + ) + self._pending_input.put(_synth) + except Exception: + pass continue if not user_input: @@ -8247,7 +8266,29 @@ class HermesCLI: except Exception as e: _cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}") threading.Thread(target=_restart_recording, daemon=True).start() - + + # Drain process completion notifications — any background + # process that finished with notify_on_complete while the + # agent was running (or before) gets auto-injected as a + # new user message so the agent can react to it. + try: + from tools.process_registry import process_registry + while not process_registry.completion_queue.empty(): + completion = process_registry.completion_queue.get_nowait() + _exit = completion.get("exit_code", "?") + _cmd = completion.get("command", "unknown") + _sid = completion.get("session_id", "unknown") + _out = completion.get("output", "") + _synth = ( + f"[SYSTEM: Background process {_sid} completed " + f"(exit code {_exit}).\n" + f"Command: {_cmd}\n" + f"Output:\n{_out}]" + ) + self._pending_input.put(_synth) + except Exception: + pass # Non-fatal — don't break the main loop + except Exception as e: print(f"Error: {e}") diff --git a/gateway/run.py b/gateway/run.py index e4a5324a..7a45be62 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6048,12 +6048,13 @@ class GatewayRunner: platform_name = watcher.get("platform", "") chat_id = watcher.get("chat_id", "") thread_id = watcher.get("thread_id", "") + agent_notify = watcher.get("notify_on_complete", False) notify_mode = 
self._load_background_notifications_mode() - logger.debug("Process watcher started: %s (every %ss, notify=%s)", - session_id, interval, notify_mode) + logger.debug("Process watcher started: %s (every %ss, notify=%s, agent_notify=%s)", + session_id, interval, notify_mode, agent_notify) - if notify_mode == "off": + if notify_mode == "off" and not agent_notify: # Still wait for the process to exit so we can log it, but don't # push any messages to the user. while True: @@ -6077,6 +6078,47 @@ class GatewayRunner: last_output_len = current_output_len if session.exited: + # --- Agent-triggered completion: inject synthetic message --- + if agent_notify: + from tools.ansi_strip import strip_ansi + _out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else "" + synth_text = ( + f"[SYSTEM: Background process {session_id} completed " + f"(exit code {session.exit_code}).\n" + f"Command: {session.command}\n" + f"Output:\n{_out}]" + ) + adapter = None + for p, a in self.adapters.items(): + if p.value == platform_name: + adapter = a + break + if adapter and chat_id: + try: + from gateway.platforms.base import MessageEvent, MessageType + from gateway.session import SessionSource + from gateway.config import Platform + _platform_enum = Platform(platform_name) + _source = SessionSource( + platform=_platform_enum, + chat_id=chat_id, + thread_id=thread_id or None, + ) + synth_event = MessageEvent( + text=synth_text, + message_type=MessageType.TEXT, + source=_source, + ) + logger.info( + "Process %s finished — injecting agent notification for session %s", + session_id, session_key, + ) + await adapter.handle_message(synth_event) + except Exception as e: + logger.error("Agent notify injection error: %s", e) + break + + # --- Normal text-only notification --- # Decide whether to notify based on mode should_notify = ( notify_mode in ("all", "result") @@ -6101,8 +6143,9 @@ class GatewayRunner: logger.error("Watcher delivery error: %s", e) break - elif has_new_output and 
notify_mode == "all": + elif has_new_output and notify_mode == "all" and not agent_notify: # New output available -- deliver status update (only in "all" mode) + # Skip periodic updates for agent_notify watchers (they only care about completion) new_output = session.output_buffer[-500:] if session.output_buffer else "" message_text = ( f"[Background process {session_id} is still running~ " diff --git a/tests/tools/test_notify_on_complete.py b/tests/tools/test_notify_on_complete.py new file mode 100644 index 00000000..88872190 --- /dev/null +++ b/tests/tools/test_notify_on_complete.py @@ -0,0 +1,247 @@ +"""Tests for notify_on_complete background process feature. + +Covers: + - ProcessSession.notify_on_complete field + - ProcessRegistry.completion_queue population on _move_to_finished() + - Checkpoint persistence of notify_on_complete + - Terminal tool schema includes notify_on_complete + - Terminal tool handler passes notify_on_complete through +""" + +import json +import os +import queue +import time +import pytest +from pathlib import Path +from unittest.mock import MagicMock, patch + +from tools.process_registry import ( + ProcessRegistry, + ProcessSession, +) + + +@pytest.fixture() +def registry(): + """Create a fresh ProcessRegistry.""" + return ProcessRegistry() + + +def _make_session( + sid="proc_test_notify", + command="echo hello", + task_id="t1", + exited=False, + exit_code=None, + output="", + notify_on_complete=False, +) -> ProcessSession: + s = ProcessSession( + id=sid, + command=command, + task_id=task_id, + started_at=time.time(), + exited=exited, + exit_code=exit_code, + output_buffer=output, + notify_on_complete=notify_on_complete, + ) + return s + + +# ========================================================================= +# ProcessSession field +# ========================================================================= + +class TestProcessSessionField: + def test_default_false(self): + s = ProcessSession(id="proc_1", command="echo hi") + 
assert s.notify_on_complete is False + + def test_set_true(self): + s = ProcessSession(id="proc_1", command="echo hi", notify_on_complete=True) + assert s.notify_on_complete is True + + +# ========================================================================= +# Completion queue +# ========================================================================= + +class TestCompletionQueue: + def test_queue_exists(self, registry): + assert hasattr(registry, "completion_queue") + assert registry.completion_queue.empty() + + def test_move_to_finished_no_notify(self, registry): + """Processes without notify_on_complete don't enqueue.""" + s = _make_session(notify_on_complete=False, output="done") + s.exited = True + s.exit_code = 0 + registry._running[s.id] = s + with patch.object(registry, "_write_checkpoint"): + registry._move_to_finished(s) + assert registry.completion_queue.empty() + + def test_move_to_finished_with_notify(self, registry): + """Processes with notify_on_complete push to queue.""" + s = _make_session( + notify_on_complete=True, + output="build succeeded", + exit_code=0, + ) + s.exited = True + s.exit_code = 0 + registry._running[s.id] = s + with patch.object(registry, "_write_checkpoint"): + registry._move_to_finished(s) + + assert not registry.completion_queue.empty() + completion = registry.completion_queue.get_nowait() + assert completion["session_id"] == s.id + assert completion["command"] == "echo hello" + assert completion["exit_code"] == 0 + assert "build succeeded" in completion["output"] + + def test_move_to_finished_nonzero_exit(self, registry): + """Nonzero exit codes are captured correctly.""" + s = _make_session( + notify_on_complete=True, + output="FAILED", + exit_code=1, + ) + s.exited = True + s.exit_code = 1 + registry._running[s.id] = s + with patch.object(registry, "_write_checkpoint"): + registry._move_to_finished(s) + + completion = registry.completion_queue.get_nowait() + assert completion["exit_code"] == 1 + assert "FAILED" in 
completion["output"] + + def test_output_truncated_to_2000(self, registry): + """Long output is truncated to last 2000 chars.""" + long_output = "x" * 5000 + s = _make_session( + notify_on_complete=True, + output=long_output, + ) + s.exited = True + s.exit_code = 0 + registry._running[s.id] = s + with patch.object(registry, "_write_checkpoint"): + registry._move_to_finished(s) + + completion = registry.completion_queue.get_nowait() + assert len(completion["output"]) == 2000 + + def test_multiple_completions_queued(self, registry): + """Multiple notify processes all push to the same queue.""" + for i in range(3): + s = _make_session( + sid=f"proc_{i}", + notify_on_complete=True, + output=f"output_{i}", + ) + s.exited = True + s.exit_code = 0 + registry._running[s.id] = s + with patch.object(registry, "_write_checkpoint"): + registry._move_to_finished(s) + + completions = [] + while not registry.completion_queue.empty(): + completions.append(registry.completion_queue.get_nowait()) + assert len(completions) == 3 + ids = {c["session_id"] for c in completions} + assert ids == {"proc_0", "proc_1", "proc_2"} + + +# ========================================================================= +# Checkpoint persistence +# ========================================================================= + +class TestCheckpointNotify: + def test_checkpoint_includes_notify(self, registry, tmp_path): + with patch("tools.process_registry.CHECKPOINT_PATH", tmp_path / "procs.json"): + s = _make_session(notify_on_complete=True) + registry._running[s.id] = s + registry._write_checkpoint() + + data = json.loads((tmp_path / "procs.json").read_text()) + assert len(data) == 1 + assert data[0]["notify_on_complete"] is True + + def test_checkpoint_without_notify(self, registry, tmp_path): + with patch("tools.process_registry.CHECKPOINT_PATH", tmp_path / "procs.json"): + s = _make_session(notify_on_complete=False) + registry._running[s.id] = s + registry._write_checkpoint() + + data = 
json.loads((tmp_path / "procs.json").read_text()) + assert data[0]["notify_on_complete"] is False + + def test_recover_preserves_notify(self, registry, tmp_path): + checkpoint = tmp_path / "procs.json" + checkpoint.write_text(json.dumps([{ + "session_id": "proc_live", + "command": "sleep 999", + "pid": os.getpid(), + "task_id": "t1", + "notify_on_complete": True, + }])) + with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint): + recovered = registry.recover_from_checkpoint() + assert recovered == 1 + s = registry.get("proc_live") + assert s.notify_on_complete is True + + def test_recover_defaults_false(self, registry, tmp_path): + """Old checkpoint entries without the field default to False.""" + checkpoint = tmp_path / "procs.json" + checkpoint.write_text(json.dumps([{ + "session_id": "proc_live", + "command": "sleep 999", + "pid": os.getpid(), + "task_id": "t1", + }])) + with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint): + recovered = registry.recover_from_checkpoint() + assert recovered == 1 + s = registry.get("proc_live") + assert s.notify_on_complete is False + + +# ========================================================================= +# Terminal tool schema +# ========================================================================= + +class TestTerminalSchema: + def test_schema_has_notify_on_complete(self): + from tools.terminal_tool import TERMINAL_SCHEMA + props = TERMINAL_SCHEMA["parameters"]["properties"] + assert "notify_on_complete" in props + assert props["notify_on_complete"]["type"] == "boolean" + assert props["notify_on_complete"]["default"] is False + + def test_handler_passes_notify(self): + """_handle_terminal passes notify_on_complete to terminal_tool.""" + from tools.terminal_tool import _handle_terminal + with patch("tools.terminal_tool.terminal_tool", return_value='{"ok":true}') as mock_tt: + _handle_terminal( + {"command": "echo hi", "background": True, "notify_on_complete": True}, + task_id="t1", + ) + _, 
kwargs = mock_tt.call_args + assert kwargs["notify_on_complete"] is True + + +# ========================================================================= +# Code execution blocked params +# ========================================================================= + +class TestCodeExecutionBlocked: + def test_notify_on_complete_blocked_in_sandbox(self): + from tools.code_execution_tool import _TERMINAL_BLOCKED_PARAMS + assert "notify_on_complete" in _TERMINAL_BLOCKED_PARAMS diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index ff5c7f7f..5c4658b6 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -300,7 +300,7 @@ def _call(tool_name, args): # --------------------------------------------------------------------------- # Terminal parameters that must not be used from ephemeral sandbox scripts -_TERMINAL_BLOCKED_PARAMS = {"background", "check_interval", "pty"} +_TERMINAL_BLOCKED_PARAMS = {"background", "check_interval", "pty", "notify_on_complete"} def _rpc_server_loop( diff --git a/tools/process_registry.py b/tools/process_registry.py index a3796c8a..f5ac9543 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -81,6 +81,7 @@ class ProcessSession: watcher_chat_id: str = "" watcher_thread_id: str = "" watcher_interval: int = 0 # 0 = no watcher configured + notify_on_complete: bool = False # Queue agent notification on exit _lock: threading.Lock = field(default_factory=threading.Lock) _reader_thread: Optional[threading.Thread] = field(default=None, repr=False) _pty: Any = field(default=None, repr=False) # ptyprocess handle (when use_pty=True) @@ -112,6 +113,12 @@ class ProcessRegistry: # Side-channel for check_interval watchers (gateway reads after agent run) self.pending_watchers: List[Dict[str, Any]] = [] + # Completion notifications — processes with notify_on_complete push here + # on exit. 
CLI process_loop and gateway drain this after each agent turn + # to auto-trigger a new agent turn with the process results. + import queue as _queue_mod + self.completion_queue: _queue_mod.Queue = _queue_mod.Queue() + @staticmethod def _clean_shell_noise(text: str) -> str: """Strip shell startup warnings from the beginning of output.""" @@ -415,6 +422,18 @@ class ProcessRegistry: self._finished[session.id] = session self._write_checkpoint() + # If the caller requested agent notification, enqueue the completion + # so the CLI/gateway can auto-trigger a new agent turn. + if session.notify_on_complete: + from tools.ansi_strip import strip_ansi + output_tail = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else "" + self.completion_queue.put({ + "session_id": session.id, + "command": session.command, + "exit_code": session.exit_code, + "output": output_tail, + }) + # ----- Query Methods ----- def get(self, session_id: str) -> Optional[ProcessSession]: @@ -721,6 +740,7 @@ class ProcessRegistry: "watcher_chat_id": s.watcher_chat_id, "watcher_thread_id": s.watcher_thread_id, "watcher_interval": s.watcher_interval, + "notify_on_complete": s.notify_on_complete, }) # Atomic write to avoid corruption on crash @@ -771,6 +791,7 @@ class ProcessRegistry: watcher_chat_id=entry.get("watcher_chat_id", ""), watcher_thread_id=entry.get("watcher_thread_id", ""), watcher_interval=entry.get("watcher_interval", 0), + notify_on_complete=entry.get("notify_on_complete", False), ) with self._lock: self._running[session.id] = session diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index be565f19..305d0801 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -421,9 +421,11 @@ Do NOT use sed/awk to edit files — use patch instead. Do NOT use echo/cat heredoc to create files — use write_file instead. Reserve terminal for: builds, installs, git, processes, scripts, network, package managers, and anything that needs a shell. 
-Foreground (default): Commands return INSTANTLY when done, even if the timeout is high. Set timeout=300 for long builds/scripts — you'll still get the result in seconds if it's fast. Prefer foreground for everything that finishes. -Background: ONLY for long-running servers, watchers, or processes that never exit. Set background=true to get a session_id, then use process(action="wait") to block until done — it returns instantly on completion, same as foreground. Use process(action="poll") only when you need a progress check without blocking. -Do NOT use background for scripts, builds, or installs — foreground with a generous timeout is always better (fewer tool calls, instant results). +Foreground (default): Commands return INSTANTLY when done, even if the timeout is high. Set timeout=300 for long builds/scripts — you'll still get the result in seconds if it's fast. Prefer foreground for short commands. +Background: Set background=true to get a session_id. Two patterns: + (1) Long-lived processes that never exit (servers, watchers). + (2) Long-running tasks with notify_on_complete=true — you can keep working on other things and the system auto-notifies you when the task finishes. Great for test suites, builds, deployments, or anything that takes more than a minute. +Use process(action="poll") for progress checks, process(action="wait") to block until done. Working directory: Use 'workdir' for per-command cwd. PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL). @@ -1009,6 +1011,7 @@ def terminal_tool( workdir: Optional[str] = None, check_interval: Optional[int] = None, pty: bool = False, + notify_on_complete: bool = False, ) -> str: """ Execute a command in the configured terminal environment. 
@@ -1022,6 +1025,7 @@ def terminal_tool( workdir: Working directory for this command (optional, uses session cwd if not set) check_interval: Seconds between auto-checks for background processes (gateway only, min 30) pty: If True, use pseudo-terminal for interactive CLI tools (local backend only) + notify_on_complete: If True and background=True, auto-notify the agent when the process exits Returns: str: JSON string with output, exit_code, and error fields @@ -1254,6 +1258,32 @@ def terminal_tool( f"configured limit of {max_timeout}s" ) + # Mark for agent notification on completion + if notify_on_complete and background: + proc_session.notify_on_complete = True + result_data["notify_on_complete"] = True + + # In gateway mode, auto-register a fast watcher so the + # gateway can detect completion and trigger a new agent + # turn. CLI mode uses the completion_queue directly. + _gw_platform = os.getenv("HERMES_SESSION_PLATFORM", "") + if _gw_platform and not check_interval: + _gw_chat_id = os.getenv("HERMES_SESSION_CHAT_ID", "") + _gw_thread_id = os.getenv("HERMES_SESSION_THREAD_ID", "") + proc_session.watcher_platform = _gw_platform + proc_session.watcher_chat_id = _gw_chat_id + proc_session.watcher_thread_id = _gw_thread_id + proc_session.watcher_interval = 5 + process_registry.pending_watchers.append({ + "session_id": proc_session.id, + "check_interval": 5, + "session_key": session_key, + "platform": _gw_platform, + "chat_id": _gw_chat_id, + "thread_id": _gw_thread_id, + "notify_on_complete": True, + }) + # Register check_interval watcher (gateway picks this up after agent run) if check_interval and background: effective_interval = max(30, check_interval) @@ -1550,7 +1580,7 @@ TERMINAL_SCHEMA = { }, "background": { "type": "boolean", - "description": "ONLY for servers/watchers that never exit. For scripts, builds, installs — use foreground with timeout instead (it returns instantly when done).", + "description": "Run the command in the background. 
Two patterns: (1) Long-lived processes that never exit (servers, watchers). (2) Long-running tasks paired with notify_on_complete=true — you can keep working and get notified when the task finishes. For short commands, prefer foreground with a generous timeout instead.", "default": False }, "timeout": { @@ -1571,6 +1601,11 @@ TERMINAL_SCHEMA = { "type": "boolean", "description": "Run in pseudo-terminal (PTY) mode for interactive CLI tools like Codex, Claude Code, or Python REPL. Only works with local and SSH backends. Default: false.", "default": False + }, + "notify_on_complete": { + "type": "boolean", + "description": "When true (and background=true), you'll be automatically notified when the process finishes — no polling needed. Use this for tasks that take a while (tests, builds, deployments) so you can keep working on other things in the meantime.", + "default": False } }, "required": ["command"] @@ -1587,6 +1622,7 @@ def _handle_terminal(args, **kw): workdir=args.get("workdir"), check_interval=args.get("check_interval"), pty=args.get("pty", False), + notify_on_complete=args.get("notify_on_complete", False), ) From cafdfd36549538713b0a91aaef42877c2be2845a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 02:49:20 -0700 Subject: [PATCH 066/154] fix: sync bundled skills to default profile when updating from a named profile (#5795) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The filter in cmd_update() excluded is_default profiles from the cross-profile skill sync loop. When running 'hermes update' from a named profile (e.g. hermes -p coder update), the default profile (~/.hermes) never received new bundled skills. Remove the 'not p.is_default' condition so all profiles — including default — are synced regardless of which profile runs the update. Reported by olafgeibig. 
--- hermes_cli/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 1a968952..55faf841 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -3566,7 +3566,7 @@ def cmd_update(args): try: from hermes_cli.profiles import list_profiles, get_active_profile_name, seed_profile_skills active = get_active_profile_name() - other_profiles = [p for p in list_profiles() if not p.is_default and p.name != active] + other_profiles = [p for p in list_profiles() if p.name != active] if other_profiles: print() print("→ Syncing bundled skills to other profiles...") From 8b861b77c1f854a2b7914be1afa52facebfb046f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 03:28:44 -0700 Subject: [PATCH 067/154] =?UTF-8?q?refactor:=20remove=20browser=5Fclose=20?= =?UTF-8?q?tool=20=E2=80=94=20auto-cleanup=20handles=20it=20(#5792)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: remove browser_close tool — auto-cleanup handles it The browser_close tool was called in only 9% of browser sessions (13/144 navigations across 66 sessions), always redundantly — cleanup_browser() already runs via _cleanup_task_resources() at conversation end, and the background inactivity reaper catches anything else. Removing it saves one tool schema slot in every browser-enabled API call. Also fixes a latent bug: cleanup_browser() now handles Camofox sessions too (previously only Browserbase). Camofox sessions were never auto-cleaned per-task because they live in a separate dict from _active_sessions. 
Files changed (13): - tools/browser_tool.py: remove function, schema, registry entry; add camofox cleanup to cleanup_browser() - toolsets.py, model_tools.py, prompt_builder.py, display.py, acp_adapter/tools.py: remove browser_close from all tool lists - tests/: remove browser_close test, update toolset assertion - docs/skills: remove all browser_close references * fix: repeat browser_scroll 5x per call for meaningful page movement Most backends scroll ~100px per call — barely visible on a typical viewport. Repeating 5x gives ~500px (~half a viewport), making each scroll tool call actually useful. Backend-agnostic approach: works across all 7+ browser backends without needing to configure each one's scroll amount individually. Breaks early on error for the agent-browser path. * feat: auto-return compact snapshot from browser_navigate Every browser session starts with navigate → snapshot. Now navigate returns the compact accessibility tree snapshot inline, saving one tool call per browser task. The snapshot captures the full page DOM (not viewport-limited), so scroll position doesn't affect it. browser_snapshot remains available for refreshing after interactions or getting full=true content. Both Browserbase and Camofox paths auto-snapshot. If the snapshot fails for any reason, navigation still succeeds — the snapshot is a bonus, not a requirement. Schema descriptions updated to guide models: navigate mentions it returns a snapshot, snapshot mentions it's for refresh/full content. * refactor: slim cronjob tool schema — consolidate model/provider, drop unused params Session data (151 calls across 67 sessions) showed several schema properties were never used by models. 
Consolidated and cleaned up: Removed from schema (still work via backend/CLI): - skill (singular): use skills array instead - reason: pause-only, unnecessary - include_disabled: now defaults to true - base_url: extreme edge case, zero usage - provider (standalone): merged into model object Consolidated: - model + provider → single 'model' object with {model, provider} fields. If provider is omitted, the current main provider is pinned at creation time so the job stays stable even if the user changes their default. Kept: - script: useful data collection feature - skills array: standard interface for skill loading Schema shrinks from 14 to 10 properties. All backend functionality preserved — the Python function signature and handler lambda still accept every parameter. * fix: remove mixture_of_agents from core toolsets — opt-in only via hermes tools MoA was in _HERMES_CORE_TOOLS and composite toolsets (hermes-cli, hermes-messaging, safe), which meant it appeared in every session for anyone with OPENROUTER_API_KEY set. The _DEFAULT_OFF_TOOLSETS gate only works after running 'hermes tools' explicitly. Now MoA only appears when a user explicitly enables it via 'hermes tools'. The moa toolset definition and check_fn remain unchanged — it just needs to be opted into. 
--- acp_adapter/tools.py | 1 - agent/display.py | 2 - agent/prompt_builder.py | 1 - cli-config.yaml.example | 2 +- model_tools.py | 2 +- skills/dogfood/SKILL.md | 3 +- tests/gateway/test_api_server_toolset.py | 2 +- tests/tools/test_browser_cleanup.py | 12 -- tools/browser_camofox.py | 19 +++ tools/browser_tool.py | 123 +++++++++---------- tools/cronjob_tools.py | 86 +++++++------ toolsets.py | 14 +-- website/docs/reference/tools-reference.md | 1 - website/docs/reference/toolsets-reference.md | 2 +- website/docs/user-guide/features/browser.md | 8 +- 15 files changed, 136 insertions(+), 142 deletions(-) diff --git a/acp_adapter/tools.py b/acp_adapter/tools.py index 8756aa92..52313220 100644 --- a/acp_adapter/tools.py +++ b/acp_adapter/tools.py @@ -39,7 +39,6 @@ TOOL_KIND_MAP: Dict[str, ToolKind] = { "browser_scroll": "execute", "browser_press": "execute", "browser_back": "execute", - "browser_close": "execute", "browser_get_images": "read", # Agent internals "delegate_task": "execute", diff --git a/agent/display.py b/agent/display.py index 94259fa8..5eac70a4 100644 --- a/agent/display.py +++ b/agent/display.py @@ -890,8 +890,6 @@ def get_cute_tool_message( return _wrap(f"┊ ◀️ back {dur}") if tool_name == "browser_press": return _wrap(f"┊ ⌨️ press {args.get('key', '?')} {dur}") - if tool_name == "browser_close": - return _wrap(f"┊ 🚪 close browser {dur}") if tool_name == "browser_get_images": return _wrap(f"┊ 🖼️ images extracting {dur}") if tool_name == "browser_vision": diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 0a2cbe37..d6c296f6 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -744,7 +744,6 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) - "browser_type", "browser_scroll", "browser_console", - "browser_close", "browser_press", "browser_get_images", "browser_vision", diff --git a/cli-config.yaml.example b/cli-config.yaml.example index e26ee920..73bff981 100644 --- 
a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -539,7 +539,7 @@ platform_toolsets: # terminal - terminal, process # file - read_file, write_file, patch, search # browser - browser_navigate, browser_snapshot, browser_click, browser_type, -# browser_scroll, browser_back, browser_press, browser_close, +# browser_scroll, browser_back, browser_press, # browser_get_images, browser_vision (requires BROWSERBASE_API_KEY) # vision - vision_analyze (requires OPENROUTER_API_KEY) # image_gen - image_generate (requires FAL_KEY) diff --git a/model_tools.py b/model_tools.py index da5ba715..c37007c4 100644 --- a/model_tools.py +++ b/model_tools.py @@ -211,7 +211,7 @@ _LEGACY_TOOLSET_MAP = { "browser_tools": [ "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close", "browser_get_images", + "browser_press", "browser_get_images", "browser_vision", "browser_console" ], "cronjob_tools": ["cronjob"], diff --git a/skills/dogfood/SKILL.md b/skills/dogfood/SKILL.md index 81a4ebfd..b7ba3663 100644 --- a/skills/dogfood/SKILL.md +++ b/skills/dogfood/SKILL.md @@ -16,7 +16,7 @@ This skill guides you through systematic exploratory QA testing of web applicati ## Prerequisites -- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`, `browser_close`) +- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`) - A target URL and testing scope from the user ## Inputs @@ -148,7 +148,6 @@ Save the report to `{output_dir}/report.md`. 
| `browser_press` | Press a keyboard key | | `browser_vision` | Screenshot + AI analysis; use `annotate=true` for element labels | | `browser_console` | Get JS console output and errors | -| `browser_close` | Close the browser session | ## Tips diff --git a/tests/gateway/test_api_server_toolset.py b/tests/gateway/test_api_server_toolset.py index 3b4ff254..943d867e 100644 --- a/tests/gateway/test_api_server_toolset.py +++ b/tests/gateway/test_api_server_toolset.py @@ -39,7 +39,7 @@ class TestHermesApiServerToolset: tools = resolve_toolset("hermes-api-server") for tool in ["browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close"]: + "browser_press"]: assert tool in tools, f"Missing browser tool: {tool}" def test_toolset_includes_homeassistant_tools(self): diff --git a/tests/tools/test_browser_cleanup.py b/tests/tools/test_browser_cleanup.py index 9dfabe64..df21f3a0 100644 --- a/tests/tools/test_browser_cleanup.py +++ b/tests/tools/test_browser_cleanup.py @@ -65,18 +65,6 @@ class TestBrowserCleanup: mock_stop.assert_called_once_with("task-1") mock_run.assert_called_once_with("task-1", "close", [], timeout=10) - def test_browser_close_delegates_to_cleanup_browser(self): - import json - - browser_tool = self.browser_tool - browser_tool._active_sessions["task-2"] = {"session_name": "sess-2"} - - with patch("tools.browser_tool.cleanup_browser") as mock_cleanup: - result = json.loads(browser_tool.browser_close("task-2")) - - assert result == {"success": True, "closed": True} - mock_cleanup.assert_called_once_with("task-2") - def test_emergency_cleanup_clears_all_tracking_state(self): browser_tool = self.browser_tool browser_tool._cleanup_done = False diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index c2278f83..91f8fa4f 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -240,6 +240,25 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) 
-> str: "Browser is visible via VNC. " "Share this link with the user so they can watch the browser live." ) + + # Auto-take a compact snapshot so the model can act immediately + try: + snap_data = _get( + f"/tabs/{session['tab_id']}/snapshot", + params={"userId": session["user_id"]}, + ) + snapshot_text = snap_data.get("snapshot", "") + from tools.browser_tool import ( + SNAPSHOT_SUMMARIZE_THRESHOLD, + _truncate_snapshot, + ) + if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD: + snapshot_text = _truncate_snapshot(snapshot_text) + result["snapshot"] = snapshot_text + result["element_count"] = snap_data.get("refsCount", 0) + except Exception: + pass # Navigation succeeded; snapshot is a bonus + return json.dumps(result) except requests.HTTPError as e: return json.dumps({"success": False, "error": f"Navigation failed: {e}"}) diff --git a/tools/browser_tool.py b/tools/browser_tool.py index a6043e0b..ba2f81cf 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -518,7 +518,7 @@ atexit.register(_stop_browser_cleanup_thread) BROWSER_TOOL_SCHEMAS = [ { "name": "browser_navigate", - "description": "Navigate to a URL in the browser. Initializes the session and loads the page. Must be called before other browser tools. For simple information retrieval, prefer web_search or web_extract (faster, cheaper). Use browser tools when you need to interact with a page (click, fill forms, dynamic content).", + "description": "Navigate to a URL in the browser. Initializes the session and loads the page. Must be called before other browser tools. For simple information retrieval, prefer web_search or web_extract (faster, cheaper). Use browser tools when you need to interact with a page (click, fill forms, dynamic content). 
Returns a compact page snapshot with interactive elements and ref IDs — no need to call browser_snapshot separately after navigating.", "parameters": { "type": "object", "properties": { @@ -532,7 +532,7 @@ BROWSER_TOOL_SCHEMAS = [ }, { "name": "browser_snapshot", - "description": "Get a text-based snapshot of the current page's accessibility tree. Returns interactive elements with ref IDs (like @e1, @e2) for browser_click and browser_type. full=false (default): compact view with interactive elements. full=true: complete page content. Snapshots over 8000 chars are truncated or LLM-summarized. Requires browser_navigate first.", + "description": "Get a text-based snapshot of the current page's accessibility tree. Returns interactive elements with ref IDs (like @e1, @e2) for browser_click and browser_type. full=false (default): compact view with interactive elements. full=true: complete page content. Snapshots over 8000 chars are truncated or LLM-summarized. Requires browser_navigate first. Note: browser_navigate already returns a compact snapshot — use this to refresh after interactions that change the page, or with full=true for complete content.", "parameters": { "type": "object", "properties": { @@ -615,15 +615,7 @@ BROWSER_TOOL_SCHEMAS = [ "required": ["key"] } }, - { - "name": "browser_close", - "description": "Close the browser session and release resources. Call this when done with browser tasks to free up Browserbase session quota.", - "parameters": { - "type": "object", - "properties": {}, - "required": [] - } - }, + { "name": "browser_get_images", "description": "Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first.", @@ -1229,7 +1221,22 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: "Consider upgrading Browserbase plan for proxy support." 
) response["stealth_features"] = active_features - + + # Auto-take a compact snapshot so the model can act immediately + # without a separate browser_snapshot call. + try: + snap_result = _run_browser_command(effective_task_id, "snapshot", ["-c"]) + if snap_result.get("success"): + snap_data = snap_result.get("data", {}) + snapshot_text = snap_data.get("snapshot", "") + refs = snap_data.get("refs", {}) + if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD: + snapshot_text = _truncate_snapshot(snapshot_text) + response["snapshot"] = snapshot_text + response["element_count"] = len(refs) if refs else 0 + except Exception as e: + logger.debug("Auto-snapshot after navigate failed: %s", e) + return json.dumps(response, ensure_ascii=False) else: return json.dumps({ @@ -1376,31 +1383,40 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str: Returns: JSON string with scroll result """ - if _is_camofox_mode(): - from tools.browser_camofox import camofox_scroll - return camofox_scroll(direction, task_id) - - effective_task_id = task_id or "default" - # Validate direction if direction not in ["up", "down"]: return json.dumps({ "success": False, "error": f"Invalid direction '{direction}'. Use 'up' or 'down'." }, ensure_ascii=False) - - result = _run_browser_command(effective_task_id, "scroll", [direction]) - - if result.get("success"): - return json.dumps({ - "success": True, - "scrolled": direction - }, ensure_ascii=False) - else: - return json.dumps({ - "success": False, - "error": result.get("error", f"Failed to scroll {direction}") - }, ensure_ascii=False) + + # Repeat the scroll 5 times to get meaningful page movement. + # Most backends scroll ~100px per call, which is barely visible. + # 5x gives roughly half a viewport of travel, backend-agnostic. 
+ _SCROLL_REPEATS = 5 + + if _is_camofox_mode(): + from tools.browser_camofox import camofox_scroll + result = None + for _ in range(_SCROLL_REPEATS): + result = camofox_scroll(direction, task_id) + return result + + effective_task_id = task_id or "default" + + result = None + for _ in range(_SCROLL_REPEATS): + result = _run_browser_command(effective_task_id, "scroll", [direction]) + if not result.get("success"): + return json.dumps({ + "success": False, + "error": result.get("error", f"Failed to scroll {direction}") + }, ensure_ascii=False) + + return json.dumps({ + "success": True, + "scrolled": direction + }, ensure_ascii=False) def browser_back(task_id: Optional[str] = None) -> str: @@ -1463,33 +1479,7 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str: }, ensure_ascii=False) -def browser_close(task_id: Optional[str] = None) -> str: - """ - Close the browser session. - Args: - task_id: Task identifier for session isolation - - Returns: - JSON string with close result - """ - if _is_camofox_mode(): - from tools.browser_camofox import camofox_close - return camofox_close(task_id) - - effective_task_id = task_id or "default" - with _cleanup_lock: - had_session = effective_task_id in _active_sessions - - cleanup_browser(effective_task_id) - - response = { - "success": True, - "closed": True, - } - if not had_session: - response["warning"] = "Session may not have been active" - return json.dumps(response, ensure_ascii=False) def browser_console(clear: bool = False, expression: Optional[str] = None, task_id: Optional[str] = None) -> str: @@ -1942,7 +1932,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: Clean up browser session for a task. Called automatically when a task completes or when inactivity timeout is reached. - Closes both the agent-browser session and the Browserbase session. + Closes both the agent-browser/Browserbase session and Camofox sessions. 
Args: task_id: Task identifier to clean up @@ -1950,6 +1940,14 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: if task_id is None: task_id = "default" + # Also clean up Camofox session if running in Camofox mode + if _is_camofox_mode(): + try: + from tools.browser_camofox import camofox_close + camofox_close(task_id) + except Exception as e: + logger.debug("Camofox cleanup for task %s: %s", task_id, e) + logger.debug("cleanup_browser called for task_id: %s", task_id) logger.debug("Active sessions: %s", list(_active_sessions.keys())) @@ -2168,14 +2166,7 @@ registry.register( check_fn=check_browser_requirements, emoji="⌨️", ) -registry.register( - name="browser_close", - toolset="browser", - schema=_BROWSER_SCHEMA_MAP["browser_close"], - handler=lambda args, **kw: browser_close(task_id=kw.get("task_id")), - check_fn=check_browser_requirements, - emoji="🚪", -) + registry.register( name="browser_get_images", toolset="browser", diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index eb13240b..8dbcf7c3 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -103,6 +103,32 @@ def _canonical_skills(skill: Optional[str] = None, skills: Optional[Any] = None) + +def _resolve_model_override(model_obj: Optional[Dict[str, Any]]) -> tuple: + """Resolve a model override object into (provider, model) for job storage. + + If provider is omitted, pins the current main provider from config so the + job doesn't drift when the user later changes their default via hermes model. + + Returns (provider_str_or_none, model_str_or_none). 
+ """ + if not model_obj or not isinstance(model_obj, dict): + return (None, None) + model_name = (model_obj.get("model") or "").strip() or None + provider_name = (model_obj.get("provider") or "").strip() or None + if model_name and not provider_name: + # Pin to the current main provider so the job is stable + try: + from hermes_cli.config import load_config + cfg = load_config() + model_cfg = cfg.get("model", {}) + if isinstance(model_cfg, dict): + provider_name = model_cfg.get("provider") or None + except Exception: + pass # Best-effort; provider stays None + return (provider_name, model_name) + + def _normalize_optional_job_value(value: Optional[Any], *, strip_trailing_slash: bool = False) -> Optional[str]: if value is None: return None @@ -392,14 +418,9 @@ Use action='list' to inspect jobs. Use action='update', 'pause', 'resume', 'remove', or 'run' to manage an existing job. Jobs run in a fresh session with no current-chat context, so prompts must be self-contained. -If skill or skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction. +If skills are provided on create, the future cron run loads those skills in order, then follows the prompt as the task instruction. On update, passing skills=[] clears attached skills. -If script is provided on create, the referenced Python script runs before each agent turn. -Its stdout is injected into the prompt as context. Use this for data collection and change -detection — the script handles gathering data, the agent analyzes and reports. -On update, pass script="" to clear an attached script. - NOTE: The agent's final response is auto-delivered to the target. Put the primary user-facing content in the final response. Cron jobs run autonomously with no user present — they cannot ask questions or request clarification. 
@@ -418,7 +439,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr }, "prompt": { "type": "string", - "description": "For create: the full self-contained prompt. If skill or skills are also provided, this becomes the task instruction paired with those skills." + "description": "For create: the full self-contained prompt. If skills are also provided, this becomes the task instruction paired with those skills." }, "schedule": { "type": "string", @@ -436,39 +457,30 @@ Important safety rule: cron-run sessions should not recursively schedule more cr "type": "string", "description": "Delivery target: origin, local, telegram, discord, slack, whatsapp, signal, matrix, mattermost, homeassistant, dingtalk, feishu, wecom, email, sms, or platform:chat_id or platform:chat_id:thread_id for Telegram topics. Examples: 'origin', 'local', 'telegram', 'telegram:-1001234567890:17585', 'discord:#engineering'" }, - "model": { - "type": "string", - "description": "Optional per-job model override used when the cron job runs" - }, - "provider": { - "type": "string", - "description": "Optional per-job provider override used when resolving runtime credentials" - }, - "base_url": { - "type": "string", - "description": "Optional per-job base URL override paired with provider/model routing" - }, - "include_disabled": { - "type": "boolean", - "description": "For list: include paused/completed jobs" - }, - "skill": { - "type": "string", - "description": "Optional single skill name to load before executing the cron prompt" - }, "skills": { "type": "array", "items": {"type": "string"}, - "description": "Optional ordered list of skills to load before executing the cron prompt. On update, pass an empty array to clear attached skills." + "description": "Optional ordered list of skill names to load before executing the cron prompt. On update, pass an empty array to clear attached skills." 
}, - "reason": { - "type": "string", - "description": "Optional pause reason" + "model": { + "type": "object", + "description": "Optional per-job model override. If provider is omitted, the current main provider is pinned at creation time so the job stays stable.", + "properties": { + "provider": { + "type": "string", + "description": "Provider name (e.g. 'openrouter', 'anthropic'). Omit to use and pin the current provider." + }, + "model": { + "type": "string", + "description": "Model name (e.g. 'anthropic/claude-sonnet-4', 'claude-sonnet-4')" + } + }, + "required": ["model"] }, "script": { "type": "string", "description": "Optional path to a Python script that runs before each cron job execution. Its stdout is injected into the prompt as context. Use for data collection and change detection. Relative paths resolve under ~/.hermes/scripts/. On update, pass empty string to clear." - } + }, }, "required": ["action"] } @@ -502,7 +514,7 @@ registry.register( name="cronjob", toolset="cronjob", schema=CRONJOB_SCHEMA, - handler=lambda args, **kw: cronjob( + handler=lambda args, **kw: (lambda _mo=_resolve_model_override(args.get("model")): cronjob( action=args.get("action", ""), job_id=args.get("job_id"), prompt=args.get("prompt"), @@ -510,16 +522,16 @@ registry.register( name=args.get("name"), repeat=args.get("repeat"), deliver=args.get("deliver"), - include_disabled=args.get("include_disabled", False), + include_disabled=args.get("include_disabled", True), skill=args.get("skill"), skills=args.get("skills"), - model=args.get("model"), - provider=args.get("provider"), + model=_mo[1], + provider=_mo[0] or args.get("provider"), base_url=args.get("base_url"), reason=args.get("reason"), script=args.get("script"), task_id=kw.get("task_id"), - ), + ))(), check_fn=check_cronjob_requirements, emoji="⏰", ) diff --git a/toolsets.py b/toolsets.py index 84c19637..04e43b28 100644 --- a/toolsets.py +++ b/toolsets.py @@ -37,14 +37,12 @@ _HERMES_CORE_TOOLS = [ "read_file", "write_file", 
"patch", "search_files", # Vision + image generation "vision_analyze", "image_generate", - # MoA - "mixture_of_agents", # Skills "skills_list", "skill_view", "skill_manage", # Browser automation "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close", "browser_get_images", + "browser_press", "browser_get_images", "browser_vision", "browser_console", # Text-to-speech "text_to_speech", @@ -116,7 +114,7 @@ TOOLSETS = { "tools": [ "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close", "browser_get_images", + "browser_press", "browser_get_images", "browser_vision", "browser_console", "web_search" ], "includes": [] @@ -214,7 +212,7 @@ TOOLSETS = { "safe": { "description": "Safe toolkit without terminal access", - "tools": ["mixture_of_agents"], + "tools": [], "includes": ["web", "vision", "image_gen"] }, @@ -235,7 +233,7 @@ TOOLSETS = { "skills_list", "skill_view", "skill_manage", "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close", "browser_get_images", + "browser_press", "browser_get_images", "browser_vision", "browser_console", "todo", "memory", "session_search", @@ -255,14 +253,12 @@ TOOLSETS = { "read_file", "write_file", "patch", "search_files", # Vision + image generation "vision_analyze", "image_generate", - # MoA - "mixture_of_agents", # Skills "skills_list", "skill_view", "skill_manage", # Browser automation "browser_navigate", "browser_snapshot", "browser_click", "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close", "browser_get_images", + "browser_press", "browser_get_images", "browser_vision", "browser_console", # Planning & memory "todo", "memory", diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index 
5353ca5f..cd798697 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -20,7 +20,6 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server |------|-------------|----------------------| | `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — | | `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — | -| `browser_close` | Close the browser session and release resources. Call this when done with browser tasks to free up Browserbase session quota. | — | | `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — | | `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — | | `browser_navigate` | Navigate to a URL in the browser. Initializes the session and loads the page. Must be called before other browser tools. For simple information retrieval, prefer web_search or web_extract (faster, cheaper). 
Use browser tools when you need… | — | diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md index 19ff00a3..7d566e60 100644 --- a/website/docs/reference/toolsets-reference.md +++ b/website/docs/reference/toolsets-reference.md @@ -52,7 +52,7 @@ Or in-session: | Toolset | Tools | Purpose | |---------|-------|---------| -| `browser` | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. | +| `browser` | `browser_back`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. | | `clarify` | `clarify` | Ask the user a question when the agent needs clarification. | | `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. | | `cronjob` | `cronjob` | Schedule and manage recurring tasks. | diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index 8f9fc24e..0dafec10 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -277,10 +277,6 @@ Check the browser console for any JavaScript errors Use `clear=True` to clear the console after reading, so subsequent calls only show new messages. -### `browser_close` - -Close the browser session and release resources. Call this when done to free up Browserbase session quota. - ## Practical Examples ### Filling Out a Web Form @@ -295,7 +291,6 @@ Agent workflow: 4. browser_type(ref="@e5", text="SecurePass123") 5. browser_click(ref="@e8") → clicks "Create Account" 6. 
browser_snapshot() → confirms success -7. browser_close() ``` ### Researching Dynamic Content @@ -307,7 +302,6 @@ Agent workflow: 1. browser_navigate("https://github.com/trending") 2. browser_snapshot(full=true) → reads trending repo list 3. Returns formatted results -4. browser_close() ``` ## Session Recording @@ -349,5 +343,5 @@ If paid features aren't available on your plan, Hermes automatically falls back - **Text-based interaction** — relies on accessibility tree, not pixel coordinates - **Snapshot size** — large pages may be truncated or LLM-summarized at 8000 characters - **Session timeout** — cloud sessions expire based on your provider's plan settings -- **Cost** — cloud sessions consume provider credits; use `browser_close` when done. Use `/browser connect` for free local browsing. +- **Cost** — cloud sessions consume provider credits; sessions are automatically cleaned up when the conversation ends or after inactivity. Use `/browser connect` for free local browsing. - **No file downloads** — cannot download files from the browser From b2f477a30b3c05d0f383c543af98496ae8a96070 Mon Sep 17 00:00:00 2001 From: Ben Barclay Date: Tue, 7 Apr 2026 22:40:22 +1000 Subject: [PATCH 068/154] feat: switch managed browser provider from Browserbase to Browser Use (#5750) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: switch managed browser provider from Browserbase to Browser Use The Nous subscription tool gateway now routes browser automation through Browser Use instead of Browserbase. 
This commit: - Adds managed Nous gateway support to BrowserUseProvider (idempotency keys, X-BB-API-Key auth header, external_call_id persistence) - Removes managed gateway support from BrowserbaseProvider (now direct-only via BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID) - Updates browser_tool.py fallback: prefers Browser Use over Browserbase - Updates nous_subscription.py: gateway vendor 'browser-use', auto-config sets cloud_provider='browser-use' for new subscribers - Updates tools_config.py: Nous Subscription entry now uses Browser Use - Updates setup.py, cli.py, status.py, prompt_builder.py display strings - Updates all affected tests to match new behavior Browserbase remains fully functional for users with direct API credentials. The change only affects the managed/subscription path. * chore: remove redundant Browser Use hint from system prompt * fix: upgrade Browser Use provider to v3 API - Base URL: api/v2 -> api/v3 (v2 is legacy) - Unified all endpoints to use native Browser Use paths: - POST /browsers (create session, returns cdpUrl) - PATCH /browsers/{id} with {action: stop} (close session) - Removed managed-mode branching that used Browserbase-style /v1/sessions paths — v3 gateway now supports /browsers directly - Removed unused managed_mode variable in close_session * fix(browser-use): use X-Browser-Use-API-Key header for managed mode The managed gateway expects X-Browser-Use-API-Key, not X-BB-API-Key (which is a Browserbase-specific header). Using the wrong header caused a 401 AUTH_ERROR on every managed-mode browser session create. Simplified _headers() to always use X-Browser-Use-API-Key regardless of direct vs managed mode. * fix(nous_subscription): browserbase explicit provider is direct-only Since managed Nous gateway now routes through Browser Use, the browserbase explicit provider path should not check managed_browser_available (which resolves against the browser-use gateway). Simplified to direct-only with managed=False. 
* fix(browser-use): port missing improvements from PR #5605 - CDP URL normalization: resolve HTTP discovery URLs to websocket after cloud provider create_session() (prevents agent-browser failures) - Managed session payload: send timeout=5 and proxyCountryCode=us for gateway-backed sessions (prevents billing overruns) - Update prompt builder, browser_close schema, and module docstring to replace remaining Browserbase references with Browser Use - Dynamic /browser status detection via _get_cloud_provider() instead of hardcoded env var checks (future-proof for new providers) - Rename post_setup key from 'browserbase' to 'agent_browser' - Update setup hint to mention Browser Use alongside Browserbase - Add tests: CDP normalization, browserbase direct-only guard, managed browser-use gateway, direct browserbase fallback --------- Co-authored-by: rob-maron <132852777+rob-maron@users.noreply.github.com> --- agent/prompt_builder.py | 4 +- cli.py | 17 +- hermes_cli/nous_subscription.py | 31 +-- hermes_cli/setup.py | 4 +- hermes_cli/status.py | 3 +- hermes_cli/tools_config.py | 20 +- tests/agent/test_prompt_builder.py | 6 +- tests/hermes_cli/test_nous_subscription.py | 61 +++++- .../hermes_cli/test_status_model_provider.py | 2 +- tests/hermes_cli/test_tools_config.py | 2 +- tests/tools/test_browser_cdp_override.py | 32 +++ .../test_managed_browserbase_and_modal.py | 202 ++++++++++-------- tests/tools/test_managed_tool_gateway.py | 6 +- tools/browser_providers/browser_use.py | 158 +++++++++++--- tools/browser_providers/browserbase.py | 88 ++------ tools/browser_tool.py | 51 +++-- 16 files changed, 429 insertions(+), 258 deletions(-) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index d6c296f6..df5532e1 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -773,13 +773,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) - lines = [ "# Nous Subscription", - "Nous subscription includes managed web tools 
(Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.", + "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser Use) by default. Modal execution is optional.", "Current capability status:", ] lines.extend(_status_line(feature) for feature in features.items()) lines.extend( [ - "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.", + "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.", "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.", "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.", "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.", diff --git a/cli.py b/cli.py index 6f02dc93..a60f699d 100644 --- a/cli.py +++ b/cli.py @@ -5037,13 +5037,13 @@ class HermesCLI: pass print() print("🌐 Browser disconnected from live Chrome") - print(" Browser tools reverted to default mode (local headless or Browserbase)") + print(" Browser tools reverted to default mode (local headless or cloud provider)") print() if hasattr(self, '_pending_input'): self._pending_input.put( "[System note: The user has disconnected the browser tools from their live Chrome. 
" - "Browser tools are back to default mode (headless local browser or Browserbase cloud).]" + "Browser tools are back to default mode (headless local browser or cloud provider).]" ) else: print() @@ -5070,10 +5070,17 @@ class HermesCLI: print(" Status: ✓ reachable") except (OSError, Exception): print(" Status: ⚠ not reachable (Chrome may not be running)") - elif os.environ.get("BROWSERBASE_API_KEY"): - print("🌐 Browser: Browserbase (cloud)") else: - print("🌐 Browser: local headless Chromium (agent-browser)") + try: + from tools.browser_tool import _get_cloud_provider + provider = _get_cloud_provider() + except Exception: + provider = None + + if provider is not None: + print(f"🌐 Browser: {provider.provider_name()} (cloud)") + else: + print("🌐 Browser: local headless Chromium (agent-browser)") print() print(" /browser connect — connect to your live Chrome") print(" /browser disconnect — revert to default") diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index 82152911..fe86ac20 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -167,20 +167,20 @@ def _resolve_browser_feature_state( if browser_provider_explicit: current_provider = browser_provider or "local" if current_provider == "browserbase": - provider_available = managed_browser_available or direct_browserbase + available = bool(browser_local_available and direct_browserbase) + active = bool(browser_tool_enabled and available) + return current_provider, available, active, False + if current_provider == "browser-use": + provider_available = managed_browser_available or direct_browser_use available = bool(browser_local_available and provider_available) managed = bool( browser_tool_enabled and browser_local_available and managed_browser_available - and not direct_browserbase + and not direct_browser_use ) active = bool(browser_tool_enabled and available) return current_provider, available, active, managed - if current_provider == "browser-use": - 
available = bool(browser_local_available and direct_browser_use) - active = bool(browser_tool_enabled and available) - return current_provider, available, active, False if current_provider == "firecrawl": available = bool(browser_local_available and direct_firecrawl) active = bool(browser_tool_enabled and available) @@ -193,16 +193,21 @@ def _resolve_browser_feature_state( active = bool(browser_tool_enabled and available) return current_provider, available, active, False - if managed_browser_available or direct_browserbase: + if managed_browser_available or direct_browser_use: available = bool(browser_local_available) managed = bool( browser_tool_enabled and browser_local_available and managed_browser_available - and not direct_browserbase + and not direct_browser_use ) active = bool(browser_tool_enabled and available) - return "browserbase", available, active, managed + return "browser-use", available, active, managed + + if direct_browserbase: + available = bool(browser_local_available) + active = bool(browser_tool_enabled and available) + return "browserbase", available, active, False available = bool(browser_local_available) active = bool(browser_tool_enabled and available) @@ -266,7 +271,7 @@ def get_nous_subscription_features( managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl") managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue") managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio") - managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase") + managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use") managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal") modal_state = resolve_modal_backend_state( modal_mode, 
@@ -512,10 +517,10 @@ def apply_nous_managed_defaults( changed.add("tts") if "browser" in selected_toolsets and not features.browser.explicit_configured and not ( - get_env_value("BROWSERBASE_API_KEY") - or get_env_value("BROWSER_USE_API_KEY") + get_env_value("BROWSER_USE_API_KEY") + or get_env_value("BROWSERBASE_API_KEY") ): - browser_cfg["cloud_provider"] = "browserbase" + browser_cfg["cloud_provider"] = "browser-use" changed.add("browser") if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 5abde51b..29cb6472 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -660,14 +660,14 @@ def _print_setup_summary(config: dict, hermes_home): # Browser tools (local Chromium, Camofox, Browserbase, Browser Use, or Firecrawl) browser_provider = subscription_features.browser.current_provider if subscription_features.browser.managed_by_nous: - tool_status.append(("Browser Automation (Nous Browserbase)", True, None)) + tool_status.append(("Browser Automation (Nous Browser Use)", True, None)) elif subscription_features.browser.available: label = "Browser Automation" if browser_provider: label = f"Browser Automation ({browser_provider})" tool_status.append((label, True, None)) else: - missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browserbase" + missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browser Use or Browserbase" if browser_provider == "Browserbase": missing_browser_hint = ( "npm install -g agent-browser and set " diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 67b15bab..77a3e0ef 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -123,7 +123,8 @@ def show_status(args): "MiniMax-CN": "MINIMAX_CN_API_KEY", "Firecrawl": "FIRECRAWL_API_KEY", "Tavily": "TAVILY_API_KEY", - "Browserbase": "BROWSERBASE_API_KEY", # Optional — local browser works without this + "Browser Use": "BROWSER_USE_API_KEY", 
# Optional — local browser works without this + "Browserbase": "BROWSERBASE_API_KEY", # Optional — direct credentials only "FAL": "FAL_KEY", "Tinker": "TINKER_API_KEY", "WandB": "WANDB_API_KEY", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 8a28e224..804a7a4f 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -280,21 +280,21 @@ TOOL_CATEGORIES = { "icon": "🌐", "providers": [ { - "name": "Nous Subscription (Browserbase cloud)", - "tag": "Managed Browserbase billed to your subscription", + "name": "Nous Subscription (Browser Use cloud)", + "tag": "Managed Browser Use billed to your subscription", "env_vars": [], - "browser_provider": "browserbase", + "browser_provider": "browser-use", "requires_nous_auth": True, "managed_nous_feature": "browser", - "override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"], - "post_setup": "browserbase", + "override_env_vars": ["BROWSER_USE_API_KEY"], + "post_setup": "agent_browser", }, { "name": "Local Browser", "tag": "Free headless Chromium (no API key needed)", "env_vars": [], "browser_provider": "local", - "post_setup": "browserbase", # Same npm install for agent-browser + "post_setup": "agent_browser", }, { "name": "Browserbase", @@ -304,7 +304,7 @@ TOOL_CATEGORIES = { {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"}, ], "browser_provider": "browserbase", - "post_setup": "browserbase", + "post_setup": "agent_browser", }, { "name": "Browser Use", @@ -313,7 +313,7 @@ TOOL_CATEGORIES = { {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"}, ], "browser_provider": "browser-use", - "post_setup": "browserbase", + "post_setup": "agent_browser", }, { "name": "Firecrawl", @@ -322,7 +322,7 @@ TOOL_CATEGORIES = { {"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"}, ], "browser_provider": "firecrawl", - "post_setup": "browserbase", + "post_setup": "agent_browser", }, { 
"name": "Camofox", @@ -381,7 +381,7 @@ TOOLSET_ENV_REQUIREMENTS = { def _run_post_setup(post_setup_key: str): """Run post-setup hooks for tools that need extra installation steps.""" import shutil - if post_setup_key == "browserbase": + if post_setup_key in ("agent_browser", "browserbase"): node_modules = PROJECT_ROOT / "node_modules" / "agent-browser" if not node_modules.exists() and shutil.which("npm"): _print_info(" Installing Node.js dependencies for browser tools...") diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 17e3523c..00e13d26 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -423,7 +423,7 @@ class TestBuildNousSubscriptionPrompt: "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"), "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"), "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), - "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"), + "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"), "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), }, ), @@ -431,9 +431,9 @@ class TestBuildNousSubscriptionPrompt: prompt = build_nous_subscription_prompt({"web_search", "browser_navigate"}) - assert "Browserbase" in prompt + assert "Browser Use" in prompt assert "Modal execution is optional" in prompt - assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys" in prompt + assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch): monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") diff --git 
a/tests/hermes_cli/test_nous_subscription.py b/tests/hermes_cli/test_nous_subscription.py index 69428ab0..c0427697 100644 --- a/tests/hermes_cli/test_nous_subscription.py +++ b/tests/hermes_cli/test_nous_subscription.py @@ -44,7 +44,62 @@ def test_get_nous_subscription_features_prefers_managed_modal_in_auto_mode(monke assert features.modal.direct_override is False -def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase(monkeypatch): +def test_get_nous_subscription_features_marks_browser_use_as_managed_when_gateway_ready(monkeypatch): + monkeypatch.setattr(ns, "get_env_value", lambda name: "") + monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True}) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser") + monkeypatch.setattr(ns, "_has_agent_browser", lambda: True) + monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "") + monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False) + monkeypatch.setattr( + ns, + "is_managed_tool_gateway_ready", + lambda vendor: vendor == "browser-use", + ) + + features = ns.get_nous_subscription_features( + {"browser": {"cloud_provider": "browser-use"}} + ) + + assert features.browser.available is True + assert features.browser.active is True + assert features.browser.managed_by_nous is True + assert features.browser.direct_override is False + assert features.browser.current_provider == "Browser Use" + + +def test_get_nous_subscription_features_uses_direct_browserbase_when_no_managed_gateway(monkeypatch): + """When direct Browserbase keys are set and no managed gateway is available, + the unconfigured fallback should pick Browserbase as a direct provider.""" + env = { + "BROWSERBASE_API_KEY": "bb-key", + "BROWSERBASE_PROJECT_ID": "bb-project", + } + + monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, "")) + monkeypatch.setattr(ns, 
"get_nous_auth_status", lambda: {"logged_in": True}) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser") + monkeypatch.setattr(ns, "_has_agent_browser", lambda: True) + monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "") + monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False) + monkeypatch.setattr( + ns, + "is_managed_tool_gateway_ready", + lambda vendor: False, # No managed gateway available + ) + + features = ns.get_nous_subscription_features({}) + + assert features.browser.available is True + assert features.browser.active is True + assert features.browser.managed_by_nous is False + assert features.browser.direct_override is True + assert features.browser.current_provider == "Browserbase" + + +def test_get_nous_subscription_features_prefers_camofox_over_managed_browser_use(monkeypatch): env = {"CAMOFOX_URL": "http://localhost:9377"} monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, "")) @@ -57,11 +112,11 @@ def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase monkeypatch.setattr( ns, "is_managed_tool_gateway_ready", - lambda vendor: vendor == "browserbase", + lambda vendor: vendor == "browser-use", ) features = ns.get_nous_subscription_features( - {"browser": {"cloud_provider": "browserbase"}} + {"browser": {"cloud_provider": "browser-use"}} ) assert features.browser.available is True diff --git a/tests/hermes_cli/test_status_model_provider.py b/tests/hermes_cli/test_status_model_provider.py index 1e6531d3..04221d88 100644 --- a/tests/hermes_cli/test_status_model_provider.py +++ b/tests/hermes_cli/test_status_model_provider.py @@ -88,7 +88,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"), "image_gen": NousFeatureState("image_gen", "Image generation", True, 
True, True, True, False, True, "Nous Subscription"), "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), - "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"), + "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"), "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), }, ), diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index 946ba77f..b02b3c1f 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -330,7 +330,7 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch): assert config["web"]["backend"] == "firecrawl" assert config["tts"]["provider"] == "openai" - assert config["browser"]["cloud_provider"] == "browserbase" + assert config["browser"]["cloud_provider"] == "browser-use" assert configured == [] # ── Platform / toolset consistency ──────────────────────────────────────────── diff --git a/tests/tools/test_browser_cdp_override.py b/tests/tools/test_browser_cdp_override.py index a29971fa..aa388773 100644 --- a/tests/tools/test_browser_cdp_override.py +++ b/tests/tools/test_browser_cdp_override.py @@ -45,3 +45,35 @@ class TestResolveCdpOverride: with patch("tools.browser_tool.requests.get", side_effect=RuntimeError("boom")): assert _resolve_cdp_override(HTTP_URL) == HTTP_URL + + def test_normalizes_provider_returned_http_cdp_url_when_creating_session(self, monkeypatch): + import tools.browser_tool as browser_tool + + provider = Mock() + provider.create_session.return_value = { + "session_name": "cloud-session", + "bb_session_id": "bu_123", + "cdp_url": "https://cdp.browser-use.example/session", + "features": {"browser_use": True}, + } + + response = Mock() + response.raise_for_status.return_value = None + response.json.return_value = 
{"webSocketDebuggerUrl": WS_URL} + + monkeypatch.setattr(browser_tool, "_active_sessions", {}) + monkeypatch.setattr(browser_tool, "_session_last_activity", {}) + monkeypatch.setattr(browser_tool, "_start_browser_cleanup_thread", lambda: None) + monkeypatch.setattr(browser_tool, "_update_session_activity", lambda task_id: None) + monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "") + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider) + + with patch("tools.browser_tool.requests.get", return_value=response) as mock_get: + session_info = browser_tool._get_session_info("task-browser-use") + + assert session_info["cdp_url"] == WS_URL + provider.create_session.assert_called_once_with("task-browser-use") + mock_get.assert_called_once_with( + "https://cdp.browser-use.example/session/json/version", + timeout=10, + ) diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py index 3c8bb121..d07dcb36 100644 --- a/tests/tools/test_managed_browserbase_and_modal.py +++ b/tests/tools/test_managed_browserbase_and_modal.py @@ -113,16 +113,15 @@ def _install_fake_tools_package(): sys.modules["tools.environments.managed_modal"] = types.SimpleNamespace(ManagedModalEnvironment=_DummyEnvironment) -def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path): +def test_browser_use_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path): _install_fake_tools_package() (tmp_path / "config.yaml").write_text("browser:\n cloud_provider: local\n", encoding="utf-8") env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) + env.pop("BROWSER_USE_API_KEY", None) env.update({ "HERMES_HOME": str(tmp_path), "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", }) with patch.dict(os.environ, env, clear=True): @@ 
-135,7 +134,7 @@ def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_i assert provider is None -def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_call_id(): +def test_browserbase_does_not_use_gateway_only_configuration(): _install_fake_tools_package() env = os.environ.copy() env.pop("BROWSERBASE_API_KEY", None) @@ -145,104 +144,124 @@ def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_ "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", }) - class _Response: - status_code = 200 - ok = True - text = "" - headers = {"x-external-call-id": "call-browserbase-1"} - - def json(self): - return { - "id": "bb_local_session_1", - "connectUrl": "wss://connect.browserbase.example/session", - } - - with patch.dict(os.environ, env, clear=True): - browserbase_module = _load_tool_module( - "tools.browser_providers.browserbase", - "browser_providers/browserbase.py", - ) - - with patch.object(browserbase_module.requests, "post", return_value=_Response()) as post: - provider = browserbase_module.BrowserbaseProvider() - session = provider.create_session("task-browserbase-managed") - - sent_headers = post.call_args.kwargs["headers"] - assert sent_headers["X-BB-API-Key"] == "nous-token" - assert sent_headers["X-Idempotency-Key"].startswith("browserbase-session-create:") - assert session["external_call_id"] == "call-browserbase-1" - - -def test_browserbase_managed_gateway_reuses_pending_idempotency_key_after_timeout(): - _install_fake_tools_package() - env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) - env.update({ - "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", - }) - - class _Response: - status_code = 200 - ok = True - text = "" - headers = {"x-external-call-id": "call-browserbase-2"} - - def json(self): - return { - "id": "bb_local_session_2", - "connectUrl": 
"wss://connect.browserbase.example/session2", - } - with patch.dict(os.environ, env, clear=True): browserbase_module = _load_tool_module( "tools.browser_providers.browserbase", "browser_providers/browserbase.py", ) provider = browserbase_module.BrowserbaseProvider() - timeout = browserbase_module.requests.Timeout("timed out") + + assert provider.is_configured() is False + + +def test_browser_use_managed_gateway_adds_idempotency_key_and_persists_external_call_id(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSER_USE_API_KEY", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + class _Response: + status_code = 200 + ok = True + text = "" + headers = {"x-external-call-id": "call-browser-use-1"} + + def json(self): + return { + "id": "bu_local_session_1", + "connectUrl": "wss://connect.browser-use.example/session", + } + + with patch.dict(os.environ, env, clear=True): + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", + ) + + with patch.object(browser_use_module.requests, "post", return_value=_Response()) as post: + provider = browser_use_module.BrowserUseProvider() + session = provider.create_session("task-browser-use-managed") + + sent_headers = post.call_args.kwargs["headers"] + assert sent_headers["X-Browser-Use-API-Key"] == "nous-token" + assert sent_headers["X-Idempotency-Key"].startswith("browser-use-session-create:") + sent_payload = post.call_args.kwargs["json"] + assert sent_payload["timeout"] == 5 + assert sent_payload["proxyCountryCode"] == "us" + assert session["external_call_id"] == "call-browser-use-1" + + +def test_browser_use_managed_gateway_reuses_pending_idempotency_key_after_timeout(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSER_USE_API_KEY", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSER_USE_GATEWAY_URL": 
"http://127.0.0.1:3009", + }) + + class _Response: + status_code = 200 + ok = True + text = "" + headers = {"x-external-call-id": "call-browser-use-2"} + + def json(self): + return { + "id": "bu_local_session_2", + "connectUrl": "wss://connect.browser-use.example/session2", + } + + with patch.dict(os.environ, env, clear=True): + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", + ) + provider = browser_use_module.BrowserUseProvider() + timeout = browser_use_module.requests.Timeout("timed out") with patch.object( - browserbase_module.requests, + browser_use_module.requests, "post", side_effect=[timeout, _Response()], ) as post: try: - provider.create_session("task-browserbase-timeout") - except browserbase_module.requests.Timeout: + provider.create_session("task-browser-use-timeout") + except browser_use_module.requests.Timeout: pass else: - raise AssertionError("Expected Browserbase create_session to propagate timeout") + raise AssertionError("Expected Browser Use create_session to propagate timeout") - provider.create_session("task-browserbase-timeout") + provider.create_session("task-browser-use-timeout") first_headers = post.call_args_list[0].kwargs["headers"] second_headers = post.call_args_list[1].kwargs["headers"] assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"] -def test_browserbase_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts(): +def test_browser_use_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts(): _install_fake_tools_package() env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) + env.pop("BROWSER_USE_API_KEY", None) env.update({ "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", }) class _ConflictResponse: status_code = 409 ok = False - text = 
'{"error":{"code":"CONFLICT","message":"Managed Browserbase session creation is already in progress for this idempotency key"}}' + text = '{"error":{"code":"CONFLICT","message":"Managed Browser Use session creation is already in progress for this idempotency key"}}' headers = {} def json(self): return { "error": { "code": "CONFLICT", - "message": "Managed Browserbase session creation is already in progress for this idempotency key", + "message": "Managed Browser Use session creation is already in progress for this idempotency key", } } @@ -250,72 +269,71 @@ def test_browserbase_managed_gateway_preserves_pending_idempotency_key_for_in_pr status_code = 200 ok = True text = "" - headers = {"x-external-call-id": "call-browserbase-4"} + headers = {"x-external-call-id": "call-browser-use-4"} def json(self): return { - "id": "bb_local_session_4", - "connectUrl": "wss://connect.browserbase.example/session4", + "id": "bu_local_session_4", + "connectUrl": "wss://connect.browser-use.example/session4", } with patch.dict(os.environ, env, clear=True): - browserbase_module = _load_tool_module( - "tools.browser_providers.browserbase", - "browser_providers/browserbase.py", + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", ) - provider = browserbase_module.BrowserbaseProvider() + provider = browser_use_module.BrowserUseProvider() with patch.object( - browserbase_module.requests, + browser_use_module.requests, "post", side_effect=[_ConflictResponse(), _SuccessResponse()], ) as post: try: - provider.create_session("task-browserbase-conflict") + provider.create_session("task-browser-use-conflict") except RuntimeError: pass else: - raise AssertionError("Expected Browserbase create_session to propagate the in-progress conflict") + raise AssertionError("Expected Browser Use create_session to propagate the in-progress conflict") - provider.create_session("task-browserbase-conflict") + 
provider.create_session("task-browser-use-conflict") first_headers = post.call_args_list[0].kwargs["headers"] second_headers = post.call_args_list[1].kwargs["headers"] assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"] -def test_browserbase_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success(): +def test_browser_use_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success(): _install_fake_tools_package() env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) + env.pop("BROWSER_USE_API_KEY", None) env.update({ "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", }) class _Response: status_code = 200 ok = True text = "" - headers = {"x-external-call-id": "call-browserbase-3"} + headers = {"x-external-call-id": "call-browser-use-3"} def json(self): return { - "id": "bb_local_session_3", - "connectUrl": "wss://connect.browserbase.example/session3", + "id": "bu_local_session_3", + "connectUrl": "wss://connect.browser-use.example/session3", } with patch.dict(os.environ, env, clear=True): - browserbase_module = _load_tool_module( - "tools.browser_providers.browserbase", - "browser_providers/browserbase.py", + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", ) - provider = browserbase_module.BrowserbaseProvider() + provider = browser_use_module.BrowserUseProvider() - with patch.object(browserbase_module.requests, "post", side_effect=[_Response(), _Response()]) as post: - provider.create_session("task-browserbase-new") - provider.create_session("task-browserbase-new") + with patch.object(browser_use_module.requests, "post", side_effect=[_Response(), _Response()]) as post: + provider.create_session("task-browser-use-new") + provider.create_session("task-browser-use-new") first_headers = 
post.call_args_list[0].kwargs["headers"] second_headers = post.call_args_list[1].kwargs["headers"] diff --git a/tests/tools/test_managed_tool_gateway.py b/tests/tools/test_managed_tool_gateway.py index 39b9125e..f854732b 100644 --- a/tests/tools/test_managed_tool_gateway.py +++ b/tests/tools/test_managed_tool_gateway.py @@ -40,17 +40,17 @@ def test_resolve_managed_tool_gateway_uses_vendor_specific_override(): os.environ, { "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1", - "BROWSERBASE_GATEWAY_URL": "http://browserbase-gateway.localhost:3009/", + "BROWSER_USE_GATEWAY_URL": "http://browser-use-gateway.localhost:3009/", }, clear=False, ): result = resolve_managed_tool_gateway( - "browserbase", + "browser-use", token_reader=lambda: "nous-token", ) assert result is not None - assert result.gateway_origin == "http://browserbase-gateway.localhost:3009" + assert result.gateway_origin == "http://browser-use-gateway.localhost:3009" def test_resolve_managed_tool_gateway_is_inactive_without_nous_token(): diff --git a/tools/browser_providers/browser_use.py b/tools/browser_providers/browser_use.py index 48a61840..0f12dc44 100644 --- a/tools/browser_providers/browser_use.py +++ b/tools/browser_providers/browser_use.py @@ -2,16 +2,62 @@ import logging import os +import threading import uuid -from typing import Dict +from typing import Any, Dict, Optional import requests from tools.browser_providers.base import CloudBrowserProvider +from tools.managed_tool_gateway import resolve_managed_tool_gateway +from tools.tool_backend_helpers import managed_nous_tools_enabled logger = logging.getLogger(__name__) +_pending_create_keys: Dict[str, str] = {} +_pending_create_keys_lock = threading.Lock() -_BASE_URL = "https://api.browser-use.com/api/v2" +_BASE_URL = "https://api.browser-use.com/api/v3" +_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5 +_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us" + + +def _get_or_create_pending_create_key(task_id: str) -> str: + with _pending_create_keys_lock: + existing = 
_pending_create_keys.get(task_id) + if existing: + return existing + + created = f"browser-use-session-create:{uuid.uuid4().hex}" + _pending_create_keys[task_id] = created + return created + + +def _clear_pending_create_key(task_id: str) -> None: + with _pending_create_keys_lock: + _pending_create_keys.pop(task_id, None) + + +def _should_preserve_pending_create_key(response: requests.Response) -> bool: + if response.status_code >= 500: + return True + + if response.status_code != 409: + return False + + try: + payload = response.json() + except Exception: + return False + + if not isinstance(payload, dict): + return False + + error = payload.get("error") + if not isinstance(error, dict): + return False + + message = str(error.get("message") or "").lower() + return "already in progress" in message class BrowserUseProvider(CloudBrowserProvider): @@ -21,55 +67,120 @@ class BrowserUseProvider(CloudBrowserProvider): return "Browser Use" def is_configured(self) -> bool: - return bool(os.environ.get("BROWSER_USE_API_KEY")) + return self._get_config_or_none() is not None + + # ------------------------------------------------------------------ + # Config resolution (direct API key OR managed Nous gateway) + # ------------------------------------------------------------------ + + def _get_config_or_none(self) -> Optional[Dict[str, Any]]: + api_key = os.environ.get("BROWSER_USE_API_KEY") + if api_key: + return { + "api_key": api_key, + "base_url": _BASE_URL, + "managed_mode": False, + } + + managed = resolve_managed_tool_gateway("browser-use") + if managed is None: + return None + + return { + "api_key": managed.nous_user_token, + "base_url": managed.gateway_origin.rstrip("/"), + "managed_mode": True, + } + + def _get_config(self) -> Dict[str, Any]: + config = self._get_config_or_none() + if config is None: + message = ( + "Browser Use requires a direct BROWSER_USE_API_KEY credential." 
+ ) + if managed_nous_tools_enabled(): + message = ( + "Browser Use requires either a direct BROWSER_USE_API_KEY " + "credential or a managed Browser Use gateway configuration." + ) + raise ValueError(message) + return config # ------------------------------------------------------------------ # Session lifecycle # ------------------------------------------------------------------ - def _headers(self) -> Dict[str, str]: - api_key = os.environ.get("BROWSER_USE_API_KEY") - if not api_key: - raise ValueError( - "BROWSER_USE_API_KEY environment variable is required. " - "Get your key at https://browser-use.com" - ) - return { + def _headers(self, config: Dict[str, Any]) -> Dict[str, str]: + headers = { "Content-Type": "application/json", - "X-Browser-Use-API-Key": api_key, + "X-Browser-Use-API-Key": config["api_key"], } + return headers def create_session(self, task_id: str) -> Dict[str, object]: + config = self._get_config() + managed_mode = bool(config.get("managed_mode")) + + headers = self._headers(config) + if managed_mode: + headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id) + + # Keep gateway-backed sessions short so billing authorization does not + # default to a long Browser-Use timeout when Hermes only needs a task- + # scoped ephemeral browser. 
+ payload = ( + { + "timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES, + "proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE, + } + if managed_mode + else {} + ) + response = requests.post( - f"{_BASE_URL}/browsers", - headers=self._headers(), - json={}, + f"{config['base_url']}/browsers", + headers=headers, + json=payload, timeout=30, ) if not response.ok: + if managed_mode and not _should_preserve_pending_create_key(response): + _clear_pending_create_key(task_id) raise RuntimeError( f"Failed to create Browser Use session: " f"{response.status_code} {response.text}" ) session_data = response.json() + if managed_mode: + _clear_pending_create_key(task_id) session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" + external_call_id = response.headers.get("x-external-call-id") if managed_mode else None logger.info("Created Browser Use session %s", session_name) + cdp_url = session_data.get("cdpUrl") or session_data.get("connectUrl") or "" + return { "session_name": session_name, "bb_session_id": session_data["id"], - "cdp_url": session_data["cdpUrl"], + "cdp_url": cdp_url, "features": {"browser_use": True}, + "external_call_id": external_call_id, } def close_session(self, session_id: str) -> bool: + try: + config = self._get_config() + except ValueError: + logger.warning("Cannot close Browser Use session %s — missing credentials", session_id) + return False + try: response = requests.patch( - f"{_BASE_URL}/browsers/{session_id}", - headers=self._headers(), + f"{config['base_url']}/browsers/{session_id}", + headers=self._headers(config), json={"action": "stop"}, timeout=10, ) @@ -89,17 +200,14 @@ class BrowserUseProvider(CloudBrowserProvider): return False def emergency_cleanup(self, session_id: str) -> None: - api_key = os.environ.get("BROWSER_USE_API_KEY") - if not api_key: + config = self._get_config_or_none() + if config is None: logger.warning("Cannot emergency-cleanup Browser Use session %s — missing credentials", session_id) return try: requests.patch( - 
f"{_BASE_URL}/browsers/{session_id}", - headers={ - "Content-Type": "application/json", - "X-Browser-Use-API-Key": api_key, - }, + f"{config['base_url']}/browsers/{session_id}", + headers=self._headers(config), json={"action": "stop"}, timeout=5, ) diff --git a/tools/browser_providers/browserbase.py b/tools/browser_providers/browserbase.py index 5c580c3f..338ebf89 100644 --- a/tools/browser_providers/browserbase.py +++ b/tools/browser_providers/browserbase.py @@ -1,63 +1,24 @@ -"""Browserbase cloud browser provider.""" +"""Browserbase cloud browser provider (direct credentials only).""" import logging import os -import threading import uuid from typing import Any, Dict, Optional import requests from tools.browser_providers.base import CloudBrowserProvider -from tools.managed_tool_gateway import resolve_managed_tool_gateway -from tools.tool_backend_helpers import managed_nous_tools_enabled logger = logging.getLogger(__name__) -_pending_create_keys: Dict[str, str] = {} -_pending_create_keys_lock = threading.Lock() - - -def _get_or_create_pending_create_key(task_id: str) -> str: - with _pending_create_keys_lock: - existing = _pending_create_keys.get(task_id) - if existing: - return existing - - created = f"browserbase-session-create:{uuid.uuid4().hex}" - _pending_create_keys[task_id] = created - return created - - -def _clear_pending_create_key(task_id: str) -> None: - with _pending_create_keys_lock: - _pending_create_keys.pop(task_id, None) - - -def _should_preserve_pending_create_key(response: requests.Response) -> bool: - if response.status_code >= 500: - return True - - if response.status_code != 409: - return False - - try: - payload = response.json() - except Exception: - return False - - if not isinstance(payload, dict): - return False - - error = payload.get("error") - if not isinstance(error, dict): - return False - - message = str(error.get("message") or "").lower() - return "already in progress" in message class BrowserbaseProvider(CloudBrowserProvider): - 
"""Browserbase (https://browserbase.com) cloud browser backend.""" + """Browserbase (https://browserbase.com) cloud browser backend. + + This provider requires direct BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID + credentials. Managed Nous gateway support has been removed — the Nous + subscription now routes through Browser Use instead. + """ def provider_name(self) -> str: return "Browserbase" @@ -77,37 +38,20 @@ class BrowserbaseProvider(CloudBrowserProvider): "api_key": api_key, "project_id": project_id, "base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"), - "managed_mode": False, } - - managed = resolve_managed_tool_gateway("browserbase") - if managed is None: - return None - - return { - "api_key": managed.nous_user_token, - "project_id": "managed", - "base_url": managed.gateway_origin.rstrip("/"), - "managed_mode": True, - } + return None def _get_config(self) -> Dict[str, Any]: config = self._get_config_or_none() if config is None: - message = ( - "Browserbase requires direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID credentials." + raise ValueError( + "Browserbase requires BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID " + "environment variables." ) - if managed_nous_tools_enabled(): - message = ( - "Browserbase requires either direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID " - "credentials or a managed Browserbase gateway configuration." 
- ) - raise ValueError(message) return config def create_session(self, task_id: str) -> Dict[str, object]: config = self._get_config() - managed_mode = bool(config.get("managed_mode")) # Optional env-var knobs enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false" @@ -147,8 +91,6 @@ class BrowserbaseProvider(CloudBrowserProvider): "Content-Type": "application/json", "X-BB-API-Key": config["api_key"], } - if managed_mode: - headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id) response = requests.post( f"{config['base_url']}/v1/sessions", @@ -161,7 +103,7 @@ class BrowserbaseProvider(CloudBrowserProvider): keepalive_fallback = False # Handle 402 — paid features unavailable - if response.status_code == 402 and not managed_mode: + if response.status_code == 402: if enable_keep_alive: keepalive_fallback = True logger.warning( @@ -191,18 +133,13 @@ class BrowserbaseProvider(CloudBrowserProvider): ) if not response.ok: - if managed_mode and not _should_preserve_pending_create_key(response): - _clear_pending_create_key(task_id) raise RuntimeError( f"Failed to create Browserbase session: " f"{response.status_code} {response.text}" ) session_data = response.json() - if managed_mode: - _clear_pending_create_key(task_id) session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" - external_call_id = response.headers.get("x-external-call-id") if managed_mode else None if enable_proxies and not proxies_fallback: features_enabled["proxies"] = True @@ -221,7 +158,6 @@ class BrowserbaseProvider(CloudBrowserProvider): "bb_session_id": session_data["id"], "cdp_url": session_data["connectUrl"], "features": features_enabled, - "external_call_id": external_call_id, } def close_session(self, session_id: str) -> bool: diff --git a/tools/browser_tool.py b/tools/browser_tool.py index ba2f81cf..faa872a9 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -3,10 +3,10 @@ Browser Tool Module This module provides browser automation 
tools using agent-browser CLI. It -supports two backends — **Browserbase** (cloud) and **local Chromium** — with -identical agent-facing behaviour. The backend is auto-detected: if -``BROWSERBASE_API_KEY`` is set the cloud service is used; otherwise a local -headless Chromium instance is launched automatically. +supports multiple backends — **Browser Use** (cloud, default for Nous +subscribers), **Browserbase** (cloud, direct credentials), and **local +Chromium** — with identical agent-facing behaviour. The backend is +auto-detected from config and available credentials. The tool uses agent-browser's accessibility tree (ariaSnapshot) for text-based page representation, making it ideal for LLM agents without vision capabilities. @@ -17,8 +17,7 @@ Features: ``agent-browser install`` (downloads Chromium) or ``agent-browser install --with-deps`` (also installs system libraries for Debian/Ubuntu/Docker). -- **Cloud mode**: Browserbase cloud execution with stealth features, proxies, - and CAPTCHA solving. Activated when BROWSERBASE_API_KEY is set. +- **Cloud mode**: Browserbase or Browser Use cloud execution when configured. - Session isolation per task ID - Text-based page snapshots using accessibility tree - Element interaction via ref selectors (@e1, @e2, etc.) 
@@ -26,8 +25,9 @@ Features: - Automatic cleanup of browser sessions Environment Variables: -- BROWSERBASE_API_KEY: API key for Browserbase (enables cloud mode) -- BROWSERBASE_PROJECT_ID: Project ID for Browserbase (required for cloud mode) +- BROWSERBASE_API_KEY: API key for direct Browserbase cloud mode +- BROWSERBASE_PROJECT_ID: Project ID for direct Browserbase cloud mode +- BROWSER_USE_API_KEY: API key for direct Browser Use cloud mode - BROWSERBASE_PROXIES: Enable/disable residential proxies (default: "true") - BROWSERBASE_ADVANCED_STEALTH: Enable advanced stealth mode with custom Chromium, requires Scale Plan (default: "false") @@ -280,23 +280,19 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: logger.debug("Could not read cloud_provider from config: %s", e) if _cached_cloud_provider is None: - fallback_provider = BrowserbaseProvider() + # Prefer Browser Use (managed Nous gateway or direct API key), + # fall back to Browserbase (direct credentials only). + fallback_provider = BrowserUseProvider() if fallback_provider.is_configured(): _cached_cloud_provider = fallback_provider + else: + fallback_provider = BrowserbaseProvider() + if fallback_provider.is_configured(): + _cached_cloud_provider = fallback_provider return _cached_cloud_provider -def _get_browserbase_config_or_none() -> Optional[Dict[str, Any]]: - """Return Browserbase direct or managed config, or None when unavailable.""" - return BrowserbaseProvider()._get_config_or_none() - - -def _get_browserbase_config() -> Dict[str, Any]: - """Return Browserbase config or raise when neither direct nor managed mode is available.""" - return BrowserbaseProvider()._get_config() - - def _is_local_mode() -> bool: """Return True when the browser tool will use a local browser backend.""" if _get_cdp_override(): @@ -615,7 +611,15 @@ BROWSER_TOOL_SCHEMAS = [ "required": ["key"] } }, - + { + "name": "browser_close", + "description": "Close the browser session and release resources. 
Call this when done with browser tasks to free up cloud browser session quota.", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + }, { "name": "browser_get_images", "description": "Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first.", @@ -736,6 +740,11 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: session_info = _create_local_session(task_id) else: session_info = provider.create_session(task_id) + if session_info.get("cdp_url"): + # Some cloud providers (including Browser-Use v3) return an HTTP + # CDP discovery URL instead of a raw websocket endpoint. + session_info = dict(session_info) + session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"])) with _cleanup_lock: # Double-check: another thread may have created a session while we @@ -1947,7 +1956,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: camofox_close(task_id) except Exception as e: logger.debug("Camofox cleanup for task %s: %s", task_id, e) - + logger.debug("cleanup_browser called for task_id: %s", task_id) logger.debug("Active sessions: %s", list(_active_sessions.keys())) From 29065cb9b50d049922b9477782c1a05d2d186fb0 Mon Sep 17 00:00:00 2001 From: emozilla Date: Tue, 7 Apr 2026 02:17:14 -0400 Subject: [PATCH 069/154] feat(nous): free-tier model gating, pricing display, and vision fallback - Show pricing during initial Nous Portal login (was missing from _login_nous, only shown in the already-logged-in hermes model path) - Filter free models for paid subscribers: non-allowlisted free models are hidden; allowlisted models (xiaomi/mimo-v2-pro, xiaomi/mimo-v2-omni) only appear when actually priced as free - Detect free-tier accounts via portal api/oauth/account endpoint (monthly_charge == 0); free-tier users see only free models as selectable, with paid models shown dimmed and 
unselectable - Use xiaomi/mimo-v2-omni as the auxiliary vision model for free-tier Nous users so vision_analyze and browser_vision work without paid model access (replaces the default google/gemini-3-flash-preview) - Unavailable models rendered via print() before TerminalMenu to avoid simple_term_menu line-width padding artifacts; upgrade URL resolved from auth state portal_base_url (supports staging/custom portals) - Add 21 tests covering filter_nous_free_models, is_nous_free_tier, and partition_nous_models_by_tier --- agent/auxiliary_client.py | 15 ++- hermes_cli/auth.py | 73 ++++++++++++-- hermes_cli/main.py | 46 ++++++++- hermes_cli/models.py | 166 +++++++++++++++++++++++++++++++ tests/hermes_cli/test_models.py | 169 +++++++++++++++++++++++++++++++- 5 files changed, 457 insertions(+), 12 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 9edc505e..5f13994c 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -91,6 +91,7 @@ auxiliary_is_nous: bool = False # Default auxiliary models per provider _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" +_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -720,7 +721,19 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") - model = "gemini-3-flash" if nous.get("source") == "pool" else _NOUS_MODEL + if nous.get("source") == "pool": + model = "gemini-3-flash" + else: + model = _NOUS_MODEL + # Free-tier users can't use paid auxiliary models — use the free + # multimodal model instead so vision/browser-vision still works. 
+ try: + from hermes_cli.models import check_nous_free_tier + if check_nous_free_tier(): + model = _NOUS_FREE_TIER_VISION_MODEL + logger.debug("Free-tier Nous account — using %s for auxiliary/vision", model) + except Exception: + pass return ( OpenAI( api_key=_nous_api_key(nous), diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 23119c66..9e92b450 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2279,14 +2279,21 @@ def _prompt_model_selection( model_ids: List[str], current_model: str = "", pricing: Optional[Dict[str, Dict[str, str]]] = None, + unavailable_models: Optional[List[str]] = None, + portal_url: str = "", ) -> Optional[str]: """Interactive model selection. Puts current_model first with a marker. Returns chosen model ID or None. If *pricing* is provided (``{model_id: {prompt, completion}}``), a compact price indicator is shown next to each model in aligned columns. + + If *unavailable_models* is provided, those models are shown grayed out + and unselectable, with an upgrade link to *portal_url*. 
""" from hermes_cli.models import _format_price_per_mtok + _unavailable = unavailable_models or [] + # Reorder: current model first, then the rest (deduplicated) ordered = [] if current_model and current_model in model_ids: @@ -2295,9 +2302,12 @@ def _prompt_model_selection( if mid not in ordered: ordered.append(mid) + # All models for column-width computation (selectable + unavailable) + all_models = list(ordered) + list(_unavailable) + # Column-aligned labels when pricing is available - has_pricing = bool(pricing and any(pricing.get(m) for m in ordered)) - name_col = max((len(m) for m in ordered), default=0) + 2 if has_pricing else 0 + has_pricing = bool(pricing and any(pricing.get(m) for m in all_models)) + name_col = max((len(m) for m in all_models), default=0) + 2 if has_pricing else 0 # Pre-compute formatted prices and dynamic column widths _price_cache: dict[str, tuple[str, str, str]] = {} @@ -2305,7 +2315,7 @@ def _prompt_model_selection( cache_col = 0 # only set if any model has cache pricing has_cache = False if has_pricing: - for mid in ordered: + for mid in all_models: p = pricing.get(mid) # type: ignore[union-attr] if p: inp = _format_price_per_mtok(p.get("prompt", "")) @@ -2350,12 +2360,35 @@ def _prompt_model_selection( header += f" {'Cache':>{cache_col}}" menu_title += header + " /Mtok" + # ANSI escape for dim text + _DIM = "\033[2m" + _RESET = "\033[0m" + # Try arrow-key menu first, fall back to number input try: from simple_term_menu import TerminalMenu + choices = [f" {_label(mid)}" for mid in ordered] choices.append(" Enter custom model name") choices.append(" Skip (keep current)") + + # Print the unavailable block BEFORE the menu via regular print(). + # simple_term_menu pads title lines to terminal width (causes wrapping), + # so we keep the title minimal and use stdout for the static block. + # clear_screen=False means our printed output stays visible above. 
+ _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/") + if _unavailable: + print(menu_title) + print() + for mid in _unavailable: + print(f"{_DIM} {_label(mid)}{_RESET}") + print() + print(f"{_DIM} ── Upgrade at {_upgrade_url} for paid models ──{_RESET}") + print() + effective_title = "Available free models:" + else: + effective_title = menu_title + menu = TerminalMenu( choices, cursor_index=default_idx, @@ -2364,7 +2397,7 @@ def _prompt_model_selection( menu_highlight_style=("fg_green",), cycle_cursor=True, clear_screen=False, - title=menu_title, + title=effective_title, ) idx = menu.show() if idx is None: @@ -2387,6 +2420,13 @@ def _prompt_model_selection( n = len(ordered) print(f" {n + 1:>{num_width}}. Enter custom model name") print(f" {n + 2:>{num_width}}. Skip (keep current)") + + if _unavailable: + _upgrade_url = (portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/") + print() + print(f" {_DIM}── Unavailable models (requires paid tier — upgrade at {_upgrade_url}) ──{_RESET}") + for mid in _unavailable: + print(f" {'':>{num_width}} {_DIM}{_label(mid)}{_RESET}") print() while True: @@ -2821,16 +2861,37 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: code="invalid_token", ) - from hermes_cli.models import _PROVIDER_MODELS + from hermes_cli.models import ( + _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, + check_nous_free_tier, partition_nous_models_by_tier, + ) model_ids = _PROVIDER_MODELS.get("nous", []) print() + unavailable_models: list = [] + if model_ids: + pricing = get_pricing_for_provider("nous") + model_ids = filter_nous_free_models(model_ids, pricing) + free_tier = check_nous_free_tier() + if free_tier: + model_ids, unavailable_models = partition_nous_models_by_tier( + model_ids, pricing, free_tier=True, + ) + _portal = auth_state.get("portal_base_url", "") if model_ids: print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.") - selected_model = 
_prompt_model_selection(model_ids) + selected_model = _prompt_model_selection( + model_ids, pricing=pricing, + unavailable_models=unavailable_models, + portal_url=_portal, + ) if selected_model: _save_model_choice(selected_model) print(f"Default model set to: {selected_model}") + elif unavailable_models: + _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/") + print("No free models currently available.") + print(f"Upgrade at {_url} to access paid models.") else: print("No curated models available for Nous Portal.") except Exception as exc: diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 55faf841..dae8cc95 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1195,14 +1195,15 @@ def _model_flow_nous(config, current_model="", args=None): # Already logged in — use curated model list (same as OpenRouter defaults). # The live /models endpoint returns hundreds of models; the curated list # shows only agentic models users recognize from OpenRouter. - from hermes_cli.models import _PROVIDER_MODELS, get_pricing_for_provider + from hermes_cli.models import ( + _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, + check_nous_free_tier, partition_nous_models_by_tier, + ) model_ids = _PROVIDER_MODELS.get("nous", []) if not model_ids: print("No curated models available for Nous Portal.") return - print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.") - # Verify credentials are still valid (catches expired sessions early) try: creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=5 * 60) @@ -1228,7 +1229,44 @@ def _model_flow_nous(config, current_model="", args=None): # Fetch live pricing (non-blocking — returns empty dict on failure) pricing = get_pricing_for_provider("nous") - selected = _prompt_model_selection(model_ids, current_model=current_model, pricing=pricing) + # Check if user is on free tier + free_tier = check_nous_free_tier() + + # For both tiers: apply the allowlist filter first 
(removes non-allowlisted + # free models and allowlist models that aren't actually free). + # Then for free users: partition remaining models into selectable/unavailable. + model_ids = filter_nous_free_models(model_ids, pricing) + unavailable_models: list[str] = [] + if free_tier: + model_ids, unavailable_models = partition_nous_models_by_tier(model_ids, pricing, free_tier=True) + + if not model_ids and not unavailable_models: + print("No models available for Nous Portal after filtering.") + return + + # Resolve portal URL for upgrade links (may differ on staging) + _nous_portal_url = "" + try: + _nous_state = get_provider_auth_state("nous") + if _nous_state: + _nous_portal_url = _nous_state.get("portal_base_url", "") + except Exception: + pass + + if free_tier and not model_ids: + print("No free models currently available.") + if unavailable_models: + from hermes_cli.auth import DEFAULT_NOUS_PORTAL_URL + _url = (_nous_portal_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/") + print(f"Upgrade at {_url} to access paid models.") + return + + print(f"Showing {len(model_ids)} curated models — use \"Enter custom model name\" for others.") + + selected = _prompt_model_selection( + model_ids, current_model=current_model, pricing=pricing, + unavailable_models=unavailable_models, portal_url=_nous_portal_url, + ) if selected: _save_model_choice(selected) # Reactivate Nous as the provider and update config diff --git a/hermes_cli/models.py b/hermes_cli/models.py index a5b1c2b2..4a6d4c47 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -265,6 +265,172 @@ _PROVIDER_MODELS: dict[str, list[str]] = { ], } +# --------------------------------------------------------------------------- +# Nous Portal free-model filtering +# --------------------------------------------------------------------------- +# Models that are ALLOWED to appear when priced as free on Nous Portal. 
+# Any other free model is hidden — prevents promotional/temporary free models +# from cluttering the selection when users are paying subscribers. +# Models in this list are ALSO filtered out if they are NOT free (i.e. they +# should only appear in the menu when they are genuinely free). +_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({ + "xiaomi/mimo-v2-pro", + "xiaomi/mimo-v2-omni", +}) + + +def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: + """Return True if *model_id* has zero-cost prompt AND completion pricing.""" + p = pricing.get(model_id) + if not p: + return False + try: + return float(p.get("prompt", "1")) == 0 and float(p.get("completion", "1")) == 0 + except (TypeError, ValueError): + return False + + +def filter_nous_free_models( + model_ids: list[str], + pricing: dict[str, dict[str, str]], +) -> list[str]: + """Filter the Nous Portal model list according to free-model policy. + + Rules: + • Paid models that are NOT in the allowlist → keep (normal case). + • Free models that are NOT in the allowlist → drop. + • Allowlist models that ARE free → keep. + • Allowlist models that are NOT free → drop. + """ + if not pricing: + return model_ids # no pricing data — can't filter, show everything + + result: list[str] = [] + for mid in model_ids: + free = _is_model_free(mid, pricing) + if mid in _NOUS_ALLOWED_FREE_MODELS: + # Allowlist model: only show when it's actually free + if free: + result.append(mid) + else: + # Regular model: keep only when it's NOT free + if not free: + result.append(mid) + return result + + +# --------------------------------------------------------------------------- +# Nous Portal account tier detection +# --------------------------------------------------------------------------- + +def fetch_nous_account_tier(access_token: str, portal_base_url: str = "") -> dict[str, Any]: + """Fetch the user's Nous Portal account/subscription info. + + Calls ``/api/oauth/account`` with the OAuth access token. 
+ + Returns the parsed JSON dict on success, e.g.:: + + { + "subscription": { + "plan": "Plus", + "tier": 2, + "monthly_charge": 20, + "credits_remaining": 1686.60, + ... + }, + ... + } + + Returns an empty dict on any failure (network, auth, parse). + """ + base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") + url = f"{base}/api/oauth/account" + headers = { + "Authorization": f"Bearer {access_token}", + "Accept": "application/json", + } + try: + req = urllib.request.Request(url, headers=headers) + with urllib.request.urlopen(req, timeout=8) as resp: + return json.loads(resp.read().decode()) + except Exception: + return {} + + +def is_nous_free_tier(account_info: dict[str, Any]) -> bool: + """Return True if the account info indicates a free (unpaid) tier. + + Checks ``subscription.monthly_charge == 0``. Returns False when + the field is missing or unparseable (assumes paid — don't block users). + """ + sub = account_info.get("subscription") + if not isinstance(sub, dict): + return False + charge = sub.get("monthly_charge") + if charge is None: + return False + try: + return float(charge) == 0 + except (TypeError, ValueError): + return False + + +def partition_nous_models_by_tier( + model_ids: list[str], + pricing: dict[str, dict[str, str]], + free_tier: bool, +) -> tuple[list[str], list[str]]: + """Split Nous models into (selectable, unavailable) based on user tier. + + For paid-tier users: all models are selectable, none unavailable + (free-model filtering is handled separately by ``filter_nous_free_models``). + + For free-tier users: only free models are selectable; paid models + are returned as unavailable (shown grayed out in the menu). 
+ """ + if not free_tier: + return (model_ids, []) + + if not pricing: + return (model_ids, []) # can't determine, show everything + + selectable: list[str] = [] + unavailable: list[str] = [] + for mid in model_ids: + if _is_model_free(mid, pricing): + selectable.append(mid) + else: + unavailable.append(mid) + return (selectable, unavailable) + + +def check_nous_free_tier() -> bool: + """Check if the current Nous Portal user is on a free (unpaid) tier. + + Resolves the OAuth access token from the auth store, calls the + portal account endpoint, and returns True if the account has no + paid subscription. Returns False (assume paid) on any error. + """ + try: + from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials + + # Ensure we have a fresh token (triggers refresh if needed) + resolve_nous_runtime_credentials(min_key_ttl_seconds=60) + + state = get_provider_auth_state("nous") + if not state: + return False + access_token = state.get("access_token", "") + portal_url = state.get("portal_base_url", "") + if not access_token: + return False + + account_info = fetch_nous_account_tier(access_token, portal_url) + return is_nous_free_tier(account_info) + except Exception: + return False # default to paid on error — don't block users + + _PROVIDER_LABELS = { "openrouter": "OpenRouter", "openai-codex": "OpenAI Codex", diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py index 74f84424..3d1564ae 100644 --- a/tests/hermes_cli/test_models.py +++ b/tests/hermes_cli/test_models.py @@ -1,6 +1,10 @@ """Tests for the hermes_cli models module.""" -from hermes_cli.models import OPENROUTER_MODELS, menu_labels, model_ids, detect_provider_for_model +from hermes_cli.models import ( + OPENROUTER_MODELS, menu_labels, model_ids, detect_provider_for_model, + filter_nous_free_models, _NOUS_ALLOWED_FREE_MODELS, + is_nous_free_tier, partition_nous_models_by_tier, +) class TestModelIds: @@ -124,3 +128,166 @@ class 
TestDetectProviderForModel: result = detect_provider_for_model("claude-opus-4-6", "openai-codex") assert result is not None assert result[0] not in ("nous",) # nous has claude models but shouldn't be suggested + + +class TestFilterNousFreeModels: + """Tests for filter_nous_free_models — Nous Portal free-model policy.""" + + _PAID = {"prompt": "0.000003", "completion": "0.000015"} + _FREE = {"prompt": "0", "completion": "0"} + + def test_paid_models_kept(self): + """Regular paid models pass through unchanged.""" + models = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"] + pricing = {m: self._PAID for m in models} + assert filter_nous_free_models(models, pricing) == models + + def test_free_non_allowlist_models_removed(self): + """Free models NOT in the allowlist are filtered out.""" + models = ["anthropic/claude-opus-4.6", "arcee-ai/trinity-large-preview:free"] + pricing = { + "anthropic/claude-opus-4.6": self._PAID, + "arcee-ai/trinity-large-preview:free": self._FREE, + } + result = filter_nous_free_models(models, pricing) + assert result == ["anthropic/claude-opus-4.6"] + + def test_allowlist_model_kept_when_free(self): + """Allowlist models are kept when they report as free.""" + models = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"] + pricing = { + "anthropic/claude-opus-4.6": self._PAID, + "xiaomi/mimo-v2-pro": self._FREE, + } + result = filter_nous_free_models(models, pricing) + assert result == ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"] + + def test_allowlist_model_removed_when_paid(self): + """Allowlist models are removed when they are NOT free.""" + models = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"] + pricing = { + "anthropic/claude-opus-4.6": self._PAID, + "xiaomi/mimo-v2-pro": self._PAID, + } + result = filter_nous_free_models(models, pricing) + assert result == ["anthropic/claude-opus-4.6"] + + def test_no_pricing_returns_all(self): + """When pricing data is unavailable, all models pass through.""" + models = 
["anthropic/claude-opus-4.6", "nvidia/nemotron-3-super-120b-a12b:free"] + assert filter_nous_free_models(models, {}) == models + + def test_model_with_no_pricing_entry_treated_as_paid(self): + """A model missing from the pricing dict is kept (assumed paid).""" + models = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"] + pricing = {"anthropic/claude-opus-4.6": self._PAID} # gpt-5.4 not in pricing + result = filter_nous_free_models(models, pricing) + assert result == models + + def test_mixed_scenario(self): + """End-to-end: mix of paid, free-allowed, free-disallowed, allowlist-not-free.""" + models = [ + "anthropic/claude-opus-4.6", # paid, not allowlist → keep + "nvidia/nemotron-3-super-120b-a12b:free", # free, not allowlist → drop + "xiaomi/mimo-v2-pro", # free, allowlist → keep + "xiaomi/mimo-v2-omni", # paid, allowlist → drop + "openai/gpt-5.4", # paid, not allowlist → keep + ] + pricing = { + "anthropic/claude-opus-4.6": self._PAID, + "nvidia/nemotron-3-super-120b-a12b:free": self._FREE, + "xiaomi/mimo-v2-pro": self._FREE, + "xiaomi/mimo-v2-omni": self._PAID, + "openai/gpt-5.4": self._PAID, + } + result = filter_nous_free_models(models, pricing) + assert result == [ + "anthropic/claude-opus-4.6", + "xiaomi/mimo-v2-pro", + "openai/gpt-5.4", + ] + + def test_allowlist_contains_expected_models(self): + """Sanity: the allowlist has the models we expect.""" + assert "xiaomi/mimo-v2-pro" in _NOUS_ALLOWED_FREE_MODELS + assert "xiaomi/mimo-v2-omni" in _NOUS_ALLOWED_FREE_MODELS + + +class TestIsNousFreeTier: + """Tests for is_nous_free_tier — account tier detection.""" + + def test_paid_plus_tier(self): + assert is_nous_free_tier({"subscription": {"plan": "Plus", "tier": 2, "monthly_charge": 20}}) is False + + def test_free_tier_by_charge(self): + assert is_nous_free_tier({"subscription": {"plan": "Free", "tier": 0, "monthly_charge": 0}}) is True + + def test_no_charge_field_not_free(self): + """Missing monthly_charge defaults to not-free (don't block users).""" + 
assert is_nous_free_tier({"subscription": {"plan": "Free", "tier": 0}}) is False + + def test_plan_name_alone_not_free(self): + """Plan name alone is not enough — monthly_charge is required.""" + assert is_nous_free_tier({"subscription": {"plan": "free"}}) is False + + def test_empty_subscription_not_free(self): + """Empty subscription dict defaults to not-free (don't block users).""" + assert is_nous_free_tier({"subscription": {}}) is False + + def test_no_subscription_not_free(self): + """Missing subscription key returns False.""" + assert is_nous_free_tier({}) is False + + def test_empty_response_not_free(self): + """Completely empty response defaults to not-free.""" + assert is_nous_free_tier({}) is False + + +class TestPartitionNousModelsByTier: + """Tests for partition_nous_models_by_tier — free vs paid tier model split.""" + + _PAID = {"prompt": "0.000003", "completion": "0.000015"} + _FREE = {"prompt": "0", "completion": "0"} + + def test_paid_tier_all_selectable(self): + """Paid users get all models as selectable, none unavailable.""" + models = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro"] + pricing = {"anthropic/claude-opus-4.6": self._PAID, "xiaomi/mimo-v2-pro": self._FREE} + sel, unav = partition_nous_models_by_tier(models, pricing, free_tier=False) + assert sel == models + assert unav == [] + + def test_free_tier_splits_correctly(self): + """Free users see only free models; paid ones are unavailable.""" + models = ["anthropic/claude-opus-4.6", "xiaomi/mimo-v2-pro", "openai/gpt-5.4"] + pricing = { + "anthropic/claude-opus-4.6": self._PAID, + "xiaomi/mimo-v2-pro": self._FREE, + "openai/gpt-5.4": self._PAID, + } + sel, unav = partition_nous_models_by_tier(models, pricing, free_tier=True) + assert sel == ["xiaomi/mimo-v2-pro"] + assert unav == ["anthropic/claude-opus-4.6", "openai/gpt-5.4"] + + def test_no_pricing_returns_all(self): + """Without pricing data, all models are selectable.""" + models = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"] + 
sel, unav = partition_nous_models_by_tier(models, {}, free_tier=True) + assert sel == models + assert unav == [] + + def test_all_free_models(self): + """When all models are free, free-tier users can select all.""" + models = ["xiaomi/mimo-v2-pro", "xiaomi/mimo-v2-omni"] + pricing = {m: self._FREE for m in models} + sel, unav = partition_nous_models_by_tier(models, pricing, free_tier=True) + assert sel == models + assert unav == [] + + def test_all_paid_models(self): + """When all models are paid, free-tier users have none selectable.""" + models = ["anthropic/claude-opus-4.6", "openai/gpt-5.4"] + pricing = {m: self._PAID for m in models} + sel, unav = partition_nous_models_by_tier(models, pricing, free_tier=True) + assert sel == [] + assert unav == models From 47ddc2bde56a84d1e0edefaa5912ccf9b9b5466e Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 09:30:26 -0700 Subject: [PATCH 070/154] fix(nous): add 3-minute TTL cache to free-tier detection check_nous_free_tier() now caches its result for 180 seconds to avoid redundant Portal API calls during a session (auxiliary client init, model selection, login flow all call it independently). The TTL is short enough that an account upgrade from free to paid is reflected within 3 minutes. clear_nous_free_tier_cache() is exposed for explicit invalidation on login/logout. Adds 4 tests for cache hit, TTL expiry, explicit clear, and TTL bound. 
--- hermes_cli/models.py | 38 +++++++++++++++++-- tests/hermes_cli/test_models.py | 65 +++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 4a6d4c47..85413267 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -404,13 +404,38 @@ def partition_nous_models_by_tier( return (selectable, unavailable) +# --------------------------------------------------------------------------- +# TTL cache for free-tier detection — avoids repeated API calls within a +# session while still picking up upgrades quickly. +# --------------------------------------------------------------------------- +_FREE_TIER_CACHE_TTL: int = 180 # seconds (3 minutes) +_free_tier_cache: tuple[bool, float] | None = None # (result, timestamp) + + +def clear_nous_free_tier_cache() -> None: + """Invalidate the cached free-tier result (e.g. after login/logout).""" + global _free_tier_cache + _free_tier_cache = None + + def check_nous_free_tier() -> bool: """Check if the current Nous Portal user is on a free (unpaid) tier. - Resolves the OAuth access token from the auth store, calls the - portal account endpoint, and returns True if the account has no - paid subscription. Returns False (assume paid) on any error. + Results are cached for ``_FREE_TIER_CACHE_TTL`` seconds to avoid + hitting the Portal API on every call. The cache is short-lived so + that an account upgrade is reflected within a few minutes. + + Returns False (assume paid) on any error — never blocks paying users. 
""" + global _free_tier_cache + import time + + now = time.monotonic() + if _free_tier_cache is not None: + cached_result, cached_at = _free_tier_cache + if now - cached_at < _FREE_TIER_CACHE_TTL: + return cached_result + try: from hermes_cli.auth import get_provider_auth_state, resolve_nous_runtime_credentials @@ -419,15 +444,20 @@ def check_nous_free_tier() -> bool: state = get_provider_auth_state("nous") if not state: + _free_tier_cache = (False, now) return False access_token = state.get("access_token", "") portal_url = state.get("portal_base_url", "") if not access_token: + _free_tier_cache = (False, now) return False account_info = fetch_nous_account_tier(access_token, portal_url) - return is_nous_free_tier(account_info) + result = is_nous_free_tier(account_info) + _free_tier_cache = (result, now) + return result except Exception: + _free_tier_cache = (False, now) return False # default to paid on error — don't block users diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py index 3d1564ae..776256f0 100644 --- a/tests/hermes_cli/test_models.py +++ b/tests/hermes_cli/test_models.py @@ -1,10 +1,15 @@ """Tests for the hermes_cli models module.""" +from unittest.mock import patch, MagicMock + from hermes_cli.models import ( OPENROUTER_MODELS, menu_labels, model_ids, detect_provider_for_model, filter_nous_free_models, _NOUS_ALLOWED_FREE_MODELS, is_nous_free_tier, partition_nous_models_by_tier, + check_nous_free_tier, clear_nous_free_tier_cache, + _FREE_TIER_CACHE_TTL, ) +import hermes_cli.models as _models_mod class TestModelIds: @@ -291,3 +296,63 @@ class TestPartitionNousModelsByTier: sel, unav = partition_nous_models_by_tier(models, pricing, free_tier=True) assert sel == [] assert unav == models + + +class TestCheckNousFreeTierCache: + """Tests for the TTL cache on check_nous_free_tier().""" + + def setup_method(self): + """Reset cache before each test.""" + clear_nous_free_tier_cache() + + def teardown_method(self): + """Reset cache 
after each test.""" + clear_nous_free_tier_cache() + + @patch("hermes_cli.models.fetch_nous_account_tier") + @patch("hermes_cli.models.is_nous_free_tier", return_value=True) + def test_result_is_cached(self, mock_is_free, mock_fetch): + """Second call within TTL returns cached result without API call.""" + mock_fetch.return_value = {"subscription": {"monthly_charge": 0}} + with patch("hermes_cli.auth.get_provider_auth_state", return_value={"access_token": "tok"}), \ + patch("hermes_cli.auth.resolve_nous_runtime_credentials"): + result1 = check_nous_free_tier() + result2 = check_nous_free_tier() + + assert result1 is True + assert result2 is True + # fetch_nous_account_tier should only be called once (cached on second call) + assert mock_fetch.call_count == 1 + + @patch("hermes_cli.models.fetch_nous_account_tier") + @patch("hermes_cli.models.is_nous_free_tier", return_value=False) + def test_cache_expires_after_ttl(self, mock_is_free, mock_fetch): + """After TTL expires, the API is called again.""" + mock_fetch.return_value = {"subscription": {"monthly_charge": 20}} + with patch("hermes_cli.auth.get_provider_auth_state", return_value={"access_token": "tok"}), \ + patch("hermes_cli.auth.resolve_nous_runtime_credentials"): + result1 = check_nous_free_tier() + assert mock_fetch.call_count == 1 + + # Simulate TTL expiry by backdating the cache timestamp + cached_result, cached_at = _models_mod._free_tier_cache + _models_mod._free_tier_cache = (cached_result, cached_at - _FREE_TIER_CACHE_TTL - 1) + + result2 = check_nous_free_tier() + assert mock_fetch.call_count == 2 + + assert result1 is False + assert result2 is False + + def test_clear_cache_forces_refresh(self): + """clear_nous_free_tier_cache() invalidates the cached result.""" + # Manually seed the cache + import time + _models_mod._free_tier_cache = (True, time.monotonic()) + + clear_nous_free_tier_cache() + assert _models_mod._free_tier_cache is None + + def test_cache_ttl_is_short(self): + """TTL should be 
short enough to catch upgrades quickly (<=5 min).""" + assert _FREE_TIER_CACHE_TTL <= 300 From caded0a5e75f51fce4e0a18a28a483649d21569c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 09:58:45 -0700 Subject: [PATCH 071/154] fix: repair 57 failing CI tests across 14 files (#5823) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: repair 57 failing CI tests across 14 files Categories of fixes: **Test isolation under xdist (-n auto):** - test_hermes_logging: Strip ALL RotatingFileHandlers before each test to prevent handlers leaked from other xdist workers from polluting counts - test_code_execution: Force TERMINAL_ENV=local in setUp — prevents Modal AuthError when another test leaks TERMINAL_ENV=modal - test_timezone: Same TERMINAL_ENV fix for execute_code timezone tests - test_codex_execution_paths: Mock _resolve_turn_agent_config to ensure model resolution works regardless of xdist worker state **Matrix adapter tests (nio not installed in CI):** - Add _make_fake_nio() helper with real response classes for isinstance() checks in production code - Replace MagicMock(spec=nio.XxxResponse) with fake_nio instances - Wrap production method calls with patch.dict('sys.modules', {'nio': ...}) so import nio succeeds in method bodies - Use try/except instead of pytest.importorskip for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules) - test_matrix_voice: Skip entire file if nio is a mock, not just missing **Stale test expectations:** - test_cli_provider_resolution: _prompt_provider_choice now takes **kwargs (default param added); mock getpass.getpass alongside input - test_anthropic_oauth_flow: Mock getpass.getpass (code switched from input) - test_gemini_provider: Mock models.dev + OpenRouter API lookups to test hardcoded defaults without external API variance - test_code_execution: Add notify_on_complete to blocked terminal params - 
test_setup_openclaw_migration: Mock prompt_choice to select 'Full setup' (new quick-setup path leads to _require_tty → sys.exit in CI) - test_skill_manager_tool: Patch get_all_skills_dirs alongside SKILLS_DIR so _find_skill searches tmp_path, not real ~/.hermes/skills/ **Missing attributes in object.__new__ test runners:** - test_platform_reconnect: Add session_store to _make_runner() - test_session_race_guard: Add hooks, _running_agents_ts, session_store, delivery_router to _make_runner() **Production bug fix (gateway/run.py):** - Fix sentinel eviction race: _AGENT_PENDING_SENTINEL was immediately evicted by the stale-detection logic because sentinels have no get_activity_summary() method, causing _stale_idle=inf >= timeout. Guard _should_evict with 'is not _AGENT_PENDING_SENTINEL'. * fix: address remaining CI failures - test_setup_openclaw_migration: Also mock _offer_launch_chat (called at end of both quick and full setup paths) - test_code_execution: Move TERMINAL_ENV=local to module level to protect ALL test classes (TestEnvVarFiltering, TestExecuteCodeEdgeCases, TestInterruptHandling, TestHeadTailTruncation) from xdist env leaks - test_matrix: Use try/except for nio.crypto imports (importorskip can be fooled by MagicMock in sys.modules under xdist) --- gateway/run.py | 12 +- tests/gateway/test_matrix.py | 114 +++++++++++++----- tests/gateway/test_matrix_voice.py | 12 +- tests/gateway/test_platform_reconnect.py | 1 + tests/gateway/test_session_race_guard.py | 5 + .../test_setup_openclaw_migration.py | 8 ++ tests/test_anthropic_oauth_flow.py | 1 + tests/test_cli_provider_resolution.py | 5 +- tests/test_codex_execution_paths.py | 11 ++ tests/test_gemini_provider.py | 6 +- tests/test_hermes_logging.py | 21 +++- tests/test_timezone.py | 5 +- tests/tools/test_code_execution.py | 8 +- tests/tools/test_skill_manager_tool.py | 68 ++++++----- 14 files changed, 208 insertions(+), 69 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 7a45be62..56518be6 100644 
--- a/gateway/run.py +++ b/gateway/run.py @@ -1858,6 +1858,11 @@ class GatewayRunner: if _quick_key in self._running_agents and _stale_ts: _stale_age = time.time() - _stale_ts _stale_agent = self._running_agents.get(_quick_key) + # Never evict the pending sentinel — it was just placed moments + # ago during the async setup phase before the real agent is + # created. Sentinels have no get_activity_summary(), so the + # idle check below would always evaluate to inf >= timeout and + # immediately evict them, racing with the setup path. _stale_idle = float("inf") # assume idle if we can't check _stale_detail = "" if _stale_agent and hasattr(_stale_agent, "get_activity_summary"): @@ -1876,8 +1881,11 @@ class GatewayRunner: # cases where the agent object was garbage-collected). _wall_ttl = max(_raw_stale_timeout * 10, 7200) if _raw_stale_timeout > 0 else float("inf") _should_evict = ( - (_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout) - or _stale_age > _wall_ttl + _stale_agent is not _AGENT_PENDING_SENTINEL + and ( + (_raw_stale_timeout > 0 and _stale_idle >= _raw_stale_timeout) + or _stale_age > _wall_ttl + ) ) if _should_evict: logger.warning( diff --git a/tests/gateway/test_matrix.py b/tests/gateway/test_matrix.py index 09f0ab95..0de00b73 100644 --- a/tests/gateway/test_matrix.py +++ b/tests/gateway/test_matrix.py @@ -2,12 +2,54 @@ import asyncio import json import re +import sys +import types import pytest from unittest.mock import MagicMock, patch, AsyncMock from gateway.config import Platform, PlatformConfig +def _make_fake_nio(): + """Create a lightweight fake ``nio`` module with real response classes. + + Tests that call production methods doing ``import nio`` / ``isinstance(resp, nio.XxxResponse)`` + need real classes (not MagicMock auto-attributes) to satisfy isinstance checks. + Use via ``patch.dict("sys.modules", {"nio": _make_fake_nio()})``. 
+ """ + mod = types.ModuleType("nio") + + class RoomSendResponse: + def __init__(self, event_id="$fake"): + self.event_id = event_id + + class RoomRedactResponse: + pass + + class RoomCreateResponse: + def __init__(self, room_id="!fake:example.org"): + self.room_id = room_id + + class RoomInviteResponse: + pass + + class UploadResponse: + def __init__(self, content_uri="mxc://example.org/fake"): + self.content_uri = content_uri + + # Minimal Api stub for code that checks nio.Api.RoomPreset + class _Api: + pass + mod.Api = _Api + + mod.RoomSendResponse = RoomSendResponse + mod.RoomRedactResponse = RoomRedactResponse + mod.RoomCreateResponse = RoomCreateResponse + mod.RoomInviteResponse = RoomInviteResponse + mod.UploadResponse = UploadResponse + return mod + + # --------------------------------------------------------------------------- # Platform & Config # --------------------------------------------------------------------------- @@ -1450,7 +1492,10 @@ class TestMatrixEncryptedMedia: @pytest.mark.asyncio async def test_on_room_message_media_decrypts_encrypted_image_and_passes_local_path(self): - from nio.crypto.attachments import encrypt_attachment + try: + from nio.crypto.attachments import encrypt_attachment + except (ImportError, ModuleNotFoundError): + pytest.skip("matrix-nio[e2e] required for encryption tests") adapter = _make_adapter() adapter._user_id = "@bot:example.org" @@ -1518,7 +1563,10 @@ class TestMatrixEncryptedMedia: @pytest.mark.asyncio async def test_on_room_message_media_decrypts_encrypted_voice_and_caches_audio(self): - from nio.crypto.attachments import encrypt_attachment + try: + from nio.crypto.attachments import encrypt_attachment + except (ImportError, ModuleNotFoundError): + pytest.skip("matrix-nio[e2e] required for encryption tests") adapter = _make_adapter() adapter._user_id = "@bot:example.org" @@ -1587,7 +1635,10 @@ class TestMatrixEncryptedMedia: @pytest.mark.asyncio async def 
test_on_room_message_media_decrypts_encrypted_file_and_caches_document(self): - from nio.crypto.attachments import encrypt_attachment + try: + from nio.crypto.attachments import encrypt_attachment + except (ImportError, ModuleNotFoundError): + pytest.skip("matrix-nio[e2e] required for encryption tests") adapter = _make_adapter() adapter._user_id = "@bot:example.org" @@ -1883,14 +1934,15 @@ class TestMatrixReactions: @pytest.mark.asyncio async def test_send_reaction(self): """_send_reaction should call room_send with m.reaction.""" - nio = pytest.importorskip("nio") + fake_nio = _make_fake_nio() mock_client = MagicMock() mock_client.room_send = AsyncMock( - return_value=MagicMock(spec=nio.RoomSendResponse) + return_value=fake_nio.RoomSendResponse("$reaction1") ) self.adapter._client = mock_client - result = await self.adapter._send_reaction("!room:ex", "$event1", "👍") + with patch.dict("sys.modules", {"nio": fake_nio}): + result = await self.adapter._send_reaction("!room:ex", "$event1", "👍") assert result is True mock_client.room_send.assert_called_once() args = mock_client.room_send.call_args @@ -1902,7 +1954,8 @@ class TestMatrixReactions: @pytest.mark.asyncio async def test_send_reaction_no_client(self): self.adapter._client = None - result = await self.adapter._send_reaction("!room:ex", "$ev", "👍") + with patch.dict("sys.modules", {"nio": _make_fake_nio()}): + result = await self.adapter._send_reaction("!room:ex", "$ev", "👍") assert result is False @pytest.mark.asyncio @@ -1999,21 +2052,23 @@ class TestMatrixRedaction: @pytest.mark.asyncio async def test_redact_message(self): - nio = pytest.importorskip("nio") + fake_nio = _make_fake_nio() mock_client = MagicMock() mock_client.room_redact = AsyncMock( - return_value=MagicMock(spec=nio.RoomRedactResponse) + return_value=fake_nio.RoomRedactResponse() ) self.adapter._client = mock_client - result = await self.adapter.redact_message("!room:ex", "$ev1", "oops") + with patch.dict("sys.modules", {"nio": fake_nio}): + 
result = await self.adapter.redact_message("!room:ex", "$ev1", "oops") assert result is True mock_client.room_redact.assert_called_once() @pytest.mark.asyncio async def test_redact_no_client(self): self.adapter._client = None - result = await self.adapter.redact_message("!room:ex", "$ev1") + with patch.dict("sys.modules", {"nio": _make_fake_nio()}): + result = await self.adapter.redact_message("!room:ex", "$ev1") assert result is False @@ -2027,33 +2082,35 @@ class TestMatrixRoomManagement: @pytest.mark.asyncio async def test_create_room(self): - nio = pytest.importorskip("nio") - mock_resp = MagicMock(spec=nio.RoomCreateResponse) - mock_resp.room_id = "!new:example.org" + fake_nio = _make_fake_nio() + mock_resp = fake_nio.RoomCreateResponse(room_id="!new:example.org") mock_client = MagicMock() mock_client.room_create = AsyncMock(return_value=mock_resp) self.adapter._client = mock_client - room_id = await self.adapter.create_room(name="Test Room", topic="A test") + with patch.dict("sys.modules", {"nio": fake_nio}): + room_id = await self.adapter.create_room(name="Test Room", topic="A test") assert room_id == "!new:example.org" assert "!new:example.org" in self.adapter._joined_rooms @pytest.mark.asyncio async def test_invite_user(self): - nio = pytest.importorskip("nio") + fake_nio = _make_fake_nio() mock_client = MagicMock() mock_client.room_invite = AsyncMock( - return_value=MagicMock(spec=nio.RoomInviteResponse) + return_value=fake_nio.RoomInviteResponse() ) self.adapter._client = mock_client - result = await self.adapter.invite_user("!room:ex", "@user:ex") + with patch.dict("sys.modules", {"nio": fake_nio}): + result = await self.adapter.invite_user("!room:ex", "@user:ex") assert result is True @pytest.mark.asyncio async def test_create_room_no_client(self): self.adapter._client = None - result = await self.adapter.create_room() + with patch.dict("sys.modules", {"nio": _make_fake_nio()}): + result = await self.adapter.create_room() assert result is None @@ 
-2099,28 +2156,28 @@ class TestMatrixMessageTypes: @pytest.mark.asyncio async def test_send_emote(self): - nio = pytest.importorskip("nio") + fake_nio = _make_fake_nio() mock_client = MagicMock() - mock_resp = MagicMock(spec=nio.RoomSendResponse) - mock_resp.event_id = "$emote1" + mock_resp = fake_nio.RoomSendResponse(event_id="$emote1") mock_client.room_send = AsyncMock(return_value=mock_resp) self.adapter._client = mock_client - result = await self.adapter.send_emote("!room:ex", "waves hello") + with patch.dict("sys.modules", {"nio": fake_nio}): + result = await self.adapter.send_emote("!room:ex", "waves hello") assert result.success is True call_args = mock_client.room_send.call_args[0] assert call_args[2]["msgtype"] == "m.emote" @pytest.mark.asyncio async def test_send_notice(self): - nio = pytest.importorskip("nio") + fake_nio = _make_fake_nio() mock_client = MagicMock() - mock_resp = MagicMock(spec=nio.RoomSendResponse) - mock_resp.event_id = "$notice1" + mock_resp = fake_nio.RoomSendResponse(event_id="$notice1") mock_client.room_send = AsyncMock(return_value=mock_resp) self.adapter._client = mock_client - result = await self.adapter.send_notice("!room:ex", "System message") + with patch.dict("sys.modules", {"nio": fake_nio}): + result = await self.adapter.send_notice("!room:ex", "System message") assert result.success is True call_args = mock_client.room_send.call_args[0] assert call_args[2]["msgtype"] == "m.notice" @@ -2128,5 +2185,6 @@ class TestMatrixMessageTypes: @pytest.mark.asyncio async def test_send_emote_empty_text(self): self.adapter._client = MagicMock() - result = await self.adapter.send_emote("!room:ex", "") + with patch.dict("sys.modules", {"nio": _make_fake_nio()}): + result = await self.adapter.send_emote("!room:ex", "") assert result.success is False diff --git a/tests/gateway/test_matrix_voice.py b/tests/gateway/test_matrix_voice.py index 79f0947f..93d56caf 100644 --- a/tests/gateway/test_matrix_voice.py +++ 
b/tests/gateway/test_matrix_voice.py @@ -1,10 +1,18 @@ """Tests for Matrix voice message support (MSC3245).""" import io +import types import pytest -from unittest.mock import AsyncMock, MagicMock +from unittest.mock import AsyncMock, MagicMock, patch -nio = pytest.importorskip("nio", reason="matrix-nio not installed") +# Try importing real nio; skip entire file if not available. +# A MagicMock in sys.modules (from another test) is not the real package. +try: + import nio as _nio_probe + if not isinstance(_nio_probe, types.ModuleType) or not hasattr(_nio_probe, "__file__"): + pytest.skip("nio in sys.modules is a mock, not the real package", allow_module_level=True) +except ImportError: + pytest.skip("matrix-nio not installed", allow_module_level=True) from gateway.platforms.base import MessageType diff --git a/tests/gateway/test_platform_reconnect.py b/tests/gateway/test_platform_reconnect.py index 68dfd204..56674272 100644 --- a/tests/gateway/test_platform_reconnect.py +++ b/tests/gateway/test_platform_reconnect.py @@ -59,6 +59,7 @@ def _make_runner(): runner._honcho_managers = {} runner._honcho_configs = {} runner._shutdown_all_gateway_honcho = lambda: None + runner.session_store = MagicMock() return runner diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py index 427718c9..ff21cdef 100644 --- a/tests/gateway/test_session_race_guard.py +++ b/tests/gateway/test_session_race_guard.py @@ -36,11 +36,16 @@ def _make_runner(): ) runner.adapters = {Platform.TELEGRAM: _FakeAdapter()} runner._running_agents = {} + runner._running_agents_ts = {} runner._pending_messages = {} runner._pending_approvals = {} runner._voice_mode = {} runner._background_tasks = set() runner._is_user_authorized = lambda _source: True + runner.hooks = MagicMock() + runner.hooks.emit = AsyncMock() + runner.session_store = MagicMock() + runner.delivery_router = MagicMock() return runner diff --git a/tests/hermes_cli/test_setup_openclaw_migration.py 
b/tests/hermes_cli/test_setup_openclaw_migration.py index 0991b6d1..b956f1fe 100644 --- a/tests/hermes_cli/test_setup_openclaw_migration.py +++ b/tests/hermes_cli/test_setup_openclaw_migration.py @@ -184,6 +184,8 @@ class TestSetupWizardOpenclawIntegration: patch("hermes_cli.auth.get_active_provider", return_value=None), # User presses Enter to start patch("builtins.input", return_value=""), + # Select "Full setup" (index 1) so we exercise the full path + patch.object(setup_mod, "prompt_choice", return_value=1), # Mock the migration offer patch.object( setup_mod, "_offer_openclaw_migration", return_value=False @@ -196,6 +198,7 @@ class TestSetupWizardOpenclawIntegration: patch.object(setup_mod, "setup_tools"), patch.object(setup_mod, "save_config"), patch.object(setup_mod, "_print_setup_summary"), + patch.object(setup_mod, "_offer_launch_chat"), ): setup_mod.run_setup_wizard(args) @@ -218,6 +221,7 @@ class TestSetupWizardOpenclawIntegration: patch.object(setup_mod, "is_interactive_stdin", return_value=True), patch("hermes_cli.auth.get_active_provider", return_value=None), patch("builtins.input", return_value=""), + patch.object(setup_mod, "prompt_choice", return_value=1), patch.object(setup_mod, "_offer_openclaw_migration", return_value=True), patch.object(setup_mod, "setup_model_provider"), patch.object(setup_mod, "setup_terminal_backend"), @@ -226,6 +230,7 @@ class TestSetupWizardOpenclawIntegration: patch.object(setup_mod, "setup_tools"), patch.object(setup_mod, "save_config"), patch.object(setup_mod, "_print_setup_summary"), + patch.object(setup_mod, "_offer_launch_chat"), ): setup_mod.run_setup_wizard(args) @@ -249,6 +254,7 @@ class TestSetupWizardOpenclawIntegration: patch.object(setup_mod, "is_interactive_stdin", return_value=True), patch("hermes_cli.auth.get_active_provider", return_value=None), patch("builtins.input", return_value=""), + patch.object(setup_mod, "prompt_choice", return_value=1), patch.object(setup_mod, "_offer_openclaw_migration", 
return_value=True), patch.object(setup_mod, "setup_model_provider") as setup_model_provider, patch.object(setup_mod, "setup_terminal_backend"), @@ -257,6 +263,7 @@ class TestSetupWizardOpenclawIntegration: patch.object(setup_mod, "setup_tools"), patch.object(setup_mod, "save_config"), patch.object(setup_mod, "_print_setup_summary"), + patch.object(setup_mod, "_offer_launch_chat"), ): setup_mod.run_setup_wizard(args) @@ -438,6 +445,7 @@ class TestSetupWizardSkipsConfiguredSections: patch.object(setup_mod, "is_interactive_stdin", return_value=True), patch("hermes_cli.auth.get_active_provider", return_value=None), patch("builtins.input", return_value=""), + patch.object(setup_mod, "prompt_choice", return_value=1), # Migration succeeds and flips the env_side flag patch.object( setup_mod, "_offer_openclaw_migration", diff --git a/tests/test_anthropic_oauth_flow.py b/tests/test_anthropic_oauth_flow.py index 3b52831a..61cd6155 100644 --- a/tests/test_anthropic_oauth_flow.py +++ b/tests/test_anthropic_oauth_flow.py @@ -40,6 +40,7 @@ def test_run_anthropic_oauth_flow_manual_token_still_persists(tmp_path, monkeypa monkeypatch.setattr("agent.anthropic_adapter.read_claude_code_credentials", lambda: None) monkeypatch.setattr("agent.anthropic_adapter.is_claude_code_token_valid", lambda creds: False) monkeypatch.setattr("builtins.input", lambda _prompt="": "sk-ant-oat01-manual-token") + monkeypatch.setattr("getpass.getpass", lambda _prompt="": "sk-ant-oat01-manual-token") from hermes_cli.main import _run_anthropic_oauth_flow diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 53e48502..bd78a98e 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -538,7 +538,7 @@ def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys): return "openrouter" monkeypatch.setattr("hermes_cli.auth.resolve_provider", _resolve_provider) - monkeypatch.setattr(hermes_main, 
"_prompt_provider_choice", lambda choices: len(choices) - 1) + monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices, **kwargs: len(choices) - 1) monkeypatch.setattr("sys.stdin", type("FakeTTY", (), {"isatty": lambda self: True})()) hermes_main.cmd_model(SimpleNamespace()) @@ -579,6 +579,7 @@ def test_model_flow_custom_saves_verified_v1_base_url(monkeypatch, capsys): # "Use this model? [Y/n]:" — confirm with Enter, then context length. answers = iter(["http://localhost:8000", "local-key", "", ""]) monkeypatch.setattr("builtins.input", lambda _prompt="": next(answers)) + monkeypatch.setattr("getpass.getpass", lambda _prompt="": next(answers)) hermes_main._model_flow_custom({}) output = capsys.readouterr().out @@ -601,7 +602,7 @@ def test_cmd_model_forwards_nous_login_tls_options(monkeypatch): monkeypatch.setattr("hermes_cli.config.save_env_value", lambda key, value: None) monkeypatch.setattr("hermes_cli.auth.resolve_provider", lambda requested, **kwargs: "nous") monkeypatch.setattr("hermes_cli.auth.get_provider_auth_state", lambda provider_id: None) - monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices: 0) + monkeypatch.setattr(hermes_main, "_prompt_provider_choice", lambda choices, **kwargs: 0) captured = {} diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py index de33a0b9..354c95dd 100644 --- a/tests/test_codex_execution_paths.py +++ b/tests/test_codex_execution_paths.py @@ -152,11 +152,22 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch): runner._provider_routing = {} runner._fallback_model = None runner._running_agents = {} + runner._smart_model_routing = {} from unittest.mock import MagicMock, AsyncMock runner.hooks = MagicMock() runner.hooks.emit = AsyncMock() runner.hooks.loaded_hooks = [] runner._session_db = None + # Ensure model resolution returns the codex model even if xdist + # leaked env vars cleared HERMES_MODEL. 
+ monkeypatch.setattr( + gateway_run.GatewayRunner, + "_resolve_turn_agent_config", + lambda self, msg, model, runtime: { + "model": model or "gpt-5.3-codex", + "runtime": runtime, + }, + ) source = SessionSource( platform=Platform.LOCAL, diff --git a/tests/test_gemini_provider.py b/tests/test_gemini_provider.py index d0cba5d6..b448ca51 100644 --- a/tests/test_gemini_provider.py +++ b/tests/test_gemini_provider.py @@ -171,7 +171,11 @@ class TestGeminiModelNormalization: class TestGeminiContextLength: def test_gemma_4_31b_context(self): - ctx = get_model_context_length("gemma-4-31b-it", provider="gemini") + # Mock external API lookups to test against hardcoded defaults + # (models.dev and OpenRouter may return different values like 262144). + with patch("agent.models_dev.lookup_models_dev_context", return_value=None), \ + patch("agent.model_metadata.fetch_model_metadata", return_value={}): + ctx = get_model_context_length("gemma-4-31b-it", provider="gemini") assert ctx == 256000 def test_gemma_4_26b_context(self): diff --git a/tests/test_hermes_logging.py b/tests/test_hermes_logging.py index 7b4004ef..5b40e632 100644 --- a/tests/test_hermes_logging.py +++ b/tests/test_hermes_logging.py @@ -14,14 +14,29 @@ import hermes_logging @pytest.fixture(autouse=True) def _reset_logging_state(): """Reset the module-level sentinel and clean up root logger handlers - added by setup_logging() so tests don't leak state.""" + added by setup_logging() so tests don't leak state. + + Under xdist (-n auto) other test modules may have called setup_logging() + in the same worker process, leaving RotatingFileHandlers on the root + logger. We strip ALL RotatingFileHandlers before each test so the count + assertions are stable regardless of test ordering. 
+ """ hermes_logging._logging_initialized = False root = logging.getLogger() - original_handlers = list(root.handlers) + # Strip ALL RotatingFileHandlers — not just the ones we added — so that + # handlers leaked from other test modules in the same xdist worker don't + # pollute our counts. + pre_existing = [] + for h in list(root.handlers): + if isinstance(h, RotatingFileHandler): + root.removeHandler(h) + h.close() + else: + pre_existing.append(h) yield # Restore — remove any handlers added during the test. for h in list(root.handlers): - if h not in original_handlers: + if h not in pre_existing: root.removeHandler(h) h.close() hermes_logging._logging_initialized = False diff --git a/tests/test_timezone.py b/tests/test_timezone.py index 9848212c..2d021611 100644 --- a/tests/test_timezone.py +++ b/tests/test_timezone.py @@ -136,8 +136,11 @@ class TestCodeExecutionTZ: """Verify TZ env var is passed to sandboxed child process via real execute_code.""" @pytest.fixture(autouse=True) - def _import_execute_code(self): + def _import_execute_code(self, monkeypatch): """Lazy-import execute_code to avoid pulling in firecrawl at collection time.""" + # Force local backend — other tests in the same xdist worker may leak + # TERMINAL_ENV=modal/docker which causes modal.exception.AuthError. + monkeypatch.setenv("TERMINAL_ENV", "local") try: from tools.code_execution_tool import execute_code self._execute_code = execute_code diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index 9d6df27c..085ffad2 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -15,9 +15,13 @@ Run with: python -m pytest tests/test_code_execution.py -v import pytest # pytestmark removed — tests run fine (61 pass, ~99s) - import json import os + +# Force local terminal backend for ALL tests in this file. 
+# Under xdist, another test may leak TERMINAL_ENV=modal/docker, sending +# execute_code down the remote path → modal.exception.AuthError. +os.environ["TERMINAL_ENV"] = "local" import sys import time import threading @@ -325,7 +329,7 @@ class TestStubSchemaDrift(unittest.TestCase): # Parameters that are internal (injected by the handler, not user-facing) _INTERNAL_PARAMS = {"task_id", "user_task"} # Parameters intentionally blocked in the sandbox - _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty"} + _BLOCKED_TERMINAL_PARAMS = {"background", "check_interval", "pty", "notify_on_complete"} def test_stubs_cover_all_schema_params(self): """Every user-facing parameter in the real schema must appear in the diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index a20d23fc..c1e615bd 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -1,6 +1,7 @@ """Tests for tools/skill_manager_tool.py — skill creation, editing, and deletion.""" import json +from contextlib import contextmanager from pathlib import Path from unittest.mock import patch @@ -24,6 +25,15 @@ from tools.skill_manager_tool import ( ) +@contextmanager +def _skill_dir(tmp_path): + """Patch both SKILLS_DIR and get_all_skills_dirs so _find_skill searches + only the temp directory — not the real ~/.hermes/skills/.""" + with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path), \ + patch("agent.skill_utils.get_all_skills_dirs", return_value=[tmp_path]): + yield + + VALID_SKILL_CONTENT = """\ --- name: test-skill @@ -179,32 +189,32 @@ class TestValidateFilePath: class TestCreateSkill: def test_create_skill(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _create_skill("my-skill", VALID_SKILL_CONTENT) assert result["success"] is True assert (tmp_path / "my-skill" / "SKILL.md").exists() def test_create_with_category(self, tmp_path): - with 
patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _create_skill("my-skill", VALID_SKILL_CONTENT, category="devops") assert result["success"] is True assert (tmp_path / "devops" / "my-skill" / "SKILL.md").exists() assert result["category"] == "devops" def test_create_duplicate_blocked(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _create_skill("my-skill", VALID_SKILL_CONTENT) assert result["success"] is False assert "already exists" in result["error"] def test_create_invalid_name(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _create_skill("Invalid Name!", VALID_SKILL_CONTENT) assert result["success"] is False def test_create_invalid_content(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _create_skill("my-skill", "no frontmatter here") assert result["success"] is False @@ -212,7 +222,8 @@ class TestCreateSkill: skills_dir = tmp_path / "skills" skills_dir.mkdir() - with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir): + with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir), \ + patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_dir]): result = _create_skill("my-skill", VALID_SKILL_CONTENT, category="../escape") assert result["success"] is False @@ -224,7 +235,8 @@ class TestCreateSkill: skills_dir.mkdir() outside = tmp_path / "outside" - with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir): + with patch("tools.skill_manager_tool.SKILLS_DIR", skills_dir), \ + patch("agent.skill_utils.get_all_skills_dirs", return_value=[skills_dir]): result = _create_skill("my-skill", VALID_SKILL_CONTENT, category=str(outside)) assert result["success"] is False @@ -234,7 +246,7 @@ class TestCreateSkill: class TestEditSkill: def 
test_edit_existing_skill(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _edit_skill("my-skill", VALID_SKILL_CONTENT_2) assert result["success"] is True @@ -242,13 +254,13 @@ class TestEditSkill: assert "Updated description" in content def test_edit_nonexistent_skill(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _edit_skill("nonexistent", VALID_SKILL_CONTENT) assert result["success"] is False assert "not found" in result["error"] def test_edit_invalid_content_rejected(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _edit_skill("my-skill", "no frontmatter") assert result["success"] is False @@ -259,7 +271,7 @@ class TestEditSkill: class TestPatchSkill: def test_patch_unique_match(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _patch_skill("my-skill", "Do the thing.", "Do the new thing.") assert result["success"] is True @@ -267,7 +279,7 @@ class TestPatchSkill: assert "Do the new thing." in content def test_patch_nonexistent_string(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _patch_skill("my-skill", "this text does not exist", "replacement") assert result["success"] is False @@ -284,7 +296,7 @@ description: A test skill. word word """ - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", content) result = _patch_skill("my-skill", "word", "replaced") assert result["success"] is False @@ -301,39 +313,39 @@ description: A test skill. 
word word """ - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", content) result = _patch_skill("my-skill", "word", "replaced", replace_all=True) assert result["success"] is True def test_patch_supporting_file(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) _write_file("my-skill", "references/api.md", "old text here") result = _patch_skill("my-skill", "old text", "new text", file_path="references/api.md") assert result["success"] is True def test_patch_skill_not_found(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _patch_skill("nonexistent", "old", "new") assert result["success"] is False class TestDeleteSkill: def test_delete_existing(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _delete_skill("my-skill") assert result["success"] is True assert not (tmp_path / "my-skill").exists() def test_delete_nonexistent(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _delete_skill("nonexistent") assert result["success"] is False def test_delete_cleans_empty_category_dir(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT, category="devops") _delete_skill("my-skill") assert not (tmp_path / "devops").exists() @@ -346,19 +358,19 @@ class TestDeleteSkill: class TestWriteFile: def test_write_reference_file(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _write_file("my-skill", "references/api.md", "# API\nEndpoint docs.") assert 
result["success"] is True assert (tmp_path / "my-skill" / "references" / "api.md").exists() def test_write_to_nonexistent_skill(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): result = _write_file("nonexistent", "references/doc.md", "content") assert result["success"] is False def test_write_to_disallowed_path(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _write_file("my-skill", "secret/evil.py", "malicious") assert result["success"] is False @@ -366,7 +378,7 @@ class TestWriteFile: class TestRemoveFile: def test_remove_existing_file(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) _write_file("my-skill", "references/api.md", "content") result = _remove_file("my-skill", "references/api.md") @@ -374,7 +386,7 @@ class TestRemoveFile: assert not (tmp_path / "my-skill" / "references" / "api.md").exists() def test_remove_nonexistent_file(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): _create_skill("my-skill", VALID_SKILL_CONTENT) result = _remove_file("my-skill", "references/nope.md") assert result["success"] is False @@ -387,27 +399,27 @@ class TestRemoveFile: class TestSkillManageDispatcher: def test_unknown_action(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): raw = skill_manage(action="explode", name="test") result = json.loads(raw) assert result["success"] is False assert "Unknown action" in result["error"] def test_create_without_content(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): raw = skill_manage(action="create", name="test") result = json.loads(raw) assert result["success"] is False assert 
"content" in result["error"].lower() def test_patch_without_old_string(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): raw = skill_manage(action="patch", name="test") result = json.loads(raw) assert result["success"] is False def test_full_create_via_dispatcher(self, tmp_path): - with patch("tools.skill_manager_tool.SKILLS_DIR", tmp_path): + with _skill_dir(tmp_path): raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT) result = json.loads(raw) assert result["success"] is True From aa7473cabd62e144647dd7d483b79d0a33d9f672 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 10:06:25 -0700 Subject: [PATCH 072/154] feat: replace z-ai/glm-5 with z-ai/glm-5.1 in OpenRouter and Nous model lists --- hermes_cli/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 85413267..857bd2ed 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -44,7 +44,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("minimax/minimax-m2.7", ""), ("minimax/minimax-m2.5", ""), - ("z-ai/glm-5", ""), + ("z-ai/glm-5.1", ""), ("z-ai/glm-5-turbo", ""), ("moonshotai/kimi-k2.5", ""), ("x-ai/grok-4.20-beta", ""), @@ -75,7 +75,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "stepfun/step-3.5-flash", "minimax/minimax-m2.7", "minimax/minimax-m2.5", - "z-ai/glm-5", + "z-ai/glm-5.1", "z-ai/glm-5-turbo", "moonshotai/kimi-k2.5", "x-ai/grok-4.20-beta", From c58e16757ad05e6b75289b34745608e26d61a9f7 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:17:44 -0700 Subject: [PATCH 073/154] docs: fix 40+ discrepancies between documentation and codebase (#5818) Comprehensive audit of all ~100 doc pages against the actual code, fixing: Reference docs: - HERMES_API_TIMEOUT default 900 -> 1800 (env-vars) - TERMINAL_DOCKER_IMAGE default python:3.11 -> 
nikolaik/python-nodejs (env-vars) - compression.summary_model default shown as gemini -> actually empty string (env-vars) - Add missing GOOGLE_API_KEY, GEMINI_API_KEY, GEMINI_BASE_URL env vars (env-vars) - Add missing /branch (/fork) slash command (slash-commands) - Fix hermes-cli tool count 39 -> 38 (toolsets-reference) - Fix hermes-api-server drop list to include text_to_speech (toolsets-reference) - Fix total tool count 47 -> 48, standalone 14 -> 15 (tools-reference) User guide: - web_extract.timeout default 30 -> 360 (configuration) - Remove display.theme_mode (not implemented in code) (configuration) - Remove display.background_process_notifications (not in defaults) (configuration) - Browser inactivity timeout 300/5min -> 120/2min (browser) - Screenshot path browser_screenshots -> cache/screenshots (browser) - batch_runner default model claude-sonnet-4-20250514 -> claude-sonnet-4.6 - Add minimax to TTS provider list (voice-mode) - Remove credential_pool_strategies from auth.json example (credential-pools) - Fix Slack token path platforms/slack/ -> root ~/.hermes/ (slack) - Fix Matrix store path for new installs (matrix) - Fix WhatsApp session path for new installs (whatsapp) - Fix HomeAssistant config from gateway.json to config.yaml (homeassistant) - Fix WeCom gateway start command (wecom) Developer guide: - Fix tool/toolset counts in architecture overview - Update line counts: main.py ~5500, setup.py ~3100, run.py ~7500, mcp_tool ~2200 - Replace nonexistent agent/memory_store.py with memory_manager.py + memory_provider.py - Update _discover_tools() list: remove honcho_tools, add skill_manager_tool - Add session_search and delegate_task to intercepted tools list (agent-loop) - Fix budget warning: two-tier system (70% caution, 90% warning) (agent-loop) - Fix gateway auth order (per-platform first, global last) (gateway-internals) - Fix email_adapter.py -> email.py, add webhook.py + api_server.py (gateway-internals) - Add 7 missing providers to 
provider-runtime list Other: - Add Docker --cap-add entries to security doc - Fix Python version 3.10+ -> 3.11+ (contributing) - Fix AGENTS.md discovery claim (not hierarchical walk) (tips) - Fix cron 'add' -> canonical 'create' (cron-internals) - Add pre_api_request/post_api_request hooks to plugin guide - Add Google/Gemini provider to providers page - Clarify OPENAI_BASE_URL deprecation (providers) --- website/docs/developer-guide/agent-loop.md | 8 +++-- website/docs/developer-guide/architecture.md | 15 +++++---- website/docs/developer-guide/contributing.md | 2 +- .../docs/developer-guide/cron-internals.md | 2 +- .../docs/developer-guide/gateway-internals.md | 11 ++++--- .../docs/developer-guide/provider-runtime.md | 7 ++++ website/docs/developer-guide/tools-runtime.md | 3 +- website/docs/guides/build-a-hermes-plugin.md | 2 ++ website/docs/guides/tips.md | 4 +-- website/docs/integrations/providers.md | 5 +-- .../docs/reference/environment-variables.md | 9 +++-- website/docs/reference/slash-commands.md | 1 + website/docs/reference/tools-reference.md | 4 +-- website/docs/reference/toolsets-reference.md | 4 +-- website/docs/user-guide/configuration.md | 18 ++-------- .../user-guide/features/batch-processing.md | 8 ++--- website/docs/user-guide/features/browser.md | 8 ++--- .../user-guide/features/credential-pools.md | 3 -- .../docs/user-guide/features/voice-mode.md | 2 +- .../user-guide/messaging/homeassistant.md | 33 ++++++++++--------- website/docs/user-guide/messaging/matrix.md | 4 +-- website/docs/user-guide/messaging/slack.md | 2 +- website/docs/user-guide/messaging/wecom.md | 2 +- website/docs/user-guide/messaging/whatsapp.md | 8 ++--- website/docs/user-guide/security.md | 3 ++ 25 files changed, 90 insertions(+), 78 deletions(-) diff --git a/website/docs/developer-guide/agent-loop.md b/website/docs/developer-guide/agent-loop.md index 39a96df6..4728a634 100644 --- a/website/docs/developer-guide/agent-loop.md +++ b/website/docs/developer-guide/agent-loop.md @@ 
-151,9 +151,11 @@ for each tool_call in response.tool_calls: Some tools are intercepted by `run_agent.py` *before* reaching `handle_function_call()`: | Tool | Why intercepted | -|------|-----------------| +|------|--------------------| | `todo` | Reads/writes agent-local task state | | `memory` | Writes to persistent memory files with character limits | +| `session_search` | Queries session history via the agent's session DB | +| `delegate_task` | Spawns subagent(s) with isolated context | These tools modify agent state directly and return synthetic tool results without going through the registry. @@ -180,7 +182,9 @@ The agent tracks iterations via `IterationBudget`: - Default: 90 iterations (configurable via `agent.max_turns`) - Shared across parent and child agents — a subagent consumes from the parent's budget -- At 70%+ usage, `_get_budget_warning()` appends a `[BUDGET WARNING: ...]` to the last tool result +- Two-tier budget pressure via `_get_budget_warning()`: + - At 70%+ usage (caution tier): appends `[BUDGET: Iteration X/Y. N iterations left. Start consolidating your work.]` to the last tool result + - At 90%+ usage (warning tier): appends `[BUDGET WARNING: Iteration X/Y. Only N iteration(s) left. Provide your final response NOW.]` - At 100%, the agent stops and returns a summary of work done ### Fallback Model diff --git a/website/docs/developer-guide/architecture.md b/website/docs/developer-guide/architecture.md index ab143dc2..c08161b3 100644 --- a/website/docs/developer-guide/architecture.md +++ b/website/docs/developer-guide/architecture.md @@ -32,8 +32,8 @@ This page is the top-level map of Hermes Agent internals. Use it to orient yours │ ┌──────┴───────┐ ┌──────┴───────┐ ┌──────┴───────┐ │ │ │ Compression │ │ 3 API Modes │ │ Tool Registry│ │ │ │ & Caching │ │ chat_compl. │ │ (registry.py)│ │ -│ │ │ │ codex_resp. │ │ 47 tools │ │ -│ │ │ │ anthropic │ │ 37 toolsets │ │ +│ │ │ │ codex_resp. 
│ │ 48 tools │ │ +│ │ │ │ anthropic │ │ 40 toolsets │ │ │ └──────────────┘ └──────────────┘ └──────────────┘ │ └─────────────────────────────────────────────────────────────────────┘ │ │ @@ -70,18 +70,19 @@ hermes-agent/ │ ├── anthropic_adapter.py # Anthropic Messages API format conversion │ ├── display.py # KawaiiSpinner, tool preview formatting │ ├── skill_commands.py # Skill slash commands -│ ├── memory_store.py # Persistent memory read/write +│ ├── memory_manager.py # Memory manager orchestration +│ ├── memory_provider.py # Memory provider ABC │ └── trajectory.py # Trajectory saving helpers │ ├── hermes_cli/ # CLI subcommands and setup -│ ├── main.py # Entry point — all `hermes` subcommands (~4,200 lines) +│ ├── main.py # Entry point — all `hermes` subcommands (~5,500 lines) │ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration │ ├── commands.py # COMMAND_REGISTRY — central slash command definitions │ ├── auth.py # PROVIDER_REGISTRY, credential resolution │ ├── runtime_provider.py # Provider → api_mode + credentials │ ├── models.py # Model catalog, provider model lists │ ├── model_switch.py # /model command logic (CLI + gateway shared) -│ ├── setup.py # Interactive setup wizard (~3,500 lines) +│ ├── setup.py # Interactive setup wizard (~3,100 lines) │ ├── skin_engine.py # CLI theming engine │ ├── skills_config.py # hermes skills — enable/disable per platform │ ├── skills_hub.py # /skills slash command @@ -100,14 +101,14 @@ hermes-agent/ │ ├── browser_tool.py # 11 browser automation tools │ ├── code_execution_tool.py # execute_code sandbox │ ├── delegate_tool.py # Subagent delegation -│ ├── mcp_tool.py # MCP client (~1,050 lines) +│ ├── mcp_tool.py # MCP client (~2,200 lines) │ ├── credential_files.py # File-based credential passthrough │ ├── env_passthrough.py # Env var passthrough for sandboxes │ ├── ansi_strip.py # ANSI escape stripping │ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity) │ ├── gateway/ # 
Messaging platform gateway -│ ├── run.py # GatewayRunner — message dispatch (~5,800 lines) +│ ├── run.py # GatewayRunner — message dispatch (~7,500 lines) │ ├── session.py # SessionStore — conversation persistence │ ├── delivery.py # Outbound message delivery │ ├── pairing.py # DM pairing authorization diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md index 603b416a..f9b9e0ec 100644 --- a/website/docs/developer-guide/contributing.md +++ b/website/docs/developer-guide/contributing.md @@ -33,7 +33,7 @@ We value contributions in this order: | Requirement | Notes | |-------------|-------| | **Git** | With `--recurse-submodules` support | -| **Python 3.10+** | uv will install it if missing | +| **Python 3.11+** | uv will install it if missing | | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) | | **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge | diff --git a/website/docs/developer-guide/cron-internals.md b/website/docs/developer-guide/cron-internals.md index 060a8400..cc8435db 100644 --- a/website/docs/developer-guide/cron-internals.md +++ b/website/docs/developer-guide/cron-internals.md @@ -185,7 +185,7 @@ The `hermes cron` CLI provides direct job management: ```bash hermes cron list # Show all jobs -hermes cron add # Interactive job creation +hermes cron create # Interactive job creation (alias: add) hermes cron edit # Edit job configuration hermes cron pause # Pause a running job hermes cron resume # Resume a paused job diff --git a/website/docs/developer-guide/gateway-internals.md b/website/docs/developer-guide/gateway-internals.md index f875c401..1371bdd3 100644 --- a/website/docs/developer-guide/gateway-internals.md +++ b/website/docs/developer-guide/gateway-internals.md @@ -12,7 +12,7 @@ The messaging gateway is the long-running process that connects Hermes to 14+ ex | File | Purpose | |------|---------| -| `gateway/run.py` | `GatewayRunner` — main loop, 
slash commands, message dispatch (~7,200 lines) | +| `gateway/run.py` | `GatewayRunner` — main loop, slash commands, message dispatch (~7,500 lines) | | `gateway/session.py` | `SessionStore` — conversation persistence and session key construction | | `gateway/delivery.py` | Outbound message delivery to target platforms/channels | | `gateway/pairing.py` | DM pairing flow for user authorization | @@ -91,10 +91,11 @@ Commands that must reach the runner while the agent is blocked (like `/approve`) The gateway uses a multi-layer authorization check, evaluated in order: -1. **Gateway-wide allow-all** (`GATEWAY_ALLOW_ALL_USERS`) — if set, all users are authorized +1. **Per-platform allow-all flag** (e.g., `TELEGRAM_ALLOW_ALL_USERS`) — if set, all users on that platform are authorized 2. **Platform allowlist** (e.g., `TELEGRAM_ALLOWED_USERS`) — comma-separated user IDs 3. **DM pairing** — authenticated users can pair new users via a pairing code -4. **Admin escalation** — some commands require admin status beyond basic authorization +4. **Global allow-all** (`GATEWAY_ALLOW_ALL_USERS`) — if set, all users across all platforms are authorized +5. 
**Default: deny** — unauthorized users are rejected ### DM Pairing Flow @@ -154,11 +155,13 @@ gateway/platforms/ ├── signal.py # Signal via signal-cli REST API ├── matrix.py # Matrix via matrix-nio (optional E2EE) ├── mattermost.py # Mattermost WebSocket API -├── email_adapter.py # Email via IMAP/SMTP +├── email.py # Email via IMAP/SMTP ├── sms.py # SMS via Twilio ├── dingtalk.py # DingTalk WebSocket ├── feishu.py # Feishu/Lark WebSocket or webhook ├── wecom.py # WeCom (WeChat Work) callback +├── webhook.py # Inbound/outbound webhook adapter +├── api_server.py # REST API server adapter └── homeassistant.py # Home Assistant conversation integration ``` diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md index 00772959..bf9abe0c 100644 --- a/website/docs/developer-guide/provider-runtime.md +++ b/website/docs/developer-guide/provider-runtime.md @@ -42,11 +42,18 @@ Current provider families include: - OpenRouter - Nous Portal - OpenAI Codex +- Copilot / Copilot ACP - Anthropic (native) +- Google / Gemini +- Alibaba / DashScope +- DeepSeek - Z.AI - Kimi / Moonshot - MiniMax - MiniMax China +- Kilo Code +- Hugging Face +- OpenCode Zen / OpenCode Go - Custom (`provider: custom`) — first-class provider for any OpenAI-compatible endpoint - Named custom providers (`custom_providers` list in config.yaml) diff --git a/website/docs/developer-guide/tools-runtime.md b/website/docs/developer-guide/tools-runtime.md index f6fbc86d..8e349a50 100644 --- a/website/docs/developer-guide/tools-runtime.md +++ b/website/docs/developer-guide/tools-runtime.md @@ -55,6 +55,7 @@ _modules = [ "tools.mixture_of_agents_tool", "tools.image_generation_tool", "tools.skills_tool", + "tools.skill_manager_tool", "tools.browser_tool", "tools.cronjob_tools", "tools.rl_training_tool", @@ -67,7 +68,7 @@ _modules = [ "tools.delegate_tool", "tools.process_registry", "tools.send_message_tool", - "tools.honcho_tools", + # "tools.honcho_tools", # 
Removed — Honcho is now a memory provider plugin "tools.homeassistant_tool", ] ``` diff --git a/website/docs/guides/build-a-hermes-plugin.md b/website/docs/guides/build-a-hermes-plugin.md index 91fb62f3..85b1c817 100644 --- a/website/docs/guides/build-a-hermes-plugin.md +++ b/website/docs/guides/build-a-hermes-plugin.md @@ -405,6 +405,8 @@ Each hook is documented in full on the **[Event Hooks reference](/docs/user-guid | [`post_llm_call`](/docs/user-guide/features/hooks#post_llm_call) | Once per turn, after the tool-calling loop (successful turns only) | `session_id: str, user_message: str, assistant_response: str, conversation_history: list, model: str, platform: str` | ignored | | [`on_session_start`](/docs/user-guide/features/hooks#on_session_start) | New session created (first turn only) | `session_id: str, model: str, platform: str` | ignored | | [`on_session_end`](/docs/user-guide/features/hooks#on_session_end) | End of every `run_conversation` call + CLI exit | `session_id: str, completed: bool, interrupted: bool, model: str, platform: str` | ignored | +| [`pre_api_request`](/docs/user-guide/features/hooks#pre_api_request) | Before each HTTP request to the LLM provider | `method: str, url: str, headers: dict, body: dict` | ignored | +| [`post_api_request`](/docs/user-guide/features/hooks#post_api_request) | After each HTTP response from the LLM provider | `method: str, url: str, status_code: int, response: dict` | ignored | Most hooks are fire-and-forget observers — their return values are ignored. The exception is `pre_llm_call`, which can inject context into the conversation. diff --git a/website/docs/guides/tips.md b/website/docs/guides/tips.md index 804e9046..4d21b735 100644 --- a/website/docs/guides/tips.md +++ b/website/docs/guides/tips.md @@ -95,9 +95,9 @@ Use `SOUL.md` for durable personality. Use `AGENTS.md` for project-specific inst Already have a `.cursorrules` or `.cursor/rules/*.mdc` file? Hermes reads those too. 
No need to duplicate your coding conventions — they're loaded automatically from the working directory. -### Hierarchical Discovery +### Discovery -Hermes walks the directory tree and discovers **all** `AGENTS.md` files at every level. In a monorepo, put project-wide conventions at the root and team-specific ones in subdirectories — they're all concatenated together with path headers. +Hermes loads the top-level `AGENTS.md` from the current working directory at session start. Subdirectory `AGENTS.md` files are discovered lazily during tool calls (via `subdirectory_hints.py`) and injected into tool results — they are not loaded upfront into the system prompt. :::tip Keep context files focused and concise. Every character counts against your token budget since they're injected into every single message. diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index ca6a0c51..e23924dc 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -31,7 +31,8 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) | | **DeepSeek** | `DEEPSEEK_API_KEY` in `~/.hermes/.env` (provider: `deepseek`) | | **Hugging Face** | `HF_TOKEN` in `~/.hermes/.env` (provider: `huggingface`, aliases: `hf`) | -| **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | +| **Google / Gemini** | `GOOGLE_API_KEY` (or `GEMINI_API_KEY`) in `~/.hermes/.env` (provider: `gemini`) | +| **Custom Endpoint** | `hermes model` → choose "Custom endpoint" (saved in `config.yaml`) | :::tip Model key alias In the `model:` config section, you can use either `default:` or `model:` as the key name for your model ID. Both `model: { default: my-model }` and `model: { model: my-model }` work identically. 
@@ -219,7 +220,7 @@ model: ``` :::warning Legacy env vars -`OPENAI_BASE_URL` and `LLM_MODEL` in `.env` are **deprecated**. The CLI ignores `LLM_MODEL` entirely (only the gateway reads it). Use `hermes model` or edit `config.yaml` directly — both persist correctly across restarts and Docker containers. +`OPENAI_BASE_URL` and `LLM_MODEL` in `.env` are **deprecated**. `OPENAI_BASE_URL` is no longer consulted for endpoint resolution — `config.yaml` is the single source of truth. The CLI ignores `LLM_MODEL` entirely (only the gateway reads it as a fallback). Use `hermes model` or edit `config.yaml` directly — both persist correctly across restarts and Docker containers. ::: Both approaches persist to `config.yaml`, which is the source of truth for model, provider, and base URL. diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 89934932..7d40546c 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -39,6 +39,9 @@ All variables go in `~/.hermes/.env`. 
You can also set them with `hermes config | `KILOCODE_BASE_URL` | Override Kilo Code base URL (default: `https://api.kilo.ai/api/gateway`) | | `HF_TOKEN` | Hugging Face token for Inference Providers ([huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)) | | `HF_BASE_URL` | Override Hugging Face base URL (default: `https://router.huggingface.co/v1`) | +| `GOOGLE_API_KEY` | Google AI Studio API key ([aistudio.google.com/app/apikey](https://aistudio.google.com/app/apikey)) | +| `GEMINI_API_KEY` | Alias for `GOOGLE_API_KEY` | +| `GEMINI_BASE_URL` | Override Google AI Studio base URL | | `ANTHROPIC_API_KEY` | Anthropic Console API key ([console.anthropic.com](https://console.anthropic.com/)) | | `ANTHROPIC_TOKEN` | Manual or legacy Anthropic OAuth/setup-token override | | `DASHSCOPE_API_KEY` | Alibaba Cloud DashScope API key for Qwen models ([modelstudio.console.alibabacloud.com](https://modelstudio.console.alibabacloud.com/)) | @@ -108,7 +111,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | Variable | Description | |----------|-------------| | `TERMINAL_ENV` | Backend: `local`, `docker`, `ssh`, `singularity`, `modal`, `daytona` | -| `TERMINAL_DOCKER_IMAGE` | Docker image (default: `python:3.11`) | +| `TERMINAL_DOCKER_IMAGE` | Docker image (default: `nikolaik/python-nodejs:python3.11-nodejs20`) | | `TERMINAL_DOCKER_FORWARD_ENV` | JSON array of env var names to explicitly forward into Docker terminal sessions. Note: skill-declared `required_environment_variables` are forwarded automatically — you only need this for vars not declared by any skill. | | `TERMINAL_DOCKER_VOLUMES` | Additional Docker volume mounts (comma-separated `host:container` pairs) | | `TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE` | Advanced opt-in: mount the launch cwd into Docker `/workspace` (`true`/`false`, default: `false`) | @@ -262,7 +265,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. 
`TERMINAL_LIFETI | `HERMES_HUMAN_DELAY_MIN_MS` | Custom delay range minimum (ms) | | `HERMES_HUMAN_DELAY_MAX_MS` | Custom delay range maximum (ms) | | `HERMES_QUIET` | Suppress non-essential output (`true`/`false`) | -| `HERMES_API_TIMEOUT` | LLM API call timeout in seconds (default: `900`) | +| `HERMES_API_TIMEOUT` | LLM API call timeout in seconds (default: `1800`) | | `HERMES_EXEC_ASK` | Enable execution approval prompts in gateway mode (`true`/`false`) | | `HERMES_ENABLE_PROJECT_PLUGINS` | Enable auto-discovery of repo-local plugins from `./.hermes/plugins/` (`true`/`false`, default: `false`) | | `HERMES_BACKGROUND_NOTIFICATIONS` | Background process notification mode in gateway: `all` (default), `result`, `error`, `off` | @@ -283,7 +286,7 @@ Context compression is configured exclusively through the `compression` section compression: enabled: true threshold: 0.50 - summary_model: google/gemini-3-flash-preview + summary_model: "" # empty = use main configured model summary_provider: auto summary_base_url: null # Custom OpenAI-compatible endpoint for summaries ``` diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index f750e7e7..89a30c46 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -37,6 +37,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/background ` (alias: `/bg`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). | | `/btw ` | Ephemeral side question using session context (no tools, not persisted). Useful for quick clarifications without affecting the conversation history. | | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. 
Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. | +| `/branch [name]` (alias: `/fork`) | Branch the current session (explore a different path) | ### Configuration diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index cd798697..0728945d 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool # Built-in Tools Reference -This page documents all 47 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. +This page documents all 48 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. -**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, and 14 standalone tools across other toolsets. +**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, and 15 standalone tools across other toolsets. :::tip MCP Tools In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration. diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md index 7d566e60..1c225b23 100644 --- a/website/docs/reference/toolsets-reference.md +++ b/website/docs/reference/toolsets-reference.md @@ -88,9 +88,9 @@ Platform toolsets define the complete tool configuration for a deployment target | Toolset | Differences from `hermes-cli` | |---------|-------------------------------| -| `hermes-cli` | Full toolset — all 39 tools including `clarify`. 
The default for interactive CLI sessions. | +| `hermes-cli` | Full toolset — all 38 tools including `clarify`. The default for interactive CLI sessions. | | `hermes-acp` | Drops `clarify`, `cronjob`, `image_generate`, `mixture_of_agents`, `send_message`, `text_to_speech`, homeassistant tools. Focused on coding tasks in IDE context. | -| `hermes-api-server` | Drops `clarify` and `send_message`. Adds everything else — suitable for programmatic access where user interaction isn't possible. | +| `hermes-api-server` | Drops `clarify`, `send_message`, and `text_to_speech`. Adds everything else — suitable for programmatic access where user interaction isn't possible. | | `hermes-telegram` | Same as `hermes-cli`. | | `hermes-discord` | Same as `hermes-cli`. | | `hermes-slack` | Same as `hermes-cli`. | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 06332908..2e26a9f6 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -574,7 +574,7 @@ auxiliary: model: "" # e.g. "google/gemini-2.5-flash" base_url: "" api_key: "" - timeout: 30 # seconds + timeout: 360 # seconds (6min) — per-attempt LLM summarization # Dangerous command approval classifier approval: @@ -622,7 +622,7 @@ auxiliary: ``` :::tip -Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision 30s, web_extract 30s, approval 30s, compression 120s. Increase these if you use slow local models for auxiliary tasks. Vision also has a separate `download_timeout` (default 30s) for the HTTP image download — increase this for slow connections or self-hosted image servers. +Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision 30s, web_extract 360s, approval 30s, compression 120s. Increase these if you use slow local models for auxiliary tasks. 
Vision also has a separate `download_timeout` (default 30s) for the HTTP image download — increase this for slow connections or self-hosted image servers. ::: :::info @@ -804,30 +804,16 @@ display: tool_progress: all # off | new | all | verbose tool_progress_command: false # Enable /verbose slash command in messaging gateway skin: default # Built-in or custom CLI skin (see user-guide/features/skins) - theme_mode: auto # auto | light | dark — color scheme for skin-aware rendering personality: "kawaii" # Legacy cosmetic field still surfaced in some summaries compact: false # Compact output mode (less whitespace) resume_display: full # full (show previous messages on resume) | minimal (one-liner only) bell_on_complete: false # Play terminal bell when agent finishes (great for long tasks) show_reasoning: false # Show model reasoning/thinking above each response (toggle with /reasoning show|hide) streaming: false # Stream tokens to terminal as they arrive (real-time output) - background_process_notifications: all # all | result | error | off (gateway only) show_cost: false # Show estimated $ cost in the CLI status bar tool_preview_length: 0 # Max chars for tool call previews (0 = no limit, show full paths/commands) ``` -### Theme mode - -The `theme_mode` setting controls whether skins render in light or dark mode: - -| Mode | Behavior | -|------|----------| -| `auto` (default) | Detects your terminal's background color automatically. Falls back to `dark` if detection fails. | -| `light` | Forces light-mode skin colors. Skins that define a `colors_light` override use those colors instead of the default dark-mode palette. | -| `dark` | Forces dark-mode skin colors. | - -This works with any skin — built-in or custom. Skin authors can provide `colors_light` in their skin definition for optimal light-terminal appearance. 
- | Mode | What you see | |------|-------------| | `off` | Silent — just the final response | diff --git a/website/docs/user-guide/features/batch-processing.md b/website/docs/user-guide/features/batch-processing.md index 40df279c..3cab1eba 100644 --- a/website/docs/user-guide/features/batch-processing.md +++ b/website/docs/user-guide/features/batch-processing.md @@ -20,7 +20,7 @@ python batch_runner.py \ --dataset_file=data/prompts.jsonl \ --batch_size=10 \ --run_name=my_first_run \ - --model=anthropic/claude-sonnet-4-20250514 \ + --model=anthropic/claude-sonnet-4.6 \ --num_workers=4 # Resume an interrupted run @@ -56,7 +56,7 @@ Entries can optionally include: | `--batch_size` | (required) | Prompts per batch | | `--run_name` | (required) | Name for this run (used for output dir and checkpointing) | | `--distribution` | `"default"` | Toolset distribution to sample from | -| `--model` | `claude-sonnet-4-20250514` | Model to use | +| `--model` | `claude-sonnet-4.6` | Model to use | | `--base_url` | `https://openrouter.ai/api/v1` | API base URL | | `--api_key` | (env var) | API key for model | | `--max_turns` | `10` | Maximum tool-calling iterations per prompt | @@ -127,7 +127,7 @@ Each line in `trajectories.jsonl` is a JSON object: "metadata": { "batch_num": 2, "timestamp": "2026-01-15T10:30:00", - "model": "anthropic/claude-sonnet-4-20250514" + "model": "anthropic/claude-sonnet-4.6" }, "completed": true, "partial": false, @@ -193,7 +193,7 @@ python batch_runner.py \ --dataset_file=data/coding_prompts.jsonl \ --batch_size=20 \ --run_name=coding_v1 \ - --model=anthropic/claude-sonnet-4-20250514 \ + --model=anthropic/claude-sonnet-4.6 \ --num_workers=8 \ --distribution=default \ --max_turns=15 diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index 0dafec10..bf7c6168 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -174,8 +174,8 @@ 
BROWSERBASE_KEEP_ALIVE=true # Examples: 600000 (10min), 1800000 (30min) BROWSERBASE_SESSION_TIMEOUT=600000 -# Inactivity timeout before auto-cleanup in seconds (default: 300) -BROWSER_INACTIVITY_TIMEOUT=300 +# Inactivity timeout before auto-cleanup in seconds (default: 120) +BROWSER_INACTIVITY_TIMEOUT=120 ``` ### Install agent-browser CLI @@ -265,7 +265,7 @@ The screenshot is saved persistently and the file path is returned alongside the What does the chart on this page show? ``` -Screenshots are stored in `~/.hermes/browser_screenshots/` and automatically cleaned up after 24 hours. +Screenshots are stored in `~/.hermes/cache/screenshots/` and automatically cleaned up after 24 hours. ### `browser_console` @@ -333,7 +333,7 @@ If paid features aren't available on your plan, Hermes automatically falls back ## Session Management - Each task gets an isolated browser session via Browserbase -- Sessions are automatically cleaned up after inactivity (default: 5 minutes) +- Sessions are automatically cleaned up after inactivity (default: 2 minutes) - A background thread checks every 30 seconds for stale sessions - Emergency cleanup runs on process exit to prevent orphaned sessions - Sessions are released via the Browserbase API (`REQUEST_RELEASE` status) diff --git a/website/docs/user-guide/features/credential-pools.md b/website/docs/user-guide/features/credential-pools.md index 275e08a0..50eb8ca6 100644 --- a/website/docs/user-guide/features/credential-pools.md +++ b/website/docs/user-guide/features/credential-pools.md @@ -215,9 +215,6 @@ Pool state is stored in `~/.hermes/auth.json` under the `credential_pool` key: } ] }, - "credential_pool_strategies": { - "openrouter": "round_robin" - } } ``` diff --git a/website/docs/user-guide/features/voice-mode.md b/website/docs/user-guide/features/voice-mode.md index 31d6ea27..2befd59e 100644 --- a/website/docs/user-guide/features/voice-mode.md +++ b/website/docs/user-guide/features/voice-mode.md @@ -395,7 +395,7 @@ stt: # 
Text-to-Speech tts: - provider: "edge" # "edge" (free) | "elevenlabs" | "openai" | "neutts" + provider: "edge" # "edge" (free) | "elevenlabs" | "openai" | "neutts" | "minimax" edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages elevenlabs: diff --git a/website/docs/user-guide/messaging/homeassistant.md b/website/docs/user-guide/messaging/homeassistant.md index ec72383b..f57b4397 100644 --- a/website/docs/user-guide/messaging/homeassistant.md +++ b/website/docs/user-guide/messaging/homeassistant.md @@ -130,22 +130,25 @@ The Home Assistant gateway adapter connects via WebSocket and subscribes to `sta By default, **no events are forwarded**. You must configure at least one of `watch_domains`, `watch_entities`, or `watch_all` to receive events. Without filters, a warning is logged at startup and all state changes are silently dropped. ::: -Configure which events the agent sees in `~/.hermes/gateway.json` under the Home Assistant platform's `extra` section: +Configure which events the agent sees in `~/.hermes/config.yaml` under the Home Assistant platform's `extra` section: -```json -{ - "platforms": { - "homeassistant": { - "enabled": true, - "extra": { - "watch_domains": ["climate", "binary_sensor", "alarm_control_panel", "light"], - "watch_entities": ["sensor.front_door_battery"], - "ignore_entities": ["sensor.uptime", "sensor.cpu_usage", "sensor.memory_usage"], - "cooldown_seconds": 30 - } - } - } -} +```yaml +platforms: + homeassistant: + enabled: true + extra: + watch_domains: + - climate + - binary_sensor + - alarm_control_panel + - light + watch_entities: + - sensor.front_door_battery + ignore_entities: + - sensor.uptime + - sensor.cpu_usage + - sensor.memory_usage + cooldown_seconds: 30 ``` | Setting | Default | Description | diff --git a/website/docs/user-guide/messaging/matrix.md b/website/docs/user-guide/messaging/matrix.md index 943751c1..6f476405 100644 --- a/website/docs/user-guide/messaging/matrix.md +++ 
b/website/docs/user-guide/messaging/matrix.md @@ -265,13 +265,13 @@ MATRIX_ENCRYPTION=true When E2EE is enabled, Hermes: -- Stores encryption keys in `~/.hermes/matrix/store/` +- Stores encryption keys in `~/.hermes/platforms/matrix/store/` (legacy installs: `~/.hermes/matrix/store/`) - Uploads device keys on first connection - Decrypts incoming messages and encrypts outgoing messages automatically - Auto-joins encrypted rooms when invited :::warning -If you delete the `~/.hermes/matrix/store/` directory, the bot loses its encryption keys. You'll need to verify the device again in your Matrix client. Back up this directory if you want to preserve encrypted sessions. +If you delete the `~/.hermes/platforms/matrix/store/` directory, the bot loses its encryption keys. You'll need to verify the device again in your Matrix client. Back up this directory if you want to preserve encrypted sessions. ::: :::info diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index 9b8edf0c..7ce1b035 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -384,7 +384,7 @@ platforms: In addition to tokens in the environment or config, Hermes also loads tokens from an **OAuth token file** at: ``` -~/.hermes/platforms/slack/slack_tokens.json +~/.hermes/slack_tokens.json ``` This file is a JSON object mapping team IDs to token entries: diff --git a/website/docs/user-guide/messaging/wecom.md b/website/docs/user-guide/messaging/wecom.md index 1a078a89..937872b9 100644 --- a/website/docs/user-guide/messaging/wecom.md +++ b/website/docs/user-guide/messaging/wecom.md @@ -50,7 +50,7 @@ WECOM_HOME_CHANNEL=chat_id ### 3. 
Start the gateway ```bash -hermes gateway start +hermes gateway ``` ## Features diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index 6011992e..ac6c07b7 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -134,7 +134,7 @@ The gateway starts the WhatsApp bridge automatically using the saved session. ## Session Persistence -The Baileys bridge saves its session under `~/.hermes/whatsapp/session`. This means: +The Baileys bridge saves its session under `~/.hermes/platforms/whatsapp/session`. This means: - **Sessions survive restarts** — you don't need to re-scan the QR code every time - The session data includes encryption keys and device credentials @@ -180,7 +180,7 @@ whatsapp: |---------|----------| | **QR code not scanning** | Ensure terminal is wide enough (60+ columns). Try a different terminal. Make sure you're scanning from the correct WhatsApp account (bot number, not personal). | | **QR code expires** | QR codes refresh every ~20 seconds. If it times out, restart `hermes whatsapp`. | -| **Session not persisting** | Check that `~/.hermes/whatsapp/session` exists and is writable. If containerized, mount it as a persistent volume. | +| **Session not persisting** | Check that `~/.hermes/platforms/whatsapp/session` exists and is writable. If containerized, mount it as a persistent volume. | | **Logged out unexpectedly** | WhatsApp unlinks devices after long inactivity. Keep the phone on and connected to the network, then re-pair with `hermes whatsapp` if needed. | | **Bridge crashes or reconnect loops** | Restart the gateway, update Hermes, and re-pair if the session was invalidated by a WhatsApp protocol change. | | **Bot stops working after WhatsApp update** | Update Hermes to get the latest bridge version, then re-pair. 
| @@ -206,8 +206,8 @@ whatsapp: unauthorized_dm_behavior: ignore ``` -- The `~/.hermes/whatsapp/session` directory contains full session credentials — protect it like a password -- Set file permissions: `chmod 700 ~/.hermes/whatsapp/session` +- The `~/.hermes/platforms/whatsapp/session` directory contains full session credentials — protect it like a password +- Set file permissions: `chmod 700 ~/.hermes/platforms/whatsapp/session` - Use a **dedicated phone number** for the bot to isolate risk from your personal account - If you suspect compromise, unlink the device from WhatsApp → Settings → Linked Devices - Phone numbers in logs are partially redacted, but review your log retention policy diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md index 22e76b5a..5554e896 100644 --- a/website/docs/user-guide/security.md +++ b/website/docs/user-guide/security.md @@ -277,6 +277,9 @@ Every container runs with these flags (defined in `tools/environments/docker.py` ```python _SECURITY_ARGS = [ "--cap-drop", "ALL", # Drop ALL Linux capabilities + "--cap-add", "DAC_OVERRIDE", # Root can write to bind-mounted dirs + "--cap-add", "CHOWN", # Package managers need file ownership + "--cap-add", "FOWNER", # Package managers need file ownership "--security-opt", "no-new-privileges", # Block privilege escalation "--pids-limit", "256", # Limit process count "--tmpfs", "/tmp:rw,nosuid,size=512m", # Size-limited /tmp From afe6c63c525dec0c58448c71ea05eaf0eafadf6e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:21:03 -0700 Subject: [PATCH 074/154] =?UTF-8?q?docs:=20comprehensive=20docs=20audit=20?= =?UTF-8?q?=E2=80=94=20cover=2013=20features=20from=20last=20week's=20PRs?= =?UTF-8?q?=20(#5815)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cover documentation gaps found by auditing all 50+ merged PRs from the past week: tools-reference.md: - Fix stale 
tool count (47→46, 11→10 browser tools) after browser_close removal - Document notify_on_complete parameter in terminal tool description telegram.md: - Add Interactive Model Picker section (inline keyboard, provider/model drill-down) discord.md: - Add Interactive Model Picker section (Select dropdowns, 120s timeout) - Add Native Slash Commands for Skills section (auto-registration at startup) signal.md: - Expand Attachments section with outgoing media delivery (send_image_file, send_voice, send_video, send_document via MEDIA: tags) webhooks.md: - Document {__raw__} special template token for full payload access - Document Forum Topic Delivery via message_thread_id in deliver_extra slack.md: - Fix stale/misleading thread reply docs — thread replies no longer require @mention when bot has active session (3 locations updated) security.md: - Add cross-session isolation (layer 6) and input sanitization (layer 7) to security layers overview feishu.md: - Add WebSocket Tuning section (ws_reconnect_interval, ws_ping_interval) - Add Per-Group Access Control section (group_rules with 5 policy types) credential-pools.md: - Add Delegation & Subagent Sharing section delegation.md: - Update key properties to mention credential pool inheritance providers.md: - Add Z.AI Endpoint Auto-Detection note - Add xAI (Grok) Prompt Caching section skills-catalog.md: - Add p5js to creative skills category --- website/docs/integrations/providers.md | 10 ++++ website/docs/reference/skills-catalog.md | 1 + website/docs/reference/tools-reference.md | 6 +-- .../user-guide/features/credential-pools.md | 10 ++++ .../docs/user-guide/features/delegation.md | 2 +- website/docs/user-guide/messaging/discord.md | 20 +++++++ website/docs/user-guide/messaging/feishu.md | 54 +++++++++++++++++++ website/docs/user-guide/messaging/signal.md | 17 +++++- website/docs/user-guide/messaging/slack.md | 7 ++- website/docs/user-guide/messaging/telegram.md | 13 +++++ website/docs/user-guide/messaging/webhooks.md | 25 
+++++++++ website/docs/user-guide/security.md | 4 +- 12 files changed, 158 insertions(+), 11 deletions(-) diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index e23924dc..74d4e631 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -168,6 +168,16 @@ model: Base URLs can be overridden with `GLM_BASE_URL`, `KIMI_BASE_URL`, `MINIMAX_BASE_URL`, `MINIMAX_CN_BASE_URL`, or `DASHSCOPE_BASE_URL` environment variables. +:::note Z.AI Endpoint Auto-Detection +When using the Z.AI / GLM provider, Hermes automatically probes multiple endpoints (global, China, coding variants) to find one that accepts your API key. You don't need to set `GLM_BASE_URL` manually — the working endpoint is detected and cached automatically. +::: + +### xAI (Grok) Prompt Caching + +When using xAI as a provider (any base URL containing `x.ai`), Hermes automatically enables prompt caching by sending the `x-grok-conv-id` header with every API request. This routes requests to the same server within a conversation session, allowing xAI's infrastructure to reuse cached system prompts and conversation history. + +No configuration is needed — caching activates automatically when an xAI endpoint is detected and a session ID is available. This reduces latency and cost for multi-turn conversations. + ### Hugging Face Inference Providers [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover. 
diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index fe282baf..e3c37ea6 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -47,6 +47,7 @@ Creative content generation — ASCII art, hand-drawn style diagrams, and visual | `ascii-art` | Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii, remote APIs (asciified, ascii.co.uk), and LLM fallback. No API keys required. | `creative/ascii-art` | | `ascii-video` | "Production pipeline for ASCII art video — any format. Converts video/audio/images/generative input into colored ASCII character video output (MP4, GIF, image sequence). Covers: video-to-ASCII conversion, audio-reactive music visualizers, generative ASCII art animations, hybrid… | `creative/ascii-video` | | `excalidraw` | Create hand-drawn style diagrams using Excalidraw JSON format. Generate .excalidraw files for architecture diagrams, flowcharts, sequence diagrams, concept maps, and more. Files can be opened at excalidraw.com or uploaded for shareable links. | `creative/excalidraw` | +| `p5js` | Production pipeline for interactive and generative visual art using p5.js. Create sketches, render them to images/video via headless browser, and serve live previews. Supports canvas animations, data visualizations, and creative coding experiments. | `creative/p5js` | ## devops diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index 0728945d..06f7a0e3 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool # Built-in Tools Reference -This page documents all 48 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. 
+This page documents all 47 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets. -**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, and 15 standalone tools across other toolsets. +**Quick counts:** 10 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, and 15 standalone tools across other toolsets. :::tip MCP Tools In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration. @@ -133,7 +133,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server | Tool | Description | Requires environment | |------|-------------|----------------------| | `process` | Manage background processes started with terminal(background=true). Actions: 'list' (show all), 'poll' (check status + new output), 'log' (full output with pagination), 'wait' (block until done or timeout), 'kill' (terminate), 'write' (sen… | — | -| `terminal` | Execute shell commands on a Linux environment. Filesystem persists between calls. Do NOT use cat/head/tail to read files — use read_file instead. Do NOT use grep/rg/find to search — use search_files instead. Do NOT use ls to list directori… | — | +| `terminal` | Execute shell commands on a Linux environment. Filesystem persists between calls. Set `background=true` for long-running servers. Set `notify_on_complete=true` (with `background=true`) to get an automatic notification when the process finishes — no polling needed. Do NOT use cat/head/tail — use read_file. Do NOT use grep/rg/find — use search_files. 
| — | ## `todo` toolset diff --git a/website/docs/user-guide/features/credential-pools.md b/website/docs/user-guide/features/credential-pools.md index 50eb8ca6..f4d11369 100644 --- a/website/docs/user-guide/features/credential-pools.md +++ b/website/docs/user-guide/features/credential-pools.md @@ -179,6 +179,16 @@ Hermes automatically discovers credentials from multiple sources and seeds the p Auto-seeded entries are updated on each pool load — if you remove an env var, its pool entry is automatically pruned. Manual entries (added via `hermes auth add`) are never auto-pruned. +## Delegation & Subagent Sharing + +When the agent spawns subagents via `delegate_task`, the parent's credential pool is automatically shared with children: + +- **Same provider** — the child receives the parent's full pool, enabling key rotation on rate limits +- **Different provider** — the child loads that provider's own pool (if configured) +- **No pool configured** — the child falls back to the inherited single API key + +This means subagents benefit from the same rate-limit resilience as the parent, with no extra configuration needed. Per-task credential leasing ensures children don't conflict with each other when rotating keys concurrently. + ## Thread Safety The credential pool uses a threading lock for all state mutations (`select()`, `mark_exhausted_and_rotate()`, `try_refresh_current()`, `mark_used()`). This ensures safe concurrent access when the gateway handles multiple chat sessions simultaneously. 
diff --git a/website/docs/user-guide/features/delegation.md b/website/docs/user-guide/features/delegation.md index 80a5ad62..2e22bada 100644 --- a/website/docs/user-guide/features/delegation.md +++ b/website/docs/user-guide/features/delegation.md @@ -184,7 +184,7 @@ Delegation has a **depth limit of 2** — a parent (depth 0) can spawn children - Subagents **cannot** call: `delegate_task`, `clarify`, `memory`, `send_message`, `execute_code` - **Interrupt propagation** — interrupting the parent interrupts all active children - Only the final summary enters the parent's context, keeping token usage efficient -- Subagents inherit the parent's **API key and provider configuration** +- Subagents inherit the parent's **API key, provider configuration, and credential pool** (enabling key rotation on rate limits) ## Delegation vs execute_code diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index 3f3d5ec5..bad8d2e3 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -383,6 +383,26 @@ display: tool_progress_command: true ``` +## Interactive Model Picker + +Send `/model` with no arguments in a Discord channel to open a dropdown-based model picker: + +1. **Provider selection** — a Select dropdown showing available providers (up to 25). +2. **Model selection** — a second dropdown with models for the chosen provider (up to 25). + +The picker times out after 120 seconds. Only authorized users (those in `DISCORD_ALLOWED_USERS`) can interact with it. If you know the model name, type `/model ` directly. + +## Native Slash Commands for Skills + +Hermes automatically registers installed skills as **native Discord Application Commands**. This means skills appear in Discord's autocomplete `/` menu alongside built-in commands. 
+ +- Each skill becomes a Discord slash command (e.g., `/code-review`, `/ascii-art`) +- Skills accept an optional `args` string parameter +- Discord has a limit of 100 application commands per bot — if you have more skills than available slots, extra skills are skipped with a warning in the logs +- Skills are registered during bot startup alongside built-in commands like `/model`, `/reset`, and `/background` + +No extra configuration is needed — any skill installed via `hermes skills install` is automatically registered as a Discord slash command on the next gateway restart. + ## Home Channel You can designate a "home channel" where the bot sends proactive messages (such as cron job output, reminders, and notifications). There are two ways to set it: diff --git a/website/docs/user-guide/messaging/feishu.md b/website/docs/user-guide/messaging/feishu.md index 47901e35..5a7e06b7 100644 --- a/website/docs/user-guide/messaging/feishu.md +++ b/website/docs/user-guide/messaging/feishu.md @@ -310,6 +310,58 @@ Additional webhook protections: - **Body read timeout:** 30 seconds - **Content-Type enforcement:** Only `application/json` is accepted +## WebSocket Tuning + +When using `websocket` mode, you can customize reconnect and ping behavior: + +```yaml +platforms: + feishu: + extra: + ws_reconnect_interval: 120 # Seconds between reconnect attempts (default: 120) + ws_ping_interval: 30 # Seconds between WebSocket pings (optional; SDK default if unset) +``` + +| Setting | Config key | Default | Description | +|---------|-----------|---------|-------------| +| Reconnect interval | `ws_reconnect_interval` | 120s | How long to wait between reconnection attempts | +| Ping interval | `ws_ping_interval` | _(SDK default)_ | Frequency of WebSocket keepalive pings | + +## Per-Group Access Control + +Beyond the global `FEISHU_GROUP_POLICY`, you can set fine-grained rules per group chat using `group_rules` in config.yaml: + +```yaml +platforms: + feishu: + extra: + default_group_policy: 
"open" # Default for groups not in group_rules + admins: # Users who can manage bot settings + - "ou_admin_open_id" + group_rules: + "oc_group_chat_id_1": + policy: "allowlist" # open | allowlist | blacklist | admin_only | disabled + allowlist: + - "ou_user_open_id_1" + - "ou_user_open_id_2" + "oc_group_chat_id_2": + policy: "admin_only" + "oc_group_chat_id_3": + policy: "blacklist" + blacklist: + - "ou_blocked_user" +``` + +| Policy | Description | +|--------|-------------| +| `open` | Anyone in the group can use the bot | +| `allowlist` | Only users in the group's `allowlist` can use the bot | +| `blacklist` | Everyone except users in the group's `blacklist` can use the bot | +| `admin_only` | Only users in the global `admins` list can use the bot in this group | +| `disabled` | Bot ignores all messages in this group | + +Groups not listed in `group_rules` fall back to `default_group_policy` (defaults to the value of `FEISHU_GROUP_POLICY`). + ## Deduplication Inbound messages are deduplicated using message IDs with a 24-hour TTL. The dedup state is persisted across restarts to `~/.hermes/feishu_seen_message_ids.json`. @@ -343,6 +395,8 @@ Inbound messages are deduplicated using message IDs with a 24-hour TTL. The dedu | `HERMES_FEISHU_TEXT_BATCH_MAX_CHARS` | — | `4000` | Max characters merged per text batch | | `HERMES_FEISHU_MEDIA_BATCH_DELAY_SECONDS` | — | `0.8` | Media burst debounce quiet period | +WebSocket and per-group ACL settings are configured via `config.yaml` under `platforms.feishu.extra` (see [WebSocket Tuning](#websocket-tuning) and [Per-Group Access Control](#per-group-access-control) above). 
+ ## Troubleshooting | Problem | Fix | diff --git a/website/docs/user-guide/messaging/signal.md b/website/docs/user-guide/messaging/signal.md index 3fc9eba6..bc72c27b 100644 --- a/website/docs/user-guide/messaging/signal.md +++ b/website/docs/user-guide/messaging/signal.md @@ -147,13 +147,26 @@ Group access is controlled by the `SIGNAL_GROUP_ALLOWED_USERS` env var: ### Attachments -The adapter supports sending and receiving: +The adapter supports sending and receiving media in both directions. + +**Incoming** (user → agent): - **Images** — PNG, JPEG, GIF, WebP (auto-detected via magic bytes) - **Audio** — MP3, OGG, WAV, M4A (voice messages transcribed if Whisper is configured) - **Documents** — PDF, ZIP, and other file types -Attachment size limit: **100 MB**. +**Outgoing** (agent → user): + +The agent can send media files via `MEDIA:` tags in responses. The following delivery methods are supported: + +- **Images** — `send_image_file` sends PNG, JPEG, GIF, WebP as native Signal attachments +- **Voice** — `send_voice` sends audio files (OGG, MP3, WAV, M4A, AAC) as attachments +- **Video** — `send_video` sends MP4 video files +- **Documents** — `send_document` sends any file type (PDF, ZIP, etc.) + +All outgoing media goes through Signal's standard attachment API. Unlike some platforms, Signal does not distinguish between voice messages and file attachments at the protocol level. + +Attachment size limit: **100 MB** (both directions). ### Typing Indicators diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index 7ce1b035..2b2808c5 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -210,11 +210,10 @@ Understanding how Hermes behaves in different contexts: |---------|----------| | **DMs** | Bot responds to every message — no @mention needed | | **Channels** | Bot **only responds when @mentioned** (e.g., `@Hermes Agent what time is it?`). 
In channels, Hermes replies in a thread attached to that message. | -| **Threads** | If you @mention Hermes inside an existing thread, it replies in that same thread. | +| **Threads** | If you @mention Hermes inside an existing thread, it replies in that same thread. Once the bot has an active session in a thread, **subsequent replies in that thread do not require @mention** — the bot follows the conversation naturally. | :::tip -In channels, always @mention the bot. Simply typing a message without mentioning it will be ignored. -This is intentional — it prevents the bot from responding to every message in busy channels. +In channels, always @mention the bot to start a conversation. Once the bot is active in a thread, you can reply in that thread without mentioning it. Outside of threads, messages without @mention are ignored to prevent noise in busy channels. ::: --- @@ -283,7 +282,7 @@ slack: ``` :::info -Unlike Discord and Telegram, Slack does not have a `free_response_channels` equivalent. The Slack adapter always requires `@mention` in channels — this is hardcoded behavior. In DMs, the bot always responds without needing a mention. +Unlike Discord and Telegram, Slack does not have a `free_response_channels` equivalent. The Slack adapter requires `@mention` to start a conversation in channels. However, once the bot has an active session in a thread, subsequent thread replies do not require a mention. In DMs, the bot always responds without needing a mention. ::: ### Unauthorized User Handling diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index 54d89fea..a60697a0 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -383,6 +383,19 @@ To find a topic's `thread_id`, open the topic in Telegram Web or Desktop and loo - **Privacy policy:** Telegram now requires bots to have a privacy policy. 
Set one via BotFather with `/setprivacy_policy`, or Telegram may auto-generate a placeholder. This is particularly important if your bot is public-facing. - **Message streaming:** Bot API 9.x added support for streaming long responses, which can improve perceived latency for lengthy agent replies. +## Interactive Model Picker + +When you send `/model` with no arguments in a Telegram chat, Hermes shows an interactive inline keyboard for switching models: + +1. **Provider selection** — buttons showing each available provider with model counts (e.g., "OpenAI (15)", "✓ Anthropic (12)" for the current provider). +2. **Model selection** — paginated model list with **Prev**/**Next** navigation, a **Back** button to return to providers, and **Cancel**. + +The current model and provider are displayed at the top. All navigation happens by editing the same message in-place (no chat clutter). + +:::tip +If you know the exact model name, type `/model ` directly to skip the picker. You can also type `/model --global` to persist the change across sessions. +::: + ## Webhook Mode By default, the Telegram adapter connects via **long polling** — the gateway makes outbound connections to Telegram's servers. This works everywhere but keeps a persistent connection open. diff --git a/website/docs/user-guide/messaging/webhooks.md b/website/docs/user-guide/messaging/webhooks.md index d13210a4..700fea19 100644 --- a/website/docs/user-guide/messaging/webhooks.md +++ b/website/docs/user-guide/messaging/webhooks.md @@ -112,13 +112,38 @@ Prompts use dot-notation to access nested fields in the webhook payload: - `{pull_request.title}` resolves to `payload["pull_request"]["title"]` - `{repository.full_name}` resolves to `payload["repository"]["full_name"]` +- `{__raw__}` — special token that dumps the **entire payload** as indented JSON (truncated at 4000 characters). Useful for monitoring alerts or generic webhooks where the agent needs the full context. 
- Missing keys are left as the literal `{key}` string (no error) - Nested dicts and lists are JSON-serialized and truncated at 2000 characters +You can mix `{__raw__}` with regular template variables: + +```yaml +prompt: "PR #{pull_request.number} by {pull_request.user.login}: {__raw__}" +``` + If no `prompt` template is configured for a route, the entire payload is dumped as indented JSON (truncated at 4000 characters). The same dot-notation templates work in `deliver_extra` values. +### Forum Topic Delivery + +When delivering webhook responses to Telegram, you can target a specific forum topic by including `message_thread_id` (or `thread_id`) in `deliver_extra`: + +```yaml +webhooks: + routes: + alerts: + events: ["alert"] + prompt: "Alert: {__raw__}" + deliver: "telegram" + deliver_extra: + chat_id: "-1001234567890" + message_thread_id: "42" +``` + +If `chat_id` is not provided in `deliver_extra`, the delivery falls back to the home channel configured for the target platform. + --- ## GitHub PR Review (Step by Step) {#github-pr-review} diff --git a/website/docs/user-guide/security.md b/website/docs/user-guide/security.md index 5554e896..aba476bc 100644 --- a/website/docs/user-guide/security.md +++ b/website/docs/user-guide/security.md @@ -10,13 +10,15 @@ Hermes Agent is designed with a defense-in-depth security model. This page cover ## Overview -The security model has five layers: +The security model has seven layers: 1. **User authorization** — who can talk to the agent (allowlists, DM pairing) 2. **Dangerous command approval** — human-in-the-loop for destructive operations 3. **Container isolation** — Docker/Singularity/Modal sandboxing with hardened settings 4. **MCP credential filtering** — environment variable isolation for MCP subprocesses 5. **Context file scanning** — prompt injection detection in project files +6. 
**Cross-session isolation** — sessions cannot access each other's data or state; cron job storage paths are hardened against path traversal attacks +7. **Input sanitization** — working directory parameters in terminal tool backends are validated against an allowlist to prevent shell injection ## Dangerous Command Approval From d0ffb111c25d7be2287f824b8426a99f019aa58d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:25:31 -0700 Subject: [PATCH 075/154] =?UTF-8?q?refactor:=20codebase-wide=20lint=20clea?= =?UTF-8?q?nup=20=E2=80=94=20unused=20imports,=20dead=20code,=20and=20inef?= =?UTF-8?q?ficient=20patterns=20(#5821)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Comprehensive cleanup across 80 files based on automated (ruff, pyflakes, vulture) and manual analysis of the entire codebase. Changes by category: Unused imports removed (~95 across 55 files): - Removed genuinely unused imports from all major subsystems - agent/, hermes_cli/, tools/, gateway/, plugins/, cron/ - Includes imports in try/except blocks that were truly unused (vs availability checks which were left alone) Unused variables removed (~25): - Removed dead variables: connected, inner, channels, last_exc, source, new_server_names, verify, pconfig, default_terminal, result, pending_handled, temperature, loop - Dropped unused argparse subparser assignments in hermes_cli/main.py (12 instances of add_parser() where result was never used) Dead code removed: - run_agent.py: Removed dead ternary (None if False else None) and surrounding unreachable branch in identity fallback - run_agent.py: Removed write-only attribute _last_reported_tool - hermes_cli/providers.py: Removed dead @property decorator on module-level function (decorator has no effect outside a class) - gateway/run.py: Removed unused MCP config load before reconnect - gateway/platforms/slack.py: Removed dead SessionSource construction 
Undefined name bugs fixed (would cause NameError at runtime): - batch_runner.py: Added missing logger = logging.getLogger(__name__) - tools/environments/daytona.py: Added missing Dict and Path imports Unnecessary global statements removed (14): - tools/terminal_tool.py: 5 functions declared global for dicts they only mutated via .pop()/[key]=value (no rebinding) - tools/browser_tool.py: cleanup thread loop only reads flag - tools/rl_training_tool.py: 4 functions only do dict mutations - tools/mcp_oauth.py: only reads the global - hermes_time.py: only reads cached values Inefficient patterns fixed: - startswith/endswith tuple form: 15 instances of x.startswith('a') or x.startswith('b') consolidated to x.startswith(('a', 'b')) - len(x)==0 / len(x)>0: 13 instances replaced with pythonic truthiness checks (not x / bool(x)) - in dict.keys(): 5 instances simplified to in dict - Redefined unused name: removed duplicate _strip_mdv2 import in send_message_tool.py Other fixes: - hermes_cli/doctor.py: Replaced undefined logger.debug() with pass - hermes_cli/config.py: Consolidated chained .endswith() calls Test results: 3934 passed, 17 failed (all pre-existing on main), 19 skipped. Zero regressions. 
--- acp_adapter/entry.py | 1 - acp_adapter/session.py | 2 - agent/anthropic_adapter.py | 6 +-- agent/auxiliary_client.py | 1 - agent/builtin_memory_provider.py | 2 +- agent/credential_pool.py | 5 +-- agent/memory_provider.py | 2 +- agent/models_dev.py | 6 +-- agent/skill_utils.py | 2 +- agent/subdirectory_hints.py | 1 - batch_runner.py | 4 +- cli.py | 6 +-- cron/scheduler.py | 1 - gateway/channel_directory.py | 1 - gateway/platforms/base.py | 1 - gateway/platforms/feishu.py | 1 - gateway/platforms/matrix.py | 2 +- gateway/platforms/mattermost.py | 1 - gateway/platforms/slack.py | 11 +---- gateway/platforms/telegram.py | 2 +- gateway/platforms/wecom.py | 4 +- gateway/platforms/whatsapp.py | 1 - gateway/run.py | 11 ++--- gateway/stream_consumer.py | 2 +- hermes_cli/auth.py | 1 - hermes_cli/auth_commands.py | 1 - hermes_cli/banner.py | 1 - hermes_cli/callbacks.py | 2 +- hermes_cli/claw.py | 2 - hermes_cli/config.py | 2 +- hermes_cli/doctor.py | 4 +- hermes_cli/logs.py | 1 - hermes_cli/main.py | 40 +++++++++---------- hermes_cli/model_switch.py | 8 +--- hermes_cli/plugins_cmd.py | 2 +- hermes_cli/profiles.py | 3 +- hermes_cli/providers.py | 11 +---- hermes_cli/setup.py | 3 -- hermes_cli/skin_engine.py | 1 - hermes_cli/uninstall.py | 1 - hermes_cli/webhook.py | 2 +- hermes_logging.py | 1 - hermes_state.py | 1 - hermes_time.py | 2 - mcp_serve.py | 3 +- plugins/memory/byterover/__init__.py | 1 - plugins/memory/holographic/__init__.py | 1 - plugins/memory/holographic/store.py | 1 - plugins/memory/honcho/__init__.py | 1 - plugins/memory/honcho/cli.py | 3 +- plugins/memory/mem0/__init__.py | 1 - plugins/memory/retaindb/__init__.py | 3 +- run_agent.py | 30 ++++---------- scripts/release.py | 2 - scripts/sample_and_compress.py | 2 - .../templates/basic_grpo_training.py | 2 +- .../godmode/scripts/auto_jailbreak.py | 3 -- .../godmode/scripts/godmode_race.py | 2 - .../godmode/scripts/parseltongue.py | 1 - tools/browser_camofox.py | 4 +- tools/browser_tool.py | 4 +- 
tools/code_execution_tool.py | 1 - tools/cronjob_tools.py | 1 - tools/debug_helpers.py | 1 - tools/environments/daytona.py | 3 +- tools/environments/singularity.py | 3 +- tools/mcp_oauth.py | 5 +-- tools/memory_tool.py | 4 +- tools/rl_training_tool.py | 10 +---- tools/send_message_tool.py | 2 +- tools/skills_hub.py | 6 +-- tools/skills_tool.py | 2 - tools/terminal_tool.py | 12 +----- tools/todo_tool.py | 2 +- tools/tts_tool.py | 1 - tools/vision_tools.py | 2 +- tools/voice_mode.py | 2 +- tools/website_policy.py | 1 - toolsets.py | 2 +- trajectory_compressor.py | 2 - 80 files changed, 81 insertions(+), 210 deletions(-) diff --git a/acp_adapter/entry.py b/acp_adapter/entry.py index 02e44c15..7db5747a 100644 --- a/acp_adapter/entry.py +++ b/acp_adapter/entry.py @@ -15,7 +15,6 @@ Usage:: import asyncio import logging -import os import sys from pathlib import Path from hermes_constants import get_hermes_home diff --git a/acp_adapter/session.py b/acp_adapter/session.py index b489c398..4bb82398 100644 --- a/acp_adapter/session.py +++ b/acp_adapter/session.py @@ -262,8 +262,6 @@ class SessionManager: if self._db_instance is not None: return self._db_instance try: - import os - from pathlib import Path from hermes_state import SessionDB hermes_home = get_hermes_home() self._db_instance = SessionDB(db_path=hermes_home / "state.db") diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index be2dec80..4dd3cadc 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -188,9 +188,7 @@ def _requires_bearer_auth(base_url: str | None) -> bool: if not base_url: return False normalized = base_url.rstrip("/").lower() - return normalized.startswith("https://api.minimax.io/anthropic") or normalized.startswith( - "https://api.minimaxi.com/anthropic" - ) + return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")) def build_anthropic_client(api_key: str, base_url: str = None): @@ -847,7 +845,7 @@ def 
_convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Di }, } - if url.startswith("http://") or url.startswith("https://"): + if url.startswith(("http://", "https://")): return { "type": "image", "source": { diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 5f13994c..35ba3c7b 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -209,7 +209,6 @@ class _CodexCompletionsAdapter: def create(self, **kwargs) -> Any: messages = kwargs.get("messages", []) model = kwargs.get("model", self._model) - temperature = kwargs.get("temperature") # Separate system/instructions from conversation messages. # Convert chat.completions multimodal content blocks to Responses diff --git a/agent/builtin_memory_provider.py b/agent/builtin_memory_provider.py index df4e3b85..0d9cf6c0 100644 --- a/agent/builtin_memory_provider.py +++ b/agent/builtin_memory_provider.py @@ -13,7 +13,7 @@ from __future__ import annotations import json import logging -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List from agent.memory_provider import MemoryProvider diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 144a9101..a47901c8 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -10,21 +10,18 @@ import uuid import os import re from dataclasses import dataclass, fields, replace -from datetime import datetime, timezone +from datetime import datetime from typing import Any, Dict, List, Optional, Set, Tuple from hermes_constants import OPENROUTER_BASE_URL import hermes_cli.auth as auth_mod from hermes_cli.auth import ( - ACCESS_TOKEN_REFRESH_SKEW_SECONDS, CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, PROVIDER_REGISTRY, - _agent_key_is_usable, _codex_access_token_is_expiring, _decode_jwt_claims, _import_codex_cli_tokens, - _is_expiring, _load_auth_store, _load_provider_state, _resolve_zai_base_url, diff --git a/agent/memory_provider.py 
b/agent/memory_provider.py index 54ef1fb1..24593e33 100644 --- a/agent/memory_provider.py +++ b/agent/memory_provider.py @@ -34,7 +34,7 @@ from __future__ import annotations import logging from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List logger = logging.getLogger(__name__) diff --git a/agent/models_dev.py b/agent/models_dev.py index 51eea8fe..a23ce74b 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -23,9 +23,9 @@ import json import logging import os import time -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Tuple from utils import atomic_json_write @@ -231,7 +231,7 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]: response = requests.get(MODELS_DEV_URL, timeout=15) response.raise_for_status() data = response.json() - if isinstance(data, dict) and len(data) > 0: + if isinstance(data, dict) and data: _models_dev_cache = data _models_dev_cache_time = time.time() _save_disk_cache(data) diff --git a/agent/skill_utils.py b/agent/skill_utils.py index f2416360..6b06a19e 100644 --- a/agent/skill_utils.py +++ b/agent/skill_utils.py @@ -10,7 +10,7 @@ import os import re import sys from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple +from typing import Any, Dict, List, Set, Tuple from hermes_constants import get_hermes_home diff --git a/agent/subdirectory_hints.py b/agent/subdirectory_hints.py index a6ca2adc..96903e2e 100644 --- a/agent/subdirectory_hints.py +++ b/agent/subdirectory_hints.py @@ -15,7 +15,6 @@ Inspired by Block/goose's SubdirectoryHintTracker. 
import logging import os -import re import shlex from pathlib import Path from typing import Dict, Any, Optional, Set diff --git a/batch_runner.py b/batch_runner.py index ed00665e..32cd203b 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -31,6 +31,8 @@ from multiprocessing import Pool, Lock import traceback from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeRemainingColumn, MofNCompleteColumn from rich.console import Console + +logger = logging.getLogger(__name__) import fire from run_agent import AIAgent @@ -1016,7 +1018,7 @@ class BatchRunner: tool_stats = data.get('tool_stats', {}) # Check for invalid tool names (model hallucinations) - invalid_tools = [k for k in tool_stats.keys() if k not in VALID_TOOLS] + invalid_tools = [k for k in tool_stats if k not in VALID_TOOLS] if invalid_tools: filtered_entries += 1 diff --git a/cli.py b/cli.py index a60f699d..69a9e8e9 100644 --- a/cli.py +++ b/cli.py @@ -70,7 +70,7 @@ _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧ # Load .env from ~/.hermes/.env first, then project root as dev fallback. # User-managed env files should override stale shell exports on restart. 
-from hermes_constants import get_hermes_home, display_hermes_home, OPENROUTER_BASE_URL +from hermes_constants import get_hermes_home, display_hermes_home from hermes_cli.env_loader import load_hermes_dotenv _hermes_home = get_hermes_home() @@ -4246,7 +4246,6 @@ class HermesCLI: try: config = load_gateway_config() - connected = config.get_connected_platforms() print(" Messaging Platform Configuration:") print(" " + "-" * 55) @@ -6008,7 +6007,7 @@ class HermesCLI: timeout = CLI_CONFIG.get("clarify", {}).get("timeout", 120) response_queue = queue.Queue() - is_open_ended = not choices or len(choices) == 0 + is_open_ended = not choices self._clarify_state = { "question": question, @@ -7839,7 +7838,6 @@ class HermesCLI: title = '🔐 Sudo Password Required' body = 'Enter password below (hidden), or press Enter to skip' box_width = _panel_box_width(title, [body]) - inner = max(0, box_width - 2) lines = [] lines.append(('class:sudo-border', '╭─ ')) lines.append(('class:sudo-title', title)) diff --git a/cron/scheduler.py b/cron/scheduler.py index 5f3feba0..f694f440 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -25,7 +25,6 @@ except ImportError: import msvcrt except ImportError: msvcrt = None -import time from pathlib import Path from typing import Optional diff --git a/gateway/channel_directory.py b/gateway/channel_directory.py index ecc54e64..0d124721 100644 --- a/gateway/channel_directory.py +++ b/gateway/channel_directory.py @@ -124,7 +124,6 @@ def _build_discord(adapter) -> List[Dict[str, str]]: def _build_slack(adapter) -> List[Dict[str, str]]: """List Slack channels the bot has joined.""" - channels = [] # Slack adapter may expose a web client client = getattr(adapter, "_app", None) or getattr(adapter, "_client", None) if not client: diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 66fc5bac..e9464365 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -27,7 +27,6 @@ sys.path.insert(0, 
str(_Path(__file__).resolve().parents[2])) from gateway.config import Platform, PlatformConfig from gateway.session import SessionSource, build_session_key -from hermes_cli.config import get_hermes_home from hermes_constants import get_hermes_dir diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 9bbf2f62..8ba41e8e 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -60,7 +60,6 @@ try: CreateMessageRequestBody, GetChatRequest, GetMessageRequest, - GetImageRequest, GetMessageResourceRequest, P2ImMessageMessageReadV1, ReplyMessageRequest, diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index 2dc0c5a9..2eb89d11 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -1057,7 +1057,7 @@ class MatrixAdapter(BasePlatformAdapter): # Message type. msg_type = MessageType.TEXT - if body.startswith("!") or body.startswith("/"): + if body.startswith(("!", "/")): msg_type = MessageType.COMMAND source = self.build_source( diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 95702603..3835919a 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -430,7 +430,6 @@ class MattermostAdapter(BasePlatformAdapter): ct = resp.content_type or "application/octet-stream" break except (aiohttp.ClientError, asyncio.TimeoutError) as exc: - last_exc = exc if attempt < 2: await asyncio.sleep(1.5 * (attempt + 1)) continue diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 384f379d..627829ca 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -979,16 +979,7 @@ class SlackAdapter(BasePlatformAdapter): try: # Build a SessionSource for this thread from gateway.session import SessionSource - from gateway.config import Platform - - source = SessionSource( - platform=Platform.SLACK, - chat_id=channel_id, - chat_type="group", - user_id=user_id, - thread_id=thread_ts, - ) - + # Generate the session 
key using the same logic as SessionStore # This mirrors the logic in build_session_key for group sessions key_parts = ["agent:main", "slack", "group", channel_id, thread_ts] diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 355bf3ae..8c69c47b 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1369,7 +1369,7 @@ class TelegramAdapter(BasePlatformAdapter): with open(audio_path, "rb") as audio_file: # .ogg files -> send as voice (round playable bubble) - if audio_path.endswith(".ogg") or audio_path.endswith(".opus"): + if audio_path.endswith((".ogg", ".opus")): _voice_thread = metadata.get("thread_id") if metadata else None msg = await self._bot.send_voice( chat_id=int(chat_id), diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index d40b651c..525a830b 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -653,7 +653,7 @@ class WeComAdapter(BasePlatformAdapter): return ".png" if data.startswith(b"\xff\xd8\xff"): return ".jpg" - if data.startswith(b"GIF87a") or data.startswith(b"GIF89a"): + if data.startswith((b"GIF87a", b"GIF89a")): return ".gif" if data.startswith(b"RIFF") and data[8:12] == b"WEBP": return ".webp" @@ -689,7 +689,7 @@ class WeComAdapter(BasePlatformAdapter): @staticmethod def _derive_message_type(body: Dict[str, Any], text: str, media_types: List[str]) -> MessageType: """Choose the normalized inbound message type.""" - if any(mtype.startswith("application/") or mtype.startswith("text/") for mtype in media_types): + if any(mtype.startswith(("application/", "text/")) for mtype in media_types): return MessageType.DOCUMENT if any(mtype.startswith("image/") for mtype in media_types): return MessageType.TEXT if text else MessageType.PHOTO diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index ac94e472..a6475dcb 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -27,7 +27,6 @@ _IS_WINDOWS = 
platform.system() == "Windows" from pathlib import Path from typing import Dict, Optional, Any -from hermes_cli.config import get_hermes_home from hermes_constants import get_hermes_dir logger = logging.getLogger(__name__) diff --git a/gateway/run.py b/gateway/run.py index 56518be6..f6fb563c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -24,7 +24,6 @@ import signal import tempfile import threading import time -import uuid from pathlib import Path from datetime import datetime from typing import Dict, Optional, Any, List @@ -378,7 +377,7 @@ def _check_unavailable_skill(command_name: str) -> str | None: ) # Check optional skills (shipped with repo but not installed) - from hermes_constants import get_hermes_home, get_optional_skills_dir + from hermes_constants import get_optional_skills_dir repo_root = Path(__file__).resolve().parent.parent optional_dir = get_optional_skills_dir(repo_root / "optional-skills") if optional_dir.exists(): @@ -2822,7 +2821,7 @@ class GatewayRunner: guessed, _ = _mimetypes.guess_type(path) if guessed: mtype = guessed - if not (mtype.startswith("application/") or mtype.startswith("text/")): + if not mtype.startswith(("application/", "text/")): continue # Extract display filename by stripping the doc_{uuid12}_ prefix import os as _os @@ -3909,7 +3908,7 @@ class GatewayRunner: return f"🎭 Personality set to **{args}**\n_(takes effect on next message)_" - available = "`none`, " + ", ".join(f"`{n}`" for n in personalities.keys()) + available = "`none`, " + ", ".join(f"`{n}`" for n in personalities) return f"Unknown personality: `{args}`\n\nAvailable: {available}" async def _handle_retry_command(self, event: MessageEvent) -> str: @@ -5321,9 +5320,6 @@ class GatewayRunner: old_servers = set(_servers.keys()) # Read new config before shutting down, so we know what will be added/removed - new_config = _load_mcp_config() - new_server_names = set(new_config.keys()) - # Shutdown existing connections await loop.run_in_executor(None, 
shutdown_mcp_servers) @@ -5411,7 +5407,6 @@ class GatewayRunner: from tools.approval import ( resolve_gateway_approval, has_blocking_approval, - pending_approval_count, ) if not has_blocking_approval(session_key): diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index 59e72755..2cda3364 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -128,7 +128,7 @@ class GatewayStreamConsumer: got_done or got_segment_break or (elapsed >= self.cfg.edit_interval - and len(self._accumulated) > 0) + and self._accumulated) or len(self._accumulated) >= self.cfg.buffer_threshold ) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 9e92b450..1cdbadc7 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2839,7 +2839,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) inference_base_url = auth_state["inference_base_url"] - verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True) with _auth_store_lock(): auth_store = _load_auth_store() diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 395dbb76..97c2d850 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -18,7 +18,6 @@ from agent.credential_pool import ( STRATEGY_ROUND_ROBIN, STRATEGY_RANDOM, STRATEGY_LEAST_USED, - SUPPORTED_POOL_STRATEGIES, PooledCredential, _exhausted_until, _normalize_custom_pool_name, diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index 7435750b..b9701d54 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -5,7 +5,6 @@ Pure display functions with no HermesCLI state dependency. 
import json import logging -import os import shutil import subprocess import threading diff --git a/hermes_cli/callbacks.py b/hermes_cli/callbacks.py index 87f86b84..ada413df 100644 --- a/hermes_cli/callbacks.py +++ b/hermes_cli/callbacks.py @@ -25,7 +25,7 @@ def clarify_callback(cli, question, choices): timeout = CLI_CONFIG.get("clarify", {}).get("timeout", 120) response_queue = queue.Queue() - is_open_ended = not choices or len(choices) == 0 + is_open_ended = not choices cli._clarify_state = { "question": question, diff --git a/hermes_cli/claw.py b/hermes_cli/claw.py index 87735f93..281ca37f 100644 --- a/hermes_cli/claw.py +++ b/hermes_cli/claw.py @@ -10,7 +10,6 @@ Usage: import importlib.util import logging -import shutil import sys from datetime import datetime from pathlib import Path @@ -24,7 +23,6 @@ from hermes_cli.setup import ( print_info, print_success, print_error, - print_warning, prompt_yes_no, ) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 8863bda5..d90fc215 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -2520,7 +2520,7 @@ def set_config_value(key: str, value: str): 'TINKER_API_KEY', ] - if key.upper() in api_keys or key.upper().endswith('_API_KEY') or key.upper().endswith('_TOKEN') or key.upper().startswith('TERMINAL_SSH'): + if key.upper() in api_keys or key.upper().endswith(('_API_KEY', '_TOKEN')) or key.upper().startswith('TERMINAL_SSH'): save_env_value(key.upper(), value) print(f"✓ Set {key} in {get_env_path()}") return diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 08cf7233..876ab15d 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -920,8 +920,8 @@ def run_doctor(args): pass except ImportError: pass - except Exception as _e: - logger.debug("Profile health check failed: %s", _e) + except Exception: + pass # ========================================================================= # Summary diff --git a/hermes_cli/logs.py b/hermes_cli/logs.py index 500cccd4..d5984940 100644 --- 
a/hermes_cli/logs.py +++ b/hermes_cli/logs.py @@ -15,7 +15,6 @@ Usage examples:: hermes logs --since 30m -f # follow, starting 30 min ago """ -import os import re import sys import time diff --git a/hermes_cli/main.py b/hermes_cli/main.py index dae8cc95..3d1e2847 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1154,7 +1154,7 @@ def _model_flow_nous(config, current_model="", args=None): from hermes_cli.auth import ( get_provider_auth_state, _prompt_model_selection, _save_model_choice, _update_config_for_provider, resolve_nous_runtime_credentials, - fetch_nous_models, AuthError, format_auth_error, + AuthError, format_auth_error, _login_nous, PROVIDER_REGISTRY, ) from hermes_cli.config import get_env_value, save_config, save_env_value @@ -1314,7 +1314,6 @@ def _model_flow_openai_codex(config, current_model=""): PROVIDER_REGISTRY, DEFAULT_CODEX_BASE_URL, ) from hermes_cli.codex_models import get_codex_model_ids - from hermes_cli.config import get_env_value, save_env_value import argparse status = get_codex_auth_status() @@ -1367,7 +1366,7 @@ def _model_flow_custom(config): so it appears in the provider menu on subsequent runs. """ from hermes_cli.auth import _save_model_choice, deactivate_provider - from hermes_cli.config import get_env_value, save_env_value, load_config, save_config + from hermes_cli.config import get_env_value, load_config, save_config current_url = get_env_value("OPENAI_BASE_URL") or "" current_key = get_env_value("OPENAI_API_KEY") or "" @@ -1629,7 +1628,7 @@ def _model_flow_named_custom(config, provider_info): Otherwise probes the endpoint's /models API to let the user pick one. 
""" from hermes_cli.auth import _save_model_choice, deactivate_provider - from hermes_cli.config import save_env_value, load_config, save_config + from hermes_cli.config import load_config, save_config from hermes_cli.models import fetch_api_models name = provider_info["name"] @@ -1839,7 +1838,7 @@ def _model_flow_copilot(config, current_model=""): deactivate_provider, resolve_api_key_provider_credentials, ) - from hermes_cli.config import get_env_value, save_env_value, load_config, save_config + from hermes_cli.config import save_env_value, load_config, save_config from hermes_cli.models import ( fetch_api_models, fetch_github_model_catalog, @@ -2430,8 +2429,6 @@ def _model_flow_anthropic(config, current_model=""): ) from hermes_cli.models import _PROVIDER_MODELS - pconfig = PROVIDER_REGISTRY["anthropic"] - # Check ALL credential sources existing_key = ( get_env_value("ANTHROPIC_TOKEN") @@ -3700,7 +3697,7 @@ def cmd_update(args): try: from hermes_cli.gateway import ( is_macos, is_linux, _ensure_user_systemd_env, - get_systemd_linger_status, find_gateway_pids, + find_gateway_pids, _get_service_pids, ) import signal as _signal @@ -3856,7 +3853,7 @@ def cmd_profile(args): """Profile management — create, delete, list, switch, alias.""" from hermes_cli.profiles import ( list_profiles, create_profile, delete_profile, seed_profile_skills, - get_active_profile, set_active_profile, get_active_profile_name, + set_active_profile, get_active_profile_name, check_alias_collision, create_wrapper_script, remove_wrapper_script, _is_wrapper_dir_in_path, _get_wrapper_dir, ) @@ -3984,7 +3981,6 @@ def cmd_profile(args): print(f" {name} chat Start chatting") print(f" {name} gateway start Start the messaging gateway") if clone or clone_all: - from hermes_constants import get_hermes_home profile_dir_display = f"~/.hermes/profiles/{name}" print(f"\n Edit {profile_dir_display}/.env for different API keys") print(f" Edit {profile_dir_display}/SOUL.md for different personality") @@ -4407,7 
+4403,7 @@ For more help on a command: gateway_uninstall.add_argument("--system", action="store_true", help="Target the Linux system-level gateway service") # gateway setup - gateway_setup = gateway_subparsers.add_parser("setup", help="Configure messaging platforms") + gateway_subparsers.add_parser("setup", help="Configure messaging platforms") gateway_parser.set_defaults(func=cmd_gateway) @@ -4682,10 +4678,10 @@ For more help on a command: config_subparsers = config_parser.add_subparsers(dest="config_command") # config show (default) - config_show = config_subparsers.add_parser("show", help="Show current configuration") + config_subparsers.add_parser("show", help="Show current configuration") # config edit - config_edit = config_subparsers.add_parser("edit", help="Open config file in editor") + config_subparsers.add_parser("edit", help="Open config file in editor") # config set config_set = config_subparsers.add_parser("set", help="Set a configuration value") @@ -4693,16 +4689,16 @@ For more help on a command: config_set.add_argument("value", nargs="?", help="Value to set") # config path - config_path = config_subparsers.add_parser("path", help="Print config file path") + config_subparsers.add_parser("path", help="Print config file path") # config env-path - config_env = config_subparsers.add_parser("env-path", help="Print .env file path") + config_subparsers.add_parser("env-path", help="Print .env file path") # config check - config_check = config_subparsers.add_parser("check", help="Check for missing/outdated config") + config_subparsers.add_parser("check", help="Check for missing/outdated config") # config migrate - config_migrate = config_subparsers.add_parser("migrate", help="Update config with new options") + config_subparsers.add_parser("migrate", help="Update config with new options") config_parser.set_defaults(func=cmd_config) @@ -4716,7 +4712,7 @@ For more help on a command: ) pairing_sub = pairing_parser.add_subparsers(dest="pairing_action") - 
pairing_list_parser = pairing_sub.add_parser("list", help="Show pending + approved users") + pairing_sub.add_parser("list", help="Show pending + approved users") pairing_approve_parser = pairing_sub.add_parser("approve", help="Approve a pairing code") pairing_approve_parser.add_argument("platform", help="Platform name (telegram, discord, slack, whatsapp)") @@ -4726,7 +4722,7 @@ For more help on a command: pairing_revoke_parser.add_argument("platform", help="Platform name") pairing_revoke_parser.add_argument("user_id", help="User ID to revoke") - pairing_clear_parser = pairing_sub.add_parser("clear-pending", help="Clear all pending codes") + pairing_sub.add_parser("clear-pending", help="Clear all pending codes") def cmd_pairing(args): from hermes_cli.pairing import pairing_command @@ -4902,7 +4898,7 @@ For more help on a command: memory_sub = memory_parser.add_subparsers(dest="memory_command") memory_sub.add_parser("setup", help="Interactive provider selection and configuration") memory_sub.add_parser("status", help="Show current memory provider config") - memory_off_p = memory_sub.add_parser("off", help="Disable external provider (built-in only)") + memory_sub.add_parser("off", help="Disable external provider (built-in only)") def cmd_memory(args): sub = getattr(args, "memory_command", None) @@ -5066,7 +5062,7 @@ For more help on a command: sessions_prune.add_argument("--source", help="Only prune sessions from this source") sessions_prune.add_argument("--yes", "-y", action="store_true", help="Skip confirmation") - sessions_stats = sessions_subparsers.add_parser("stats", help="Show session store statistics") + sessions_subparsers.add_parser("stats", help="Show session store statistics") sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title") sessions_rename.add_argument("session_id", help="Session ID to rename") @@ -5426,7 +5422,7 @@ For more help on a command: ) profile_subparsers = 
profile_parser.add_subparsers(dest="profile_action") - profile_list = profile_subparsers.add_parser("list", help="List all profiles") + profile_subparsers.add_parser("list", help="List all profiles") profile_use = profile_subparsers.add_parser("use", help="Set sticky default profile") profile_use.add_argument("profile_name", help="Profile name (or 'default')") diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index b2f763c6..988eeebd 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -21,22 +21,16 @@ OpenRouter variant suffixes (``:free``, ``:extended``, ``:fast``). from __future__ import annotations import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import List, NamedTuple, Optional from hermes_cli.providers import ( - ALIASES, - LABELS, - TRANSPORT_TO_API_MODE, determine_api_mode, get_label, - get_provider, is_aggregator, - normalize_provider, resolve_provider_full, ) from hermes_cli.model_normalize import ( - detect_vendor, normalize_model_for_provider, ) from agent.models_dev import ( diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index bd6d7fab..5bfc488e 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -294,7 +294,7 @@ def cmd_install(identifier: str, force: bool = False) -> None: sys.exit(1) # Warn about insecure / local URL schemes - if git_url.startswith("http://") or git_url.startswith("file://"): + if git_url.startswith(("http://", "file://")): console.print( "[yellow]Warning:[/yellow] Using insecure/local URL scheme. " "Consider using https:// or git@ for production installs." 
diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index bb3f6b99..48ecbc4c 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -26,7 +26,7 @@ import shutil import stat import subprocess import sys -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path, PurePosixPath, PureWindowsPath from typing import List, Optional @@ -517,7 +517,6 @@ def delete_profile(name: str, yes: bool = False) -> Path: ] # Check for service - from hermes_cli.gateway import _profile_suffix, get_service_name wrapper_path = _get_wrapper_dir() / name has_wrapper = wrapper_path.exists() if has_wrapper: diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 89092788..0f238706 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -20,8 +20,7 @@ Other modules import from this file. No parallel registries. from __future__ import annotations import logging -import os -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any, Dict, List, Optional, Tuple logger = logging.getLogger(__name__) @@ -357,14 +356,6 @@ def _build_labels() -> Dict[str, str]: # Lazy-built on first access _labels_cache: Optional[Dict[str, str]] = None -@property -def LABELS() -> Dict[str, str]: - """Backward-compatible labels dict.""" - global _labels_cache - if _labels_cache is None: - _labels_cache = _build_labels() - return _labels_cache - # For direct import compat, expose as module-level dict # Built on demand by get_label() calls LABELS: Dict[str, str] = { diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 29cb6472..d7786d1d 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -21,7 +21,6 @@ from typing import Optional, Dict, Any from hermes_cli.nous_subscription import ( apply_nous_provider_defaults, - get_nous_subscription_explainer_lines, get_nous_subscription_features, ) from tools.tool_backend_helpers import managed_nous_tools_enabled @@ -1348,8 +1347,6 @@ 
def setup_terminal_backend(config: dict): terminal_choices.append(f"Keep current ({current_backend})") idx_to_backend[keep_current_idx] = current_backend - default_terminal = backend_to_idx.get(current_backend, 0) - terminal_idx = prompt_choice( "Select terminal backend:", terminal_choices, keep_current_idx ) diff --git a/hermes_cli/skin_engine.py b/hermes_cli/skin_engine.py index 62fac0ea..16ec39cc 100644 --- a/hermes_cli/skin_engine.py +++ b/hermes_cli/skin_engine.py @@ -96,7 +96,6 @@ Activate with ``/skin `` in the CLI or ``display.skin: `` in config. """ import logging -import os from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional, Tuple diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py index 4a068b04..7e35b867 100644 --- a/hermes_cli/uninstall.py +++ b/hermes_cli/uninstall.py @@ -6,7 +6,6 @@ Provides options for: - Keep data: Remove code but keep ~/.hermes/ (configs, sessions, logs) """ -import os import shutil import subprocess from pathlib import Path diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py index 264e7f84..15f5ec43 100644 --- a/hermes_cli/webhook.py +++ b/hermes_cli/webhook.py @@ -16,7 +16,7 @@ import re import secrets import time from pathlib import Path -from typing import Dict, Optional +from typing import Dict from hermes_constants import display_hermes_home diff --git a/hermes_logging.py b/hermes_logging.py index 9a720bf6..6d8f4fa7 100644 --- a/hermes_logging.py +++ b/hermes_logging.py @@ -13,7 +13,6 @@ secrets are never written to disk. 
""" import logging -import os from logging.handlers import RotatingFileHandler from pathlib import Path from typing import Optional diff --git a/hermes_state.py b/hermes_state.py index 6f6be056..da632a9e 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -16,7 +16,6 @@ Key design decisions: import json import logging -import os import random import re import sqlite3 diff --git a/hermes_time.py b/hermes_time.py index 4ec8dfe0..faf02bf8 100644 --- a/hermes_time.py +++ b/hermes_time.py @@ -16,7 +16,6 @@ crashes due to a bad timezone string. import logging import os from datetime import datetime -from pathlib import Path from hermes_constants import get_hermes_home from typing import Optional @@ -92,7 +91,6 @@ def get_timezone() -> Optional[ZoneInfo]: def get_timezone_name() -> str: """Return the IANA name of the configured timezone, or empty string.""" - global _cached_tz_name, _cache_resolved if not _cache_resolved: get_timezone() # populates cache return _cached_tz_name or "" diff --git a/mcp_serve.py b/mcp_serve.py index 93c43979..e8294d1f 100644 --- a/mcp_serve.py +++ b/mcp_serve.py @@ -37,9 +37,8 @@ import sys import threading import time from dataclasses import dataclass, field -from datetime import datetime from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional logger = logging.getLogger("hermes.mcp_serve") diff --git a/plugins/memory/byterover/__init__.py b/plugins/memory/byterover/__init__.py index ead87d0c..d73440c7 100644 --- a/plugins/memory/byterover/__init__.py +++ b/plugins/memory/byterover/__init__.py @@ -23,7 +23,6 @@ import os import shutil import subprocess import threading -import time from pathlib import Path from typing import Any, Dict, List, Optional diff --git a/plugins/memory/holographic/__init__.py b/plugins/memory/holographic/__init__.py index 3ffdda1d..b1423c10 100644 --- a/plugins/memory/holographic/__init__.py +++ b/plugins/memory/holographic/__init__.py @@ -20,7 +20,6 @@ from 
__future__ import annotations import json import logging import re -from pathlib import Path from typing import Any, Dict, List from agent.memory_provider import MemoryProvider diff --git a/plugins/memory/holographic/store.py b/plugins/memory/holographic/store.py index ea15554a..3dc66d68 100644 --- a/plugins/memory/holographic/store.py +++ b/plugins/memory/holographic/store.py @@ -6,7 +6,6 @@ Single-user Hermes memory store plugin. import re import sqlite3 import threading -from datetime import datetime from pathlib import Path try: diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index 336cf353..db277366 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -18,7 +18,6 @@ from __future__ import annotations import json import logging import threading -from pathlib import Path from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider diff --git a/plugins/memory/honcho/cli.py b/plugins/memory/honcho/cli.py index 1735c006..dff4b386 100644 --- a/plugins/memory/honcho/cli.py +++ b/plugins/memory/honcho/cli.py @@ -11,7 +11,7 @@ import sys from pathlib import Path from hermes_constants import get_hermes_home -from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, GLOBAL_CONFIG_PATH, HOST +from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, HOST def clone_honcho_for_profile(profile_name: str) -> bool: @@ -1220,7 +1220,6 @@ def register_cli(subparser) -> None: Called by the plugin CLI registration system during argparse setup. The *subparser* is the parser for ``hermes honcho``. 
""" - import argparse subparser.add_argument( "--target-profile", metavar="NAME", dest="target_profile", diff --git a/plugins/memory/mem0/__init__.py b/plugins/memory/mem0/__init__.py index df0f56bc..7e7d261f 100644 --- a/plugins/memory/mem0/__init__.py +++ b/plugins/memory/mem0/__init__.py @@ -20,7 +20,6 @@ import logging import os import threading import time -from pathlib import Path from typing import Any, Dict, List from agent.memory_provider import MemoryProvider diff --git a/plugins/memory/retaindb/__init__.py b/plugins/memory/retaindb/__init__.py index 2a3b7a22..69b8a8cf 100644 --- a/plugins/memory/retaindb/__init__.py +++ b/plugins/memory/retaindb/__init__.py @@ -20,7 +20,6 @@ Config (env vars or hermes config.yaml under retaindb:): from __future__ import annotations -import hashlib import json import logging import os @@ -189,7 +188,7 @@ class _Client: "Content-Type": "application/json", "x-sdk-runtime": "hermes-plugin", } - if path.startswith("/v1/memory") or path.startswith("/v1/context"): + if path.startswith(("/v1/memory", "/v1/context")): h["X-API-Key"] = token return h diff --git a/run_agent.py b/run_agent.py index cc0e06bd..4c6cf500 100644 --- a/run_agent.py +++ b/run_agent.py @@ -20,7 +20,6 @@ Usage: response = agent.run_conversation("Tell me about the latest Python updates") """ -import atexit import asyncio import base64 import concurrent.futures @@ -36,7 +35,6 @@ import sys import tempfile import time import threading -import weakref from types import SimpleNamespace import uuid from typing import List, Dict, Any, Optional @@ -654,7 +652,7 @@ class AIAgent: self.stream_delta_callback = stream_delta_callback self.status_callback = status_callback self.tool_gen_callback = tool_gen_callback - self._last_reported_tool = None # Track for "new tool" mode + # Tool execution state — allows _vprint during tool execution # even when stream consumers are registered (no tokens streaming then) @@ -2702,20 +2700,7 @@ class AIAgent: if not _soul_loaded: # 
Fallback to hardcoded identity - _ai_peer_name = ( - None - if False - else None - ) - if _ai_peer_name: - _identity = DEFAULT_AGENT_IDENTITY.replace( - "You are Hermes Agent", - f"You are {_ai_peer_name}", - 1, - ) - else: - _identity = DEFAULT_AGENT_IDENTITY - prompt_parts = [_identity] + prompt_parts = [DEFAULT_AGENT_IDENTITY] # Tool-aware behavioral guidance: only inject when the tools are loaded tool_guidance = [] @@ -3400,7 +3385,7 @@ class AIAgent: elif "stream" in api_kwargs: raise ValueError("Codex Responses stream flag is only allowed in fallback streaming requests.") - unexpected = sorted(key for key in api_kwargs.keys() if key not in allowed_keys) + unexpected = sorted(key for key in api_kwargs if key not in allowed_keys) if unexpected: raise ValueError( f"Codex Responses request has unsupported field(s): {', '.join(unexpected)}." @@ -5908,7 +5893,7 @@ class AIAgent: args = json.loads(tc.function.arguments) flush_target = args.get("target", "memory") from tools.memory_tool import memory_tool as _memory_tool - result = _memory_tool( + _memory_tool( action=args.get("action"), target=flush_target, content=args.get("content"), @@ -7468,7 +7453,7 @@ class AIAgent: elif not isinstance(output_items, list): response_invalid = True error_details.append("response.output is not a list") - elif len(output_items) == 0: + elif not output_items: # If we reach here, _run_codex_stream's backfill # from output_item.done events and text-delta # synthesis both failed to populate output. 
@@ -7491,11 +7476,11 @@ class AIAgent: elif not isinstance(content_blocks, list): response_invalid = True error_details.append("response.content is not a list") - elif len(content_blocks) == 0: + elif not content_blocks: response_invalid = True error_details.append("response.content is empty") else: - if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0: + if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: response_invalid = True if response is None: error_details.append("response is None") @@ -9033,7 +9018,6 @@ class AIAgent: "content": f"Error executing tool: {error_msg}", } messages.append(err_msg) - pending_handled = True break # Non-tool errors don't need a synthetic message injected. diff --git a/scripts/release.py b/scripts/release.py index cfe36006..ea697cb3 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -21,8 +21,6 @@ Usage: """ import argparse -import json -import os import re import shutil import subprocess diff --git a/scripts/sample_and_compress.py b/scripts/sample_and_compress.py index 419111d8..a6358f45 100644 --- a/scripts/sample_and_compress.py +++ b/scripts/sample_and_compress.py @@ -17,7 +17,6 @@ Usage: import json import random -import os from pathlib import Path from typing import List, Dict, Any, Tuple import fire @@ -138,7 +137,6 @@ def sample_from_datasets( List of sampled trajectory entries """ from multiprocessing import Pool - from functools import partial random.seed(seed) diff --git a/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py b/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py index 228a93e7..8ad45dfc 100644 --- a/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py +++ b/skills/mlops/training/grpo-rl-training/templates/basic_grpo_training.py @@ -12,7 +12,7 @@ Adapt this for your specific task by modifying: import torch import re -from 
datasets import load_dataset, Dataset +from datasets import load_dataset from transformers import AutoModelForCausalLM, AutoTokenizer from peft import LoraConfig from trl import GRPOTrainer, GRPOConfig diff --git a/skills/red-teaming/godmode/scripts/auto_jailbreak.py b/skills/red-teaming/godmode/scripts/auto_jailbreak.py index 754b405a..0b17de50 100644 --- a/skills/red-teaming/godmode/scripts/auto_jailbreak.py +++ b/skills/red-teaming/godmode/scripts/auto_jailbreak.py @@ -16,13 +16,10 @@ Usage in execute_code: """ import os -import sys import json import time -import re import yaml from pathlib import Path -from concurrent.futures import ThreadPoolExecutor, as_completed try: from openai import OpenAI diff --git a/skills/red-teaming/godmode/scripts/godmode_race.py b/skills/red-teaming/godmode/scripts/godmode_race.py index 60b916cb..ccd02139 100644 --- a/skills/red-teaming/godmode/scripts/godmode_race.py +++ b/skills/red-teaming/godmode/scripts/godmode_race.py @@ -20,7 +20,6 @@ Usage in execute_code: import os import re -import json import time from concurrent.futures import ThreadPoolExecutor, as_completed @@ -404,7 +403,6 @@ def race_godmode_classic(query, api_key=None, timeout=60): Each combo uses a different model paired with its best-performing jailbreak prompt. Returns the best result across all combos. 
""" - from collections import namedtuple HALL_OF_FAME = [ { diff --git a/skills/red-teaming/godmode/scripts/parseltongue.py b/skills/red-teaming/godmode/scripts/parseltongue.py index bf784d2b..ba891c6a 100644 --- a/skills/red-teaming/godmode/scripts/parseltongue.py +++ b/skills/red-teaming/godmode/scripts/parseltongue.py @@ -17,7 +17,6 @@ Usage: import re import base64 -import sys # ═══════════════════════════════════════════════════════════════════ # Trigger words that commonly trip safety classifiers diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index 91f8fa4f..13e85c18 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -27,9 +27,7 @@ import json import logging import os import threading -import time import uuid -from pathlib import Path from typing import Any, Dict, Optional import requests @@ -445,7 +443,7 @@ def camofox_get_images(task_id: Optional[str] = None) -> str: lines = snapshot.split("\n") for i, line in enumerate(lines): stripped = line.strip() - if stripped.startswith("- img ") or stripped.startswith("img "): + if stripped.startswith(("- img ", "img ")): alt_match = re.search(r'img\s+"([^"]*)"', stripped) alt = alt_match.group(1) if alt_match else "" # Look for URL on the next line diff --git a/tools/browser_tool.py b/tools/browser_tool.py index faa872a9..8ad3002b 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -191,7 +191,7 @@ def _resolve_cdp_override(cdp_url: str) -> str: return raw discovery_url = raw - if lowered.startswith("ws://") or lowered.startswith("wss://"): + if lowered.startswith(("ws://", "wss://")): if raw.count(":") == 2 and raw.rstrip("/").rsplit(":", 1)[-1].isdigit() and "/" not in raw.split(":", 2)[-1]: discovery_url = ("http://" if lowered.startswith("ws://") else "https://") + raw.split("://", 1)[1] else: @@ -458,8 +458,6 @@ def _browser_cleanup_thread_worker(): Runs every 30 seconds and checks for sessions that haven't been used within the 
BROWSER_SESSION_INACTIVITY_TIMEOUT period. """ - global _cleanup_running - while _cleanup_running: try: _cleanup_inactive_browser_sessions() diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 5c4658b6..a7a80606 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -693,7 +693,6 @@ def _execute_remote( the remote environment, and tool calls are proxied through a polling thread that communicates via request/response files. """ - from tools.terminal_tool import _interrupt_event _cfg = _load_config() timeout = _cfg.get("timeout", DEFAULT_TIMEOUT) diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 8dbcf7c3..caedaca7 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -150,7 +150,6 @@ def _validate_cron_script_path(script: Optional[str]) -> Optional[str]: if not script or not script.strip(): return None # empty/None = clearing the field, always OK - from pathlib import Path from hermes_constants import get_hermes_home raw = script.strip() diff --git a/tools/debug_helpers.py b/tools/debug_helpers.py index 0bd5f2ac..6f8acf22 100644 --- a/tools/debug_helpers.py +++ b/tools/debug_helpers.py @@ -26,7 +26,6 @@ import json import logging import os import uuid -from pathlib import Path from typing import Any, Dict from hermes_constants import get_hermes_home diff --git a/tools/environments/daytona.py b/tools/environments/daytona.py index eb2a6731..e52459d8 100644 --- a/tools/environments/daytona.py +++ b/tools/environments/daytona.py @@ -12,7 +12,8 @@ import shlex import threading import uuid import warnings -from typing import Optional +from pathlib import Path +from typing import Dict, Optional from tools.environments.base import BaseEnvironment from tools.interrupt import is_interrupted diff --git a/tools/environments/singularity.py b/tools/environments/singularity.py index 6643ea1b..0ea5037c 100644 --- a/tools/environments/singularity.py +++ b/tools/environments/singularity.py @@ -11,11 
+11,10 @@ import os import shlex import shutil import subprocess -import tempfile import threading import uuid from pathlib import Path -from typing import Any, Dict, Optional +from typing import Dict, Optional from hermes_constants import get_hermes_home from tools.environments.base import BaseEnvironment diff --git a/tools/mcp_oauth.py b/tools/mcp_oauth.py index 00172f34..c4d77267 100644 --- a/tools/mcp_oauth.py +++ b/tools/mcp_oauth.py @@ -43,7 +43,7 @@ import threading import webbrowser from http.server import BaseHTTPRequestHandler, HTTPServer from pathlib import Path -from typing import Any, Optional +from typing import Any from urllib.parse import parse_qs, urlparse logger = logging.getLogger(__name__) @@ -54,7 +54,7 @@ logger = logging.getLogger(__name__) _OAUTH_AVAILABLE = False try: - from mcp.client.auth import OAuthClientProvider, TokenStorage + from mcp.client.auth import OAuthClientProvider from mcp.shared.auth import ( OAuthClientInformationFull, OAuthClientMetadata, @@ -320,7 +320,6 @@ async def _wait_for_callback() -> tuple[str, str | None]: OAuthNonInteractiveError: If the callback times out (no user present to complete the browser auth). """ - global _oauth_port assert _oauth_port is not None, "OAuth callback port not set" # The callback server is already running (started in build_oauth_auth). 
diff --git a/tools/memory_tool.py b/tools/memory_tool.py index 91924f66..f7b6bed2 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -260,7 +260,7 @@ class MemoryStore: entries = self._entries_for(target) matches = [(i, e) for i, e in enumerate(entries) if old_text in e] - if len(matches) == 0: + if not matches: return {"success": False, "error": f"No entry matched '{old_text}'."} if len(matches) > 1: @@ -310,7 +310,7 @@ class MemoryStore: entries = self._entries_for(target) matches = [(i, e) for i, e in enumerate(entries) if old_text in e] - if len(matches) == 0: + if not matches: return {"success": False, "error": f"No entry matched '{old_text}'."} if len(matches) > 1: diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py index 29919f22..7a6478b4 100644 --- a/tools/rl_training_tool.py +++ b/tools/rl_training_tool.py @@ -567,7 +567,7 @@ async def rl_select_environment(name: str) -> str: TIP: Read the returned file_path to understand how the environment works. """ - global _current_env, _current_config, _env_config_cache + global _current_env, _current_config _initialize_environments() @@ -673,8 +673,6 @@ async def rl_edit_config(field: str, value: Any) -> str: Returns: JSON string with updated config or error message """ - global _current_config - if not _current_env: return json.dumps({ "error": "No environment selected. Use rl_select_environment(name) first.", @@ -727,8 +725,6 @@ async def rl_start_training() -> str: Returns: JSON string with run_id and initial status """ - global _active_runs - if not _current_env: return json.dumps({ "error": "No environment selected. 
Use rl_select_environment(name) first.", @@ -829,8 +825,6 @@ async def rl_check_status(run_id: str) -> str: Returns: JSON string with run status and metrics """ - global _last_status_check - # Check rate limiting now = time.time() if run_id in _last_status_check: @@ -1311,7 +1305,7 @@ async def rl_test_inference( "avg_accuracy": round( sum(m.get("accuracy", 0) for m in working_models) / len(working_models), 3 ) if working_models else 0, - "environment_working": len(working_models) > 0, + "environment_working": bool(working_models), "output_directory": str(test_output_dir), } diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index eff0e7b5..4e500e69 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -432,7 +432,7 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No else: # Reuse the gateway adapter's format_message for markdown→MarkdownV2 try: - from gateway.platforms.telegram import TelegramAdapter, _strip_mdv2 + from gateway.platforms.telegram import TelegramAdapter _adapter = TelegramAdapter.__new__(TelegramAdapter) formatted = _adapter.format_message(message) except Exception: diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 56c89ba7..d2d8127a 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -430,7 +430,7 @@ class GitHubSource(SkillSource): continue dir_name = entry["name"] - if dir_name.startswith(".") or dir_name.startswith("_"): + if dir_name.startswith((".", "_")): continue prefix = path.rstrip("/") @@ -1163,7 +1163,7 @@ class SkillsShSource(SkillSource): if entry.get("type") != "dir": continue dir_name = entry["name"] - if dir_name.startswith(".") or dir_name.startswith("_"): + if dir_name.startswith((".", "_")): continue if dir_name in ("skills", ".agents", ".claude"): continue # already tried @@ -1382,7 +1382,7 @@ class ClawHubSource(SkillSource): if isinstance(tags, list): return [str(t) for t in tags] if isinstance(tags, dict): - return [str(k) for k in 
tags.keys() if str(k) != "latest"] + return [str(k) for k in tags if str(k) != "latest"] return [] @staticmethod diff --git a/tools/skills_tool.py b/tools/skills_tool.py index da023a14..c6b6cac3 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -72,12 +72,10 @@ import logging from hermes_constants import get_hermes_home import os import re -import sys from enum import Enum from pathlib import Path from typing import Dict, Any, List, Optional, Set, Tuple -import yaml from tools.registry import registry logger = logging.getLogger(__name__) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 305d0801..76946f80 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -720,8 +720,6 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int, def _cleanup_inactive_envs(lifetime_seconds: int = 300): """Clean up environments that have been inactive for longer than lifetime_seconds.""" - global _active_environments, _last_activity - current_time = time.time() # Check the process registry -- skip cleanup for sandboxes with active @@ -784,8 +782,6 @@ def _cleanup_inactive_envs(lifetime_seconds: int = 300): def _cleanup_thread_worker(): """Background thread worker that periodically cleans up inactive environments.""" - global _cleanup_running - while _cleanup_running: try: config = _get_env_config() @@ -831,7 +827,7 @@ def get_active_environments_info() -> Dict[str, Any]: # Calculate total disk usage (per-task to avoid double-counting) total_size = 0 - for task_id in _active_environments.keys(): + for task_id in _active_environments: scratch_dir = _get_scratch_dir() pattern = f"hermes-*{task_id[:8]}*" import glob @@ -848,8 +844,6 @@ def get_active_environments_info() -> Dict[str, Any]: def cleanup_all_environments(): """Clean up ALL active environments. 
Use with caution.""" - global _active_environments, _last_activity - task_ids = list(_active_environments.keys()) cleaned = 0 @@ -877,8 +871,6 @@ def cleanup_all_environments(): def cleanup_vm(task_id: str): """Manually clean up a specific environment by task_id.""" - global _active_environments, _last_activity - # Remove from tracking dicts while holding the lock, but defer the # actual (potentially slow) env.cleanup() call to outside the lock # so other tool calls aren't blocked. @@ -1043,8 +1035,6 @@ def terminal_tool( # Force run after user confirmation # Note: force parameter is internal only, not exposed to model API """ - global _active_environments, _last_activity - try: # Get configuration config = _get_env_config() diff --git a/tools/todo_tool.py b/tools/todo_tool.py index b94e5474..d5dc33b5 100644 --- a/tools/todo_tool.py +++ b/tools/todo_tool.py @@ -85,7 +85,7 @@ class TodoStore: def has_items(self) -> bool: """Check if there are any items in the list.""" - return len(self._items) > 0 + return bool(self._items) def format_for_injection(self) -> Optional[str]: """ diff --git a/tools/tts_tool.py b/tools/tts_tool.py index a8c2ac05..4cb6e64e 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -550,7 +550,6 @@ def text_to_speech_tool( if edge_available: logger.info("Generating speech with Edge TTS...") try: - loop = asyncio.get_running_loop() import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: pool.submit( diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 404d06a5..8c9d6a9b 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -82,7 +82,7 @@ def _validate_image_url(url: str) -> bool: return False # Basic HTTP/HTTPS URL check - if not (url.startswith("http://") or url.startswith("https://")): + if not url.startswith(("http://", "https://")): return False # Parse to ensure we at least have a network location; still allow URLs diff --git a/tools/voice_mode.py b/tools/voice_mode.py index 
53d9ecb0..1b09a178 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -108,7 +108,7 @@ def detect_audio_environment() -> dict: ) return { - "available": len(warnings) == 0, + "available": not warnings, "warnings": warnings, "notices": notices, } diff --git a/tools/website_policy.py b/tools/website_policy.py index 93a2eb28..63fb7571 100644 --- a/tools/website_policy.py +++ b/tools/website_policy.py @@ -12,7 +12,6 @@ from __future__ import annotations import fnmatch import logging -import os import threading import time from pathlib import Path diff --git a/toolsets.py b/toolsets.py index 04e43b28..2a359b60 100644 --- a/toolsets.py +++ b/toolsets.py @@ -592,7 +592,7 @@ def get_toolset_info(name: str) -> Dict[str, Any]: "includes": toolset["includes"], "resolved_tools": resolved_tools, "tool_count": len(resolved_tools), - "is_composite": len(toolset["includes"]) > 0 + "is_composite": bool(toolset["includes"]) } diff --git a/trajectory_compressor.py b/trajectory_compressor.py index 2dfdda7a..e4faf97a 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -32,7 +32,6 @@ Usage: import json import os -import re import time import yaml import logging @@ -350,7 +349,6 @@ class TrajectoryCompressor: which handles auth, headers, and provider detection internally. For custom endpoints, falls back to raw client construction. """ - from agent.auxiliary_client import call_llm, async_call_llm provider = self._detect_provider() if provider: From 187e90e4254c461e72d211564a44678f9626ffac Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 10:40:34 -0700 Subject: [PATCH 076/154] refactor: replace inline HERMES_HOME re-implementations with get_hermes_home() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 16 callsites across 14 files were re-deriving the hermes home path via os.environ.get('HERMES_HOME', ...) instead of using the canonical get_hermes_home() from hermes_constants. 
This breaks profiles — each profile has its own HERMES_HOME, and the inline fallback defaults to ~/.hermes regardless. Fixed by importing and calling get_hermes_home() at each site. For files already inside the hermes process (agent/, hermes_cli/, tools/, gateway/, plugins/), this is always safe. Files that run outside the process context (mcp_serve.py, mcp_oauth.py) already had correct try/except ImportError fallbacks and were left alone. Skipped: hermes_constants.py (IS the implementation), env_loader.py (bootstrap), profiles.py (intentionally manipulates the env var), standalone scripts (optional-skills/, skills/), and tests. --- agent/context_references.py | 5 ++--- agent/model_metadata.py | 4 ++-- agent/models_dev.py | 5 ++--- gateway/builtin_hooks/boot_md.py | 3 ++- gateway/platforms/webhook.py | 6 ++---- hermes_cli/commands.py | 6 ++---- hermes_cli/memory_setup.py | 10 ++++++---- hermes_cli/plugins.py | 4 ++-- hermes_cli/plugins_cmd.py | 5 +++-- hermes_cli/webhook.py | 5 ++--- plugins/memory/hindsight/__init__.py | 3 +-- plugins/memory/retaindb/__init__.py | 3 ++- tools/credential_files.py | 3 ++- tools/env_passthrough.py | 4 ++-- 14 files changed, 32 insertions(+), 34 deletions(-) diff --git a/agent/context_references.py b/agent/context_references.py index 8222dc33..1b8ac948 100644 --- a/agent/context_references.py +++ b/agent/context_references.py @@ -343,10 +343,9 @@ def _resolve_path(cwd: Path, target: str, *, allowed_root: Path | None = None) - def _ensure_reference_path_allowed(path: Path) -> None: + from hermes_constants import get_hermes_home home = Path(os.path.expanduser("~")).resolve() - hermes_home = Path( - os.getenv("HERMES_HOME", str(home / ".hermes")) - ).expanduser().resolve() + hermes_home = get_hermes_home().resolve() blocked_exact = {home / rel for rel in _SENSITIVE_HOME_FILES} blocked_exact.add(hermes_home / ".env") diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 62dfb2b8..50245a7c 100644 --- 
a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -510,8 +510,8 @@ def fetch_endpoint_model_metadata( def _get_context_cache_path() -> Path: """Return path to the persistent context length cache file.""" - hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) - return hermes_home / "context_length_cache.yaml" + from hermes_constants import get_hermes_home + return get_hermes_home() / "context_length_cache.yaml" def _load_context_cache() -> Dict[str, int]: diff --git a/agent/models_dev.py b/agent/models_dev.py index a23ce74b..d3de5061 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -185,9 +185,8 @@ def _get_reverse_mapping() -> Dict[str, str]: def _get_cache_path() -> Path: """Return path to disk cache file.""" - env_val = os.environ.get("HERMES_HOME", "") - hermes_home = Path(env_val) if env_val else Path.home() / ".hermes" - return hermes_home / "models_dev_cache.json" + from hermes_constants import get_hermes_home + return get_hermes_home() / "models_dev_cache.json" def _load_disk_cache() -> Dict[str, Any]: diff --git a/gateway/builtin_hooks/boot_md.py b/gateway/builtin_hooks/boot_md.py index fced0b5e..c4b6c2d4 100644 --- a/gateway/builtin_hooks/boot_md.py +++ b/gateway/builtin_hooks/boot_md.py @@ -24,7 +24,8 @@ from pathlib import Path logger = logging.getLogger("hooks.boot-md") -HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) +from hermes_constants import get_hermes_home +HERMES_HOME = get_hermes_home() BOOT_FILE = HERMES_HOME / "BOOT.md" diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index ae2e7f27..daaf4f5d 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -203,10 +203,8 @@ class WebhookAdapter(BasePlatformAdapter): def _reload_dynamic_routes(self) -> None: """Reload agent-created subscriptions from disk if the file changed.""" - from pathlib import Path as _Path - hermes_home = _Path( - os.getenv("HERMES_HOME", str(_Path.home() / 
".hermes")) - ).expanduser() + from hermes_constants import get_hermes_home + hermes_home = get_hermes_home() subs_path = hermes_home / _DYNAMIC_ROUTES_FILENAME if not subs_path.exists(): if self._dynamic_routes: diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 9bce834d..ecf4d0d6 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -294,10 +294,8 @@ def _resolve_config_gates() -> set[str]: return set() try: import yaml - config_path = os.path.join( - os.getenv("HERMES_HOME", os.path.expanduser("~/.hermes")), - "config.yaml", - ) + from hermes_constants import get_hermes_home + config_path = str(get_hermes_home() / "config.yaml") if os.path.exists(config_path): with open(config_path, encoding="utf-8") as f: cfg = yaml.safe_load(f) or {} diff --git a/hermes_cli/memory_setup.py b/hermes_cli/memory_setup.py index c174d2b4..2843f4f4 100644 --- a/hermes_cli/memory_setup.py +++ b/hermes_cli/memory_setup.py @@ -12,6 +12,8 @@ import os import sys from pathlib import Path +from hermes_constants import get_hermes_home + # --------------------------------------------------------------------------- # Curses-based interactive picker (same pattern as hermes tools) @@ -275,7 +277,7 @@ def cmd_setup_provider(provider_name: str) -> None: config["memory"] = {} if hasattr(provider, "post_setup"): - hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")))) + hermes_home = str(get_hermes_home()) provider.post_setup(hermes_home, config) return @@ -326,7 +328,7 @@ def cmd_setup(args) -> None: # If the provider has a post_setup hook, delegate entirely to it. # The hook handles its own config, connection test, and activation. 
if hasattr(provider, "post_setup"): - hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")))) + hermes_home = str(get_hermes_home()) provider.post_setup(hermes_home, config) return @@ -336,7 +338,7 @@ def cmd_setup(args) -> None: if not isinstance(provider_config, dict): provider_config = {} - env_path = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / ".env" + env_path = get_hermes_home() / ".env" env_writes = {} if schema: @@ -400,7 +402,7 @@ def cmd_setup(args) -> None: save_config(config) # Write non-secret config to provider's native location - hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")))) + hermes_home = str(get_hermes_home()) if provider_config and hasattr(provider, "save_config"): try: provider.save_config(provider_config, hermes_home) diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index ce57695f..23a655aa 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -38,6 +38,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Set, Union +from hermes_constants import get_hermes_home from utils import env_var_enabled try: @@ -258,8 +259,7 @@ class PluginManager: manifests: List[PluginManifest] = [] # 1. User plugins (~/.hermes/plugins/) - hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")) - user_dir = Path(hermes_home) / "plugins" + user_dir = get_hermes_home() / "plugins" manifests.extend(self._scan_directory(user_dir, source="user")) # 2. Project plugins (./.hermes/plugins/) diff --git a/hermes_cli/plugins_cmd.py b/hermes_cli/plugins_cmd.py index 5bfc488e..4727d4b7 100644 --- a/hermes_cli/plugins_cmd.py +++ b/hermes_cli/plugins_cmd.py @@ -16,6 +16,8 @@ import subprocess import sys from pathlib import Path +from hermes_constants import get_hermes_home + logger = logging.getLogger(__name__) # Minimum manifest version this installer understands. 
@@ -26,8 +28,7 @@ _SUPPORTED_MANIFEST_VERSION = 1 def _plugins_dir() -> Path: """Return the user plugins directory, creating it if needed.""" - hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")) - plugins = Path(hermes_home) / "plugins" + plugins = get_hermes_home() / "plugins" plugins.mkdir(parents=True, exist_ok=True) return plugins diff --git a/hermes_cli/webhook.py b/hermes_cli/webhook.py index 15f5ec43..8ff135e2 100644 --- a/hermes_cli/webhook.py +++ b/hermes_cli/webhook.py @@ -25,9 +25,8 @@ _SUBSCRIPTIONS_FILENAME = "webhook_subscriptions.json" def _hermes_home() -> Path: - return Path( - os.getenv("HERMES_HOME", str(Path.home() / ".hermes")) - ).expanduser() + from hermes_constants import get_hermes_home + return get_hermes_home() def _subscriptions_path() -> Path: diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index 140aa1ea..e10a14a8 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -290,8 +290,7 @@ class HindsightMemoryProvider(MemoryProvider): if self._mode == "local": def _start_daemon(): import traceback - from pathlib import Path - log_dir = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / "logs" + log_dir = get_hermes_home() / "logs" log_dir.mkdir(parents=True, exist_ok=True) log_path = log_dir / "hindsight-embed.log" try: diff --git a/plugins/memory/retaindb/__init__.py b/plugins/memory/retaindb/__init__.py index 69b8a8cf..72ff9d77 100644 --- a/plugins/memory/retaindb/__init__.py +++ b/plugins/memory/retaindb/__init__.py @@ -504,7 +504,8 @@ class RetainDBMemoryProvider(MemoryProvider): self._user_id = kwargs.get("user_id", "default") or "default" self._agent_id = kwargs.get("agent_id", "hermes") or "hermes" - hermes_home_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + from hermes_constants import get_hermes_home + hermes_home_path = get_hermes_home() db_path = hermes_home_path / 
"retaindb_queue.db" self._queue = _WriteQueue(self._client, db_path) diff --git a/tools/credential_files.py b/tools/credential_files.py index 49768bff..eafd5ea2 100644 --- a/tools/credential_files.py +++ b/tools/credential_files.py @@ -48,7 +48,8 @@ _config_files: List[Dict[str, str]] | None = None def _resolve_hermes_home() -> Path: - return Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + from hermes_constants import get_hermes_home + return get_hermes_home() def register_credential_file( diff --git a/tools/env_passthrough.py b/tools/env_passthrough.py index 1c70d518..0e883bab 100644 --- a/tools/env_passthrough.py +++ b/tools/env_passthrough.py @@ -66,8 +66,8 @@ def _load_config_passthrough() -> frozenset[str]: result: set[str] = set() try: - hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) - config_path = hermes_home / "config.yaml" + from hermes_constants import get_hermes_home + config_path = get_hermes_home() / "config.yaml" if config_path.exists(): import yaml From 1a2a03ca69ff342438343fd484716dcb48bfa835 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:03:14 -0700 Subject: [PATCH 077/154] feat(gateway): approval buttons for Slack & Telegram + Slack thread context (#5890) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slack: - Add Block Kit interactive buttons for command approval (Allow Once, Allow Session, Always Allow, Deny) via send_exec_approval() - Register @app.action handlers for each approval button - Add _fetch_thread_context() — fetches thread history via conversations.replies when bot is first @mentioned mid-thread - Fix _has_active_session_for_thread() to use build_session_key() instead of manual key construction (fixes session key mismatch bug where thread_sessions_per_user flag was ignored, ref PR #5833) Telegram: - Add InlineKeyboard approval buttons via send_exec_approval() - Add ea:* callback 
handling in _handle_callback_query() - Uses monotonic counter + _approval_state dict to map button clicks back to session keys (avoids 64-byte callback_data limit) Both platforms now auto-detected by the gateway runner's _approval_notify_sync() — any adapter with send_exec_approval() on its class gets button-based approval instead of text fallback. Inspired by community PRs #3898 (LevSky22), #2953 (ygd58), #5833 (heathley). Implemented fresh on current main. Tests: 24 new tests covering button rendering, action handling, thread context fetching, session key fix, double-click prevention. --- gateway/platforms/slack.py | 317 +++++++++++++-- gateway/platforms/telegram.py | 116 ++++++ tests/gateway/test_slack_approval_buttons.py | 373 ++++++++++++++++++ .../gateway/test_telegram_approval_buttons.py | 284 +++++++++++++ 4 files changed, 1057 insertions(+), 33 deletions(-) create mode 100644 tests/gateway/test_slack_approval_buttons.py create mode 100644 tests/gateway/test_telegram_approval_buttons.py diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 627829ca..0cdf2a33 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -84,6 +84,9 @@ class SlackAdapter(BasePlatformAdapter): self._seen_messages: Dict[str, float] = {} self._SEEN_TTL = 300 # 5 minutes self._SEEN_MAX = 2000 # prune threshold + # Track pending approval message_ts → resolved flag to prevent + # double-clicks on approval buttons. 
+ self._approval_resolved: Dict[str, bool] = {} async def connect(self) -> bool: """Connect to Slack via Socket Mode.""" @@ -176,6 +179,15 @@ class SlackAdapter(BasePlatformAdapter): await ack() await self._handle_slash_command(command) + # Register Block Kit action handlers for approval buttons + for _action_id in ( + "hermes_approve_once", + "hermes_approve_session", + "hermes_approve_always", + "hermes_deny", + ): + self._app.action(_action_id)(self._handle_approval_action) + # Start Socket Mode handler in background self._handler = AsyncSocketModeHandler(self._app, app_token) self._socket_mode_task = asyncio.create_task(self._handler.start_async()) @@ -791,6 +803,24 @@ class SlackAdapter(BasePlatformAdapter): # Strip the bot mention from the text text = text.replace(f"<@{bot_uid}>", "").strip() + # When first mentioned in an existing thread, fetch thread context + # so the agent understands the conversation it's joining. + event_thread_ts = event.get("thread_ts") + is_thread_reply = event_thread_ts and event_thread_ts != ts + if is_thread_reply and not self._has_active_session_for_thread( + channel_id=channel_id, + thread_ts=event_thread_ts, + user_id=user_id, + ): + thread_context = await self._fetch_thread_context( + channel_id=channel_id, + thread_ts=event_thread_ts, + current_ts=ts, + team_id=team_id, + ) + if thread_context: + text = thread_context + text + # Determine message type msg_type = MessageType.TEXT if text.startswith("/"): @@ -912,6 +942,233 @@ class SlackAdapter(BasePlatformAdapter): await self._remove_reaction(channel_id, ts, "eyes") await self._add_reaction(channel_id, ts, "white_check_mark") + # ----- Approval button support (Block Kit) ----- + + async def send_exec_approval( + self, chat_id: str, command: str, session_key: str, + description: str = "dangerous command", + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Send a Block Kit approval prompt with interactive buttons. 
+ + The buttons call ``resolve_gateway_approval()`` to unblock the waiting + agent thread — same mechanism as the text ``/approve`` flow. + """ + if not self._app: + return SendResult(success=False, error="Not connected") + + try: + cmd_preview = command[:2900] + "..." if len(command) > 2900 else command + thread_ts = self._resolve_thread_ts(None, metadata) + + blocks = [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ( + f":warning: *Command Approval Required*\n" + f"```{cmd_preview}```\n" + f"Reason: {description}" + ), + }, + }, + { + "type": "actions", + "elements": [ + { + "type": "button", + "text": {"type": "plain_text", "text": "Allow Once"}, + "style": "primary", + "action_id": "hermes_approve_once", + "value": session_key, + }, + { + "type": "button", + "text": {"type": "plain_text", "text": "Allow Session"}, + "action_id": "hermes_approve_session", + "value": session_key, + }, + { + "type": "button", + "text": {"type": "plain_text", "text": "Always Allow"}, + "action_id": "hermes_approve_always", + "value": session_key, + }, + { + "type": "button", + "text": {"type": "plain_text", "text": "Deny"}, + "style": "danger", + "action_id": "hermes_deny", + "value": session_key, + }, + ], + }, + ] + + kwargs: Dict[str, Any] = { + "channel": chat_id, + "text": f"⚠️ Command approval required: {cmd_preview[:100]}", + "blocks": blocks, + } + if thread_ts: + kwargs["thread_ts"] = thread_ts + + result = await self._get_client(chat_id).chat_postMessage(**kwargs) + msg_ts = result.get("ts", "") + if msg_ts: + self._approval_resolved[msg_ts] = False + + return SendResult(success=True, message_id=msg_ts, raw_response=result) + except Exception as e: + logger.error("[Slack] send_exec_approval failed: %s", e, exc_info=True) + return SendResult(success=False, error=str(e)) + + async def _handle_approval_action(self, ack, body, action) -> None: + """Handle an approval button click from Block Kit.""" + await ack() + + action_id = action.get("action_id", "") 
+ session_key = action.get("value", "") + message = body.get("message", {}) + msg_ts = message.get("ts", "") + channel_id = body.get("channel", {}).get("id", "") + user_name = body.get("user", {}).get("name", "unknown") + + # Map action_id to approval choice + choice_map = { + "hermes_approve_once": "once", + "hermes_approve_session": "session", + "hermes_approve_always": "always", + "hermes_deny": "deny", + } + choice = choice_map.get(action_id, "deny") + + # Prevent double-clicks + if self._approval_resolved.get(msg_ts, False): + return + self._approval_resolved[msg_ts] = True + + # Update the message to show the decision and remove buttons + label_map = { + "once": f"✅ Approved once by {user_name}", + "session": f"✅ Approved for session by {user_name}", + "always": f"✅ Approved permanently by {user_name}", + "deny": f"❌ Denied by {user_name}", + } + decision_text = label_map.get(choice, f"Resolved by {user_name}") + + # Get original text from the section block + original_text = "" + for block in message.get("blocks", []): + if block.get("type") == "section": + original_text = block.get("text", {}).get("text", "") + break + + updated_blocks = [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": original_text or "Command approval request", + }, + }, + { + "type": "context", + "elements": [ + {"type": "mrkdwn", "text": decision_text}, + ], + }, + ] + + try: + await self._get_client(channel_id).chat_update( + channel=channel_id, + ts=msg_ts, + text=decision_text, + blocks=updated_blocks, + ) + except Exception as e: + logger.warning("[Slack] Failed to update approval message: %s", e) + + # Resolve the approval — this unblocks the agent thread + try: + from tools.approval import resolve_gateway_approval + count = resolve_gateway_approval(session_key, choice) + logger.info( + "Slack button resolved %d approval(s) for session %s (choice=%s, user=%s)", + count, session_key, choice, user_name, + ) + except Exception as exc: + logger.error("Failed to 
resolve gateway approval from Slack button: %s", exc) + + # Clean up stale approval state + self._approval_resolved.pop(msg_ts, None) + + # ----- Thread context fetching ----- + + async def _fetch_thread_context( + self, channel_id: str, thread_ts: str, current_ts: str, + team_id: str = "", limit: int = 30, + ) -> str: + """Fetch recent thread messages to provide context when the bot is + mentioned mid-thread for the first time. + + Returns a formatted string with thread history, or empty string on + failure or if the thread is empty (just the parent message). + """ + try: + client = self._get_client(channel_id) + result = await client.conversations_replies( + channel=channel_id, + ts=thread_ts, + limit=limit + 1, # +1 because it includes the current message + inclusive=True, + ) + messages = result.get("messages", []) + if not messages: + return "" + + context_parts = [] + for msg in messages: + msg_ts = msg.get("ts", "") + # Skip the current message (the one that triggered this fetch) + if msg_ts == current_ts: + continue + # Skip bot messages from ourselves + if msg.get("bot_id") or msg.get("subtype") == "bot_message": + continue + + msg_user = msg.get("user", "unknown") + msg_text = msg.get("text", "").strip() + if not msg_text: + continue + + # Strip bot mentions from context messages + bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id) + if bot_uid: + msg_text = msg_text.replace(f"<@{bot_uid}>", "").strip() + + # Mark the thread parent + is_parent = msg_ts == thread_ts + prefix = "[thread parent] " if is_parent else "" + + # Resolve user name (cached) + name = await self._resolve_user_name(msg_user, chat_id=channel_id) + context_parts.append(f"{prefix}{name}: {msg_text}") + + if not context_parts: + return "" + + return ( + "[Thread context — previous messages in this thread:]\n" + + "\n".join(context_parts) + + "\n[End of thread context]\n\n" + ) + except Exception as e: + logger.warning("[Slack] Failed to fetch thread context: %s", e) + 
return "" + async def _handle_slash_command(self, command: dict) -> None: """Handle /hermes slash command.""" text = command.get("text", "").strip() @@ -960,50 +1217,44 @@ class SlackAdapter(BasePlatformAdapter): user_id: str, ) -> bool: """Check if there's an active session for a thread. - + Used to determine if thread replies without @mentions should be processed (they should if there's an active session). - - Args: - channel_id: The Slack channel ID - thread_ts: The thread timestamp (parent message ts) - user_id: The user ID of the sender - - Returns: - True if there's an active session for this thread + + Uses ``build_session_key()`` as the single source of truth for key + construction — avoids the bug where manual key building didn't + respect ``thread_sessions_per_user`` and ``group_sessions_per_user`` + settings correctly. """ session_store = getattr(self, "_session_store", None) if not session_store: return False - - try: - # Build a SessionSource for this thread - from gateway.session import SessionSource - # Generate the session key using the same logic as SessionStore - # This mirrors the logic in build_session_key for group sessions - key_parts = ["agent:main", "slack", "group", channel_id, thread_ts] - - # Include user_id if group_sessions_per_user is enabled - # We check the session store config if available - group_sessions_per_user = getattr( - session_store, "config", {} + try: + from gateway.session import SessionSource, build_session_key + + source = SessionSource( + platform=Platform.SLACK, + chat_id=channel_id, + chat_type="group", + user_id=user_id, + thread_id=thread_ts, ) - if hasattr(group_sessions_per_user, "group_sessions_per_user"): - group_sessions_per_user = group_sessions_per_user.group_sessions_per_user - else: - group_sessions_per_user = True # Default - - if group_sessions_per_user and user_id: - key_parts.append(str(user_id)) - - session_key = ":".join(key_parts) - - # Check if the session exists in the store + + # Read session 
isolation settings from the store's config + store_cfg = getattr(session_store, "config", None) + gspu = getattr(store_cfg, "group_sessions_per_user", True) if store_cfg else True + tspu = getattr(store_cfg, "thread_sessions_per_user", False) if store_cfg else False + + session_key = build_session_key( + source, + group_sessions_per_user=gspu, + thread_sessions_per_user=tspu, + ) + session_store._ensure_loaded() return session_key in session_store._entries except Exception: - # If anything goes wrong, default to False (require mention) return False async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 8c69c47b..26b0e426 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -153,6 +153,8 @@ class TelegramAdapter(BasePlatformAdapter): self._dm_topics_config: List[Dict[str, Any]] = self.config.extra.get("dm_topics", []) # Interactive model picker state per chat self._model_picker_state: Dict[str, dict] = {} + # Approval button state: message_id → session_key + self._approval_state: Dict[int, str] = {} def _fallback_ips(self) -> list[str]: """Return validated fallback IPs from config (populated by _apply_env_overrides).""" @@ -1010,6 +1012,70 @@ class TelegramAdapter(BasePlatformAdapter): logger.warning("[%s] send_update_prompt failed: %s", self.name, e) return SendResult(success=False, error=str(e)) + async def send_exec_approval( + self, chat_id: str, command: str, session_key: str, + description: str = "dangerous command", + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Send an inline-keyboard approval prompt with interactive buttons. + + The buttons call ``resolve_gateway_approval()`` to unblock the waiting + agent thread — same mechanism as the text ``/approve`` flow. 
+ """ + if not self._bot: + return SendResult(success=False, error="Not connected") + + try: + cmd_preview = command[:3800] + "..." if len(command) > 3800 else command + text = ( + f"⚠️ *Command Approval Required*\n\n" + f"`{cmd_preview}`\n\n" + f"Reason: {description}" + ) + + # Resolve thread context for thread replies + thread_id = None + if metadata: + thread_id = metadata.get("thread_id") or metadata.get("message_thread_id") + + # We'll use the message_id as part of callback_data to look up session_key + # Send a placeholder first, then update — or use a counter. + # Simpler: use a monotonic counter to generate short IDs. + import itertools + if not hasattr(self, "_approval_counter"): + self._approval_counter = itertools.count(1) + approval_id = next(self._approval_counter) + + keyboard = InlineKeyboardMarkup([ + [ + InlineKeyboardButton("✅ Allow Once", callback_data=f"ea:once:{approval_id}"), + InlineKeyboardButton("✅ Session", callback_data=f"ea:session:{approval_id}"), + ], + [ + InlineKeyboardButton("✅ Always", callback_data=f"ea:always:{approval_id}"), + InlineKeyboardButton("❌ Deny", callback_data=f"ea:deny:{approval_id}"), + ], + ]) + + kwargs: Dict[str, Any] = { + "chat_id": int(chat_id), + "text": text, + "parse_mode": ParseMode.MARKDOWN, + "reply_markup": keyboard, + } + if thread_id: + kwargs["message_thread_id"] = int(thread_id) + + msg = await self._bot.send_message(**kwargs) + + # Store session_key keyed by approval_id for the callback handler + self._approval_state[approval_id] = session_key + + return SendResult(success=True, message_id=str(msg.message_id)) + except Exception as e: + logger.warning("[%s] send_exec_approval failed: %s", self.name, e) + return SendResult(success=False, error=str(e)) + async def send_model_picker( self, chat_id: str, @@ -1321,6 +1387,56 @@ class TelegramAdapter(BasePlatformAdapter): await self._handle_model_picker_callback(query, data, chat_id) return + # --- Exec approval callbacks (ea:choice:id) --- + if 
data.startswith("ea:"): + parts = data.split(":", 2) + if len(parts) == 3: + choice = parts[1] # once, session, always, deny + try: + approval_id = int(parts[2]) + except (ValueError, IndexError): + await query.answer(text="Invalid approval data.") + return + + session_key = self._approval_state.pop(approval_id, None) + if not session_key: + await query.answer(text="This approval has already been resolved.") + return + + # Map choice to human-readable label + label_map = { + "once": "✅ Approved once", + "session": "✅ Approved for session", + "always": "✅ Approved permanently", + "deny": "❌ Denied", + } + user_display = getattr(query.from_user, "first_name", "User") + label = label_map.get(choice, "Resolved") + + await query.answer(text=label) + + # Edit message to show decision, remove buttons + try: + await query.edit_message_text( + text=f"{label} by {user_display}", + parse_mode=ParseMode.MARKDOWN, + reply_markup=None, + ) + except Exception: + pass # non-fatal if edit fails + + # Resolve the approval — unblocks the agent thread + try: + from tools.approval import resolve_gateway_approval + count = resolve_gateway_approval(session_key, choice) + logger.info( + "Telegram button resolved %d approval(s) for session %s (choice=%s, user=%s)", + count, session_key, choice, user_display, + ) + except Exception as exc: + logger.error("Failed to resolve gateway approval from Telegram button: %s", exc) + return + # --- Update prompt callbacks --- if not data.startswith("update_prompt:"): return diff --git a/tests/gateway/test_slack_approval_buttons.py b/tests/gateway/test_slack_approval_buttons.py new file mode 100644 index 00000000..496f472c --- /dev/null +++ b/tests/gateway/test_slack_approval_buttons.py @@ -0,0 +1,373 @@ +"""Tests for Slack Block Kit approval buttons and thread context fetching.""" + +import asyncio +import os +import sys +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +# 
--------------------------------------------------------------------------- +# Ensure the repo root is importable +# --------------------------------------------------------------------------- +_repo = str(Path(__file__).resolve().parents[2]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +# --------------------------------------------------------------------------- +# Minimal Slack SDK mock so SlackAdapter can be imported +# --------------------------------------------------------------------------- +def _ensure_slack_mock(): + """Wire up the minimal mocks required to import SlackAdapter.""" + if "slack_bolt" in sys.modules: + return + slack_bolt = MagicMock() + slack_bolt.async_app.AsyncApp = MagicMock + sys.modules["slack_bolt"] = slack_bolt + sys.modules["slack_bolt.async_app"] = slack_bolt.async_app + handler_mod = MagicMock() + handler_mod.AsyncSocketModeHandler = MagicMock + sys.modules["slack_bolt.adapter"] = MagicMock() + sys.modules["slack_bolt.adapter.socket_mode"] = MagicMock() + sys.modules["slack_bolt.adapter.socket_mode.async_handler"] = handler_mod + sdk_mod = MagicMock() + sdk_mod.web = MagicMock() + sdk_mod.web.async_client = MagicMock() + sdk_mod.web.async_client.AsyncWebClient = MagicMock + sys.modules["slack_sdk"] = sdk_mod + sys.modules["slack_sdk.web"] = sdk_mod.web + sys.modules["slack_sdk.web.async_client"] = sdk_mod.web.async_client + + +_ensure_slack_mock() + +from gateway.platforms.slack import SlackAdapter +from gateway.config import Platform, PlatformConfig + + +def _make_adapter(): + """Create a SlackAdapter instance with mocked internals.""" + config = PlatformConfig(enabled=True, token="xoxb-test-token") + adapter = SlackAdapter(config) + adapter._app = MagicMock() + adapter._bot_user_id = "U_BOT" + adapter._team_clients = {"T1": AsyncMock()} + adapter._team_bot_user_ids = {"T1": "U_BOT"} + adapter._channel_team = {"C1": "T1"} + return adapter + + +# 
=========================================================================== +# send_exec_approval — Block Kit buttons +# =========================================================================== + +class TestSlackExecApproval: + """Test the send_exec_approval method sends Block Kit buttons.""" + + @pytest.mark.asyncio + async def test_sends_blocks_with_buttons(self): + adapter = _make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.chat_postMessage = AsyncMock(return_value={"ts": "1234.5678"}) + + result = await adapter.send_exec_approval( + chat_id="C1", + command="rm -rf /important", + session_key="agent:main:slack:group:C1:1111", + description="dangerous deletion", + ) + + assert result.success is True + assert result.message_id == "1234.5678" + + # Verify chat_postMessage was called with blocks + mock_client.chat_postMessage.assert_called_once() + kwargs = mock_client.chat_postMessage.call_args[1] + assert "blocks" in kwargs + blocks = kwargs["blocks"] + assert len(blocks) == 2 + assert blocks[0]["type"] == "section" + assert "rm -rf /important" in blocks[0]["text"]["text"] + assert "dangerous deletion" in blocks[0]["text"]["text"] + assert blocks[1]["type"] == "actions" + elements = blocks[1]["elements"] + assert len(elements) == 4 + action_ids = [e["action_id"] for e in elements] + assert "hermes_approve_once" in action_ids + assert "hermes_approve_session" in action_ids + assert "hermes_approve_always" in action_ids + assert "hermes_deny" in action_ids + # Each button carries the session key as value + for e in elements: + assert e["value"] == "agent:main:slack:group:C1:1111" + + @pytest.mark.asyncio + async def test_sends_in_thread(self): + adapter = _make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.chat_postMessage = AsyncMock(return_value={"ts": "1234.5678"}) + + await adapter.send_exec_approval( + chat_id="C1", + command="echo test", + session_key="test-session", + metadata={"thread_id": "9999.0000"}, + ) + + 
kwargs = mock_client.chat_postMessage.call_args[1] + assert kwargs.get("thread_ts") == "9999.0000" + + @pytest.mark.asyncio + async def test_not_connected(self): + adapter = _make_adapter() + adapter._app = None + result = await adapter.send_exec_approval( + chat_id="C1", command="ls", session_key="s" + ) + assert result.success is False + + @pytest.mark.asyncio + async def test_truncates_long_command(self): + adapter = _make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.chat_postMessage = AsyncMock(return_value={"ts": "1.2"}) + + long_cmd = "x" * 5000 + await adapter.send_exec_approval( + chat_id="C1", command=long_cmd, session_key="s" + ) + + kwargs = mock_client.chat_postMessage.call_args[1] + section_text = kwargs["blocks"][0]["text"]["text"] + assert "..." in section_text + assert len(section_text) < 5000 + + +# =========================================================================== +# _handle_approval_action — button click handler +# =========================================================================== + +class TestSlackApprovalAction: + """Test the approval button click handler.""" + + @pytest.mark.asyncio + async def test_resolves_approval(self): + adapter = _make_adapter() + adapter._approval_resolved["1234.5678"] = False + + ack = AsyncMock() + body = { + "message": { + "ts": "1234.5678", + "blocks": [ + {"type": "section", "text": {"type": "mrkdwn", "text": "original text"}}, + {"type": "actions", "elements": []}, + ], + }, + "channel": {"id": "C1"}, + "user": {"name": "norbert"}, + } + action = { + "action_id": "hermes_approve_once", + "value": "agent:main:slack:group:C1:1111", + } + + mock_client = adapter._team_clients["T1"] + mock_client.chat_update = AsyncMock() + + with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve: + await adapter._handle_approval_action(ack, body, action) + + ack.assert_called_once() + mock_resolve.assert_called_once_with("agent:main:slack:group:C1:1111", "once") 
+ + # Message should be updated with decision + mock_client.chat_update.assert_called_once() + update_kwargs = mock_client.chat_update.call_args[1] + assert "Approved once by norbert" in update_kwargs["text"] + + @pytest.mark.asyncio + async def test_prevents_double_click(self): + adapter = _make_adapter() + adapter._approval_resolved["1234.5678"] = True # Already resolved + + ack = AsyncMock() + body = { + "message": {"ts": "1234.5678", "blocks": []}, + "channel": {"id": "C1"}, + "user": {"name": "norbert"}, + } + action = { + "action_id": "hermes_approve_once", + "value": "some-session", + } + + with patch("tools.approval.resolve_gateway_approval") as mock_resolve: + await adapter._handle_approval_action(ack, body, action) + + # Should have acked but NOT resolved + ack.assert_called_once() + mock_resolve.assert_not_called() + + @pytest.mark.asyncio + async def test_deny_action(self): + adapter = _make_adapter() + adapter._approval_resolved["1.2"] = False + + ack = AsyncMock() + body = { + "message": {"ts": "1.2", "blocks": [ + {"type": "section", "text": {"type": "mrkdwn", "text": "cmd"}}, + ]}, + "channel": {"id": "C1"}, + "user": {"name": "alice"}, + } + action = {"action_id": "hermes_deny", "value": "session-key"} + + mock_client = adapter._team_clients["T1"] + mock_client.chat_update = AsyncMock() + + with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve: + await adapter._handle_approval_action(ack, body, action) + + mock_resolve.assert_called_once_with("session-key", "deny") + update_kwargs = mock_client.chat_update.call_args[1] + assert "Denied by alice" in update_kwargs["text"] + + +# =========================================================================== +# _fetch_thread_context +# =========================================================================== + +class TestSlackThreadContext: + """Test thread context fetching.""" + + @pytest.mark.asyncio + async def test_fetches_and_formats_context(self): + adapter = 
_make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.conversations_replies = AsyncMock(return_value={ + "messages": [ + {"ts": "1000.0", "user": "U1", "text": "This is the parent message"}, + {"ts": "1000.1", "user": "U2", "text": "I think we should refactor"}, + {"ts": "1000.2", "user": "U1", "text": "Good idea, <@U_BOT> what do you think?"}, + ] + }) + + # Mock user name resolution + adapter._user_name_cache = {"U1": "Alice", "U2": "Bob"} + + context = await adapter._fetch_thread_context( + channel_id="C1", + thread_ts="1000.0", + current_ts="1000.2", # The message that triggered the fetch + team_id="T1", + ) + + assert "[Thread context" in context + assert "[thread parent] Alice: This is the parent message" in context + assert "Bob: I think we should refactor" in context + # Current message should be excluded + assert "what do you think" not in context + # Bot mention should be stripped from context + assert "<@U_BOT>" not in context + + @pytest.mark.asyncio + async def test_skips_bot_messages(self): + adapter = _make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.conversations_replies = AsyncMock(return_value={ + "messages": [ + {"ts": "1000.0", "user": "U1", "text": "Parent"}, + {"ts": "1000.1", "bot_id": "B1", "text": "Bot reply (should be skipped)"}, + {"ts": "1000.2", "user": "U1", "text": "Current"}, + ] + }) + adapter._user_name_cache = {"U1": "Alice"} + + context = await adapter._fetch_thread_context( + channel_id="C1", thread_ts="1000.0", current_ts="1000.2", team_id="T1" + ) + + assert "Bot reply" not in context + assert "Alice: Parent" in context + + @pytest.mark.asyncio + async def test_empty_thread(self): + adapter = _make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.conversations_replies = AsyncMock(return_value={"messages": []}) + + context = await adapter._fetch_thread_context( + channel_id="C1", thread_ts="1000.0", current_ts="1000.1", team_id="T1" + ) + assert context == "" + + 
@pytest.mark.asyncio + async def test_api_failure_returns_empty(self): + adapter = _make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.conversations_replies = AsyncMock(side_effect=Exception("API error")) + + context = await adapter._fetch_thread_context( + channel_id="C1", thread_ts="1000.0", current_ts="1000.1", team_id="T1" + ) + assert context == "" + + +# =========================================================================== +# _has_active_session_for_thread — session key fix (#5833) +# =========================================================================== + +class TestSessionKeyFix: + """Test that _has_active_session_for_thread uses build_session_key.""" + + def test_uses_build_session_key(self): + """Verify the fix uses build_session_key instead of manual key construction.""" + adapter = _make_adapter() + + # Mock session store with a known entry + mock_store = MagicMock() + mock_store._entries = { + "agent:main:slack:group:C1:1000.0": MagicMock() + } + mock_store._ensure_loaded = MagicMock() + mock_store.config = MagicMock() + mock_store.config.group_sessions_per_user = False # threads don't include user_id + mock_store.config.thread_sessions_per_user = False + adapter._session_store = mock_store + + # With the fix, build_session_key should be called which respects + # group_sessions_per_user=False (no user_id appended) + result = adapter._has_active_session_for_thread( + channel_id="C1", thread_ts="1000.0", user_id="U123" + ) + + # Should find the session because build_session_key with + # group_sessions_per_user=False doesn't append user_id + assert result is True + + def test_no_session_returns_false(self): + adapter = _make_adapter() + mock_store = MagicMock() + mock_store._entries = {} + mock_store._ensure_loaded = MagicMock() + mock_store.config = MagicMock() + mock_store.config.group_sessions_per_user = True + mock_store.config.thread_sessions_per_user = False + adapter._session_store = mock_store + + result = 
adapter._has_active_session_for_thread( + channel_id="C1", thread_ts="1000.0", user_id="U123" + ) + assert result is False + + def test_no_session_store(self): + adapter = _make_adapter() + # No _session_store attribute + result = adapter._has_active_session_for_thread( + channel_id="C1", thread_ts="1000.0", user_id="U123" + ) + assert result is False diff --git a/tests/gateway/test_telegram_approval_buttons.py b/tests/gateway/test_telegram_approval_buttons.py new file mode 100644 index 00000000..1b8249bc --- /dev/null +++ b/tests/gateway/test_telegram_approval_buttons.py @@ -0,0 +1,284 @@ +"""Tests for Telegram inline keyboard approval buttons.""" + +import asyncio +import os +import sys +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +# --------------------------------------------------------------------------- +# Ensure the repo root is importable +# --------------------------------------------------------------------------- +_repo = str(Path(__file__).resolve().parents[2]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +# --------------------------------------------------------------------------- +# Minimal Telegram mock so TelegramAdapter can be imported +# --------------------------------------------------------------------------- +def _ensure_telegram_mock(): + """Wire up the minimal mocks required to import TelegramAdapter.""" + if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"): + return + + mod = MagicMock() + mod.ext.ContextTypes.DEFAULT_TYPE = type(None) + mod.constants.ParseMode.MARKDOWN = "Markdown" + mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2" + mod.constants.ParseMode.HTML = "HTML" + mod.constants.ChatType.PRIVATE = "private" + mod.constants.ChatType.GROUP = "group" + mod.constants.ChatType.SUPERGROUP = "supergroup" + mod.constants.ChatType.CHANNEL = "channel" + for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request", 
"telegram.error"): + sys.modules.setdefault(name, mod) + + +_ensure_telegram_mock() + +from gateway.platforms.telegram import TelegramAdapter +from gateway.config import Platform, PlatformConfig + + +def _make_adapter(): + """Create a TelegramAdapter with mocked internals.""" + config = PlatformConfig(enabled=True, token="test-token") + adapter = TelegramAdapter(config) + adapter._bot = AsyncMock() + adapter._app = MagicMock() + return adapter + + +# =========================================================================== +# send_exec_approval — inline keyboard buttons +# =========================================================================== + +class TestTelegramExecApproval: + """Test the send_exec_approval method sends InlineKeyboard buttons.""" + + @pytest.mark.asyncio + async def test_sends_inline_keyboard(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 42 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + result = await adapter.send_exec_approval( + chat_id="12345", + command="rm -rf /important", + session_key="agent:main:telegram:group:12345:99", + description="dangerous deletion", + ) + + assert result.success is True + assert result.message_id == "42" + + adapter._bot.send_message.assert_called_once() + kwargs = adapter._bot.send_message.call_args[1] + assert kwargs["chat_id"] == 12345 + assert "rm -rf /important" in kwargs["text"] + assert "dangerous deletion" in kwargs["text"] + assert kwargs["reply_markup"] is not None # InlineKeyboardMarkup + + @pytest.mark.asyncio + async def test_stores_approval_state(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 42 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + await adapter.send_exec_approval( + chat_id="12345", + command="echo test", + session_key="my-session-key", + ) + + # The approval_id should map to the session_key + assert len(adapter._approval_state) == 1 + approval_id = 
list(adapter._approval_state.keys())[0] + assert adapter._approval_state[approval_id] == "my-session-key" + + @pytest.mark.asyncio + async def test_sends_in_thread(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 42 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + await adapter.send_exec_approval( + chat_id="12345", + command="ls", + session_key="s", + metadata={"thread_id": "999"}, + ) + + kwargs = adapter._bot.send_message.call_args[1] + assert kwargs.get("message_thread_id") == 999 + + @pytest.mark.asyncio + async def test_not_connected(self): + adapter = _make_adapter() + adapter._bot = None + result = await adapter.send_exec_approval( + chat_id="12345", command="ls", session_key="s" + ) + assert result.success is False + + @pytest.mark.asyncio + async def test_truncates_long_command(self): + adapter = _make_adapter() + mock_msg = MagicMock() + mock_msg.message_id = 1 + adapter._bot.send_message = AsyncMock(return_value=mock_msg) + + long_cmd = "x" * 5000 + await adapter.send_exec_approval( + chat_id="12345", command=long_cmd, session_key="s" + ) + + kwargs = adapter._bot.send_message.call_args[1] + assert "..." 
in kwargs["text"] + assert len(kwargs["text"]) < 5000 + + +# =========================================================================== +# _handle_callback_query — approval button clicks +# =========================================================================== + +class TestTelegramApprovalCallback: + """Test the approval callback handling in _handle_callback_query.""" + + @pytest.mark.asyncio + async def test_resolves_approval_on_click(self): + adapter = _make_adapter() + # Set up approval state + adapter._approval_state[1] = "agent:main:telegram:group:12345:99" + + # Mock callback query + query = AsyncMock() + query.data = "ea:once:1" + query.message = MagicMock() + query.message.chat_id = 12345 + query.from_user = MagicMock() + query.from_user.first_name = "Norbert" + query.answer = AsyncMock() + query.edit_message_text = AsyncMock() + + update = MagicMock() + update.callback_query = query + context = MagicMock() + + with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve: + await adapter._handle_callback_query(update, context) + + mock_resolve.assert_called_once_with("agent:main:telegram:group:12345:99", "once") + query.answer.assert_called_once() + query.edit_message_text.assert_called_once() + + # State should be cleaned up + assert 1 not in adapter._approval_state + + @pytest.mark.asyncio + async def test_deny_button(self): + adapter = _make_adapter() + adapter._approval_state[2] = "some-session" + + query = AsyncMock() + query.data = "ea:deny:2" + query.message = MagicMock() + query.message.chat_id = 12345 + query.from_user = MagicMock() + query.from_user.first_name = "Alice" + query.answer = AsyncMock() + query.edit_message_text = AsyncMock() + + update = MagicMock() + update.callback_query = query + context = MagicMock() + + with patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve: + await adapter._handle_callback_query(update, context) + + mock_resolve.assert_called_once_with("some-session", 
"deny") + edit_kwargs = query.edit_message_text.call_args[1] + assert "Denied" in edit_kwargs["text"] + + @pytest.mark.asyncio + async def test_already_resolved(self): + adapter = _make_adapter() + # No state for approval_id 99 — already resolved + + query = AsyncMock() + query.data = "ea:once:99" + query.message = MagicMock() + query.message.chat_id = 12345 + query.from_user = MagicMock() + query.from_user.first_name = "Bob" + query.answer = AsyncMock() + + update = MagicMock() + update.callback_query = query + context = MagicMock() + + with patch("tools.approval.resolve_gateway_approval") as mock_resolve: + await adapter._handle_callback_query(update, context) + + # Should NOT resolve — already handled + mock_resolve.assert_not_called() + # Should still ack with "already resolved" message + query.answer.assert_called_once() + assert "already been resolved" in query.answer.call_args[1]["text"] + + @pytest.mark.asyncio + async def test_model_picker_callback_not_affected(self): + """Ensure model picker callbacks still route correctly.""" + adapter = _make_adapter() + + query = AsyncMock() + query.data = "mp:some_provider" + query.message = MagicMock() + query.message.chat_id = 12345 + query.from_user = MagicMock() + + update = MagicMock() + update.callback_query = query + context = MagicMock() + + # Model picker callback should be handled (not crash) + # We just verify it doesn't try to resolve an approval + with patch("tools.approval.resolve_gateway_approval") as mock_resolve: + with patch.object(adapter, "_handle_model_picker_callback", new_callable=AsyncMock): + await adapter._handle_callback_query(update, context) + + mock_resolve.assert_not_called() + + @pytest.mark.asyncio + async def test_update_prompt_callback_not_affected(self): + """Ensure update prompt callbacks still work.""" + adapter = _make_adapter() + + query = AsyncMock() + query.data = "update_prompt:y" + query.message = MagicMock() + query.message.chat_id = 12345 + query.from_user = MagicMock() + 
query.from_user.id = 123 + query.answer = AsyncMock() + query.edit_message_text = AsyncMock() + + update = MagicMock() + update.callback_query = query + context = MagicMock() + + with patch("tools.approval.resolve_gateway_approval") as mock_resolve: + with patch("hermes_constants.get_hermes_home", return_value=Path("/tmp/test")): + try: + await adapter._handle_callback_query(update, context) + except Exception: + pass # May fail on file write, that's fine + + # Should NOT have triggered approval resolution + mock_resolve.assert_not_called() From ab0c1e58f1a54d47a8863e1f8b249916ed9d062e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:04:50 -0700 Subject: [PATCH 078/154] fix: pause typing indicator during approval waits (#5893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the agent waits for dangerous-command approval, the typing indicator (_keep_typing loop) kept refreshing. On Slack's Assistant API this is critical: assistant_threads_setStatus disables the compose box, preventing users from typing /approve or /deny. - Add _typing_paused set + pause/resume methods to BasePlatformAdapter - _keep_typing skips send_typing when chat_id is paused - _approval_notify_sync pauses typing before sending approval prompt - _handle_approve_command / _handle_deny_command resume typing after Benefits all platforms — no reason to show 'is thinking...' while the agent is idle waiting for human input. 
--- gateway/platforms/base.py | 26 ++++++++++++++++++++++++-- gateway/run.py | 19 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index e9464365..a1fef589 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -484,6 +484,9 @@ class BasePlatformAdapter(ABC): self._background_tasks: set[asyncio.Task] = set() # Chats where auto-TTS on voice input is disabled (set by /voice off) self._auto_tts_disabled_chats: set = set() + # Chats where typing indicator is paused (e.g. during approval waits). + # _keep_typing skips send_typing when the chat_id is in this set. + self._typing_paused: set = set() @property def has_fatal_error(self) -> bool: @@ -943,10 +946,16 @@ class BasePlatformAdapter(ABC): Telegram/Discord typing status expires after ~5 seconds, so we refresh every 2 to recover quickly after progress messages interrupt it. + + Skips send_typing when the chat is in ``_typing_paused`` (e.g. while + the agent is waiting for dangerous-command approval). This is critical + for Slack's Assistant API where ``assistant_threads_setStatus`` disables + the compose box — pausing lets the user type ``/approve`` or ``/deny``. """ try: while True: - await self.send_typing(chat_id, metadata=metadata) + if chat_id not in self._typing_paused: + await self.send_typing(chat_id, metadata=metadata) await asyncio.sleep(interval) except asyncio.CancelledError: pass # Normal cancellation when handler completes @@ -960,7 +969,20 @@ class BasePlatformAdapter(ABC): await self.stop_typing(chat_id) except Exception: pass - + self._typing_paused.discard(chat_id) + + def pause_typing_for_chat(self, chat_id: str) -> None: + """Pause typing indicator for a chat (e.g. during approval waits). + + Thread-safe (CPython GIL) — can be called from the sync agent thread + while ``_keep_typing`` runs on the async event loop. 
+ """ + self._typing_paused.add(chat_id) + + def resume_typing_for_chat(self, chat_id: str) -> None: + """Resume typing indicator for a chat after approval resolves.""" + self._typing_paused.discard(chat_id) + # ── Processing lifecycle hooks ────────────────────────────────────────── # Subclasses override these to react to message processing events # (e.g. Discord adds 👀/✅/❌ reactions). diff --git a/gateway/run.py b/gateway/run.py index f6fb563c..f1052825 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -5434,6 +5434,11 @@ class GatewayRunner: if not count: return "No pending command to approve." + # Resume typing indicator — agent is about to continue processing. + _adapter = self.adapters.get(source.platform) + if _adapter: + _adapter.resume_typing_for_chat(source.chat_id) + count_msg = f" ({count} commands)" if count > 1 else "" logger.info("User approved %d dangerous command(s) via /approve%s", count, scope_msg) return f"✅ Command{'s' if count > 1 else ''} approved{scope_msg}{count_msg}. The agent is resuming..." @@ -5466,6 +5471,11 @@ class GatewayRunner: if not count: return "No pending command to deny." + # Resume typing indicator — agent continues (with BLOCKED result). + _adapter = self.adapters.get(source.platform) + if _adapter: + _adapter.resume_typing_for_chat(source.chat_id) + count_msg = f" ({count} commands)" if count > 1 else "" logger.info("User denied %d dangerous command(s) via /deny", count) return f"❌ Command{'s' if count > 1 else ''} denied{count_msg}." @@ -6759,6 +6769,15 @@ class GatewayRunner: UX. Otherwise fall back to a plain text message with ``/approve`` instructions. """ + # Pause the typing indicator while the agent waits for + # user approval. Critical for Slack's Assistant API where + # assistant_threads_setStatus disables the compose box — the + # user literally cannot type /approve while "is thinking..." + # is active. The approval message send auto-clears the Slack + # status; pausing prevents _keep_typing from re-setting it. 
+ # Typing resumes in _handle_approve_command/_handle_deny_command. + _status_adapter.pause_typing_for_chat(_status_chat_id) + cmd = approval_data.get("command", "") desc = approval_data.get("description", "dangerous command") From e49c8bbbbb1f06dbe34f71c9400f77639d16a781 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:12:08 -0700 Subject: [PATCH 079/154] =?UTF-8?q?feat(slack):=20thread=20engagement=20?= =?UTF-8?q?=E2=80=94=20auto-respond=20in=20bot-started=20and=20mentioned?= =?UTF-8?q?=20threads=20(#5897)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the bot sends a message in a thread, track its ts in _bot_message_ts. When the bot is @mentioned in a thread, register it in _mentioned_threads. Both sets enable auto-responding to future messages in those threads without requiring repeated @mentions — making the bot behave like a team member that stays engaged once a conversation starts. Channel message gating now checks 4 signals (in order): 1. @mention in this message 2. Reply in a thread the bot started/participated in (_bot_message_ts) 3. Message in a thread where the bot was previously @mentioned (_mentioned_threads) 4. Existing session for this thread (_has_active_session_for_thread — survives restarts) Thread context fetching now triggers on ANY first-entry path (not just @mention), so the agent gets context whether it's entering via a mention, a bot-thread reply, or a mentioned-thread auto-trigger. Both tracking sets are bounded (5000 cap with prune-oldest-half) to prevent unbounded memory growth in long-running deployments. Salvaged from PR #5754 by @hhhonzik. Preserves our existing approval buttons, thread context fetching, and session key fix. Does NOT include the edit_message format_message() removal (that was a regression in the original PR). Tests: 4 new tests for bot-ts tracking and mentioned-thread bounds. 
--- gateway/platforms/slack.py | 100 +++++++++++++------ tests/gateway/test_slack_approval_buttons.py | 53 ++++++++++ 2 files changed, 120 insertions(+), 33 deletions(-) diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 0cdf2a33..164e6ab4 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -87,6 +87,14 @@ class SlackAdapter(BasePlatformAdapter): # Track pending approval message_ts → resolved flag to prevent # double-clicks on approval buttons. self._approval_resolved: Dict[str, bool] = {} + # Track timestamps of messages sent by the bot so we can respond + # to thread replies even without an explicit @mention. + self._bot_message_ts: set = set() + self._BOT_TS_MAX = 5000 # cap to avoid unbounded growth + # Track threads where the bot has been @mentioned — once mentioned, + # respond to ALL subsequent messages in that thread automatically. + self._mentioned_threads: set = set() + self._MENTIONED_THREADS_MAX = 5000 async def connect(self) -> bool: """Connect to Slack via Socket Mode.""" @@ -268,9 +276,22 @@ class SlackAdapter(BasePlatformAdapter): last_result = await self._get_client(chat_id).chat_postMessage(**kwargs) + # Track the sent message ts so we can auto-respond to thread + # replies without requiring @mention. 
+ sent_ts = last_result.get("ts") if last_result else None + if sent_ts: + self._bot_message_ts.add(sent_ts) + # Also register the thread root so replies-to-my-replies work + if thread_ts: + self._bot_message_ts.add(thread_ts) + if len(self._bot_message_ts) > self._BOT_TS_MAX: + excess = len(self._bot_message_ts) - self._BOT_TS_MAX // 2 + for old_ts in list(self._bot_message_ts)[:excess]: + self._bot_message_ts.discard(old_ts) + return SendResult( success=True, - message_id=last_result.get("ts") if last_result else None, + message_id=sent_ts, raw_response=last_result, ) @@ -778,48 +799,61 @@ class SlackAdapter(BasePlatformAdapter): else: thread_ts = event.get("thread_ts") or ts # ts fallback for channels - # In channels, only respond if bot is mentioned OR if this is a - # reply in a thread where the bot has an active session. + # In channels, respond if: + # 1. The bot is @mentioned in this message, OR + # 2. The message is a reply in a thread the bot started/participated in, OR + # 3. The message is in a thread where the bot was previously @mentioned, OR + # 4. 
There's an existing session for this thread (survives restarts) bot_uid = self._team_bot_user_ids.get(team_id, self._bot_user_id) is_mentioned = bot_uid and f"<@{bot_uid}>" in text - + event_thread_ts = event.get("thread_ts") + is_thread_reply = bool(event_thread_ts and event_thread_ts != ts) + if not is_dm and bot_uid and not is_mentioned: - # Check if this is a thread reply (thread_ts exists and differs from ts) - event_thread_ts = event.get("thread_ts") - is_thread_reply = event_thread_ts and event_thread_ts != ts - - if is_thread_reply and self._has_active_session_for_thread( - channel_id=channel_id, - thread_ts=event_thread_ts, - user_id=user_id, - ): - # Allow thread replies without mention if there's an active session - pass - else: - # Not a thread reply or no active session - ignore + reply_to_bot_thread = ( + is_thread_reply and event_thread_ts in self._bot_message_ts + ) + in_mentioned_thread = ( + event_thread_ts is not None + and event_thread_ts in self._mentioned_threads + ) + has_session = ( + is_thread_reply + and self._has_active_session_for_thread( + channel_id=channel_id, + thread_ts=event_thread_ts, + user_id=user_id, + ) + ) + if not reply_to_bot_thread and not in_mentioned_thread and not has_session: return - + if is_mentioned: # Strip the bot mention from the text text = text.replace(f"<@{bot_uid}>", "").strip() + # Register this thread so all future messages auto-trigger the bot + if event_thread_ts: + self._mentioned_threads.add(event_thread_ts) + if len(self._mentioned_threads) > self._MENTIONED_THREADS_MAX: + to_remove = list(self._mentioned_threads)[:self._MENTIONED_THREADS_MAX // 2] + for t in to_remove: + self._mentioned_threads.discard(t) - # When first mentioned in an existing thread, fetch thread context - # so the agent understands the conversation it's joining. 
- event_thread_ts = event.get("thread_ts") - is_thread_reply = event_thread_ts and event_thread_ts != ts - if is_thread_reply and not self._has_active_session_for_thread( + # When entering a thread for the first time (no existing session), + # fetch thread context so the agent understands the conversation. + if is_thread_reply and not self._has_active_session_for_thread( + channel_id=channel_id, + thread_ts=event_thread_ts, + user_id=user_id, + ): + thread_context = await self._fetch_thread_context( channel_id=channel_id, thread_ts=event_thread_ts, - user_id=user_id, - ): - thread_context = await self._fetch_thread_context( - channel_id=channel_id, - thread_ts=event_thread_ts, - current_ts=ts, - team_id=team_id, - ) - if thread_context: - text = thread_context + text + current_ts=ts, + team_id=team_id, + ) + if thread_context: + text = thread_context + text # Determine message type msg_type = MessageType.TEXT diff --git a/tests/gateway/test_slack_approval_buttons.py b/tests/gateway/test_slack_approval_buttons.py index 496f472c..7278bd86 100644 --- a/tests/gateway/test_slack_approval_buttons.py +++ b/tests/gateway/test_slack_approval_buttons.py @@ -371,3 +371,56 @@ class TestSessionKeyFix: channel_id="C1", thread_ts="1000.0", user_id="U123" ) assert result is False + + +# =========================================================================== +# Thread engagement — bot-started threads & mentioned threads +# =========================================================================== + +class TestThreadEngagement: + """Test _bot_message_ts and _mentioned_threads tracking.""" + + @pytest.mark.asyncio + async def test_send_tracks_bot_message_ts(self): + """Bot's sent messages are tracked so thread replies work without @mention.""" + adapter = _make_adapter() + mock_client = adapter._team_clients["T1"] + mock_client.chat_postMessage = AsyncMock(return_value={"ts": "9000.1"}) + + await adapter.send(chat_id="C1", content="Hello!", metadata={"thread_id": "8000.0"}) + + 
assert "9000.1" in adapter._bot_message_ts + # Thread root should also be tracked + assert "8000.0" in adapter._bot_message_ts + + @pytest.mark.asyncio + async def test_bot_message_ts_cap(self): + """Verify memory is bounded when many messages are sent.""" + adapter = _make_adapter() + adapter._BOT_TS_MAX = 10 # low cap for testing + mock_client = adapter._team_clients["T1"] + + for i in range(20): + mock_client.chat_postMessage = AsyncMock(return_value={"ts": f"{i}.0"}) + await adapter.send(chat_id="C1", content=f"msg {i}") + + assert len(adapter._bot_message_ts) <= 10 + + def test_mentioned_threads_populated_on_mention(self): + """When bot is @mentioned in a thread, that thread is tracked.""" + adapter = _make_adapter() + # Simulate what _handle_slack_message does on mention + adapter._mentioned_threads.add("1000.0") + assert "1000.0" in adapter._mentioned_threads + + def test_mentioned_threads_cap(self): + """Verify _mentioned_threads is bounded.""" + adapter = _make_adapter() + adapter._MENTIONED_THREADS_MAX = 10 + for i in range(15): + adapter._mentioned_threads.add(f"{i}.0") + if len(adapter._mentioned_threads) > adapter._MENTIONED_THREADS_MAX: + to_remove = list(adapter._mentioned_threads)[:adapter._MENTIONED_THREADS_MAX // 2] + for t in to_remove: + adapter._mentioned_threads.discard(t) + assert len(adapter._mentioned_threads) <= 10 From 69c753c19b43b02d3511e6045657c9a0e80cb674 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 11:14:12 -0700 Subject: [PATCH 080/154] fix: thread gateway user_id to memory plugins for per-user scoping (#5895) Memory plugins (Mem0, Honcho) used static identifiers ('hermes-user', config peerName) meaning all gateway users shared the same memory bucket. 
Changes: - AIAgent.__init__: add user_id parameter, store as self._user_id - run_agent.py: include user_id in _init_kwargs passed to memory providers - gateway/run.py: pass source.user_id to AIAgent in primary + background paths - Mem0 plugin: prefer kwargs user_id over config default - Honcho plugin: override cfg.peer_name with gateway user_id when present CLI sessions (user_id=None) preserve existing defaults. Only gateway sessions with a real platform user_id get per-user memory scoping. Reported by plev333. --- gateway/run.py | 2 + plugins/memory/honcho/__init__.py | 6 + plugins/memory/mem0/__init__.py | 4 +- run_agent.py | 5 + tests/agent/test_memory_user_id.py | 289 +++++++++++++++++++++++++++++ 5 files changed, 305 insertions(+), 1 deletion(-) create mode 100644 tests/agent/test_memory_user_id.py diff --git a/gateway/run.py b/gateway/run.py index f1052825..df7df7db 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4551,6 +4551,7 @@ class GatewayRunner: provider_data_collection=pr.get("data_collection"), session_id=task_id, platform=platform_key, + user_id=source.user_id, session_db=self._session_db, fallback_model=self._fallback_model, ) @@ -6645,6 +6646,7 @@ class GatewayRunner: provider_data_collection=pr.get("data_collection"), session_id=session_id, platform=platform_key, + user_id=source.user_id, session_db=self._session_db, fallback_model=self._fallback_model, ) diff --git a/plugins/memory/honcho/__init__.py b/plugins/memory/honcho/__init__.py index db277366..782af579 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -216,6 +216,12 @@ class HonchoMemoryProvider(MemoryProvider): logger.debug("Honcho not configured — plugin inactive") return + # Override peer_name with gateway user_id for per-user memory scoping. + # CLI sessions won't have user_id, so the config default is preserved. 
+ _gw_user_id = kwargs.get("user_id") + if _gw_user_id: + cfg.peer_name = _gw_user_id + self._config = cfg # ----- B1: recall_mode from config ----- diff --git a/plugins/memory/mem0/__init__.py b/plugins/memory/mem0/__init__.py index 7e7d261f..dc56becd 100644 --- a/plugins/memory/mem0/__init__.py +++ b/plugins/memory/mem0/__init__.py @@ -202,7 +202,9 @@ class Mem0MemoryProvider(MemoryProvider): def initialize(self, session_id: str, **kwargs) -> None: self._config = _load_config() self._api_key = self._config.get("api_key", "") - self._user_id = self._config.get("user_id", "hermes-user") + # Prefer gateway-provided user_id for per-user memory scoping; + # fall back to config/env default for CLI (single-user) sessions. + self._user_id = kwargs.get("user_id") or self._config.get("user_id", "hermes-user") self._agent_id = self._config.get("agent_id", "hermes") self._rerank = self._config.get("rerank", True) diff --git a/run_agent.py b/run_agent.py index 4c6cf500..1398a3d1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -526,6 +526,7 @@ class AIAgent: reasoning_config: Dict[str, Any] = None, prefill_messages: List[Dict[str, Any]] = None, platform: str = None, + user_id: str = None, skip_context_files: bool = False, skip_memory: bool = False, session_db=None, @@ -590,6 +591,7 @@ class AIAgent: self.quiet_mode = quiet_mode self.ephemeral_system_prompt = ephemeral_system_prompt self.platform = platform # "cli", "telegram", "discord", "whatsapp", etc. 
+ self._user_id = user_id # Platform user identifier (gateway sessions) # Pluggable print function — CLI replaces this with _cprint so that # raw ANSI status lines are routed through prompt_toolkit's renderer # instead of going directly to stdout where patch_stdout's StdoutProxy @@ -1092,6 +1094,9 @@ class AIAgent: "hermes_home": str(_ghh()), "agent_context": "primary", } + # Thread gateway user identity for per-user memory scoping + if self._user_id: + _init_kwargs["user_id"] = self._user_id # Profile identity for per-profile provider scoping try: from hermes_cli.profiles import get_active_profile_name diff --git a/tests/agent/test_memory_user_id.py b/tests/agent/test_memory_user_id.py new file mode 100644 index 00000000..04f90c74 --- /dev/null +++ b/tests/agent/test_memory_user_id.py @@ -0,0 +1,289 @@ +"""Tests for per-user memory scoping via user_id threading. + +Verifies that gateway user_id flows from AIAgent -> MemoryManager -> plugins, +so each gateway user gets their own memory bucket instead of sharing a static one. 
+""" + +import json +import os +import pytest +from unittest.mock import MagicMock, patch + +from agent.memory_provider import MemoryProvider +from agent.memory_manager import MemoryManager + + +# --------------------------------------------------------------------------- +# Concrete test provider that records init kwargs +# --------------------------------------------------------------------------- + + +class RecordingProvider(MemoryProvider): + """Minimal provider that records what initialize() receives.""" + + def __init__(self, name="recording"): + self._name = name + self._init_kwargs = {} + self._init_session_id = None + + @property + def name(self) -> str: + return self._name + + def is_available(self) -> bool: + return True + + def initialize(self, session_id: str, **kwargs) -> None: + self._init_session_id = session_id + self._init_kwargs = dict(kwargs) + + def system_prompt_block(self) -> str: + return "" + + def prefetch(self, query: str, *, session_id: str = "") -> str: + return "" + + def sync_turn(self, user_content, assistant_content, *, session_id=""): + pass + + def get_tool_schemas(self): + return [] + + def handle_tool_call(self, tool_name, args, **kwargs): + return json.dumps({}) + + def shutdown(self): + pass + + +# --------------------------------------------------------------------------- +# MemoryManager user_id threading tests +# --------------------------------------------------------------------------- + + +class TestMemoryManagerUserIdThreading: + """Verify user_id reaches providers via initialize_all.""" + + def test_user_id_forwarded_to_provider(self): + mgr = MemoryManager() + p = RecordingProvider() + mgr.add_provider(p) + + mgr.initialize_all( + session_id="sess-123", + platform="telegram", + user_id="tg_user_42", + ) + + assert p._init_kwargs.get("user_id") == "tg_user_42" + assert p._init_kwargs.get("platform") == "telegram" + assert p._init_session_id == "sess-123" + + def test_no_user_id_when_cli(self): + """CLI sessions should 
not have user_id in kwargs.""" + mgr = MemoryManager() + p = RecordingProvider() + mgr.add_provider(p) + + mgr.initialize_all( + session_id="sess-456", + platform="cli", + ) + + assert "user_id" not in p._init_kwargs + assert p._init_kwargs.get("platform") == "cli" + + def test_user_id_none_not_forwarded(self): + """Explicit None user_id should not appear in kwargs.""" + mgr = MemoryManager() + p = RecordingProvider() + mgr.add_provider(p) + + # Simulates what happens when AIAgent passes user_id=None + # (the agent code only adds user_id to kwargs when it's truthy) + mgr.initialize_all( + session_id="sess-789", + platform="discord", + ) + + assert "user_id" not in p._init_kwargs + + def test_multiple_providers_all_receive_user_id(self): + from agent.builtin_memory_provider import BuiltinMemoryProvider + + mgr = MemoryManager() + # Use builtin + one external (MemoryManager only allows one external) + builtin = BuiltinMemoryProvider() + ext = RecordingProvider("external") + mgr.add_provider(builtin) + mgr.add_provider(ext) + + mgr.initialize_all( + session_id="sess-multi", + platform="slack", + user_id="slack_U12345", + ) + + assert ext._init_kwargs.get("user_id") == "slack_U12345" + assert ext._init_kwargs.get("platform") == "slack" + + +# --------------------------------------------------------------------------- +# Mem0 provider user_id tests +# --------------------------------------------------------------------------- + + +class TestMem0UserIdScoping: + """Verify Mem0 plugin uses gateway user_id when provided.""" + + def test_gateway_user_id_overrides_default(self): + """When user_id is passed via kwargs, it should override the config default.""" + from plugins.memory.mem0 import Mem0MemoryProvider + + provider = Mem0MemoryProvider() + # Mock _load_config to return a config with default user_id + with patch("plugins.memory.mem0._load_config", return_value={ + "api_key": "test-key", + "user_id": "hermes-user", + "agent_id": "hermes", + "rerank": True, + }): + 
provider.initialize(session_id="test-sess", user_id="tg_user_99") + + assert provider._user_id == "tg_user_99" + + def test_no_user_id_falls_back_to_config(self): + """Without user_id in kwargs, should use config default.""" + from plugins.memory.mem0 import Mem0MemoryProvider + + provider = Mem0MemoryProvider() + with patch("plugins.memory.mem0._load_config", return_value={ + "api_key": "test-key", + "user_id": "custom-default", + "agent_id": "hermes", + "rerank": True, + }): + provider.initialize(session_id="test-sess") + + assert provider._user_id == "custom-default" + + def test_no_user_id_no_config_uses_hermes_user(self): + """Without user_id or config override, should default to 'hermes-user'.""" + from plugins.memory.mem0 import Mem0MemoryProvider + + provider = Mem0MemoryProvider() + with patch("plugins.memory.mem0._load_config", return_value={ + "api_key": "test-key", + "agent_id": "hermes", + "rerank": True, + }): + provider.initialize(session_id="test-sess") + + assert provider._user_id == "hermes-user" + + def test_different_users_get_different_ids(self): + """Two providers initialized with different user_ids should be scoped differently.""" + from plugins.memory.mem0 import Mem0MemoryProvider + + p1 = Mem0MemoryProvider() + p2 = Mem0MemoryProvider() + + with patch("plugins.memory.mem0._load_config", return_value={ + "api_key": "test-key", + "user_id": "hermes-user", + "agent_id": "hermes", + "rerank": True, + }): + p1.initialize(session_id="sess-1", user_id="alice_123") + p2.initialize(session_id="sess-2", user_id="bob_456") + + assert p1._user_id == "alice_123" + assert p2._user_id == "bob_456" + assert p1._user_id != p2._user_id + + +# --------------------------------------------------------------------------- +# Honcho provider user_id tests +# --------------------------------------------------------------------------- + + +class TestHonchoUserIdScoping: + """Verify Honcho plugin uses gateway user_id for peer_name when provided.""" + + def 
test_gateway_user_id_overrides_peer_name(self): + """When user_id is in kwargs, cfg.peer_name should be overridden.""" + from plugins.memory.honcho import HonchoMemoryProvider + + provider = HonchoMemoryProvider() + + # Create a mock config with a static peer_name + mock_cfg = MagicMock() + mock_cfg.enabled = True + mock_cfg.api_key = "test-key" + mock_cfg.base_url = None + mock_cfg.peer_name = "static-user" + mock_cfg.recall_mode = "tools" # Use tools mode to defer session init + + with patch( + "plugins.memory.honcho.client.HonchoClientConfig.from_global_config", + return_value=mock_cfg, + ): + provider.initialize( + session_id="test-sess", + user_id="discord_user_789", + platform="discord", + ) + + # The config's peer_name should have been overridden with the user_id + assert mock_cfg.peer_name == "discord_user_789" + + def test_no_user_id_preserves_config_peer_name(self): + """Without user_id, the config peer_name should be preserved.""" + from plugins.memory.honcho import HonchoMemoryProvider + + provider = HonchoMemoryProvider() + + mock_cfg = MagicMock() + mock_cfg.enabled = True + mock_cfg.api_key = "test-key" + mock_cfg.base_url = None + mock_cfg.peer_name = "my-custom-peer" + mock_cfg.recall_mode = "tools" + + with patch( + "plugins.memory.honcho.client.HonchoClientConfig.from_global_config", + return_value=mock_cfg, + ): + provider.initialize( + session_id="test-sess", + platform="cli", + ) + + # peer_name should not have been overridden + assert mock_cfg.peer_name == "my-custom-peer" + + +# --------------------------------------------------------------------------- +# AIAgent user_id propagation test +# --------------------------------------------------------------------------- + + +class TestAIAgentUserIdPropagation: + """Verify AIAgent stores user_id and passes it to memory init kwargs.""" + + def test_user_id_stored_on_agent(self): + """AIAgent should store user_id as instance attribute.""" + with patch.dict(os.environ, {"HERMES_HOME": 
"/tmp/test_hermes"}): + from run_agent import AIAgent + agent = object.__new__(AIAgent) + # Manually set the attribute as __init__ does + agent._user_id = "test_user_42" + assert agent._user_id == "test_user_42" + + def test_user_id_none_by_default(self): + """AIAgent should have None user_id when not provided (CLI mode).""" + with patch.dict(os.environ, {"HERMES_HOME": "/tmp/test_hermes"}): + from run_agent import AIAgent + agent = object.__new__(AIAgent) + agent._user_id = None + assert agent._user_id is None From ca0459d109b9d23ce2b3c4c4cb6e8547a7eada3c Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 11:41:26 -0700 Subject: [PATCH 081/154] =?UTF-8?q?refactor:=20remove=2024=20confirmed=20d?= =?UTF-8?q?ead=20functions=20=E2=80=94=20432=20lines=20of=20unused=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each function was verified to have exactly 1 reference in the entire codebase (its own definition). Zero calls, zero imports, zero string references anywhere including tests. 
Removed by category: Superseded wrappers (replaced by newer implementations): - agent/anthropic_adapter.py: run_hermes_oauth_login, refresh_hermes_oauth_token - hermes_cli/callbacks.py: sudo_password_callback (superseded by CLI method) - hermes_cli/setup.py: _set_model_provider, _sync_model_from_disk - tools/file_tools.py: get_file_tools (superseded by registry.register) - tools/cronjob_tools.py: get_cronjob_tool_definitions (same) - tools/terminal_tool.py: _check_dangerous_command (_check_all_guards used) Dead private helpers (lost their callers during refactors): - agent/anthropic_adapter.py: _convert_user_content_part_to_anthropic - agent/display.py: honcho_session_line, write_tty - hermes_cli/providers.py: _build_labels (+ dead _labels_cache var) - hermes_cli/tools_config.py: _prompt_yes_no - hermes_cli/models.py: _extract_model_ids - hermes_cli/uninstall.py: log_error - gateway/platforms/feishu.py: _is_loop_ready - tools/file_operations.py: _read_image (64-line method) - tools/process_registry.py: cleanup_expired - tools/skill_manager_tool.py: check_skill_manage_requirements Dead class methods (zero callers): - run_agent.py: _is_anthropic_url (logic duplicated inline at L618) - run_agent.py: _classify_empty_content_response (68-line method, never wired) - cli.py: reset_conversation (callers all use new_session directly) - cli.py: _clear_current_input (added but never wired in) Other: - gateway/delivery.py: build_delivery_context_for_tool - tools/browser_tool.py: get_active_browser_sessions --- agent/anthropic_adapter.py | 84 ------------------------------------- agent/display.py | 18 -------- cli.py | 15 ------- gateway/delivery.py | 36 +--------------- gateway/platforms/feishu.py | 4 -- hermes_cli/callbacks.py | 41 ------------------ hermes_cli/models.py | 4 -- hermes_cli/providers.py | 12 ------ hermes_cli/setup.py | 22 ---------- hermes_cli/tools_config.py | 16 ------- hermes_cli/uninstall.py | 4 -- run_agent.py | 72 ------------------------------- 
tools/browser_tool.py | 10 ----- tools/cronjob_tools.py | 5 --- tools/file_operations.py | 69 ------------------------------ tools/file_tools.py | 5 --- tools/process_registry.py | 5 --- tools/skill_manager_tool.py | 5 --- tools/terminal_tool.py | 6 --- 19 files changed, 1 insertion(+), 432 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 4dd3cadc..f4e8dcee 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -706,29 +706,6 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]: } -def run_hermes_oauth_login() -> Optional[str]: - """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription. - - Opens a browser to claude.ai for authorization, prompts for the code, - exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json. - - Returns the access token on success, None on failure. - """ - result = run_hermes_oauth_login_pure() - if not result: - return None - - access_token = result["access_token"] - refresh_token = result["refresh_token"] - expires_at_ms = result["expires_at_ms"] - - _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms) - _write_claude_code_credentials(access_token, refresh_token, expires_at_ms) - - print("Authentication successful!") - return access_token - - def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None: """Save OAuth credentials to ~/.hermes/.anthropic_oauth.json.""" data = { @@ -756,38 +733,6 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]: return None -def refresh_hermes_oauth_token() -> Optional[str]: - """Refresh the Hermes-managed OAuth token using the stored refresh token. - - Returns the new access token, or None if refresh fails. 
- """ - creds = read_hermes_oauth_credentials() - if not creds or not creds.get("refreshToken"): - return None - - try: - refreshed = refresh_anthropic_oauth_pure( - creds["refreshToken"], - use_json=True, - ) - _save_hermes_oauth_credentials( - refreshed["access_token"], - refreshed["refresh_token"], - refreshed["expires_at_ms"], - ) - _write_claude_code_credentials( - refreshed["access_token"], - refreshed["refresh_token"], - refreshed["expires_at_ms"], - ) - logger.debug("Successfully refreshed Hermes OAuth token") - return refreshed["access_token"] - except Exception as e: - logger.debug("Failed to refresh Hermes OAuth token: %s", e) - - return None - - # --------------------------------------------------------------------------- # Message / tool / response format conversion # --------------------------------------------------------------------------- @@ -857,35 +802,6 @@ def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Di return None -def _convert_user_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]: - if isinstance(part, dict): - ptype = part.get("type") - if ptype == "text": - block = {"type": "text", "text": part.get("text", "")} - if isinstance(part.get("cache_control"), dict): - block["cache_control"] = dict(part["cache_control"]) - return block - if ptype == "image_url": - return _convert_openai_image_part_to_anthropic(part) - if ptype == "image" and part.get("source"): - return dict(part) - if ptype == "image" and part.get("data"): - media_type = part.get("mimeType") or part.get("media_type") or "image/png" - return { - "type": "image", - "source": { - "type": "base64", - "media_type": media_type, - "data": part.get("data", ""), - }, - } - if ptype == "tool_result": - return dict(part) - elif part is not None: - return {"type": "text", "text": str(part)} - return None - - def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]: """Convert OpenAI tool definitions to Anthropic format.""" if not tools: 
diff --git a/agent/display.py b/agent/display.py index 5eac70a4..7c7707eb 100644 --- a/agent/display.py +++ b/agent/display.py @@ -986,24 +986,6 @@ def _osc8_link(url: str, text: str) -> str: return f"\033]8;;{url}\033\\{text}\033]8;;\033\\" -def honcho_session_line(workspace: str, session_name: str) -> str: - """One-line session indicator: `Honcho session: `.""" - url = honcho_session_url(workspace, session_name) - linked_name = _osc8_link(url, f"{_SKY_BLUE}{session_name}{_ANSI_RESET}") - return f"{_DIM}Honcho session:{_ANSI_RESET} {linked_name}" - - -def write_tty(text: str) -> None: - """Write directly to /dev/tty, bypassing stdout capture.""" - try: - fd = os.open("/dev/tty", os.O_WRONLY) - os.write(fd, text.encode("utf-8")) - os.close(fd) - except OSError: - sys.stdout.write(text) - sys.stdout.flush() - - # ========================================================================= # Context pressure display (CLI user-facing warnings) # ========================================================================= diff --git a/cli.py b/cli.py index 69a9e8e9..2dce0827 100644 --- a/cli.py +++ b/cli.py @@ -3536,13 +3536,6 @@ class HermesCLI: _cprint(f" Original session: {parent_session_id}") _cprint(f" Branch session: {new_session_id}") - def reset_conversation(self): - """Reset the conversation by starting a new session.""" - # Shut down memory provider before resetting — actual session boundary - if hasattr(self, 'agent') and self.agent: - self.agent.shutdown_memory_provider(self.conversation_history) - self.new_session() - def save_conversation(self): """Save the current conversation to a file.""" if not self.conversation_history: @@ -6290,14 +6283,6 @@ class HermesCLI: except Exception: pass - def _clear_current_input(self) -> None: - if getattr(self, "_app", None): - try: - self._app.current_buffer.text = "" - except Exception: - pass - - def chat(self, message, images: list = None) -> Optional[str]: """ Send a message to the agent and get a response. 
diff --git a/gateway/delivery.py b/gateway/delivery.py index fff0aead..294c9b81 100644 --- a/gateway/delivery.py +++ b/gateway/delivery.py @@ -314,38 +314,4 @@ def parse_deliver_spec( return deliver -def build_delivery_context_for_tool( - config: GatewayConfig, - origin: Optional[SessionSource] = None -) -> Dict[str, Any]: - """ - Build context for the unified cronjob tool to understand delivery options. - - This is passed to the tool so it can validate and explain delivery targets. - """ - connected = config.get_connected_platforms() - - options = { - "origin": { - "description": "Back to where this job was created", - "available": origin is not None, - }, - "local": { - "description": "Save to local files only", - "available": True, - } - } - - for platform in connected: - home = config.get_home_channel(platform) - options[platform.value] = { - "description": f"{platform.value.title()} home channel", - "available": True, - "home_channel": home.to_dict() if home else None, - } - - return { - "origin": origin.to_dict() if origin else None, - "options": options, - "always_log_local": config.always_log_local, - } + diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 8ba41e8e..fce22a97 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -387,10 +387,6 @@ def _coerce_required_int(value: Any, default: int, min_value: int = 0) -> int: return default if parsed is None else parsed -def _is_loop_ready(loop: Optional[asyncio.AbstractEventLoop]) -> bool: - return loop is not None and not bool(getattr(loop, "is_closed", lambda: False)()) - - # --------------------------------------------------------------------------- # Post payload builders and parsers # --------------------------------------------------------------------------- diff --git a/hermes_cli/callbacks.py b/hermes_cli/callbacks.py index ada413df..724e6e4c 100644 --- a/hermes_cli/callbacks.py +++ b/hermes_cli/callbacks.py @@ -63,47 +63,6 @@ def clarify_callback(cli, 
question, choices): ) -def sudo_password_callback(cli) -> str: - """Prompt for sudo password through the TUI. - - Sets up a password input area and blocks until the user responds. - """ - timeout = 45 - response_queue = queue.Queue() - - cli._sudo_state = {"response_queue": response_queue} - cli._sudo_deadline = _time.monotonic() + timeout - - if hasattr(cli, "_app") and cli._app: - cli._app.invalidate() - - while True: - try: - result = response_queue.get(timeout=1) - cli._sudo_state = None - cli._sudo_deadline = 0 - if hasattr(cli, "_app") and cli._app: - cli._app.invalidate() - if result: - cprint(f"\n{_DIM} ✓ Password received (cached for session){_RST}") - else: - cprint(f"\n{_DIM} ⏭ Skipped{_RST}") - return result - except queue.Empty: - remaining = cli._sudo_deadline - _time.monotonic() - if remaining <= 0: - break - if hasattr(cli, "_app") and cli._app: - cli._app.invalidate() - - cli._sudo_state = None - cli._sudo_deadline = 0 - if hasattr(cli, "_app") and cli._app: - cli._app.invalidate() - cprint(f"\n{_DIM} ⏱ Timeout — continuing without sudo{_RST}") - return "" - - def prompt_for_secret(cli, var_name: str, prompt: str, metadata=None) -> dict: """Prompt for a secret value through the TUI (e.g. API keys for skills). diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 857bd2ed..4b37bc9e 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1131,10 +1131,6 @@ def _payload_items(payload: Any) -> list[dict[str, Any]]: return [] -def _extract_model_ids(payload: Any) -> list[str]: - return [item.get("id", "") for item in _payload_items(payload) if item.get("id")] - - def copilot_default_headers() -> dict[str, str]: """Standard headers for Copilot API requests. 
diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 0f238706..5cd02380 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -344,18 +344,6 @@ def get_label(provider_id: str) -> str: return canonical -# Build LABELS dict for backward compat -def _build_labels() -> Dict[str, str]: - """Build labels dict from overlays + overrides. Lazy, cached.""" - labels: Dict[str, str] = {} - for pid in HERMES_OVERLAYS: - labels[pid] = get_label(pid) - labels.update(_LABEL_OVERRIDES) - return labels - -# Lazy-built on first access -_labels_cache: Optional[Dict[str, str]] = None - # For direct import compat, expose as module-level dict # Built on demand by get_label() calls LABELS: Dict[str, str] = { diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index d7786d1d..ea2e57a9 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -42,18 +42,6 @@ def _model_config_dict(config: Dict[str, Any]) -> Dict[str, Any]: return {} -def _set_model_provider( - config: Dict[str, Any], provider_id: str, base_url: str = "" -) -> None: - model_cfg = _model_config_dict(config) - model_cfg["provider"] = provider_id - if base_url: - model_cfg["base_url"] = base_url.rstrip("/") - else: - model_cfg.pop("base_url", None) - config["model"] = model_cfg - - def _set_default_model(config: Dict[str, Any], model_name: str) -> None: if not model_name: return @@ -326,16 +314,6 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c config["model"] = model_cfg -def _sync_model_from_disk(config: Dict[str, Any]) -> None: - disk_model = load_config().get("model") - if isinstance(disk_model, dict): - model_cfg = _model_config_dict(config) - model_cfg.update(disk_model) - config["model"] = model_cfg - elif isinstance(disk_model, str) and disk_model.strip(): - _set_default_model(config, disk_model.strip()) - - # Import config helpers from hermes_cli.config import ( get_hermes_home, diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py 
index 804a7a4f..cddc664b 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -61,22 +61,6 @@ def _prompt(question: str, default: str = None, password: bool = False) -> str: print() return default or "" -def _prompt_yes_no(question: str, default: bool = True) -> bool: - default_str = "Y/n" if default else "y/N" - while True: - try: - value = input(color(f"{question} [{default_str}]: ", Colors.YELLOW)).strip().lower() - except (KeyboardInterrupt, EOFError): - print() - return default - if not value: - return default - if value in ('y', 'yes'): - return True - if value in ('n', 'no'): - return False - - # ─── Toolset Registry ───────────────────────────────────────────────────────── # Toolsets shown in the configurator, grouped for display. diff --git a/hermes_cli/uninstall.py b/hermes_cli/uninstall.py index 7e35b867..fa49e3c9 100644 --- a/hermes_cli/uninstall.py +++ b/hermes_cli/uninstall.py @@ -23,10 +23,6 @@ def log_success(msg: str): def log_warn(msg: str): print(f"{color('⚠', Colors.YELLOW)} {msg}") -def log_error(msg: str): - print(f"{color('✗', Colors.RED)} {msg}") - - def get_project_root() -> Path: """Get the project installation directory.""" return Path(__file__).parent.parent.resolve() diff --git a/run_agent.py b/run_agent.py index 1398a3d1..4d91c5cd 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1505,10 +1505,6 @@ class AIAgent: """Return True when the base URL targets OpenRouter.""" return "openrouter" in self._base_url_lower - def _is_anthropic_url(self) -> bool: - """Return True when the base URL targets Anthropic (native or /anthropic proxy path).""" - return "api.anthropic.com" in self._base_url_lower or self._base_url_lower.rstrip("/").endswith("/anthropic") - def _max_tokens_param(self, value: int) -> dict: """Return the correct max tokens kwarg for the current provider. 
@@ -1694,74 +1690,6 @@ class AIAgent: return None - def _classify_empty_content_response( - self, - assistant_message, - *, - finish_reason: Optional[str], - approx_tokens: int, - api_messages: List[Dict[str, Any]], - conversation_history: Optional[List[Dict[str, Any]]], - ) -> Dict[str, Any]: - """Classify think-only/empty responses so we can retry, compress, or salvage. - - We intentionally do NOT short-circuit all structured-reasoning responses. - Prior discussion/PR history shows some models recover on retry. Instead we: - - compress immediately when the pattern looks like implicit context pressure - - salvage reasoning early when the same reasoning-only payload repeats - - otherwise preserve the normal retry path - """ - reasoning_text = self._extract_reasoning(assistant_message) - has_structured_reasoning = bool( - getattr(assistant_message, "reasoning", None) - or getattr(assistant_message, "reasoning_content", None) - or getattr(assistant_message, "reasoning_details", None) - ) - content = getattr(assistant_message, "content", None) or "" - stripped_content = self._strip_think_blocks(content).strip() - signature = ( - content, - reasoning_text or "", - bool(has_structured_reasoning), - finish_reason or "", - ) - repeated_signature = signature == getattr(self, "_last_empty_content_signature", None) - - compressor = getattr(self, "context_compressor", None) - ctx_len = getattr(compressor, "context_length", 0) or 0 - threshold_tokens = getattr(compressor, "threshold_tokens", 0) or 0 - is_large_session = bool( - (ctx_len and approx_tokens >= max(int(ctx_len * 0.4), threshold_tokens)) - or len(api_messages) > 80 - ) - is_local_custom = is_local_endpoint(getattr(self, "base_url", "") or "") - is_resumed = bool(conversation_history) - context_pressure_signals = any( - [ - finish_reason == "length", - getattr(compressor, "_context_probed", False), - is_large_session, - is_resumed, - ] - ) - should_compress = bool( - self.compression_enabled - and is_local_custom - 
and context_pressure_signals - and not stripped_content - ) - - self._last_empty_content_signature = signature - return { - "reasoning_text": reasoning_text, - "has_structured_reasoning": has_structured_reasoning, - "repeated_signature": repeated_signature, - "should_compress": should_compress, - "is_local_custom": is_local_custom, - "is_large_session": is_large_session, - "is_resumed": is_resumed, - } - def _cleanup_task_resources(self, task_id: str) -> None: """Clean up VM and browser resources for a given task.""" try: diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 8ad3002b..317c1fb1 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2024,16 +2024,6 @@ def cleanup_all_browsers() -> None: cleanup_browser(task_id) -def get_active_browser_sessions() -> Dict[str, Dict[str, str]]: - """ - Get information about active browser sessions. - - Returns: - Dict mapping task_id to session info (session_name, bb_session_id, cdp_url) - """ - with _cleanup_lock: - return _active_sessions.copy() - # ============================================================================ # Requirements Check diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index caedaca7..0578dca5 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -501,11 +501,6 @@ def check_cronjob_requirements() -> bool: ) -def get_cronjob_tool_definitions(): - """Return tool definitions for cronjob management.""" - return [CRONJOB_SCHEMA] - - # --- Registry --- from tools.registry import registry diff --git a/tools/file_operations.py b/tools/file_operations.py index 8305eb9c..052f77a8 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -555,75 +555,6 @@ class ShellFileOperations(FileOperations): hint=hint ) - # Images larger than this are too expensive to inline as base64 in the - # conversation context. Return metadata only and suggest vision_analyze. 
- MAX_IMAGE_BYTES = 512 * 1024 # 512 KB - - def _read_image(self, path: str) -> ReadResult: - """Read an image file, returning base64 content.""" - # Get file size (wc -c is POSIX, works on Linux + macOS) - stat_cmd = f"wc -c < {self._escape_shell_arg(path)} 2>/dev/null" - stat_result = self._exec(stat_cmd) - try: - file_size = int(stat_result.stdout.strip()) - except ValueError: - file_size = 0 - - if file_size > self.MAX_IMAGE_BYTES: - return ReadResult( - is_image=True, - is_binary=True, - file_size=file_size, - hint=( - f"Image is too large to inline ({file_size:,} bytes). " - "Use vision_analyze to inspect the image, or reference it by path." - ), - ) - - # Get base64 content — pipe through tr to strip newlines portably. - # GNU base64 supports -w 0 but macOS base64 does not; both wrap by - # default, so stripping with tr is portable across all backends. - b64_cmd = f"base64 {self._escape_shell_arg(path)} 2>/dev/null | tr -d '\\n'" - b64_result = self._exec(b64_cmd, timeout=30) - - if b64_result.exit_code != 0: - return ReadResult( - is_image=True, - is_binary=True, - file_size=file_size, - error=f"Failed to read image: {b64_result.stdout}" - ) - - # Try to get dimensions (requires ImageMagick) - dimensions = None - if self._has_command('identify'): - dim_cmd = f"identify -format '%wx%h' {self._escape_shell_arg(path)} 2>/dev/null" - dim_result = self._exec(dim_cmd) - if dim_result.exit_code == 0: - dimensions = dim_result.stdout.strip() - - # Determine MIME type from extension - ext = os.path.splitext(path)[1].lower() - mime_types = { - '.png': 'image/png', - '.jpg': 'image/jpeg', - '.jpeg': 'image/jpeg', - '.gif': 'image/gif', - '.webp': 'image/webp', - '.bmp': 'image/bmp', - '.ico': 'image/x-icon', - } - mime_type = mime_types.get(ext, 'application/octet-stream') - - return ReadResult( - is_image=True, - is_binary=True, - file_size=file_size, - base64_content=b64_result.stdout, - mime_type=mime_type, - dimensions=dimensions - ) - def 
_suggest_similar_files(self, path: str) -> ReadResult: """Suggest similar files when the requested file is not found.""" # Get directory and filename diff --git a/tools/file_tools.py b/tools/file_tools.py index 45add116..626cae9a 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -713,11 +713,6 @@ FILE_TOOLS = [ ] -def get_file_tools(): - """Get the list of file tool definitions.""" - return FILE_TOOLS - - # --------------------------------------------------------------------------- # Schemas + Registry # --------------------------------------------------------------------------- diff --git a/tools/process_registry.py b/tools/process_registry.py index f5ac9543..86868f72 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -714,11 +714,6 @@ class ProcessRegistry: oldest_id = min(self._finished, key=lambda sid: self._finished[sid].started_at) del self._finished[oldest_id] - def cleanup_expired(self): - """Public method to prune expired finished sessions.""" - with self._lock: - self._prune_if_needed() - # ----- Checkpoint (crash recovery) ----- def _write_checkpoint(self): diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index b8d8d622..1a7de513 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -92,11 +92,6 @@ VALID_NAME_RE = re.compile(r'^[a-z0-9][a-z0-9._-]*$') ALLOWED_SUBDIRS = {"references", "templates", "scripts", "assets"} -def check_skill_manage_requirements() -> bool: - """Skill management has no external requirements -- always available.""" - return True - - # ============================================================================= # Validation helpers # ============================================================================= diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 76946f80..32f1bd3b 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -142,12 +142,6 @@ from tools.approval import ( ) -def _check_dangerous_command(command: str, 
env_type: str) -> dict: - """Delegate to the consolidated approval module, passing the CLI callback.""" - return _check_dangerous_command_impl(command, env_type, - approval_callback=_approval_callback) - - def _check_all_guards(command: str, env_type: str) -> dict: """Delegate to consolidated guard (tirith + dangerous cmd) with CLI callback.""" return _check_all_guards_impl(command, env_type, From 0f3895ba294f3d55b8407a28409576dfd66132ae Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Wed, 8 Apr 2026 00:11:26 +0530 Subject: [PATCH 082/154] fix(cron): deliver MEDIA files as native platform attachments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cron delivery path sent raw 'MEDIA:/path/to/file' text instead of uploading the file as a native attachment. The standalone path (via _send_to_platform) already extracted MEDIA tags and forwarded them as media_files, but the live adapter path passed the unprocessed delivery_content directly to adapter.send(). Two bugs fixed: 1. Live adapter path now sends cleaned text (MEDIA tags stripped) instead of raw content — prevents 'MEDIA:/path' from appearing as literal text in Discord/Telegram/etc. 2. Live adapter path now sends each extracted media file via the adapter's native method (send_voice for audio, send_image_file for images, send_video for video, send_document as fallback) — files are uploaded as proper platform attachments. The file-type routing mirrors BasePlatformAdapter._process_message_background to ensure consistent behavior between normal gateway responses and cron-delivered responses. 
Adds 2 tests: - test_live_adapter_sends_media_as_attachments: verifies Discord adapter receives send_voice call for .mp3 file - test_live_adapter_sends_cleaned_text_not_raw: verifies MEDIA tag stripped from text sent via live adapter --- cron/scheduler.py | 70 +++++++++++--- tests/cron/test_scheduler.py | 182 +++++++++++++++++++++++++++++++++++ 2 files changed, 241 insertions(+), 11 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index f694f440..8d71248b 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -158,6 +158,44 @@ def _resolve_delivery_target(job: dict) -> Optional[dict]: } +# Media extension sets — keep in sync with gateway/platforms/base.py:_process_message_background +_AUDIO_EXTS = frozenset({'.ogg', '.opus', '.mp3', '.wav', '.m4a'}) +_VIDEO_EXTS = frozenset({'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}) +_IMAGE_EXTS = frozenset({'.jpg', '.jpeg', '.png', '.webp', '.gif'}) + + +def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: dict | None, loop, job: dict) -> None: + """Send extracted MEDIA files as native platform attachments via a live adapter. + + Routes each file to the appropriate adapter method (send_voice, send_image_file, + send_video, send_document) based on file extension — mirroring the routing logic + in ``BasePlatformAdapter._process_message_background``. 
+ """ + from pathlib import Path + + for media_path, _is_voice in media_files: + try: + ext = Path(media_path).suffix.lower() + if ext in _AUDIO_EXTS: + coro = adapter.send_voice(chat_id=chat_id, audio_path=media_path, metadata=metadata) + elif ext in _VIDEO_EXTS: + coro = adapter.send_video(chat_id=chat_id, video_path=media_path, metadata=metadata) + elif ext in _IMAGE_EXTS: + coro = adapter.send_image_file(chat_id=chat_id, image_path=media_path, metadata=metadata) + else: + coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata) + + future = asyncio.run_coroutine_threadsafe(coro, loop) + result = future.result(timeout=30) + if result and not getattr(result, "success", True): + logger.warning( + "Job '%s': media send failed for %s: %s", + job.get("id", "?"), media_path, getattr(result, "error", "unknown"), + ) + except Exception as e: + logger.warning("Job '%s': failed to send media %s: %s", job.get("id", "?"), media_path, e) + + def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None: """ Deliver job output to the configured target (origin chat, specific platform, etc.). 
@@ -246,18 +284,28 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None: if runtime_adapter is not None and loop is not None and getattr(loop, "is_running", lambda: False)(): send_metadata = {"thread_id": thread_id} if thread_id else None try: - future = asyncio.run_coroutine_threadsafe( - runtime_adapter.send(chat_id, delivery_content, metadata=send_metadata), - loop, - ) - send_result = future.result(timeout=60) - if send_result and not getattr(send_result, "success", True): - err = getattr(send_result, "error", "unknown") - logger.warning( - "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone", - job["id"], platform_name, chat_id, err, + # Send cleaned text (MEDIA tags stripped) — not the raw content + text_to_send = cleaned_delivery_content.strip() + adapter_ok = True + if text_to_send: + future = asyncio.run_coroutine_threadsafe( + runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), + loop, ) - else: + send_result = future.result(timeout=60) + if send_result and not getattr(send_result, "success", True): + err = getattr(send_result, "error", "unknown") + logger.warning( + "Job '%s': live adapter send to %s:%s failed (%s), falling back to standalone", + job["id"], platform_name, chat_id, err, + ) + adapter_ok = False # fall through to standalone path + + # Send extracted media files as native attachments via the live adapter + if adapter_ok and media_files: + _send_media_via_adapter(runtime_adapter, chat_id, media_files, send_metadata, loop, job) + + if adapter_ok: logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id) return except Exception as e: diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index c1282897..baa6be9d 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -277,6 +277,188 @@ class TestDeliverResultWrapping: # Media files should be forwarded separately assert kwargs["media_files"] == 
[("/tmp/test-voice.ogg", False)] + def test_live_adapter_sends_media_as_attachments(self): + """When a live adapter is available, MEDIA files should be sent as native + platform attachments (e.g., Discord voice, Telegram audio) rather than + as literal 'MEDIA:/path' text.""" + from gateway.config import Platform + from concurrent.futures import Future + + adapter = AsyncMock() + adapter.send.return_value = MagicMock(success=True) + adapter.send_voice.return_value = MagicMock(success=True) + + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.DISCORD: pconfig} + + loop = MagicMock() + loop.is_running.return_value = True + + # run_coroutine_threadsafe returns concurrent.futures.Future (has timeout kwarg) + def fake_run_coro(coro, _loop): + future = Future() + future.set_result(MagicMock(success=True)) + coro.close() + return future + + job = { + "id": "tts-job", + "deliver": "origin", + "origin": {"platform": "discord", "chat_id": "9876"}, + } + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \ + patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro): + _deliver_result( + job, + "Here is TTS\nMEDIA:/tmp/cron-voice.mp3", + adapters={Platform.DISCORD: adapter}, + loop=loop, + ) + + # Text should be sent without the MEDIA tag + adapter.send.assert_called_once() + text_sent = adapter.send.call_args[0][1] + assert "MEDIA:" not in text_sent + assert "Here is TTS" in text_sent + + # Audio file should be sent as a voice attachment + adapter.send_voice.assert_called_once() + voice_call = adapter.send_voice.call_args + assert voice_call[1]["audio_path"] == "/tmp/cron-voice.mp3" + + def test_live_adapter_routes_image_to_send_image_file(self): + """Image MEDIA files should be routed to send_image_file, not send_voice.""" + from gateway.config import Platform + from concurrent.futures import 
Future + + adapter = AsyncMock() + adapter.send.return_value = MagicMock(success=True) + adapter.send_image_file.return_value = MagicMock(success=True) + + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.DISCORD: pconfig} + + loop = MagicMock() + loop.is_running.return_value = True + + def fake_run_coro(coro, _loop): + future = Future() + future.set_result(MagicMock(success=True)) + coro.close() + return future + + job = { + "id": "img-job", + "deliver": "origin", + "origin": {"platform": "discord", "chat_id": "1234"}, + } + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \ + patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro): + _deliver_result( + job, + "Chart attached\nMEDIA:/tmp/chart.png", + adapters={Platform.DISCORD: adapter}, + loop=loop, + ) + + adapter.send_image_file.assert_called_once() + assert adapter.send_image_file.call_args[1]["image_path"] == "/tmp/chart.png" + adapter.send_voice.assert_not_called() + + def test_live_adapter_media_only_no_text(self): + """When content is ONLY a MEDIA tag with no text, media should still be sent.""" + from gateway.config import Platform + from concurrent.futures import Future + + adapter = AsyncMock() + adapter.send_voice.return_value = MagicMock(success=True) + + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.TELEGRAM: pconfig} + + loop = MagicMock() + loop.is_running.return_value = True + + def fake_run_coro(coro, _loop): + future = Future() + future.set_result(MagicMock(success=True)) + coro.close() + return future + + job = { + "id": "voice-only", + "deliver": "origin", + "origin": {"platform": "telegram", "chat_id": "999"}, + } + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("cron.scheduler.load_config", return_value={"cron": 
{"wrap_response": False}}), \ + patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro): + _deliver_result( + job, + "MEDIA:/tmp/voice.ogg", + adapters={Platform.TELEGRAM: adapter}, + loop=loop, + ) + + # Text send should NOT be called (no text after stripping MEDIA tag) + adapter.send.assert_not_called() + # Audio should still be delivered + adapter.send_voice.assert_called_once() + + def test_live_adapter_sends_cleaned_text_not_raw(self): + """The live adapter path must send cleaned text (MEDIA tags stripped), + not the raw delivery_content with embedded MEDIA: tags.""" + from gateway.config import Platform + from concurrent.futures import Future + + adapter = AsyncMock() + adapter.send.return_value = MagicMock(success=True) + + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.TELEGRAM: pconfig} + + loop = MagicMock() + loop.is_running.return_value = True + + def fake_run_coro(coro, _loop): + future = Future() + future.set_result(MagicMock(success=True)) + coro.close() + return future + + job = { + "id": "img-job", + "deliver": "origin", + "origin": {"platform": "telegram", "chat_id": "555"}, + } + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \ + patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro): + _deliver_result( + job, + "Report\nMEDIA:/tmp/chart.png", + adapters={Platform.TELEGRAM: adapter}, + loop=loop, + ) + + text_sent = adapter.send.call_args[0][1] + assert "MEDIA:" not in text_sent + assert "Report" in text_sent + def test_no_mirror_to_session_call(self): """Cron deliveries should NOT mirror into the gateway session.""" from gateway.config import Platform From c040b0e4ae1d2b241898402bfeb95cc1c8cc8af5 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 12:32:23 -0700 Subject: [PATCH 083/154] =?UTF-8?q?test:=20add=20unit=20tests=20for=20medi?= 
=?UTF-8?q?a=20helper=20=E2=80=94=20video,=20document,=20multi-file,=20fai?= =?UTF-8?q?lure=20isolation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapted from PR #5679 (0xbyt4) to cover edge cases not in the integration tests: video routing, unknown extension fallback to send_document, multi-file delivery, and single-failure isolation. --- tests/cron/test_scheduler.py | 56 +++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index baa6be9d..4a15fa22 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -7,7 +7,7 @@ from unittest.mock import AsyncMock, patch, MagicMock import pytest -from cron.scheduler import _resolve_origin, _resolve_delivery_target, _deliver_result, run_job, SILENT_MARKER, _build_job_prompt +from cron.scheduler import _resolve_origin, _resolve_delivery_target, _deliver_result, _send_media_via_adapter, run_job, SILENT_MARKER, _build_job_prompt class TestResolveOrigin: @@ -1044,3 +1044,57 @@ class TestTickAdvanceBeforeRun: adv_mock.assert_called_once_with("test-advance") # advance must happen before run assert call_order == [("advance", "test-advance"), ("run", "test-advance")] + + +class TestSendMediaViaAdapter: + """Unit tests for _send_media_via_adapter — routes files to typed adapter methods.""" + + @staticmethod + def _run_with_loop(adapter, chat_id, media_files, metadata, job): + """Helper: run _send_media_via_adapter with a real running event loop.""" + import asyncio + import threading + + loop = asyncio.new_event_loop() + t = threading.Thread(target=loop.run_forever, daemon=True) + t.start() + try: + _send_media_via_adapter(adapter, chat_id, media_files, metadata, loop, job) + finally: + loop.call_soon_threadsafe(loop.stop) + t.join(timeout=5) + loop.close() + + def test_video_dispatched_to_send_video(self): + adapter = MagicMock() + adapter.send_video = 
AsyncMock() + media_files = [("/tmp/clip.mp4", False)] + self._run_with_loop(adapter, "123", media_files, None, {"id": "j1"}) + adapter.send_video.assert_called_once() + assert adapter.send_video.call_args[1]["video_path"] == "/tmp/clip.mp4" + + def test_unknown_ext_dispatched_to_send_document(self): + adapter = MagicMock() + adapter.send_document = AsyncMock() + media_files = [("/tmp/report.pdf", False)] + self._run_with_loop(adapter, "123", media_files, None, {"id": "j2"}) + adapter.send_document.assert_called_once() + assert adapter.send_document.call_args[1]["file_path"] == "/tmp/report.pdf" + + def test_multiple_media_files_all_delivered(self): + adapter = MagicMock() + adapter.send_voice = AsyncMock() + adapter.send_image_file = AsyncMock() + media_files = [("/tmp/voice.mp3", False), ("/tmp/photo.jpg", False)] + self._run_with_loop(adapter, "123", media_files, None, {"id": "j3"}) + adapter.send_voice.assert_called_once() + adapter.send_image_file.assert_called_once() + + def test_single_failure_does_not_block_others(self): + adapter = MagicMock() + adapter.send_voice = AsyncMock(side_effect=RuntimeError("network error")) + adapter.send_image_file = AsyncMock() + media_files = [("/tmp/voice.ogg", False), ("/tmp/photo.png", False)] + self._run_with_loop(adapter, "123", media_files, None, {"id": "j4"}) + adapter.send_voice.assert_called_once() + adapter.send_image_file.assert_called_once() From f4528c885b31ab14b66bf1403da81fd893289ff4 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Tue, 7 Apr 2026 23:41:11 +0530 Subject: [PATCH 084/154] feat(clipboard): add native Windows image paste support Add win32 platform branch to clipboard.py so Ctrl+V image paste works on native Windows (PowerShell / Windows Terminal), not just WSL2. Uses the same .NET System.Windows.Forms.Clipboard approach as the WSL path but calls PowerShell directly instead of powershell.exe (the WSL cross-call path). 
Tries 'powershell' first (Windows PowerShell 5.1, always available), then 'pwsh' (PowerShell 7+). PowerShell executable is discovered once and cached for the process lifetime. Includes 14 new tests covering: - Platform dispatch (save_clipboard_image + has_clipboard_image) - Image detection via PowerShell .NET check - Base64 PNG extraction and decode - Edge cases: no PowerShell, empty output, invalid base64, timeout --- hermes_cli/clipboard.py | 107 ++++++++++++++++++++++++++++++++-- tests/tools/test_clipboard.py | 106 +++++++++++++++++++++++++++++++++ 2 files changed, 209 insertions(+), 4 deletions(-) diff --git a/hermes_cli/clipboard.py b/hermes_cli/clipboard.py index 4a56fd0f..d968de3b 100644 --- a/hermes_cli/clipboard.py +++ b/hermes_cli/clipboard.py @@ -1,4 +1,4 @@ -"""Clipboard image extraction for macOS, Linux, and WSL2. +"""Clipboard image extraction for macOS, Windows, Linux, and WSL2. Provides a single function `save_clipboard_image(dest)` that checks the system clipboard for image data, saves it to *dest* as PNG, and returns @@ -6,9 +6,10 @@ True on success. No external Python dependencies — uses only OS-level CLI tools that ship with the platform (or are commonly installed). 
Platform support: - macOS — osascript (always available), pngpaste (if installed) - WSL2 — powershell.exe via .NET System.Windows.Forms.Clipboard - Linux — wl-paste (Wayland), xclip (X11) + macOS — osascript (always available), pngpaste (if installed) + Windows — PowerShell via .NET System.Windows.Forms.Clipboard + WSL2 — powershell.exe via .NET System.Windows.Forms.Clipboard + Linux — wl-paste (Wayland), xclip (X11) """ import base64 @@ -32,6 +33,8 @@ def save_clipboard_image(dest: Path) -> bool: dest.parent.mkdir(parents=True, exist_ok=True) if sys.platform == "darwin": return _macos_save(dest) + if sys.platform == "win32": + return _windows_save(dest) return _linux_save(dest) @@ -42,6 +45,8 @@ def has_clipboard_image() -> bool: """ if sys.platform == "darwin": return _macos_has_image() + if sys.platform == "win32": + return _windows_has_image() if _is_wsl(): return _wsl_has_image() if os.environ.get("WAYLAND_DISPLAY"): @@ -112,6 +117,100 @@ def _macos_osascript(dest: Path) -> bool: return False +# ── Native Windows ──────────────────────────────────────────────────────── + +# PowerShell scripts for native Windows. +# Same .NET approach as the WSL path but called directly (not via powershell.exe +# cross-call). ``powershell`` resolves to Windows PowerShell 5.1 (always present); +# ``pwsh`` would be PowerShell 7+ (optional). We try ``powershell`` first. 
+_WIN_PS_CHECK = ( + "Add-Type -AssemblyName System.Windows.Forms;" + "[System.Windows.Forms.Clipboard]::ContainsImage()" +) + +_WIN_PS_EXTRACT = ( + "Add-Type -AssemblyName System.Windows.Forms;" + "Add-Type -AssemblyName System.Drawing;" + "$img = [System.Windows.Forms.Clipboard]::GetImage();" + "if ($null -eq $img) { exit 1 }" + "$ms = New-Object System.IO.MemoryStream;" + "$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);" + "[System.Convert]::ToBase64String($ms.ToArray())" +) + + +def _find_powershell() -> str | None: + """Return the first available PowerShell executable, or None.""" + for name in ("powershell", "pwsh"): + try: + r = subprocess.run( + [name, "-NoProfile", "-NonInteractive", "-Command", "echo ok"], + capture_output=True, text=True, timeout=5, + ) + if r.returncode == 0 and "ok" in r.stdout: + return name + except FileNotFoundError: + continue + except Exception: + continue + return None + + +# Cache the resolved PowerShell executable (checked once per process) +_ps_exe: str | None | bool = False # False = not yet checked + + +def _get_ps_exe() -> str | None: + global _ps_exe + if _ps_exe is False: + _ps_exe = _find_powershell() + return _ps_exe + + +def _windows_has_image() -> bool: + """Check if the Windows clipboard contains an image.""" + ps = _get_ps_exe() + if ps is None: + return False + try: + r = subprocess.run( + [ps, "-NoProfile", "-NonInteractive", "-Command", _WIN_PS_CHECK], + capture_output=True, text=True, timeout=5, + ) + return r.returncode == 0 and "True" in r.stdout + except Exception as e: + logger.debug("Windows clipboard image check failed: %s", e) + return False + + +def _windows_save(dest: Path) -> bool: + """Extract clipboard image on native Windows via PowerShell → base64 PNG.""" + ps = _get_ps_exe() + if ps is None: + logger.debug("No PowerShell found — Windows clipboard image paste unavailable") + return False + try: + r = subprocess.run( + [ps, "-NoProfile", "-NonInteractive", "-Command", _WIN_PS_EXTRACT], + 
capture_output=True, text=True, timeout=15, + ) + if r.returncode != 0: + return False + + b64_data = r.stdout.strip() + if not b64_data: + return False + + png_bytes = base64.b64decode(b64_data) + dest.write_bytes(png_bytes) + return dest.exists() and dest.stat().st_size > 0 + + except Exception as e: + logger.debug("Windows clipboard image extraction failed: %s", e) + dest.unlink(missing_ok=True) + return False + + # ── Linux ──────────────────────────────────────────────────────────────── def _is_wsl() -> bool: diff --git a/tests/tools/test_clipboard.py b/tests/tools/test_clipboard.py index 6f1ecf8d..82a4aa6f 100644 --- a/tests/tools/test_clipboard.py +++ b/tests/tools/test_clipboard.py @@ -31,6 +31,8 @@ from hermes_cli.clipboard import ( _wsl_has_image, _wayland_save, _wayland_has_image, + _windows_save, + _windows_has_image, _convert_to_png, ) @@ -51,6 +53,14 @@ class TestSaveClipboardImage: save_clipboard_image(dest) m.assert_called_once_with(dest) + def test_dispatches_to_windows_on_win32(self, tmp_path): + dest = tmp_path / "out.png" + with patch("hermes_cli.clipboard.sys") as mock_sys: + mock_sys.platform = "win32" + with patch("hermes_cli.clipboard._windows_save", return_value=False) as m: + save_clipboard_image(dest) + m.assert_called_once_with(dest) + def test_dispatches_to_linux_on_linux(self, tmp_path): dest = tmp_path / "out.png" with patch("hermes_cli.clipboard.sys") as mock_sys: @@ -497,6 +507,102 @@ class TestLinuxSave: m.assert_called_once_with(dest) +# ── Native Windows (PowerShell) ───────────────────────────────────────── + +class TestWindowsHasImage: + def setup_method(self): + import hermes_cli.clipboard as cb + cb._ps_exe = False # reset cache + + def test_clipboard_has_image(self): + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="True\n", returncode=0) + assert _windows_has_image() is True + + def 
test_clipboard_no_image(self): + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="False\n", returncode=0) + assert _windows_has_image() is False + + def test_no_powershell_available(self): + with patch("hermes_cli.clipboard._get_ps_exe", return_value=None): + assert _windows_has_image() is False + + def test_powershell_error(self): + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", returncode=1) + assert _windows_has_image() is False + + def test_subprocess_exception(self): + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run", + side_effect=subprocess.TimeoutExpired("powershell", 5)): + assert _windows_has_image() is False + + +class TestWindowsSave: + def setup_method(self): + import hermes_cli.clipboard as cb + cb._ps_exe = False # reset cache + + def test_successful_extraction(self, tmp_path): + dest = tmp_path / "out.png" + b64_png = base64.b64encode(FAKE_PNG).decode() + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout=b64_png + "\n", returncode=0) + assert _windows_save(dest) is True + assert dest.read_bytes() == FAKE_PNG + + def test_no_image_returns_false(self, tmp_path): + dest = tmp_path / "out.png" + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", returncode=1) + assert _windows_save(dest) is False + assert not dest.exists() + + def test_empty_output(self, tmp_path): + dest = tmp_path / "out.png" + with patch("hermes_cli.clipboard._get_ps_exe", 
return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="", returncode=0) + assert _windows_save(dest) is False + + def test_no_powershell_returns_false(self, tmp_path): + dest = tmp_path / "out.png" + with patch("hermes_cli.clipboard._get_ps_exe", return_value=None): + assert _windows_save(dest) is False + + def test_invalid_base64(self, tmp_path): + dest = tmp_path / "out.png" + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(stdout="not-valid-base64!!!", returncode=0) + assert _windows_save(dest) is False + + def test_timeout(self, tmp_path): + dest = tmp_path / "out.png" + with patch("hermes_cli.clipboard._get_ps_exe", return_value="powershell"): + with patch("hermes_cli.clipboard.subprocess.run", + side_effect=subprocess.TimeoutExpired("powershell", 15)): + assert _windows_save(dest) is False + + +class TestHasClipboardImageWin32: + """Verify has_clipboard_image dispatches to _windows_has_image on win32.""" + + def test_dispatches_on_win32(self): + with patch("hermes_cli.clipboard.sys") as mock_sys: + mock_sys.platform = "win32" + with patch("hermes_cli.clipboard._windows_has_image", return_value=True) as m: + assert has_clipboard_image() is True + m.assert_called_once() + + # ── BMP conversion ────────────────────────────────────────────────────── class TestConvertToPng: From 6e2f6a25a1d439b6ca0883c4d1c4ed2def1b3359 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 12:22:59 -0700 Subject: [PATCH 085/154] refactor: deduplicate PowerShell script constants between Windows and WSL paths Move _PS_CHECK_IMAGE and _PS_EXTRACT_IMAGE above both the native Windows and WSL2 sections so both can share them. Removes the duplicate _WIN_PS_CHECK / _WIN_PS_EXTRACT constants. 
--- hermes_cli/clipboard.py | 41 ++++++++++++++--------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/hermes_cli/clipboard.py b/hermes_cli/clipboard.py index d968de3b..622c087f 100644 --- a/hermes_cli/clipboard.py +++ b/hermes_cli/clipboard.py @@ -117,18 +117,16 @@ def _macos_osascript(dest: Path) -> bool: return False -# ── Native Windows ──────────────────────────────────────────────────────── +# ── Shared PowerShell scripts (native Windows + WSL2) ───────────────────── -# PowerShell scripts for native Windows. -# Same .NET approach as the WSL path but called directly (not via powershell.exe -# cross-call). ``powershell`` resolves to Windows PowerShell 5.1 (always present); -# ``pwsh`` would be PowerShell 7+ (optional). We try ``powershell`` first. -_WIN_PS_CHECK = ( +# .NET System.Windows.Forms.Clipboard — used by both native Windows (powershell) +# and WSL2 (powershell.exe) paths. +_PS_CHECK_IMAGE = ( "Add-Type -AssemblyName System.Windows.Forms;" "[System.Windows.Forms.Clipboard]::ContainsImage()" ) -_WIN_PS_EXTRACT = ( +_PS_EXTRACT_IMAGE = ( "Add-Type -AssemblyName System.Windows.Forms;" "Add-Type -AssemblyName System.Drawing;" "$img = [System.Windows.Forms.Clipboard]::GetImage();" @@ -139,6 +137,12 @@ _WIN_PS_EXTRACT = ( ) +# ── Native Windows ──────────────────────────────────────────────────────── + +# Native Windows uses ``powershell`` (Windows PowerShell 5.1, always present) +# or ``pwsh`` (PowerShell 7+, optional). Discovery is cached per-process. 
+ + def _find_powershell() -> str | None: """Return the first available PowerShell executable, or None.""" for name in ("powershell", "pwsh"): @@ -174,7 +178,7 @@ def _windows_has_image() -> bool: return False try: r = subprocess.run( - [ps, "-NoProfile", "-NonInteractive", "-Command", _WIN_PS_CHECK], + [ps, "-NoProfile", "-NonInteractive", "-Command", _PS_CHECK_IMAGE], capture_output=True, text=True, timeout=5, ) return r.returncode == 0 and "True" in r.stdout @@ -191,7 +195,7 @@ def _windows_save(dest: Path) -> bool: return False try: r = subprocess.run( - [ps, "-NoProfile", "-NonInteractive", "-Command", _WIN_PS_EXTRACT], + [ps, "-NoProfile", "-NonInteractive", "-Command", _PS_EXTRACT_IMAGE], capture_output=True, text=True, timeout=15, ) if r.returncode != 0: @@ -241,24 +245,7 @@ def _linux_save(dest: Path) -> bool: # ── WSL2 (powershell.exe) ──────────────────────────────────────────────── - -# PowerShell script: get clipboard image as base64-encoded PNG on stdout. -# Using .NET System.Windows.Forms.Clipboard — always available on Windows. -_PS_CHECK_IMAGE = ( - "Add-Type -AssemblyName System.Windows.Forms;" - "[System.Windows.Forms.Clipboard]::ContainsImage()" -) - -_PS_EXTRACT_IMAGE = ( - "Add-Type -AssemblyName System.Windows.Forms;" - "Add-Type -AssemblyName System.Drawing;" - "$img = [System.Windows.Forms.Clipboard]::GetImage();" - "if ($null -eq $img) { exit 1 }" - "$ms = New-Object System.IO.MemoryStream;" - "$img.Save($ms, [System.Drawing.Imaging.ImageFormat]::Png);" - "[System.Convert]::ToBase64String($ms.ToArray())" -) - +# Reuses _PS_CHECK_IMAGE / _PS_EXTRACT_IMAGE defined above. 
def _wsl_has_image() -> bool: """Check if Windows clipboard has an image (via powershell.exe).""" From ab8f9c089ea08071e3f5b6d1a96856ac78cf3db9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 13:19:06 -0700 Subject: [PATCH 086/154] feat: thinking-only prefill continuation for structured reasoning responses (#5931) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the model produces structured reasoning (via API fields like .reasoning, .reasoning_content, .reasoning_details) but no visible text content, append the assistant message as prefill and continue the loop. The model sees its own reasoning context on the next turn and produces the text portion. Inspired by clawdbot's 'incomplete-text' recovery pattern. Up to 2 prefill attempts before falling through to the existing '(empty)' terminal. Key design decisions: - Only triggers for structured reasoning (API fields), NOT inline tags - Prefill messages are popped on success to maintain strict role alternation - _thinking_prefill marker stripped from all API message building paths - Works across all providers: OpenAI (continuation), Anthropic (native prefill) Verified with E2E tests: simulated thinking-only → real OpenRouter continuation produces correct content. Also confirmed Qwen models consistently produce structured-reasoning-only responses under token pressure. 
--- run_agent.py | 64 ++++++++++++++++++++++++++++++++++++----- tests/test_run_agent.py | 44 +++++++++++++++++++++++----- 2 files changed, 94 insertions(+), 14 deletions(-) diff --git a/run_agent.py b/run_agent.py index 4d91c5cd..19f7c23f 100644 --- a/run_agent.py +++ b/run_agent.py @@ -5741,6 +5741,7 @@ class AIAgent: api_msg.pop("reasoning", None) api_msg.pop("finish_reason", None) api_msg.pop("_flush_sentinel", None) + api_msg.pop("_thinking_prefill", None) if _needs_sanitize: self._sanitize_tool_calls_for_strict_api(api_msg) api_messages.append(api_msg) @@ -6664,7 +6665,7 @@ class AIAgent: api_messages = [] for msg in messages: api_msg = msg.copy() - for internal_field in ("reasoning", "finish_reason"): + for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"): api_msg.pop(internal_field, None) if _needs_sanitize: self._sanitize_tool_calls_for_strict_api(api_msg) @@ -6856,6 +6857,7 @@ class AIAgent: self._empty_content_retries = 0 self._incomplete_scratchpad_retries = 0 self._codex_incomplete_retries = 0 + self._thinking_prefill_retries = 0 self._last_content_with_tools = None self._mute_post_response = False self._surrogate_sanitized = False @@ -7201,6 +7203,8 @@ class AIAgent: # Remove finish_reason - not accepted by strict APIs (e.g. Mistral) if "finish_reason" in api_msg: api_msg.pop("finish_reason") + # Strip internal thinking-prefill marker + api_msg.pop("_thinking_prefill", None) # Strip Codex Responses API fields (call_id, response_item_id) for # strict providers like Mistral, Fireworks, etc. that reject unknown fields. # Uses new dicts so the internal messages list retains the fields @@ -8735,6 +8739,15 @@ class AIAgent: if clean: self._vprint(f" ┊ 💬 {clean}") + # Pop thinking-only prefill message(s) before appending + # (tool-call path — same rationale as the final-response path). 
+ while ( + messages + and isinstance(messages[-1], dict) + and messages[-1].get("_thinking_prefill") + ): + messages.pop() + messages.append(assistant_msg) # Close any open streaming display (response box, reasoning @@ -8848,11 +8861,36 @@ class AIAgent: self._response_was_previewed = True break - # Reasoning-only response: the model produced thinking - # but no visible content. This is a valid response — - # keep reasoning in its own field and set content to - # "(empty)" so every provider accepts the message. - # No retries needed. + # ── Thinking-only prefill continuation ────────── + # The model produced structured reasoning (via API + # fields) but no visible text content. Rather than + # giving up, append the assistant message as-is and + # continue — the model will see its own reasoning + # on the next turn and produce the text portion. + # Inspired by clawdbot's "incomplete-text" recovery. + _has_structured = bool( + getattr(assistant_message, "reasoning", None) + or getattr(assistant_message, "reasoning_content", None) + or getattr(assistant_message, "reasoning_details", None) + ) + if _has_structured and self._thinking_prefill_retries < 2: + self._thinking_prefill_retries += 1 + self._vprint( + f"{self.log_prefix}↻ Thinking-only response — " + f"prefilling to continue " + f"({self._thinking_prefill_retries}/2)" + ) + interim_msg = self._build_assistant_message( + assistant_message, "incomplete" + ) + interim_msg["_thinking_prefill"] = True + messages.append(interim_msg) + self._session_messages = messages + self._save_session_log(messages) + continue + + # Exhausted prefill attempts or no structured + # reasoning — fall through to "(empty)" terminal. 
reasoning_text = self._extract_reasoning(assistant_message) assistant_msg = self._build_assistant_message(assistant_message, finish_reason) assistant_msg["content"] = "(empty)" @@ -8871,6 +8909,7 @@ class AIAgent: if hasattr(self, '_empty_content_retries'): self._empty_content_retries = 0 self._last_empty_content_signature = None + self._thinking_prefill_retries = 0 if ( self.api_mode == "codex_responses" @@ -8909,7 +8948,18 @@ class AIAgent: final_response = self._strip_think_blocks(final_response).strip() final_msg = self._build_assistant_message(assistant_message, finish_reason) - + + # Pop thinking-only prefill message(s) before appending + # the final response. This avoids consecutive assistant + # messages which break strict-alternation providers + # (Anthropic Messages API) and keeps history clean. + while ( + messages + and isinstance(messages[-1], dict) + and messages[-1].get("_thinking_prefill") + ): + messages.pop() + messages.append(final_msg) if not self.quiet_mode: diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 28194549..7f6ab4c3 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1547,7 +1547,7 @@ class TestRunConversation: assert any(m.get("reasoning") for m in assistant_msgs) def test_reasoning_only_local_resumed_no_compression_triggered(self, agent): - """Reasoning-only responses no longer trigger compression — accepted immediately.""" + """Reasoning-only responses no longer trigger compression — prefill then accepted.""" self._setup_agent(agent) agent.base_url = "http://127.0.0.1:1234/v1" agent.compression_enabled = True @@ -1561,8 +1561,9 @@ class TestRunConversation: {"role": "assistant", "content": "old answer"}, ] + # 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill) with ( - patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp]), + patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]), 
patch.object(agent, "_compress_context") as mock_compress, patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1573,17 +1574,18 @@ class TestRunConversation: mock_compress.assert_not_called() # no compression triggered assert result["completed"] is True assert result["final_response"] == "(empty)" - assert result["api_calls"] == 1 + assert result["api_calls"] == 3 # 1 original + 2 prefill continuations - def test_reasoning_only_response_accepted_without_retry(self, agent): - """Reasoning-only response should be accepted with (empty) content, no retries.""" + def test_reasoning_only_response_prefill_then_empty(self, agent): + """Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty).""" self._setup_agent(agent) empty_resp = _mock_response( content=None, finish_reason="stop", reasoning_content="structured reasoning answer", ) - agent.client.chat.completions.create.side_effect = [empty_resp] + # 3 responses: original + 2 prefill continuations, all reasoning-only + agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp] with ( patch.object(agent, "_persist_session"), patch.object(agent, "_save_trajectory"), @@ -1592,7 +1594,35 @@ class TestRunConversation: result = agent.run_conversation("answer me") assert result["completed"] is True assert result["final_response"] == "(empty)" - assert result["api_calls"] == 1 # no retries + assert result["api_calls"] == 3 # 1 original + 2 prefill continuations + + def test_reasoning_only_prefill_succeeds_on_continuation(self, agent): + """When prefill continuation produces content, it becomes the final response.""" + self._setup_agent(agent) + empty_resp = _mock_response( + content=None, + finish_reason="stop", + reasoning_content="structured reasoning answer", + ) + content_resp = _mock_response( + content="Here is the actual answer.", + finish_reason="stop", + ) + agent.client.chat.completions.create.side_effect = 
[empty_resp, content_resp] + with ( + patch.object(agent, "_persist_session"), + patch.object(agent, "_save_trajectory"), + patch.object(agent, "_cleanup_task_resources"), + ): + result = agent.run_conversation("answer me") + assert result["completed"] is True + assert result["final_response"] == "Here is the actual answer." + assert result["api_calls"] == 2 # 1 original + 1 prefill continuation + # Prefill message should be cleaned up — no consecutive assistant messages + roles = [m.get("role") for m in result["messages"]] + for i in range(len(roles) - 1): + if roles[i] == "assistant" and roles[i + 1] == "assistant": + raise AssertionError("Consecutive assistant messages found in history") def test_truly_empty_response_accepted_without_retry(self, agent): """Truly empty response (no content, no reasoning) should still complete with (empty).""" From 678a87c47753a98ab2320def830c7ae24cda4c0e Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 13:36:20 -0700 Subject: [PATCH 087/154] refactor: add tool_error/tool_result helpers + read_raw_config, migrate 129 callsites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three reusable helpers to eliminate pervasive boilerplate: tools/registry.py — tool_error() and tool_result(): Every tool handler returns JSON strings. The pattern json.dumps({"error": msg}, ensure_ascii=False) appeared 106 times, and json.dumps({"success": False, "error": msg}, ...) another 23. Now: tool_error(msg) or tool_error(msg, success=False). tool_result() handles arbitrary result dicts: tool_result(success=True, data=payload) or tool_result(some_dict). hermes_cli/config.py — read_raw_config(): Lightweight YAML reader that returns the raw config dict without load_config()'s deep-merge + migration overhead. Available for callsites that just need a single config value. 
Migration (129 callsites across 32 files): - tools/: browser_camofox (18), file_tools (10), homeassistant (8), web_tools (7), skill_manager (7), cronjob (11), code_execution (4), delegate (5), send_message (4), tts (4), memory (7), session_search (3), mcp (2), clarify (2), skills_tool (3), todo (1), vision (1), browser (1), process_registry (2), image_gen (1) - plugins/memory/: honcho (9), supermemory (9), hindsight (8), holographic (7), openviking (7), mem0 (7), byterover (6), retaindb (2) - agent/: memory_manager (2), builtin_memory_provider (1) --- agent/builtin_memory_provider.py | 3 +- agent/memory_manager.py | 5 +-- hermes_cli/config.py | 18 +++++++++++ plugins/memory/byterover/__init__.py | 13 ++++---- plugins/memory/hindsight/__init__.py | 17 +++++----- plugins/memory/holographic/__init__.py | 15 +++++---- plugins/memory/honcho/__init__.py | 19 +++++------ plugins/memory/mem0/__init__.py | 15 +++++---- plugins/memory/openviking/__init__.py | 15 +++++---- plugins/memory/retaindb/__init__.py | 5 +-- plugins/memory/supermemory/__init__.py | 19 +++++------ tools/browser_camofox.py | 36 ++++++++++----------- tools/browser_tool.py | 4 +-- tools/clarify_tool.py | 6 ++-- tools/code_execution_tool.py | 10 +++--- tools/cronjob_tools.py | 24 +++++++------- tools/delegate_tool.py | 12 +++---- tools/file_tools.py | 22 ++++++------- tools/homeassistant_tool.py | 18 +++++------ tools/image_generation_tool.py | 4 +-- tools/mcp_tool.py | 8 ++--- tools/memory_tool.py | 16 ++++----- tools/process_registry.py | 6 ++-- tools/registry.py | 45 ++++++++++++++++++++++++++ tools/send_message_tool.py | 10 +++--- tools/session_search_tool.py | 8 ++--- tools/skill_manager_tool.py | 16 ++++----- tools/skills_tool.py | 8 ++--- tools/todo_tool.py | 4 +-- tools/tts_tool.py | 10 +++--- tools/vision_tools.py | 4 +-- tools/web_tools.py | 16 ++++----- 32 files changed, 252 insertions(+), 179 deletions(-) diff --git a/agent/builtin_memory_provider.py b/agent/builtin_memory_provider.py index 
0d9cf6c0..77df9a30 100644 --- a/agent/builtin_memory_provider.py +++ b/agent/builtin_memory_provider.py @@ -16,6 +16,7 @@ import logging from typing import Any, Dict, List from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -92,7 +93,7 @@ class BuiltinMemoryProvider(MemoryProvider): def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str: """Not used — the memory tool is intercepted in run_agent.py.""" - return json.dumps({"error": "Built-in memory tool is handled by the agent loop"}) + return tool_error("Built-in memory tool is handled by the agent loop") def shutdown(self) -> None: """No cleanup needed — files are saved on every write.""" diff --git a/agent/memory_manager.py b/agent/memory_manager.py index 0e4113ef..4630c481 100644 --- a/agent/memory_manager.py +++ b/agent/memory_manager.py @@ -34,6 +34,7 @@ import re from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -249,7 +250,7 @@ class MemoryManager: """ provider = self._tool_to_provider.get(tool_name) if provider is None: - return json.dumps({"error": f"No memory provider handles tool '{tool_name}'"}) + return tool_error(f"No memory provider handles tool '{tool_name}'") try: return provider.handle_tool_call(tool_name, args, **kwargs) except Exception as e: @@ -257,7 +258,7 @@ class MemoryManager: "Memory provider '%s' handle_tool_call(%s) failed: %s", provider.name, tool_name, e, ) - return json.dumps({"error": f"Memory tool '{tool_name}' failed: {e}"}) + return tool_error(f"Memory tool '{tool_name}' failed: {e}") # -- Lifecycle hooks ----------------------------------------------------- diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d90fc215..405b83ac 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1881,6 +1881,24 @@ def 
_normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]: +def read_raw_config() -> Dict[str, Any]: + """Read ~/.hermes/config.yaml as-is, without merging defaults or migrating. + + Returns the raw YAML dict, or ``{}`` if the file doesn't exist or can't + be parsed. Use this for lightweight config reads where you just need a + single value and don't want the overhead of ``load_config()``'s deep-merge + + migration pipeline. + """ + try: + config_path = get_config_path() + if config_path.exists(): + with open(config_path, encoding="utf-8") as f: + return yaml.safe_load(f) or {} + except Exception: + pass + return {} + + def load_config() -> Dict[str, Any]: """Load configuration from ~/.hermes/config.yaml.""" import copy diff --git a/plugins/memory/byterover/__init__.py b/plugins/memory/byterover/__init__.py index d73440c7..1870e9ab 100644 --- a/plugins/memory/byterover/__init__.py +++ b/plugins/memory/byterover/__init__.py @@ -27,6 +27,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -320,7 +321,7 @@ class ByteRoverMemoryProvider(MemoryProvider): return self._tool_curate(args) elif tool_name == "brv_status": return self._tool_status() - return json.dumps({"error": f"Unknown tool: {tool_name}"}) + return tool_error(f"Unknown tool: {tool_name}") def shutdown(self) -> None: if self._sync_thread and self._sync_thread.is_alive(): @@ -331,7 +332,7 @@ class ByteRoverMemoryProvider(MemoryProvider): def _tool_query(self, args: dict) -> str: query = args.get("query", "") if not query: - return json.dumps({"error": "query is required"}) + return tool_error("query is required") result = _run_brv( ["query", "--", query.strip()[:5000]], @@ -339,7 +340,7 @@ class ByteRoverMemoryProvider(MemoryProvider): ) if not result["success"]: - return json.dumps({"error": result.get("error", "Query failed")}) + return 
tool_error(result.get("error", "Query failed")) output = result.get("output", "").strip() if not output or len(output) < _MIN_OUTPUT_LEN: @@ -354,7 +355,7 @@ class ByteRoverMemoryProvider(MemoryProvider): def _tool_curate(self, args: dict) -> str: content = args.get("content", "") if not content: - return json.dumps({"error": "content is required"}) + return tool_error("content is required") result = _run_brv( ["curate", "--", content], @@ -362,14 +363,14 @@ class ByteRoverMemoryProvider(MemoryProvider): ) if not result["success"]: - return json.dumps({"error": result.get("error", "Curate failed")}) + return tool_error(result.get("error", "Curate failed")) return json.dumps({"result": "Memory curated successfully."}) def _tool_status(self) -> str: result = _run_brv(["status"], timeout=15, cwd=self._cwd) if not result["success"]: - return json.dumps({"error": result.get("error", "Status check failed")}) + return tool_error(result.get("error", "Status check failed")) return json.dumps({"status": result.get("output", "")}) diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index e10a14a8..51feb3cb 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -26,6 +26,7 @@ import threading from typing import Any, Dict, List from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -433,12 +434,12 @@ class HindsightMemoryProvider(MemoryProvider): client = self._get_client() except Exception as e: logger.warning("Hindsight client init failed: %s", e) - return json.dumps({"error": f"Hindsight client unavailable: {e}"}) + return tool_error(f"Hindsight client unavailable: {e}") if tool_name == "hindsight_retain": content = args.get("content", "") if not content: - return json.dumps({"error": "Missing required parameter: content"}) + return tool_error("Missing required parameter: content") context = args.get("context") try: 
_run_sync(client.aretain( @@ -447,12 +448,12 @@ class HindsightMemoryProvider(MemoryProvider): return json.dumps({"result": "Memory stored successfully."}) except Exception as e: logger.warning("hindsight_retain failed: %s", e) - return json.dumps({"error": f"Failed to store memory: {e}"}) + return tool_error(f"Failed to store memory: {e}") elif tool_name == "hindsight_recall": query = args.get("query", "") if not query: - return json.dumps({"error": "Missing required parameter: query"}) + return tool_error("Missing required parameter: query") try: resp = _run_sync(client.arecall( bank_id=self._bank_id, query=query, budget=self._budget @@ -463,12 +464,12 @@ class HindsightMemoryProvider(MemoryProvider): return json.dumps({"result": "\n".join(lines)}) except Exception as e: logger.warning("hindsight_recall failed: %s", e) - return json.dumps({"error": f"Failed to search memory: {e}"}) + return tool_error(f"Failed to search memory: {e}") elif tool_name == "hindsight_reflect": query = args.get("query", "") if not query: - return json.dumps({"error": "Missing required parameter: query"}) + return tool_error("Missing required parameter: query") try: resp = _run_sync(client.areflect( bank_id=self._bank_id, query=query, budget=self._budget @@ -476,9 +477,9 @@ class HindsightMemoryProvider(MemoryProvider): return json.dumps({"result": resp.text or "No relevant memories found."}) except Exception as e: logger.warning("hindsight_reflect failed: %s", e) - return json.dumps({"error": f"Failed to reflect: {e}"}) + return tool_error(f"Failed to reflect: {e}") - return json.dumps({"error": f"Unknown tool: {tool_name}"}) + return tool_error(f"Unknown tool: {tool_name}") def shutdown(self) -> None: global _loop, _loop_thread diff --git a/plugins/memory/holographic/__init__.py b/plugins/memory/holographic/__init__.py index b1423c10..cd4ef07b 100644 --- a/plugins/memory/holographic/__init__.py +++ b/plugins/memory/holographic/__init__.py @@ -23,6 +23,7 @@ import re from typing import 
Any, Dict, List from agent.memory_provider import MemoryProvider +from tools.registry import tool_error from .store import MemoryStore from .retrieval import FactRetriever @@ -230,7 +231,7 @@ class HolographicMemoryProvider(MemoryProvider): return self._handle_fact_store(args) elif tool_name == "fact_feedback": return self._handle_fact_feedback(args) - return json.dumps({"error": f"Unknown tool: {tool_name}"}) + return tool_error(f"Unknown tool: {tool_name}") def on_session_end(self, messages: List[Dict[str, Any]]) -> None: if not self._config.get("auto_extract", False): @@ -296,7 +297,7 @@ class HolographicMemoryProvider(MemoryProvider): elif action == "reason": entities = args.get("entities", []) if not entities: - return json.dumps({"error": "reason requires 'entities' list"}) + return tool_error("reason requires 'entities' list") results = retriever.reason( entities, category=args.get("category"), @@ -334,12 +335,12 @@ class HolographicMemoryProvider(MemoryProvider): return json.dumps({"facts": facts, "count": len(facts)}) else: - return json.dumps({"error": f"Unknown action: {action}"}) + return tool_error(f"Unknown action: {action}") except KeyError as exc: - return json.dumps({"error": f"Missing required argument: {exc}"}) + return tool_error(f"Missing required argument: {exc}") except Exception as exc: - return json.dumps({"error": str(exc)}) + return tool_error(str(exc)) def _handle_fact_feedback(self, args: dict) -> str: try: @@ -348,9 +349,9 @@ class HolographicMemoryProvider(MemoryProvider): result = self._store.record_feedback(fact_id, helpful=helpful) return json.dumps(result) except KeyError as exc: - return json.dumps({"error": f"Missing required argument: {exc}"}) + return tool_error(f"Missing required argument: {exc}") except Exception as exc: - return json.dumps({"error": str(exc)}) + return tool_error(str(exc)) # -- Auto-extraction (on_session_end) ------------------------------------ diff --git a/plugins/memory/honcho/__init__.py 
b/plugins/memory/honcho/__init__.py index 782af579..e8078ae5 100644 --- a/plugins/memory/honcho/__init__.py +++ b/plugins/memory/honcho/__init__.py @@ -21,6 +21,7 @@ import threading from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -638,15 +639,15 @@ class HonchoMemoryProvider(MemoryProvider): def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str: """Handle a Honcho tool call, with lazy session init for tools-only mode.""" if self._cron_skipped: - return json.dumps({"error": "Honcho is not active (cron context)."}) + return tool_error("Honcho is not active (cron context).") # Port #1957: ensure session is initialized for tools-only mode if not self._session_initialized: if not self._ensure_session(): - return json.dumps({"error": "Honcho session could not be initialized."}) + return tool_error("Honcho session could not be initialized.") if not self._manager or not self._session_key: - return json.dumps({"error": "Honcho is not active for this session."}) + return tool_error("Honcho is not active for this session.") try: if tool_name == "honcho_profile": @@ -658,7 +659,7 @@ class HonchoMemoryProvider(MemoryProvider): elif tool_name == "honcho_search": query = args.get("query", "") if not query: - return json.dumps({"error": "Missing required parameter: query"}) + return tool_error("Missing required parameter: query") max_tokens = min(int(args.get("max_tokens", 800)), 2000) result = self._manager.search_context( self._session_key, query, max_tokens=max_tokens @@ -670,7 +671,7 @@ class HonchoMemoryProvider(MemoryProvider): elif tool_name == "honcho_context": query = args.get("query", "") if not query: - return json.dumps({"error": "Missing required parameter: query"}) + return tool_error("Missing required parameter: query") peer = args.get("peer", "user") result = self._manager.dialectic_query( self._session_key, query, 
peer=peer @@ -680,17 +681,17 @@ class HonchoMemoryProvider(MemoryProvider): elif tool_name == "honcho_conclude": conclusion = args.get("conclusion", "") if not conclusion: - return json.dumps({"error": "Missing required parameter: conclusion"}) + return tool_error("Missing required parameter: conclusion") ok = self._manager.create_conclusion(self._session_key, conclusion) if ok: return json.dumps({"result": f"Conclusion saved: {conclusion}"}) - return json.dumps({"error": "Failed to save conclusion."}) + return tool_error("Failed to save conclusion.") - return json.dumps({"error": f"Unknown tool: {tool_name}"}) + return tool_error(f"Unknown tool: {tool_name}") except Exception as e: logger.error("Honcho tool %s failed: %s", tool_name, e) - return json.dumps({"error": f"Honcho {tool_name} failed: {e}"}) + return tool_error(f"Honcho {tool_name} failed: {e}") def shutdown(self) -> None: for t in (self._prefetch_thread, self._sync_thread): diff --git a/plugins/memory/mem0/__init__.py b/plugins/memory/mem0/__init__.py index dc56becd..32d1f6ff 100644 --- a/plugins/memory/mem0/__init__.py +++ b/plugins/memory/mem0/__init__.py @@ -23,6 +23,7 @@ import time from typing import Any, Dict, List from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -305,7 +306,7 @@ class Mem0MemoryProvider(MemoryProvider): try: client = self._get_client() except Exception as e: - return json.dumps({"error": str(e)}) + return tool_error(str(e)) if tool_name == "mem0_profile": try: @@ -317,12 +318,12 @@ class Mem0MemoryProvider(MemoryProvider): return json.dumps({"result": "\n".join(lines), "count": len(lines)}) except Exception as e: self._record_failure() - return json.dumps({"error": f"Failed to fetch profile: {e}"}) + return tool_error(f"Failed to fetch profile: {e}") elif tool_name == "mem0_search": query = args.get("query", "") if not query: - return json.dumps({"error": "Missing required parameter: query"}) + return 
tool_error("Missing required parameter: query") rerank = args.get("rerank", False) top_k = min(int(args.get("top_k", 10)), 50) try: @@ -339,12 +340,12 @@ class Mem0MemoryProvider(MemoryProvider): return json.dumps({"results": items, "count": len(items)}) except Exception as e: self._record_failure() - return json.dumps({"error": f"Search failed: {e}"}) + return tool_error(f"Search failed: {e}") elif tool_name == "mem0_conclude": conclusion = args.get("conclusion", "") if not conclusion: - return json.dumps({"error": "Missing required parameter: conclusion"}) + return tool_error("Missing required parameter: conclusion") try: client.add( [{"role": "user", "content": conclusion}], @@ -355,9 +356,9 @@ class Mem0MemoryProvider(MemoryProvider): return json.dumps({"result": "Fact stored."}) except Exception as e: self._record_failure() - return json.dumps({"error": f"Failed to store: {e}"}) + return tool_error(f"Failed to store: {e}") - return json.dumps({"error": f"Unknown tool: {tool_name}"}) + return tool_error(f"Unknown tool: {tool_name}") def shutdown(self) -> None: for t in (self._prefetch_thread, self._sync_thread): diff --git a/plugins/memory/openviking/__init__.py b/plugins/memory/openviking/__init__.py index 596080c3..f46d7132 100644 --- a/plugins/memory/openviking/__init__.py +++ b/plugins/memory/openviking/__init__.py @@ -31,6 +31,7 @@ import threading from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -461,7 +462,7 @@ class OpenVikingMemoryProvider(MemoryProvider): def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str: if not self._client: - return json.dumps({"error": "OpenViking server not connected"}) + return tool_error("OpenViking server not connected") try: if tool_name == "viking_search": @@ -474,9 +475,9 @@ class OpenVikingMemoryProvider(MemoryProvider): return self._tool_remember(args) elif tool_name == 
"viking_add_resource": return self._tool_add_resource(args) - return json.dumps({"error": f"Unknown tool: {tool_name}"}) + return tool_error(f"Unknown tool: {tool_name}") except Exception as e: - return json.dumps({"error": str(e)}) + return tool_error(str(e)) def shutdown(self) -> None: # Wait for background threads to finish @@ -493,7 +494,7 @@ class OpenVikingMemoryProvider(MemoryProvider): def _tool_search(self, args: dict) -> str: query = args.get("query", "") if not query: - return json.dumps({"error": "query is required"}) + return tool_error("query is required") payload: Dict[str, Any] = {"query": query} mode = args.get("mode", "auto") @@ -530,7 +531,7 @@ class OpenVikingMemoryProvider(MemoryProvider): def _tool_read(self, args: dict) -> str: uri = args.get("uri", "") if not uri: - return json.dumps({"error": "uri is required"}) + return tool_error("uri is required") level = args.get("level", "overview") # Map our level names to OpenViking GET endpoints @@ -582,7 +583,7 @@ class OpenVikingMemoryProvider(MemoryProvider): def _tool_remember(self, args: dict) -> str: content = args.get("content", "") if not content: - return json.dumps({"error": "content is required"}) + return tool_error("content is required") # Store as a session message that will be extracted during commit. # The category hint helps OpenViking's extraction classify correctly. 
@@ -606,7 +607,7 @@ class OpenVikingMemoryProvider(MemoryProvider): def _tool_add_resource(self, args: dict) -> str: url = args.get("url", "") if not url: - return json.dumps({"error": "url is required"}) + return tool_error("url is required") payload: Dict[str, Any] = {"path": url} if args.get("reason"): diff --git a/plugins/memory/retaindb/__init__.py b/plugins/memory/retaindb/__init__.py index 72ff9d77..62121410 100644 --- a/plugins/memory/retaindb/__init__.py +++ b/plugins/memory/retaindb/__init__.py @@ -34,6 +34,7 @@ from typing import Any, Dict, List from urllib.parse import quote from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -649,11 +650,11 @@ class RetainDBMemoryProvider(MemoryProvider): def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str: if not self._client: - return json.dumps({"error": "RetainDB not initialized"}) + return tool_error("RetainDB not initialized") try: return json.dumps(self._dispatch(tool_name, args)) except Exception as exc: - return json.dumps({"error": str(exc)}) + return tool_error(str(exc)) def _dispatch(self, tool_name: str, args: dict) -> Any: c = self._client diff --git a/plugins/memory/supermemory/__init__.py b/plugins/memory/supermemory/__init__.py index ee2c35e6..08ac3575 100644 --- a/plugins/memory/supermemory/__init__.py +++ b/plugins/memory/supermemory/__init__.py @@ -18,6 +18,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional from agent.memory_provider import MemoryProvider +from tools.registry import tool_error logger = logging.getLogger(__name__) @@ -587,7 +588,7 @@ class SupermemoryMemoryProvider(MemoryProvider): def _tool_store(self, args: dict) -> str: content = str(args.get("content") or "").strip() if not content: - return json.dumps({"error": "content is required"}) + return tool_error("content is required") metadata = args.get("metadata") or {} if not isinstance(metadata, dict): metadata = 
{} @@ -598,12 +599,12 @@ class SupermemoryMemoryProvider(MemoryProvider): preview = content[:80] + ("..." if len(content) > 80 else "") return json.dumps({"saved": True, "id": result.get("id", ""), "preview": preview}) except Exception as exc: - return json.dumps({"error": f"Failed to store memory: {exc}"}) + return tool_error(f"Failed to store memory: {exc}") def _tool_search(self, args: dict) -> str: query = str(args.get("query") or "").strip() if not query: - return json.dumps({"error": "query is required"}) + return tool_error("query is required") try: limit = max(1, min(20, int(args.get("limit", 5) or 5))) except Exception: @@ -621,20 +622,20 @@ class SupermemoryMemoryProvider(MemoryProvider): formatted.append(entry) return json.dumps({"results": formatted, "count": len(formatted)}) except Exception as exc: - return json.dumps({"error": f"Search failed: {exc}"}) + return tool_error(f"Search failed: {exc}") def _tool_forget(self, args: dict) -> str: memory_id = str(args.get("id") or "").strip() query = str(args.get("query") or "").strip() if not memory_id and not query: - return json.dumps({"error": "Provide either id or query"}) + return tool_error("Provide either id or query") try: if memory_id: self._client.forget_memory(memory_id) return json.dumps({"forgotten": True, "id": memory_id}) return json.dumps(self._client.forget_by_query(query)) except Exception as exc: - return json.dumps({"error": f"Forget failed: {exc}"}) + return tool_error(f"Forget failed: {exc}") def _tool_profile(self, args: dict) -> str: query = str(args.get("query") or "").strip() or None @@ -651,11 +652,11 @@ class SupermemoryMemoryProvider(MemoryProvider): "dynamic_count": len(profile["dynamic"]), }) except Exception as exc: - return json.dumps({"error": f"Profile failed: {exc}"}) + return tool_error(f"Profile failed: {exc}") def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str: if not self._active or not self._client: - return json.dumps({"error": 
"Supermemory is not configured"}) + return tool_error("Supermemory is not configured") if tool_name == "supermemory_store": return self._tool_store(args) if tool_name == "supermemory_search": @@ -664,7 +665,7 @@ class SupermemoryMemoryProvider(MemoryProvider): return self._tool_forget(args) if tool_name == "supermemory_profile": return self._tool_profile(args) - return json.dumps({"error": f"Unknown tool: {tool_name}"}) + return tool_error(f"Unknown tool: {tool_name}") def register(ctx): diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index 13e85c18..08f26f50 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -259,7 +259,7 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str: return json.dumps(result) except requests.HTTPError as e: - return json.dumps({"success": False, "error": f"Navigation failed: {e}"}) + return tool_error(f"Navigation failed: {e}", success=False) except requests.ConnectionError: return json.dumps({ "success": False, @@ -268,7 +268,7 @@ def camofox_navigate(url: str, task_id: Optional[str] = None) -> str: "or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser", }) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_snapshot(full: bool = False, task_id: Optional[str] = None, @@ -277,7 +277,7 @@ def camofox_snapshot(full: bool = False, task_id: Optional[str] = None, try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + return tool_error("No browser session. 
Call browser_navigate first.", success=False) data = _get( f"/tabs/{session['tab_id']}/snapshot", @@ -306,7 +306,7 @@ def camofox_snapshot(full: bool = False, task_id: Optional[str] = None, "element_count": refs_count, }) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_click(ref: str, task_id: Optional[str] = None) -> str: @@ -314,7 +314,7 @@ def camofox_click(ref: str, task_id: Optional[str] = None) -> str: try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + return tool_error("No browser session. Call browser_navigate first.", success=False) # Strip @ prefix if present (our tool convention) clean_ref = ref.lstrip("@") @@ -329,7 +329,7 @@ def camofox_click(ref: str, task_id: Optional[str] = None) -> str: "url": data.get("url", ""), }) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: @@ -337,7 +337,7 @@ def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + return tool_error("No browser session. 
Call browser_navigate first.", success=False) clean_ref = ref.lstrip("@") @@ -351,7 +351,7 @@ def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str: "element": clean_ref, }) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str: @@ -359,7 +359,7 @@ def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str: try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + return tool_error("No browser session. Call browser_navigate first.", success=False) _post( f"/tabs/{session['tab_id']}/scroll", @@ -367,7 +367,7 @@ def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str: ) return json.dumps({"success": True, "scrolled": direction}) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_back(task_id: Optional[str] = None) -> str: @@ -375,7 +375,7 @@ def camofox_back(task_id: Optional[str] = None) -> str: try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + return tool_error("No browser session. 
Call browser_navigate first.", success=False) data = _post( f"/tabs/{session['tab_id']}/back", @@ -383,7 +383,7 @@ def camofox_back(task_id: Optional[str] = None) -> str: ) return json.dumps({"success": True, "url": data.get("url", "")}) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_press(key: str, task_id: Optional[str] = None) -> str: @@ -391,7 +391,7 @@ def camofox_press(key: str, task_id: Optional[str] = None) -> str: try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + return tool_error("No browser session. Call browser_navigate first.", success=False) _post( f"/tabs/{session['tab_id']}/press", @@ -399,7 +399,7 @@ def camofox_press(key: str, task_id: Optional[str] = None) -> str: ) return json.dumps({"success": True, "pressed": key}) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_close(task_id: Optional[str] = None) -> str: @@ -426,7 +426,7 @@ def camofox_get_images(task_id: Optional[str] = None) -> str: try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."}) + return tool_error("No browser session. Call browser_navigate first.", success=False) import re @@ -461,7 +461,7 @@ def camofox_get_images(task_id: Optional[str] = None) -> str: "count": len(images), }) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_vision(question: str, annotate: bool = False, @@ -470,7 +470,7 @@ def camofox_vision(question: str, annotate: bool = False, try: session = _get_session(task_id) if not session["tab_id"]: - return json.dumps({"success": False, "error": "No browser session. 
Call browser_navigate first."}) + return tool_error("No browser session. Call browser_navigate first.", success=False) # Get screenshot as binary PNG resp = _get_raw( @@ -551,7 +551,7 @@ def camofox_vision(question: str, annotate: bool = False, "screenshot_path": screenshot_path, }) except Exception as e: - return json.dumps({"success": False, "error": str(e)}) + return tool_error(str(e), success=False) def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str: diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 317c1fb1..10004b08 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1618,7 +1618,7 @@ def _camofox_eval(expression: str, task_id: Optional[str] = None) -> str: "error": "JavaScript evaluation is not supported by this Camofox server. " "Use browser_snapshot or browser_vision to inspect page state.", }) - return json.dumps({"success": False, "error": error_msg}) + return tool_error(error_msg, success=False) def _maybe_start_recording(task_id: str): @@ -2102,7 +2102,7 @@ if __name__ == "__main__": # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- -from tools.registry import registry +from tools.registry import registry, tool_error _BROWSER_SCHEMA_MAP = {s["name"]: s for s in BROWSER_TOOL_SCHEMAS} diff --git a/tools/clarify_tool.py b/tools/clarify_tool.py index ece33eb5..c4478755 100644 --- a/tools/clarify_tool.py +++ b/tools/clarify_tool.py @@ -40,14 +40,14 @@ def clarify_tool( JSON string with the user's response. 
""" if not question or not question.strip(): - return json.dumps({"error": "Question text is required."}, ensure_ascii=False) + return tool_error("Question text is required.") question = question.strip() # Validate and trim choices if choices is not None: if not isinstance(choices, list): - return json.dumps({"error": "choices must be a list of strings."}, ensure_ascii=False) + return tool_error("choices must be a list of strings.") choices = [str(c).strip() for c in choices if str(c).strip()] if len(choices) > MAX_CHOICES: choices = choices[:MAX_CHOICES] @@ -126,7 +126,7 @@ CLARIFY_SCHEMA = { # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="clarify", diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index a7a80606..77be5569 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -344,7 +344,7 @@ def _rpc_server_loop( try: request = json.loads(line.decode()) except (json.JSONDecodeError, UnicodeDecodeError) as exc: - resp = json.dumps({"error": f"Invalid RPC request: {exc}"}) + resp = tool_error(f"Invalid RPC request: {exc}") conn.sendall((resp + "\n").encode()) continue @@ -396,7 +396,7 @@ def _rpc_server_loop( devnull.close() except Exception as exc: logger.error("Tool call failed in sandbox: %s", exc, exc_info=True) - result = json.dumps({"error": str(exc)}) + result = tool_error(str(exc)) tool_call_counter[0] += 1 call_duration = time.monotonic() - call_start @@ -648,7 +648,7 @@ def _rpc_poll_loop( except Exception as exc: logger.error("Tool call failed in remote sandbox: %s", exc, exc_info=True) - tool_result = json.dumps({"error": str(exc)}) + tool_result = tool_error(str(exc)) tool_call_counter[0] += 1 call_duration = time.monotonic() - call_start @@ -890,7 +890,7 @@ def execute_code( }) if not code or not code.strip(): - return json.dumps({"error": "No code provided."}) + return tool_error("No code provided.") # Dispatch: remote 
backends use file-based RPC, local uses UDS from tools.terminal_tool import _get_env_config @@ -1331,7 +1331,7 @@ EXECUTE_CODE_SCHEMA = build_execute_code_schema() # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="execute_code", diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 0578dca5..82d43c58 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -231,20 +231,20 @@ def cronjob( if normalized == "create": if not schedule: - return json.dumps({"success": False, "error": "schedule is required for create"}, indent=2) + return tool_error("schedule is required for create", success=False) canonical_skills = _canonical_skills(skill, skills) if not prompt and not canonical_skills: - return json.dumps({"success": False, "error": "create requires either prompt or at least one skill"}, indent=2) + return tool_error("create requires either prompt or at least one skill", success=False) if prompt: scan_error = _scan_cron_prompt(prompt) if scan_error: - return json.dumps({"success": False, "error": scan_error}, indent=2) + return tool_error(scan_error, success=False) # Validate script path before storing if script: script_error = _validate_cron_script_path(script) if script_error: - return json.dumps({"success": False, "error": script_error}, indent=2) + return tool_error(script_error, success=False) job = create_job( prompt=prompt or "", @@ -281,7 +281,7 @@ def cronjob( return json.dumps({"success": True, "count": len(jobs), "jobs": jobs}, indent=2) if not job_id: - return json.dumps({"success": False, "error": f"job_id is required for action '{normalized}'"}, indent=2) + return tool_error(f"job_id is required for action '{normalized}'", success=False) job = get_job(job_id) if not job: @@ -293,7 +293,7 @@ def cronjob( if normalized == "remove": removed = remove_job(job_id) if not removed: - return json.dumps({"success": False, "error": f"Failed to remove job 
'{job_id}'"}, indent=2) + return tool_error(f"Failed to remove job '{job_id}'", success=False) return json.dumps( { "success": True, @@ -324,7 +324,7 @@ def cronjob( if prompt is not None: scan_error = _scan_cron_prompt(prompt) if scan_error: - return json.dumps({"success": False, "error": scan_error}, indent=2) + return tool_error(scan_error, success=False) updates["prompt"] = prompt if name is not None: updates["name"] = name @@ -345,7 +345,7 @@ def cronjob( if script: script_error = _validate_cron_script_path(script) if script_error: - return json.dumps({"success": False, "error": script_error}, indent=2) + return tool_error(script_error, success=False) updates["script"] = _normalize_optional_job_value(script) if script else None if repeat is not None: # Normalize: treat 0 or negative as None (infinite) @@ -361,14 +361,14 @@ def cronjob( updates["state"] = "scheduled" updates["enabled"] = True if not updates: - return json.dumps({"success": False, "error": "No updates provided."}, indent=2) + return tool_error("No updates provided.", success=False) updated = update_job(job_id, updates) return json.dumps({"success": True, "job": _format_job(updated)}, indent=2) - return json.dumps({"success": False, "error": f"Unknown cron action '{action}'"}, indent=2) + return tool_error(f"Unknown cron action '{action}'", success=False) except Exception as e: - return json.dumps({"success": False, "error": str(e)}, indent=2) + return tool_error(str(e), success=False) # --------------------------------------------------------------------------- @@ -502,7 +502,7 @@ def check_cronjob_requirements() -> bool: # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="cronjob", diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index ad9b54c4..a148a31f 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -527,7 +527,7 @@ def delegate_task( Returns JSON with results array, one entry per 
task. """ if parent_agent is None: - return json.dumps({"error": "delegate_task requires a parent agent context."}) + return tool_error("delegate_task requires a parent agent context.") # Depth limit depth = getattr(parent_agent, '_delegate_depth', 0) @@ -552,7 +552,7 @@ def delegate_task( try: creds = _resolve_delegation_credentials(cfg, parent_agent) except ValueError as exc: - return json.dumps({"error": str(exc)}) + return tool_error(str(exc)) # Normalize to task list if tasks and isinstance(tasks, list): @@ -560,15 +560,15 @@ def delegate_task( elif goal and isinstance(goal, str) and goal.strip(): task_list = [{"goal": goal, "context": context, "toolsets": toolsets}] else: - return json.dumps({"error": "Provide either 'goal' (single task) or 'tasks' (batch)."}) + return tool_error("Provide either 'goal' (single task) or 'tasks' (batch).") if not task_list: - return json.dumps({"error": "No tasks provided."}) + return tool_error("No tasks provided.") # Validate each task has a goal for i, task in enumerate(task_list): if not task.get("goal", "").strip(): - return json.dumps({"error": f"Task {i} is missing a 'goal'."}) + return tool_error(f"Task {i} is missing a 'goal'.") overall_start = time.monotonic() results = [] @@ -958,7 +958,7 @@ DELEGATE_TASK_SCHEMA = { # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="delegate_task", diff --git a/tools/file_tools.py b/tools/file_tools.py index 626cae9a..43e40315 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -432,7 +432,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = return json.dumps(result_dict, ensure_ascii=False) except Exception as e: - return json.dumps({"error": str(e)}, ensure_ascii=False) + return tool_error(str(e)) def get_read_files_summary(task_id: str = "default") -> list: @@ -560,7 +560,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str: 
"""Write content to a file.""" sensitive_err = _check_sensitive_path(path) if sensitive_err: - return json.dumps({"error": sensitive_err}, ensure_ascii=False) + return tool_error(sensitive_err) try: stale_warning = _check_file_staleness(path, task_id) file_ops = _get_file_ops(task_id) @@ -577,7 +577,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str: logger.debug("write_file expected denial: %s: %s", type(e).__name__, e) else: logger.error("write_file error: %s: %s", type(e).__name__, e, exc_info=True) - return json.dumps({"error": str(e)}, ensure_ascii=False) + return tool_error(str(e)) def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, @@ -595,7 +595,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, for _p in _paths_to_check: sensitive_err = _check_sensitive_path(_p) if sensitive_err: - return json.dumps({"error": sensitive_err}, ensure_ascii=False) + return tool_error(sensitive_err) try: # Check staleness for all files this patch will touch. 
stale_warnings = [] @@ -608,16 +608,16 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, if mode == "replace": if not path: - return json.dumps({"error": "path required"}) + return tool_error("path required") if old_string is None or new_string is None: - return json.dumps({"error": "old_string and new_string required"}) + return tool_error("old_string and new_string required") result = file_ops.patch_replace(path, old_string, new_string, replace_all) elif mode == "patch": if not patch: - return json.dumps({"error": "patch content required"}) + return tool_error("patch content required") result = file_ops.patch_v4a(patch) else: - return json.dumps({"error": f"Unknown mode: {mode}"}) + return tool_error(f"Unknown mode: {mode}") result_dict = result.to_dict() if stale_warnings: @@ -634,7 +634,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]" return result_json except Exception as e: - return json.dumps({"error": str(e)}, ensure_ascii=False) + return tool_error(str(e)) def search_tool(pattern: str, target: str = "content", path: str = ".", @@ -702,7 +702,7 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", result_json += f"\n\n[Hint: Results truncated. 
Use offset={next_offset} to see more, or narrow with a more specific pattern or file_glob.]" return result_json except Exception as e: - return json.dumps({"error": str(e)}, ensure_ascii=False) + return tool_error(str(e)) FILE_TOOLS = [ @@ -716,7 +716,7 @@ FILE_TOOLS = [ # --------------------------------------------------------------------------- # Schemas + Registry # --------------------------------------------------------------------------- -from tools.registry import registry +from tools.registry import registry, tool_error def _check_file_reqs(): diff --git a/tools/homeassistant_tool.py b/tools/homeassistant_tool.py index 62125a7f..0ab99b4b 100644 --- a/tools/homeassistant_tool.py +++ b/tools/homeassistant_tool.py @@ -221,22 +221,22 @@ def _handle_list_entities(args: dict, **kw) -> str: return json.dumps({"result": result}) except Exception as e: logger.error("ha_list_entities error: %s", e) - return json.dumps({"error": f"Failed to list entities: {e}"}) + return tool_error(f"Failed to list entities: {e}") def _handle_get_state(args: dict, **kw) -> str: """Handler for ha_get_state tool.""" entity_id = args.get("entity_id", "") if not entity_id: - return json.dumps({"error": "Missing required parameter: entity_id"}) + return tool_error("Missing required parameter: entity_id") if not _ENTITY_ID_RE.match(entity_id): - return json.dumps({"error": f"Invalid entity_id format: {entity_id}"}) + return tool_error(f"Invalid entity_id format: {entity_id}") try: result = _run_async(_async_get_state(entity_id)) return json.dumps({"result": result}) except Exception as e: logger.error("ha_get_state error: %s", e) - return json.dumps({"error": f"Failed to get state for {entity_id}: {e}"}) + return tool_error(f"Failed to get state for {entity_id}: {e}") def _handle_call_service(args: dict, **kw) -> str: @@ -244,7 +244,7 @@ def _handle_call_service(args: dict, **kw) -> str: domain = args.get("domain", "") service = args.get("service", "") if not domain or not service: - 
return json.dumps({"error": "Missing required parameters: domain and service"}) + return tool_error("Missing required parameters: domain and service") if domain in _BLOCKED_DOMAINS: return json.dumps({ @@ -254,7 +254,7 @@ def _handle_call_service(args: dict, **kw) -> str: entity_id = args.get("entity_id") if entity_id and not _ENTITY_ID_RE.match(entity_id): - return json.dumps({"error": f"Invalid entity_id format: {entity_id}"}) + return tool_error(f"Invalid entity_id format: {entity_id}") data = args.get("data") try: @@ -262,7 +262,7 @@ def _handle_call_service(args: dict, **kw) -> str: return json.dumps({"result": result}) except Exception as e: logger.error("ha_call_service error: %s", e) - return json.dumps({"error": f"Failed to call {domain}.{service}: {e}"}) + return tool_error(f"Failed to call {domain}.{service}: {e}") # --------------------------------------------------------------------------- @@ -311,7 +311,7 @@ def _handle_list_services(args: dict, **kw) -> str: return json.dumps({"result": result}) except Exception as e: logger.error("ha_list_services error: %s", e) - return json.dumps({"error": f"Failed to list services: {e}"}) + return tool_error(f"Failed to list services: {e}") # --------------------------------------------------------------------------- @@ -451,7 +451,7 @@ HA_CALL_SERVICE_SCHEMA = { # Registration # --------------------------------------------------------------------------- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="ha_list_entities", diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index 77e09052..edf43dec 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -652,7 +652,7 @@ if __name__ == "__main__": # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- -from tools.registry import registry +from 
tools.registry import registry, tool_error IMAGE_GENERATE_SCHEMA = { "name": "image_generate", @@ -679,7 +679,7 @@ IMAGE_GENERATE_SCHEMA = { def _handle_image_generate(args, **kw): prompt = args.get("prompt", "") if not prompt: - return json.dumps({"error": "prompt is required for image generation"}) + return tool_error("prompt is required for image generation") return image_generate_tool( prompt=prompt, aspect_ratio=args.get("aspect_ratio", "landscape"), diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 5e4101a9..2ed8ba21 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -792,7 +792,7 @@ class MCPServerTask: After the initial ``await`` (list_tools), all mutations are synchronous — atomic from the event loop's perspective. """ - from tools.registry import registry + from tools.registry import registry, tool_error from toolsets import TOOLSETS async with self._refresh_lock: @@ -1326,7 +1326,7 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float): uri = args.get("uri") if not uri: - return json.dumps({"error": "Missing required parameter 'uri'"}) + return tool_error("Missing required parameter 'uri'") async def _call(): result = await server.session.read_resource(uri) @@ -1415,7 +1415,7 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float): name = args.get("name") if not name: - return json.dumps({"error": "Missing required parameter 'name'"}) + return tool_error("Missing required parameter 'name'") arguments = args.get("arguments", {}) async def _call(): @@ -1724,7 +1724,7 @@ def _register_server_tools(name: str, server: MCPServerTask, config: dict) -> Li Returns: List of registered prefixed tool names. 
""" - from tools.registry import registry + from tools.registry import registry, tool_error from toolsets import create_custom_toolset, TOOLSETS registered_names: List[str] = [] diff --git a/tools/memory_tool.py b/tools/memory_tool.py index f7b6bed2..1feee269 100644 --- a/tools/memory_tool.py +++ b/tools/memory_tool.py @@ -449,30 +449,30 @@ def memory_tool( Returns JSON string with results. """ if store is None: - return json.dumps({"success": False, "error": "Memory is not available. It may be disabled in config or this environment."}, ensure_ascii=False) + return tool_error("Memory is not available. It may be disabled in config or this environment.", success=False) if target not in ("memory", "user"): - return json.dumps({"success": False, "error": f"Invalid target '{target}'. Use 'memory' or 'user'."}, ensure_ascii=False) + return tool_error(f"Invalid target '{target}'. Use 'memory' or 'user'.", success=False) if action == "add": if not content: - return json.dumps({"success": False, "error": "Content is required for 'add' action."}, ensure_ascii=False) + return tool_error("Content is required for 'add' action.", success=False) result = store.add(target, content) elif action == "replace": if not old_text: - return json.dumps({"success": False, "error": "old_text is required for 'replace' action."}, ensure_ascii=False) + return tool_error("old_text is required for 'replace' action.", success=False) if not content: - return json.dumps({"success": False, "error": "content is required for 'replace' action."}, ensure_ascii=False) + return tool_error("content is required for 'replace' action.", success=False) result = store.replace(target, old_text, content) elif action == "remove": if not old_text: - return json.dumps({"success": False, "error": "old_text is required for 'remove' action."}, ensure_ascii=False) + return tool_error("old_text is required for 'remove' action.", success=False) result = store.remove(target, old_text) else: - return json.dumps({"success": 
False, "error": f"Unknown action '{action}'. Use: add, replace, remove"}, ensure_ascii=False) + return tool_error(f"Unknown action '{action}'. Use: add, replace, remove", success=False) return json.dumps(result, ensure_ascii=False) @@ -539,7 +539,7 @@ MEMORY_SCHEMA = { # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="memory", diff --git a/tools/process_registry.py b/tools/process_registry.py index 86868f72..948f073a 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -821,7 +821,7 @@ process_registry = ProcessRegistry() # --------------------------------------------------------------------------- # Registry -- the "process" tool schema + handler # --------------------------------------------------------------------------- -from tools.registry import registry +from tools.registry import registry, tool_error PROCESS_SCHEMA = { "name": "process", @@ -879,7 +879,7 @@ def _handle_process(args, **kw): return _json.dumps({"processes": process_registry.list_sessions(task_id=task_id)}, ensure_ascii=False) elif action in ("poll", "log", "wait", "kill", "write", "submit"): if not session_id: - return _json.dumps({"error": f"session_id is required for {action}"}, ensure_ascii=False) + return tool_error(f"session_id is required for {action}") if action == "poll": return _json.dumps(process_registry.poll(session_id), ensure_ascii=False) elif action == "log": @@ -893,7 +893,7 @@ def _handle_process(args, **kw): return _json.dumps(process_registry.write_stdin(session_id, str(args.get("data", ""))), ensure_ascii=False) elif action == "submit": return _json.dumps(process_registry.submit_stdin(session_id, str(args.get("data", ""))), ensure_ascii=False) - return _json.dumps({"error": f"Unknown process action: {action}. Use: list, poll, log, wait, kill, write, submit"}, ensure_ascii=False) + return tool_error(f"Unknown process action: {action}. 
Use: list, poll, log, wait, kill, write, submit") registry.register( diff --git a/tools/registry.py b/tools/registry.py index 432e1f07..079052a3 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -273,3 +273,48 @@ class ToolRegistry: # Module-level singleton registry = ToolRegistry() + + +# --------------------------------------------------------------------------- +# Helpers for tool response serialization +# --------------------------------------------------------------------------- +# Every tool handler must return a JSON string. These helpers eliminate the +# boilerplate ``json.dumps({"error": msg}, ensure_ascii=False)`` that appears +# hundreds of times across tool files. +# +# Usage: +# from tools.registry import registry, tool_error, tool_result +# +# return tool_error("something went wrong") +# return tool_error("not found", code=404) +# return tool_result(success=True, data=payload) +# return tool_result(items) # pass a dict directly + + +def tool_error(message, **extra) -> str: + """Return a JSON error string for tool handlers. + + >>> tool_error("file not found") + '{"error": "file not found"}' + >>> tool_error("bad input", success=False) + '{"error": "bad input", "success": false}' + """ + result = {"error": str(message)} + if extra: + result.update(extra) + return json.dumps(result, ensure_ascii=False) + + +def tool_result(data=None, **kwargs) -> str: + """Return a JSON result string for tool handlers. 
+ + Accepts a dict positional arg *or* keyword arguments (not both): + + >>> tool_result(success=True, count=42) + '{"success": true, "count": 42}' + >>> tool_result({"key": "value"}) + '{"key": "value"}' + """ + if data is not None: + return json.dumps(data, ensure_ascii=False) + return json.dumps(kwargs, ensure_ascii=False) diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 4e500e69..164b8a2f 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -101,7 +101,7 @@ def _handle_send(args): target = args.get("target", "") message = args.get("message", "") if not target or not message: - return json.dumps({"error": "Both 'target' and 'message' are required when action='send'"}) + return tool_error("Both 'target' and 'message' are required when action='send'") parts = target.split(":", 1) platform_name = parts[0].strip().lower() @@ -134,7 +134,7 @@ def _handle_send(args): from tools.interrupt import is_interrupted if is_interrupted(): - return json.dumps({"error": "Interrupted"}) + return tool_error("Interrupted") try: from gateway.config import load_gateway_config, Platform @@ -160,11 +160,11 @@ def _handle_send(args): platform = platform_map.get(platform_name) if not platform: avail = ", ".join(platform_map.keys()) - return json.dumps({"error": f"Unknown platform: {platform_name}. Available: {avail}"}) + return tool_error(f"Unknown platform: {platform_name}. Available: {avail}") pconfig = config.platforms.get(platform) if not pconfig or not pconfig.enabled: - return json.dumps({"error": f"Platform '{platform_name}' is not configured. Set up credentials in ~/.hermes/config.yaml or environment variables."}) + return tool_error(f"Platform '{platform_name}' is not configured. 
Set up credentials in ~/.hermes/config.yaml or environment variables.") from gateway.platforms.base import BasePlatformAdapter @@ -940,7 +940,7 @@ def _check_send_message(): # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="send_message", diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index 3ff36f94..3e9c68af 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -241,7 +241,7 @@ def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str }, ensure_ascii=False) except Exception as e: logging.error("Error listing recent sessions: %s", e, exc_info=True) - return json.dumps({"success": False, "error": f"Failed to list recent sessions: {e}"}, ensure_ascii=False) + return tool_error(f"Failed to list recent sessions: {e}", success=False) def session_search( @@ -258,7 +258,7 @@ def session_search( The current session is excluded from results since the agent already has that context. 
""" if db is None: - return json.dumps({"success": False, "error": "Session database not available."}, ensure_ascii=False) + return tool_error("Session database not available.", success=False) limit = min(limit, 5) # Cap at 5 sessions to avoid excessive LLM calls @@ -427,7 +427,7 @@ def session_search( except Exception as e: logging.error("Session search failed: %s", e, exc_info=True) - return json.dumps({"success": False, "error": f"Search failed: {str(e)}"}, ensure_ascii=False) + return tool_error(f"Search failed: {str(e)}", success=False) def check_session_search_requirements() -> bool: @@ -487,7 +487,7 @@ SESSION_SEARCH_SCHEMA = { # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="session_search", diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 1a7de513..97a4bf5a 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -584,19 +584,19 @@ def skill_manage( """ if action == "create": if not content: - return json.dumps({"success": False, "error": "content is required for 'create'. Provide the full SKILL.md text (frontmatter + body)."}, ensure_ascii=False) + return tool_error("content is required for 'create'. Provide the full SKILL.md text (frontmatter + body).", success=False) result = _create_skill(name, content, category) elif action == "edit": if not content: - return json.dumps({"success": False, "error": "content is required for 'edit'. Provide the full updated SKILL.md text."}, ensure_ascii=False) + return tool_error("content is required for 'edit'. Provide the full updated SKILL.md text.", success=False) result = _edit_skill(name, content) elif action == "patch": if not old_string: - return json.dumps({"success": False, "error": "old_string is required for 'patch'. Provide the text to find."}, ensure_ascii=False) + return tool_error("old_string is required for 'patch'. 
Provide the text to find.", success=False) if new_string is None: - return json.dumps({"success": False, "error": "new_string is required for 'patch'. Use empty string to delete matched text."}, ensure_ascii=False) + return tool_error("new_string is required for 'patch'. Use empty string to delete matched text.", success=False) result = _patch_skill(name, old_string, new_string, file_path, replace_all) elif action == "delete": @@ -604,14 +604,14 @@ def skill_manage( elif action == "write_file": if not file_path: - return json.dumps({"success": False, "error": "file_path is required for 'write_file'. Example: 'references/api-guide.md'"}, ensure_ascii=False) + return tool_error("file_path is required for 'write_file'. Example: 'references/api-guide.md'", success=False) if file_content is None: - return json.dumps({"success": False, "error": "file_content is required for 'write_file'."}, ensure_ascii=False) + return tool_error("file_content is required for 'write_file'.", success=False) result = _write_file(name, file_path, file_content) elif action == "remove_file": if not file_path: - return json.dumps({"success": False, "error": "file_path is required for 'remove_file'."}, ensure_ascii=False) + return tool_error("file_path is required for 'remove_file'.", success=False) result = _remove_file(name, file_path) else: @@ -722,7 +722,7 @@ SKILL_MANAGE_SCHEMA = { # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="skill_manage", diff --git a/tools/skills_tool.py b/tools/skills_tool.py index c6b6cac3..1c7182e8 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -76,7 +76,7 @@ from enum import Enum from pathlib import Path from typing import Dict, Any, List, Optional, Set, Tuple -from tools.registry import registry +from tools.registry import registry, tool_error logger = logging.getLogger(__name__) @@ -713,7 +713,7 @@ def skills_categories(verbose: bool = False, task_id: str = None) 
-> str: ) except Exception as e: - return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False) + return tool_error(str(e), success=False) def skills_list(category: str = None, task_id: str = None) -> str: @@ -781,7 +781,7 @@ def skills_list(category: str = None, task_id: str = None) -> str: ) except Exception as e: - return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False) + return tool_error(str(e), success=False) def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: @@ -1255,7 +1255,7 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: return json.dumps(result, ensure_ascii=False) except Exception as e: - return json.dumps({"success": False, "error": str(e)}, ensure_ascii=False) + return tool_error(str(e), success=False) # Tool description for model_tools.py diff --git a/tools/todo_tool.py b/tools/todo_tool.py index d5dc33b5..9021fbc2 100644 --- a/tools/todo_tool.py +++ b/tools/todo_tool.py @@ -161,7 +161,7 @@ def todo_tool( JSON string with the full current list and summary metadata. """ if store is None: - return json.dumps({"error": "TodoStore not initialized"}, ensure_ascii=False) + return tool_error("TodoStore not initialized") if todos is not None: items = store.write(todos, merge) @@ -255,7 +255,7 @@ TODO_SCHEMA = { # --- Registry --- -from tools.registry import registry +from tools.registry import registry, tool_error registry.register( name="todo", diff --git a/tools/tts_tool.py b/tools/tts_tool.py index 4cb6e64e..85fa4974 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -466,7 +466,7 @@ def text_to_speech_tool( str: JSON result with success, file_path, and optionally MEDIA tag. 
""" if not text or not text.strip(): - return json.dumps({"success": False, "error": "Text is required"}, ensure_ascii=False) + return tool_error("Text is required", success=False) # Truncate very long text with a warning if len(text) > MAX_TEXT_LENGTH: @@ -607,17 +607,17 @@ def text_to_speech_tool( # Configuration errors (missing API keys, etc.) error_msg = f"TTS configuration error ({provider}): {e}" logger.error("%s", error_msg) - return json.dumps({"success": False, "error": error_msg}, ensure_ascii=False) + return tool_error(error_msg, success=False) except FileNotFoundError as e: # Missing dependencies or files error_msg = f"TTS dependency missing ({provider}): {e}" logger.error("%s", error_msg, exc_info=True) - return json.dumps({"success": False, "error": error_msg}, ensure_ascii=False) + return tool_error(error_msg, success=False) except Exception as e: # Unexpected errors error_msg = f"TTS generation failed ({provider}): {e}" logger.error("%s", error_msg, exc_info=True) - return json.dumps({"success": False, "error": error_msg}, ensure_ascii=False) + return tool_error(error_msg, success=False) # =========================================================================== @@ -950,7 +950,7 @@ if __name__ == "__main__": # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- -from tools.registry import registry +from tools.registry import registry, tool_error TTS_SCHEMA = { "name": "text_to_speech", diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 8c9d6a9b..2223032c 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -320,7 +320,7 @@ async def vision_analyze_tool( try: from tools.interrupt import is_interrupted if is_interrupted(): - return json.dumps({"success": False, "error": "Interrupted"}) + return tool_error("Interrupted", success=False) logger.info("Analyzing image: %s", image_url[:60]) logger.info("User prompt: 
%s", user_prompt[:100]) @@ -570,7 +570,7 @@ if __name__ == "__main__": # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- -from tools.registry import registry +from tools.registry import registry, tool_error VISION_ANALYZE_SCHEMA = { "name": "vision_analyze", diff --git a/tools/web_tools.py b/tools/web_tools.py index 8571c2a2..803a09c0 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -1079,7 +1079,7 @@ def web_search_tool(query: str, limit: int = 5) -> str: try: from tools.interrupt import is_interrupted if is_interrupted(): - return json.dumps({"error": "Interrupted", "success": False}) + return tool_error("Interrupted", success=False) # Dispatch to the configured backend backend = _get_backend() @@ -1158,7 +1158,7 @@ def web_search_tool(query: str, limit: int = 5) -> str: _debug.log_call("web_search_tool", debug_call_data) _debug.save() - return json.dumps({"error": error_msg}, ensure_ascii=False) + return tool_error(error_msg) async def web_extract_tool( @@ -1458,7 +1458,7 @@ async def web_extract_tool( trimmed_response = {"results": trimmed_results} if trimmed_response.get("results") == []: - result_json = json.dumps({"error": "Content was inaccessible or not found"}, ensure_ascii=False) + result_json = tool_error("Content was inaccessible or not found") cleaned_result = clean_base64_images(result_json) @@ -1484,7 +1484,7 @@ async def web_extract_tool( _debug.log_call("web_extract_tool", debug_call_data) _debug.save() - return json.dumps({"error": error_msg}, ensure_ascii=False) + return tool_error(error_msg) async def web_crawl_tool( @@ -1560,7 +1560,7 @@ async def web_crawl_tool( from tools.interrupt import is_interrupted as _is_int if _is_int(): - return json.dumps({"error": "Interrupted", "success": False}) + return tool_error("Interrupted", success=False) logger.info("Tavily crawl: %s", url) payload: Dict[str, Any] = { @@ -1671,7 
+1671,7 @@ async def web_crawl_tool( from tools.interrupt import is_interrupted as _is_int if _is_int(): - return json.dumps({"error": "Interrupted", "success": False}) + return tool_error("Interrupted", success=False) try: crawl_result = _get_firecrawl_client().crawl( @@ -1897,7 +1897,7 @@ async def web_crawl_tool( _debug.log_call("web_crawl_tool", debug_call_data) _debug.save() - return json.dumps({"error": error_msg}, ensure_ascii=False) + return tool_error(error_msg) # Convenience function to check Firecrawl credentials @@ -2043,7 +2043,7 @@ if __name__ == "__main__": # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- -from tools.registry import registry +from tools.registry import registry, tool_error WEB_SEARCH_SCHEMA = { "name": "web_search", From 7b18eeee9b8e8ea27a3df92346c71175f54681db Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 14:03:46 -0700 Subject: [PATCH 088/154] feat(supermemory): add multi-container, search_mode, identity template, and env var override (#5933) Based on PR #5413 spec by MaheshtheDev (Mahesh Sanikommu). 
Changes: - Add search_mode config (hybrid/memories/documents) passed to SDK - Add {identity} template support in container_tag for profile-scoped containers - Add SUPERMEMORY_CONTAINER_TAG env var override (priority over config) - Add multi-container mode: enable_custom_container_tags, custom_containers, custom_container_instructions in supermemory.json - Dynamic tool schemas when multi-container enabled (optional container_tag param) - Whitelist validation for custom container tags in tool calls - Simplify get_config_schema() to only prompt for API key during setup - Defer container_tag sanitization to initialize() (after template resolution) - Add custom_id support to documents.add calls - Update README with multi-container docs, search_mode, identity template, support links (Discord, email) - Update memory-providers.md with new features and multi-container example - Update memory-provider-plugin.md with minimal vs full schema guidance - Add 12 new tests covering identity template, search_mode, multi-container, config schema, and env var override --- plugins/memory/supermemory/README.md | 49 ++++- plugins/memory/supermemory/__init__.py | 197 ++++++++++++++---- .../memory/test_supermemory_provider.py | 170 ++++++++++++++- .../developer-guide/memory-provider-plugin.md | 4 + .../user-guide/features/memory-providers.md | 27 ++- 5 files changed, 395 insertions(+), 52 deletions(-) diff --git a/plugins/memory/supermemory/README.md b/plugins/memory/supermemory/README.md index 7c1310fe..c1f41c41 100644 --- a/plugins/memory/supermemory/README.md +++ b/plugins/memory/supermemory/README.md @@ -17,7 +17,7 @@ Or manually: ```bash hermes config set memory.provider supermemory -echo 'SUPERMEMORY_API_KEY=your-key-here' >> ~/.hermes/.env +echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env ``` ## Config @@ -26,15 +26,23 @@ Config file: `$HERMES_HOME/supermemory.json` | Key | Default | Description | |-----|---------|-------------| -| `container_tag` | `hermes` | Container tag used for 
search and writes | +| `container_tag` | `hermes` | Container tag used for search and writes. Supports `{identity}` template for profile-scoped tags (e.g. `hermes-{identity}` → `hermes-coder`). | | `auto_recall` | `true` | Inject relevant memory context before turns | | `auto_capture` | `true` | Store cleaned user-assistant turns after each response | | `max_recall_results` | `10` | Max recalled items to format into context | | `profile_frequency` | `50` | Include profile facts on first turn and every N turns | | `capture_mode` | `all` | Skip tiny or trivial turns by default | +| `search_mode` | `hybrid` | Search mode: `hybrid` (profile + memories), `memories` (memories only), `documents` (documents only) | | `entity_context` | built-in default | Extraction guidance passed to Supermemory | | `api_timeout` | `5.0` | Timeout for SDK and ingest requests | +### Environment Variables + +| Variable | Description | +|----------|-------------| +| `SUPERMEMORY_API_KEY` | API key (required) | +| `SUPERMEMORY_CONTAINER_TAG` | Override container tag (takes priority over config file) | + ## Tools | Tool | Description | @@ -52,3 +60,40 @@ When enabled, Hermes can: - store cleaned conversation turns after each completed response - ingest the full session on session end for richer graph updates - expose explicit tools for search, store, forget, and profile access + +## Profile-Scoped Containers + +Use `{identity}` in the `container_tag` to scope memories per Hermes profile: + +```json +{ + "container_tag": "hermes-{identity}" +} +``` + +For a profile named `coder`, this resolves to `hermes-coder`. The default profile resolves to `hermes-default`. Without `{identity}`, all profiles share the same container. + +## Multi-Container Mode + +For advanced setups (e.g. 
OpenClaw-style multi-workspace), you can enable custom container tags so the agent can read/write across multiple named containers: + +```json +{ + "container_tag": "hermes", + "enable_custom_container_tags": true, + "custom_containers": ["project-alpha", "project-beta", "shared-knowledge"], + "custom_container_instructions": "Use project-alpha for coding tasks, project-beta for research, and shared-knowledge for team-wide facts." +} +``` + +When enabled: +- `supermemory_search`, `supermemory_store`, `supermemory_forget`, and `supermemory_profile` accept an optional `container_tag` parameter +- The tag must be in the whitelist: primary container + `custom_containers` +- Automatic operations (turn sync, prefetch, memory write mirroring, session ingest) always use the **primary** container only +- Custom container instructions are injected into the system prompt + +## Support + +- [Supermemory Discord](https://supermemory.link/discord) +- [support@supermemory.com](mailto:support@supermemory.com) +- [supermemory.ai](https://supermemory.ai) diff --git a/plugins/memory/supermemory/__init__.py b/plugins/memory/supermemory/__init__.py index 08ac3575..f0cbfd60 100644 --- a/plugins/memory/supermemory/__init__.py +++ b/plugins/memory/supermemory/__init__.py @@ -26,6 +26,8 @@ _DEFAULT_CONTAINER_TAG = "hermes" _DEFAULT_MAX_RECALL_RESULTS = 10 _DEFAULT_PROFILE_FREQUENCY = 50 _DEFAULT_CAPTURE_MODE = "all" +_DEFAULT_SEARCH_MODE = "hybrid" +_VALID_SEARCH_MODES = ("hybrid", "memories", "documents") _DEFAULT_API_TIMEOUT = 5.0 _MIN_CAPTURE_LENGTH = 10 _MAX_ENTITY_CONTEXT_LENGTH = 1500 @@ -59,8 +61,12 @@ def _default_config() -> dict: "max_recall_results": _DEFAULT_MAX_RECALL_RESULTS, "profile_frequency": _DEFAULT_PROFILE_FREQUENCY, "capture_mode": _DEFAULT_CAPTURE_MODE, + "search_mode": _DEFAULT_SEARCH_MODE, "entity_context": _DEFAULT_ENTITY_CONTEXT, "api_timeout": _DEFAULT_API_TIMEOUT, + "enable_custom_container_tags": False, + "custom_containers": [], + 
"custom_container_instructions": "", } @@ -100,7 +106,10 @@ def _load_supermemory_config(hermes_home: str) -> dict: except Exception: logger.debug("Failed to parse %s", config_path, exc_info=True) - config["container_tag"] = _sanitize_tag(str(config.get("container_tag", _DEFAULT_CONTAINER_TAG))) + # Keep raw container_tag — template variables like {identity} are resolved + # in initialize(), and _sanitize_tag runs AFTER resolution. + raw_tag = str(config.get("container_tag", _DEFAULT_CONTAINER_TAG)).strip() + config["container_tag"] = raw_tag if raw_tag else _DEFAULT_CONTAINER_TAG config["auto_recall"] = _as_bool(config.get("auto_recall"), True) config["auto_capture"] = _as_bool(config.get("auto_capture"), True) try: @@ -112,11 +121,23 @@ def _load_supermemory_config(hermes_home: str) -> dict: except Exception: config["profile_frequency"] = _DEFAULT_PROFILE_FREQUENCY config["capture_mode"] = "everything" if config.get("capture_mode") == "everything" else "all" + raw_search_mode = str(config.get("search_mode", _DEFAULT_SEARCH_MODE)).strip().lower() + config["search_mode"] = raw_search_mode if raw_search_mode in _VALID_SEARCH_MODES else _DEFAULT_SEARCH_MODE config["entity_context"] = _clamp_entity_context(str(config.get("entity_context", _DEFAULT_ENTITY_CONTEXT))) try: config["api_timeout"] = max(0.5, min(15.0, float(config.get("api_timeout", _DEFAULT_API_TIMEOUT)))) except Exception: config["api_timeout"] = _DEFAULT_API_TIMEOUT + + # Multi-container support + config["enable_custom_container_tags"] = _as_bool(config.get("enable_custom_container_tags"), False) + raw_containers = config.get("custom_containers", []) + if isinstance(raw_containers, list): + config["custom_containers"] = [_sanitize_tag(str(t)) for t in raw_containers if t] + else: + config["custom_containers"] = [] + config["custom_container_instructions"] = str(config.get("custom_container_instructions", "")).strip() + return config @@ -240,28 +261,41 @@ def _is_trivial_message(text: str) -> bool: class 
_SupermemoryClient: - def __init__(self, api_key: str, timeout: float, container_tag: str): + def __init__(self, api_key: str, timeout: float, container_tag: str, search_mode: str = "hybrid"): from supermemory import Supermemory self._api_key = api_key self._container_tag = container_tag + self._search_mode = search_mode if search_mode in _VALID_SEARCH_MODES else _DEFAULT_SEARCH_MODE self._timeout = timeout self._client = Supermemory(api_key=api_key, timeout=timeout, max_retries=0) - def add_memory(self, content: str, metadata: Optional[dict] = None, *, entity_context: str = "") -> dict: - kwargs = { + def add_memory(self, content: str, metadata: Optional[dict] = None, *, + entity_context: str = "", container_tag: Optional[str] = None, + custom_id: Optional[str] = None) -> dict: + tag = container_tag or self._container_tag + kwargs: dict[str, Any] = { "content": content.strip(), - "container_tags": [self._container_tag], + "container_tags": [tag], } if metadata: kwargs["metadata"] = metadata if entity_context: kwargs["entity_context"] = _clamp_entity_context(entity_context) + if custom_id: + kwargs["custom_id"] = custom_id result = self._client.documents.add(**kwargs) return {"id": getattr(result, "id", "")} - def search_memories(self, query: str, *, limit: int = 5) -> list[dict]: - response = self._client.search.memories(q=query, container_tag=self._container_tag, limit=limit) + def search_memories(self, query: str, *, limit: int = 5, + container_tag: Optional[str] = None, + search_mode: Optional[str] = None) -> list[dict]: + tag = container_tag or self._container_tag + mode = search_mode or self._search_mode + kwargs: dict[str, Any] = {"q": query, "container_tag": tag, "limit": limit} + if mode in _VALID_SEARCH_MODES: + kwargs["search_mode"] = mode + response = self._client.search.memories(**kwargs) results = [] for item in (getattr(response, "results", None) or []): results.append({ @@ -273,8 +307,10 @@ class _SupermemoryClient: }) return results - def 
get_profile(self, query: Optional[str] = None) -> dict: - kwargs = {"container_tag": self._container_tag} + def get_profile(self, query: Optional[str] = None, *, + container_tag: Optional[str] = None) -> dict: + tag = container_tag or self._container_tag + kwargs: dict[str, Any] = {"container_tag": tag} if query: kwargs["q"] = query response = self._client.profile(**kwargs) @@ -296,18 +332,19 @@ class _SupermemoryClient: }) return {"static": static, "dynamic": dynamic, "search_results": search_results} - def forget_memory(self, memory_id: str) -> None: - self._client.memories.forget(container_tag=self._container_tag, id=memory_id) + def forget_memory(self, memory_id: str, *, container_tag: Optional[str] = None) -> None: + tag = container_tag or self._container_tag + self._client.memories.forget(container_tag=tag, id=memory_id) - def forget_by_query(self, query: str) -> dict: - results = self.search_memories(query, limit=5) + def forget_by_query(self, query: str, *, container_tag: Optional[str] = None) -> dict: + results = self.search_memories(query, limit=5, container_tag=container_tag) if not results: return {"success": False, "message": "No matching memory found to forget."} target = results[0] memory_id = target.get("id", "") if not memory_id: return {"success": False, "message": "Best matching memory has no id."} - self.forget_memory(memory_id) + self.forget_memory(memory_id, container_tag=container_tag) preview = (target.get("memory") or "")[:100] return {"success": True, "message": f'Forgot: "{preview}"', "id": memory_id} @@ -398,11 +435,17 @@ class SupermemoryMemoryProvider(MemoryProvider): self._max_recall_results = _DEFAULT_MAX_RECALL_RESULTS self._profile_frequency = _DEFAULT_PROFILE_FREQUENCY self._capture_mode = _DEFAULT_CAPTURE_MODE + self._search_mode = _DEFAULT_SEARCH_MODE self._entity_context = _DEFAULT_ENTITY_CONTEXT self._api_timeout = _DEFAULT_API_TIMEOUT self._hermes_home = "" self._write_enabled = True self._active = False + # Multi-container 
support + self._enable_custom_containers = False + self._custom_containers: List[str] = [] + self._custom_container_instructions = "" + self._allowed_containers: List[str] = [] @property def name(self) -> str: @@ -419,16 +462,11 @@ class SupermemoryMemoryProvider(MemoryProvider): return False def get_config_schema(self): + # Only prompt for the API key during `hermes memory setup`. + # All other options are documented for $HERMES_HOME/supermemory.json + # or the SUPERMEMORY_CONTAINER_TAG env var. return [ {"key": "api_key", "description": "Supermemory API key", "secret": True, "required": True, "env_var": "SUPERMEMORY_API_KEY", "url": "https://supermemory.ai"}, - {"key": "container_tag", "description": "Container tag for reads and writes", "default": _DEFAULT_CONTAINER_TAG}, - {"key": "auto_recall", "description": "Enable automatic recall before each turn", "default": "true", "choices": ["true", "false"]}, - {"key": "auto_capture", "description": "Enable automatic capture after each completed turn", "default": "true", "choices": ["true", "false"]}, - {"key": "max_recall_results", "description": "Maximum recalled items to inject", "default": str(_DEFAULT_MAX_RECALL_RESULTS)}, - {"key": "profile_frequency", "description": "Include profile facts on first turn and every N turns", "default": str(_DEFAULT_PROFILE_FREQUENCY)}, - {"key": "capture_mode", "description": "Capture mode", "default": _DEFAULT_CAPTURE_MODE, "choices": ["all", "everything"]}, - {"key": "entity_context", "description": "Extraction guidance passed to Supermemory", "default": _DEFAULT_ENTITY_CONTEXT}, - {"key": "api_timeout", "description": "Timeout in seconds for SDK and ingest calls", "default": str(_DEFAULT_API_TIMEOUT)}, ] def save_config(self, values, hermes_home): @@ -446,14 +484,29 @@ class SupermemoryMemoryProvider(MemoryProvider): self._turn_count = 0 self._config = _load_supermemory_config(self._hermes_home) self._api_key = os.environ.get("SUPERMEMORY_API_KEY", "") - self._container_tag = 
self._config["container_tag"] + + # Resolve container tag: env var > config > default. + # Supports {identity} template for profile-scoped containers. + env_tag = os.environ.get("SUPERMEMORY_CONTAINER_TAG", "").strip() + raw_tag = env_tag or self._config["container_tag"] + identity = kwargs.get("agent_identity", "default") + self._container_tag = _sanitize_tag(raw_tag.replace("{identity}", identity)) + self._auto_recall = self._config["auto_recall"] self._auto_capture = self._config["auto_capture"] self._max_recall_results = self._config["max_recall_results"] self._profile_frequency = self._config["profile_frequency"] self._capture_mode = self._config["capture_mode"] + self._search_mode = self._config["search_mode"] self._entity_context = self._config["entity_context"] self._api_timeout = self._config["api_timeout"] + + # Multi-container setup + self._enable_custom_containers = self._config["enable_custom_container_tags"] + self._custom_containers = self._config["custom_containers"] + self._custom_container_instructions = self._config["custom_container_instructions"] + self._allowed_containers = [self._container_tag] + list(self._custom_containers) + agent_context = kwargs.get("agent_context", "") self._write_enabled = agent_context not in ("cron", "flush", "subagent") self._active = bool(self._api_key) @@ -464,6 +517,7 @@ class SupermemoryMemoryProvider(MemoryProvider): api_key=self._api_key, timeout=self._api_timeout, container_tag=self._container_tag, + search_mode=self._search_mode, ) except Exception: logger.warning("Supermemory initialization failed", exc_info=True) @@ -476,11 +530,18 @@ class SupermemoryMemoryProvider(MemoryProvider): def system_prompt_block(self) -> str: if not self._active: return "" - return ( - "# Supermemory\n" - f"Active. Container: {self._container_tag}.\n" - "Use supermemory_search, supermemory_store, supermemory_forget, and supermemory_profile for explicit memory operations." - ) + lines = [ + "# Supermemory", + f"Active. 
Container: {self._container_tag}.", + "Use supermemory_search, supermemory_store, supermemory_forget, and supermemory_profile for explicit memory operations.", + ] + if self._enable_custom_containers and self._custom_containers: + tags_str = ", ".join(self._allowed_containers) + lines.append(f"\nMulti-container mode enabled. Available containers: {tags_str}.") + lines.append("Pass an optional container_tag to supermemory_search, supermemory_store, supermemory_forget, and supermemory_profile to target a specific container.") + if self._custom_container_instructions: + lines.append(f"\n{self._custom_container_instructions}") + return "\n".join(lines) def prefetch(self, query: str, *, session_id: str = "") -> str: if not self._active or not self._auto_recall or not self._client or not query.strip(): @@ -582,22 +643,62 @@ class SupermemoryMemoryProvider(MemoryProvider): thread.join(timeout=5.0) setattr(self, attr_name, None) + def _resolve_tool_container_tag(self, args: dict) -> Optional[str]: + """Validate and resolve container_tag from tool call args. + + Returns None (use primary) if multi-container is disabled or no tag provided. + Returns the validated tag if it's in the allowed list. + Raises ValueError if the tag is not whitelisted. + """ + if not self._enable_custom_containers: + return None + tag = str(args.get("container_tag") or "").strip() + if not tag: + return None + sanitized = _sanitize_tag(tag) + if sanitized not in self._allowed_containers: + raise ValueError( + f"Container tag '{sanitized}' is not allowed. 
" + f"Allowed: {', '.join(self._allowed_containers)}" + ) + return sanitized + def get_tool_schemas(self) -> List[Dict[str, Any]]: - return [STORE_SCHEMA, SEARCH_SCHEMA, FORGET_SCHEMA, PROFILE_SCHEMA] + if not self._enable_custom_containers: + return [STORE_SCHEMA, SEARCH_SCHEMA, FORGET_SCHEMA, PROFILE_SCHEMA] + + # When multi-container is enabled, add optional container_tag to relevant tools + container_param = { + "type": "string", + "description": f"Optional container tag. Allowed: {', '.join(self._allowed_containers)}. Defaults to primary ({self._container_tag}).", + } + schemas = [] + for base in [STORE_SCHEMA, SEARCH_SCHEMA, FORGET_SCHEMA, PROFILE_SCHEMA]: + schema = json.loads(json.dumps(base)) # deep copy + schema["parameters"]["properties"]["container_tag"] = container_param + schemas.append(schema) + return schemas def _tool_store(self, args: dict) -> str: content = str(args.get("content") or "").strip() if not content: return tool_error("content is required") + try: + tag = self._resolve_tool_container_tag(args) + except ValueError as exc: + return tool_error(str(exc)) metadata = args.get("metadata") or {} if not isinstance(metadata, dict): metadata = {} metadata.setdefault("type", _detect_category(content)) metadata["source"] = "hermes_tool" try: - result = self._client.add_memory(content, metadata=metadata, entity_context=self._entity_context) + result = self._client.add_memory(content, metadata=metadata, entity_context=self._entity_context, container_tag=tag) preview = content[:80] + ("..." 
if len(content) > 80 else "") - return json.dumps({"saved": True, "id": result.get("id", ""), "preview": preview}) + resp: dict[str, Any] = {"saved": True, "id": result.get("id", ""), "preview": preview} + if tag: + resp["container_tag"] = tag + return json.dumps(resp) except Exception as exc: return tool_error(f"Failed to store memory: {exc}") @@ -605,22 +706,29 @@ class SupermemoryMemoryProvider(MemoryProvider): query = str(args.get("query") or "").strip() if not query: return tool_error("query is required") + try: + tag = self._resolve_tool_container_tag(args) + except ValueError as exc: + return tool_error(str(exc)) try: limit = max(1, min(20, int(args.get("limit", 5) or 5))) except Exception: limit = 5 try: - results = self._client.search_memories(query, limit=limit) + results = self._client.search_memories(query, limit=limit, container_tag=tag) formatted = [] for item in results: - entry = {"id": item.get("id", ""), "content": item.get("memory", "")} + entry: dict[str, Any] = {"id": item.get("id", ""), "content": item.get("memory", "")} if item.get("similarity") is not None: try: entry["similarity"] = round(float(item["similarity"]) * 100) except Exception: pass formatted.append(entry) - return json.dumps({"results": formatted, "count": len(formatted)}) + resp: dict[str, Any] = {"results": formatted, "count": len(formatted)} + if tag: + resp["container_tag"] = tag + return json.dumps(resp) except Exception as exc: return tool_error(f"Search failed: {exc}") @@ -629,28 +737,39 @@ class SupermemoryMemoryProvider(MemoryProvider): query = str(args.get("query") or "").strip() if not memory_id and not query: return tool_error("Provide either id or query") + try: + tag = self._resolve_tool_container_tag(args) + except ValueError as exc: + return tool_error(str(exc)) try: if memory_id: - self._client.forget_memory(memory_id) + self._client.forget_memory(memory_id, container_tag=tag) return json.dumps({"forgotten": True, "id": memory_id}) - return 
json.dumps(self._client.forget_by_query(query)) + return json.dumps(self._client.forget_by_query(query, container_tag=tag)) except Exception as exc: return tool_error(f"Forget failed: {exc}") def _tool_profile(self, args: dict) -> str: query = str(args.get("query") or "").strip() or None try: - profile = self._client.get_profile(query=query) + tag = self._resolve_tool_container_tag(args) + except ValueError as exc: + return tool_error(str(exc)) + try: + profile = self._client.get_profile(query=query, container_tag=tag) sections = [] if profile["static"]: sections.append("## User Profile (Persistent)\n" + "\n".join(f"- {item}" for item in profile["static"])) if profile["dynamic"]: sections.append("## Recent Context\n" + "\n".join(f"- {item}" for item in profile["dynamic"])) - return json.dumps({ + resp: dict[str, Any] = { "profile": "\n\n".join(sections), "static_count": len(profile["static"]), "dynamic_count": len(profile["dynamic"]), - }) + } + if tag: + resp["container_tag"] = tag + return json.dumps(resp) except Exception as exc: return tool_error(f"Profile failed: {exc}") diff --git a/tests/plugins/memory/test_supermemory_provider.py b/tests/plugins/memory/test_supermemory_provider.py index 689793f1..0aee4597 100644 --- a/tests/plugins/memory/test_supermemory_provider.py +++ b/tests/plugins/memory/test_supermemory_provider.py @@ -13,10 +13,11 @@ from plugins.memory.supermemory import ( class FakeClient: - def __init__(self, api_key: str, timeout: float, container_tag: str): + def __init__(self, api_key: str, timeout: float, container_tag: str, search_mode: str = "hybrid"): self.api_key = api_key self.timeout = timeout self.container_tag = container_tag + self.search_mode = search_mode self.add_calls = [] self.search_results = [] self.profile_response = {"static": [], "dynamic": [], "search_results": []} @@ -24,24 +25,27 @@ class FakeClient: self.forgotten_ids = [] self.forget_by_query_response = {"success": True, "message": "Forgot"} - def add_memory(self, 
content, metadata=None, *, entity_context=""): + def add_memory(self, content, metadata=None, *, entity_context="", + container_tag=None, custom_id=None): self.add_calls.append({ "content": content, "metadata": metadata, "entity_context": entity_context, + "container_tag": container_tag, + "custom_id": custom_id, }) return {"id": "mem_123"} - def search_memories(self, query, *, limit=5): + def search_memories(self, query, *, limit=5, container_tag=None, search_mode=None): return self.search_results - def get_profile(self, query=None): + def get_profile(self, query=None, *, container_tag=None): return self.profile_response - def forget_memory(self, memory_id): + def forget_memory(self, memory_id, *, container_tag=None): self.forgotten_ids.append(memory_id) - def forget_by_query(self, query): + def forget_by_query(self, query, *, container_tag=None): return self.forget_by_query_response def ingest_conversation(self, session_id, messages): @@ -82,7 +86,8 @@ def test_is_available_false_when_import_missing(monkeypatch): def test_load_and_save_config_round_trip(tmp_path): _save_supermemory_config({"container_tag": "demo-tag", "auto_capture": False}, str(tmp_path)) cfg = _load_supermemory_config(str(tmp_path)) - assert cfg["container_tag"] == "demo_tag" + # container_tag is kept raw — sanitization happens in initialize() after template resolution + assert cfg["container_tag"] == "demo-tag" assert cfg["auto_capture"] is False assert cfg["auto_recall"] is True @@ -176,7 +181,8 @@ def test_shutdown_joins_and_clears_threads(provider, monkeypatch): started = threading.Event() release = threading.Event() - def slow_add_memory(content, metadata=None, *, entity_context=""): + def slow_add_memory(content, metadata=None, *, entity_context="", + container_tag=None, custom_id=None): started.set() release.wait(timeout=1) provider._client.add_calls.append({ @@ -255,3 +261,151 @@ def test_handle_tool_call_returns_error_when_unconfigured(monkeypatch): p = SupermemoryMemoryProvider() 
result = json.loads(p.handle_tool_call("supermemory_search", {"query": "x"})) assert "error" in result + + +# -- Identity template tests -------------------------------------------------- + + +def test_identity_template_resolved_in_container_tag(monkeypatch, tmp_path): + """container_tag with {identity} resolves to profile-scoped tag.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({"container_tag": "hermes-{identity}"}, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli", agent_identity="coder") + assert p._container_tag == "hermes_coder" + + +def test_identity_template_default_profile(monkeypatch, tmp_path): + """Without agent_identity kwarg, {identity} resolves to 'default'.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({"container_tag": "hermes-{identity}"}, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + assert p._container_tag == "hermes_default" + + +def test_container_tag_env_var_override(monkeypatch, tmp_path): + """SUPERMEMORY_CONTAINER_TAG env var overrides config.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setenv("SUPERMEMORY_CONTAINER_TAG", "env-override") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + assert p._container_tag == "env_override" + + +# -- Search mode tests -------------------------------------------------------- + + +def test_search_mode_config_passed_to_client(monkeypatch, tmp_path): + """search_mode from config is passed to _SupermemoryClient.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + 
monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({"search_mode": "memories"}, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + assert p._search_mode == "memories" + assert p._client.search_mode == "memories" + + +def test_invalid_search_mode_falls_back_to_default(monkeypatch, tmp_path): + """Invalid search_mode falls back to 'hybrid'.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({"search_mode": "invalid_mode"}, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + assert p._search_mode == "hybrid" + + +# -- Multi-container tests ---------------------------------------------------- + + +def test_multi_container_disabled_by_default(provider): + """Multi-container is off by default; schemas have no container_tag param.""" + assert provider._enable_custom_containers is False + schemas = provider.get_tool_schemas() + for s in schemas: + assert "container_tag" not in s["parameters"]["properties"] + + +def test_multi_container_enabled_adds_schema_param(monkeypatch, tmp_path): + """When enabled, tool schemas include container_tag parameter.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({ + "enable_custom_container_tags": True, + "custom_containers": ["project-alpha", "shared"], + }, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + assert p._enable_custom_containers is True + assert p._allowed_containers == ["hermes", "project_alpha", "shared"] + schemas = p.get_tool_schemas() + for s in schemas: + assert "container_tag" in s["parameters"]["properties"] + + +def 
test_multi_container_tool_store_with_custom_tag(monkeypatch, tmp_path): + """supermemory_store uses the resolved container_tag when multi-container is enabled.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({ + "enable_custom_container_tags": True, + "custom_containers": ["project-alpha"], + }, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + result = json.loads(p.handle_tool_call("supermemory_store", { + "content": "test memory", + "container_tag": "project-alpha", + })) + assert result["saved"] is True + assert result["container_tag"] == "project_alpha" + assert p._client.add_calls[-1]["container_tag"] == "project_alpha" + + +def test_multi_container_rejects_unlisted_tag(monkeypatch, tmp_path): + """Tool calls with a non-whitelisted container_tag return an error.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({ + "enable_custom_container_tags": True, + "custom_containers": ["allowed-tag"], + }, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + result = json.loads(p.handle_tool_call("supermemory_store", { + "content": "test", + "container_tag": "forbidden-tag", + })) + assert "error" in result + assert "not allowed" in result["error"] + + +def test_multi_container_system_prompt_includes_instructions(monkeypatch, tmp_path): + """system_prompt_block includes container list and instructions when multi-container is enabled.""" + monkeypatch.setenv("SUPERMEMORY_API_KEY", "test-key") + monkeypatch.setattr("plugins.memory.supermemory._SupermemoryClient", FakeClient) + _save_supermemory_config({ + "enable_custom_container_tags": True, + "custom_containers": ["docs"], + "custom_container_instructions": 
"Use docs for documentation context.", + }, str(tmp_path)) + p = SupermemoryMemoryProvider() + p.initialize("s1", hermes_home=str(tmp_path), platform="cli") + block = p.system_prompt_block() + assert "Multi-container mode enabled" in block + assert "docs" in block + assert "Use docs for documentation context." in block + + +def test_get_config_schema_minimal(): + """get_config_schema only returns the API key field.""" + p = SupermemoryMemoryProvider() + schema = p.get_config_schema() + assert len(schema) == 1 + assert schema[0]["key"] == "api_key" + assert schema[0]["secret"] is True diff --git a/website/docs/developer-guide/memory-provider-plugin.md b/website/docs/developer-guide/memory-provider-plugin.md index 70ae2f61..b5c6a3a3 100644 --- a/website/docs/developer-guide/memory-provider-plugin.md +++ b/website/docs/developer-guide/memory-provider-plugin.md @@ -110,6 +110,10 @@ def get_config_schema(self): Fields with `secret: True` and `env_var` go to `.env`. Non-secret fields are passed to `save_config()`. +:::tip Minimal vs Full Schema +Every field in `get_config_schema()` is prompted during `hermes memory setup`. Providers with many options should keep the schema minimal — only include fields the user **must** configure (API key, required credentials). Document optional settings in a config file reference (e.g. `$HERMES_HOME/myprovider.json`) rather than prompting for them all during setup. This keeps the setup wizard fast while still supporting advanced configuration. See the Supermemory provider for an example — it only prompts for the API key; all other options live in `supermemory.json`. 
+::: + ## Save Config ```python diff --git a/website/docs/user-guide/features/memory-providers.md b/website/docs/user-guide/features/memory-providers.md index 3396c70e..ad0a17ae 100644 --- a/website/docs/user-guide/features/memory-providers.md +++ b/website/docs/user-guide/features/memory-providers.md @@ -400,26 +400,47 @@ Semantic long-term memory with profile recall, semantic search, explicit memory hermes memory setup # select "supermemory" # Or manually: hermes config set memory.provider supermemory -echo 'SUPERMEMORY_API_KEY=your-key-here' >> ~/.hermes/.env +echo 'SUPERMEMORY_API_KEY=***' >> ~/.hermes/.env ``` **Config:** `$HERMES_HOME/supermemory.json` | Key | Default | Description | |-----|---------|-------------| -| `container_tag` | `hermes` | Container tag used for search and writes | +| `container_tag` | `hermes` | Container tag used for search and writes. Supports `{identity}` template for profile-scoped tags. | | `auto_recall` | `true` | Inject relevant memory context before turns | | `auto_capture` | `true` | Store cleaned user-assistant turns after each response | | `max_recall_results` | `10` | Max recalled items to format into context | | `profile_frequency` | `50` | Include profile facts on first turn and every N turns | | `capture_mode` | `all` | Skip tiny or trivial turns by default | +| `search_mode` | `hybrid` | Search mode: `hybrid`, `memories`, or `documents` | | `api_timeout` | `5.0` | Timeout for SDK and ingest requests | +**Environment variables:** `SUPERMEMORY_API_KEY` (required), `SUPERMEMORY_CONTAINER_TAG` (overrides config). + **Key features:** - Automatic context fencing — strips recalled memories from captured turns to prevent recursive memory pollution - Session-end conversation ingest for richer graph-level knowledge building - Profile facts injected on first turn and at configurable intervals - Trivial message filtering (skips "ok", "thanks", etc.) +- **Profile-scoped containers** — use `{identity}` in `container_tag` (e.g. 
`hermes-{identity}` → `hermes-coder`) to isolate memories per Hermes profile +- **Multi-container mode** — enable `enable_custom_container_tags` with a `custom_containers` list to let the agent read/write across named containers. Automatic operations (sync, prefetch) stay on the primary container. + +
+Multi-container example + +```json +{ + "container_tag": "hermes", + "enable_custom_container_tags": true, + "custom_containers": ["project-alpha", "shared-knowledge"], + "custom_container_instructions": "Use project-alpha for coding context." +} +``` + +
+ +**Support:** [Discord](https://supermemory.link/discord) · [support@supermemory.com](mailto:support@supermemory.com) --- @@ -434,7 +455,7 @@ echo 'SUPERMEMORY_API_KEY=your-key-here' >> ~/.hermes/.env | **Holographic** | Local | Free | 2 | None | HRR algebra + trust scoring | | **RetainDB** | Cloud | $20/mo | 5 | `requests` | Delta compression | | **ByteRover** | Local/Cloud | Free/Paid | 3 | `brv` CLI | Pre-compression extraction | -| **Supermemory** | Cloud | Paid | 4 | `supermemory` | Context fencing + session graph ingest | +| **Supermemory** | Cloud | Paid | 4 | `supermemory` | Context fencing + session graph ingest + multi-container | ## Profile Isolation From 4a630c20718ed43195b53aaa2e92a0b1794e80f3 Mon Sep 17 00:00:00 2001 From: Dilee Date: Tue, 7 Apr 2026 14:01:27 -0700 Subject: [PATCH 089/154] fix(telegram): replace substring caption check with exact line-by-line match Captions in photo bursts and media group albums were silently dropped when a shorter caption happened to be a substring of an existing one (e.g. "Meeting" lost inside "Meeting agenda"). Extract a shared _merge_caption static helper that splits on "\n\n" and uses exact match with whitespace normalisation, then use it in both _enqueue_photo_event and _queue_media_group_event. Adds 13 unit tests covering the fixed bug scenarios. Cherry-picked from PR #2671 by Dilee. 
--- gateway/platforms/telegram.py | 27 ++++--- tests/gateway/test_telegram_caption_merge.py | 77 ++++++++++++++++++++ 2 files changed, 95 insertions(+), 9 deletions(-) create mode 100644 tests/gateway/test_telegram_caption_merge.py diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 26b0e426..3fa11498 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2213,6 +2213,22 @@ class TelegramAdapter(BasePlatformAdapter): if self._pending_photo_batch_tasks.get(batch_key) is current_task: self._pending_photo_batch_tasks.pop(batch_key, None) + @staticmethod + def _merge_caption(existing_text: Optional[str], new_text: str) -> str: + """Merge a new caption into existing text, avoiding duplicates. + + Uses line-by-line exact match (not substring) to prevent false positives + where a shorter caption is silently dropped because it appears as a + substring of a longer one (e.g. "Meeting" inside "Meeting agenda"). + Whitespace is normalised for comparison. 
+ """ + if not existing_text: + return new_text + existing_captions = [c.strip() for c in existing_text.split("\n\n")] + if new_text.strip() not in existing_captions: + return f"{existing_text}\n\n{new_text}".strip() + return existing_text + def _enqueue_photo_event(self, batch_key: str, event: MessageEvent) -> None: """Merge photo events into a pending batch and schedule flush.""" existing = self._pending_photo_batches.get(batch_key) @@ -2222,10 +2238,7 @@ class TelegramAdapter(BasePlatformAdapter): existing.media_urls.extend(event.media_urls) existing.media_types.extend(event.media_types) if event.text: - if not existing.text: - existing.text = event.text - elif event.text not in existing.text: - existing.text = f"{existing.text}\n\n{event.text}".strip() + existing.text = self._merge_caption(existing.text, event.text) prior_task = self._pending_photo_batch_tasks.get(batch_key) if prior_task and not prior_task.done(): @@ -2415,11 +2428,7 @@ class TelegramAdapter(BasePlatformAdapter): existing.media_urls.extend(event.media_urls) existing.media_types.extend(event.media_types) if event.text: - if existing.text: - if event.text not in existing.text.split("\n\n"): - existing.text = f"{existing.text}\n\n{event.text}" - else: - existing.text = event.text + existing.text = self._merge_caption(existing.text, event.text) prior_task = self._media_group_tasks.get(media_group_id) if prior_task: diff --git a/tests/gateway/test_telegram_caption_merge.py b/tests/gateway/test_telegram_caption_merge.py new file mode 100644 index 00000000..09cfd8c3 --- /dev/null +++ b/tests/gateway/test_telegram_caption_merge.py @@ -0,0 +1,77 @@ +"""Tests for TelegramPlatform._merge_caption caption deduplication logic.""" + +import pytest + +from gateway.platforms.telegram import TelegramAdapter + +merge = TelegramAdapter._merge_caption + + +class TestMergeCaptionBasic: + def test_no_existing_text(self): + assert merge(None, "Hello") == "Hello" + + def test_empty_existing_text(self): + assert 
merge("", "Hello") == "Hello" + + def test_exact_duplicate_dropped(self): + assert merge("Revenue", "Revenue") == "Revenue" + + def test_different_captions_merged(self): + result = merge("Q3 Results", "Q4 Projections") + assert result == "Q3 Results\n\nQ4 Projections" + + +class TestMergeCaptionSubstringBug: + """These are the exact scenarios that the old substring check got wrong.""" + + def test_shorter_caption_not_dropped_when_substring(self): + # Bug: "Meeting" in "Meeting agenda" → True → caption was silently lost + result = merge("Meeting agenda", "Meeting") + assert result == "Meeting agenda\n\nMeeting" + + def test_longer_caption_not_dropped_when_contains_existing(self): + # "Revenue and Profit" contains "Revenue", but they are different captions + result = merge("Revenue", "Revenue and Profit") + assert result == "Revenue\n\nRevenue and Profit" + + def test_prefix_caption_not_dropped(self): + result = merge("Q3 Results - Revenue", "Q3 Results") + assert result == "Q3 Results - Revenue\n\nQ3 Results" + + +class TestMergeCaptionWhitespace: + def test_trailing_space_treated_as_duplicate(self): + assert merge("Revenue", "Revenue ") == "Revenue" + + def test_leading_space_treated_as_duplicate(self): + assert merge("Revenue", " Revenue") == "Revenue" + + def test_whitespace_only_new_text_not_added(self): + # strip() makes it empty string → falsy check in callers guards this, + # but _merge_caption itself: strip matches "" which is not in list → would merge. + # Callers already guard with `if event.text:` so this is an edge case. 
+ result = merge("Revenue", " ") + # " ".strip() == "" → not in ["Revenue"] → gets merged (caller guards prevent this) + assert "\n\n" in result or result == "Revenue" + + +class TestMergeCaptionMultipleItems: + def test_three_unique_captions_all_present(self): + text = merge(None, "A") + text = merge(text, "B") + text = merge(text, "C") + assert text == "A\n\nB\n\nC" + + def test_duplicate_in_middle_dropped(self): + text = merge(None, "A") + text = merge(text, "B") + text = merge(text, "A") # duplicate + assert text == "A\n\nB" + + def test_album_scenario_revenue_profit(self): + # Album Item 1: "Revenue and Profit", Item 2: "Revenue" + # Old bug: "Revenue" in ["Revenue and Profit"] → True → lost + text = merge(None, "Revenue and Profit") + text = merge(text, "Revenue") + assert text == "Revenue and Profit\n\nRevenue" From 125e5ef0899d4c60ec84ac720192ba05007ea7e1 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 14:05:25 -0700 Subject: [PATCH 090/154] fix: extend caption substring fix to all platforms Move _merge_caption helper from TelegramAdapter to BasePlatformAdapter so all adapters inherit it. Fix the same substring-containment bug in: - gateway/platforms/base.py (photo burst merging) - gateway/run.py (priority photo follow-up merging) - gateway/platforms/feishu.py (media batch merging) The original fix only covered telegram.py. The same bug existed in base.py and run.py (pure substring check) and feishu.py (list membership without whitespace normalization). 
--- gateway/platforms/base.py | 21 +++++++++++++++++---- gateway/platforms/feishu.py | 5 +---- gateway/platforms/telegram.py | 16 ---------------- gateway/run.py | 5 +---- 4 files changed, 19 insertions(+), 28 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index a1fef589..551c0e86 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1105,6 +1105,22 @@ class BasePlatformAdapter(ABC): logger.error("[%s] Fallback send also failed: %s", self.name, fallback_result.error) return fallback_result + @staticmethod + def _merge_caption(existing_text: Optional[str], new_text: str) -> str: + """Merge a new caption into existing text, avoiding duplicates. + + Uses line-by-line exact match (not substring) to prevent false positives + where a shorter caption is silently dropped because it appears as a + substring of a longer one (e.g. "Meeting" inside "Meeting agenda"). + Whitespace is normalised for comparison. + """ + if not existing_text: + return new_text + existing_captions = [c.strip() for c in existing_text.split("\n\n")] + if new_text.strip() not in existing_captions: + return f"{existing_text}\n\n{new_text}".strip() + return existing_text + async def handle_message(self, event: MessageEvent) -> None: """ Process an incoming message. 
@@ -1164,10 +1180,7 @@ class BasePlatformAdapter(ABC): existing.media_urls.extend(event.media_urls) existing.media_types.extend(event.media_types) if event.text: - if not existing.text: - existing.text = event.text - elif event.text not in existing.text: - existing.text = f"{existing.text}\n\n{event.text}".strip() + existing.text = self._merge_caption(existing.text, event.text) else: self._pending_messages[session_key] = event return # Don't interrupt now - will run after current task completes diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index fce22a97..7b20bc19 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -2065,10 +2065,7 @@ class FeishuAdapter(BasePlatformAdapter): existing.media_urls.extend(event.media_urls) existing.media_types.extend(event.media_types) if event.text: - if not existing.text: - existing.text = event.text - elif event.text not in existing.text.split("\n\n"): - existing.text = f"{existing.text}\n\n{event.text}" + existing.text = self._merge_caption(existing.text, event.text) existing.timestamp = event.timestamp if event.message_id: existing.message_id = event.message_id diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 3fa11498..f72c31e1 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2213,22 +2213,6 @@ class TelegramAdapter(BasePlatformAdapter): if self._pending_photo_batch_tasks.get(batch_key) is current_task: self._pending_photo_batch_tasks.pop(batch_key, None) - @staticmethod - def _merge_caption(existing_text: Optional[str], new_text: str) -> str: - """Merge a new caption into existing text, avoiding duplicates. - - Uses line-by-line exact match (not substring) to prevent false positives - where a shorter caption is silently dropped because it appears as a - substring of a longer one (e.g. "Meeting" inside "Meeting agenda"). - Whitespace is normalised for comparison. 
- """ - if not existing_text: - return new_text - existing_captions = [c.strip() for c in existing_text.split("\n\n")] - if new_text.strip() not in existing_captions: - return f"{existing_text}\n\n{new_text}".strip() - return existing_text - def _enqueue_photo_event(self, batch_key: str, event: MessageEvent) -> None: """Merge photo events into a pending batch and schedule flush.""" existing = self._pending_photo_batches.get(batch_key) diff --git a/gateway/run.py b/gateway/run.py index df7df7db..81c7d55f 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1987,10 +1987,7 @@ class GatewayRunner: existing.media_urls.extend(event.media_urls) existing.media_types.extend(event.media_types) if event.text: - if not existing.text: - existing.text = event.text - elif event.text not in existing.text: - existing.text = f"{existing.text}\n\n{event.text}".strip() + existing.text = BasePlatformAdapter._merge_caption(existing.text, event.text) else: adapter._pending_messages[_quick_key] = event else: From 99ff375f7a313daed08c13a6981fac353e78733c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 14:10:56 -0700 Subject: [PATCH 091/154] fix(gateway): respect tool_preview_length in all/new progress modes (#5937) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, all/new tool progress modes always hard-truncated previews to 40 chars, ignoring the display.tool_preview_length config. This made it impossible for gateway users to see meaningful command/path info without switching to verbose mode (which shows too much detail). Now all/new modes read tool_preview_length from config: - tool_preview_length: 0 (default/unset) → 40 chars (no regression) - tool_preview_length: 120 → 120-char previews in all/new mode - verbose mode: unchanged (already respected the config) Users who want longer previews can set: display: tool_preview_length: 120 Reported by demontut_ on Discord. 
--- gateway/run.py | 15 ++- tests/gateway/test_run_progress_topics.py | 117 ++++++++++++++++++++++ 2 files changed, 127 insertions(+), 5 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 81c7d55f..aaee28bd 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4910,8 +4910,8 @@ class GatewayRunner: cycle = ["off", "new", "all", "verbose"] descriptions = { "off": "⚙️ Tool progress: **OFF** — no tool activity shown.", - "new": "⚙️ Tool progress: **NEW** — shown when tool changes (short previews).", - "all": "⚙️ Tool progress: **ALL** — every tool call shown (short previews).", + "new": "⚙️ Tool progress: **NEW** — shown when tool changes (preview length: `display.tool_preview_length`, default 40).", + "all": "⚙️ Tool progress: **ALL** — every tool call shown (preview length: `display.tool_preview_length`, default 40).", "verbose": "⚙️ Tool progress: **VERBOSE** — every tool call with full arguments.", } @@ -6324,10 +6324,15 @@ class GatewayRunner: progress_queue.put(msg) return - # "all" / "new" modes: short preview, always truncated (40 chars) + # "all" / "new" modes: short preview, respects tool_preview_length + # config (defaults to 40 chars when unset to keep gateway messages + # compact — unlike CLI spinners, these persist as permanent messages). if preview: - if len(preview) > 40: - preview = preview[:37] + "..." + from agent.display import get_tool_preview_max_len + _pl = get_tool_preview_max_len() + _cap = _pl if _pl > 0 else 40 + if len(preview) > _cap: + preview = preview[:_cap - 3] + "..." msg = f"{emoji} {tool_name}: \"{preview}\"" else: msg = f"{emoji} {tool_name}..." 
diff --git a/tests/gateway/test_run_progress_topics.py b/tests/gateway/test_run_progress_topics.py index 9e0481ae..f3ff9051 100644 --- a/tests/gateway/test_run_progress_topics.py +++ b/tests/gateway/test_run_progress_topics.py @@ -71,6 +71,24 @@ class FakeAgent: } +class LongPreviewAgent: + """Agent that emits a tool call with a very long preview string.""" + LONG_CMD = "cd /home/teknium/.hermes/hermes-agent/.worktrees/hermes-d8860339 && source .venv/bin/activate && python -m pytest tests/gateway/test_run_progress_topics.py -n0 -q" + + def __init__(self, **kwargs): + self.tool_progress_callback = kwargs.get("tool_progress_callback") + self.tools = [] + + def run_conversation(self, message, conversation_history=None, task_id=None): + self.tool_progress_callback("tool.started", "terminal", self.LONG_CMD, {}) + time.sleep(0.35) + return { + "final_response": "done", + "messages": [], + "api_calls": 1, + } + + def _make_runner(adapter): gateway_run = importlib.import_module("gateway.run") GatewayRunner = gateway_run.GatewayRunner @@ -217,3 +235,102 @@ async def test_run_agent_progress_uses_event_message_id_for_slack_dm(monkeypatch assert adapter.sent assert adapter.sent[0]["metadata"] == {"thread_id": "1234567890.000001"} assert all(call["metadata"] == {"thread_id": "1234567890.000001"} for call in adapter.typing) + + +# --------------------------------------------------------------------------- +# Preview truncation tests (all/new mode respects tool_preview_length) +# --------------------------------------------------------------------------- + + +def _run_long_preview_helper(monkeypatch, tmp_path, preview_length=0): + """Shared setup for long-preview truncation tests. + + Returns (adapter, result) after running the agent with LongPreviewAgent. + ``preview_length`` controls display.tool_preview_length in the config file + that _run_agent reads — so the gateway picks it up the same way production does. 
+ """ + import asyncio + import yaml + + monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all") + + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv) + + fake_run_agent = types.ModuleType("run_agent") + fake_run_agent.AIAgent = LongPreviewAgent + monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + + # Write config.yaml so _run_agent picks up tool_preview_length + config = {"display": {"tool_preview_length": preview_length}} + (tmp_path / "config.yaml").write_text(yaml.dump(config), encoding="utf-8") + + adapter = ProgressCaptureAdapter() + runner = _make_runner(adapter) + gateway_run = importlib.import_module("gateway.run") + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"}) + + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="12345", + chat_type="dm", + thread_id=None, + ) + + result = asyncio.get_event_loop().run_until_complete( + runner._run_agent( + message="hello", + context_prompt="", + history=[], + source=source, + session_id="sess-trunc", + session_key="agent:main:telegram:dm:12345", + ) + ) + return adapter, result + + +def test_all_mode_default_truncation_40_chars(monkeypatch, tmp_path): + """When tool_preview_length is 0 (default), all/new mode truncates to 40 chars.""" + adapter, result = _run_long_preview_helper(monkeypatch, tmp_path, preview_length=0) + assert result["final_response"] == "done" + assert adapter.sent + content = adapter.sent[0]["content"] + # The long command should be truncated — total preview <= 40 chars + assert "..." 
in content + # Extract the preview part between quotes + import re + match = re.search(r'"(.+)"', content) + assert match, f"No quoted preview found in: {content}" + preview_text = match.group(1) + assert len(preview_text) <= 40, f"Preview too long ({len(preview_text)}): {preview_text}" + + +def test_all_mode_respects_custom_preview_length(monkeypatch, tmp_path): + """When tool_preview_length is explicitly set (e.g. 120), all/new mode uses that.""" + adapter, result = _run_long_preview_helper(monkeypatch, tmp_path, preview_length=120) + assert result["final_response"] == "done" + assert adapter.sent + content = adapter.sent[0]["content"] + # With 120-char cap, the command (165 chars) should still be truncated but longer + import re + match = re.search(r'"(.+)"', content) + assert match, f"No quoted preview found in: {content}" + preview_text = match.group(1) + # Should be longer than the 40-char default + assert len(preview_text) > 40, f"Preview suspiciously short ({len(preview_text)}): {preview_text}" + # But still capped at 120 + assert len(preview_text) <= 120, f"Preview too long ({len(preview_text)}): {preview_text}" + + +def test_all_mode_no_truncation_when_preview_fits(monkeypatch, tmp_path): + """Short previews (under the cap) are not truncated.""" + # Set a generous cap — the LongPreviewAgent's command is ~165 chars + adapter, result = _run_long_preview_helper(monkeypatch, tmp_path, preview_length=200) + assert result["final_response"] == "done" + assert adapter.sent + content = adapter.sent[0]["content"] + # With a 200-char cap, the 165-char command should NOT be truncated + assert "..." 
not in content, f"Preview was truncated when it shouldn't be: {content}" From f3006ebef9759d6d1002310e40d3b79a8293f074 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:19:07 -0700 Subject: [PATCH 092/154] refactor(tests): re-architect tests + fix CI failures (#5946) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: re-architect tests to mirror the codebase * Update tests.yml * fix: add missing tool_error imports after registry refactor * fix(tests): replace patch.dict with monkeypatch to prevent env var leaks under xdist patch.dict(os.environ) can leak TERMINAL_ENV across xdist workers, causing test_code_execution tests to hit the Modal remote path. * fix(tests): fix update_check and telegram xdist failures - test_update_check: replace patch("hermes_cli.banner.os.getenv") with monkeypatch.setenv("HERMES_HOME") — banner.py no longer imports os directly, it uses get_hermes_home() from hermes_constants. - test_telegram_conflict/approval_buttons: provide real exception classes for telegram.error mock (NetworkError, TimedOut, BadRequest) so the except clause in connect() doesn't fail with "catching classes that do not inherit from BaseException" when xdist pollutes sys.modules. 
* fix(tests): accept unavailable_models kwarg in _prompt_model_selection mock --- .github/workflows/tests.yml | 3 + tests/{ => agent}/test_anthropic_adapter.py | 0 .../test_auxiliary_config_bridge.py | 6 +- tests/{ => agent}/test_context_references.py | 0 tests/{ => agent}/test_credential_pool.py | 0 .../test_credential_pool_routing.py | 0 .../test_crossloop_client_cache.py | 0 tests/{ => agent}/test_display.py | 0 tests/{ => agent}/test_insights.py | 0 .../test_model_metadata_local_ctx.py | 0 tests/cli/__init__.py | 0 tests/{ => cli}/test_branch_command.py | 0 tests/{ => cli}/test_cli_approval_ui.py | 0 .../test_cli_background_tui_refresh.py | 0 tests/{ => cli}/test_cli_browser_connect.py | 0 tests/{ => cli}/test_cli_context_warning.py | 0 tests/{ => cli}/test_cli_extension_hooks.py | 0 tests/{ => cli}/test_cli_file_drop.py | 0 tests/{ => cli}/test_cli_init.py | 0 .../{ => cli}/test_cli_interrupt_subagent.py | 0 tests/{ => cli}/test_cli_loading_indicator.py | 0 tests/{ => cli}/test_cli_mcp_config_watch.py | 0 tests/{ => cli}/test_cli_new_session.py | 0 tests/{ => cli}/test_cli_plan_command.py | 0 tests/{ => cli}/test_cli_prefix_matching.py | 0 tests/{ => cli}/test_cli_preloaded_skills.py | 0 .../{ => cli}/test_cli_provider_resolution.py | 4 +- tests/{ => cli}/test_cli_retry.py | 2 +- tests/{ => cli}/test_cli_save_config_value.py | 0 tests/{ => cli}/test_cli_secret_capture.py | 0 tests/{ => cli}/test_cli_skin_integration.py | 0 tests/{ => cli}/test_cli_status_bar.py | 0 tests/{ => cli}/test_cli_tools_command.py | 0 tests/{ => cli}/test_personality_none.py | 0 tests/{ => cli}/test_quick_commands.py | 0 tests/{ => cli}/test_reasoning_command.py | 0 tests/{ => cli}/test_resume_display.py | 0 .../{ => cli}/test_surrogate_sanitization.py | 0 tests/{ => cli}/test_worktree.py | 0 tests/{ => cli}/test_worktree_security.py | 0 .../{ => cron}/test_codex_execution_paths.py | 0 tests/{ => cron}/test_file_permissions.py | 0 .../gateway/test_telegram_approval_buttons.py | 9 +- 
tests/gateway/test_telegram_conflict.py | 8 + .../test_anthropic_oauth_flow.py | 0 .../test_anthropic_provider_persistence.py | 0 .../test_api_key_providers.py | 0 .../test_atomic_json_write.py | 0 .../test_atomic_yaml_write.py | 0 .../test_auth_codex_provider.py | 0 tests/{ => hermes_cli}/test_auth_commands.py | 0 .../test_auth_nous_provider.py | 0 tests/{ => hermes_cli}/test_codex_models.py | 0 .../test_config_env_expansion.py | 0 .../test_external_credential_detection.py | 0 .../{ => hermes_cli}/test_gemini_provider.py | 0 .../{ => hermes_cli}/test_model_normalize.py | 0 .../test_model_provider_persistence.py | 0 .../test_ollama_cloud_auth.py | 0 .../test_plugin_cli_registration.py | 0 tests/{ => hermes_cli}/test_plugins.py | 0 tests/{ => hermes_cli}/test_plugins_cmd.py | 0 .../test_runtime_provider_resolution.py | 0 .../test_setup_model_selection.py | 0 tests/hermes_cli/test_update_check.py | 50 +++-- tests/run_agent/__init__.py | 0 .../test_1630_context_overflow_loop.py | 0 tests/{ => run_agent}/test_413_compression.py | 0 tests/{ => run_agent}/test_860_dedup.py | 0 .../{ => run_agent}/test_agent_guardrails.py | 0 tests/{ => run_agent}/test_agent_loop.py | 2 +- .../test_agent_loop_tool_calling.py | 2 +- tests/{ => run_agent}/test_agent_loop_vllm.py | 2 +- .../test_anthropic_error_handling.py | 0 .../test_async_httpx_del_neuter.py | 0 .../test_compression_boundary.py | 0 .../test_compression_persistence.py | 0 .../test_compressor_fallback_update.py | 0 .../{ => run_agent}/test_context_pressure.py | 0 .../test_context_token_tracking.py | 0 .../test_dict_tool_call_args.py | 0 .../test_exit_cleanup_interrupt.py | 0 tests/{ => run_agent}/test_fallback_model.py | 0 .../test_flush_memories_codex.py | 0 .../test_interactive_interrupt.py | 2 +- .../test_interrupt_propagation.py | 0 .../{ => run_agent}/test_large_tool_result.py | 0 .../test_long_context_tier_429.py | 0 .../test_openai_client_lifecycle.py | 0 .../{ => run_agent}/test_percentage_clamp.py | 2 +- 
.../test_primary_runtime_restore.py | 0 .../{ => run_agent}/test_provider_fallback.py | 0 tests/{ => run_agent}/test_provider_parity.py | 0 .../test_real_interrupt_subagent.py | 0 .../test_redirect_stdout_issue.py | 0 tests/{ => run_agent}/test_run_agent.py | 0 .../test_run_agent_codex_responses.py | 0 .../test_session_meta_filtering.py | 0 .../{ => run_agent}/test_session_reset_fix.py | 2 +- tests/{ => run_agent}/test_streaming.py | 0 .../test_strict_api_validation.py | 0 .../test_token_persistence_non_cli.py | 0 .../{ => run_agent}/test_tool_arg_coercion.py | 0 tests/tools/test_code_execution.py | 14 +- .../test_managed_browserbase_and_modal.py | 6 +- .../test_managed_server_tool_support.py | 0 tests/tools/test_modal_sandbox_fixes.py | 184 ++++++++---------- tests/{ => tools}/test_tool_call_parsers.py | 0 tools/browser_camofox.py | 1 + tools/mcp_tool.py | 4 + 110 files changed, 153 insertions(+), 150 deletions(-) rename tests/{ => agent}/test_anthropic_adapter.py (100%) rename tests/{ => agent}/test_auxiliary_config_bridge.py (98%) rename tests/{ => agent}/test_context_references.py (100%) rename tests/{ => agent}/test_credential_pool.py (100%) rename tests/{ => agent}/test_credential_pool_routing.py (100%) rename tests/{ => agent}/test_crossloop_client_cache.py (100%) rename tests/{ => agent}/test_display.py (100%) rename tests/{ => agent}/test_insights.py (100%) rename tests/{ => agent}/test_model_metadata_local_ctx.py (100%) create mode 100644 tests/cli/__init__.py rename tests/{ => cli}/test_branch_command.py (100%) rename tests/{ => cli}/test_cli_approval_ui.py (100%) rename tests/{ => cli}/test_cli_background_tui_refresh.py (100%) rename tests/{ => cli}/test_cli_browser_connect.py (100%) rename tests/{ => cli}/test_cli_context_warning.py (100%) rename tests/{ => cli}/test_cli_extension_hooks.py (100%) rename tests/{ => cli}/test_cli_file_drop.py (100%) rename tests/{ => cli}/test_cli_init.py (100%) rename tests/{ => cli}/test_cli_interrupt_subagent.py 
(100%) rename tests/{ => cli}/test_cli_loading_indicator.py (100%) rename tests/{ => cli}/test_cli_mcp_config_watch.py (100%) rename tests/{ => cli}/test_cli_new_session.py (100%) rename tests/{ => cli}/test_cli_plan_command.py (100%) rename tests/{ => cli}/test_cli_prefix_matching.py (100%) rename tests/{ => cli}/test_cli_preloaded_skills.py (100%) rename tests/{ => cli}/test_cli_provider_resolution.py (99%) rename tests/{ => cli}/test_cli_retry.py (96%) rename tests/{ => cli}/test_cli_save_config_value.py (100%) rename tests/{ => cli}/test_cli_secret_capture.py (100%) rename tests/{ => cli}/test_cli_skin_integration.py (100%) rename tests/{ => cli}/test_cli_status_bar.py (100%) rename tests/{ => cli}/test_cli_tools_command.py (100%) rename tests/{ => cli}/test_personality_none.py (100%) rename tests/{ => cli}/test_quick_commands.py (100%) rename tests/{ => cli}/test_reasoning_command.py (100%) rename tests/{ => cli}/test_resume_display.py (100%) rename tests/{ => cli}/test_surrogate_sanitization.py (100%) rename tests/{ => cli}/test_worktree.py (100%) rename tests/{ => cli}/test_worktree_security.py (100%) rename tests/{ => cron}/test_codex_execution_paths.py (100%) rename tests/{ => cron}/test_file_permissions.py (100%) rename tests/{ => hermes_cli}/test_anthropic_oauth_flow.py (100%) rename tests/{ => hermes_cli}/test_anthropic_provider_persistence.py (100%) rename tests/{ => hermes_cli}/test_api_key_providers.py (100%) rename tests/{ => hermes_cli}/test_atomic_json_write.py (100%) rename tests/{ => hermes_cli}/test_atomic_yaml_write.py (100%) rename tests/{ => hermes_cli}/test_auth_codex_provider.py (100%) rename tests/{ => hermes_cli}/test_auth_commands.py (100%) rename tests/{ => hermes_cli}/test_auth_nous_provider.py (100%) rename tests/{ => hermes_cli}/test_codex_models.py (100%) rename tests/{ => hermes_cli}/test_config_env_expansion.py (100%) rename tests/{ => hermes_cli}/test_external_credential_detection.py (100%) rename tests/{ => 
hermes_cli}/test_gemini_provider.py (100%) rename tests/{ => hermes_cli}/test_model_normalize.py (100%) rename tests/{ => hermes_cli}/test_model_provider_persistence.py (100%) rename tests/{ => hermes_cli}/test_ollama_cloud_auth.py (100%) rename tests/{ => hermes_cli}/test_plugin_cli_registration.py (100%) rename tests/{ => hermes_cli}/test_plugins.py (100%) rename tests/{ => hermes_cli}/test_plugins_cmd.py (100%) rename tests/{ => hermes_cli}/test_runtime_provider_resolution.py (100%) rename tests/{ => hermes_cli}/test_setup_model_selection.py (100%) create mode 100644 tests/run_agent/__init__.py rename tests/{ => run_agent}/test_1630_context_overflow_loop.py (100%) rename tests/{ => run_agent}/test_413_compression.py (100%) rename tests/{ => run_agent}/test_860_dedup.py (100%) rename tests/{ => run_agent}/test_agent_guardrails.py (100%) rename tests/{ => run_agent}/test_agent_loop.py (99%) rename tests/{ => run_agent}/test_agent_loop_tool_calling.py (99%) rename tests/{ => run_agent}/test_agent_loop_vllm.py (99%) rename tests/{ => run_agent}/test_anthropic_error_handling.py (100%) rename tests/{ => run_agent}/test_async_httpx_del_neuter.py (100%) rename tests/{ => run_agent}/test_compression_boundary.py (100%) rename tests/{ => run_agent}/test_compression_persistence.py (100%) rename tests/{ => run_agent}/test_compressor_fallback_update.py (100%) rename tests/{ => run_agent}/test_context_pressure.py (100%) rename tests/{ => run_agent}/test_context_token_tracking.py (100%) rename tests/{ => run_agent}/test_dict_tool_call_args.py (100%) rename tests/{ => run_agent}/test_exit_cleanup_interrupt.py (100%) rename tests/{ => run_agent}/test_fallback_model.py (100%) rename tests/{ => run_agent}/test_flush_memories_codex.py (100%) rename tests/{ => run_agent}/test_interactive_interrupt.py (98%) rename tests/{ => run_agent}/test_interrupt_propagation.py (100%) rename tests/{ => run_agent}/test_large_tool_result.py (100%) rename tests/{ => 
run_agent}/test_long_context_tier_429.py (100%) rename tests/{ => run_agent}/test_openai_client_lifecycle.py (100%) rename tests/{ => run_agent}/test_percentage_clamp.py (98%) rename tests/{ => run_agent}/test_primary_runtime_restore.py (100%) rename tests/{ => run_agent}/test_provider_fallback.py (100%) rename tests/{ => run_agent}/test_provider_parity.py (100%) rename tests/{ => run_agent}/test_real_interrupt_subagent.py (100%) rename tests/{ => run_agent}/test_redirect_stdout_issue.py (100%) rename tests/{ => run_agent}/test_run_agent.py (100%) rename tests/{ => run_agent}/test_run_agent_codex_responses.py (100%) rename tests/{ => run_agent}/test_session_meta_filtering.py (100%) rename tests/{ => run_agent}/test_session_reset_fix.py (98%) rename tests/{ => run_agent}/test_streaming.py (100%) rename tests/{ => run_agent}/test_strict_api_validation.py (100%) rename tests/{ => run_agent}/test_token_persistence_non_cli.py (100%) rename tests/{ => run_agent}/test_tool_arg_coercion.py (100%) rename tests/{ => tools}/test_managed_server_tool_support.py (100%) rename tests/{ => tools}/test_tool_call_parsers.py (100%) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a54be8b1..1e45193b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -19,6 +19,9 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y ripgrep + - name: Install uv uses: astral-sh/setup-uv@v5 diff --git a/tests/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py similarity index 100% rename from tests/test_anthropic_adapter.py rename to tests/agent/test_anthropic_adapter.py diff --git a/tests/test_auxiliary_config_bridge.py b/tests/agent/test_auxiliary_config_bridge.py similarity index 98% rename from tests/test_auxiliary_config_bridge.py rename to tests/agent/test_auxiliary_config_bridge.py index 0151daf2..91dea15a 100644 --- 
a/tests/test_auxiliary_config_bridge.py +++ b/tests/agent/test_auxiliary_config_bridge.py @@ -13,7 +13,7 @@ from unittest.mock import patch, MagicMock import pytest import yaml -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) def _run_auxiliary_bridge(config_dict, monkeypatch): @@ -199,7 +199,7 @@ class TestGatewayBridgeCodeParity: def test_gateway_has_auxiliary_bridge(self): """The gateway config bridge must include auxiliary.* bridging.""" - gateway_path = Path(__file__).parent.parent / "gateway" / "run.py" + gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py" content = gateway_path.read_text() # Check for key patterns that indicate the bridge is present assert "AUXILIARY_VISION_PROVIDER" in content @@ -213,7 +213,7 @@ class TestGatewayBridgeCodeParity: def test_gateway_no_compression_env_bridge(self): """Gateway should NOT bridge compression config to env vars (config-only).""" - gateway_path = Path(__file__).parent.parent / "gateway" / "run.py" + gateway_path = Path(__file__).parent.parent.parent / "gateway" / "run.py" content = gateway_path.read_text() assert "CONTEXT_COMPRESSION_PROVIDER" not in content assert "CONTEXT_COMPRESSION_MODEL" not in content diff --git a/tests/test_context_references.py b/tests/agent/test_context_references.py similarity index 100% rename from tests/test_context_references.py rename to tests/agent/test_context_references.py diff --git a/tests/test_credential_pool.py b/tests/agent/test_credential_pool.py similarity index 100% rename from tests/test_credential_pool.py rename to tests/agent/test_credential_pool.py diff --git a/tests/test_credential_pool_routing.py b/tests/agent/test_credential_pool_routing.py similarity index 100% rename from tests/test_credential_pool_routing.py rename to tests/agent/test_credential_pool_routing.py diff --git a/tests/test_crossloop_client_cache.py 
b/tests/agent/test_crossloop_client_cache.py similarity index 100% rename from tests/test_crossloop_client_cache.py rename to tests/agent/test_crossloop_client_cache.py diff --git a/tests/test_display.py b/tests/agent/test_display.py similarity index 100% rename from tests/test_display.py rename to tests/agent/test_display.py diff --git a/tests/test_insights.py b/tests/agent/test_insights.py similarity index 100% rename from tests/test_insights.py rename to tests/agent/test_insights.py diff --git a/tests/test_model_metadata_local_ctx.py b/tests/agent/test_model_metadata_local_ctx.py similarity index 100% rename from tests/test_model_metadata_local_ctx.py rename to tests/agent/test_model_metadata_local_ctx.py diff --git a/tests/cli/__init__.py b/tests/cli/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_branch_command.py b/tests/cli/test_branch_command.py similarity index 100% rename from tests/test_branch_command.py rename to tests/cli/test_branch_command.py diff --git a/tests/test_cli_approval_ui.py b/tests/cli/test_cli_approval_ui.py similarity index 100% rename from tests/test_cli_approval_ui.py rename to tests/cli/test_cli_approval_ui.py diff --git a/tests/test_cli_background_tui_refresh.py b/tests/cli/test_cli_background_tui_refresh.py similarity index 100% rename from tests/test_cli_background_tui_refresh.py rename to tests/cli/test_cli_background_tui_refresh.py diff --git a/tests/test_cli_browser_connect.py b/tests/cli/test_cli_browser_connect.py similarity index 100% rename from tests/test_cli_browser_connect.py rename to tests/cli/test_cli_browser_connect.py diff --git a/tests/test_cli_context_warning.py b/tests/cli/test_cli_context_warning.py similarity index 100% rename from tests/test_cli_context_warning.py rename to tests/cli/test_cli_context_warning.py diff --git a/tests/test_cli_extension_hooks.py b/tests/cli/test_cli_extension_hooks.py similarity index 100% rename from tests/test_cli_extension_hooks.py rename to 
tests/cli/test_cli_extension_hooks.py diff --git a/tests/test_cli_file_drop.py b/tests/cli/test_cli_file_drop.py similarity index 100% rename from tests/test_cli_file_drop.py rename to tests/cli/test_cli_file_drop.py diff --git a/tests/test_cli_init.py b/tests/cli/test_cli_init.py similarity index 100% rename from tests/test_cli_init.py rename to tests/cli/test_cli_init.py diff --git a/tests/test_cli_interrupt_subagent.py b/tests/cli/test_cli_interrupt_subagent.py similarity index 100% rename from tests/test_cli_interrupt_subagent.py rename to tests/cli/test_cli_interrupt_subagent.py diff --git a/tests/test_cli_loading_indicator.py b/tests/cli/test_cli_loading_indicator.py similarity index 100% rename from tests/test_cli_loading_indicator.py rename to tests/cli/test_cli_loading_indicator.py diff --git a/tests/test_cli_mcp_config_watch.py b/tests/cli/test_cli_mcp_config_watch.py similarity index 100% rename from tests/test_cli_mcp_config_watch.py rename to tests/cli/test_cli_mcp_config_watch.py diff --git a/tests/test_cli_new_session.py b/tests/cli/test_cli_new_session.py similarity index 100% rename from tests/test_cli_new_session.py rename to tests/cli/test_cli_new_session.py diff --git a/tests/test_cli_plan_command.py b/tests/cli/test_cli_plan_command.py similarity index 100% rename from tests/test_cli_plan_command.py rename to tests/cli/test_cli_plan_command.py diff --git a/tests/test_cli_prefix_matching.py b/tests/cli/test_cli_prefix_matching.py similarity index 100% rename from tests/test_cli_prefix_matching.py rename to tests/cli/test_cli_prefix_matching.py diff --git a/tests/test_cli_preloaded_skills.py b/tests/cli/test_cli_preloaded_skills.py similarity index 100% rename from tests/test_cli_preloaded_skills.py rename to tests/cli/test_cli_preloaded_skills.py diff --git a/tests/test_cli_provider_resolution.py b/tests/cli/test_cli_provider_resolution.py similarity index 99% rename from tests/test_cli_provider_resolution.py rename to 
tests/cli/test_cli_provider_resolution.py index bd78a98e..353b3234 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/cli/test_cli_provider_resolution.py @@ -330,7 +330,7 @@ def test_model_flow_nous_prints_subscription_guidance_without_mutating_explicit_ "hermes_cli.auth.fetch_nous_models", lambda *args, **kwargs: ["claude-opus-4-6"], ) - monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6") + monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6") monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None) monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None) monkeypatch.setattr( @@ -368,7 +368,7 @@ def test_model_flow_nous_applies_managed_tts_default_when_unconfigured(monkeypat "hermes_cli.auth.fetch_nous_models", lambda *args, **kwargs: ["claude-opus-4-6"], ) - monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None: "claude-opus-4-6") + monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", lambda model_ids, current_model="", pricing=None, **kw: "claude-opus-4-6") monkeypatch.setattr("hermes_cli.auth._save_model_choice", lambda model: None) monkeypatch.setattr("hermes_cli.auth._update_config_for_provider", lambda provider, url: None) monkeypatch.setattr( diff --git a/tests/test_cli_retry.py b/tests/cli/test_cli_retry.py similarity index 96% rename from tests/test_cli_retry.py rename to tests/cli/test_cli_retry.py index 74e2512b..b287b457 100644 --- a/tests/test_cli_retry.py +++ b/tests/cli/test_cli_retry.py @@ -1,6 +1,6 @@ """Regression tests for CLI /retry history replacement semantics.""" -from tests.test_cli_init import _make_cli +from tests.cli.test_cli_init import _make_cli def test_retry_last_truncates_history_before_requeueing_message(): diff --git 
a/tests/test_cli_save_config_value.py b/tests/cli/test_cli_save_config_value.py similarity index 100% rename from tests/test_cli_save_config_value.py rename to tests/cli/test_cli_save_config_value.py diff --git a/tests/test_cli_secret_capture.py b/tests/cli/test_cli_secret_capture.py similarity index 100% rename from tests/test_cli_secret_capture.py rename to tests/cli/test_cli_secret_capture.py diff --git a/tests/test_cli_skin_integration.py b/tests/cli/test_cli_skin_integration.py similarity index 100% rename from tests/test_cli_skin_integration.py rename to tests/cli/test_cli_skin_integration.py diff --git a/tests/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py similarity index 100% rename from tests/test_cli_status_bar.py rename to tests/cli/test_cli_status_bar.py diff --git a/tests/test_cli_tools_command.py b/tests/cli/test_cli_tools_command.py similarity index 100% rename from tests/test_cli_tools_command.py rename to tests/cli/test_cli_tools_command.py diff --git a/tests/test_personality_none.py b/tests/cli/test_personality_none.py similarity index 100% rename from tests/test_personality_none.py rename to tests/cli/test_personality_none.py diff --git a/tests/test_quick_commands.py b/tests/cli/test_quick_commands.py similarity index 100% rename from tests/test_quick_commands.py rename to tests/cli/test_quick_commands.py diff --git a/tests/test_reasoning_command.py b/tests/cli/test_reasoning_command.py similarity index 100% rename from tests/test_reasoning_command.py rename to tests/cli/test_reasoning_command.py diff --git a/tests/test_resume_display.py b/tests/cli/test_resume_display.py similarity index 100% rename from tests/test_resume_display.py rename to tests/cli/test_resume_display.py diff --git a/tests/test_surrogate_sanitization.py b/tests/cli/test_surrogate_sanitization.py similarity index 100% rename from tests/test_surrogate_sanitization.py rename to tests/cli/test_surrogate_sanitization.py diff --git a/tests/test_worktree.py 
b/tests/cli/test_worktree.py similarity index 100% rename from tests/test_worktree.py rename to tests/cli/test_worktree.py diff --git a/tests/test_worktree_security.py b/tests/cli/test_worktree_security.py similarity index 100% rename from tests/test_worktree_security.py rename to tests/cli/test_worktree_security.py diff --git a/tests/test_codex_execution_paths.py b/tests/cron/test_codex_execution_paths.py similarity index 100% rename from tests/test_codex_execution_paths.py rename to tests/cron/test_codex_execution_paths.py diff --git a/tests/test_file_permissions.py b/tests/cron/test_file_permissions.py similarity index 100% rename from tests/test_file_permissions.py rename to tests/cron/test_file_permissions.py diff --git a/tests/gateway/test_telegram_approval_buttons.py b/tests/gateway/test_telegram_approval_buttons.py index 1b8249bc..98d3cdc3 100644 --- a/tests/gateway/test_telegram_approval_buttons.py +++ b/tests/gateway/test_telegram_approval_buttons.py @@ -33,8 +33,15 @@ def _ensure_telegram_mock(): mod.constants.ChatType.GROUP = "group" mod.constants.ChatType.SUPERGROUP = "supergroup" mod.constants.ChatType.CHANNEL = "channel" - for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request", "telegram.error"): + # Provide real exception classes so ``except (NetworkError, ...)`` in + # connect() doesn't blow up under xdist when this mock leaks. 
+ mod.error.NetworkError = type("NetworkError", (OSError,), {}) + mod.error.TimedOut = type("TimedOut", (OSError,), {}) + mod.error.BadRequest = type("BadRequest", (Exception,), {}) + + for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"): sys.modules.setdefault(name, mod) + sys.modules.setdefault("telegram.error", mod.error) _ensure_telegram_mock() diff --git a/tests/gateway/test_telegram_conflict.py b/tests/gateway/test_telegram_conflict.py index 7a480d9f..47a67f22 100644 --- a/tests/gateway/test_telegram_conflict.py +++ b/tests/gateway/test_telegram_conflict.py @@ -20,8 +20,16 @@ def _ensure_telegram_mock(): telegram_mod.constants.ChatType.CHANNEL = "channel" telegram_mod.constants.ChatType.PRIVATE = "private" + # Provide real exception classes so ``except (NetworkError, ...)`` in + # connect() doesn't blow up with "catching classes that do not inherit + # from BaseException" when another xdist worker pollutes sys.modules. + telegram_mod.error.NetworkError = type("NetworkError", (OSError,), {}) + telegram_mod.error.TimedOut = type("TimedOut", (OSError,), {}) + telegram_mod.error.BadRequest = type("BadRequest", (Exception,), {}) + for name in ("telegram", "telegram.ext", "telegram.constants", "telegram.request"): sys.modules.setdefault(name, telegram_mod) + sys.modules.setdefault("telegram.error", telegram_mod.error) _ensure_telegram_mock() diff --git a/tests/test_anthropic_oauth_flow.py b/tests/hermes_cli/test_anthropic_oauth_flow.py similarity index 100% rename from tests/test_anthropic_oauth_flow.py rename to tests/hermes_cli/test_anthropic_oauth_flow.py diff --git a/tests/test_anthropic_provider_persistence.py b/tests/hermes_cli/test_anthropic_provider_persistence.py similarity index 100% rename from tests/test_anthropic_provider_persistence.py rename to tests/hermes_cli/test_anthropic_provider_persistence.py diff --git a/tests/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py similarity index 100% rename 
from tests/test_api_key_providers.py rename to tests/hermes_cli/test_api_key_providers.py diff --git a/tests/test_atomic_json_write.py b/tests/hermes_cli/test_atomic_json_write.py similarity index 100% rename from tests/test_atomic_json_write.py rename to tests/hermes_cli/test_atomic_json_write.py diff --git a/tests/test_atomic_yaml_write.py b/tests/hermes_cli/test_atomic_yaml_write.py similarity index 100% rename from tests/test_atomic_yaml_write.py rename to tests/hermes_cli/test_atomic_yaml_write.py diff --git a/tests/test_auth_codex_provider.py b/tests/hermes_cli/test_auth_codex_provider.py similarity index 100% rename from tests/test_auth_codex_provider.py rename to tests/hermes_cli/test_auth_codex_provider.py diff --git a/tests/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py similarity index 100% rename from tests/test_auth_commands.py rename to tests/hermes_cli/test_auth_commands.py diff --git a/tests/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py similarity index 100% rename from tests/test_auth_nous_provider.py rename to tests/hermes_cli/test_auth_nous_provider.py diff --git a/tests/test_codex_models.py b/tests/hermes_cli/test_codex_models.py similarity index 100% rename from tests/test_codex_models.py rename to tests/hermes_cli/test_codex_models.py diff --git a/tests/test_config_env_expansion.py b/tests/hermes_cli/test_config_env_expansion.py similarity index 100% rename from tests/test_config_env_expansion.py rename to tests/hermes_cli/test_config_env_expansion.py diff --git a/tests/test_external_credential_detection.py b/tests/hermes_cli/test_external_credential_detection.py similarity index 100% rename from tests/test_external_credential_detection.py rename to tests/hermes_cli/test_external_credential_detection.py diff --git a/tests/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py similarity index 100% rename from tests/test_gemini_provider.py rename to tests/hermes_cli/test_gemini_provider.py 
diff --git a/tests/test_model_normalize.py b/tests/hermes_cli/test_model_normalize.py similarity index 100% rename from tests/test_model_normalize.py rename to tests/hermes_cli/test_model_normalize.py diff --git a/tests/test_model_provider_persistence.py b/tests/hermes_cli/test_model_provider_persistence.py similarity index 100% rename from tests/test_model_provider_persistence.py rename to tests/hermes_cli/test_model_provider_persistence.py diff --git a/tests/test_ollama_cloud_auth.py b/tests/hermes_cli/test_ollama_cloud_auth.py similarity index 100% rename from tests/test_ollama_cloud_auth.py rename to tests/hermes_cli/test_ollama_cloud_auth.py diff --git a/tests/test_plugin_cli_registration.py b/tests/hermes_cli/test_plugin_cli_registration.py similarity index 100% rename from tests/test_plugin_cli_registration.py rename to tests/hermes_cli/test_plugin_cli_registration.py diff --git a/tests/test_plugins.py b/tests/hermes_cli/test_plugins.py similarity index 100% rename from tests/test_plugins.py rename to tests/hermes_cli/test_plugins.py diff --git a/tests/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py similarity index 100% rename from tests/test_plugins_cmd.py rename to tests/hermes_cli/test_plugins_cmd.py diff --git a/tests/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py similarity index 100% rename from tests/test_runtime_provider_resolution.py rename to tests/hermes_cli/test_runtime_provider_resolution.py diff --git a/tests/test_setup_model_selection.py b/tests/hermes_cli/test_setup_model_selection.py similarity index 100% rename from tests/test_setup_model_selection.py rename to tests/hermes_cli/test_setup_model_selection.py diff --git a/tests/hermes_cli/test_update_check.py b/tests/hermes_cli/test_update_check.py index b7d6de6f..368bb1b0 100644 --- a/tests/hermes_cli/test_update_check.py +++ b/tests/hermes_cli/test_update_check.py @@ -15,7 +15,7 @@ def test_version_string_no_v_prefix(): assert not 
__version__.startswith("v"), f"__version__ should not start with 'v', got {__version__!r}" -def test_check_for_updates_uses_cache(tmp_path): +def test_check_for_updates_uses_cache(tmp_path, monkeypatch): """When cache is fresh, check_for_updates should return cached value without calling git.""" from hermes_cli.banner import check_for_updates @@ -27,15 +27,15 @@ def test_check_for_updates_uses_cache(tmp_path): cache_file = tmp_path / ".update_check" cache_file.write_text(json.dumps({"ts": time.time(), "behind": 3})) - with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)): - with patch("hermes_cli.banner.subprocess.run") as mock_run: - result = check_for_updates() + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run") as mock_run: + result = check_for_updates() assert result == 3 mock_run.assert_not_called() -def test_check_for_updates_expired_cache(tmp_path): +def test_check_for_updates_expired_cache(tmp_path, monkeypatch): """When cache is expired, check_for_updates should call git fetch.""" from hermes_cli.banner import check_for_updates @@ -49,15 +49,15 @@ def test_check_for_updates_expired_cache(tmp_path): mock_result = MagicMock(returncode=0, stdout="5\n") - with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)): - with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run: - result = check_for_updates() + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run", return_value=mock_result) as mock_run: + result = check_for_updates() assert result == 5 assert mock_run.call_count == 2 # git fetch + git rev-list -def test_check_for_updates_no_git_dir(tmp_path): +def test_check_for_updates_no_git_dir(tmp_path, monkeypatch): """Returns None when .git directory doesn't exist anywhere.""" import hermes_cli.banner as banner @@ -66,19 +66,15 @@ def test_check_for_updates_no_git_dir(tmp_path): fake_banner.parent.mkdir(parents=True, 
exist_ok=True) fake_banner.touch() - original = banner.__file__ - try: - banner.__file__ = str(fake_banner) - with patch("hermes_cli.banner.os.getenv", return_value=str(tmp_path)): - with patch("hermes_cli.banner.subprocess.run") as mock_run: - result = banner.check_for_updates() - assert result is None - mock_run.assert_not_called() - finally: - banner.__file__ = original + monkeypatch.setattr(banner, "__file__", str(fake_banner)) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run") as mock_run: + result = banner.check_for_updates() + assert result is None + mock_run.assert_not_called() -def test_check_for_updates_fallback_to_project_root(): +def test_check_for_updates_fallback_to_project_root(tmp_path, monkeypatch): """Dev install: falls back to Path(__file__).parent.parent when HERMES_HOME has no git repo.""" import hermes_cli.banner as banner @@ -87,14 +83,12 @@ def test_check_for_updates_fallback_to_project_root(): pytest.skip("Not running from a git checkout") # Point HERMES_HOME at a temp dir with no hermes-agent/.git - import tempfile - with tempfile.TemporaryDirectory() as td: - with patch("hermes_cli.banner.os.getenv", return_value=td): - with patch("hermes_cli.banner.subprocess.run") as mock_run: - mock_run.return_value = MagicMock(returncode=0, stdout="0\n") - result = banner.check_for_updates() - # Should have fallen back to project root and run git commands - assert mock_run.call_count >= 1 + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + with patch("hermes_cli.banner.subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=0, stdout="0\n") + result = banner.check_for_updates() + # Should have fallen back to project root and run git commands + assert mock_run.call_count >= 1 def test_prefetch_non_blocking(): diff --git a/tests/run_agent/__init__.py b/tests/run_agent/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_1630_context_overflow_loop.py 
b/tests/run_agent/test_1630_context_overflow_loop.py similarity index 100% rename from tests/test_1630_context_overflow_loop.py rename to tests/run_agent/test_1630_context_overflow_loop.py diff --git a/tests/test_413_compression.py b/tests/run_agent/test_413_compression.py similarity index 100% rename from tests/test_413_compression.py rename to tests/run_agent/test_413_compression.py diff --git a/tests/test_860_dedup.py b/tests/run_agent/test_860_dedup.py similarity index 100% rename from tests/test_860_dedup.py rename to tests/run_agent/test_860_dedup.py diff --git a/tests/test_agent_guardrails.py b/tests/run_agent/test_agent_guardrails.py similarity index 100% rename from tests/test_agent_guardrails.py rename to tests/run_agent/test_agent_guardrails.py diff --git a/tests/test_agent_loop.py b/tests/run_agent/test_agent_loop.py similarity index 99% rename from tests/test_agent_loop.py rename to tests/run_agent/test_agent_loop.py index b95ff780..bd9e41b9 100644 --- a/tests/test_agent_loop.py +++ b/tests/run_agent/test_agent_loop.py @@ -16,7 +16,7 @@ from unittest.mock import MagicMock import pytest # Ensure repo root is importable -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) try: from environments.agent_loop import ( diff --git a/tests/test_agent_loop_tool_calling.py b/tests/run_agent/test_agent_loop_tool_calling.py similarity index 99% rename from tests/test_agent_loop_tool_calling.py rename to tests/run_agent/test_agent_loop_tool_calling.py index 74e67c0b..3b8d6ac5 100644 --- a/tests/test_agent_loop_tool_calling.py +++ b/tests/run_agent/test_agent_loop_tool_calling.py @@ -31,7 +31,7 @@ import pytest # pytestmark removed — tests skip gracefully via OPENROUTER_API_KEY check on line 59 # Ensure repo root is importable -_repo_root = Path(__file__).resolve().parent.parent +_repo_root = Path(__file__).resolve().parent.parent.parent if str(_repo_root) not in sys.path: 
sys.path.insert(0, str(_repo_root)) diff --git a/tests/test_agent_loop_vllm.py b/tests/run_agent/test_agent_loop_vllm.py similarity index 99% rename from tests/test_agent_loop_vllm.py rename to tests/run_agent/test_agent_loop_vllm.py index d47478ec..d4284909 100644 --- a/tests/test_agent_loop_vllm.py +++ b/tests/run_agent/test_agent_loop_vllm.py @@ -30,7 +30,7 @@ import pytest import requests # Ensure repo root is importable -_repo_root = Path(__file__).resolve().parent.parent +_repo_root = Path(__file__).resolve().parent.parent.parent if str(_repo_root) not in sys.path: sys.path.insert(0, str(_repo_root)) diff --git a/tests/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py similarity index 100% rename from tests/test_anthropic_error_handling.py rename to tests/run_agent/test_anthropic_error_handling.py diff --git a/tests/test_async_httpx_del_neuter.py b/tests/run_agent/test_async_httpx_del_neuter.py similarity index 100% rename from tests/test_async_httpx_del_neuter.py rename to tests/run_agent/test_async_httpx_del_neuter.py diff --git a/tests/test_compression_boundary.py b/tests/run_agent/test_compression_boundary.py similarity index 100% rename from tests/test_compression_boundary.py rename to tests/run_agent/test_compression_boundary.py diff --git a/tests/test_compression_persistence.py b/tests/run_agent/test_compression_persistence.py similarity index 100% rename from tests/test_compression_persistence.py rename to tests/run_agent/test_compression_persistence.py diff --git a/tests/test_compressor_fallback_update.py b/tests/run_agent/test_compressor_fallback_update.py similarity index 100% rename from tests/test_compressor_fallback_update.py rename to tests/run_agent/test_compressor_fallback_update.py diff --git a/tests/test_context_pressure.py b/tests/run_agent/test_context_pressure.py similarity index 100% rename from tests/test_context_pressure.py rename to tests/run_agent/test_context_pressure.py diff --git 
a/tests/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py similarity index 100% rename from tests/test_context_token_tracking.py rename to tests/run_agent/test_context_token_tracking.py diff --git a/tests/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py similarity index 100% rename from tests/test_dict_tool_call_args.py rename to tests/run_agent/test_dict_tool_call_args.py diff --git a/tests/test_exit_cleanup_interrupt.py b/tests/run_agent/test_exit_cleanup_interrupt.py similarity index 100% rename from tests/test_exit_cleanup_interrupt.py rename to tests/run_agent/test_exit_cleanup_interrupt.py diff --git a/tests/test_fallback_model.py b/tests/run_agent/test_fallback_model.py similarity index 100% rename from tests/test_fallback_model.py rename to tests/run_agent/test_fallback_model.py diff --git a/tests/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py similarity index 100% rename from tests/test_flush_memories_codex.py rename to tests/run_agent/test_flush_memories_codex.py diff --git a/tests/test_interactive_interrupt.py b/tests/run_agent/test_interactive_interrupt.py similarity index 98% rename from tests/test_interactive_interrupt.py rename to tests/run_agent/test_interactive_interrupt.py index 8c0d328c..762621f2 100644 --- a/tests/test_interactive_interrupt.py +++ b/tests/run_agent/test_interactive_interrupt.py @@ -23,7 +23,7 @@ logging.basicConfig(level=logging.DEBUG, stream=sys.stderr, format="%(asctime)s [%(threadName)s] %(message)s") log = logging.getLogger("interrupt_test") -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) from unittest.mock import MagicMock, patch from run_agent import AIAgent, IterationBudget diff --git a/tests/test_interrupt_propagation.py b/tests/run_agent/test_interrupt_propagation.py similarity index 100% rename from 
tests/test_interrupt_propagation.py rename to tests/run_agent/test_interrupt_propagation.py diff --git a/tests/test_large_tool_result.py b/tests/run_agent/test_large_tool_result.py similarity index 100% rename from tests/test_large_tool_result.py rename to tests/run_agent/test_large_tool_result.py diff --git a/tests/test_long_context_tier_429.py b/tests/run_agent/test_long_context_tier_429.py similarity index 100% rename from tests/test_long_context_tier_429.py rename to tests/run_agent/test_long_context_tier_429.py diff --git a/tests/test_openai_client_lifecycle.py b/tests/run_agent/test_openai_client_lifecycle.py similarity index 100% rename from tests/test_openai_client_lifecycle.py rename to tests/run_agent/test_openai_client_lifecycle.py diff --git a/tests/test_percentage_clamp.py b/tests/run_agent/test_percentage_clamp.py similarity index 98% rename from tests/test_percentage_clamp.py rename to tests/run_agent/test_percentage_clamp.py index 67d11914..fcf1e39e 100644 --- a/tests/test_percentage_clamp.py +++ b/tests/run_agent/test_percentage_clamp.py @@ -122,7 +122,7 @@ class TestSourceLinesAreClamped: @staticmethod def _read_file(rel_path: str) -> str: import os - base = os.path.dirname(os.path.dirname(__file__)) + base = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) with open(os.path.join(base, rel_path)) as f: return f.read() diff --git a/tests/test_primary_runtime_restore.py b/tests/run_agent/test_primary_runtime_restore.py similarity index 100% rename from tests/test_primary_runtime_restore.py rename to tests/run_agent/test_primary_runtime_restore.py diff --git a/tests/test_provider_fallback.py b/tests/run_agent/test_provider_fallback.py similarity index 100% rename from tests/test_provider_fallback.py rename to tests/run_agent/test_provider_fallback.py diff --git a/tests/test_provider_parity.py b/tests/run_agent/test_provider_parity.py similarity index 100% rename from tests/test_provider_parity.py rename to 
tests/run_agent/test_provider_parity.py diff --git a/tests/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py similarity index 100% rename from tests/test_real_interrupt_subagent.py rename to tests/run_agent/test_real_interrupt_subagent.py diff --git a/tests/test_redirect_stdout_issue.py b/tests/run_agent/test_redirect_stdout_issue.py similarity index 100% rename from tests/test_redirect_stdout_issue.py rename to tests/run_agent/test_redirect_stdout_issue.py diff --git a/tests/test_run_agent.py b/tests/run_agent/test_run_agent.py similarity index 100% rename from tests/test_run_agent.py rename to tests/run_agent/test_run_agent.py diff --git a/tests/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py similarity index 100% rename from tests/test_run_agent_codex_responses.py rename to tests/run_agent/test_run_agent_codex_responses.py diff --git a/tests/test_session_meta_filtering.py b/tests/run_agent/test_session_meta_filtering.py similarity index 100% rename from tests/test_session_meta_filtering.py rename to tests/run_agent/test_session_meta_filtering.py diff --git a/tests/test_session_reset_fix.py b/tests/run_agent/test_session_reset_fix.py similarity index 98% rename from tests/test_session_reset_fix.py rename to tests/run_agent/test_session_reset_fix.py index ee65ed90..1fd1223c 100644 --- a/tests/test_session_reset_fix.py +++ b/tests/run_agent/test_session_reset_fix.py @@ -13,7 +13,7 @@ from pathlib import Path import pytest # Ensure repo root is importable -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) # Stub out optional heavy dependencies not installed in the test environment sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) diff --git a/tests/test_streaming.py b/tests/run_agent/test_streaming.py similarity index 100% rename from tests/test_streaming.py rename to 
tests/run_agent/test_streaming.py diff --git a/tests/test_strict_api_validation.py b/tests/run_agent/test_strict_api_validation.py similarity index 100% rename from tests/test_strict_api_validation.py rename to tests/run_agent/test_strict_api_validation.py diff --git a/tests/test_token_persistence_non_cli.py b/tests/run_agent/test_token_persistence_non_cli.py similarity index 100% rename from tests/test_token_persistence_non_cli.py rename to tests/run_agent/test_token_persistence_non_cli.py diff --git a/tests/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py similarity index 100% rename from tests/test_tool_arg_coercion.py rename to tests/run_agent/test_tool_arg_coercion.py diff --git a/tests/tools/test_code_execution.py b/tests/tools/test_code_execution.py index 085ffad2..5ac3fd87 100644 --- a/tests/tools/test_code_execution.py +++ b/tests/tools/test_code_execution.py @@ -18,10 +18,18 @@ import pytest import json import os -# Force local terminal backend for ALL tests in this file. -# Under xdist, another test may leak TERMINAL_ENV=modal/docker, sending -# execute_code down the remote path → modal.exception.AuthError. os.environ["TERMINAL_ENV"] = "local" + + +@pytest.fixture(autouse=True) +def _force_local_terminal(monkeypatch): + """Re-set TERMINAL_ENV=local before every test. + + The module-level assignment above covers import time, but under xdist + another worker can overwrite os.environ between tests. monkeypatch + ensures each test starts (and ends) with the correct value. 
+ """ + monkeypatch.setenv("TERMINAL_ENV", "local") import sys import time import threading diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browserbase_and_modal.py index d07dcb36..5ae24f01 100644 --- a/tests/tools/test_managed_browserbase_and_modal.py +++ b/tests/tools/test_managed_browserbase_and_modal.py @@ -91,7 +91,11 @@ def _install_fake_tools_package(): def register(self, **kwargs): return None - sys.modules["tools.registry"] = types.SimpleNamespace(registry=_Registry()) + from tools.registry import tool_error + + sys.modules["tools.registry"] = types.SimpleNamespace( + registry=_Registry(), tool_error=tool_error, + ) class _DummyEnvironment: def __init__(self, *args, **kwargs): diff --git a/tests/test_managed_server_tool_support.py b/tests/tools/test_managed_server_tool_support.py similarity index 100% rename from tests/test_managed_server_tool_support.py rename to tests/tools/test_managed_server_tool_support.py diff --git a/tests/tools/test_modal_sandbox_fixes.py b/tests/tools/test_modal_sandbox_fixes.py index 7e3feb5c..e1baf13d 100644 --- a/tests/tools/test_modal_sandbox_fixes.py +++ b/tests/tools/test_modal_sandbox_fixes.py @@ -12,8 +12,6 @@ Covers the bugs discovered while setting up TBLite evaluation: import os import sys from pathlib import Path -from unittest.mock import patch, MagicMock - import pytest # Ensure repo root is importable @@ -64,89 +62,72 @@ class TestToolResolution: class TestCwdHandling: """Verify host paths are sanitized for container backends.""" - def test_home_path_replaced_for_modal(self): + def test_home_path_replaced_for_modal(self, monkeypatch): """TERMINAL_CWD=/home/user/... should be replaced with /root for modal.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "modal", - "TERMINAL_CWD": "/home/dakota/github/hermes-agent", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root", ( - f"Expected /root, got {config['cwd']}. 
" - "/home/ paths should be replaced for modal backend." - ) + monkeypatch.setenv("TERMINAL_ENV", "modal") + monkeypatch.setenv("TERMINAL_CWD", "/home/dakota/github/hermes-agent") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root", ( + f"Expected /root, got {config['cwd']}. " + "/home/ paths should be replaced for modal backend." + ) - def test_users_path_replaced_for_docker_by_default(self): + def test_users_path_replaced_for_docker_by_default(self, monkeypatch): """Docker should keep host paths out of the sandbox unless explicitly enabled.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "docker", - "TERMINAL_CWD": "/Users/someone/projects", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root", ( - f"Expected /root, got {config['cwd']}. " - "Host paths should be discarded for docker backend by default." - ) - assert config["host_cwd"] is None - assert config["docker_mount_cwd_to_workspace"] is False + monkeypatch.setenv("TERMINAL_ENV", "docker") + monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root", ( + f"Expected /root, got {config['cwd']}. " + "Host paths should be discarded for docker backend by default." 
+ ) + assert config["host_cwd"] is None + assert config["docker_mount_cwd_to_workspace"] is False - def test_users_path_maps_to_workspace_for_docker_when_enabled(self): + def test_users_path_maps_to_workspace_for_docker_when_enabled(self, monkeypatch): """Docker should map the host cwd into /workspace only when explicitly enabled.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "docker", - "TERMINAL_CWD": "/Users/someone/projects", - "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/workspace" - assert config["host_cwd"] == "/Users/someone/projects" - assert config["docker_mount_cwd_to_workspace"] is True + monkeypatch.setenv("TERMINAL_ENV", "docker") + monkeypatch.setenv("TERMINAL_CWD", "/Users/someone/projects") + monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/Users/someone/projects" + assert config["docker_mount_cwd_to_workspace"] is True - def test_windows_path_replaced_for_modal(self): + def test_windows_path_replaced_for_modal(self, monkeypatch): """TERMINAL_CWD=C:\\Users\\... 
should be replaced for modal.""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "modal", - "TERMINAL_CWD": "C:\\Users\\someone\\projects", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root" + monkeypatch.setenv("TERMINAL_ENV", "modal") + monkeypatch.setenv("TERMINAL_CWD", "C:\\Users\\someone\\projects") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root" - def test_default_cwd_is_root_for_container_backends(self): + @pytest.mark.parametrize("backend", ["modal", "docker", "singularity", "daytona"]) + def test_default_cwd_is_root_for_container_backends(self, backend, monkeypatch): """Container backends should default to /root, not ~.""" - for backend in ("modal", "docker", "singularity", "daytona"): - with patch.dict(os.environ, {"TERMINAL_ENV": backend}, clear=False): - # Remove TERMINAL_CWD so it uses default - env = os.environ.copy() - env.pop("TERMINAL_CWD", None) - env.pop("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", None) - with patch.dict(os.environ, env, clear=True): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/root", ( - f"Backend {backend}: expected /root default, got {config['cwd']}" - ) + monkeypatch.setenv("TERMINAL_ENV", backend) + monkeypatch.delenv("TERMINAL_CWD", raising=False) + monkeypatch.delenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", raising=False) + config = _tt_mod._get_env_config() + assert config["cwd"] == "/root", ( + f"Backend {backend}: expected /root default, got {config['cwd']}" + ) - def test_docker_default_cwd_maps_current_directory_when_enabled(self): + def test_docker_default_cwd_maps_current_directory_when_enabled(self, monkeypatch): """Docker should use /workspace when cwd mounting is explicitly enabled.""" - with patch("tools.terminal_tool.os.getcwd", return_value="/home/user/project"): - with patch.dict(os.environ, { - "TERMINAL_ENV": "docker", - "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE": "true", - }, clear=False): - env = os.environ.copy() - 
env.pop("TERMINAL_CWD", None) - with patch.dict(os.environ, env, clear=True): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/workspace" - assert config["host_cwd"] == "/home/user/project" + monkeypatch.setattr("tools.terminal_tool.os.getcwd", lambda: "/home/user/project") + monkeypatch.setenv("TERMINAL_ENV", "docker") + monkeypatch.setenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "true") + monkeypatch.delenv("TERMINAL_CWD", raising=False) + config = _tt_mod._get_env_config() + assert config["cwd"] == "/workspace" + assert config["host_cwd"] == "/home/user/project" - def test_local_backend_uses_getcwd(self): + def test_local_backend_uses_getcwd(self, monkeypatch): """Local backend should use os.getcwd(), not /root.""" - with patch.dict(os.environ, {"TERMINAL_ENV": "local"}, clear=False): - env = os.environ.copy() - env.pop("TERMINAL_CWD", None) - with patch.dict(os.environ, env, clear=True): - config = _tt_mod._get_env_config() - assert config["cwd"] == os.getcwd() + monkeypatch.setenv("TERMINAL_ENV", "local") + monkeypatch.delenv("TERMINAL_CWD", raising=False) + config = _tt_mod._get_env_config() + assert config["cwd"] == os.getcwd() def test_create_environment_passes_docker_host_cwd_and_flag(self, monkeypatch): """Docker host cwd and mount flag should reach DockerEnvironment.""" @@ -173,18 +154,16 @@ class TestCwdHandling: assert captured["host_cwd"] == "/home/user/project" assert captured["auto_mount_cwd"] is True - def test_ssh_preserves_home_paths(self): + def test_ssh_preserves_home_paths(self, monkeypatch): """SSH backend should NOT replace /home/ paths (they're valid remotely).""" - with patch.dict(os.environ, { - "TERMINAL_ENV": "ssh", - "TERMINAL_CWD": "/home/remote-user/work", - "TERMINAL_SSH_HOST": "example.com", - "TERMINAL_SSH_USER": "user", - }): - config = _tt_mod._get_env_config() - assert config["cwd"] == "/home/remote-user/work", ( - "SSH backend should preserve /home/ paths" - ) + monkeypatch.setenv("TERMINAL_ENV", "ssh") + 
monkeypatch.setenv("TERMINAL_CWD", "/home/remote-user/work") + monkeypatch.setenv("TERMINAL_SSH_HOST", "example.com") + monkeypatch.setenv("TERMINAL_SSH_USER", "user") + config = _tt_mod._get_env_config() + assert config["cwd"] == "/home/remote-user/work", ( + "SSH backend should preserve /home/ paths" + ) # ========================================================================= @@ -194,12 +173,8 @@ class TestCwdHandling: class TestEphemeralDiskCheck: """Verify ephemeral_disk is only passed when modal supports it.""" - def test_ephemeral_disk_skipped_when_unsupported(self): + def test_ephemeral_disk_skipped_when_unsupported(self, monkeypatch): """If modal.Sandbox.create doesn't have ephemeral_disk param, skip it.""" - # Mock the modal import and Sandbox.create signature - mock_modal = MagicMock() - mock_sandbox_create = MagicMock() - # Simulate a signature WITHOUT ephemeral_disk import inspect mock_params = { "args": inspect.Parameter("args", inspect.Parameter.VAR_POSITIONAL), @@ -208,26 +183,25 @@ class TestEphemeralDiskCheck: "cpu": inspect.Parameter("cpu", inspect.Parameter.KEYWORD_ONLY), "memory": inspect.Parameter("memory", inspect.Parameter.KEYWORD_ONLY), } - mock_sig = inspect.Signature(parameters=list(mock_params.values())) - with patch.dict(os.environ, {"TERMINAL_ENV": "modal"}): - config = _tt_mod._get_env_config() - # The config has container_disk default of 51200 - disk = config.get("container_disk", 51200) - assert disk > 0, "disk should default to > 0" + monkeypatch.setenv("TERMINAL_ENV", "modal") + config = _tt_mod._get_env_config() + # The config has container_disk default of 51200 + disk = config.get("container_disk", 51200) + assert disk > 0, "disk should default to > 0" - # Simulate the version check logic from terminal_tool.py - sandbox_kwargs = {} - if disk > 0: - try: - if "ephemeral_disk" in mock_params: - sandbox_kwargs["ephemeral_disk"] = disk - except Exception: - pass + # Simulate the version check logic from terminal_tool.py + 
sandbox_kwargs = {} + if disk > 0: + try: + if "ephemeral_disk" in mock_params: + sandbox_kwargs["ephemeral_disk"] = disk + except Exception: + pass - assert "ephemeral_disk" not in sandbox_kwargs, ( - "ephemeral_disk should not be set when Sandbox.create doesn't support it" - ) + assert "ephemeral_disk" not in sandbox_kwargs, ( + "ephemeral_disk should not be set when Sandbox.create doesn't support it" + ) # ========================================================================= diff --git a/tests/test_tool_call_parsers.py b/tests/tools/test_tool_call_parsers.py similarity index 100% rename from tests/test_tool_call_parsers.py rename to tests/tools/test_tool_call_parsers.py diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index 08f26f50..226e99b5 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -34,6 +34,7 @@ import requests from hermes_cli.config import load_config from tools.browser_camofox_state import get_camofox_identity +from tools.registry import tool_error logger = logging.getLogger(__name__) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 2ed8ba21..1ff42e77 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1317,6 +1317,8 @@ def _make_read_resource_handler(server_name: str, tool_timeout: float): """Return a sync handler that reads a resource by URI from an MCP server.""" def _handler(args: dict, **kwargs) -> str: + from tools.registry import tool_error + with _lock: server = _servers.get(server_name) if not server or not server.session: @@ -1406,6 +1408,8 @@ def _make_get_prompt_handler(server_name: str, tool_timeout: float): """Return a sync handler that gets a prompt by name from an MCP server.""" def _handler(args: dict, **kwargs) -> str: + from tools.registry import tool_error + with _lock: server = _servers.get(server_name) if not server or not server.session: From 4aef0558054f332212f40c6cebe5147c6488e311 Mon Sep 17 00:00:00 2001 From: Jeff Escalante <556932+jescalan@users.noreply.github.com> 
Date: Tue, 7 Apr 2026 11:17:20 -0400 Subject: [PATCH 093/154] fix(gateway/webhook): don't pop delivery_info on send MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The webhook adapter stored per-request `deliver`/`deliver_extra` config in `_delivery_info[chat_id]` during POST handling and consumed it via `.pop()` inside `send()`. That worked for routes whose agent run produced exactly one outbound message — the final response — but it broke whenever the agent emitted any interim status message before the final response. Status messages flow through the same `send(chat_id, ...)` path as the final response (see `gateway/run.py::_status_callback_sync` → `adapter.send(...)`). Common triggers include: - "🔄 Primary model failed — switching to fallback: ..." (run_agent.py::_emit_status when `fallback_providers` activates) - context-pressure / compression notices - any other lifecycle event routed through `status_callback` When any of those fired, the first `send()` call popped the entry, so the subsequent final-response `send()` saw an empty dict and silently downgraded `deliver_type` from `"telegram"` (or `discord`/`slack`/etc.) to the default `"log"`. The agent's response was logged to the gateway log instead of being delivered to the configured cross-platform target — no warning, no error, just a missing message. This was easy to hit in practice. Any user with `fallback_providers` configured saw it the first time their primary provider hiccuped on a webhook-triggered run. Routes that worked perfectly in dev (where the primary stays healthy) silently dropped responses in prod. Fix: read `_delivery_info` with `.get()` so multiple `send()` calls for the same `chat_id` all see the same delivery config. 
To keep the dict bounded without relying on per-send cleanup, add a parallel `_delivery_info_created` timestamp dict and a `_prune_delivery_info()` helper that drops entries older than `_idempotency_ttl` (1h, same window already used by `_seen_deliveries`). Pruning runs on each POST, mirroring the existing `_seen_deliveries` cleanup pattern. Worst-case memory footprint is now `rate_limit * TTL = 30/min * 60min = 1800` entries, each ~1KB → under 2 MB. In practice it'll be far smaller because most webhooks complete in seconds, not the full hour. Test changes: - `test_delivery_info_cleaned_after_send` is replaced with `test_delivery_info_survives_multiple_sends`, which is now the regression test for this bug — it asserts that two consecutive `send()` calls both see the delivery config. - A new `test_delivery_info_pruned_via_ttl` covers the TTL cleanup behavior. - The two integration tests that asserted `chat_id not in adapter._delivery_info` after `send()` now assert the opposite, with a comment explaining why. All 40 tests in `tests/gateway/test_webhook_adapter.py` and `tests/gateway/test_webhook_integration.py` pass. Verified end-to-end locally against a dynamic `hermes webhook subscribe` route configured with `--deliver telegram --deliver-chat-id `: with `gpt-5.4` as the primary (currently flaky) and `claude-opus-4.6` as the fallback, the fallback notification fires, the agent finishes, and the final response is delivered to Telegram as expected. 
--- gateway/platforms/webhook.py | 44 ++++++++++++++++++--- tests/gateway/test_webhook_adapter.py | 47 ++++++++++++++++++++--- tests/gateway/test_webhook_integration.py | 10 +++-- 3 files changed, 87 insertions(+), 14 deletions(-) diff --git a/gateway/platforms/webhook.py b/gateway/platforms/webhook.py index daaf4f5d..6d4885d2 100644 --- a/gateway/platforms/webhook.py +++ b/gateway/platforms/webhook.py @@ -76,8 +76,17 @@ class WebhookAdapter(BasePlatformAdapter): self._routes: Dict[str, dict] = dict(self._static_routes) self._runner = None - # Delivery info keyed by session chat_id — consumed by send() + # Delivery info keyed by session chat_id. + # + # Read by every send() invocation for the chat_id (status messages + # AND the final response). Cleaned up via TTL on each POST so the + # dict stays bounded — see _prune_delivery_info(). Do NOT pop on + # send(), or interim status messages (e.g. fallback notifications, + # context-pressure warnings) will consume the entry before the + # final response arrives, causing the response to silently fall + # back to the "log" deliver type. self._delivery_info: Dict[str, dict] = {} + self._delivery_info_created: Dict[str, float] = {} # Reference to gateway runner for cross-platform delivery (set externally) self.gateway_runner = None @@ -160,10 +169,14 @@ class WebhookAdapter(BasePlatformAdapter): ) -> SendResult: """Deliver the agent's response to the configured destination. - chat_id is ``webhook:{route}:{delivery_id}`` — we pop the delivery - info stored during webhook receipt so it doesn't leak memory. + chat_id is ``webhook:{route}:{delivery_id}``. The delivery info + stored during webhook receipt is read with ``.get()`` (not popped) + so that interim status messages emitted before the final response + — fallback-model notifications, context-pressure warnings, etc. — + do not consume the entry and silently downgrade the final response + to the ``log`` deliver type. TTL cleanup happens on POST. 
""" - delivery = self._delivery_info.pop(chat_id, {}) + delivery = self._delivery_info.get(chat_id, {}) deliver_type = delivery.get("deliver", "log") if deliver_type == "log": @@ -190,6 +203,23 @@ class WebhookAdapter(BasePlatformAdapter): success=False, error=f"Unknown deliver type: {deliver_type}" ) + def _prune_delivery_info(self, now: float) -> None: + """Drop delivery_info entries older than the idempotency TTL. + + Mirrors the cleanup pattern used for ``_seen_deliveries``. Called + on each POST so the dict size is bounded by ``rate_limit * TTL`` + even if many webhooks fire and never receive a final response. + """ + cutoff = now - self._idempotency_ttl + stale = [ + k + for k, t in self._delivery_info_created.items() + if t < cutoff + ] + for k in stale: + self._delivery_info.pop(k, None) + self._delivery_info_created.pop(k, None) + async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: return {"name": chat_id, "type": "webhook"} @@ -382,7 +412,9 @@ class WebhookAdapter(BasePlatformAdapter): # same route get independent agent runs (not queued/interrupted). session_chat_id = f"webhook:{route_name}:{delivery_id}" - # Store delivery info for send() — consumed (popped) on delivery + # Store delivery info for send(). Read by every send() invocation + # for this chat_id (interim status messages and the final response), + # so we do NOT pop on send. TTL-based cleanup keeps the dict bounded. 
deliver_config = { "deliver": route_config.get("deliver", "log"), "deliver_extra": self._render_delivery_extra( @@ -391,6 +423,8 @@ class WebhookAdapter(BasePlatformAdapter): "payload": payload, } self._delivery_info[session_chat_id] = deliver_config + self._delivery_info_created[session_chat_id] = now + self._prune_delivery_info(now) # Build source and event source = self.build_source( diff --git a/tests/gateway/test_webhook_adapter.py b/tests/gateway/test_webhook_adapter.py index f323b95a..bedf254a 100644 --- a/tests/gateway/test_webhook_adapter.py +++ b/tests/gateway/test_webhook_adapter.py @@ -590,8 +590,15 @@ class TestSessionIsolation: class TestDeliveryCleanup: @pytest.mark.asyncio - async def test_delivery_info_cleaned_after_send(self): - """send() pops delivery_info so the entry doesn't leak memory.""" + async def test_delivery_info_survives_multiple_sends(self): + """send() must NOT pop delivery_info. + + Interim status messages (fallback notifications, context-pressure + warnings, etc.) flow through the same send() path as the final + response. If the entry were popped on the first send, the final + response would silently downgrade to the ``log`` deliver type. + Regression test for that bug. + """ adapter = _make_adapter() chat_id = "webhook:test:d-xyz" adapter._delivery_info[chat_id] = { @@ -599,10 +606,40 @@ class TestDeliveryCleanup: "deliver_extra": {}, "payload": {"x": 1}, } + adapter._delivery_info_created[chat_id] = time.time() - result = await adapter.send(chat_id, "Agent response here") - assert result.success is True - assert chat_id not in adapter._delivery_info + # First send (e.g. 
an interim status message) + result1 = await adapter.send(chat_id, "Status: switching to fallback") + assert result1.success is True + # Entry must still be present so the final send can read it + assert chat_id in adapter._delivery_info + + # Second send (the final agent response) + result2 = await adapter.send(chat_id, "Final agent response") + assert result2.success is True + assert chat_id in adapter._delivery_info + + @pytest.mark.asyncio + async def test_delivery_info_pruned_via_ttl(self): + """Stale delivery_info entries are dropped on the next POST.""" + adapter = _make_adapter() + adapter._idempotency_ttl = 60 # short TTL for the test + now = time.time() + + # Stale entry — older than TTL + adapter._delivery_info["webhook:test:old"] = {"deliver": "log"} + adapter._delivery_info_created["webhook:test:old"] = now - 120 + + # Fresh entry — should survive + adapter._delivery_info["webhook:test:new"] = {"deliver": "log"} + adapter._delivery_info_created["webhook:test:new"] = now - 5 + + adapter._prune_delivery_info(now) + + assert "webhook:test:old" not in adapter._delivery_info + assert "webhook:test:old" not in adapter._delivery_info_created + assert "webhook:test:new" in adapter._delivery_info + assert "webhook:test:new" in adapter._delivery_info_created # =================================================================== diff --git a/tests/gateway/test_webhook_integration.py b/tests/gateway/test_webhook_integration.py index 89998981..5c6fe011 100644 --- a/tests/gateway/test_webhook_integration.py +++ b/tests/gateway/test_webhook_integration.py @@ -259,8 +259,9 @@ class TestCrossPlatformDelivery: mock_tg_adapter.send.assert_awaited_once_with( "12345", "I've acknowledged the alert.", metadata=None ) - # Delivery info should be cleaned up - assert chat_id not in adapter._delivery_info + # Delivery info is retained after send() so interim status messages + # don't strand the final response (TTL-based cleanup happens on POST). 
+ assert chat_id in adapter._delivery_info # =================================================================== @@ -333,5 +334,6 @@ class TestGitHubCommentDelivery: text=True, timeout=30, ) - # Delivery info cleaned up - assert chat_id not in adapter._delivery_info + # Delivery info is retained after send() so interim status messages + # don't strand the final response (TTL-based cleanup happens on POST). + assert chat_id in adapter._delivery_info From 0d41fb082770b23afde6d1049382de2192c9cd01 Mon Sep 17 00:00:00 2001 From: Zainan Victor Zhou Date: Tue, 7 Apr 2026 11:51:33 -0700 Subject: [PATCH 094/154] fix(gateway): show full session id and title in /status --- gateway/run.py | 23 +++++++++++++++++------ tests/gateway/test_status_command.py | 25 ++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index aaee28bd..68027f28 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3342,25 +3342,36 @@ class GatewayRunner: """Handle /status command.""" source = event.source session_entry = self.session_store.get_or_create_session(source) - + connected_platforms = [p.value for p in self.adapters.keys()] - + # Check if there's an active agent session_key = session_entry.session_key is_running = session_key in self._running_agents - + + title = None + if self._session_db: + try: + title = self._session_db.get_session_title(session_entry.session_id) + except Exception: + title = None + lines = [ "📊 **Hermes Gateway Status**", "", - f"**Session ID:** `{session_entry.session_id[:12]}...`", + f"**Session ID:** `{session_entry.session_id}`", + ] + if title: + lines.append(f"**Title:** {title}") + lines.extend([ f"**Created:** {session_entry.created_at.strftime('%Y-%m-%d %H:%M')}", f"**Last Activity:** {session_entry.updated_at.strftime('%Y-%m-%d %H:%M')}", f"**Tokens:** {session_entry.total_tokens:,}", f"**Agent Running:** {'Yes ⚡' if is_running else 'No'}", "", f"**Connected Platforms:** {', 
'.join(connected_platforms)}", - ] - + ]) + return "\n".join(lines) async def _handle_stop_command(self, event: MessageEvent) -> str: diff --git a/tests/gateway/test_status_command.py b/tests/gateway/test_status_command.py index a363abd8..0dbd5980 100644 --- a/tests/gateway/test_status_command.py +++ b/tests/gateway/test_status_command.py @@ -51,7 +51,8 @@ def _make_runner(session_entry: SessionEntry): runner._running_agents = {} runner._pending_messages = {} runner._pending_approvals = {} - runner._session_db = None + runner._session_db = MagicMock() + runner._session_db.get_session_title.return_value = None runner._reasoning_config = None runner._provider_routing = {} runner._fallback_model = None @@ -82,12 +83,34 @@ async def test_status_command_reports_running_agent_without_interrupt(monkeypatc result = await runner._handle_message(_make_event("/status")) + assert "**Session ID:** `sess-1`" in result assert "**Tokens:** 321" in result assert "**Agent Running:** Yes ⚡" in result + assert "**Title:**" not in result running_agent.interrupt.assert_not_called() assert runner._pending_messages == {} +@pytest.mark.asyncio +async def test_status_command_includes_session_title_when_present(): + session_entry = SessionEntry( + session_key=build_session_key(_make_source()), + session_id="sess-1", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + total_tokens=321, + ) + runner = _make_runner(session_entry) + runner._session_db.get_session_title.return_value = "My titled session" + + result = await runner._handle_message(_make_event("/status")) + + assert "**Session ID:** `sess-1`" in result + assert "**Title:** My titled session" in result + + @pytest.mark.asyncio async def test_handle_message_persists_agent_token_counts(monkeypatch): import gateway.run as gateway_run From b1a66d55b47df176503e566aa9a3e454d647e07a Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 17:28:04 -0700 Subject: [PATCH 095/154] 
refactor: migrate 10 config.yaml inline loaders to read_raw_config() Replace 10 callsites across 6 files that manually opened config.yaml, called yaml.safe_load(), and handled missing-file/parse-error fallbacks with the new read_raw_config() helper from hermes_cli/config.py. Each migrated site previously had 5-8 lines of boilerplate: config_path = get_hermes_home() / 'config.yaml' if config_path.exists(): import yaml with open(config_path) as f: cfg = yaml.safe_load(f) or {} Now reduced to: from hermes_cli.config import read_raw_config cfg = read_raw_config() Migrated files: - tools/browser_tool.py (4 sites): command_timeout, cloud_provider, allow_private_urls, record_sessions - tools/env_passthrough.py: terminal.env_passthrough - tools/credential_files.py: terminal.credential_files - tools/transcription_tools.py: stt.model - hermes_cli/commands.py: config-gated command resolution - hermes_cli/auth.py (2 sites): model config read + provider reset Skipped (intentionally): - gateway/run.py: 10+ sites with local aliases, critical path - hermes_cli/profiles.py: profile-specific config path - hermes_cli/doctor.py: reads raw then writes fixes back - agent/model_metadata.py: different file (context_length_cache.yaml) - tools/website_policy.py: custom config_path param + error types --- hermes_cli/auth.py | 19 +++-------- hermes_cli/commands.py | 10 ++---- tools/browser_tool.py | 64 ++++++++++++++---------------------- tools/credential_files.py | 62 ++++++++++++++++------------------ tools/env_passthrough.py | 19 ++++------- tools/transcription_tools.py | 8 ++--- 6 files changed, 68 insertions(+), 114 deletions(-) diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 1cdbadc7..2025bbcc 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -37,7 +37,7 @@ from typing import Any, Dict, List, Optional import httpx import yaml -from hermes_cli.config import get_hermes_home, get_config_path +from hermes_cli.config import get_hermes_home, get_config_path, 
read_raw_config from hermes_constants import OPENROUTER_BASE_URL logger = logging.getLogger(__name__) @@ -2214,14 +2214,7 @@ def _update_config_for_provider( config_path = get_config_path() config_path.parent.mkdir(parents=True, exist_ok=True) - config: Dict[str, Any] = {} - if config_path.exists(): - try: - loaded = yaml.safe_load(config_path.read_text()) or {} - if isinstance(loaded, dict): - config = loaded - except Exception: - config = {} + config = read_raw_config() current_model = config.get("model") if isinstance(current_model, dict): @@ -2258,12 +2251,8 @@ def _reset_config_provider() -> Path: if not config_path.exists(): return config_path - try: - config = yaml.safe_load(config_path.read_text()) or {} - except Exception: - return config_path - - if not isinstance(config, dict): + config = read_raw_config() + if not config: return config_path model = config.get("model") diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index ecf4d0d6..39dc4569 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -293,14 +293,8 @@ def _resolve_config_gates() -> set[str]: if not gated: return set() try: - import yaml - from hermes_constants import get_hermes_home - config_path = str(get_hermes_home() / "config.yaml") - if os.path.exists(config_path): - with open(config_path, encoding="utf-8") as f: - cfg = yaml.safe_load(f) or {} - else: - cfg = {} + from hermes_cli.config import read_raw_config + cfg = read_raw_config() except Exception: return set() result: set[str] = set() diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 10004b08..7e52ed78 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -146,15 +146,11 @@ def _get_command_timeout() -> int: ``DEFAULT_COMMAND_TIMEOUT`` (30s) if unset or unreadable. 
""" try: - hermes_home = get_hermes_home() - config_path = hermes_home / "config.yaml" - if config_path.exists(): - import yaml - with open(config_path) as f: - cfg = yaml.safe_load(f) or {} - val = cfg.get("browser", {}).get("command_timeout") - if val is not None: - return max(int(val), 5) # Floor at 5s to avoid instant kills + from hermes_cli.config import read_raw_config + cfg = read_raw_config() + val = cfg.get("browser", {}).get("command_timeout") + if val is not None: + return max(int(val), 5) # Floor at 5s to avoid instant kills except Exception as e: logger.debug("Could not read command_timeout from config: %s", e) return DEFAULT_COMMAND_TIMEOUT @@ -259,23 +255,19 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: _cloud_provider_resolved = True try: - hermes_home = get_hermes_home() - config_path = hermes_home / "config.yaml" - if config_path.exists(): - import yaml - with open(config_path) as f: - cfg = yaml.safe_load(f) or {} - browser_cfg = cfg.get("browser", {}) - provider_key = None - if isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg: - provider_key = normalize_browser_cloud_provider( - browser_cfg.get("cloud_provider") - ) - if provider_key == "local": - _cached_cloud_provider = None - return None - if provider_key and provider_key in _PROVIDER_REGISTRY: - _cached_cloud_provider = _PROVIDER_REGISTRY[provider_key]() + from hermes_cli.config import read_raw_config + cfg = read_raw_config() + browser_cfg = cfg.get("browser", {}) + provider_key = None + if isinstance(browser_cfg, dict) and "cloud_provider" in browser_cfg: + provider_key = normalize_browser_cloud_provider( + browser_cfg.get("cloud_provider") + ) + if provider_key == "local": + _cached_cloud_provider = None + return None + if provider_key and provider_key in _PROVIDER_REGISTRY: + _cached_cloud_provider = _PROVIDER_REGISTRY[provider_key]() except Exception as e: logger.debug("Could not read cloud_provider from config: %s", e) @@ -326,13 +318,9 @@ def 
_allow_private_urls() -> bool: _allow_private_urls_resolved = True _cached_allow_private_urls = False # safe default try: - hermes_home = get_hermes_home() - config_path = hermes_home / "config.yaml" - if config_path.exists(): - import yaml - with open(config_path) as f: - cfg = yaml.safe_load(f) or {} - _cached_allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls")) + from hermes_cli.config import read_raw_config + cfg = read_raw_config() + _cached_allow_private_urls = bool(cfg.get("browser", {}).get("allow_private_urls")) except Exception as e: logger.debug("Could not read allow_private_urls from config: %s", e) return _cached_allow_private_urls @@ -1626,14 +1614,10 @@ def _maybe_start_recording(task_id: str): if task_id in _recording_sessions: return try: + from hermes_cli.config import read_raw_config hermes_home = get_hermes_home() - config_path = hermes_home / "config.yaml" - record_enabled = False - if config_path.exists(): - import yaml - with open(config_path) as f: - cfg = yaml.safe_load(f) or {} - record_enabled = cfg.get("browser", {}).get("record_sessions", False) + cfg = read_raw_config() + record_enabled = cfg.get("browser", {}).get("record_sessions", False) if not record_enabled: return diff --git a/tools/credential_files.py b/tools/credential_files.py index eafd5ea2..3092b75e 100644 --- a/tools/credential_files.py +++ b/tools/credential_files.py @@ -137,40 +137,36 @@ def _load_config_files() -> List[Dict[str, str]]: result: List[Dict[str, str]] = [] try: + from hermes_cli.config import read_raw_config hermes_home = _resolve_hermes_home() - config_path = hermes_home / "config.yaml" - if config_path.exists(): - import yaml - - with open(config_path) as f: - cfg = yaml.safe_load(f) or {} - cred_files = cfg.get("terminal", {}).get("credential_files") - if isinstance(cred_files, list): - hermes_home_resolved = hermes_home.resolve() - for item in cred_files: - if isinstance(item, str) and item.strip(): - rel = item.strip() - if 
os.path.isabs(rel): - logger.warning( - "credential_files: rejected absolute config path %r", rel, - ) - continue - host_path = (hermes_home / rel).resolve() - try: - host_path.relative_to(hermes_home_resolved) - except ValueError: - logger.warning( - "credential_files: rejected config path traversal %r " - "(resolves to %s, outside HERMES_HOME %s)", - rel, host_path, hermes_home_resolved, - ) - continue - if host_path.is_file(): - container_path = f"/root/.hermes/{rel}" - result.append({ - "host_path": str(host_path), - "container_path": container_path, - }) + cfg = read_raw_config() + cred_files = cfg.get("terminal", {}).get("credential_files") + if isinstance(cred_files, list): + hermes_home_resolved = hermes_home.resolve() + for item in cred_files: + if isinstance(item, str) and item.strip(): + rel = item.strip() + if os.path.isabs(rel): + logger.warning( + "credential_files: rejected absolute config path %r", rel, + ) + continue + host_path = (hermes_home / rel).resolve() + try: + host_path.relative_to(hermes_home_resolved) + except ValueError: + logger.warning( + "credential_files: rejected config path traversal %r " + "(resolves to %s, outside HERMES_HOME %s)", + rel, host_path, hermes_home_resolved, + ) + continue + if host_path.is_file(): + container_path = f"/root/.hermes/{rel}" + result.append({ + "host_path": str(host_path), + "container_path": container_path, + }) except Exception as e: logger.debug("Could not read terminal.credential_files from config: %s", e) diff --git a/tools/env_passthrough.py b/tools/env_passthrough.py index 0e883bab..d931f150 100644 --- a/tools/env_passthrough.py +++ b/tools/env_passthrough.py @@ -66,18 +66,13 @@ def _load_config_passthrough() -> frozenset[str]: result: set[str] = set() try: - from hermes_constants import get_hermes_home - config_path = get_hermes_home() / "config.yaml" - if config_path.exists(): - import yaml - - with open(config_path) as f: - cfg = yaml.safe_load(f) or {} - passthrough = cfg.get("terminal", 
{}).get("env_passthrough") - if isinstance(passthrough, list): - for item in passthrough: - if isinstance(item, str) and item.strip(): - result.add(item.strip()) + from hermes_cli.config import read_raw_config + cfg = read_raw_config() + passthrough = cfg.get("terminal", {}).get("env_passthrough") + if isinstance(passthrough, list): + for item in passthrough: + if isinstance(item, str) and item.strip(): + result.add(item.strip()) except Exception as e: logger.debug("Could not read tools.env_passthrough from config: %s", e) diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 9a79cdfb..d8d0f364 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -98,12 +98,8 @@ def get_stt_model_from_config() -> Optional[str]: Silently returns ``None`` on any error (missing file, bad YAML, etc.). """ try: - import yaml - cfg_path = get_hermes_home() / "config.yaml" - if cfg_path.exists(): - with open(cfg_path) as f: - data = yaml.safe_load(f) or {} - return data.get("stt", {}).get("model") + from hermes_cli.config import read_raw_config + return read_raw_config().get("stt", {}).get("model") except Exception: pass return None From 469cd16fe01edbde4070a5ed549a8e2a839f9157 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:28:37 -0700 Subject: [PATCH 096/154] =?UTF-8?q?fix(security):=20consolidated=20securit?= =?UTF-8?q?y=20hardening=20=E2=80=94=20SSRF,=20timing=20attack,=20tar=20tr?= =?UTF-8?q?aversal,=20credential=20leakage=20(#5944)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Salvaged from PRs #5800 (memosr), #5806 (memosr), #5915 (Ruzzgar), #5928 (Awsh1). 
Changes: - Use hmac.compare_digest for API key comparison (timing attack prevention) - Apply provider env var blocklist to Docker containers (credential leakage) - Replace tar.extractall() with safe extraction in TerminalBench2 (CVE-2007-4559) - Add SSRF protection via is_safe_url to ALL platform adapters: base.py (cache_image_from_url, cache_audio_from_url), discord, slack, telegram, matrix, mattermost, feishu, wecom (Signal and WhatsApp protected via base.py helpers) - Update tests: mock is_safe_url in Mattermost download tests - Add security tests for tar extraction (traversal, symlinks, safe files) --- .../terminalbench_2/terminalbench2_env.py | 60 ++++++- gateway/platforms/api_server.py | 3 +- gateway/platforms/base.py | 14 ++ gateway/platforms/discord.py | 5 + gateway/platforms/feishu.py | 4 + gateway/platforms/matrix.py | 5 + gateway/platforms/mattermost.py | 5 + gateway/platforms/slack.py | 5 + gateway/platforms/telegram.py | 7 +- gateway/platforms/wecom.py | 4 + .../test_terminalbench2_env_security.py | 164 ++++++++++++++++++ tests/gateway/test_mattermost.py | 3 +- tests/gateway/test_media_download_retry.py | 13 +- tools/environments/docker.py | 3 + 14 files changed, 284 insertions(+), 11 deletions(-) create mode 100644 tests/environments/benchmarks/test_terminalbench2_env_security.py diff --git a/environments/benchmarks/terminalbench_2/terminalbench2_env.py b/environments/benchmarks/terminalbench_2/terminalbench2_env.py index 3f95d402..2f0d9262 100644 --- a/environments/benchmarks/terminalbench_2/terminalbench2_env.py +++ b/environments/benchmarks/terminalbench_2/terminalbench2_env.py @@ -44,7 +44,7 @@ import tempfile import time import uuid from collections import defaultdict -from pathlib import Path +from pathlib import Path, PurePosixPath, PureWindowsPath from typing import Any, Dict, List, Optional, Tuple, Union # Ensure repo root is on sys.path for imports @@ -148,6 +148,62 @@ MODAL_INCOMPATIBLE_TASKS = { # Tar extraction helper # 
============================================================================= +def _normalize_tar_member_parts(member_name: str) -> list: + """Return safe path components for a tar member or raise ValueError.""" + normalized_name = member_name.replace("\\", "/") + posix_path = PurePosixPath(normalized_name) + windows_path = PureWindowsPath(member_name) + + if ( + not normalized_name + or posix_path.is_absolute() + or windows_path.is_absolute() + or windows_path.drive + ): + raise ValueError(f"Unsafe archive member path: {member_name}") + + parts = [part for part in posix_path.parts if part not in ("", ".")] + if not parts or any(part == ".." for part in parts): + raise ValueError(f"Unsafe archive member path: {member_name}") + return parts + + +def _safe_extract_tar(tar: tarfile.TarFile, target_dir: Path) -> None: + """Extract a tar archive without allowing traversal or link entries.""" + target_dir.mkdir(parents=True, exist_ok=True) + target_root = target_dir.resolve() + + for member in tar.getmembers(): + parts = _normalize_tar_member_parts(member.name) + target = target_dir.joinpath(*parts) + target_real = target.resolve(strict=False) + + try: + target_real.relative_to(target_root) + except ValueError as exc: + raise ValueError(f"Unsafe archive member path: {member.name}") from exc + + if member.isdir(): + target_real.mkdir(parents=True, exist_ok=True) + continue + + if not member.isfile(): + raise ValueError(f"Unsupported archive member type: {member.name}") + + target_real.parent.mkdir(parents=True, exist_ok=True) + extracted = tar.extractfile(member) + if extracted is None: + raise ValueError(f"Cannot read archive member: {member.name}") + + with extracted, open(target_real, "wb") as dst: + shutil.copyfileobj(extracted, dst) + + try: + os.chmod(target_real, member.mode & 0o777) + except OSError: + pass + + def _extract_base64_tar(b64_data: str, target_dir: Path): """Extract a base64-encoded tar.gz archive into target_dir.""" if not b64_data: @@ -155,7 +211,7 
@@ def _extract_base64_tar(b64_data: str, target_dir: Path): raw = base64.b64decode(b64_data) buf = io.BytesIO(raw) with tarfile.open(fileobj=buf, mode="r:gz") as tar: - tar.extractall(path=str(target_dir)) + _safe_extract_tar(tar, target_dir) # ============================================================================= diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 7ced55c1..d9b32873 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -20,6 +20,7 @@ Requires: """ import asyncio +import hmac import json import logging import os @@ -370,7 +371,7 @@ class APIServerAdapter(BasePlatformAdapter): auth_header = request.headers.get("Authorization", "") if auth_header.startswith("Bearer "): token = auth_header[7:].strip() - if token == self._api_key: + if hmac.compare_digest(token, self._api_key): return None # Auth OK return web.json_response( diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 551c0e86..a888eede 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -124,7 +124,14 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> Returns: Absolute path to the cached image file as a string. + + Raises: + ValueError: If the URL targets a private/internal network (SSRF protection). """ + from tools.url_safety import is_safe_url + if not is_safe_url(url): + raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}") + import asyncio import httpx import logging as _logging @@ -232,7 +239,14 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> Returns: Absolute path to the cached audio file as a string. + + Raises: + ValueError: If the URL targets a private/internal network (SSRF protection). 
""" + from tools.url_safety import is_safe_url + if not is_safe_url(url): + raise ValueError(f"Blocked unsafe URL (SSRF protection): {_safe_url_for_log(url)}") + import asyncio import httpx import logging as _logging diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 83ea2694..d6580ab9 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -55,6 +55,7 @@ from gateway.platforms.base import ( cache_document_from_bytes, SUPPORTED_DOCUMENT_TYPES, ) +from tools.url_safety import is_safe_url def _clean_discord_id(entry: str) -> str: @@ -1285,6 +1286,10 @@ class DiscordAdapter(BasePlatformAdapter): if not self._client: return SendResult(success=False, error="Not connected") + if not is_safe_url(image_url): + logger.warning("[%s] Blocked unsafe image URL during Discord send_image", self.name) + return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata) + try: import aiohttp diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 7b20bc19..4bc712f2 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -2109,6 +2109,10 @@ class FeishuAdapter(BasePlatformAdapter): default_ext: str, preferred_name: str, ) -> tuple[str, str]: + from tools.url_safety import is_safe_url + if not is_safe_url(file_url): + raise ValueError(f"Blocked unsafe URL (SSRF protection): {file_url[:80]}") + import httpx async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py index 2eb89d11..e29ae379 100644 --- a/gateway/platforms/matrix.py +++ b/gateway/platforms/matrix.py @@ -586,6 +586,11 @@ class MatrixAdapter(BasePlatformAdapter): metadata: Optional[Dict[str, Any]] = None, ) -> SendResult: """Download an image URL and upload it to Matrix.""" + from tools.url_safety import is_safe_url + if not is_safe_url(image_url): + logger.warning("Matrix: blocked unsafe image URL (SSRF protection)") + 
return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata) + try: # Try aiohttp first (always available), fall back to httpx try: diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 3835919a..56f29e87 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -407,6 +407,11 @@ class MattermostAdapter(BasePlatformAdapter): kind: str = "file", ) -> SendResult: """Download a URL and upload it as a file attachment.""" + from tools.url_safety import is_safe_url + if not is_safe_url(url): + logger.warning("Mattermost: blocked unsafe URL (SSRF protection)") + return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to) + import asyncio import aiohttp diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 164e6ab4..7af313d3 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -595,6 +595,11 @@ class SlackAdapter(BasePlatformAdapter): if not self._app: return SendResult(success=False, error="Not connected") + from tools.url_safety import is_safe_url + if not is_safe_url(image_url): + logger.warning("[Slack] Blocked unsafe image URL (SSRF protection)") + return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata) + try: import httpx diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index f72c31e1..c9da4c9b 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1632,7 +1632,12 @@ class TelegramAdapter(BasePlatformAdapter): """ if not self._bot: return SendResult(success=False, error="Not connected") - + + from tools.url_safety import is_safe_url + if not is_safe_url(image_url): + logger.warning("[%s] Blocked unsafe image URL (SSRF protection)", self.name) + return await super().send_image(chat_id, image_url, caption, reply_to, metadata=metadata) + try: # Telegram can send photos directly from URLs (up to ~5MB) _photo_thread = 
metadata.get("thread_id") if metadata else None diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 525a830b..b1c04bef 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -910,6 +910,10 @@ class WeComAdapter(BasePlatformAdapter): url: str, max_bytes: int, ) -> Tuple[bytes, Dict[str, str]]: + from tools.url_safety import is_safe_url + if not is_safe_url(url): + raise ValueError(f"Blocked unsafe URL (SSRF protection): {url[:80]}") + if not HTTPX_AVAILABLE: raise RuntimeError("httpx is required for WeCom media download") diff --git a/tests/environments/benchmarks/test_terminalbench2_env_security.py b/tests/environments/benchmarks/test_terminalbench2_env_security.py new file mode 100644 index 00000000..b2610757 --- /dev/null +++ b/tests/environments/benchmarks/test_terminalbench2_env_security.py @@ -0,0 +1,164 @@ +"""Security tests for Terminal-Bench 2 archive extraction.""" + +import base64 +import importlib +import io +import sys +import tarfile +import types + +import pytest + + +def _stub_module(name: str, **attrs): + module = types.ModuleType(name) + for key, value in attrs.items(): + setattr(module, key, value) + return module + + +def _load_terminalbench_module(monkeypatch): + class _EvalHandlingEnum: + STOP_TRAIN = "stop_train" + + class _APIServerConfig: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + class _AgentResult: + pass + + class _HermesAgentLoop: + pass + + class _HermesAgentBaseEnv: + pass + + class _HermesAgentEnvConfig: + pass + + class _ToolContext: + pass + + stub_modules = { + "atroposlib": _stub_module("atroposlib"), + "atroposlib.envs": _stub_module("atroposlib.envs"), + "atroposlib.envs.base": _stub_module( + "atroposlib.envs.base", + EvalHandlingEnum=_EvalHandlingEnum, + ), + "atroposlib.envs.server_handling": _stub_module("atroposlib.envs.server_handling"), + "atroposlib.envs.server_handling.server_manager": _stub_module( + 
"atroposlib.envs.server_handling.server_manager", + APIServerConfig=_APIServerConfig, + ), + "environments.agent_loop": _stub_module( + "environments.agent_loop", + AgentResult=_AgentResult, + HermesAgentLoop=_HermesAgentLoop, + ), + "environments.hermes_base_env": _stub_module( + "environments.hermes_base_env", + HermesAgentBaseEnv=_HermesAgentBaseEnv, + HermesAgentEnvConfig=_HermesAgentEnvConfig, + ), + "environments.tool_context": _stub_module( + "environments.tool_context", + ToolContext=_ToolContext, + ), + "tools.terminal_tool": _stub_module( + "tools.terminal_tool", + register_task_env_overrides=lambda *args, **kwargs: None, + clear_task_env_overrides=lambda *args, **kwargs: None, + cleanup_vm=lambda *args, **kwargs: None, + ), + } + + stub_modules["atroposlib"].envs = stub_modules["atroposlib.envs"] + stub_modules["atroposlib.envs"].base = stub_modules["atroposlib.envs.base"] + stub_modules["atroposlib.envs"].server_handling = stub_modules["atroposlib.envs.server_handling"] + stub_modules["atroposlib.envs.server_handling"].server_manager = stub_modules[ + "atroposlib.envs.server_handling.server_manager" + ] + + for name, module in stub_modules.items(): + monkeypatch.setitem(sys.modules, name, module) + + module_name = "environments.benchmarks.terminalbench_2.terminalbench2_env" + sys.modules.pop(module_name, None) + return importlib.import_module(module_name) + + +def _build_tar_b64(entries): + buf = io.BytesIO() + with tarfile.open(fileobj=buf, mode="w:gz") as tar: + for entry in entries: + kind = entry["kind"] + info = tarfile.TarInfo(entry["name"]) + + if kind == "dir": + info.type = tarfile.DIRTYPE + tar.addfile(info) + continue + + if kind == "file": + data = entry["data"].encode("utf-8") + info.size = len(data) + tar.addfile(info, io.BytesIO(data)) + continue + + if kind == "symlink": + info.type = tarfile.SYMTYPE + info.linkname = entry["target"] + tar.addfile(info) + continue + + raise ValueError(f"Unknown tar entry kind: {kind}") + + return 
base64.b64encode(buf.getvalue()).decode("ascii") + + +def test_extract_base64_tar_allows_safe_files(tmp_path, monkeypatch): + module = _load_terminalbench_module(monkeypatch) + archive = _build_tar_b64( + [ + {"kind": "dir", "name": "nested"}, + {"kind": "file", "name": "nested/hello.txt", "data": "hello"}, + ] + ) + + target = tmp_path / "extract" + module._extract_base64_tar(archive, target) + + assert (target / "nested" / "hello.txt").read_text(encoding="utf-8") == "hello" + + +def test_extract_base64_tar_rejects_path_traversal(tmp_path, monkeypatch): + module = _load_terminalbench_module(monkeypatch) + archive = _build_tar_b64( + [ + {"kind": "file", "name": "../escape.txt", "data": "owned"}, + ] + ) + + target = tmp_path / "extract" + with pytest.raises(ValueError, match="Unsafe archive member path"): + module._extract_base64_tar(archive, target) + + assert not (tmp_path / "escape.txt").exists() + + +def test_extract_base64_tar_rejects_symlinks(tmp_path, monkeypatch): + module = _load_terminalbench_module(monkeypatch) + archive = _build_tar_b64( + [ + {"kind": "symlink", "name": "link", "target": "../../escape.txt"}, + ] + ) + + target = tmp_path / "extract" + with pytest.raises(ValueError, match="Unsupported archive member type"): + module._extract_base64_tar(archive, target) + + assert not (target / "link").exists() diff --git a/tests/gateway/test_mattermost.py b/tests/gateway/test_mattermost.py index a7a586ff..7d47c0a3 100644 --- a/tests/gateway/test_mattermost.py +++ b/tests/gateway/test_mattermost.py @@ -504,7 +504,8 @@ class TestMattermostFileUpload: self.adapter._session = MagicMock() @pytest.mark.asyncio - async def test_send_image_downloads_and_uploads(self): + @patch("tools.url_safety.is_safe_url", return_value=True) + async def test_send_image_downloads_and_uploads(self, _mock_safe): """send_image should download the URL, upload via /api/v4/files, then post.""" # Mock the download (GET) mock_dl_resp = AsyncMock() diff --git 
a/tests/gateway/test_media_download_retry.py b/tests/gateway/test_media_download_retry.py index ad00da24..8f135a05 100644 --- a/tests/gateway/test_media_download_retry.py +++ b/tests/gateway/test_media_download_retry.py @@ -596,10 +596,11 @@ def _make_aiohttp_resp(status: int, content: bytes = b"file bytes", return resp +@patch("tools.url_safety.is_safe_url", return_value=True) class TestMattermostSendUrlAsFile: """Tests for MattermostAdapter._send_url_as_file""" - def test_success_on_first_attempt(self): + def test_success_on_first_attempt(self, _mock_safe): """200 on first attempt → file uploaded and post created.""" adapter = _make_mm_adapter() resp = _make_aiohttp_resp(200) @@ -616,7 +617,7 @@ class TestMattermostSendUrlAsFile: adapter._upload_file.assert_called_once() adapter._api_post.assert_called_once() - def test_retries_on_429_then_succeeds(self): + def test_retries_on_429_then_succeeds(self, _mock_safe): """429 on first attempt is retried; 200 on second attempt succeeds.""" adapter = _make_mm_adapter() @@ -637,7 +638,7 @@ class TestMattermostSendUrlAsFile: assert adapter._session.get.call_count == 2 mock_sleep.assert_called_once() - def test_retries_on_500_then_succeeds(self): + def test_retries_on_500_then_succeeds(self, _mock_safe): """5xx on first attempt is retried; 200 on second attempt succeeds.""" adapter = _make_mm_adapter() @@ -655,7 +656,7 @@ class TestMattermostSendUrlAsFile: assert result.success assert adapter._session.get.call_count == 2 - def test_falls_back_to_text_after_max_retries_on_5xx(self): + def test_falls_back_to_text_after_max_retries_on_5xx(self, _mock_safe): """Three consecutive 500s exhaust retries; falls back to send() with URL text.""" adapter = _make_mm_adapter() @@ -674,7 +675,7 @@ class TestMattermostSendUrlAsFile: text_arg = adapter.send.call_args[0][1] assert "http://cdn.example.com/img.png" in text_arg - def test_falls_back_on_client_error(self): + def test_falls_back_on_client_error(self, _mock_safe): 
"""aiohttp.ClientError on every attempt falls back to send() with URL.""" import aiohttp @@ -699,7 +700,7 @@ class TestMattermostSendUrlAsFile: text_arg = adapter.send.call_args[0][1] assert "http://cdn.example.com/img.png" in text_arg - def test_non_retryable_404_falls_back_immediately(self): + def test_non_retryable_404_falls_back_immediately(self, _mock_safe): """404 is non-retryable (< 500, != 429); send() is called right away.""" adapter = _make_mm_adapter() diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 1d2d325c..4d3b6f50 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -18,6 +18,7 @@ import uuid from typing import Optional from tools.environments.base import BaseEnvironment +from tools.environments.local import _HERMES_PROVIDER_ENV_BLOCKLIST from tools.interrupt import is_interrupted logger = logging.getLogger(__name__) @@ -510,6 +511,8 @@ class DockerEnvironment(BaseEnvironment): forward_keys |= get_all_passthrough() except Exception: pass + # Strip Hermes-managed secrets so they never leak into the container. + forward_keys -= _HERMES_PROVIDER_ENV_BLOCKLIST hermes_env = _load_hermes_env_vars() if forward_keys else {} for key in sorted(forward_keys): value = os.getenv(key) From 37bf19a29d4d80609bc91b016f3f9c544b5c98f3 Mon Sep 17 00:00:00 2001 From: lesterli Date: Tue, 7 Apr 2026 18:30:33 +0800 Subject: [PATCH 097/154] fix(codex): align validation with normalization for empty stream output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The response validation stage unconditionally marked Codex Responses API replies as invalid when response.output was empty, triggering unnecessary retries and fallback chains. However, _normalize_codex_response can recover from this state by synthesizing output from response.output_text. Now the validation stage checks for output_text before marking the response invalid, matching the normalization logic. 
Also fixes logging.warning → logger.warning for consistency with the rest of the file. Co-Authored-By: Claude Opus 4.6 (1M context) --- run_agent.py | 38 ++++++++------ .../test_run_agent_codex_responses.py | 50 +++++++++++++++++++ 2 files changed, 74 insertions(+), 14 deletions(-) diff --git a/run_agent.py b/run_agent.py index 19f7c23f..a0c266aa 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7391,20 +7391,30 @@ class AIAgent: response_invalid = True error_details.append("response.output is not a list") elif not output_items: - # If we reach here, _run_codex_stream's backfill - # from output_item.done events and text-delta - # synthesis both failed to populate output. - _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) - logging.warning( - "Codex response.output is empty after stream backfill " - "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, - getattr(response, "model", None), - f"api_mode={self.api_mode} provider={self.provider}", - ) - response_invalid = True - error_details.append("response.output is empty") + # Stream backfill may have failed, but + # _normalize_codex_response can still recover + # from response.output_text. Only mark invalid + # when that fallback is also absent. + _out_text = getattr(response, "output_text", None) + _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + if _out_text_stripped: + logger.debug( + "Codex response.output is empty but output_text is present " + "(%d chars); deferring to normalization.", + len(_out_text_stripped), + ) + else: + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + logger.warning( + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": content_blocks = getattr(response, "content", None) if response is not None else None if response is None: diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index 4b24fbb1..ea703ffb 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -386,6 +386,56 @@ def test_run_conversation_codex_plain_text(monkeypatch): assert result["messages"][-1]["content"] == "OK" +def test_run_conversation_codex_empty_output_with_output_text(monkeypatch): + """Regression: empty response.output + valid output_text should succeed, + not trigger retry/fallback. The validation stage must defer to + _normalize_codex_response which synthesizes output from output_text.""" + agent = _build_agent(monkeypatch) + + def _empty_output_response(api_kwargs): + return SimpleNamespace( + output=[], + output_text="Hello from Codex", + usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8), + status="completed", + model="gpt-5-codex", + ) + + monkeypatch.setattr(agent, "_interruptible_api_call", _empty_output_response) + + result = agent.run_conversation("Say hello") + + assert result["completed"] is True + assert result["final_response"] == "Hello from Codex" + + +def test_run_conversation_codex_empty_output_no_output_text_retries(monkeypatch): + """When both output and output_text are empty, validation should + correctly mark the response as invalid and trigger retry.""" + agent = _build_agent(monkeypatch) + calls = {"api": 0} + + def _fake_api_call(api_kwargs): + calls["api"] += 1 + if calls["api"] == 1: + return SimpleNamespace( + output=[], + output_text=None, + usage=SimpleNamespace(input_tokens=5, 
output_tokens=3, total_tokens=8), + status="completed", + model="gpt-5-codex", + ) + return _codex_message_response("Recovered") + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + + result = agent.run_conversation("Say hello") + + assert calls["api"] >= 2 + assert result["completed"] is True + assert result["final_response"] == "Recovered" + + def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch): agent = _build_agent(monkeypatch) calls = {"api": 0, "refresh": 0} From f6d4b6a3198b35e2ee340c617b2f3193a9ab727b Mon Sep 17 00:00:00 2001 From: Angello Picasso Date: Tue, 7 Apr 2026 16:57:34 +0000 Subject: [PATCH 098/154] feat(discord): add ignored_channels and no_thread_channels config - ignored_channels: channels where bot never responds (even when mentioned) - no_thread_channels: channels where bot responds directly without thread Both support config.yaml and env vars (DISCORD_IGNORED_CHANNELS, DISCORD_NO_THREAD_CHANNELS), following existing pattern for free_response_channels. 
Fixes #5881 --- gateway/config.py | 12 ++++++++++++ gateway/platforms/discord.py | 21 ++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/gateway/config.py b/gateway/config.py index 470eee7f..6c882529 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -556,6 +556,18 @@ def load_gateway_config() -> GatewayConfig: os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower() if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"): os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower() + # ignored_channels: channels where bot never responds (even when mentioned) + ic = discord_cfg.get("ignored_channels") + if ic is not None and not os.getenv("DISCORD_IGNORED_CHANNELS"): + if isinstance(ic, list): + ic = ",".join(str(v) for v in ic) + os.environ["DISCORD_IGNORED_CHANNELS"] = str(ic) + # no_thread_channels: channels where bot responds directly without creating thread + ntc = discord_cfg.get("no_thread_channels") + if ntc is not None and not os.getenv("DISCORD_NO_THREAD_CHANNELS"): + if isinstance(ntc, list): + ntc = ",".join(str(v) for v in ntc) + os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc) # Telegram settings → env vars (env vars take precedence) telegram_cfg = yaml_cfg.get("telegram", {}) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index d6580ab9..703c7549 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -2193,9 +2193,11 @@ class DiscordAdapter(BasePlatformAdapter): # UNLESS the channel is in the free-response list or the message is # in a thread where the bot has already participated. 
# - # Config (all settable via discord.* in config.yaml): + # Config (all settable via discord.* in config.yaml or DISCORD_* env vars): # discord.require_mention: Require @mention in server channels (default: true) # discord.free_response_channels: Channel IDs where bot responds without mention + # discord.ignored_channels: Channel IDs where bot NEVER responds (even when mentioned) + # discord.no_thread_channels: Channel IDs where bot responds directly without creating thread # discord.auto_thread: Auto-create thread on @mention in channels (default: true) thread_id = None @@ -2206,9 +2208,18 @@ class DiscordAdapter(BasePlatformAdapter): parent_channel_id = self._get_parent_channel_id(message.channel) if not isinstance(message.channel, discord.DMChannel): + # Check ignored channels first - never respond even when mentioned + ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "") + ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()} + channel_ids = {str(message.channel.id)} + if parent_channel_id: + channel_ids.add(parent_channel_id) + if channel_ids & ignored_channels: + logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids) + return + free_channels_raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "") free_channels = {ch.strip() for ch in free_channels_raw.split(",") if ch.strip()} - channel_ids = {str(message.channel.id)} if parent_channel_id: channel_ids.add(parent_channel_id) @@ -2230,10 +2241,14 @@ class DiscordAdapter(BasePlatformAdapter): # Auto-thread: when enabled, automatically create a thread for every # @mention in a text channel so each conversation is isolated (like Slack). # Messages already inside threads or DMs are unaffected. + # no_thread_channels: channels where bot responds directly without thread. 
auto_threaded_channel = None if not is_thread and not isinstance(message.channel, discord.DMChannel): + no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "") + no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()} + skip_thread = bool(channel_ids & no_thread_channels) auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes") - if auto_thread: + if auto_thread and not skip_thread: thread = await self._auto_create_thread(message) if thread: is_thread = True From 74b0072f8f9864fe2c0735d90ea3853b7927314e Mon Sep 17 00:00:00 2001 From: Alvaro Linares Date: Tue, 7 Apr 2026 07:34:56 -0300 Subject: [PATCH 099/154] feat(telegram): add message reactions on processing start/complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the Discord reaction pattern for Telegram: - 👀 (eyes) when message processing begins - ✅ (check) on successful completion - ❌ (cross) on failure Controlled via TELEGRAM_REACTIONS env var or telegram.reactions in config.yaml (enabled by default, like Discord). Uses python-telegram-bot's Bot.set_message_reaction() API. Failures are caught and logged at debug level so they never break message processing. 
--- gateway/config.py | 2 ++ gateway/platforms/telegram.py | 54 +++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/gateway/config.py b/gateway/config.py index 6c882529..ab0d7c11 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -582,6 +582,8 @@ def load_gateway_config() -> GatewayConfig: if isinstance(frc, list): frc = ",".join(str(v) for v in frc) os.environ["TELEGRAM_FREE_RESPONSE_CHATS"] = str(frc) + if "reactions" in telegram_cfg and not os.getenv("TELEGRAM_REACTIONS"): + os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower() whatsapp_cfg = yaml_cfg.get("whatsapp", {}) if isinstance(whatsapp_cfg, dict): diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index c9da4c9b..90812bb3 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2673,3 +2673,57 @@ class TelegramAdapter(BasePlatformAdapter): auto_skill=topic_skill, timestamp=message.date, ) + + # ── Message reactions (processing lifecycle) ────────────────────────── + + def _reactions_enabled(self) -> bool: + """Check if message reactions are enabled via config/env.""" + return os.getenv("TELEGRAM_REACTIONS", "true").lower() not in ("false", "0", "no") + + async def _set_reaction(self, chat_id: str, message_id: str, emoji: str) -> bool: + """Set a single emoji reaction on a Telegram message.""" + if not self._bot: + return False + try: + await self._bot.set_message_reaction( + chat_id=int(chat_id), + message_id=int(message_id), + reaction=emoji, + ) + return True + except Exception as e: + logger.debug("[%s] set_message_reaction failed (%s): %s", self.name, emoji, e) + return False + + async def _remove_reaction(self, chat_id: str, message_id: str) -> bool: + """Remove all reactions from a Telegram message.""" + if not self._bot: + return False + try: + await self._bot.set_message_reaction( + chat_id=int(chat_id), + message_id=int(message_id), + reaction=None, + ) + return True + except Exception as 
e: + logger.debug("[%s] remove_reaction failed: %s", self.name, e) + return False + + async def on_processing_start(self, event: MessageEvent) -> None: + """Add an in-progress reaction when message processing begins.""" + if not self._reactions_enabled(): + return + chat_id = getattr(event.source, "chat_id", None) + message_id = getattr(event, "message_id", None) + if chat_id and message_id: + await self._set_reaction(chat_id, message_id, "\U0001f440") + + async def on_processing_complete(self, event: MessageEvent, success: bool) -> None: + """Swap the in-progress reaction for a final success/failure reaction.""" + if not self._reactions_enabled(): + return + chat_id = getattr(event.source, "chat_id", None) + message_id = getattr(event, "message_id", None) + if chat_id and message_id: + await self._set_reaction(chat_id, message_id, "\u2705" if success else "\u274c") From 52b3a3ca3aec41e2bf53ead7a48867f63b4073bb Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 17:32:37 -0700 Subject: [PATCH 100/154] fix: default Telegram reactions to off, remove dead _remove_reaction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telegram's set_message_reaction replaces all reactions in one call, so _remove_reaction was never called (unlike Discord's additive model). Default reactions to disabled — users opt in via telegram.reactions: true. 
--- gateway/platforms/telegram.py | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 90812bb3..85b8afc9 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2678,7 +2678,7 @@ class TelegramAdapter(BasePlatformAdapter): def _reactions_enabled(self) -> bool: """Check if message reactions are enabled via config/env.""" - return os.getenv("TELEGRAM_REACTIONS", "true").lower() not in ("false", "0", "no") + return os.getenv("TELEGRAM_REACTIONS", "false").lower() not in ("false", "0", "no") async def _set_reaction(self, chat_id: str, message_id: str, emoji: str) -> bool: """Set a single emoji reaction on a Telegram message.""" @@ -2695,21 +2695,6 @@ class TelegramAdapter(BasePlatformAdapter): logger.debug("[%s] set_message_reaction failed (%s): %s", self.name, emoji, e) return False - async def _remove_reaction(self, chat_id: str, message_id: str) -> bool: - """Remove all reactions from a Telegram message.""" - if not self._bot: - return False - try: - await self._bot.set_message_reaction( - chat_id=int(chat_id), - message_id=int(message_id), - reaction=None, - ) - return True - except Exception as e: - logger.debug("[%s] remove_reaction failed: %s", self.name, e) - return False - async def on_processing_start(self, event: MessageEvent) -> None: """Add an in-progress reaction when message processing begins.""" if not self._reactions_enabled(): @@ -2720,7 +2705,11 @@ class TelegramAdapter(BasePlatformAdapter): await self._set_reaction(chat_id, message_id, "\U0001f440") async def on_processing_complete(self, event: MessageEvent, success: bool) -> None: - """Swap the in-progress reaction for a final success/failure reaction.""" + """Swap the in-progress reaction for a final success/failure reaction. 
+ + Unlike Discord (additive reactions), Telegram's set_message_reaction + replaces all existing reactions in one call — no remove step needed. + """ if not self._reactions_enabled(): return chat_id = getattr(event.source, "chat_id", None) From a6547f399f8d7edaef385f9d878a3fdb9175f4fd Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 17:35:31 -0700 Subject: [PATCH 101/154] test: add tests for Discord channel controls and Telegram reactions - 14 tests for ignored_channels, no_thread_channels, and config bridging - 17 tests for reaction enable/disable, API calls, error handling, and config --- .../gateway/test_discord_channel_controls.py | 342 ++++++++++++++++++ tests/gateway/test_telegram_reactions.py | 258 +++++++++++++ 2 files changed, 600 insertions(+) create mode 100644 tests/gateway/test_discord_channel_controls.py create mode 100644 tests/gateway/test_telegram_reactions.py diff --git a/tests/gateway/test_discord_channel_controls.py b/tests/gateway/test_discord_channel_controls.py new file mode 100644 index 00000000..9b8e7cbb --- /dev/null +++ b/tests/gateway/test_discord_channel_controls.py @@ -0,0 +1,342 @@ +"""Tests for Discord ignored_channels and no_thread_channels config.""" + +from types import SimpleNamespace +from datetime import datetime, timezone +from unittest.mock import AsyncMock, MagicMock +import sys + +import pytest + +from gateway.config import PlatformConfig + + +def _ensure_discord_mock(): + """Install a mock discord module when discord.py isn't available.""" + if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"): + return + + discord_mod = MagicMock() + discord_mod.Intents.default.return_value = MagicMock() + discord_mod.Client = MagicMock + discord_mod.File = MagicMock + discord_mod.DMChannel = type("DMChannel", (), {}) + discord_mod.Thread = type("Thread", (), {}) + discord_mod.ForumChannel = type("ForumChannel", (), {}) + discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: 
fn), Button=object) + discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, secondary=2, danger=3, green=1, grey=2, blurple=2, red=3) + discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4, purple=lambda: 5) + discord_mod.Interaction = object + discord_mod.Embed = MagicMock + discord_mod.app_commands = SimpleNamespace( + describe=lambda **kwargs: (lambda fn: fn), + choices=lambda **kwargs: (lambda fn: fn), + Choice=lambda **kwargs: SimpleNamespace(**kwargs), + ) + + ext_mod = MagicMock() + commands_mod = MagicMock() + commands_mod.Bot = MagicMock + ext_mod.commands = commands_mod + + sys.modules.setdefault("discord", discord_mod) + sys.modules.setdefault("discord.ext", ext_mod) + sys.modules.setdefault("discord.ext.commands", commands_mod) + + +_ensure_discord_mock() + +import gateway.platforms.discord as discord_platform # noqa: E402 +from gateway.platforms.discord import DiscordAdapter # noqa: E402 + + +class FakeDMChannel: + def __init__(self, channel_id: int = 1, name: str = "dm"): + self.id = channel_id + self.name = name + + +class FakeTextChannel: + def __init__(self, channel_id: int = 1, name: str = "general", guild_name: str = "Hermes Server"): + self.id = channel_id + self.name = name + self.guild = SimpleNamespace(name=guild_name) + self.topic = None + + +class FakeThread: + def __init__(self, channel_id: int = 1, name: str = "thread", parent=None, guild_name: str = "Hermes Server"): + self.id = channel_id + self.name = name + self.parent = parent + self.parent_id = getattr(parent, "id", None) + self.guild = getattr(parent, "guild", None) or SimpleNamespace(name=guild_name) + self.topic = None + + +@pytest.fixture +def adapter(monkeypatch): + monkeypatch.setattr(discord_platform.discord, "DMChannel", FakeDMChannel, raising=False) + monkeypatch.setattr(discord_platform.discord, "Thread", FakeThread, raising=False) + monkeypatch.setattr(discord_platform.discord, "ForumChannel", type("ForumChannel", 
(), {}), raising=False) + + config = PlatformConfig(enabled=True, token="fake-token") + adapter = DiscordAdapter(config) + adapter._client = SimpleNamespace(user=SimpleNamespace(id=999)) + adapter.handle_message = AsyncMock() + return adapter + + +def make_message(*, channel, content: str, mentions=None): + author = SimpleNamespace(id=42, display_name="TestUser", name="TestUser") + return SimpleNamespace( + id=123, + content=content, + mentions=list(mentions or []), + attachments=[], + reference=None, + created_at=datetime.now(timezone.utc), + channel=channel, + author=author, + ) + + +# ── ignored_channels ───────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_ignored_channel_blocks_message(adapter, monkeypatch): + """Messages in ignored channels are silently dropped.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "500") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + message = make_message(channel=FakeTextChannel(channel_id=500), content="hello") + await adapter._handle_message(message) + + adapter.handle_message.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_ignored_channel_blocks_even_with_mention(adapter, monkeypatch): + """Ignored channels take priority — even @mentions are dropped.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true") + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "500") + + bot_user = adapter._client.user + message = make_message( + channel=FakeTextChannel(channel_id=500), + content=f"<@{bot_user.id}> hello", + mentions=[bot_user], + ) + await adapter._handle_message(message) + + adapter.handle_message.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_non_ignored_channel_processes_normally(adapter, monkeypatch): + """Channels not in the ignored list process normally.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", 
"500,600") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + message = make_message(channel=FakeTextChannel(channel_id=700), content="hello") + await adapter._handle_message(message) + + adapter.handle_message.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_ignored_channels_csv_parsing(adapter, monkeypatch): + """Multiple channel IDs are parsed correctly from CSV.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "500, 600 , 700") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + for ch_id in (500, 600, 700): + adapter.handle_message.reset_mock() + message = make_message(channel=FakeTextChannel(channel_id=ch_id), content="hello") + await adapter._handle_message(message) + adapter.handle_message.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_ignored_channels_empty_string_ignores_nothing(adapter, monkeypatch): + """Empty DISCORD_IGNORED_CHANNELS means nothing is ignored.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + message = make_message(channel=FakeTextChannel(channel_id=500), content="hello") + await adapter._handle_message(message) + + adapter.handle_message.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_ignored_channel_thread_parent_match(adapter, monkeypatch): + """Thread whose parent channel is ignored should also be ignored.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "500") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + parent = FakeTextChannel(channel_id=500, name="ignored-channel") + thread = FakeThread(channel_id=501, name="thread-in-ignored", parent=parent) + message = make_message(channel=thread, content="hello from thread") + await adapter._handle_message(message) + + 
adapter.handle_message.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_dms_unaffected_by_ignored_channels(adapter, monkeypatch): + """DMs should never be affected by ignored_channels.""" + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "500") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + message = make_message(channel=FakeDMChannel(channel_id=500), content="dm hello") + await adapter._handle_message(message) + + adapter.handle_message.assert_awaited_once() + + +# ── no_thread_channels ─────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_no_thread_channel_skips_auto_thread(adapter, monkeypatch): + """Channels in no_thread_channels should not auto-create threads.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_NO_THREAD_CHANNELS", "800") + monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False) + monkeypatch.delenv("DISCORD_IGNORED_CHANNELS", raising=False) + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + adapter._auto_create_thread = AsyncMock(return_value=FakeThread(channel_id=999)) + + message = make_message(channel=FakeTextChannel(channel_id=800), content="hello") + await adapter._handle_message(message) + + adapter._auto_create_thread.assert_not_awaited() + adapter.handle_message.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert event.source.chat_type == "group" + + +@pytest.mark.asyncio +async def test_normal_channel_still_auto_threads(adapter, monkeypatch): + """Channels NOT in no_thread_channels still get auto-threading.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_NO_THREAD_CHANNELS", "800") + monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False) + monkeypatch.delenv("DISCORD_IGNORED_CHANNELS", raising=False) + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + fake_thread = FakeThread(channel_id=999, 
name="auto-thread") + adapter._auto_create_thread = AsyncMock(return_value=fake_thread) + + message = make_message(channel=FakeTextChannel(channel_id=900), content="hello") + await adapter._handle_message(message) + + adapter._auto_create_thread.assert_awaited_once() + adapter.handle_message.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert event.source.chat_type == "thread" + + +@pytest.mark.asyncio +async def test_no_thread_channels_csv_parsing(adapter, monkeypatch): + """Multiple no_thread channel IDs parsed from CSV.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_NO_THREAD_CHANNELS", "800, 900") + monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False) + monkeypatch.delenv("DISCORD_IGNORED_CHANNELS", raising=False) + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + adapter._auto_create_thread = AsyncMock(return_value=FakeThread(channel_id=999)) + + for ch_id in (800, 900): + adapter._auto_create_thread.reset_mock() + adapter.handle_message.reset_mock() + message = make_message(channel=FakeTextChannel(channel_id=ch_id), content="hello") + await adapter._handle_message(message) + adapter._auto_create_thread.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_no_thread_with_auto_thread_disabled_is_noop(adapter, monkeypatch): + """no_thread_channels is a no-op when auto_thread is globally disabled.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "false") + monkeypatch.setenv("DISCORD_AUTO_THREAD", "false") + monkeypatch.setenv("DISCORD_NO_THREAD_CHANNELS", "800") + monkeypatch.delenv("DISCORD_IGNORED_CHANNELS", raising=False) + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + + adapter._auto_create_thread = AsyncMock() + + message = make_message(channel=FakeTextChannel(channel_id=800), content="hello") + await adapter._handle_message(message) + + adapter._auto_create_thread.assert_not_awaited() + 
adapter.handle_message.assert_awaited_once() + + +# ── config.py bridging ─────────────────────────────────────────────── + + +def test_config_bridges_ignored_channels(monkeypatch, tmp_path): + """gateway/config.py bridges discord.ignored_channels to env var.""" + import yaml + config_file = tmp_path / "config.yaml" + config_file.write_text(yaml.dump({ + "discord": { + "ignored_channels": ["111", "222"], + }, + })) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("DISCORD_IGNORED_CHANNELS", raising=False) + + from gateway.config import load_gateway_config + load_gateway_config() + + import os + assert os.getenv("DISCORD_IGNORED_CHANNELS") == "111,222" + + +def test_config_bridges_no_thread_channels(monkeypatch, tmp_path): + """gateway/config.py bridges discord.no_thread_channels to env var.""" + import yaml + config_file = tmp_path / "config.yaml" + config_file.write_text(yaml.dump({ + "discord": { + "no_thread_channels": ["333"], + }, + })) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("DISCORD_NO_THREAD_CHANNELS", raising=False) + + from gateway.config import load_gateway_config + load_gateway_config() + + import os + assert os.getenv("DISCORD_NO_THREAD_CHANNELS") == "333" + + +def test_config_env_var_takes_precedence(monkeypatch, tmp_path): + """Env vars should take precedence over config.yaml values.""" + import yaml + config_file = tmp_path / "config.yaml" + config_file.write_text(yaml.dump({ + "discord": { + "ignored_channels": ["111"], + }, + })) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "999") + + from gateway.config import load_gateway_config + load_gateway_config() + + import os + # Env var should NOT be overwritten + assert os.getenv("DISCORD_IGNORED_CHANNELS") == "999" diff --git a/tests/gateway/test_telegram_reactions.py b/tests/gateway/test_telegram_reactions.py new file mode 100644 index 00000000..c232a760 --- /dev/null +++ 
b/tests/gateway/test_telegram_reactions.py @@ -0,0 +1,258 @@ +"""Tests for Telegram message reactions tied to processing lifecycle hooks.""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock + +import pytest + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import MessageEvent, MessageType +from gateway.session import SessionSource + + +def _make_adapter(**extra_env): + from gateway.platforms.telegram import TelegramAdapter + + adapter = object.__new__(TelegramAdapter) + adapter.platform = Platform.TELEGRAM + adapter.config = PlatformConfig(enabled=True, token="fake-token") + adapter._bot = AsyncMock() + adapter._bot.set_message_reaction = AsyncMock() + return adapter + + +def _make_event(chat_id: str = "123", message_id: str = "456") -> MessageEvent: + return MessageEvent( + text="hello", + message_type=MessageType.TEXT, + source=SessionSource( + platform=Platform.TELEGRAM, + chat_id=chat_id, + chat_type="private", + user_id="42", + user_name="TestUser", + ), + message_id=message_id, + ) + + +# ── _reactions_enabled ─────────────────────────────────────────────── + + +def test_reactions_disabled_by_default(monkeypatch): + """Telegram reactions should be disabled by default.""" + monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False) + adapter = _make_adapter() + assert adapter._reactions_enabled() is False + + +def test_reactions_enabled_when_set_true(monkeypatch): + """Setting TELEGRAM_REACTIONS=true enables reactions.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = _make_adapter() + assert adapter._reactions_enabled() is True + + +def test_reactions_enabled_with_1(monkeypatch): + """TELEGRAM_REACTIONS=1 enables reactions.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "1") + adapter = _make_adapter() + assert adapter._reactions_enabled() is True + + +def test_reactions_disabled_with_false(monkeypatch): + """TELEGRAM_REACTIONS=false disables reactions.""" + 
monkeypatch.setenv("TELEGRAM_REACTIONS", "false") + adapter = _make_adapter() + assert adapter._reactions_enabled() is False + + +def test_reactions_disabled_with_0(monkeypatch): + """TELEGRAM_REACTIONS=0 disables reactions.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "0") + adapter = _make_adapter() + assert adapter._reactions_enabled() is False + + +def test_reactions_disabled_with_no(monkeypatch): + """TELEGRAM_REACTIONS=no disables reactions.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "no") + adapter = _make_adapter() + assert adapter._reactions_enabled() is False + + +# ── _set_reaction ──────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_set_reaction_calls_bot_api(monkeypatch): + """_set_reaction should call bot.set_message_reaction with correct args.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = _make_adapter() + + result = await adapter._set_reaction("123", "456", "\U0001f440") + + assert result is True + adapter._bot.set_message_reaction.assert_awaited_once_with( + chat_id=123, + message_id=456, + reaction="\U0001f440", + ) + + +@pytest.mark.asyncio +async def test_set_reaction_returns_false_without_bot(monkeypatch): + """_set_reaction should return False when bot is not available.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = _make_adapter() + adapter._bot = None + + result = await adapter._set_reaction("123", "456", "\U0001f440") + assert result is False + + +@pytest.mark.asyncio +async def test_set_reaction_handles_api_error_gracefully(monkeypatch): + """API errors during reaction should not propagate.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = _make_adapter() + adapter._bot.set_message_reaction = AsyncMock(side_effect=RuntimeError("no perms")) + + result = await adapter._set_reaction("123", "456", "\U0001f440") + assert result is False + + +# ── on_processing_start ────────────────────────────────────────────── + + +@pytest.mark.asyncio +async 
def test_on_processing_start_adds_eyes_reaction(monkeypatch): + """Processing start should add eyes reaction when enabled.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = _make_adapter() + event = _make_event() + + await adapter.on_processing_start(event) + + adapter._bot.set_message_reaction.assert_awaited_once_with( + chat_id=123, + message_id=456, + reaction="\U0001f440", + ) + + +@pytest.mark.asyncio +async def test_on_processing_start_skipped_when_disabled(monkeypatch): + """Processing start should not react when reactions are disabled.""" + monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False) + adapter = _make_adapter() + event = _make_event() + + await adapter.on_processing_start(event) + + adapter._bot.set_message_reaction.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_on_processing_start_handles_missing_ids(monkeypatch): + """Should handle events without chat_id or message_id gracefully.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = _make_adapter() + event = MessageEvent( + text="hello", + message_type=MessageType.TEXT, + source=SimpleNamespace(chat_id=None), + message_id=None, + ) + + await adapter.on_processing_start(event) + + adapter._bot.set_message_reaction.assert_not_awaited() + + +# ── on_processing_complete ─────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_on_processing_complete_success(monkeypatch): + """Successful processing should set check mark reaction.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = _make_adapter() + event = _make_event() + + await adapter.on_processing_complete(event, success=True) + + adapter._bot.set_message_reaction.assert_awaited_once_with( + chat_id=123, + message_id=456, + reaction="\u2705", + ) + + +@pytest.mark.asyncio +async def test_on_processing_complete_failure(monkeypatch): + """Failed processing should set cross mark reaction.""" + monkeypatch.setenv("TELEGRAM_REACTIONS", "true") + adapter = 
_make_adapter() + event = _make_event() + + await adapter.on_processing_complete(event, success=False) + + adapter._bot.set_message_reaction.assert_awaited_once_with( + chat_id=123, + message_id=456, + reaction="\u274c", + ) + + +@pytest.mark.asyncio +async def test_on_processing_complete_skipped_when_disabled(monkeypatch): + """Processing complete should not react when reactions are disabled.""" + monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False) + adapter = _make_adapter() + event = _make_event() + + await adapter.on_processing_complete(event, success=True) + + adapter._bot.set_message_reaction.assert_not_awaited() + + +# ── config.py bridging ─────────────────────────────────────────────── + + +def test_config_bridges_telegram_reactions(monkeypatch, tmp_path): + """gateway/config.py bridges telegram.reactions to TELEGRAM_REACTIONS env var.""" + import yaml + config_file = tmp_path / "config.yaml" + config_file.write_text(yaml.dump({ + "telegram": { + "reactions": True, + }, + })) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False) + + from gateway.config import load_gateway_config + load_gateway_config() + + import os + assert os.getenv("TELEGRAM_REACTIONS") == "true" + + +def test_config_reactions_env_takes_precedence(monkeypatch, tmp_path): + """Env var should take precedence over config.yaml for reactions.""" + import yaml + config_file = tmp_path / "config.yaml" + config_file.write_text(yaml.dump({ + "telegram": { + "reactions": True, + }, + })) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("TELEGRAM_REACTIONS", "false") + + from gateway.config import load_gateway_config + load_gateway_config() + + import os + assert os.getenv("TELEGRAM_REACTIONS") == "false" From efbe8d674a6d7814891eb027f38f24403c77b925 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 17:38:07 -0700 Subject: [PATCH 102/154] docs: add Discord channel controls and Telegram reactions 
documentation - Discord: ignored_channels, no_thread_channels config reference + examples - Telegram: message reactions section with config, behavior notes - Environment variables reference updated for all new vars --- .../gateway/test_discord_channel_controls.py | 7 ++-- tests/gateway/test_telegram_reactions.py | 4 +- .../docs/reference/environment-variables.md | 4 ++ website/docs/user-guide/messaging/discord.md | 38 +++++++++++++++++++ website/docs/user-guide/messaging/telegram.md | 29 ++++++++++++++ 5 files changed, 78 insertions(+), 4 deletions(-) diff --git a/tests/gateway/test_discord_channel_controls.py b/tests/gateway/test_discord_channel_controls.py index 9b8e7cbb..d71304d0 100644 --- a/tests/gateway/test_discord_channel_controls.py +++ b/tests/gateway/test_discord_channel_controls.py @@ -77,7 +77,6 @@ class FakeThread: def adapter(monkeypatch): monkeypatch.setattr(discord_platform.discord, "DMChannel", FakeDMChannel, raising=False) monkeypatch.setattr(discord_platform.discord, "Thread", FakeThread, raising=False) - monkeypatch.setattr(discord_platform.discord, "ForumChannel", type("ForumChannel", (), {}), raising=False) config = PlatformConfig(enabled=True, token="fake-token") adapter = DiscordAdapter(config) @@ -294,7 +293,9 @@ def test_config_bridges_ignored_channels(monkeypatch, tmp_path): }, })) monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - monkeypatch.delenv("DISCORD_IGNORED_CHANNELS", raising=False) + # Use setenv (not delenv) so monkeypatch registers cleanup even when + # the var doesn't exist yet — load_gateway_config will overwrite it. 
+ monkeypatch.setenv("DISCORD_IGNORED_CHANNELS", "") from gateway.config import load_gateway_config load_gateway_config() @@ -313,7 +314,7 @@ def test_config_bridges_no_thread_channels(monkeypatch, tmp_path): }, })) monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - monkeypatch.delenv("DISCORD_NO_THREAD_CHANNELS", raising=False) + monkeypatch.setenv("DISCORD_NO_THREAD_CHANNELS", "") from gateway.config import load_gateway_config load_gateway_config() diff --git a/tests/gateway/test_telegram_reactions.py b/tests/gateway/test_telegram_reactions.py index c232a760..5068adb9 100644 --- a/tests/gateway/test_telegram_reactions.py +++ b/tests/gateway/test_telegram_reactions.py @@ -230,7 +230,9 @@ def test_config_bridges_telegram_reactions(monkeypatch, tmp_path): }, })) monkeypatch.setenv("HERMES_HOME", str(tmp_path)) - monkeypatch.delenv("TELEGRAM_REACTIONS", raising=False) + # Use setenv (not delenv) so monkeypatch registers cleanup even when + # the var doesn't exist yet — load_gateway_config will overwrite it. + monkeypatch.setenv("TELEGRAM_REACTIONS", "") from gateway.config import load_gateway_config load_gateway_config() diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 7d40546c..beacb8c1 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -164,6 +164,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. 
`TERMINAL_LIFETI | `TELEGRAM_WEBHOOK_URL` | Public HTTPS URL for webhook mode (enables webhook instead of polling) | | `TELEGRAM_WEBHOOK_PORT` | Local listen port for webhook server (default: `8443`) | | `TELEGRAM_WEBHOOK_SECRET` | Secret token for verifying updates come from Telegram | +| `TELEGRAM_REACTIONS` | Enable emoji reactions on messages during processing (default: `false`) | | `DISCORD_BOT_TOKEN` | Discord bot token | | `DISCORD_ALLOWED_USERS` | Comma-separated Discord user IDs allowed to use the bot | | `DISCORD_HOME_CHANNEL` | Default Discord channel for cron delivery | @@ -171,6 +172,9 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI | `DISCORD_REQUIRE_MENTION` | Require an @mention before responding in server channels | | `DISCORD_FREE_RESPONSE_CHANNELS` | Comma-separated channel IDs where mention is not required | | `DISCORD_AUTO_THREAD` | Auto-thread long replies when supported | +| `DISCORD_REACTIONS` | Enable emoji reactions on messages during processing (default: `true`) | +| `DISCORD_IGNORED_CHANNELS` | Comma-separated channel IDs where the bot never responds | +| `DISCORD_NO_THREAD_CHANNELS` | Comma-separated channel IDs where bot responds without auto-threading | | `SLACK_BOT_TOKEN` | Slack bot token (`xoxb-...`) | | `SLACK_APP_TOKEN` | Slack app-level token (`xapp-...`, required for Socket Mode) | | `SLACK_ALLOWED_USERS` | Comma-separated Slack user IDs | diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index bad8d2e3..a015dbb9 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -280,6 +280,8 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede | `DISCORD_AUTO_THREAD` | No | `true` | When `true`, automatically creates a new thread for every `@mention` in a text channel, so each conversation is isolated (similar to Slack behavior). 
Messages already inside threads or DMs are unaffected. | | `DISCORD_ALLOW_BOTS` | No | `"none"` | Controls how the bot handles messages from other Discord bots. `"none"` — ignore all other bots. `"mentions"` — only accept bot messages that `@mention` Hermes. `"all"` — accept all bot messages. | | `DISCORD_REACTIONS` | No | `true` | When `true`, the bot adds emoji reactions to messages during processing (👀 when starting, ✅ on success, ❌ on error). Set to `false` to disable reactions entirely. | +| `DISCORD_IGNORED_CHANNELS` | No | — | Comma-separated channel IDs where the bot **never** responds, even when `@mentioned`. Takes priority over all other channel settings. | +| `DISCORD_NO_THREAD_CHANNELS` | No | — | Comma-separated channel IDs where the bot responds directly in the channel instead of creating a thread. Only relevant when `DISCORD_AUTO_THREAD` is `true`. | ### Config File (`config.yaml`) @@ -292,6 +294,8 @@ discord: free_response_channels: "" # Comma-separated channel IDs (or YAML list) auto_thread: true # Auto-create threads on @mention reactions: true # Add emoji reactions during processing + ignored_channels: [] # Channel IDs where bot never responds + no_thread_channels: [] # Channel IDs where bot responds without threading # Session isolation (applies to all gateway platforms, not just Discord) group_sessions_per_user: true # Isolate sessions per user in shared channels @@ -342,6 +346,40 @@ Controls whether the bot adds emoji reactions to messages as visual feedback: Disable this if you find the reactions distracting or if the bot's role doesn't have the **Add Reactions** permission. +#### `discord.ignored_channels` + +**Type:** string or list — **Default:** `[]` + +Channel IDs where the bot **never** responds, even when directly `@mentioned`. This takes the highest priority — if a channel is in this list, the bot silently ignores all messages there, regardless of `require_mention`, `free_response_channels`, or any other setting. 
+ +```yaml +# String format +discord: + ignored_channels: "1234567890,9876543210" + +# List format +discord: + ignored_channels: + - 1234567890 + - 9876543210 +``` + +If a thread's parent channel is in this list, messages in that thread are also ignored. + +#### `discord.no_thread_channels` + +**Type:** string or list — **Default:** `[]` + +Channel IDs where the bot responds directly in the channel instead of auto-creating a thread. This only has an effect when `auto_thread` is `true` (the default). In these channels, the bot responds inline like a normal message rather than spawning a new thread. + +```yaml +discord: + no_thread_channels: + - 1234567890 # Bot responds inline here +``` + +Useful for channels dedicated to bot interaction where threads would add unnecessary noise. + #### `group_sessions_per_user` **Type:** boolean — **Default:** `true` diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index a60697a0..a59b73ca 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -463,6 +463,35 @@ platforms: You usually don't need to configure this manually. The auto-discovery via DoH handles most restricted-network scenarios. The `TELEGRAM_FALLBACK_IPS` env var is only needed if DoH is also blocked on your network. ::: +## Message Reactions + +The bot can add emoji reactions to messages as visual processing feedback: + +- 👀 when the bot starts processing your message +- ✅ when the response is delivered successfully +- ❌ if an error occurs during processing + +Reactions are **disabled by default**. Enable them in `config.yaml`: + +```yaml +telegram: + reactions: true +``` + +Or via environment variable: + +```bash +TELEGRAM_REACTIONS=true +``` + +:::note +Unlike Discord (where reactions are additive), Telegram's Bot API replaces all bot reactions in a single call. The transition from 👀 to ✅/❌ happens atomically — you won't see both at once. 
+::: + +:::tip +If the bot doesn't have permission to add reactions in a group, the reaction calls fail silently and message processing continues normally. +::: + ## Troubleshooting | Problem | Solution | From 7ec838507a7e2e1d7002c04180d9f4b302a6a4b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Steffen=20R=C3=B6cker?= Date: Tue, 7 Apr 2026 21:23:49 +0200 Subject: [PATCH 103/154] fix(api_server): update tool_progress_callback signature for Open WebUI streaming Commit cc2b56b2 changed the tool_progress_callback signature from (name, preview, args) to (event_type, name, preview, args, **kwargs) but the API server's chat completion streaming callback was not updated. This caused tool calls to not display in Open WebUI because the callback received arguments in wrong positions. - Update _on_tool_progress to use new 4-arg signature - Add event_type filter to only show tool.started events - Add **kwargs for optional duration/is_error parameters --- gateway/platforms/api_server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index d9b32873..82412e5d 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -564,8 +564,10 @@ class APIServerAdapter(BasePlatformAdapter): if delta is not None: _stream_q.put(delta) - def _on_tool_progress(name, preview, args): + def _on_tool_progress(event_type, name, preview, args, **kwargs): """Inject tool progress into the SSE stream for Open WebUI.""" + if event_type != "tool.started": + return # Only show tool start events in chat stream if name.startswith("_"): return # Skip internal events (_thinking) from agent.display import get_tool_emoji From 786038443e06660c468352f35585a402f83c6d15 Mon Sep 17 00:00:00 2001 From: VanBladee Date: Tue, 7 Apr 2026 17:41:05 -0700 Subject: [PATCH 104/154] feat(api): accept conversation_history in request body Allow clients to pass explicit conversation_history in /v1/responses and /v1/runs 
request bodies instead of relying on server-side response chaining via previous_response_id. Solves problems with stateless deployments where the in-memory ResponseStore is lost on restart. Adds input validation (must be array of {role, content} objects) and clear precedence: explicit conversation_history > previous_response_id. Based on PR #5805 by VanBladee, with added input validation. --- gateway/platforms/api_server.py | 46 ++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 82412e5d..241df3a6 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -818,9 +818,29 @@ class APIServerAdapter(BasePlatformAdapter): else: return web.json_response(_openai_error("'input' must be a string or array"), status=400) - # Reconstruct conversation history from previous_response_id + # Accept explicit conversation_history from the request body. + # This lets stateless clients supply their own history instead of + # relying on server-side response chaining via previous_response_id. + # Precedence: explicit conversation_history > previous_response_id. 
conversation_history: List[Dict[str, str]] = [] - if previous_response_id: + raw_history = body.get("conversation_history") + if raw_history: + if not isinstance(raw_history, list): + return web.json_response( + _openai_error("'conversation_history' must be an array of message objects"), + status=400, + ) + for i, entry in enumerate(raw_history): + if not isinstance(entry, dict) or "role" not in entry or "content" not in entry: + return web.json_response( + _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"), + status=400, + ) + conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])}) + if previous_response_id: + logger.debug("Both conversation_history and previous_response_id provided; using conversation_history") + + if not conversation_history and previous_response_id: stored = self._response_store.get(previous_response_id) if stored is None: return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404) @@ -1406,8 +1426,28 @@ class APIServerAdapter(BasePlatformAdapter): instructions = body.get("instructions") previous_response_id = body.get("previous_response_id") + + # Accept explicit conversation_history from the request body. + # Precedence: explicit conversation_history > previous_response_id. 
conversation_history: List[Dict[str, str]] = [] - if previous_response_id: + raw_history = body.get("conversation_history") + if raw_history: + if not isinstance(raw_history, list): + return web.json_response( + _openai_error("'conversation_history' must be an array of message objects"), + status=400, + ) + for i, entry in enumerate(raw_history): + if not isinstance(entry, dict) or "role" not in entry or "content" not in entry: + return web.json_response( + _openai_error(f"conversation_history[{i}] must have 'role' and 'content' fields"), + status=400, + ) + conversation_history.append({"role": str(entry["role"]), "content": str(entry["content"])}) + if previous_response_id: + logger.debug("Both conversation_history and previous_response_id provided; using conversation_history") + + if not conversation_history and previous_response_id: stored = self._response_store.get(previous_response_id) if stored: conversation_history = list(stored.get("conversation_history", [])) From 1d5a69a445619310b7fb6f1d34359ba8eacfc4fd Mon Sep 17 00:00:00 2001 From: pradeep7127 Date: Tue, 7 Apr 2026 17:41:34 -0700 Subject: [PATCH 105/154] fix(api_server): preserve conversation history when /v1/runs input is a message array When /v1/runs receives an OpenAI-style array of messages as input, all messages except the last user turn are now extracted as conversation_history. Previously only the last message was kept, silently discarding earlier context in multi-turn conversations. Handles multi-part content blocks by flattening text portions. Only fires when no explicit conversation_history was provided. Based on PR #5837 by pradeep7127. 
--- gateway/platforms/api_server.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 241df3a6..aafc1579 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -1454,6 +1454,21 @@ class APIServerAdapter(BasePlatformAdapter): if instructions is None: instructions = stored.get("instructions") + # When input is a multi-message array, extract all but the last + # message as conversation history (the last becomes user_message). + # Only fires when no explicit history was provided. + if not conversation_history and isinstance(raw_input, list) and len(raw_input) > 1: + for msg in raw_input[:-1]: + if isinstance(msg, dict) and msg.get("role") and msg.get("content"): + content = msg["content"] + if isinstance(content, list): + # Flatten multi-part content blocks to text + content = " ".join( + part.get("text", "") for part in content + if isinstance(part, dict) and part.get("type") == "text" + ) + conversation_history.append({"role": msg["role"], "content": str(content)}) + session_id = body.get("session_id") or run_id ephemeral_system_prompt = instructions From 50d1518df63a331f6191b7cd03c7a9751a2946c9 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 17:44:04 -0700 Subject: [PATCH 106/154] fix(tests): update tool_progress_callback test calls to new 4-arg signature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to sroecker's PR #5918 — test mocks were using the old 3-arg callback signature (name, preview, args) instead of the new (event_type, name, preview, args, **kwargs). 
--- tests/gateway/test_api_server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 5bde076a..3b216061 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -439,7 +439,7 @@ class TestChatCompletionsEndpoint: tp_cb = kwargs.get("tool_progress_callback") # Simulate tool progress before streaming content if tp_cb: - tp_cb("terminal", "ls -la", {"command": "ls -la"}) + tp_cb("tool.started", "terminal", "ls -la", {"command": "ls -la"}) if cb: await asyncio.sleep(0.05) cb("Here are the files.") @@ -476,8 +476,8 @@ class TestChatCompletionsEndpoint: cb = kwargs.get("stream_delta_callback") tp_cb = kwargs.get("tool_progress_callback") if tp_cb: - tp_cb("_thinking", "some internal state", {}) - tp_cb("web_search", "Python docs", {"query": "Python docs"}) + tp_cb("tool.started", "_thinking", "some internal state", {}) + tp_cb("tool.started", "web_search", "Python docs", {"query": "Python docs"}) if cb: await asyncio.sleep(0.05) cb("Found it.") From c3158d38b28f3b59baef3768f19d09f55375fafd Mon Sep 17 00:00:00 2001 From: Jarvis AI Date: Tue, 7 Apr 2026 21:18:39 +0200 Subject: [PATCH 107/154] fix(gateway): include --profile in launchd/systemd argv for named profiles generate_launchd_plist() and generate_systemd_unit() were missing the --profile argument in ProgramArguments/ExecStart, causing hermes gateway start to regenerate plists that fell back to ~/.hermes/active_profile instead of the intended profile. Fix: - Add _profile_arg(hermes_home?) helper returning '--profile ' only for ~/.hermes/profiles/ paths, empty string otherwise. - Update generate_launchd_plist() to build ProgramArguments array dynamically with --profile when applicable. - Update generate_systemd_unit() both user and system service branches with {profile_arg} in ExecStart. 
This ensures hermes --profile gateway start produces a service definition that correctly scopes to the named profile. --- hermes_cli/gateway.py | 60 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 4a12a34b..89b01b18 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -267,6 +267,34 @@ def _profile_suffix() -> str: return hashlib.sha256(str(home).encode()).hexdigest()[:8] +def _profile_arg(hermes_home: str | None = None) -> str: + """Return ``--profile `` only when HERMES_HOME is a named profile. + + For ``~/.hermes/profiles/``, returns ``"--profile "``. + For the default profile or hash-based custom paths, returns the empty string. + + Args: + hermes_home: Optional explicit HERMES_HOME path. Defaults to the current + ``get_hermes_home()`` value. Should be passed when generating a + service definition for a different user (e.g. system service). + """ + import re + from pathlib import Path as _Path + home = Path(hermes_home or str(get_hermes_home())).resolve() + default = (_Path.home() / ".hermes").resolve() + if home == default: + return "" + profiles_root = (default / "profiles").resolve() + try: + rel = home.relative_to(profiles_root) + parts = rel.parts + if len(parts) == 1 and re.match(r"^[a-z0-9][a-z0-9_-]{0,63}$", parts[0]): + return f"--profile {parts[0]}" + except ValueError: + pass + return "" + + def get_service_name() -> str: """Derive a systemd service name scoped to this HERMES_HOME. 
@@ -626,6 +654,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) if system: username, group_name, home_dir = _system_service_identity(run_as_user) hermes_home = _hermes_home_for_target_user(home_dir) + profile_arg = _profile_arg(hermes_home) path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries)) path_entries.extend(common_bin_paths) sane_path = ":".join(path_entries) @@ -640,7 +669,7 @@ StartLimitBurst=5 Type=simple User={username} Group={group_name} -ExecStart={python_path} -m hermes_cli.main gateway run --replace +ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace WorkingDirectory={working_dir} Environment="HOME={home_dir}" Environment="USER={username}" @@ -661,6 +690,7 @@ WantedBy=multi-user.target """ hermes_home = str(get_hermes_home().resolve()) + profile_arg = _profile_arg(hermes_home) path_entries.extend(_build_user_local_paths(Path.home(), path_entries)) path_entries.extend(common_bin_paths) sane_path = ":".join(path_entries) @@ -672,7 +702,7 @@ StartLimitBurst=5 [Service] Type=simple -ExecStart={python_path} -m hermes_cli.main gateway run --replace +ExecStart={python_path} -m hermes_cli.main{f" {profile_arg}" if profile_arg else ""} gateway run --replace WorkingDirectory={working_dir} Environment="PATH={sane_path}" Environment="VIRTUAL_ENV={venv_dir}" @@ -965,6 +995,7 @@ def generate_launchd_plist() -> str: log_dir = get_hermes_home() / "logs" log_dir.mkdir(parents=True, exist_ok=True) label = get_launchd_label() + profile_arg = _profile_arg(hermes_home) # Build a sane PATH for the launchd plist. launchd provides only a # minimal default (/usr/bin:/bin:/usr/sbin:/sbin) which misses Homebrew, # nvm, cargo, etc. 
We prepend venv/bin and node_modules/.bin (matching @@ -986,21 +1017,32 @@ def generate_launchd_plist() -> str: dict.fromkeys(priority_dirs + [p for p in os.environ.get("PATH", "").split(":") if p]) ) + # Build ProgramArguments array, including --profile when using a named profile + prog_args = [ + f"{python_path}", + "-m", + "hermes_cli.main", + ] + if profile_arg: + for part in profile_arg.split(): + prog_args.append(f"{part}") + prog_args.extend([ + "gateway", + "run", + "--replace", + ]) + prog_args_xml = "\n ".join(prog_args) + return f""" Label {label} - + ProgramArguments - {python_path} - -m - hermes_cli.main - gateway - run - --replace + {prog_args_xml} WorkingDirectory From 25080986a03efcbae11d4ea60117d32fc9450c5f Mon Sep 17 00:00:00 2001 From: Marc Bickel Date: Tue, 7 Apr 2026 13:46:59 +0200 Subject: [PATCH 108/154] fix(gateway): discard empty placeholder when voice transcription succeeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a Discord voice message arrives, the adapter sets event.text to "(The user sent a message with no text content)" since voice messages have no text content. The transcription enrichment in _enrich_message_with_transcription() then prepends the transcript but leaves the placeholder intact, causing the agent to receive both: [The user sent a voice message~ Here's what they said: "..."] (The user sent a message with no text content) The agent sees this as two separate user turns — one transcribed and one empty — creating confusing duplicate messages. Fix: when the transcription succeeds and user_text is only the empty placeholder, return just the transcript without the redundant placeholder. 
--- gateway/run.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index 68027f28..99c71d91 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6044,6 +6044,11 @@ class GatewayRunner: if enriched_parts: prefix = "\n\n".join(enriched_parts) + # Strip the empty-content placeholder from the Discord adapter + # when we successfully transcribed the audio — it's redundant. + _placeholder = "(The user sent a message with no text content)" + if user_text and user_text.strip() == _placeholder: + return prefix if user_text: return f"{prefix}\n\n{user_text}" return prefix From 6e02fa73c21914f553d99c9dd05a345a0fcdf67f Mon Sep 17 00:00:00 2001 From: Marc Bickel Date: Tue, 7 Apr 2026 15:28:36 +0200 Subject: [PATCH 109/154] fix(discord): discard empty placeholder on voice transcription + force STT language MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - gateway/run.py: Strip "(The user sent a message with no text content)" placeholder when voice transcription succeeds — it was being appended alongside the transcript, creating duplicate user turns. - tools/transcription_tools.py: Wire HERMES_LOCAL_STT_LANGUAGE env var into the faster-whisper backend. It was only used by the CLI fallback path (_transcribe_local_command), not the primary faster-whisper path. --- tools/transcription_tools.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index d8d0f364..d473172a 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -295,7 +295,13 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]: _local_model = WhisperModel(model_name, device="auto", compute_type="auto") _local_model_name = model_name - segments, info = _local_model.transcribe(file_path, beam_size=5) + # Allow forcing the language via env var (e.g. 
HERMES_LOCAL_STT_LANGUAGE=en) + _forced_lang = os.getenv(LOCAL_STT_LANGUAGE_ENV, DEFAULT_LOCAL_STT_LANGUAGE) + transcribe_kwargs = {"beam_size": 5} + if _forced_lang: + transcribe_kwargs["language"] = _forced_lang + + segments, info = _local_model.transcribe(file_path, **transcribe_kwargs) transcript = " ".join(segment.text.strip() for segment in segments) logger.info( From f3c59321aff81ec5f03ba9e5d8a2a393553ea2eb Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 17:35:00 -0700 Subject: [PATCH 110/154] fix: add _profile_arg tests + move STT language to config.yaml - Add 7 unit tests for _profile_arg: default home, named profile, hash path, nested path, invalid name, systemd integration, launchd integration - Add stt.local.language to config.yaml (empty = auto-detect) - Both STT code paths now read config.yaml first, env var fallback, then default (auto-detect for faster-whisper, 'en' for CLI command) - HERMES_LOCAL_STT_LANGUAGE env var still works as backward-compat fallback --- hermes_cli/config.py | 1 + tests/hermes_cli/test_gateway_service.py | 66 ++++++++++++++++++++++++ tools/transcription_tools.py | 15 ++++-- 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 405b83ac..3338a13c 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -416,6 +416,7 @@ DEFAULT_CONFIG = { "provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) "local": { "model": "base", # tiny, base, small, medium, large-v3 + "language": "", # auto-detect by default; set to "en", "es", "fr", etc. 
to force }, "openai": { "model": "whisper-1", # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index 03c9c56e..739d4500 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -641,3 +641,69 @@ class TestEnsureUserSystemdEnv: result = gateway_cli._systemctl_cmd(system=True) assert result == ["systemctl"] assert calls == [] + + +class TestProfileArg: + """Tests for _profile_arg — returns '--profile ' for named profiles.""" + + def test_default_hermes_home_returns_empty(self, tmp_path, monkeypatch): + """Default ~/.hermes should not produce a --profile flag.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + result = gateway_cli._profile_arg(str(hermes_home)) + assert result == "" + + def test_named_profile_returns_flag(self, tmp_path, monkeypatch): + """~/.hermes/profiles/mybot should return '--profile mybot'.""" + profile_dir = tmp_path / ".hermes" / "profiles" / "mybot" + profile_dir.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + result = gateway_cli._profile_arg(str(profile_dir)) + assert result == "--profile mybot" + + def test_hash_path_returns_empty(self, tmp_path, monkeypatch): + """Arbitrary non-profile HERMES_HOME should return empty string.""" + custom_home = tmp_path / "custom" / "hermes" + custom_home.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + result = gateway_cli._profile_arg(str(custom_home)) + assert result == "" + + def test_nested_profile_path_returns_empty(self, tmp_path, monkeypatch): + """~/.hermes/profiles/mybot/subdir should NOT match — too deep.""" + nested = tmp_path / ".hermes" / "profiles" / "mybot" / "subdir" + nested.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + result = gateway_cli._profile_arg(str(nested)) + assert result == "" + + 
def test_invalid_profile_name_returns_empty(self, tmp_path, monkeypatch): + """Profile names with invalid chars should not match the regex.""" + bad_profile = tmp_path / ".hermes" / "profiles" / "My Bot!" + bad_profile.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + result = gateway_cli._profile_arg(str(bad_profile)) + assert result == "" + + def test_systemd_unit_includes_profile(self, tmp_path, monkeypatch): + """generate_systemd_unit should include --profile in ExecStart for named profiles.""" + profile_dir = tmp_path / ".hermes" / "profiles" / "mybot" + profile_dir.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(profile_dir)) + monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir) + unit = gateway_cli.generate_systemd_unit(system=False) + assert "--profile mybot" in unit + assert "gateway run --replace" in unit + + def test_launchd_plist_includes_profile(self, tmp_path, monkeypatch): + """generate_launchd_plist should include --profile in ProgramArguments for named profiles.""" + profile_dir = tmp_path / ".hermes" / "profiles" / "mybot" + profile_dir.mkdir(parents=True) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(profile_dir)) + monkeypatch.setattr(gateway_cli, "get_hermes_home", lambda: profile_dir) + plist = gateway_cli.generate_launchd_plist() + assert "--profile" in plist + assert "mybot" in plist diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index d473172a..4f07e5c4 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -295,8 +295,12 @@ def _transcribe_local(file_path: str, model_name: str) -> Dict[str, Any]: _local_model = WhisperModel(model_name, device="auto", compute_type="auto") _local_model_name = model_name - # Allow forcing the language via env var (e.g. 
HERMES_LOCAL_STT_LANGUAGE=en) - _forced_lang = os.getenv(LOCAL_STT_LANGUAGE_ENV, DEFAULT_LOCAL_STT_LANGUAGE) + # Language: config.yaml (stt.local.language) > env var > auto-detect. + _forced_lang = ( + _load_stt_config().get("local", {}).get("language") + or os.getenv(LOCAL_STT_LANGUAGE_ENV) + or None + ) transcribe_kwargs = {"beam_size": 5} if _forced_lang: transcribe_kwargs["language"] = _forced_lang @@ -350,7 +354,12 @@ def _transcribe_local_command(file_path: str, model_name: str) -> Dict[str, Any] ), } - language = os.getenv(LOCAL_STT_LANGUAGE_ENV, DEFAULT_LOCAL_STT_LANGUAGE) + # Language: config.yaml (stt.local.language) > env var > "en" default. + language = ( + _load_stt_config().get("local", {}).get("language") + or os.getenv(LOCAL_STT_LANGUAGE_ENV) + or DEFAULT_LOCAL_STT_LANGUAGE + ) normalized_model = _normalize_local_command_model(model_name) try: From 9692b3c28ad3f89e1b8b34e7c05b4a4f1a731b52 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:59:42 -0700 Subject: [PATCH 111/154] =?UTF-8?q?fix:=20CLI/UX=20batch=20=E2=80=94=20Cha?= =?UTF-8?q?tConsole=20errors,=20curses=20scroll,=20skin-aware=20banner,=20?= =?UTF-8?q?git=20state=20banner=20(#5974)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(cli): route error messages through ChatConsole inside patch_stdout Cherry-pick of PR #5798 by @icn5381. Replace self.console.print() with ChatConsole().print() for 11 error/status messages reachable during the interactive session. Inside patch_stdout, self.console (plain Rich Console) writes raw ANSI escapes that StdoutProxy mangles into garbled text. ChatConsole uses prompt_toolkit's native print_formatted_text which renders correctly. Same class of bug as #2262 — that fix covered agent output but missed these error paths in _ensure_runtime_credentials, _init_agent, quick commands, skill loading, and plan mode. 
* fix(model-picker): add scrolling viewport to curses provider menu Cherry-pick of PR #5790 by @Lempkey. Fixes #5755. _curses_prompt_choice rendered items starting unconditionally from index 0 with no scroll offset. The 'More providers' submenu has 13 entries. On terminals shorter than ~16 rows, items past the fold were never drawn. When UP-arrow wrapped cursor from 0 to the last item (Cancel, index 12), the highlight rendered off-screen — appearing as if only Cancel existed. Adds scroll_offset tracking that adjusts each frame to keep the cursor inside the visible window. * feat(cli): skin-aware compact banner + git state in startup banner Combined salvage of PR #5922 by @ASRagab and PR #5877 by @xinbenlv. Compact banner changes (from #5922): - Read active skin colors and branding instead of hardcoding gold/NOUS HERMES - Default skin preserves backward-compatible legacy branding - Non-default skins use their own agent_name and colors Git state in banner (from #5877): - New format_banner_version_label() shows upstream/local git hashes - Full banner title now includes git state (upstream hash, carried commits) - Compact banner line2 shows the version label with git state - Widen compact banner max width from 64 to 88 to fit version info Both the full Rich banner and compact fallback are now skin-aware and show git state. 
--- cli.py | 67 +++++++---- hermes_cli/banner.py | 76 +++++++++++- hermes_cli/setup.py | 18 ++- tests/hermes_cli/test_banner_git_state.py | 63 ++++++++++ tests/test_cli_skin_integration.py | 140 ++++++++++++++++++++++ tests/test_model_picker_scroll.py | 118 ++++++++++++++++++ 6 files changed, 456 insertions(+), 26 deletions(-) create mode 100644 tests/hermes_cli/test_banner_git_state.py create mode 100644 tests/test_cli_skin_integration.py create mode 100644 tests/test_model_picker_scroll.py diff --git a/cli.py b/cli.py index 2dce0827..b4358a16 100644 --- a/cli.py +++ b/cli.py @@ -63,7 +63,7 @@ from agent.usage_pricing import ( format_duration_compact, format_token_count_compact, ) -from hermes_cli.banner import _format_context_length +from hermes_cli.banner import _format_context_length, format_banner_version_label _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏") @@ -1036,21 +1036,44 @@ COMPACT_BANNER = """ def _build_compact_banner() -> str: """Build a compact banner that fits the current terminal width.""" - w = min(shutil.get_terminal_size().columns - 2, 64) + try: + from hermes_cli.skin_engine import get_active_skin + _skin = get_active_skin() + except Exception: + _skin = None + + skin_name = getattr(_skin, "name", "default") if _skin else "default" + border_color = _skin.get_color("banner_border", "#FFD700") if _skin else "#FFD700" + title_color = _skin.get_color("banner_title", "#FFBF00") if _skin else "#FFBF00" + dim_color = _skin.get_color("banner_dim", "#B8860B") if _skin else "#B8860B" + + if skin_name == "default": + line1 = "⚕ NOUS HERMES - AI Agent Framework" + tiny_line = "⚕ NOUS HERMES" + else: + agent_name = _skin.get_branding("agent_name", "Hermes Agent") if _skin else "Hermes Agent" + line1 = f"{agent_name} - AI Agent Framework" + tiny_line = agent_name + + version_line = format_banner_version_label() + + w = min(shutil.get_terminal_size().columns - 2, 88) if w < 30: - return "\n[#FFBF00]⚕ NOUS HERMES[/] [dim 
#B8860B]- Nous Research[/]\n" + return f"\n[{title_color}]{tiny_line}[/] [dim {dim_color}]- Nous Research[/]\n" + inner = w - 2 # inside the box border bar = "═" * w - line1 = "⚕ NOUS HERMES - AI Agent Framework" - line2 = "Messenger of the Digital Gods · Nous Research" + content_width = inner - 2 + # Truncate and pad to fit - line1 = line1[:inner - 2].ljust(inner - 2) - line2 = line2[:inner - 2].ljust(inner - 2) + line1 = line1[:content_width].ljust(content_width) + line2 = version_line[:content_width].ljust(content_width) + return ( - f"\n[bold #FFD700]╔{bar}╗[/]\n" - f"[bold #FFD700]║[/] [#FFBF00]{line1}[/] [bold #FFD700]║[/]\n" - f"[bold #FFD700]║[/] [dim #B8860B]{line2}[/] [bold #FFD700]║[/]\n" - f"[bold #FFD700]╚{bar}╝[/]\n" + f"\n[bold {border_color}]╔{bar}╗[/]\n" + f"[bold {border_color}]║[/] [{title_color}]{line1}[/] [bold {border_color}]║[/]\n" + f"[bold {border_color}]║[/] [dim {dim_color}]{line2}[/] [bold {border_color}]║[/]\n" + f"[bold {border_color}]╚{bar}╝[/]\n" ) @@ -2163,7 +2186,7 @@ class HermesCLI: ) except Exception as exc: message = format_runtime_provider_error(exc) - self.console.print(f"[bold red]{message}[/]") + ChatConsole().print(f"[bold red]{message}[/]") return False api_key = runtime.get("api_key") @@ -2378,7 +2401,7 @@ class HermesCLI: self._pending_title = None return True except Exception as e: - self.console.print(f"[bold red]Failed to initialize agent: {e}[/]") + ChatConsole().print(f"[bold red]Failed to initialize agent: {e}[/]") return False def show_banner(self): @@ -4530,13 +4553,13 @@ class HermesCLI: if output: self.console.print(_rich_text_from_ansi(output)) else: - self.console.print("[dim]Command returned no output[/]") + ChatConsole().print("[dim]Command returned no output[/]") except subprocess.TimeoutExpired: - self.console.print("[bold red]Quick command timed out (30s)[/]") + ChatConsole().print("[bold red]Quick command timed out (30s)[/]") except Exception as e: - self.console.print(f"[bold red]Quick command error: 
{e}[/]") + ChatConsole().print(f"[bold red]Quick command error: {e}[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") + ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") elif qcmd.get("type") == "alias": target = qcmd.get("target", "").strip() if target: @@ -4545,9 +4568,9 @@ class HermesCLI: aliased_command = f"{target} {user_args}".strip() return self.process_command(aliased_command) else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") + ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") else: - self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") + ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") # Check for plugin-registered slash commands elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names(): from hermes_cli.plugins import get_plugin_command_handler @@ -4572,7 +4595,7 @@ class HermesCLI: if hasattr(self, '_pending_input'): self._pending_input.put(msg) else: - self.console.print(f"[bold red]Failed to load skill for {base_cmd}[/]") + ChatConsole().print(f"[bold red]Failed to load skill for {base_cmd}[/]") else: # Prefix matching: if input uniquely identifies one command, execute it. # Matches against both built-in COMMANDS and installed skill commands so @@ -4633,14 +4656,14 @@ class HermesCLI: ) if not msg: - self.console.print("[bold red]Failed to load the bundled /plan skill[/]") + ChatConsole().print("[bold red]Failed to load the bundled /plan skill[/]") return _cprint(f" 📝 Plan mode queued via skill. 
Markdown plan target: {plan_path}") if hasattr(self, '_pending_input'): self._pending_input.put(msg) else: - self.console.print("[bold red]Plan mode unavailable: input queue not initialized[/]") + ChatConsole().print("[bold red]Plan mode unavailable: input queue not initialized[/]") def _handle_background_command(self, cmd: str): """Handle /background — run a prompt in a separate background session. diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index b9701d54..03712c27 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -5,6 +5,7 @@ Pure display functions with no HermesCLI state dependency. import json import logging +import os import shutil import subprocess import threading @@ -189,6 +190,79 @@ def check_for_updates() -> Optional[int]: return behind +def _resolve_repo_dir() -> Optional[Path]: + """Return the active Hermes git checkout, or None if this isn't a git install.""" + hermes_home = get_hermes_home() + repo_dir = hermes_home / "hermes-agent" + if not (repo_dir / ".git").exists(): + repo_dir = Path(__file__).parent.parent.resolve() + return repo_dir if (repo_dir / ".git").exists() else None + + +def _git_short_hash(repo_dir: Path, rev: str) -> Optional[str]: + """Resolve a git revision to an 8-character short hash.""" + try: + result = subprocess.run( + ["git", "rev-parse", "--short=8", rev], + capture_output=True, + text=True, + timeout=5, + cwd=str(repo_dir), + ) + except Exception: + return None + if result.returncode != 0: + return None + value = (result.stdout or "").strip() + return value or None + + +def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]: + """Return upstream/local git hashes for the startup banner.""" + repo_dir = repo_dir or _resolve_repo_dir() + if repo_dir is None: + return None + + upstream = _git_short_hash(repo_dir, "origin/main") + local = _git_short_hash(repo_dir, "HEAD") + if not upstream or not local: + return None + + ahead = 0 + try: + result = subprocess.run( + ["git", 
"rev-list", "--count", "origin/main..HEAD"], + capture_output=True, + text=True, + timeout=5, + cwd=str(repo_dir), + ) + if result.returncode == 0: + ahead = int((result.stdout or "0").strip() or "0") + except Exception: + ahead = 0 + + return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)} + + +def format_banner_version_label() -> str: + """Return the version label shown in the startup banner title.""" + base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})" + state = get_git_banner_state() + if not state: + return base + + upstream = state["upstream"] + local = state["local"] + ahead = int(state.get("ahead") or 0) + + if ahead <= 0 or upstream == local: + return f"{base} · upstream {upstream}" + + carried_word = "commit" if ahead == 1 else "commits" + return f"{base} · upstream {upstream} · local {local} (+{ahead} carried {carried_word})" + + # ========================================================================= # Non-blocking update check # ========================================================================= @@ -448,7 +522,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str, border_color = _skin_color("banner_border", "#CD7F32") outer_panel = Panel( layout_table, - title=f"[bold {title_color}]{agent_name} v{VERSION} ({RELEASE_DATE})[/]", + title=f"[bold {title_color}]{format_banner_version_label()}[/]", border_style=border_color, padding=(0, 2), ) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index ea2e57a9..2407ca27 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -421,10 +421,22 @@ def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int curses.init_pair(1, curses.COLOR_GREEN, -1) curses.init_pair(2, curses.COLOR_YELLOW, -1) cursor = default + scroll_offset = 0 while True: stdscr.clear() max_y, max_x = stdscr.getmaxyx() + + # Rows available for list items: rows 2..(max_y-2) inclusive. + visible = max(1, max_y - 3) + + # Scroll the viewport so the cursor is always visible. 
+ if cursor < scroll_offset: + scroll_offset = cursor + elif cursor >= scroll_offset + visible: + scroll_offset = cursor - visible + 1 + scroll_offset = max(0, min(scroll_offset, max(0, len(choices) - visible))) + try: stdscr.addnstr( 0, @@ -436,12 +448,12 @@ def _curses_prompt_choice(question: str, choices: list, default: int = 0) -> int except curses.error: pass - for i, choice in enumerate(choices): - y = i + 2 + for row, i in enumerate(range(scroll_offset, min(scroll_offset + visible, len(choices)))): + y = row + 2 if y >= max_y - 1: break arrow = "→" if i == cursor else " " - line = f" {arrow} {choice}" + line = f" {arrow} {choices[i]}" attr = curses.A_NORMAL if i == cursor: attr = curses.A_BOLD diff --git a/tests/hermes_cli/test_banner_git_state.py b/tests/hermes_cli/test_banner_git_state.py new file mode 100644 index 00000000..6556145e --- /dev/null +++ b/tests/hermes_cli/test_banner_git_state.py @@ -0,0 +1,63 @@ +from unittest.mock import MagicMock, patch + + +def test_format_banner_version_label_without_git_state(): + from hermes_cli import banner + + with patch.object(banner, "get_git_banner_state", return_value=None): + value = banner.format_banner_version_label() + + assert value == f"Hermes Agent v{banner.VERSION} ({banner.RELEASE_DATE})" + + +def test_format_banner_version_label_on_upstream_main(): + from hermes_cli import banner + + with patch.object( + banner, + "get_git_banner_state", + return_value={"upstream": "b2f477a3", "local": "b2f477a3", "ahead": 0}, + ): + value = banner.format_banner_version_label() + + assert value.endswith("· upstream b2f477a3") + assert "local" not in value + + +def test_format_banner_version_label_with_carried_commits(): + from hermes_cli import banner + + with patch.object( + banner, + "get_git_banner_state", + return_value={"upstream": "b2f477a3", "local": "af8aad31", "ahead": 3}, + ): + value = banner.format_banner_version_label() + + assert "upstream b2f477a3" in value + assert "local af8aad31" in value + assert 
"+3 carried commits" in value + + +def test_get_git_banner_state_reads_origin_and_head(tmp_path): + from hermes_cli import banner + + repo_dir = tmp_path / "repo" + (repo_dir / ".git").mkdir(parents=True) + + results = { + ("git", "rev-parse", "--short=8", "origin/main"): MagicMock(returncode=0, stdout="b2f477a3\n"), + ("git", "rev-parse", "--short=8", "HEAD"): MagicMock(returncode=0, stdout="af8aad31\n"), + ("git", "rev-list", "--count", "origin/main..HEAD"): MagicMock(returncode=0, stdout="3\n"), + } + + def fake_run(cmd, **kwargs): + key = tuple(cmd) + if key not in results: + raise AssertionError(f"unexpected command: {cmd}") + return results[key] + + with patch("hermes_cli.banner.subprocess.run", side_effect=fake_run): + state = banner.get_git_banner_state(repo_dir) + + assert state == {"upstream": "b2f477a3", "local": "af8aad31", "ahead": 3} diff --git a/tests/test_cli_skin_integration.py b/tests/test_cli_skin_integration.py new file mode 100644 index 00000000..272a7bc5 --- /dev/null +++ b/tests/test_cli_skin_integration.py @@ -0,0 +1,140 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from cli import HermesCLI, _build_compact_banner, _rich_text_from_ansi +from hermes_cli.skin_engine import get_active_skin, set_active_skin + + +def _make_cli_stub(): + cli = HermesCLI.__new__(HermesCLI) + cli._sudo_state = None + cli._secret_state = None + cli._approval_state = None + cli._clarify_state = None + cli._clarify_freetext = False + cli._command_running = False + cli._agent_running = False + cli._voice_recording = False + cli._voice_processing = False + cli._voice_mode = False + cli._command_spinner_frame = lambda: "⟳" + cli._tui_style_base = { + "prompt": "#fff", + "input-area": "#fff", + "input-rule": "#aaa", + "prompt-working": "#888 italic", + } + cli._app = SimpleNamespace(style=None) + cli._invalidate = MagicMock() + return cli + + +class TestCliSkinPromptIntegration: + def 
test_default_prompt_fragments_use_default_symbol(self): + cli = _make_cli_stub() + + set_active_skin("default") + assert cli._get_tui_prompt_fragments() == [("class:prompt", "❯ ")] + + def test_ares_prompt_fragments_use_skin_symbol(self): + cli = _make_cli_stub() + + set_active_skin("ares") + assert cli._get_tui_prompt_fragments() == [("class:prompt", "⚔ ❯ ")] + + def test_secret_prompt_fragments_preserve_secret_state(self): + cli = _make_cli_stub() + cli._secret_state = {"response_queue": object()} + + set_active_skin("ares") + assert cli._get_tui_prompt_fragments() == [("class:sudo-prompt", "🔑 ❯ ")] + + def test_icon_only_skin_symbol_still_visible_in_special_states(self): + cli = _make_cli_stub() + cli._secret_state = {"response_queue": object()} + + with patch("hermes_cli.skin_engine.get_active_prompt_symbol", return_value="⚔ "): + assert cli._get_tui_prompt_fragments() == [("class:sudo-prompt", "🔑 ⚔ ")] + + def test_build_tui_style_dict_uses_skin_overrides(self): + cli = _make_cli_stub() + + set_active_skin("ares") + skin = get_active_skin() + style_dict = cli._build_tui_style_dict() + + assert style_dict["prompt"] == skin.get_color("prompt") + assert style_dict["input-rule"] == skin.get_color("input_rule") + assert style_dict["prompt-working"] == f"{skin.get_color('banner_dim')} italic" + assert style_dict["approval-title"] == f"{skin.get_color('ui_warn')} bold" + + def test_apply_tui_skin_style_updates_running_app(self): + cli = _make_cli_stub() + + set_active_skin("ares") + assert cli._apply_tui_skin_style() is True + assert cli._app.style is not None + cli._invalidate.assert_called_once_with(min_interval=0.0) + + def test_handle_skin_command_refreshes_live_tui(self, capsys): + cli = _make_cli_stub() + + with patch("cli.save_config_value", return_value=True): + cli._handle_skin_command("/skin ares") + + output = capsys.readouterr().out + assert "Skin set to: ares (saved)" in output + assert "Prompt + TUI colors updated." 
in output + assert cli._app.style is not None + + +class TestCompactBannerSkinIntegration: + def test_default_compact_banner_keeps_legacy_nous_hermes_branding(self): + set_active_skin("default") + + with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ + patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"): + banner = _build_compact_banner() + + assert "NOUS HERMES" in banner + + def test_poseidon_compact_banner_uses_skin_branding_instead_of_nous_hermes(self): + set_active_skin("poseidon") + + with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ + patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"): + banner = _build_compact_banner() + + assert "Poseidon Agent" in banner + assert "NOUS HERMES" not in banner + + def test_poseidon_compact_banner_uses_skin_colors(self): + set_active_skin("poseidon") + skin = get_active_skin() + + with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ + patch("cli.format_banner_version_label", return_value="Hermes Agent v0.1.0 (test)"): + banner = _build_compact_banner() + + assert skin.get_color("banner_border") in banner + assert skin.get_color("banner_title") in banner + assert skin.get_color("banner_dim") in banner + + def test_compact_banner_shows_version_label(self): + set_active_skin("default") + + with patch("cli.shutil.get_terminal_size", return_value=SimpleNamespace(columns=90)), \ + patch("cli.format_banner_version_label", return_value="Hermes Agent v1.0 (test) · upstream abc12345"): + banner = _build_compact_banner() + + assert "upstream abc12345" in banner + + +class TestAnsiRichTextHelper: + def test_preserves_literal_brackets(self): + text = _rich_text_from_ansi("[notatag] literal") + assert text.plain == "[notatag] literal" + + def test_strips_ansi_but_keeps_plain_text(self): + text = _rich_text_from_ansi("\x1b[31mred\x1b[0m") + assert text.plain == "red" 
diff --git a/tests/test_model_picker_scroll.py b/tests/test_model_picker_scroll.py
new file mode 100644
index 00000000..e20c330e
--- /dev/null
+++ b/tests/test_model_picker_scroll.py
@@ -0,0 +1,118 @@
+"""Tests for the scrolling viewport logic in _curses_prompt_choice (issue #5755).
+
+The "More providers" submenu has 13 entries (11 extended + custom + cancel).
+Before the fix, _curses_prompt_choice rendered items starting unconditionally
+from index 0 with no scroll offset. On terminals shorter than ~16 rows, items
+near the bottom were never drawn. When the cursor wrapped from 0 to the last
+item (Cancel) via UP-arrow, the highlight rendered off-screen, leaving the menu
+looking like only "Cancel" existed.
+
+The fix adds a scroll_offset that tracks the cursor so the highlighted item
+is always within the visible window. These tests exercise that logic in
+isolation without requiring a real TTY.
+"""
+
+import sys
+import os
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+
+# ---------------------------------------------------------------------------
+# Pure scroll-offset logic extracted from _curses_prompt_choice for unit testing
+# ---------------------------------------------------------------------------
+
+def _compute_scroll_offset(cursor: int, scroll_offset: int, visible: int, n_choices: int) -> int:
+    """Mirror of the scroll adjustment block inside _curses_prompt_choice."""
+    if cursor < scroll_offset:
+        scroll_offset = cursor
+    elif cursor >= scroll_offset + visible:
+        scroll_offset = cursor - visible + 1
+    scroll_offset = max(0, min(scroll_offset, max(0, n_choices - visible)))
+    return scroll_offset
+
+
+def _visible_indices(cursor: int, scroll_offset: int, visible: int, n_choices: int):
+    """Return the list indices that would be rendered for the given state."""
+    scroll_offset = _compute_scroll_offset(cursor, scroll_offset, visible, n_choices)
+    return list(range(scroll_offset, min(scroll_offset + visible, n_choices)))
+
+
+# 
--------------------------------------------------------------------------- +# Tests: scroll offset calculation +# --------------------------------------------------------------------------- + +class TestScrollOffsetLogic: + N = 13 # typical extended-providers list length + + def test_cursor_at_zero_no_scroll(self): + """Start position: offset stays 0, first items visible.""" + assert _compute_scroll_offset(0, 0, 8, self.N) == 0 + + def test_cursor_within_window_unchanged(self): + """Cursor inside the current window: offset unchanged.""" + assert _compute_scroll_offset(5, 0, 8, self.N) == 0 + + def test_cursor_at_last_item_scrolls_down(self): + """Cursor on Cancel (index 12) with 8-row window: offset = 12 - 8 + 1 = 5.""" + offset = _compute_scroll_offset(12, 0, 8, self.N) + assert offset == 5 + assert 12 in _visible_indices(12, 0, 8, self.N) + + def test_cursor_wraps_to_cancel_via_up(self): + """UP from index 0 wraps to last item; last item must be visible.""" + wrapped_cursor = (0 - 1) % self.N # == 12 + indices = _visible_indices(wrapped_cursor, 0, 8, self.N) + assert wrapped_cursor in indices + + def test_cursor_above_window_scrolls_up(self): + """Cursor above current window: offset tracks cursor.""" + # window currently shows [5..12], cursor moves to 3 + offset = _compute_scroll_offset(3, 5, 8, self.N) + assert offset == 3 + assert 3 in _visible_indices(3, 5, 8, self.N) + + def test_visible_window_never_exceeds_list(self): + """Offset is clamped so the window never starts past the list end.""" + offset = _compute_scroll_offset(12, 0, 20, self.N) # window larger than list + assert offset == 0 + + def test_single_item_list(self): + """Edge case: one choice, cursor 0.""" + assert _compute_scroll_offset(0, 0, 8, 1) == 0 + + def test_list_fits_in_window_no_scroll_needed(self): + """If all choices fit in the visible window, offset is always 0.""" + for cursor in range(self.N): + offset = _compute_scroll_offset(cursor, 0, 20, self.N) + assert offset == 0, 
f"cursor={cursor} should not scroll when window > list" + + def test_cursor_always_in_visible_range(self): + """Invariant: cursor is always within the rendered window after adjustment.""" + visible = 5 + for cursor in range(self.N): + indices = _visible_indices(cursor, 0, visible, self.N) + assert cursor in indices, f"cursor={cursor} not in visible={indices}" + + def test_full_navigation_down_cursor_always_visible(self): + """Simulate pressing DOWN through all items; cursor always in view.""" + visible = 6 + scroll_offset = 0 + cursor = 0 + for _ in range(self.N + 2): # wrap around twice + scroll_offset = _compute_scroll_offset(cursor, scroll_offset, visible, self.N) + rendered = list(range(scroll_offset, min(scroll_offset + visible, self.N))) + assert cursor in rendered, f"cursor={cursor} not in rendered={rendered}" + cursor = (cursor + 1) % self.N + + def test_full_navigation_up_cursor_always_visible(self): + """Simulate pressing UP through all items; cursor always in view.""" + visible = 6 + scroll_offset = 0 + cursor = 0 + for _ in range(self.N + 2): + scroll_offset = _compute_scroll_offset(cursor, scroll_offset, visible, self.N) + rendered = list(range(scroll_offset, min(scroll_offset + visible, self.N))) + assert cursor in rendered, f"cursor={cursor} not in rendered={rendered}" + cursor = (cursor - 1) % self.N From cbf1f15cfedfca3fd5130b5532fb9b7f8421946b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 17:59:47 -0700 Subject: [PATCH 112/154] fix(auxiliary): resolve named custom providers and 'main' alias in auxiliary routing (#5978) * fix(telegram): replace substring caption check with exact line-by-line match Captions in photo bursts and media group albums were silently dropped when a shorter caption happened to be a substring of an existing one (e.g. "Meeting" lost inside "Meeting agenda"). 
Extract a shared _merge_caption static helper that splits on "\n\n" and uses exact match with whitespace normalisation, then use it in both _enqueue_photo_event and _queue_media_group_event. Adds 13 unit tests covering the fixed bug scenarios. Cherry-picked from PR #2671 by Dilee. * fix: extend caption substring fix to all platforms Move _merge_caption helper from TelegramAdapter to BasePlatformAdapter so all adapters inherit it. Fix the same substring-containment bug in: - gateway/platforms/base.py (photo burst merging) - gateway/run.py (priority photo follow-up merging) - gateway/platforms/feishu.py (media batch merging) The original fix only covered telegram.py. The same bug existed in base.py and run.py (pure substring check) and feishu.py (list membership without whitespace normalization). * fix(auxiliary): resolve named custom providers and 'main' alias in auxiliary routing Two bugs caused auxiliary tasks (vision, compression, etc.) to fail when using named custom providers defined in config.yaml: 1. 'provider: main' was hardcoded to 'custom', which only checks legacy OPENAI_BASE_URL env vars. Now reads _read_main_provider() to resolve to the actual provider (e.g., 'custom:beans', 'openrouter', 'deepseek'). 2. Named custom provider names (e.g., 'beans') fell through to PROVIDER_REGISTRY which doesn't know about config.yaml entries. Now checks _get_named_custom_provider() before the registry fallback. Fixes both resolve_provider_client() and _normalize_vision_provider() so the fix covers all auxiliary tasks (vision, compression, web_extract, session_search, etc.). Adds 13 unit tests. Reported by Laura via Discord. 
--------- Co-authored-by: Dilee --- agent/auxiliary_client.py | 35 +++- .../test_auxiliary_named_custom_providers.py | 151 ++++++++++++++++++ website/docs/user-guide/configuration.md | 4 +- 3 files changed, 187 insertions(+), 3 deletions(-) create mode 100644 tests/agent/test_auxiliary_named_custom_providers.py diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 35ba3c7b..49a78458 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1142,7 +1142,13 @@ def resolve_provider_client( if provider == "codex": provider = "openai-codex" if provider == "main": - provider = "custom" + # Resolve to the user's actual main provider so named custom providers + # and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly. + main_prov = _read_main_provider() + if main_prov and main_prov not in ("auto", "main", ""): + provider = main_prov + else: + provider = "custom" # ── Auto: try all providers in priority order ──────────────────── if provider == "auto": @@ -1238,6 +1244,28 @@ def resolve_provider_client( "but no endpoint credentials found") return None, None + # ── Named custom providers (config.yaml custom_providers list) ─── + try: + from hermes_cli.runtime_provider import _get_named_custom_provider + custom_entry = _get_named_custom_provider(provider) + if custom_entry: + custom_base = custom_entry.get("base_url", "").strip() + custom_key = custom_entry.get("api_key", "").strip() or "no-key-required" + if custom_base: + final_model = model or _read_main_model() or "gpt-4o-mini" + client = OpenAI(api_key=custom_key, base_url=custom_base) + logger.debug( + "resolve_provider_client: named custom provider %r (%s)", + provider, final_model) + return (_to_async_client(client, final_model) if async_mode + else (client, final_model)) + logger.warning( + "resolve_provider_client: named custom provider %r has no base_url", + provider) + return None, None + except ImportError: + pass + # ── API-key providers from PROVIDER_REGISTRY 
───────────────────── try: from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials @@ -1358,6 +1386,11 @@ def _normalize_vision_provider(provider: Optional[str]) -> str: if provider == "codex": return "openai-codex" if provider == "main": + # Resolve to actual main provider — named custom providers and + # non-aggregator providers need to pass through as their real name. + main_prov = _read_main_provider() + if main_prov and main_prov not in ("auto", "main", ""): + return main_prov return "custom" return provider diff --git a/tests/agent/test_auxiliary_named_custom_providers.py b/tests/agent/test_auxiliary_named_custom_providers.py new file mode 100644 index 00000000..9ca0c5e5 --- /dev/null +++ b/tests/agent/test_auxiliary_named_custom_providers.py @@ -0,0 +1,151 @@ +"""Tests for named custom provider and 'main' alias resolution in auxiliary_client.""" + +import os +from unittest.mock import patch, MagicMock + +import pytest + + +@pytest.fixture(autouse=True) +def _isolate(tmp_path, monkeypatch): + """Redirect HERMES_HOME and clear module caches.""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + # Write a minimal config so load_config doesn't fail + (hermes_home / "config.yaml").write_text("model:\n default: test-model\n") + + +def _write_config(tmp_path, config_dict): + """Write a config.yaml to the test HERMES_HOME.""" + import yaml + config_path = tmp_path / ".hermes" / "config.yaml" + config_path.write_text(yaml.dump(config_dict)) + + +class TestNormalizeVisionProvider: + """_normalize_vision_provider should resolve 'main' to actual main provider.""" + + def test_main_resolves_to_named_custom(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "my-model", "provider": "custom:beans"}, + "custom_providers": [{"name": "beans", "base_url": "http://localhost/v1"}], + }) + from agent.auxiliary_client import _normalize_vision_provider + assert 
_normalize_vision_provider("main") == "custom:beans" + + def test_main_resolves_to_openrouter(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "anthropic/claude-sonnet-4", "provider": "openrouter"}, + }) + from agent.auxiliary_client import _normalize_vision_provider + assert _normalize_vision_provider("main") == "openrouter" + + def test_main_resolves_to_deepseek(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "deepseek-chat", "provider": "deepseek"}, + }) + from agent.auxiliary_client import _normalize_vision_provider + assert _normalize_vision_provider("main") == "deepseek" + + def test_main_falls_back_to_custom_when_no_provider(self, tmp_path): + _write_config(tmp_path, {"model": {"default": "gpt-4o"}}) + from agent.auxiliary_client import _normalize_vision_provider + assert _normalize_vision_provider("main") == "custom" + + def test_bare_provider_name_unchanged(self): + from agent.auxiliary_client import _normalize_vision_provider + assert _normalize_vision_provider("beans") == "beans" + assert _normalize_vision_provider("deepseek") == "deepseek" + + def test_codex_alias_still_works(self): + from agent.auxiliary_client import _normalize_vision_provider + assert _normalize_vision_provider("codex") == "openai-codex" + + def test_auto_unchanged(self): + from agent.auxiliary_client import _normalize_vision_provider + assert _normalize_vision_provider("auto") == "auto" + assert _normalize_vision_provider(None) == "auto" + + +class TestResolveProviderClientMainAlias: + """resolve_provider_client('main', ...) 
should resolve to actual main provider.""" + + def test_main_resolves_to_named_custom_provider(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "my-model", "provider": "beans"}, + "custom_providers": [ + {"name": "beans", "base_url": "http://beans.local/v1", "api_key": "k"}, + ], + }) + from agent.auxiliary_client import resolve_provider_client + client, model = resolve_provider_client("main", "override-model") + assert client is not None + assert model == "override-model" + assert "beans.local" in str(client.base_url) + + def test_main_with_custom_colon_prefix(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "my-model", "provider": "custom:beans"}, + "custom_providers": [ + {"name": "beans", "base_url": "http://beans.local/v1", "api_key": "k"}, + ], + }) + from agent.auxiliary_client import resolve_provider_client + client, model = resolve_provider_client("main", "test") + assert client is not None + assert "beans.local" in str(client.base_url) + + +class TestResolveProviderClientNamedCustom: + """resolve_provider_client should resolve named custom providers directly.""" + + def test_named_custom_provider(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "test-model"}, + "custom_providers": [ + {"name": "beans", "base_url": "http://beans.local/v1", "api_key": "k"}, + ], + }) + from agent.auxiliary_client import resolve_provider_client + client, model = resolve_provider_client("beans", "my-model") + assert client is not None + assert model == "my-model" + assert "beans.local" in str(client.base_url) + + def test_named_custom_provider_default_model(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "main-model"}, + "custom_providers": [ + {"name": "beans", "base_url": "http://beans.local/v1", "api_key": "k"}, + ], + }) + from agent.auxiliary_client import resolve_provider_client + client, model = resolve_provider_client("beans") + assert client is not None + # Should use _read_main_model() 
fallback + assert model == "main-model" + + def test_named_custom_no_api_key_uses_fallback(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "test"}, + "custom_providers": [ + {"name": "local", "base_url": "http://localhost:8080/v1"}, + ], + }) + from agent.auxiliary_client import resolve_provider_client + client, model = resolve_provider_client("local", "test") + assert client is not None + # no-key-required should be used + + def test_nonexistent_named_custom_falls_through(self, tmp_path): + _write_config(tmp_path, { + "model": {"default": "test"}, + "custom_providers": [ + {"name": "beans", "base_url": "http://beans.local/v1"}, + ], + }) + from agent.auxiliary_client import resolve_provider_client + # "coffee" doesn't exist in custom_providers + client, model = resolve_provider_client("coffee", "test") + assert client is None diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 2e26a9f6..468806b8 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -553,7 +553,7 @@ Every model slot in Hermes — auxiliary tasks, compression, fallback — uses t When `base_url` is set, Hermes ignores the provider and calls that endpoint directly (using `api_key` or `OPENAI_API_KEY` for auth). When only `provider` is set, Hermes uses that provider's built-in auth and base URL. -Available providers: `auto`, `openrouter`, `nous`, `codex`, `copilot`, `anthropic`, `main`, `zai`, `kimi-coding`, `minimax`, and any provider registered in the [provider registry](/docs/reference/environment-variables). +Available providers: `auto`, `openrouter`, `nous`, `codex`, `copilot`, `anthropic`, `main`, `zai`, `kimi-coding`, `minimax`, any provider registered in the [provider registry](/docs/reference/environment-variables), or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`). 
### Full auxiliary config reference @@ -704,7 +704,7 @@ auxiliary: model: "my-local-model" ``` -`provider: "main"` follows the same custom endpoint Hermes uses for normal chat. That endpoint can be set directly with `OPENAI_BASE_URL`, or saved once through `hermes model` and persisted in `config.yaml`. +`provider: "main"` uses whatever provider Hermes uses for normal chat — whether that's a named custom provider (e.g. `beans`), a built-in provider like `openrouter`, or a legacy `OPENAI_BASE_URL` endpoint. :::tip If you use Codex OAuth as your main model provider, vision works automatically — no extra configuration needed. Codex is included in the auto-detection chain for vision. From 2ad769487492ba53e6c986df5b8e387761a83926 Mon Sep 17 00:00:00 2001 From: r266-tech Date: Wed, 8 Apr 2026 02:15:37 +0800 Subject: [PATCH 113/154] fix(mcp): preserve structured_content in tool call results MCP CallToolResult may include structured_content (a JSON object) alongside content blocks. The tool handler previously only forwarded concatenated text from content blocks, silently dropping the structured payload. This breaks MCP tools that return a minimal human text in content while putting the actual machine-usable payload in structured_content. Now, when structured_content is present, it is included in the returned JSON under the 'structuredContent' key. 
Fixes NousResearch/hermes-agent#5874 --- tools/mcp_tool.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 1ff42e77..c055e44f 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1253,7 +1253,16 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): for block in (result.content or []): if hasattr(block, "text"): parts.append(block.text) - return json.dumps({"result": "\n".join(parts) if parts else ""}) + text_result = "\n".join(parts) if parts else "" + + # Preserve structured_content (structuredContent) if present + structured = getattr(result, "structured_content", None) + if structured is not None: + return json.dumps({ + "result": text_result, + "structuredContent": structured, + }) + return json.dumps({"result": text_result}) try: return _run_on_mcp_loop(_call(), timeout=tool_timeout) From 363c5bc3c3daa04e24d6a31bc111ec18c6d9b1fa Mon Sep 17 00:00:00 2001 From: r266-tech Date: Wed, 8 Apr 2026 02:15:43 +0800 Subject: [PATCH 114/154] test(mcp): add structured_content preservation tests --- tests/test_mcp_structured_content.py | 100 +++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 tests/test_mcp_structured_content.py diff --git a/tests/test_mcp_structured_content.py b/tests/test_mcp_structured_content.py new file mode 100644 index 00000000..3041681e --- /dev/null +++ b/tests/test_mcp_structured_content.py @@ -0,0 +1,100 @@ +"""Tests for MCP tool structured_content preservation.""" + +import json +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from tools import mcp_tool + + +class _FakeContentBlock: + """Minimal content block with .text and .type attributes.""" + + def __init__(self, text: str, block_type: str = "text"): + self.text = text + self.type = block_type + + +class _FakeCallToolResult: + """Minimal CallToolResult stand-in.""" + + def __init__(self, content, 
is_error=False, structured_content=None): + self.content = content + self.isError = is_error + self.structured_content = structured_content + + +@pytest.fixture +def _patch_mcp_server(): + """Patch _servers and the MCP event loop so _make_tool_handler can run.""" + fake_session = MagicMock() + fake_server = SimpleNamespace(session=fake_session) + with patch.dict(mcp_tool._servers, {"test-server": fake_server}): + yield fake_session + + +class TestStructuredContentPreservation: + """Ensure structured_content from CallToolResult is forwarded.""" + + def test_text_only_result(self, _patch_mcp_server): + """When no structured_content, result is text-only (existing behaviour).""" + session = _patch_mcp_server + session.call_tool = AsyncMock( + return_value=_FakeCallToolResult( + content=[_FakeContentBlock("hello")], + ) + ) + handler = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0) + raw = handler({}) + data = json.loads(raw) + assert data == {"result": "hello"} + assert "structuredContent" not in data + + def test_structured_content_included(self, _patch_mcp_server): + """When structured_content is present, it must appear in the response.""" + session = _patch_mcp_server + payload = {"value": "secret-123", "revealed": True} + session.call_tool = AsyncMock( + return_value=_FakeCallToolResult( + content=[_FakeContentBlock("OK")], + structured_content=payload, + ) + ) + handler = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0) + raw = handler({}) + data = json.loads(raw) + assert data["result"] == "OK" + assert data["structuredContent"] == payload + + def test_structured_content_none_omitted(self, _patch_mcp_server): + """When structured_content is explicitly None, key is omitted.""" + session = _patch_mcp_server + session.call_tool = AsyncMock( + return_value=_FakeCallToolResult( + content=[_FakeContentBlock("done")], + structured_content=None, + ) + ) + handler = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0) + raw = handler({}) + 
data = json.loads(raw) + assert data == {"result": "done"} + assert "structuredContent" not in data + + def test_empty_text_with_structured_content(self, _patch_mcp_server): + """When content blocks are empty but structured_content exists.""" + session = _patch_mcp_server + payload = {"status": "ok", "data": [1, 2, 3]} + session.call_tool = AsyncMock( + return_value=_FakeCallToolResult( + content=[], + structured_content=payload, + ) + ) + handler = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0) + raw = handler({}) + data = json.loads(raw) + assert data["result"] == "" + assert data["structuredContent"] == payload From b9a5e6e247ac3292fcefcc3fa1e75b0c031f8dfb Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 17:48:30 -0700 Subject: [PATCH 115/154] fix: use camelCase structuredContent attr, prefer structured over text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The MCP SDK Pydantic model uses camelCase (structuredContent), not snake_case (structured_content). The original getattr was a silent no-op. - When structuredContent is present, return it AS the result instead of alongside text — the structured payload is the machine-readable data. - Move test file to tests/tools/ and fix fake class to use camelCase. - Patch _run_on_mcp_loop in tests so the handler actually executes. 
--- .../test_mcp_structured_content.py | 53 +++++++++++-------- tools/mcp_tool.py | 9 ++-- 2 files changed, 35 insertions(+), 27 deletions(-) rename tests/{ => tools}/test_mcp_structured_content.py (62%) diff --git a/tests/test_mcp_structured_content.py b/tests/tools/test_mcp_structured_content.py similarity index 62% rename from tests/test_mcp_structured_content.py rename to tests/tools/test_mcp_structured_content.py index 3041681e..fa10f8d5 100644 --- a/tests/test_mcp_structured_content.py +++ b/tests/tools/test_mcp_structured_content.py @@ -1,5 +1,6 @@ -"""Tests for MCP tool structured_content preservation.""" +"""Tests for MCP tool structuredContent preservation.""" +import asyncio import json from types import SimpleNamespace from unittest.mock import AsyncMock, MagicMock, patch @@ -18,12 +19,25 @@ class _FakeContentBlock: class _FakeCallToolResult: - """Minimal CallToolResult stand-in.""" + """Minimal CallToolResult stand-in. - def __init__(self, content, is_error=False, structured_content=None): + Uses camelCase ``structuredContent`` / ``isError`` to match the real + MCP SDK Pydantic model (``mcp.types.CallToolResult``). 
+ """ + + def __init__(self, content, is_error=False, structuredContent=None): self.content = content self.isError = is_error - self.structured_content = structured_content + self.structuredContent = structuredContent + + +def _fake_run_on_mcp_loop(coro, timeout=30): + """Run an MCP coroutine directly in a fresh event loop.""" + loop = asyncio.new_event_loop() + try: + return loop.run_until_complete(coro) + finally: + loop.close() @pytest.fixture @@ -31,15 +45,16 @@ def _patch_mcp_server(): """Patch _servers and the MCP event loop so _make_tool_handler can run.""" fake_session = MagicMock() fake_server = SimpleNamespace(session=fake_session) - with patch.dict(mcp_tool._servers, {"test-server": fake_server}): + with patch.dict(mcp_tool._servers, {"test-server": fake_server}), \ + patch("tools.mcp_tool._run_on_mcp_loop", side_effect=_fake_run_on_mcp_loop): yield fake_session class TestStructuredContentPreservation: - """Ensure structured_content from CallToolResult is forwarded.""" + """Ensure structuredContent from CallToolResult is forwarded.""" def test_text_only_result(self, _patch_mcp_server): - """When no structured_content, result is text-only (existing behaviour).""" + """When no structuredContent, result is text-only (existing behaviour).""" session = _patch_mcp_server session.call_tool = AsyncMock( return_value=_FakeCallToolResult( @@ -50,51 +65,47 @@ class TestStructuredContentPreservation: raw = handler({}) data = json.loads(raw) assert data == {"result": "hello"} - assert "structuredContent" not in data - def test_structured_content_included(self, _patch_mcp_server): - """When structured_content is present, it must appear in the response.""" + def test_structured_content_is_the_result(self, _patch_mcp_server): + """When structuredContent is present, it becomes the result directly.""" session = _patch_mcp_server payload = {"value": "secret-123", "revealed": True} session.call_tool = AsyncMock( return_value=_FakeCallToolResult( 
content=[_FakeContentBlock("OK")], - structured_content=payload, + structuredContent=payload, ) ) handler = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0) raw = handler({}) data = json.loads(raw) - assert data["result"] == "OK" - assert data["structuredContent"] == payload + assert data["result"] == payload - def test_structured_content_none_omitted(self, _patch_mcp_server): - """When structured_content is explicitly None, key is omitted.""" + def test_structured_content_none_falls_back_to_text(self, _patch_mcp_server): + """When structuredContent is explicitly None, fall back to text.""" session = _patch_mcp_server session.call_tool = AsyncMock( return_value=_FakeCallToolResult( content=[_FakeContentBlock("done")], - structured_content=None, + structuredContent=None, ) ) handler = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0) raw = handler({}) data = json.loads(raw) assert data == {"result": "done"} - assert "structuredContent" not in data def test_empty_text_with_structured_content(self, _patch_mcp_server): - """When content blocks are empty but structured_content exists.""" + """When content blocks are empty but structuredContent exists.""" session = _patch_mcp_server payload = {"status": "ok", "data": [1, 2, 3]} session.call_tool = AsyncMock( return_value=_FakeCallToolResult( content=[], - structured_content=payload, + structuredContent=payload, ) ) handler = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0) raw = handler({}) data = json.loads(raw) - assert data["result"] == "" - assert data["structuredContent"] == payload + assert data["result"] == payload diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index c055e44f..d0b3263b 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1255,13 +1255,10 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): parts.append(block.text) text_result = "\n".join(parts) if parts else "" - # Preserve structured_content (structuredContent) if present - 
structured = getattr(result, "structured_content", None) + # Prefer structuredContent (machine-readable JSON) over plain text + structured = getattr(result, "structuredContent", None) if structured is not None: - return json.dumps({ - "result": text_result, - "structuredContent": structured, - }) + return json.dumps({"result": structured}) return json.dumps({"result": text_result}) try: From 7fe6782a25f4aeb6b792162c946cba825813beef Mon Sep 17 00:00:00 2001 From: Jonathan Barket Date: Mon, 6 Apr 2026 23:03:14 -0500 Subject: [PATCH 116/154] feat(tools): add "no_mcp" sentinel to exclude MCP servers per platform MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, MCP servers are included on all platforms by default. If a platform's toolset list does not explicitly name any MCP servers, every globally enabled MCP server is injected. There is no way to opt a platform out of MCP servers entirely. This matters for the API server platform when used as an execution backend — each spawned agent session gets the full MCP tool schema injected into its system prompt, dramatically inflating token usage (e.g. 57K tokens vs 9K without MCP tools) and slowing response times. Add a "no_mcp" sentinel value for platform_toolsets. When present in a platform's toolset list, all MCP servers are excluded for that platform. Other platforms are unaffected. Usage in config.yaml: platform_toolsets: api_server: - terminal - file - web - no_mcp # exclude all MCP servers The sentinel is filtered out of the final toolset — it does not appear as an actual toolset name. 
--- hermes_cli/tools_config.py | 12 ++++++--- tests/hermes_cli/test_tools_config.py | 39 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index cddc664b..65525d27 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -554,6 +554,7 @@ def _get_platform_tools( # MCP servers are expected to be available on all platforms by default. # If the platform explicitly lists one or more MCP server names, treat that # as an allowlist. Otherwise include every globally enabled MCP server. + # Special sentinel: "no_mcp" in the toolset list disables all MCP servers. mcp_servers = config.get("mcp_servers") or {} enabled_mcp_servers = { name @@ -561,10 +562,15 @@ def _get_platform_tools( if isinstance(server_cfg, dict) and _parse_enabled_flag(server_cfg.get("enabled", True), default=True) } - explicit_mcp_servers = explicit_passthrough & enabled_mcp_servers - enabled_toolsets.update(explicit_passthrough - enabled_mcp_servers) + # Allow "no_mcp" sentinel to opt out of all MCP servers for this platform + if "no_mcp" in toolset_names: + explicit_mcp_servers = set() + enabled_toolsets.update(explicit_passthrough - enabled_mcp_servers - {"no_mcp"}) + else: + explicit_mcp_servers = explicit_passthrough & enabled_mcp_servers + enabled_toolsets.update(explicit_passthrough - enabled_mcp_servers) if include_default_mcp_servers: - if explicit_mcp_servers: + if explicit_mcp_servers or "no_mcp" in toolset_names: enabled_toolsets.update(explicit_mcp_servers) else: enabled_toolsets.update(enabled_mcp_servers) diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index b02b3c1f..7371c89d 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -72,6 +72,45 @@ def test_get_platform_tools_keeps_enabled_mcp_servers_with_explicit_builtin_sele assert "web-search-prime" in enabled +def 
test_get_platform_tools_no_mcp_sentinel_excludes_all_mcp_servers(): + """The 'no_mcp' sentinel in platform_toolsets excludes all MCP servers.""" + config = { + "platform_toolsets": {"cli": ["web", "terminal", "no_mcp"]}, + "mcp_servers": { + "exa": {"url": "https://mcp.exa.ai/mcp"}, + "web-search-prime": {"url": "https://api.z.ai/api/mcp/web_search_prime/mcp"}, + }, + } + + enabled = _get_platform_tools(config, "cli") + + assert "web" in enabled + assert "terminal" in enabled + assert "exa" not in enabled + assert "web-search-prime" not in enabled + assert "no_mcp" not in enabled + + +def test_get_platform_tools_no_mcp_sentinel_does_not_affect_other_platforms(): + """The 'no_mcp' sentinel only affects the platform it's configured on.""" + config = { + "platform_toolsets": { + "api_server": ["web", "terminal", "no_mcp"], + }, + "mcp_servers": { + "exa": {"url": "https://mcp.exa.ai/mcp"}, + }, + } + + # api_server should exclude MCP + api_enabled = _get_platform_tools(config, "api_server") + assert "exa" not in api_enabled + + # cli (not configured with no_mcp) should include MCP + cli_enabled = _get_platform_tools(config, "cli") + assert "exa" in cli_enabled + + def test_toolset_has_keys_for_vision_accepts_codex_auth(tmp_path, monkeypatch): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) (tmp_path / "auth.json").write_text( From 8d7a98d2ff3f78077e8efad5d8264c9488a7d4ba Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 21:41:05 -0700 Subject: [PATCH 117/154] feat: use mimo-v2-pro for non-vision auxiliary tasks on Nous free tier (#6018) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free-tier Nous Portal users were getting mimo-v2-omni (a multimodal model) for all auxiliary tasks including compression, session search, and web extraction. Now routes non-vision tasks to mimo-v2-pro (a text model) which is better suited for those workloads. 
- Added _NOUS_FREE_TIER_AUX_MODEL constant for text auxiliary tasks - _try_nous() accepts vision=False param to select the right model - Vision path (_resolve_strict_vision_backend) passes vision=True - All other callers default to vision=False → mimo-v2-pro --- agent/auxiliary_client.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 49a78458..b9059b69 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -92,6 +92,7 @@ auxiliary_is_nous: bool = False _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" _NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" +_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -713,7 +714,7 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: default_headers=_OR_HEADERS), _OPENROUTER_MODEL -def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: +def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: nous = _read_nous_auth() if not nous: return None, None @@ -725,12 +726,13 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: else: model = _NOUS_MODEL # Free-tier users can't use paid auxiliary models — use the free - # multimodal model instead so vision/browser-vision still works. + # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks. 
try: from hermes_cli.models import check_nous_free_tier if check_nous_free_tier(): - model = _NOUS_FREE_TIER_VISION_MODEL - logger.debug("Free-tier Nous account — using %s for auxiliary/vision", model) + model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL + logger.debug("Free-tier Nous account — using %s for auxiliary/%s", + model, "vision" if vision else "text") except Exception: pass return ( @@ -1400,7 +1402,7 @@ def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Option if provider == "openrouter": return _try_openrouter() if provider == "nous": - return _try_nous() + return _try_nous(vision=True) if provider == "openai-codex": return _try_codex() if provider == "anthropic": From 5c03f2e7cc4e24567b104f9a665b5845dfc454d4 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 22:23:28 -0700 Subject: [PATCH 118/154] =?UTF-8?q?fix:=20provider/model=20resolution=20?= =?UTF-8?q?=E2=80=94=20salvage=204=20PRs=20+=20MiniMax=20aux=20URL=20fix?= =?UTF-8?q?=20(#5983)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Salvaged fixes from community PRs: - fix(model_switch): _read_auth_store → _load_auth_store + fix auth store key lookup (was checking top-level dict instead of store['providers']). OAuth providers now correctly detected in /model picker. Cherry-picked from PR #5911 by Xule Lin (linxule). - fix(ollama): pass num_ctx to override 2048 default context window. Ollama defaults to 2048 context regardless of model capabilities. Now auto-detects from /api/show metadata and injects num_ctx into every request. Config override via model.ollama_num_ctx. Fixes #2708. Cherry-picked from PR #5929 by kshitij (kshitijk4poor). - fix(aux): normalize provider aliases for vision/auxiliary routing. Adds _normalize_aux_provider() with 17 aliases (google→gemini, claude→anthropic, glm→zai, etc). 
Fixes vision routing failure when provider is set to 'google' instead of 'gemini'. Cherry-picked from PR #5793 by e11i (Elizabeth1979). - fix(aux): rewrite MiniMax /anthropic base URLs to /v1 for OpenAI SDK. MiniMax's inference_base_url ends in /anthropic (Anthropic Messages API), but auxiliary client uses OpenAI SDK which appends /chat/completions → 404 at /anthropic/chat/completions. Generic _to_openai_base_url() helper rewrites terminal /anthropic to /v1 for OpenAI-compatible endpoint. Inspired by PR #5786 by Lempkey. Added debug logging to silent exception blocks across all fixes. Co-authored-by: Hermes Agent --- agent/auxiliary_client.py | 90 ++++++++++----- agent/model_metadata.py | 53 +++++++++ hermes_cli/model_switch.py | 10 +- run_agent.py | 37 ++++++ tests/agent/test_auxiliary_client.py | 42 +++++++ tests/agent/test_minimax_auxiliary_url.py | 42 +++++++ tests/test_ollama_num_ctx.py | 135 ++++++++++++++++++++++ 7 files changed, 378 insertions(+), 31 deletions(-) create mode 100644 tests/agent/test_minimax_auxiliary_url.py create mode 100644 tests/test_ollama_num_ctx.py diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index b9059b69..4b156a4e 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -59,6 +59,41 @@ from hermes_constants import OPENROUTER_BASE_URL logger = logging.getLogger(__name__) +_PROVIDER_ALIASES = { + "google": "gemini", + "google-gemini": "gemini", + "google-ai-studio": "gemini", + "glm": "zai", + "z-ai": "zai", + "z.ai": "zai", + "zhipu": "zai", + "kimi": "kimi-coding", + "moonshot": "kimi-coding", + "minimax-china": "minimax-cn", + "minimax_cn": "minimax-cn", + "claude": "anthropic", + "claude-code": "anthropic", +} + + +def _normalize_aux_provider(provider: Optional[str], *, for_vision: bool = False) -> str: + normalized = (provider or "auto").strip().lower() + if normalized.startswith("custom:"): + suffix = normalized.split(":", 1)[1].strip() + if not suffix: + return "custom" + normalized = 
suffix if not for_vision else "custom" + if normalized == "codex": + return "openai-codex" + if normalized == "main": + # Resolve to the user's actual main provider so named custom providers + # and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly. + main_prov = _read_main_provider() + if main_prov and main_prov not in ("auto", "main", ""): + return main_prov + return "custom" + return _PROVIDER_ALIASES.get(normalized, normalized) + # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "gemini": "gemini-3-flash-preview", @@ -106,6 +141,23 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex" _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" +def _to_openai_base_url(base_url: str) -> str: + """Normalize an Anthropic-style base URL to OpenAI-compatible format. + + Some providers (MiniMax, MiniMax-CN) expose an ``/anthropic`` endpoint for + the Anthropic Messages API and a separate ``/v1`` endpoint for OpenAI chat + completions. The auxiliary client uses the OpenAI SDK, so it must hit the + ``/v1`` surface. Passing the raw ``inference_base_url`` causes requests to + land on ``/anthropic/chat/completions`` — a 404. 
+ """ + url = str(base_url or "").strip().rstrip("/") + if url.endswith("/anthropic"): + rewritten = url[: -len("/anthropic")] + "/v1" + logger.debug("Auxiliary client: rewrote base URL %s → %s", url, rewritten) + return rewritten + return url + + def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]: """Return (pool_exists_for_provider, selected_entry).""" try: @@ -635,7 +687,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if not api_key: continue - base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url + base_url = _to_openai_base_url( + _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url + ) model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default") logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) extra = {} @@ -652,7 +706,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if not api_key: continue - base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url + base_url = _to_openai_base_url( + str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url + ) model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default") logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model) extra = {} @@ -778,7 +834,7 @@ def _read_main_provider() -> str: if isinstance(model_cfg, dict): provider = model_cfg.get("provider", "") if isinstance(provider, str) and provider.strip(): - return provider.strip().lower() + return _normalize_aux_provider(provider) except Exception: pass return "" @@ -1140,17 +1196,7 @@ def resolve_provider_client( (client, resolved_model) or (None, None) if auth is unavailable. 
""" # Normalise aliases - provider = (provider or "auto").strip().lower() - if provider == "codex": - provider = "openai-codex" - if provider == "main": - # Resolve to the user's actual main provider so named custom providers - # and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly. - main_prov = _read_main_provider() - if main_prov and main_prov not in ("auto", "main", ""): - provider = main_prov - else: - provider = "custom" + provider = _normalize_aux_provider(provider) # ── Auto: try all providers in priority order ──────────────────── if provider == "auto": @@ -1300,7 +1346,9 @@ def resolve_provider_client( provider, ", ".join(tried_sources)) return None, None - base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url + base_url = _to_openai_base_url( + str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url + ) default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "") final_model = model or default_model @@ -1384,17 +1432,7 @@ _VISION_AUTO_PROVIDER_ORDER = ( def _normalize_vision_provider(provider: Optional[str]) -> str: - provider = (provider or "auto").strip().lower() - if provider == "codex": - return "openai-codex" - if provider == "main": - # Resolve to actual main provider — named custom providers and - # non-aggregator providers need to pass through as their real name. 
- main_prov = _read_main_provider() - if main_prov and main_prov not in ("auto", "main", ""): - return main_prov - return "custom" - return provider + return _normalize_aux_provider(provider, for_vision=True) def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]: diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 50245a7c..a5fb11f5 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -611,6 +611,59 @@ def _model_id_matches(candidate_id: str, lookup_model: str) -> bool: return False +def query_ollama_num_ctx(model: str, base_url: str) -> Optional[int]: + """Query an Ollama server for the model's context length. + + Returns the model's maximum context from GGUF metadata via ``/api/show``, + or the explicit ``num_ctx`` from the Modelfile if set. Returns None if + the server is unreachable or not Ollama. + + This is the value that should be passed as ``num_ctx`` in Ollama chat + requests to override the default 2048. + """ + import httpx + + bare_model = _strip_provider_prefix(model) + server_url = base_url.rstrip("/") + if server_url.endswith("/v1"): + server_url = server_url[:-3] + + try: + server_type = detect_local_server_type(base_url) + except Exception: + return None + if server_type != "ollama": + return None + + try: + with httpx.Client(timeout=3.0) as client: + resp = client.post(f"{server_url}/api/show", json={"name": bare_model}) + if resp.status_code != 200: + return None + data = resp.json() + + # Prefer explicit num_ctx from Modelfile parameters (user override) + params = data.get("parameters", "") + if "num_ctx" in params: + for line in params.split("\n"): + if "num_ctx" in line: + parts = line.strip().split() + if len(parts) >= 2: + try: + return int(parts[-1]) + except ValueError: + pass + + # Fall back to GGUF model_info context_length (training max) + model_info = data.get("model_info", {}) + for key, value in model_info.items(): + if "context_length" in key and isinstance(value, 
(int, float)): + return int(value) + except Exception: + pass + return None + + def _query_local_context_length(model: str, base_url: str) -> Optional[int]: """Query a local server for the model's context length.""" import httpx diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 988eeebd..07efbcf4 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -791,12 +791,12 @@ def list_authenticated_providers( if overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"): # These use auth stores, not env vars — check for auth.json entries try: - from hermes_cli.auth import _read_auth_store - store = _read_auth_store() - if store and pid in store: + from hermes_cli.auth import _load_auth_store + store = _load_auth_store() + if store and (pid in store.get("providers", {}) or pid in store.get("credential_pool", {})): has_creds = True - except Exception: - pass + except Exception as exc: + logger.debug("Auth store check failed for %s: %s", pid, exc) if not has_creds: continue diff --git a/run_agent.py b/run_agent.py index a0c266aa..343110ec 100644 --- a/run_agent.py +++ b/run_agent.py @@ -85,6 +85,7 @@ from agent.model_metadata import ( estimate_tokens_rough, estimate_messages_tokens_rough, estimate_request_tokens_rough, get_next_probe_tier, parse_context_limit_from_error, save_context_length, is_local_endpoint, + query_ollama_num_ctx, ) from agent.context_compressor import ContextCompressor from agent.subdirectory_hints import SubdirectoryHintTracker @@ -1216,6 +1217,33 @@ class AIAgent: self.session_cost_status = "unknown" self.session_cost_source = "none" + # ── Ollama num_ctx injection ── + # Ollama defaults to 2048 context regardless of the model's capabilities. + # When running against an Ollama server, detect the model's max context + # and pass num_ctx on every chat request so the full window is used. + # User override: set model.ollama_num_ctx in config.yaml to cap VRAM use. 
+ self._ollama_num_ctx: int | None = None + _ollama_num_ctx_override = None + if isinstance(_model_cfg, dict): + _ollama_num_ctx_override = _model_cfg.get("ollama_num_ctx") + if _ollama_num_ctx_override is not None: + try: + self._ollama_num_ctx = int(_ollama_num_ctx_override) + except (TypeError, ValueError): + logger.debug("Invalid ollama_num_ctx config value: %r", _ollama_num_ctx_override) + if self._ollama_num_ctx is None and self.base_url and is_local_endpoint(self.base_url): + try: + _detected = query_ollama_num_ctx(self.model, self.base_url) + if _detected and _detected > 0: + self._ollama_num_ctx = _detected + except Exception as exc: + logger.debug("Ollama num_ctx detection failed: %s", exc) + if self._ollama_num_ctx and not self.quiet_mode: + logger.info( + "Ollama num_ctx: will request %d tokens (model max from /api/show)", + self._ollama_num_ctx, + ) + if not self.quiet_mode: if compression_enabled: print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (compress at {int(compression_threshold*100)}% = {self.context_compressor.threshold_tokens:,})") @@ -5456,6 +5484,15 @@ class AIAgent: if _is_nous: extra_body["tags"] = ["product=hermes-agent"] + # Ollama num_ctx: override the 2048 default so the model actually + # uses the context window it was trained for. Passed via the OpenAI + # SDK's extra_body → options.num_ctx, which Ollama's OpenAI-compat + # endpoint forwards to the runner as --ctx-size. 
+ if self._ollama_num_ctx: + options = extra_body.get("options", {}) + options["num_ctx"] = self._ollama_num_ctx + extra_body["options"] = options + if extra_body: api_kwargs["extra_body"] = extra_body diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 32f48198..22da03cf 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -471,6 +471,23 @@ class TestExplicitProviderRouting: client, model = resolve_provider_client("zai") assert client is not None + def test_explicit_google_alias_uses_gemini_credentials(self): + """provider='google' should route through the gemini API-key provider.""" + with ( + patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={ + "api_key": "gemini-key", + "base_url": "https://generativelanguage.googleapis.com/v1beta/openai", + }), + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + mock_openai.return_value = MagicMock() + client, model = resolve_provider_client("google", model="gemini-3.1-pro-preview") + + assert client is not None + assert model == "gemini-3.1-pro-preview" + assert mock_openai.call_args.kwargs["api_key"] == "gemini-key" + assert mock_openai.call_args.kwargs["base_url"] == "https://generativelanguage.googleapis.com/v1beta/openai" + def test_explicit_unknown_returns_none(self, monkeypatch): """Unknown provider should return None.""" client, model = resolve_provider_client("nonexistent-provider") @@ -822,6 +839,31 @@ class TestAuxiliaryPoolAwareness: assert model == "google/gemini-3-flash-preview" assert client is not None + def test_vision_config_google_provider_uses_gemini_credentials(self, monkeypatch): + config = { + "auxiliary": { + "vision": { + "provider": "google", + "model": "gemini-3.1-pro-preview", + } + } + } + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + with ( + patch("hermes_cli.auth.resolve_api_key_provider_credentials", return_value={ + "api_key": "gemini-key", + 
"base_url": "https://generativelanguage.googleapis.com/v1beta/openai", + }), + patch("agent.auxiliary_client.OpenAI") as mock_openai, + ): + resolved_provider, client, model = resolve_vision_provider_client() + + assert resolved_provider == "gemini" + assert client is not None + assert model == "gemini-3.1-pro-preview" + assert mock_openai.call_args.kwargs["api_key"] == "gemini-key" + assert mock_openai.call_args.kwargs["base_url"] == "https://generativelanguage.googleapis.com/v1beta/openai" + def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch): """When explicitly forced to 'main', vision CAN use custom endpoint.""" config = { diff --git a/tests/agent/test_minimax_auxiliary_url.py b/tests/agent/test_minimax_auxiliary_url.py new file mode 100644 index 00000000..4444c3aa --- /dev/null +++ b/tests/agent/test_minimax_auxiliary_url.py @@ -0,0 +1,42 @@ +"""Tests for MiniMax auxiliary client URL normalization. + +MiniMax and MiniMax-CN set inference_base_url to the /anthropic path. +The auxiliary client uses the OpenAI SDK, which needs /v1 instead. 
+""" + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) + +from agent.auxiliary_client import _to_openai_base_url + + +class TestToOpenaiBaseUrl: + def test_minimax_global_anthropic_suffix_replaced(self): + assert _to_openai_base_url("https://api.minimax.io/anthropic") == "https://api.minimax.io/v1" + + def test_minimax_cn_anthropic_suffix_replaced(self): + assert _to_openai_base_url("https://api.minimaxi.com/anthropic") == "https://api.minimaxi.com/v1" + + def test_trailing_slash_stripped_before_replace(self): + assert _to_openai_base_url("https://api.minimax.io/anthropic/") == "https://api.minimax.io/v1" + + def test_v1_url_unchanged(self): + assert _to_openai_base_url("https://api.openai.com/v1") == "https://api.openai.com/v1" + + def test_openrouter_url_unchanged(self): + assert _to_openai_base_url("https://openrouter.ai/api/v1") == "https://openrouter.ai/api/v1" + + def test_anthropic_domain_unchanged(self): + """api.anthropic.com doesn't end with /anthropic — should be untouched.""" + assert _to_openai_base_url("https://api.anthropic.com") == "https://api.anthropic.com" + + def test_anthropic_in_subpath_unchanged(self): + assert _to_openai_base_url("https://example.com/anthropic/extra") == "https://example.com/anthropic/extra" + + def test_empty_string(self): + assert _to_openai_base_url("") == "" + + def test_none(self): + assert _to_openai_base_url(None) == "" diff --git a/tests/test_ollama_num_ctx.py b/tests/test_ollama_num_ctx.py new file mode 100644 index 00000000..fff0144d --- /dev/null +++ b/tests/test_ollama_num_ctx.py @@ -0,0 +1,135 @@ +"""Tests for Ollama num_ctx context length detection and injection. 
+ +Covers: + agent/model_metadata.py — query_ollama_num_ctx() + run_agent.py — _ollama_num_ctx detection + extra_body injection +""" + +from unittest.mock import patch, MagicMock + +import pytest + +from agent.model_metadata import query_ollama_num_ctx + + +# ═══════════════════════════════════════════════════════════════════════ +# Level 1: query_ollama_num_ctx — Ollama API interaction +# ═══════════════════════════════════════════════════════════════════════ + + +def _mock_httpx_client(show_response_data, status_code=200): + """Create a mock httpx.Client context manager that returns given /api/show data.""" + mock_resp = MagicMock(status_code=status_code) + mock_resp.json.return_value = show_response_data + mock_client = MagicMock() + mock_client.post.return_value = mock_resp + mock_ctx = MagicMock() + mock_ctx.__enter__ = MagicMock(return_value=mock_client) + mock_ctx.__exit__ = MagicMock(return_value=False) + return mock_ctx, mock_client + + +class TestQueryOllamaNumCtx: + """Test the Ollama /api/show context length query.""" + + def test_returns_context_from_model_info(self): + """Should extract context_length from GGUF model_info metadata.""" + show_data = { + "model_info": {"llama.context_length": 131072}, + "parameters": "", + } + mock_ctx, _ = _mock_httpx_client(show_data) + + with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"): + # httpx is imported inside the function — patch the module import + import httpx + with patch.object(httpx, "Client", return_value=mock_ctx): + result = query_ollama_num_ctx("llama3.1:8b", "http://localhost:11434/v1") + + assert result == 131072 + + def test_prefers_explicit_num_ctx_from_modelfile(self): + """If the Modelfile sets num_ctx explicitly, that should take priority.""" + show_data = { + "model_info": {"llama.context_length": 131072}, + "parameters": "num_ctx 32768\ntemperature 0.7", + } + mock_ctx, _ = _mock_httpx_client(show_data) + + with 
patch("agent.model_metadata.detect_local_server_type", return_value="ollama"): + import httpx + with patch.object(httpx, "Client", return_value=mock_ctx): + result = query_ollama_num_ctx("custom-model", "http://localhost:11434") + + assert result == 32768 + + def test_returns_none_for_non_ollama_server(self): + """Should return None if the server is not Ollama.""" + with patch("agent.model_metadata.detect_local_server_type", return_value="lm-studio"): + result = query_ollama_num_ctx("model", "http://localhost:1234") + assert result is None + + def test_returns_none_on_connection_error(self): + """Should return None if the server is unreachable.""" + with patch("agent.model_metadata.detect_local_server_type", side_effect=Exception("timeout")): + result = query_ollama_num_ctx("model", "http://localhost:11434") + assert result is None + + def test_returns_none_on_404(self): + """Should return None if the model is not found.""" + mock_ctx, _ = _mock_httpx_client({}, status_code=404) + + with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"): + import httpx + with patch.object(httpx, "Client", return_value=mock_ctx): + result = query_ollama_num_ctx("nonexistent", "http://localhost:11434") + + assert result is None + + def test_strips_provider_prefix(self): + """Should strip 'local:' prefix from model name before querying.""" + show_data = { + "model_info": {"qwen2.context_length": 32768}, + "parameters": "", + } + mock_ctx, mock_client = _mock_httpx_client(show_data) + + with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"): + import httpx + with patch.object(httpx, "Client", return_value=mock_ctx): + result = query_ollama_num_ctx("local:qwen2.5:7b", "http://localhost:11434/v1") + + # Verify the post was called with stripped name (no "local:" prefix) + call_args = mock_client.post.call_args + assert call_args[1]["json"]["name"] == "qwen2.5:7b" or call_args[0][1] is not None + assert result == 32768 + + def 
test_handles_qwen2_architecture_key(self): + """Different model architectures use different key prefixes in model_info.""" + show_data = { + "model_info": {"qwen2.context_length": 65536}, + "parameters": "", + } + mock_ctx, _ = _mock_httpx_client(show_data) + + with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"): + import httpx + with patch.object(httpx, "Client", return_value=mock_ctx): + result = query_ollama_num_ctx("qwen2.5:32b", "http://localhost:11434") + + assert result == 65536 + + def test_returns_none_when_model_info_empty(self): + """Should return None if model_info has no context_length key.""" + show_data = { + "model_info": {"llama.embedding_length": 4096}, + "parameters": "", + } + mock_ctx, _ = _mock_httpx_client(show_data) + + with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"): + import httpx + with patch.object(httpx, "Client", return_value=mock_ctx): + result = query_ollama_num_ctx("model", "http://localhost:11434") + + assert result is None From 598c25d43edfc85ccc17c81fa8c1d2165097123e Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 22:45:14 -0700 Subject: [PATCH 119/154] feat(feishu): add interactive card approval buttons (#6043) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add button-based exec approval to the Feishu adapter, matching the existing Discord, Telegram, and Slack implementations. 
When the agent encounters a dangerous command, Feishu users now see an interactive card with four buttons instead of text instructions: - Allow Once (primary) - Allow Session - Always Allow - Deny (danger) Implementation: - send_exec_approval() sends an interactive card via the Feishu message API with buttons carrying hermes_action in their value dict - _handle_card_action_event() intercepts approval button clicks before routing them as synthetic commands, directly calling resolve_gateway_approval() to unblock the agent thread - _update_approval_card() replaces the orange approval card with a green (approved) or red (denied) status card showing who acted - _approval_state dict tracks pending approval_id → session_key mappings; cleaned up on resolution The gateway's existing routing in _approval_notify_sync already checks getattr(type(adapter), 'send_exec_approval', None) and will automatically use the button-based flow for Feishu. Tests: 16 new tests covering send, callback resolution, state management, card updates, and non-interference with existing card actions. 
--- gateway/platforms/feishu.py | 148 ++++++ tests/gateway/test_feishu_approval_buttons.py | 432 ++++++++++++++++++ 2 files changed, 580 insertions(+) create mode 100644 tests/gateway/test_feishu_approval_buttons.py diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index 4bc712f2..6012a0f1 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -20,6 +20,7 @@ from __future__ import annotations import asyncio import hashlib import hmac +import itertools import json import logging import mimetypes @@ -1052,6 +1053,9 @@ class FeishuAdapter(BasePlatformAdapter): self._media_batch_state = FeishuBatchState() self._pending_media_batches = self._media_batch_state.events self._pending_media_batch_tasks = self._media_batch_state.tasks + # Exec approval button state (approval_id → {session_key, message_id, chat_id}) + self._approval_state: Dict[int, Dict[str, str]] = {} + self._approval_counter = itertools.count(1) self._load_seen_message_ids() @staticmethod @@ -1394,6 +1398,104 @@ class FeishuAdapter(BasePlatformAdapter): logger.error("[Feishu] Failed to edit message %s: %s", message_id, exc, exc_info=True) return SendResult(success=False, error=str(exc)) + async def send_exec_approval( + self, chat_id: str, command: str, session_key: str, + description: str = "dangerous command", + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Send an interactive card with approval buttons. + + The buttons carry ``hermes_action`` in their value dict so that + ``_handle_card_action_event`` can intercept them and call + ``resolve_gateway_approval()`` to unblock the waiting agent thread. + """ + if not self._client: + return SendResult(success=False, error="Not connected") + + try: + approval_id = next(self._approval_counter) + cmd_preview = command[:3000] + "..." 
if len(command) > 3000 else command + + def _btn(label: str, action_name: str, btn_type: str = "default") -> dict: + return { + "tag": "button", + "text": {"tag": "plain_text", "content": label}, + "type": btn_type, + "value": {"hermes_action": action_name, "approval_id": approval_id}, + } + + card = { + "config": {"wide_screen_mode": True}, + "header": { + "title": {"content": "⚠️ Command Approval Required", "tag": "plain_text"}, + "template": "orange", + }, + "elements": [ + { + "tag": "markdown", + "content": f"```\n{cmd_preview}\n```\n**Reason:** {description}", + }, + { + "tag": "action", + "actions": [ + _btn("✅ Allow Once", "approve_once", "primary"), + _btn("✅ Session", "approve_session"), + _btn("✅ Always", "approve_always"), + _btn("❌ Deny", "deny", "danger"), + ], + }, + ], + } + + payload = json.dumps(card, ensure_ascii=False) + response = await self._feishu_send_with_retry( + chat_id=chat_id, + msg_type="interactive", + payload=payload, + reply_to=None, + metadata=metadata, + ) + + result = self._finalize_send_result(response, "send_exec_approval failed") + if result.success: + self._approval_state[approval_id] = { + "session_key": session_key, + "message_id": result.message_id or "", + "chat_id": chat_id, + } + return result + except Exception as exc: + logger.warning("[Feishu] send_exec_approval failed: %s", exc) + return SendResult(success=False, error=str(exc)) + + async def _update_approval_card( + self, message_id: str, label: str, user_name: str, choice: str, + ) -> None: + """Replace the approval card with a resolved status card.""" + if not self._client or not message_id: + return + icon = "❌" if choice == "deny" else "✅" + card = { + "config": {"wide_screen_mode": True}, + "header": { + "title": {"content": f"{icon} {label}", "tag": "plain_text"}, + "template": "red" if choice == "deny" else "green", + }, + "elements": [ + { + "tag": "markdown", + "content": f"{icon} **{label}** by {user_name}", + }, + ], + } + try: + payload = 
json.dumps(card, ensure_ascii=False) + body = self._build_update_message_body(msg_type="interactive", content=payload) + request = self._build_update_message_request(message_id=message_id, request_body=body) + await asyncio.to_thread(self._client.im.v1.message.update, request) + except Exception as exc: + logger.warning("[Feishu] Failed to update approval card %s: %s", message_id, exc) + async def send_voice( self, chat_id: str, @@ -1820,6 +1922,52 @@ class FeishuAdapter(BasePlatformAdapter): action = getattr(event, "action", None) action_tag = str(getattr(action, "tag", "") or "button") action_value = getattr(action, "value", {}) or {} + + # --- Exec approval button intercept --- + hermes_action = action_value.get("hermes_action") if isinstance(action_value, dict) else None + if hermes_action: + approval_id = action_value.get("approval_id") + state = self._approval_state.pop(approval_id, None) + if not state: + logger.debug("[Feishu] Approval %s already resolved or unknown", approval_id) + return + + choice_map = { + "approve_once": "once", + "approve_session": "session", + "approve_always": "always", + "deny": "deny", + } + choice = choice_map.get(hermes_action, "deny") + + label_map = { + "once": "Approved once", + "session": "Approved for session", + "always": "Approved permanently", + "deny": "Denied", + } + label = label_map.get(choice, "Resolved") + + # Resolve sender name for the status card + sender_id = SimpleNamespace(open_id=open_id, user_id=None, union_id=None) + sender_profile = await self._resolve_sender_profile(sender_id) + user_name = sender_profile.get("user_name") or open_id + + # Resolve the approval — unblocks the agent thread + try: + from tools.approval import resolve_gateway_approval + count = resolve_gateway_approval(state["session_key"], choice) + logger.info( + "Feishu button resolved %d approval(s) for session %s (choice=%s, user=%s)", + count, state["session_key"], choice, user_name, + ) + except Exception as exc: + logger.error("Failed 
to resolve gateway approval from Feishu button: %s", exc) + + # Update the card to show the decision + await self._update_approval_card(state.get("message_id", ""), label, user_name, choice) + return + synthetic_text = f"/card {action_tag}" if action_value: try: diff --git a/tests/gateway/test_feishu_approval_buttons.py b/tests/gateway/test_feishu_approval_buttons.py new file mode 100644 index 00000000..9c51d1ac --- /dev/null +++ b/tests/gateway/test_feishu_approval_buttons.py @@ -0,0 +1,432 @@ +"""Tests for Feishu interactive card approval buttons.""" + +import asyncio +import json +import os +import sys +from pathlib import Path +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, Mock, patch + +import pytest + +# --------------------------------------------------------------------------- +# Ensure the repo root is importable +# --------------------------------------------------------------------------- +_repo = str(Path(__file__).resolve().parents[2]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +# --------------------------------------------------------------------------- +# Minimal Feishu mock so FeishuAdapter can be imported without lark-oapi +# --------------------------------------------------------------------------- +def _ensure_feishu_mocks(): + """Provide stubs for lark-oapi / aiohttp.web so the import succeeds.""" + if "lark_oapi" not in sys.modules: + mod = MagicMock() + for name in ( + "lark_oapi", "lark_oapi.api.im.v1", + "lark_oapi.event", "lark_oapi.event.callback_type", + ): + sys.modules.setdefault(name, mod) + if "aiohttp" not in sys.modules: + aio = MagicMock() + sys.modules.setdefault("aiohttp", aio) + sys.modules.setdefault("aiohttp.web", aio.web) + + +_ensure_feishu_mocks() + +from gateway.config import PlatformConfig +from gateway.platforms.feishu import FeishuAdapter + + +# --------------------------------------------------------------------------- +# Helpers +# 
--------------------------------------------------------------------------- + +def _make_adapter() -> FeishuAdapter: + """Create a FeishuAdapter with mocked internals.""" + config = PlatformConfig(enabled=True) + adapter = FeishuAdapter(config) + adapter._client = MagicMock() + return adapter + + +def _make_card_action_data( + action_value: dict, + chat_id: str = "oc_12345", + open_id: str = "ou_user1", + token: str = "tok_abc", +) -> SimpleNamespace: + """Create a mock Feishu card action callback data object.""" + return SimpleNamespace( + event=SimpleNamespace( + token=token, + context=SimpleNamespace(open_chat_id=chat_id), + operator=SimpleNamespace(open_id=open_id), + action=SimpleNamespace( + tag="button", + value=action_value, + ), + ), + ) + + +# =========================================================================== +# send_exec_approval — interactive card with buttons +# =========================================================================== + +class TestFeishuExecApproval: + """Test send_exec_approval sends an interactive card.""" + + @pytest.mark.asyncio + async def test_sends_interactive_card(self): + adapter = _make_adapter() + + mock_response = SimpleNamespace( + success=lambda: True, + data=SimpleNamespace(message_id="msg_001"), + ) + with patch.object( + adapter, "_feishu_send_with_retry", new_callable=AsyncMock, + return_value=mock_response, + ) as mock_send: + result = await adapter.send_exec_approval( + chat_id="oc_12345", + command="rm -rf /important", + session_key="agent:main:feishu:group:oc_12345", + description="dangerous deletion", + ) + + assert result.success is True + assert result.message_id == "msg_001" + + mock_send.assert_called_once() + kwargs = mock_send.call_args[1] + assert kwargs["chat_id"] == "oc_12345" + assert kwargs["msg_type"] == "interactive" + + # Verify card payload contains the command and buttons + card = json.loads(kwargs["payload"]) + assert card["header"]["template"] == "orange" + assert "rm -rf /important" 
in card["elements"][0]["content"] + assert "dangerous deletion" in card["elements"][0]["content"] + + # Check buttons + actions = card["elements"][1]["actions"] + assert len(actions) == 4 + action_names = [a["value"]["hermes_action"] for a in actions] + assert action_names == [ + "approve_once", "approve_session", "approve_always", "deny" + ] + + @pytest.mark.asyncio + async def test_stores_approval_state(self): + adapter = _make_adapter() + + mock_response = SimpleNamespace( + success=lambda: True, + data=SimpleNamespace(message_id="msg_002"), + ) + with patch.object( + adapter, "_feishu_send_with_retry", new_callable=AsyncMock, + return_value=mock_response, + ): + await adapter.send_exec_approval( + chat_id="oc_12345", + command="echo test", + session_key="my-session-key", + ) + + assert len(adapter._approval_state) == 1 + approval_id = list(adapter._approval_state.keys())[0] + state = adapter._approval_state[approval_id] + assert state["session_key"] == "my-session-key" + assert state["message_id"] == "msg_002" + assert state["chat_id"] == "oc_12345" + + @pytest.mark.asyncio + async def test_not_connected(self): + adapter = _make_adapter() + adapter._client = None + result = await adapter.send_exec_approval( + chat_id="oc_12345", command="ls", session_key="s" + ) + assert result.success is False + + @pytest.mark.asyncio + async def test_truncates_long_command(self): + adapter = _make_adapter() + + mock_response = SimpleNamespace( + success=lambda: True, + data=SimpleNamespace(message_id="msg_003"), + ) + with patch.object( + adapter, "_feishu_send_with_retry", new_callable=AsyncMock, + return_value=mock_response, + ) as mock_send: + long_cmd = "x" * 5000 + await adapter.send_exec_approval( + chat_id="oc_12345", command=long_cmd, session_key="s" + ) + + card = json.loads(mock_send.call_args[1]["payload"]) + content = card["elements"][0]["content"] + assert "..." 
in content + assert len(content) < 5000 + + @pytest.mark.asyncio + async def test_multiple_approvals_get_unique_ids(self): + adapter = _make_adapter() + + mock_response = SimpleNamespace( + success=lambda: True, + data=SimpleNamespace(message_id="msg_x"), + ) + with patch.object( + adapter, "_feishu_send_with_retry", new_callable=AsyncMock, + return_value=mock_response, + ): + await adapter.send_exec_approval( + chat_id="oc_1", command="cmd1", session_key="s1" + ) + await adapter.send_exec_approval( + chat_id="oc_2", command="cmd2", session_key="s2" + ) + + assert len(adapter._approval_state) == 2 + ids = list(adapter._approval_state.keys()) + assert ids[0] != ids[1] + + +# =========================================================================== +# _handle_card_action_event — approval button clicks +# =========================================================================== + +class TestFeishuApprovalCallback: + """Test the approval intercept in _handle_card_action_event.""" + + @pytest.mark.asyncio + async def test_resolves_approval_on_click(self): + adapter = _make_adapter() + adapter._approval_state[1] = { + "session_key": "agent:main:feishu:group:oc_12345", + "message_id": "msg_001", + "chat_id": "oc_12345", + } + + data = _make_card_action_data( + action_value={"hermes_action": "approve_once", "approval_id": 1}, + ) + + with ( + patch.object( + adapter, "_resolve_sender_profile", new_callable=AsyncMock, + return_value={"user_id": "ou_user1", "user_name": "Norbert", "user_id_alt": None}, + ), + patch.object(adapter, "_update_approval_card", new_callable=AsyncMock) as mock_update, + patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve, + ): + await adapter._handle_card_action_event(data) + + mock_resolve.assert_called_once_with("agent:main:feishu:group:oc_12345", "once") + mock_update.assert_called_once_with("msg_001", "Approved once", "Norbert", "once") + + # State should be cleaned up + assert 1 not in adapter._approval_state + 
+ @pytest.mark.asyncio + async def test_deny_button(self): + adapter = _make_adapter() + adapter._approval_state[2] = { + "session_key": "some-session", + "message_id": "msg_002", + "chat_id": "oc_12345", + } + + data = _make_card_action_data( + action_value={"hermes_action": "deny", "approval_id": 2}, + token="tok_deny", + ) + + with ( + patch.object( + adapter, "_resolve_sender_profile", new_callable=AsyncMock, + return_value={"user_id": "ou_alice", "user_name": "Alice", "user_id_alt": None}, + ), + patch.object(adapter, "_update_approval_card", new_callable=AsyncMock) as mock_update, + patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve, + ): + await adapter._handle_card_action_event(data) + + mock_resolve.assert_called_once_with("some-session", "deny") + mock_update.assert_called_once_with("msg_002", "Denied", "Alice", "deny") + + @pytest.mark.asyncio + async def test_session_approval(self): + adapter = _make_adapter() + adapter._approval_state[3] = { + "session_key": "sess-3", + "message_id": "msg_003", + "chat_id": "oc_99", + } + + data = _make_card_action_data( + action_value={"hermes_action": "approve_session", "approval_id": 3}, + token="tok_ses", + ) + + with ( + patch.object( + adapter, "_resolve_sender_profile", new_callable=AsyncMock, + return_value={"user_id": "ou_u", "user_name": "Bob", "user_id_alt": None}, + ), + patch.object(adapter, "_update_approval_card", new_callable=AsyncMock) as mock_update, + patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve, + ): + await adapter._handle_card_action_event(data) + + mock_resolve.assert_called_once_with("sess-3", "session") + mock_update.assert_called_once_with("msg_003", "Approved for session", "Bob", "session") + + @pytest.mark.asyncio + async def test_always_approval(self): + adapter = _make_adapter() + adapter._approval_state[4] = { + "session_key": "sess-4", + "message_id": "msg_004", + "chat_id": "oc_55", + } + + data = _make_card_action_data( 
+ action_value={"hermes_action": "approve_always", "approval_id": 4}, + token="tok_alw", + ) + + with ( + patch.object( + adapter, "_resolve_sender_profile", new_callable=AsyncMock, + return_value={"user_id": "ou_u", "user_name": "Carol", "user_id_alt": None}, + ), + patch.object(adapter, "_update_approval_card", new_callable=AsyncMock), + patch("tools.approval.resolve_gateway_approval", return_value=1) as mock_resolve, + ): + await adapter._handle_card_action_event(data) + + mock_resolve.assert_called_once_with("sess-4", "always") + + @pytest.mark.asyncio + async def test_already_resolved_drops_silently(self): + adapter = _make_adapter() + # No state for approval_id 99 — already resolved + + data = _make_card_action_data( + action_value={"hermes_action": "approve_once", "approval_id": 99}, + token="tok_gone", + ) + + with patch("tools.approval.resolve_gateway_approval") as mock_resolve: + await adapter._handle_card_action_event(data) + + # Should NOT resolve — already handled + mock_resolve.assert_not_called() + + @pytest.mark.asyncio + async def test_non_approval_actions_route_normally(self): + """Non-approval card actions should still become synthetic commands.""" + adapter = _make_adapter() + + data = _make_card_action_data( + action_value={"custom_action": "something_else"}, + token="tok_normal", + ) + + with ( + patch.object( + adapter, "_resolve_sender_profile", new_callable=AsyncMock, + return_value={"user_id": "ou_u", "user_name": "Dave", "user_id_alt": None}, + ), + patch.object(adapter, "get_chat_info", new_callable=AsyncMock, return_value={"name": "Test Chat"}), + patch.object(adapter, "_handle_message_with_guards", new_callable=AsyncMock) as mock_handle, + patch("tools.approval.resolve_gateway_approval") as mock_resolve, + ): + await adapter._handle_card_action_event(data) + + # Should NOT resolve any approval + mock_resolve.assert_not_called() + # Should have routed as synthetic command + mock_handle.assert_called_once() + event = 
mock_handle.call_args[0][0] + assert "/card button" in event.text + + +# =========================================================================== +# _update_approval_card — card replacement after resolution +# =========================================================================== + +class TestFeishuUpdateApprovalCard: + """Test the card update after approval resolution.""" + + @pytest.mark.asyncio + async def test_updates_card_on_approve(self): + adapter = _make_adapter() + + mock_update = AsyncMock() + adapter._client.im.v1.message.update = MagicMock() + + with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread: + await adapter._update_approval_card( + "msg_001", "Approved once", "Norbert", "once" + ) + + mock_thread.assert_called_once() + # Verify the update request was built + call_args = mock_thread.call_args + assert call_args[0][0] == adapter._client.im.v1.message.update + + @pytest.mark.asyncio + async def test_updates_card_on_deny(self): + adapter = _make_adapter() + + with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread: + await adapter._update_approval_card( + "msg_002", "Denied", "Alice", "deny" + ) + + mock_thread.assert_called_once() + + @pytest.mark.asyncio + async def test_skips_update_when_not_connected(self): + adapter = _make_adapter() + adapter._client = None + + with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread: + await adapter._update_approval_card( + "msg_001", "Approved", "Bob", "once" + ) + + mock_thread.assert_not_called() + + @pytest.mark.asyncio + async def test_skips_update_when_no_message_id(self): + adapter = _make_adapter() + + with patch("asyncio.to_thread", new_callable=AsyncMock) as mock_thread: + await adapter._update_approval_card( + "", "Approved", "Bob", "once" + ) + + mock_thread.assert_not_called() + + @pytest.mark.asyncio + async def test_swallows_update_errors(self): + adapter = _make_adapter() + + with patch("asyncio.to_thread", new_callable=AsyncMock, 
side_effect=Exception("API error")): + # Should not raise + await adapter._update_approval_card( + "msg_001", "Approved", "Bob", "once" + ) From fff237e11198a8918086bc4a2f53300a0a48dfcf Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 7 Apr 2026 22:49:01 -0700 Subject: [PATCH 120/154] feat(cron): track delivery failures in job status (#6042) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _deliver_result() now returns Optional[str] — None on success, error message on failure. All failure paths (unknown platform, platform disabled, config load error, send failure, unresolvable target) return descriptive error strings. mark_job_run() gains delivery_error param, tracked as last_delivery_error on the job — separate from agent execution errors. A job where the agent succeeded but delivery failed shows last_status='ok' + last_delivery_error='...'. The cronjob list tool now surfaces last_delivery_error so agents and users can see when cron outputs aren't arriving. Inspired by PR #5863 (oxngon) — reimplemented with proper wiring. Tests: 3 new mark_job_run tests + 6 new _deliver_result return tests. --- cron/jobs.py | 8 +++- cron/scheduler.py | 51 +++++++++++++--------- hermes_cli/cron.py | 15 +++++++ tests/cron/test_jobs.py | 30 +++++++++++++ tests/cron/test_scheduler.py | 84 ++++++++++++++++++++++++++++++++++++ tools/cronjob_tools.py | 1 + 6 files changed, 167 insertions(+), 22 deletions(-) diff --git a/cron/jobs.py b/cron/jobs.py index 214da521..4096d1fd 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -574,12 +574,16 @@ def remove_job(job_id: str) -> bool: return False -def mark_job_run(job_id: str, success: bool, error: Optional[str] = None): +def mark_job_run(job_id: str, success: bool, error: Optional[str] = None, + delivery_error: Optional[str] = None): """ Mark a job as having been run. 
Updates last_run_at, last_status, increments completed count, computes next_run_at, and auto-deletes if repeat limit reached. + + ``delivery_error`` is tracked separately from the agent error — a job + can succeed (agent produced output) but fail delivery (platform down). """ jobs = load_jobs() for i, job in enumerate(jobs): @@ -588,6 +592,8 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None): job["last_run_at"] = now job["last_status"] = "ok" if success else "error" job["last_error"] = error if not success else None + # Track delivery failures separately — cleared on successful delivery + job["last_delivery_error"] = delivery_error # Increment completed count if job.get("repeat"): diff --git a/cron/scheduler.py b/cron/scheduler.py index 8d71248b..e164c1f3 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -196,7 +196,7 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: logger.warning("Job '%s': failed to send media %s: %s", job.get("id", "?"), media_path, e) -def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None: +def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Optional[str]: """ Deliver job output to the configured target (origin chat, specific platform, etc.). @@ -204,16 +204,16 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None: use the live adapter first — this supports E2EE rooms (e.g. Matrix) where the standalone HTTP path cannot encrypt. Falls back to standalone send if the adapter path fails or is unavailable. + + Returns None on success, or an error string on failure. 
""" target = _resolve_delivery_target(job) if not target: if job.get("deliver", "local") != "local": - logger.warning( - "Job '%s' deliver=%s but no concrete delivery target could be resolved", - job["id"], - job.get("deliver", "local"), - ) - return + msg = f"no delivery target resolved for deliver={job.get('deliver', 'local')}" + logger.warning("Job '%s': %s", job["id"], msg) + return msg + return None # local-only jobs don't deliver — not a failure platform_name = target["platform"] chat_id = target["chat_id"] @@ -239,19 +239,22 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None: } platform = platform_map.get(platform_name.lower()) if not platform: - logger.warning("Job '%s': unknown platform '%s' for delivery", job["id"], platform_name) - return + msg = f"unknown platform '{platform_name}'" + logger.warning("Job '%s': %s", job["id"], msg) + return msg try: config = load_gateway_config() except Exception as e: - logger.error("Job '%s': failed to load gateway config for delivery: %s", job["id"], e) - return + msg = f"failed to load gateway config: {e}" + logger.error("Job '%s': %s", job["id"], msg) + return msg pconfig = config.platforms.get(platform) if not pconfig or not pconfig.enabled: - logger.warning("Job '%s': platform '%s' not configured/enabled", job["id"], platform_name) - return + msg = f"platform '{platform_name}' not configured/enabled" + logger.warning("Job '%s': %s", job["id"], msg) + return msg # Optionally wrap the content with a header/footer so the user knows this # is a cron delivery. 
Wrapping is on by default; set cron.wrap_response: false @@ -307,7 +310,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None: if adapter_ok: logger.info("Job '%s': delivered to %s:%s via live adapter", job["id"], platform_name, chat_id) - return + return None except Exception as e: logger.warning( "Job '%s': live adapter delivery to %s:%s failed (%s), falling back to standalone", @@ -329,13 +332,17 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> None: future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)) result = future.result(timeout=30) except Exception as e: - logger.error("Job '%s': delivery to %s:%s failed: %s", job["id"], platform_name, chat_id, e) - return + msg = f"delivery to {platform_name}:{chat_id} failed: {e}" + logger.error("Job '%s': %s", job["id"], msg) + return msg if result and result.get("error"): - logger.error("Job '%s': delivery error: %s", job["id"], result["error"]) - else: - logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id) + msg = f"delivery error: {result['error']}" + logger.error("Job '%s': %s", job["id"], msg) + return msg + + logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id) + return None _SCRIPT_TIMEOUT = 120 # seconds @@ -868,13 +875,15 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: logger.info("Job '%s': agent returned %s — skipping delivery", job["id"], SILENT_MARKER) should_deliver = False + delivery_error = None if should_deliver: try: - _deliver_result(job, deliver_content, adapters=adapters, loop=loop) + delivery_error = _deliver_result(job, deliver_content, adapters=adapters, loop=loop) except Exception as de: + delivery_error = str(de) logger.error("Delivery failed for job %s: %s", job["id"], de) - mark_job_run(job["id"], success, error) + mark_job_run(job["id"], success, error, 
delivery_error=delivery_error) executed += 1 except Exception as e: diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index d10513a2..e0ab6007 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -93,6 +93,21 @@ def cron_list(show_all: bool = False): script = job.get("script") if script: print(f" Script: {script}") + + # Execution history + last_status = job.get("last_status") + if last_status: + last_run = job.get("last_run_at", "?") + if last_status == "ok": + status_display = color("ok", Colors.GREEN) + else: + status_display = color(f"{last_status}: {job.get('last_error', '?')}", Colors.RED) + print(f" Last run: {last_run} {status_display}") + + delivery_err = job.get("last_delivery_error") + if delivery_err: + print(f" {color('⚠ Delivery failed:', Colors.YELLOW)} {delivery_err}") + print() from hermes_cli.gateway import find_gateway_pids diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index cca46010..e0f56b96 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -339,6 +339,36 @@ class TestMarkJobRun: assert updated["last_status"] == "error" assert updated["last_error"] == "timeout" + def test_delivery_error_tracked_separately(self, tmp_cron_dir): + """Agent succeeds but delivery fails — both tracked independently.""" + job = create_job(prompt="Report", schedule="every 1h") + mark_job_run(job["id"], success=True, delivery_error="platform 'telegram' not configured") + updated = get_job(job["id"]) + assert updated["last_status"] == "ok" + assert updated["last_error"] is None + assert updated["last_delivery_error"] == "platform 'telegram' not configured" + + def test_delivery_error_cleared_on_success(self, tmp_cron_dir): + """Successful delivery clears the previous delivery error.""" + job = create_job(prompt="Report", schedule="every 1h") + mark_job_run(job["id"], success=True, delivery_error="network timeout") + updated = get_job(job["id"]) + assert updated["last_delivery_error"] == "network timeout" + # Next run 
delivers successfully + mark_job_run(job["id"], success=True, delivery_error=None) + updated = get_job(job["id"]) + assert updated["last_delivery_error"] is None + + def test_both_agent_and_delivery_error(self, tmp_cron_dir): + """Agent fails AND delivery fails — both errors recorded.""" + job = create_job(prompt="Report", schedule="every 1h") + mark_job_run(job["id"], success=False, error="model timeout", + delivery_error="platform 'discord' not enabled") + updated = get_job(job["id"]) + assert updated["last_status"] == "error" + assert updated["last_error"] == "model timeout" + assert updated["last_delivery_error"] == "platform 'discord' not enabled" + class TestAdvanceNextRun: """Tests for advance_next_run() — crash-safety for recurring jobs.""" diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index 4a15fa22..c07663a3 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -508,6 +508,90 @@ class TestDeliverResultWrapping: assert send_mock.call_args.kwargs["thread_id"] == "17585" +class TestDeliverResultErrorReturns: + """Verify _deliver_result returns error strings on failure, None on success.""" + + def test_returns_none_on_successful_delivery(self): + from gateway.config import Platform + + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.TELEGRAM: pconfig} + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})): + job = { + "id": "ok-job", + "deliver": "origin", + "origin": {"platform": "telegram", "chat_id": "123"}, + } + result = _deliver_result(job, "Output.") + assert result is None + + def test_returns_none_for_local_delivery(self): + """local-only jobs don't deliver — not a failure.""" + job = {"id": "local-job", "deliver": "local"} + result = _deliver_result(job, "Output.") + assert result is None + + def 
test_returns_error_for_unknown_platform(self): + job = { + "id": "bad-platform", + "deliver": "origin", + "origin": {"platform": "fax", "chat_id": "123"}, + } + with patch("gateway.config.load_gateway_config"): + result = _deliver_result(job, "Output.") + assert result is not None + assert "unknown platform" in result + + def test_returns_error_when_platform_disabled(self): + from gateway.config import Platform + + pconfig = MagicMock() + pconfig.enabled = False + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.TELEGRAM: pconfig} + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg): + job = { + "id": "disabled", + "deliver": "origin", + "origin": {"platform": "telegram", "chat_id": "123"}, + } + result = _deliver_result(job, "Output.") + assert result is not None + assert "not configured" in result + + def test_returns_error_on_send_failure(self): + from gateway.config import Platform + + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.TELEGRAM: pconfig} + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"error": "rate limited"})): + job = { + "id": "rate-limited", + "deliver": "origin", + "origin": {"platform": "telegram", "chat_id": "123"}, + } + result = _deliver_result(job, "Output.") + assert result is not None + assert "rate limited" in result + + def test_returns_error_for_unresolved_target(self, monkeypatch): + """Non-local delivery with no resolvable target should return an error.""" + monkeypatch.delenv("TELEGRAM_HOME_CHANNEL", raising=False) + job = {"id": "no-target", "deliver": "telegram"} + result = _deliver_result(job, "Output.") + assert result is not None + assert "no delivery target" in result + + class TestRunJobSessionPersistence: def test_run_job_passes_session_db_and_cron_platform(self, tmp_path): job = { diff --git a/tools/cronjob_tools.py 
b/tools/cronjob_tools.py index 82d43c58..595ad8bc 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -195,6 +195,7 @@ def _format_job(job: Dict[str, Any]) -> Dict[str, Any]: "next_run_at": job.get("next_run_at"), "last_run_at": job.get("last_run_at"), "last_status": job.get("last_status"), + "last_delivery_error": job.get("last_delivery_error"), "enabled": job.get("enabled", True), "state": job.get("state", "scheduled" if job.get("enabled", True) else "paused"), "paused_at": job.get("paused_at"), From e1befe5077b219967a1f075bc7bacca529861bd6 Mon Sep 17 00:00:00 2001 From: zocomputer Date: Tue, 7 Apr 2026 22:49:31 -0700 Subject: [PATCH 121/154] feat(agent): add jittered retry backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds agent/retry_utils.py with jittered_backoff() — exponential backoff with additive jitter to prevent thundering-herd retry spikes when multiple gateway sessions hit the same rate-limited provider. Replaces fixed exponential backoff at 4 call sites: - run_agent.py: None-choices retry path (5s base, 120s cap) - run_agent.py: API error retry path (2s base, 60s cap) - trajectory_compressor.py: sync + async summarization retries Thread-safe jitter counter with overflow guards ensures unique seeds across concurrent retries. Trimmed from original PR to keep only wired-in functionality. Co-authored-by: martinp09 --- agent/retry_utils.py | 57 +++++++++++++++++++ run_agent.py | 6 +- tests/test_retry_utils.py | 117 ++++++++++++++++++++++++++++++++++++++ trajectory_compressor.py | 5 +- 4 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 agent/retry_utils.py create mode 100644 tests/test_retry_utils.py diff --git a/agent/retry_utils.py b/agent/retry_utils.py new file mode 100644 index 00000000..71d6963f --- /dev/null +++ b/agent/retry_utils.py @@ -0,0 +1,57 @@ +"""Retry utilities — jittered backoff for decorrelated retries. 
+ +Replaces fixed exponential backoff with jittered delays to prevent +thundering-herd retry spikes when multiple sessions hit the same +rate-limited provider concurrently. +""" + +import random +import threading +import time + +# Monotonic counter for jitter seed uniqueness within the same process. +# Protected by a lock to avoid race conditions in concurrent retry paths +# (e.g. multiple gateway sessions retrying simultaneously). +_jitter_counter = 0 +_jitter_lock = threading.Lock() + + +def jittered_backoff( + attempt: int, + *, + base_delay: float = 5.0, + max_delay: float = 120.0, + jitter_ratio: float = 0.5, +) -> float: + """Compute a jittered exponential backoff delay. + + Args: + attempt: 1-based retry attempt number. + base_delay: Base delay in seconds for attempt 1. + max_delay: Maximum delay cap in seconds. + jitter_ratio: Fraction of computed delay to use as random jitter + range. 0.5 means jitter is uniform in [0, 0.5 * delay]. + + Returns: + Delay in seconds: min(base * 2^(attempt-1), max_delay) + jitter. + + The jitter decorrelates concurrent retries so multiple sessions + hitting the same provider don't all retry at the same instant. + """ + global _jitter_counter + with _jitter_lock: + _jitter_counter += 1 + tick = _jitter_counter + + exponent = max(0, attempt - 1) + if exponent >= 63 or base_delay <= 0: + delay = max_delay + else: + delay = min(base_delay * (2 ** exponent), max_delay) + + # Seed from time + counter for decorrelation even with coarse clocks. 
+ seed = (time.time_ns() ^ (tick * 0x9E3779B9)) & 0xFFFFFFFF + rng = random.Random(seed) + jitter = rng.uniform(0, jitter_ratio * delay) + + return delay + jitter diff --git a/run_agent.py b/run_agent.py index 343110ec..22928bb1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -75,6 +75,7 @@ from hermes_constants import OPENROUTER_BASE_URL # Agent internals extracted to agent/ package for modularity from agent.memory_manager import build_memory_context_block +from agent.retry_utils import jittered_backoff from agent.prompt_builder import ( DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS, MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE, @@ -7541,7 +7542,8 @@ class AIAgent: } # Longer backoff for rate limiting (likely cause of None choices) - wait_time = min(5 * (2 ** (retry_count - 1)), 120) # 5s, 10s, 20s, 40s, 80s, 120s + # Jittered exponential: 5s base, 120s cap + random jitter + wait_time = jittered_backoff(retry_count, base_delay=5.0, max_delay=120.0) self._vprint(f"{self.log_prefix}⏳ Retrying in {wait_time}s (extended backoff for possible rate limit)...", force=True) logging.warning(f"Invalid API response (retry {retry_count}/{max_retries}): {', '.join(error_details)} | Provider: {provider_name}") @@ -8398,7 +8400,7 @@ class AIAgent: _retry_after = min(int(_ra_raw), 120) # Cap at 2 minutes except (TypeError, ValueError): pass - wait_time = _retry_after if _retry_after else min(2 ** retry_count, 60) + wait_time = _retry_after if _retry_after else jittered_backoff(retry_count, base_delay=2.0, max_delay=60.0) if is_rate_limited: self._emit_status(f"⏱️ Rate limit reached. 
Waiting {wait_time}s before retry (attempt {retry_count + 1}/{max_retries})...") else: diff --git a/tests/test_retry_utils.py b/tests/test_retry_utils.py new file mode 100644 index 00000000..f39c3142 --- /dev/null +++ b/tests/test_retry_utils.py @@ -0,0 +1,117 @@ +"""Tests for agent.retry_utils jittered backoff.""" + +import threading + +import agent.retry_utils as retry_utils +from agent.retry_utils import jittered_backoff + + +def test_backoff_is_exponential(): + """Base delay should double each attempt (before jitter).""" + for attempt in (1, 2, 3, 4): + delays = [jittered_backoff(attempt, base_delay=5.0, max_delay=120.0, jitter_ratio=0.0) for _ in range(100)] + expected = min(5.0 * (2 ** (attempt - 1)), 120.0) + mean = sum(delays) / len(delays) + assert abs(mean - expected) < 0.01, f"attempt {attempt}: expected {expected}, got {mean}" + + +def test_backoff_respects_max_delay(): + """Even with high attempt numbers, delay should not exceed max_delay.""" + for attempt in (10, 20, 100): + delay = jittered_backoff(attempt, base_delay=5.0, max_delay=60.0, jitter_ratio=0.0) + assert delay <= 60.0, f"attempt {attempt}: delay {delay} exceeds max 60s" + + +def test_backoff_adds_jitter(): + """With jitter enabled, delays should vary across calls.""" + delays = [jittered_backoff(1, base_delay=10.0, max_delay=120.0, jitter_ratio=0.5) for _ in range(50)] + assert min(delays) != max(delays), "jitter should produce varying delays" + assert all(d >= 10.0 for d in delays), "jittered delay should be >= base delay" + assert all(d <= 15.0 for d in delays), "jittered delay should be bounded" + + +def test_backoff_attempt_1_is_base(): + """First attempt delay should equal base_delay (with no jitter).""" + delay = jittered_backoff(1, base_delay=3.0, max_delay=120.0, jitter_ratio=0.0) + assert delay == 3.0 + + +def test_backoff_with_zero_base_delay_returns_max(): + """base_delay=0 should return max_delay (guard against busy-wait).""" + delay = jittered_backoff(1, base_delay=0.0, 
max_delay=60.0, jitter_ratio=0.0) + assert delay == 60.0 + + +def test_backoff_with_extreme_attempt_returns_max(): + """Very large attempt numbers should not overflow and should return max_delay.""" + delay = jittered_backoff(999, base_delay=5.0, max_delay=120.0, jitter_ratio=0.0) + assert delay == 120.0 + + +def test_backoff_negative_attempt_treated_as_one(): + """Negative attempt should not crash and behaves like attempt=1.""" + delay = jittered_backoff(-5, base_delay=10.0, max_delay=120.0, jitter_ratio=0.0) + assert delay == 10.0 + + +def test_backoff_thread_safety(): + """Concurrent calls should generally produce different delays.""" + results = [] + barrier = threading.Barrier(8) + + def _call_backoff(): + barrier.wait() + results.append(jittered_backoff(1, base_delay=10.0, max_delay=120.0, jitter_ratio=0.5)) + + threads = [threading.Thread(target=_call_backoff) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + + assert len(results) == 8 + unique = len(set(results)) + assert unique >= 6, f"Expected mostly unique delays, got {unique}/8 unique" + + +def test_backoff_uses_locked_tick_for_seed(monkeypatch): + """Seed derivation should use per-call tick captured under lock.""" + import time + + monkeypatch.setattr(retry_utils, "_jitter_counter", 0) + + recorded_seeds = [] + + class _RecordingRandom: + def __init__(self, seed): + recorded_seeds.append(seed) + + def uniform(self, a, b): + return 0.0 + + monkeypatch.setattr(retry_utils.random, "Random", _RecordingRandom) + + fixed_time_ns = 123456789 + + def _time_ns_wait_for_two_ticks(): + deadline = time.time() + 2.0 + while retry_utils._jitter_counter < 2 and time.time() < deadline: + time.sleep(0.001) + return fixed_time_ns + + monkeypatch.setattr(retry_utils.time, "time_ns", _time_ns_wait_for_two_ticks) + + barrier = threading.Barrier(2) + + def _call(): + barrier.wait() + jittered_backoff(1, base_delay=10.0, max_delay=120.0, jitter_ratio=0.5) + + threads = 
[threading.Thread(target=_call) for _ in range(2)] + for t in threads: + t.start() + for t in threads: + t.join(timeout=5) + + assert len(recorded_seeds) == 2 + assert len(set(recorded_seeds)) == 2, f"Expected unique seeds, got {recorded_seeds}" diff --git a/trajectory_compressor.py b/trajectory_compressor.py index e4faf97a..24c1f722 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -44,6 +44,7 @@ import fire from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, TimeElapsedColumn, TimeRemainingColumn from rich.console import Console from hermes_constants import OPENROUTER_BASE_URL +from agent.retry_utils import jittered_backoff # Load environment variables from dotenv import load_dotenv @@ -585,7 +586,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}") if attempt < self.config.max_retries - 1: - time.sleep(self.config.retry_delay * (attempt + 1)) + time.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0)) else: # Fallback: create a basic summary return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]" @@ -647,7 +648,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" self.logger.warning(f"Summarization attempt {attempt + 1} failed: {e}") if attempt < self.config.max_retries - 1: - await asyncio.sleep(self.config.retry_delay * (attempt + 1)) + await asyncio.sleep(jittered_backoff(attempt + 1, base_delay=self.config.retry_delay, max_delay=30.0)) else: # Fallback: create a basic summary return "[CONTEXT SUMMARY]: [Summary generation failed - previous turns contained tool calls and responses that have been compressed to save context space.]" From ab271ebe102b0602d5ccbcd5ea0371843e081388 Mon Sep 17 00:00:00 2001 From: Mibayy Date: Tue, 7 Apr 2026 22:24:36 
-0700 Subject: [PATCH 122/154] =?UTF-8?q?fix(vision):=20simplify=20vision=20auto?= =?UTF-8?q?-detection=20to=20openrouter=20=E2=86=92=20nous=20=E2=86=92=20a?= =?UTF-8?q?ctive=20provider?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the vision auto-detection chain from 5 backends (openrouter, nous, codex, anthropic, custom) down to 3: 1. OpenRouter (known vision-capable default model) 2. Nous Portal (known vision-capable default model) 3. Active provider + model (whatever the user is running) 4. Stop This is simpler and more predictable. The active provider step uses resolve_provider_client() which handles all provider types including named custom providers (from #5978). Removed the complex preferred-provider promotion logic and API-level fallback — the chain is short enough that it doesn't need them. Based on PR #5376 by Mibay. Closes #5366. --- agent/auxiliary_client.py | 54 +++++++++------ tests/agent/test_auxiliary_client.py | 98 ++++++++++------------------ 2 files changed, 69 insertions(+), 83 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 4b156a4e..c7391833 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1425,9 +1425,6 @@ def get_async_text_auxiliary_client(task: str = ""): _VISION_AUTO_PROVIDER_ORDER = ( "openrouter", "nous", - "openai-codex", - "anthropic", - "custom", ) @@ -1473,17 +1470,20 @@ def _preferred_main_vision_provider() -> Optional[str]: def get_available_vision_backends() -> List[str]: """Return the currently available vision backends in auto-selection order. - This is the single source of truth for setup, tool gating, and runtime - auto-routing of vision tasks. The selected main provider is preferred when - it is also a known-good vision backend; otherwise Hermes falls back through - the standard conservative order. + Order: OpenRouter → Nous → active provider. 
This is the single source + of truth for setup, tool gating, and runtime auto-routing of vision tasks. """ - ordered = list(_VISION_AUTO_PROVIDER_ORDER) - preferred = _preferred_main_vision_provider() - if preferred in ordered: - ordered.remove(preferred) - ordered.insert(0, preferred) - return [provider for provider in ordered if _strict_vision_backend_available(provider)] + available = [p for p in _VISION_AUTO_PROVIDER_ORDER + if _strict_vision_backend_available(p)] + # Also check the user's active provider (may be DeepSeek, Alibaba, named + # custom, etc.) — resolve_provider_client handles all provider types. + main_provider = _read_main_provider() + if (main_provider and main_provider not in ("auto", "") + and main_provider not in available): + client, _ = resolve_provider_client(main_provider, _read_main_model()) + if client is not None: + available.append(main_provider) + return available def resolve_vision_provider_client( @@ -1528,16 +1528,30 @@ def resolve_vision_provider_client( return "custom", client, final_model if requested == "auto": - ordered = list(_VISION_AUTO_PROVIDER_ORDER) - preferred = _preferred_main_vision_provider() - if preferred in ordered: - ordered.remove(preferred) - ordered.insert(0, preferred) - - for candidate in ordered: + # Vision auto-detection order: + # 1. OpenRouter (known vision-capable default model) + # 2. Nous Portal (known vision-capable default model) + # 3. Active provider + model (user's main chat config) + # 4. Stop + for candidate in _VISION_AUTO_PROVIDER_ORDER: sync_client, default_model = _resolve_strict_vision_backend(candidate) if sync_client is not None: return _finalize(candidate, sync_client, default_model) + + # Fall back to the user's active provider + model. 
+ main_provider = _read_main_provider() + main_model = _read_main_model() + if main_provider and main_provider not in ("auto", ""): + sync_client, resolved_model = resolve_provider_client( + main_provider, main_model) + if sync_client is not None: + logger.info( + "Vision auto-detect: using active provider %s (%s)", + main_provider, resolved_model or main_model, + ) + return _finalize( + main_provider, sync_client, resolved_model or main_model) + logger.debug("Auxiliary vision client: none available") return None, None, None diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 22da03cf..c7cd12ae 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -641,12 +641,15 @@ class TestVisionClientFallback: assert client is None assert model is None - def test_vision_auto_includes_anthropic_when_configured(self, monkeypatch): - monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key") + def test_vision_auto_includes_active_provider_when_configured(self, monkeypatch): + """Active provider appears in available backends when credentials exist.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "***") with ( patch("agent.auxiliary_client._read_nous_auth", return_value=None), + patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"), + patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"), patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), - patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"), ): backends = get_available_vision_backends() @@ -719,88 +722,50 @@ class TestAuxiliaryPoolAwareness: assert call_kwargs["base_url"] == "https://api.githubcopilot.com" assert call_kwargs["default_headers"]["Editor-Version"] - def test_vision_auto_uses_anthropic_when_no_higher_priority_backend(self, monkeypatch): - 
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key") + def test_vision_auto_uses_active_provider_as_fallback(self, monkeypatch): + """When no OpenRouter/Nous available, vision auto falls back to active provider.""" + monkeypatch.setenv("ANTHROPIC_API_KEY", "***") with ( patch("agent.auxiliary_client._read_nous_auth", return_value=None), + patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"), + patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"), patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), - patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"), ): client, model = get_vision_auxiliary_client() assert client is not None assert client.__class__.__name__ == "AnthropicAuxiliaryClient" - assert model == "claude-haiku-4-5-20251001" - def test_selected_anthropic_provider_is_preferred_for_vision_auto(self, monkeypatch): + def test_vision_auto_prefers_openrouter_over_active_provider(self, monkeypatch): + """OpenRouter is tried before the active provider in vision auto.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") - monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key") - - def fake_load_config(): - return {"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}} + monkeypatch.setenv("ANTHROPIC_API_KEY", "***") with ( patch("agent.auxiliary_client._read_nous_auth", return_value=None), - patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), - patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"), + patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"), + patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"), patch("agent.auxiliary_client.OpenAI") as mock_openai, - patch("hermes_cli.config.load_config", fake_load_config), - ): 
- client, model = get_vision_auxiliary_client() - - assert client is not None - assert client.__class__.__name__ == "AnthropicAuxiliaryClient" - assert model == "claude-haiku-4-5-20251001" - - def test_selected_codex_provider_short_circuits_vision_auto(self, monkeypatch): - def fake_load_config(): - return {"model": {"provider": "openai-codex", "default": "gpt-5.2-codex"}} - - codex_client = MagicMock() - with ( - patch("hermes_cli.config.load_config", fake_load_config), - patch("agent.auxiliary_client._try_codex", return_value=(codex_client, "gpt-5.2-codex")) as mock_codex, - patch("agent.auxiliary_client._try_openrouter") as mock_openrouter, - patch("agent.auxiliary_client._try_nous") as mock_nous, - patch("agent.auxiliary_client._try_anthropic") as mock_anthropic, - patch("agent.auxiliary_client._try_custom_endpoint") as mock_custom, ): provider, client, model = resolve_vision_provider_client() - assert provider == "openai-codex" - assert client is codex_client - assert model == "gpt-5.2-codex" - mock_codex.assert_called_once() - mock_openrouter.assert_not_called() - mock_nous.assert_not_called() - mock_anthropic.assert_not_called() - mock_custom.assert_not_called() + # OpenRouter should win over anthropic active provider + assert provider == "openrouter" - def test_vision_auto_includes_codex(self, codex_auth_dir): - """Codex supports vision (gpt-5.3-codex), so auto mode should use it.""" - with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ - patch("agent.auxiliary_client.OpenAI"): - client, model = get_vision_auxiliary_client() - from agent.auxiliary_client import CodexAuxiliaryClient - assert isinstance(client, CodexAuxiliaryClient) - assert model == "gpt-5.2-codex" - - def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch): - """Custom endpoint is used as fallback in vision auto mode. - - Many local models (Qwen-VL, LLaVA, etc.) support vision. - When no OpenRouter/Nous/Codex is available, try the custom endpoint. 
- """ + def test_vision_auto_uses_named_custom_as_active_provider(self, monkeypatch): + """Named custom provider works as active provider fallback in vision auto.""" monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False) with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)), \ - patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \ - patch("agent.auxiliary_client._resolve_custom_runtime", - return_value=("http://localhost:1234/v1", "local-key")), \ - patch("agent.auxiliary_client.OpenAI") as mock_openai: - client, model = get_vision_auxiliary_client() - assert client is not None # Custom endpoint picked up as fallback + patch("agent.auxiliary_client._read_main_provider", return_value="custom:local"), \ + patch("agent.auxiliary_client._read_main_model", return_value="my-local-model"), \ + patch("agent.auxiliary_client.resolve_provider_client", + return_value=(MagicMock(), "my-local-model")) as mock_resolve: + provider, client, model = resolve_vision_provider_client() + assert client is not None + assert provider == "custom:local" def test_vision_direct_endpoint_override(self, monkeypatch): monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") @@ -888,7 +853,14 @@ class TestAuxiliaryPoolAwareness: monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main") monkeypatch.delenv("OPENAI_BASE_URL", raising=False) monkeypatch.delenv("OPENAI_API_KEY", raising=False) + # Clear client cache to avoid stale entries from previous tests + from agent.auxiliary_client import _client_cache + _client_cache.clear() with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_main_provider", return_value=""), \ + patch("agent.auxiliary_client._read_main_model", return_value=""), \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)), \ 
+ patch("agent.auxiliary_client._resolve_custom_runtime", return_value=(None, None)), \ patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \ patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)): client, model = get_vision_auxiliary_client() From 22d1bda1856d64a7af8c9da61d9e17a96f4fd204 Mon Sep 17 00:00:00 2001 From: kshitij Date: Wed, 8 Apr 2026 01:39:28 -0700 Subject: [PATCH 123/154] fix(minimax): correct context lengths, model catalog, thinking guard, aux model, and config base_url MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry-picked from PR #6046 by kshitijk4poor with dead code stripped. - Context lengths: 204800 → 1M (M1) / 1048576 (M2.5/M2.7) per official docs - Model catalog: add M1 family, remove deprecated M2.1 and highspeed variants - Thinking guard: skip extended thinking for MiniMax (Anthropic-compat endpoint) - Aux model: MiniMax-M2.7-highspeed → MiniMax-M2.7 (same model, half price) - Config base_url: honour model.base_url for API-key providers (fixes China users) - Stripped unused get_minimax_max_output() / _MINIMAX_MAX_OUTPUT (no consumer) Fixes #5777, #4082, #6039. Closes #3895. --- agent/anthropic_adapter.py | 4 +- agent/auxiliary_client.py | 4 +- agent/model_metadata.py | 13 ++- hermes_cli/models.py | 20 ++-- hermes_cli/runtime_provider.py | 20 +++- hermes_cli/setup.py | 4 +- tests/agent/test_minimax_provider.py | 105 ++++++++++++++++++ .../test_runtime_provider_resolution.py | 49 ++++++++ .../hermes_cli/test_setup_model_selection.py | 4 +- 9 files changed, 203 insertions(+), 20 deletions(-) create mode 100644 tests/agent/test_minimax_provider.py diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index f4e8dcee..3292f0c6 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -1224,9 +1224,9 @@ def build_anthropic_kwargs( # Map reasoning_config to Anthropic's thinking parameter. 
# Claude 4.6 models use adaptive thinking + output_config.effort. # Older models use manual thinking with budget_tokens. - # Haiku models do NOT support extended thinking at all — skip entirely. + # Haiku and MiniMax models do NOT support extended thinking — skip entirely. if reasoning_config and isinstance(reasoning_config, dict): - if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): + if reasoning_config.get("enabled") is not False and "haiku" not in model.lower() and "minimax" not in model.lower(): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) if _supports_adaptive_thinking(model): diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index c7391833..2b99ac07 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -99,8 +99,8 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "gemini": "gemini-3-flash-preview", "zai": "glm-4.5-flash", "kimi-coding": "kimi-k2-turbo-preview", - "minimax": "MiniMax-M2.7-highspeed", - "minimax-cn": "MiniMax-M2.7-highspeed", + "minimax": "MiniMax-M2.7", + "minimax-cn": "MiniMax-M2.7", "anthropic": "claude-haiku-4-5-20251001", "ai-gateway": "google/gemini-3-flash", "opencode-zen": "gemini-3-flash", diff --git a/agent/model_metadata.py b/agent/model_metadata.py index a5fb11f5..0a227118 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -113,8 +113,15 @@ DEFAULT_CONTEXT_LENGTHS = { "llama": 131072, # Qwen "qwen": 131072, - # MiniMax - "minimax": 204800, + # MiniMax (lowercase — lookup lowercases model names at line 973) + "minimax-m1-256k": 1000000, + "minimax-m1-128k": 1000000, + "minimax-m1-80k": 1000000, + "minimax-m1-40k": 1000000, + "minimax-m1": 1000000, + "minimax-m2.5": 1048576, + "minimax-m2.7": 1048576, + "minimax": 1048576, # GLM "glm": 202752, # Kimi @@ -127,7 +134,7 @@ DEFAULT_CONTEXT_LENGTHS = { "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, 
"moonshotai/Kimi-K2-Thinking": 262144, - "MiniMaxAI/MiniMax-M2.5": 204800, + "minimaxai/minimax-m2.5": 1048576, "XiaomiMiMo/MiMo-V2-Flash": 32768, "mimo-v2-pro": 1048576, "mimo-v2-omni": 1048576, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 4b37bc9e..aa68f877 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -144,18 +144,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-0905-preview", ], "minimax": [ - "MiniMax-M2.7", - "MiniMax-M2.7-highspeed", + "MiniMax-M1", + "MiniMax-M1-40k", + "MiniMax-M1-80k", + "MiniMax-M1-128k", + "MiniMax-M1-256k", "MiniMax-M2.5", - "MiniMax-M2.5-highspeed", - "MiniMax-M2.1", + "MiniMax-M2.7", ], "minimax-cn": [ - "MiniMax-M2.7", - "MiniMax-M2.7-highspeed", + "MiniMax-M1", + "MiniMax-M1-40k", + "MiniMax-M1-80k", + "MiniMax-M1-128k", + "MiniMax-M1-256k", "MiniMax-M2.5", - "MiniMax-M2.5-highspeed", - "MiniMax-M2.1", + "MiniMax-M2.7", ], "anthropic": [ "claude-opus-4-6", diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 9c82ef62..fa9d4939 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -163,6 +163,16 @@ def _resolve_runtime_from_pool_entry( api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", "")) else: configured_provider = str(model_cfg.get("provider") or "").strip().lower() + # Honour model.base_url from config.yaml when the configured provider + # matches this provider — same pattern as the Anthropic branch above. + # Only override when the pool entry has no explicit base_url (i.e. it + # fell back to the hardcoded default). Env var overrides win (#6039). 
+ pconfig = PROVIDER_REGISTRY.get(provider) + pool_url_is_default = pconfig and base_url.rstrip("/") == pconfig.inference_base_url.rstrip("/") + if configured_provider == provider and pool_url_is_default: + cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/") + if cfg_base_url: + base_url = cfg_base_url configured_mode = _parse_api_mode(model_cfg.get("api_mode")) if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider): api_mode = configured_mode @@ -724,7 +734,15 @@ def resolve_runtime_provider( pconfig = PROVIDER_REGISTRY.get(provider) if pconfig and pconfig.auth_type == "api_key": creds = resolve_api_key_provider_credentials(provider) - base_url = creds.get("base_url", "").rstrip("/") + # Honour model.base_url from config.yaml when the configured provider + # matches this provider — mirrors the Anthropic path above. Without + # this, users who set model.base_url to e.g. api.minimaxi.com/anthropic + # (China endpoint) still get the hardcoded api.minimax.io default (#6039). 
+ cfg_provider = str(model_cfg.get("provider") or "").strip().lower() + cfg_base_url = "" + if cfg_provider == provider: + cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/") + base_url = cfg_base_url or creds.get("base_url", "").rstrip("/") api_mode = "chat_completions" if provider == "copilot": api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", "")) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 2407ca27..43c3b086 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -105,8 +105,8 @@ _DEFAULT_PROVIDER_MODELS = { ], "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], - "minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], - "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"], + "minimax": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"], + "minimax-cn": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py new file mode 100644 index 00000000..c6819e87 --- /dev/null +++ b/tests/agent/test_minimax_provider.py @@ -0,0 +1,105 @@ +"""Tests for MiniMax provider hardening — context lengths, thinking guard, catalog.""" + + +class TestMinimaxContextLengths: + """Verify per-model context length entries for 
MiniMax models.""" + + def test_m1_variants_have_1m_context(self): + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + # Keys are lowercase because the lookup lowercases model names + for model in ("minimax-m1", "minimax-m1-40k", "minimax-m1-80k", + "minimax-m1-128k", "minimax-m1-256k"): + assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths" + assert DEFAULT_CONTEXT_LENGTHS[model] == 1_000_000, f"{model} expected 1M" + + def test_m2_variants_have_1m_context(self): + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + # Keys are lowercase because the lookup lowercases model names + for model in ("minimax-m2.5", "minimax-m2.7"): + assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths" + assert DEFAULT_CONTEXT_LENGTHS[model] == 1_048_576, f"{model} expected 1048576" + + def test_minimax_prefix_fallback(self): + from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + # The generic "minimax" prefix entry should be 1M for unknown models + assert DEFAULT_CONTEXT_LENGTHS["minimax"] == 1_048_576 + + + +class TestMinimaxThinkingGuard: + """Verify that build_anthropic_kwargs does NOT add thinking params for MiniMax models.""" + + def test_no_thinking_for_minimax_m27(self): + from agent.anthropic_adapter import build_anthropic_kwargs + kwargs = build_anthropic_kwargs( + model="MiniMax-M2.7", + messages=[{"role": "user", "content": "hello"}], + tools=None, + max_tokens=4096, + reasoning_config={"enabled": True, "effort": "medium"}, + ) + assert "thinking" not in kwargs + assert "output_config" not in kwargs + + def test_no_thinking_for_minimax_m1(self): + from agent.anthropic_adapter import build_anthropic_kwargs + kwargs = build_anthropic_kwargs( + model="MiniMax-M1-128k", + messages=[{"role": "user", "content": "hello"}], + tools=None, + max_tokens=4096, + reasoning_config={"enabled": True, "effort": "high"}, + ) + assert "thinking" not in kwargs + + def test_thinking_still_works_for_claude(self): + from 
agent.anthropic_adapter import build_anthropic_kwargs + kwargs = build_anthropic_kwargs( + model="claude-sonnet-4-20250514", + messages=[{"role": "user", "content": "hello"}], + tools=None, + max_tokens=4096, + reasoning_config={"enabled": True, "effort": "medium"}, + ) + assert "thinking" in kwargs + + +class TestMinimaxAuxModel: + """Verify auxiliary model is standard (not highspeed).""" + + def test_minimax_aux_is_standard(self): + from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS + assert _API_KEY_PROVIDER_AUX_MODELS["minimax"] == "MiniMax-M2.7" + assert _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] == "MiniMax-M2.7" + + def test_minimax_aux_not_highspeed(self): + from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS + assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax"] + assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] + + +class TestMinimaxModelCatalog: + """Verify the model catalog includes M1 family and excludes deprecated models.""" + + def test_catalog_includes_m1_family(self): + from hermes_cli.models import _PROVIDER_MODELS + for provider in ("minimax", "minimax-cn"): + models = _PROVIDER_MODELS[provider] + assert "MiniMax-M1" in models + assert "MiniMax-M1-40k" in models + assert "MiniMax-M1-80k" in models + assert "MiniMax-M1-128k" in models + assert "MiniMax-M1-256k" in models + + def test_catalog_excludes_deprecated(self): + from hermes_cli.models import _PROVIDER_MODELS + for provider in ("minimax", "minimax-cn"): + models = _PROVIDER_MODELS[provider] + assert "MiniMax-M2.1" not in models + + def test_catalog_excludes_highspeed(self): + from hermes_cli.models import _PROVIDER_MODELS + for provider in ("minimax", "minimax-cn"): + models = _PROVIDER_MODELS[provider] + assert "MiniMax-M2.7-highspeed" not in models + assert "MiniMax-M2.5-highspeed" not in models diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index 
ded0c920..0abc8196 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -808,6 +808,55 @@ def test_minimax_explicit_api_mode_respected(monkeypatch): assert resolved["api_mode"] == "chat_completions" +def test_minimax_config_base_url_overrides_hardcoded_default(monkeypatch): + """model.base_url in config.yaml should override the hardcoded default (#6039).""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + "provider": "minimax", + "base_url": "https://api.minimaxi.com/anthropic", + }) + monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key") + monkeypatch.delenv("MINIMAX_BASE_URL", raising=False) + + resolved = rp.resolve_runtime_provider(requested="minimax") + + assert resolved["provider"] == "minimax" + assert resolved["base_url"] == "https://api.minimaxi.com/anthropic" + assert resolved["api_mode"] == "anthropic_messages" + + +def test_minimax_env_base_url_still_wins_over_config(monkeypatch): + """MINIMAX_BASE_URL env var should take priority over config.yaml model.base_url.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + "provider": "minimax", + "base_url": "https://api.minimaxi.com/anthropic", + }) + monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key") + monkeypatch.setenv("MINIMAX_BASE_URL", "https://custom.example.com/v1") + + resolved = rp.resolve_runtime_provider(requested="minimax") + + # Env var wins because resolve_api_key_provider_credentials prefers it + assert resolved["base_url"] == "https://custom.example.com/v1" + + +def test_minimax_config_base_url_ignored_for_different_provider(monkeypatch): + """model.base_url should NOT be used when model.provider doesn't match.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax") + monkeypatch.setattr(rp, "_get_model_config", lambda: { + 
"provider": "openrouter", + "base_url": "https://some-other-endpoint.com/v1", + }) + monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key") + monkeypatch.delenv("MINIMAX_BASE_URL", raising=False) + + resolved = rp.resolve_runtime_provider(requested="minimax") + + # Should use the default, NOT the config base_url from a different provider + assert resolved["base_url"] == "https://api.minimax.io/anthropic" + + def test_alibaba_default_coding_intl_endpoint_uses_chat_completions(monkeypatch): """Alibaba default coding-intl /v1 URL should use chat_completions mode.""" monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "alibaba") diff --git a/tests/hermes_cli/test_setup_model_selection.py b/tests/hermes_cli/test_setup_model_selection.py index 3cb7056c..b42365da 100644 --- a/tests/hermes_cli/test_setup_model_selection.py +++ b/tests/hermes_cli/test_setup_model_selection.py @@ -34,8 +34,8 @@ class TestSetupProviderModelSelection: @pytest.mark.parametrize("provider_id,expected_defaults", [ ("zai", ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]), ("kimi-coding", ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]), - ("minimax", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), - ("minimax-cn", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]), + ("minimax", ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"]), + ("minimax-cn", ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"]), ("opencode-zen", ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash"]), ("opencode-go", ["glm-5", "kimi-k2.5", "minimax-m2.5", "minimax-m2.7"]), ]) From 65e24c942e89f81f672d22c9dc3cf11514ea0b89 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Tue, 7 Apr 2026 22:21:27 -0700 Subject: [PATCH 124/154] wip: tool result fixes -- 
persistence --- environments/agent_loop.py | 20 +- run_agent.py | 89 ++-- tests/run_agent/test_large_tool_result.py | 162 ------- tests/tools/test_tool_result_storage.py | 494 ++++++++++++++++++++++ tools/binary_extensions.py | 42 ++ tools/code_execution_tool.py | 1 + tools/file_tools.py | 49 ++- tools/registry.py | 15 +- tools/terminal_tool.py | 7 + tools/tool_result_storage.py | 223 ++++++++++ tools/web_tools.py | 2 + 11 files changed, 869 insertions(+), 235 deletions(-) delete mode 100644 tests/run_agent/test_large_tool_result.py create mode 100644 tests/tools/test_tool_result_storage.py create mode 100644 tools/binary_extensions.py create mode 100644 tools/tool_result_storage.py diff --git a/environments/agent_loop.py b/environments/agent_loop.py index 11a8a01f..ba2db0b5 100644 --- a/environments/agent_loop.py +++ b/environments/agent_loop.py @@ -21,6 +21,8 @@ from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Set from model_tools import handle_function_call +from tools.terminal_tool import get_active_env +from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget # Thread pool for running sync tool calls that internally use asyncio.run() # (e.g., the Modal/Docker/Daytona terminal backends). 
Running them in a separate @@ -446,8 +448,17 @@ class HermesAgentLoop: except (json.JSONDecodeError, TypeError): pass - # Add tool response to conversation tc_id = tc.get("id", "") if isinstance(tc, dict) else tc.id + try: + tool_result = maybe_persist_tool_result( + content=tool_result, + tool_name=tool_name, + tool_use_id=tc_id, + env=get_active_env(self.task_id), + ) + except Exception: + pass # Persistence is best-effort in eval path + messages.append( { "role": "tool", @@ -456,6 +467,13 @@ class HermesAgentLoop: } ) + try: + num_tcs = len(assistant_msg.tool_calls) + if num_tcs > 0: + enforce_turn_budget(messages[-num_tcs:], env=get_active_env(self.task_id)) + except Exception: + pass + turn_elapsed = _time.monotonic() - turn_start logger.info( "[%s] turn %d: api=%.1fs, %d tools, turn_total=%.1fs", diff --git a/run_agent.py b/run_agent.py index 22928bb1..49f36da4 100644 --- a/run_agent.py +++ b/run_agent.py @@ -66,7 +66,8 @@ from model_tools import ( handle_function_call, check_toolset_requirements, ) -from tools.terminal_tool import cleanup_vm +from tools.terminal_tool import cleanup_vm, get_active_env +from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget from tools.interrupt import set_interrupt as _set_interrupt from tools.browser_tool import cleanup_browser @@ -411,63 +412,6 @@ def _strip_budget_warnings_from_history(messages: list) -> None: # Large tool result handler — save oversized output to temp file # ========================================================================= -# Threshold at which tool results are saved to a file instead of kept inline. -# 100K chars ≈ 25K tokens — generous for any reasonable output but prevents -# catastrophic context explosions. -_LARGE_RESULT_CHARS = 100_000 - -# How many characters of the original result to include as an inline preview -# so the model has immediate context about what the tool returned. 
-_LARGE_RESULT_PREVIEW_CHARS = 1_500 - - -def _save_oversized_tool_result(function_name: str, function_result: str) -> str: - """Replace oversized tool results with a file reference + preview. - - When a tool returns more than ``_LARGE_RESULT_CHARS`` characters, the full - content is written to a temporary file under ``HERMES_HOME/cache/tool_responses/`` - and the result sent to the model is replaced with: - • a brief head preview (first ``_LARGE_RESULT_PREVIEW_CHARS`` chars) - • the file path so the model can use ``read_file`` / ``search_files`` - - Falls back to destructive truncation if the file write fails. - """ - original_len = len(function_result) - if original_len <= _LARGE_RESULT_CHARS: - return function_result - - # Build the target directory - try: - response_dir = os.path.join(get_hermes_home(), "cache", "tool_responses") - os.makedirs(response_dir, exist_ok=True) - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f") - # Sanitize tool name for use in filename - safe_name = re.sub(r"[^\w\-]", "_", function_name)[:40] - filename = f"{safe_name}_{timestamp}.txt" - filepath = os.path.join(response_dir, filename) - - with open(filepath, "w", encoding="utf-8") as f: - f.write(function_result) - - preview = function_result[:_LARGE_RESULT_PREVIEW_CHARS] - return ( - f"{preview}\n\n" - f"[Large tool response: {original_len:,} characters total — " - f"only the first {_LARGE_RESULT_PREVIEW_CHARS:,} shown above. " - f"Full output saved to: {filepath}\n" - f"Use read_file or search_files on that path to access the rest.]" - ) - except Exception as exc: - # Fall back to destructive truncation if file write fails - logger.warning("Failed to save large tool result to file: %s", exc) - return ( - function_result[:_LARGE_RESULT_CHARS] - + f"\n\n[Truncated: tool response was {original_len:,} chars, " - f"exceeding the {_LARGE_RESULT_CHARS:,} char limit. 
" - f"File save failed: {exc}]" - ) - class AIAgent: """ @@ -6262,15 +6206,17 @@ class AIAgent: except Exception as cb_err: logging.debug(f"Tool complete callback error: {cb_err}") - # Save oversized results to file instead of destructive truncation - function_result = _save_oversized_tool_result(name, function_result) + function_result = maybe_persist_tool_result( + content=function_result, + tool_name=name, + tool_use_id=tc.id, + env=get_active_env(effective_task_id), + ) - # Discover subdirectory context files from tool arguments subdir_hints = self._subdirectory_hints.check_tool_call(name, args) if subdir_hints: function_result += subdir_hints - # Append tool result message in order tool_msg = { "role": "tool", "content": function_result, @@ -6278,6 +6224,12 @@ class AIAgent: } messages.append(tool_msg) + # ── Per-turn aggregate budget enforcement ───────────────────────── + num_tools = len(parsed_calls) + if num_tools > 0: + turn_tool_msgs = messages[-num_tools:] + enforce_turn_budget(turn_tool_msgs, env=get_active_env(effective_task_id)) + # ── Budget pressure injection ──────────────────────────────────── budget_warning = self._get_budget_warning(api_call_count) if budget_warning and messages and messages[-1].get("role") == "tool": @@ -6562,8 +6514,12 @@ class AIAgent: except Exception as cb_err: logging.debug(f"Tool complete callback error: {cb_err}") - # Save oversized results to file instead of destructive truncation - function_result = _save_oversized_tool_result(function_name, function_result) + function_result = maybe_persist_tool_result( + content=function_result, + tool_name=function_name, + tool_use_id=tool_call.id, + env=get_active_env(effective_task_id), + ) # Discover subdirectory context files from tool arguments subdir_hints = self._subdirectory_hints.check_tool_call(function_name, function_args) @@ -6601,6 +6557,11 @@ class AIAgent: if self.tool_delay > 0 and i < len(assistant_message.tool_calls): time.sleep(self.tool_delay) + # ── Per-turn 
aggregate budget enforcement ───────────────────────── + num_tools_seq = len(assistant_message.tool_calls) + if num_tools_seq > 0: + enforce_turn_budget(messages[-num_tools_seq:], env=get_active_env(effective_task_id)) + # ── Budget pressure injection ───────────────────────────────── # After all tool calls in this turn are processed, check if we're # approaching max_iterations. If so, inject a warning into the LAST diff --git a/tests/run_agent/test_large_tool_result.py b/tests/run_agent/test_large_tool_result.py deleted file mode 100644 index ef51f2fe..00000000 --- a/tests/run_agent/test_large_tool_result.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Tests for _save_oversized_tool_result() — the large tool response handler. - -When a tool returns more than _LARGE_RESULT_CHARS characters, the full content -is saved to a file and the model receives a preview + file path instead. -""" - -import os -import re - -import pytest - -from run_agent import ( - _save_oversized_tool_result, - _LARGE_RESULT_CHARS, - _LARGE_RESULT_PREVIEW_CHARS, -) - - -class TestSaveOversizedToolResult: - """Unit tests for the large tool result handler.""" - - def test_small_result_returned_unchanged(self): - """Results under the threshold pass through untouched.""" - small = "x" * 1000 - assert _save_oversized_tool_result("terminal", small) is small - - def test_exactly_at_threshold_returned_unchanged(self): - """Results exactly at the threshold pass through.""" - exact = "y" * _LARGE_RESULT_CHARS - assert _save_oversized_tool_result("terminal", exact) is exact - - def test_oversized_result_saved_to_file(self, tmp_path, monkeypatch): - """Results over the threshold are written to a file.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) - os.makedirs(tmp_path / ".hermes", exist_ok=True) - - big = "A" * (_LARGE_RESULT_CHARS + 500) - result = _save_oversized_tool_result("terminal", big) - - # Should contain the preview - assert result.startswith("A" * _LARGE_RESULT_PREVIEW_CHARS) - # 
Should mention the file path - assert "Full output saved to:" in result - # Should mention original size - assert f"{len(big):,}" in result - - # Extract the file path and verify the file exists with full content - match = re.search(r"Full output saved to: (.+?)\n", result) - assert match, f"No file path found in result: {result[:300]}" - filepath = match.group(1) - assert os.path.isfile(filepath) - with open(filepath, "r", encoding="utf-8") as f: - saved = f.read() - assert saved == big - assert len(saved) == _LARGE_RESULT_CHARS + 500 - - def test_file_placed_in_cache_tool_responses(self, tmp_path, monkeypatch): - """Saved file lives under HERMES_HOME/cache/tool_responses/.""" - hermes_home = str(tmp_path / ".hermes") - monkeypatch.setenv("HERMES_HOME", hermes_home) - os.makedirs(hermes_home, exist_ok=True) - - big = "B" * (_LARGE_RESULT_CHARS + 1) - result = _save_oversized_tool_result("web_search", big) - - match = re.search(r"Full output saved to: (.+?)\n", result) - filepath = match.group(1) - expected_dir = os.path.join(hermes_home, "cache", "tool_responses") - assert filepath.startswith(expected_dir) - - def test_filename_contains_tool_name(self, tmp_path, monkeypatch): - """The saved filename includes a sanitized version of the tool name.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) - os.makedirs(tmp_path / ".hermes", exist_ok=True) - - big = "C" * (_LARGE_RESULT_CHARS + 1) - result = _save_oversized_tool_result("browser_navigate", big) - - match = re.search(r"Full output saved to: (.+?)\n", result) - filename = os.path.basename(match.group(1)) - assert filename.startswith("browser_navigate_") - assert filename.endswith(".txt") - - def test_tool_name_sanitized(self, tmp_path, monkeypatch): - """Special characters in tool names are replaced in the filename.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) - os.makedirs(tmp_path / ".hermes", exist_ok=True) - - big = "D" * (_LARGE_RESULT_CHARS + 1) - result = 
_save_oversized_tool_result("mcp:some/weird tool", big) - - match = re.search(r"Full output saved to: (.+?)\n", result) - filename = os.path.basename(match.group(1)) - # No slashes or colons in filename - assert "/" not in filename - assert ":" not in filename - - def test_fallback_on_write_failure(self, tmp_path, monkeypatch): - """When file write fails, falls back to destructive truncation.""" - # Point HERMES_HOME to a path that will fail (file, not directory) - bad_path = str(tmp_path / "not_a_dir.txt") - with open(bad_path, "w") as f: - f.write("I'm a file, not a directory") - monkeypatch.setenv("HERMES_HOME", bad_path) - - big = "E" * (_LARGE_RESULT_CHARS + 50_000) - result = _save_oversized_tool_result("terminal", big) - - # Should still contain data (fallback truncation) - assert len(result) > 0 - assert result.startswith("E" * 1000) - # Should mention the failure - assert "File save failed" in result - # Should be truncated to approximately _LARGE_RESULT_CHARS + error msg - assert len(result) < len(big) - - def test_preview_length_capped(self, tmp_path, monkeypatch): - """The inline preview is capped at _LARGE_RESULT_PREVIEW_CHARS.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) - os.makedirs(tmp_path / ".hermes", exist_ok=True) - - # Use distinct chars so we can measure the preview - big = "Z" * (_LARGE_RESULT_CHARS + 5000) - result = _save_oversized_tool_result("terminal", big) - - # The preview section is the content before the "[Large tool response:" marker - marker_pos = result.index("[Large tool response:") - preview_section = result[:marker_pos].rstrip() - assert len(preview_section) == _LARGE_RESULT_PREVIEW_CHARS - - def test_guidance_message_mentions_tools(self, tmp_path, monkeypatch): - """The replacement message tells the model how to access the file.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) - os.makedirs(tmp_path / ".hermes", exist_ok=True) - - big = "F" * (_LARGE_RESULT_CHARS + 1) - result = 
_save_oversized_tool_result("terminal", big) - - assert "read_file" in result - assert "search_files" in result - - def test_empty_result_passes_through(self): - """Empty strings are not oversized.""" - assert _save_oversized_tool_result("terminal", "") == "" - - def test_unicode_content_preserved(self, tmp_path, monkeypatch): - """Unicode content is fully preserved in the saved file.""" - monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) - os.makedirs(tmp_path / ".hermes", exist_ok=True) - - # Mix of ASCII and multi-byte unicode to exceed threshold - unit = "Hello 世界! 🎉 " * 100 # ~1400 chars per repeat - big = unit * ((_LARGE_RESULT_CHARS // len(unit)) + 1) - assert len(big) > _LARGE_RESULT_CHARS - - result = _save_oversized_tool_result("terminal", big) - match = re.search(r"Full output saved to: (.+?)\n", result) - filepath = match.group(1) - - with open(filepath, "r", encoding="utf-8") as f: - saved = f.read() - assert saved == big diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py new file mode 100644 index 00000000..7c757027 --- /dev/null +++ b/tests/tools/test_tool_result_storage.py @@ -0,0 +1,494 @@ +"""Tests for tools/tool_result_storage.py -- 3-layer tool result persistence.""" + +import pytest +from unittest.mock import MagicMock, patch + +from tools.tool_result_storage import ( + DEFAULT_MAX_RESULT_SIZE_CHARS, + HEREDOC_MARKER, + MAX_TURN_BUDGET_CHARS, + PERSISTED_OUTPUT_TAG, + PERSISTED_OUTPUT_CLOSING_TAG, + PREVIEW_SIZE_CHARS, + STORAGE_DIR, + _build_persisted_message, + _extract_raw_output, + _heredoc_marker, + _write_to_sandbox, + enforce_turn_budget, + generate_preview, + maybe_persist_tool_result, +) + + +# ── generate_preview ────────────────────────────────────────────────── + +class TestGeneratePreview: + def test_short_content_unchanged(self): + text = "short result" + preview, has_more = generate_preview(text) + assert preview == text + assert has_more is False + + def 
test_long_content_truncated(self): + text = "x" * 5000 + preview, has_more = generate_preview(text, max_chars=2000) + assert len(preview) <= 2000 + assert has_more is True + + def test_truncates_at_newline_boundary(self): + # 1500 chars + newline + 600 chars (past halfway) + text = "a" * 1500 + "\n" + "b" * 600 + preview, has_more = generate_preview(text, max_chars=2000) + assert preview == "a" * 1500 + "\n" + assert has_more is True + + def test_ignores_early_newline(self): + # Newline at position 100, well before halfway of 2000 + text = "a" * 100 + "\n" + "b" * 3000 + preview, has_more = generate_preview(text, max_chars=2000) + assert len(preview) == 2000 + assert has_more is True + + def test_empty_content(self): + preview, has_more = generate_preview("") + assert preview == "" + assert has_more is False + + def test_exact_boundary(self): + text = "x" * PREVIEW_SIZE_CHARS + preview, has_more = generate_preview(text) + assert preview == text + assert has_more is False + + +# ── _extract_raw_output ──────────────────────────────────────────────── + +class TestExtractRawOutput: + def test_extracts_output_from_terminal_json(self): + import json + content = json.dumps({"output": "hello world\nline2", "exit_code": 0, "error": None}) + assert _extract_raw_output(content) == "hello world\nline2" + + def test_passes_through_non_json(self): + assert _extract_raw_output("plain text output") == "plain text output" + + def test_passes_through_json_without_output_key(self): + import json + content = json.dumps({"result": "something", "status": "ok"}) + assert _extract_raw_output(content) == content + + def test_extracts_large_output(self): + import json + big = "x\n" * 30_000 + content = json.dumps({"output": big, "exit_code": 0, "error": None}) + assert _extract_raw_output(content) == big + + +# ── _heredoc_marker ─────────────────────────────────────────────────── + +class TestHeredocMarker: + def test_default_marker_when_no_collision(self): + assert 
_heredoc_marker("normal content") == HEREDOC_MARKER + + def test_uuid_marker_on_collision(self): + content = f"some text with {HEREDOC_MARKER} embedded" + marker = _heredoc_marker(content) + assert marker != HEREDOC_MARKER + assert marker.startswith("HERMES_PERSIST_") + assert marker not in content + + +# ── _write_to_sandbox ───────────────────────────────────────────────── + +class TestWriteToSandbox: + def test_success(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + result = _write_to_sandbox("hello world", "/tmp/hermes-results/abc.txt", env) + assert result is True + env.execute.assert_called_once() + cmd = env.execute.call_args[0][0] + assert "mkdir -p" in cmd + assert "hello world" in cmd + assert HEREDOC_MARKER in cmd + + def test_failure_returns_false(self): + env = MagicMock() + env.execute.return_value = {"output": "error", "returncode": 1} + result = _write_to_sandbox("content", "/tmp/hermes-results/abc.txt", env) + assert result is False + + def test_heredoc_collision_uses_uuid_marker(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + content = f"text with {HEREDOC_MARKER} inside" + _write_to_sandbox(content, "/tmp/hermes-results/abc.txt", env) + cmd = env.execute.call_args[0][0] + # The default marker should NOT be used as the delimiter + lines = cmd.split("\n") + # The first and last lines contain the actual delimiter + assert HEREDOC_MARKER not in lines[0].split("<<")[1] + + def test_timeout_passed(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + _write_to_sandbox("content", "/tmp/hermes-results/abc.txt", env) + assert env.execute.call_args[1]["timeout"] == 30 + + +# ── _build_persisted_message ────────────────────────────────────────── + +class TestBuildPersistedMessage: + def test_structure(self): + msg = _build_persisted_message( + preview="first 100 chars...", + has_more=True, + original_size=50_000, + 
file_path="/tmp/hermes-results/test123.txt", + ) + assert msg.startswith(PERSISTED_OUTPUT_TAG) + assert msg.endswith(PERSISTED_OUTPUT_CLOSING_TAG) + assert "50,000 characters" in msg + assert "/tmp/hermes-results/test123.txt" in msg + assert "read_file" in msg + assert "first 100 chars..." in msg + assert "..." in msg # has_more indicator + + def test_no_ellipsis_when_complete(self): + msg = _build_persisted_message( + preview="complete content", + has_more=False, + original_size=16, + file_path="/tmp/hermes-results/x.txt", + ) + # Should not have the trailing "..." indicator before closing tag + lines = msg.strip().split("\n") + assert lines[-2] != "..." + + def test_large_size_shows_mb(self): + msg = _build_persisted_message( + preview="x", + has_more=True, + original_size=2_000_000, + file_path="/tmp/hermes-results/big.txt", + ) + assert "MB" in msg + + +# ── maybe_persist_tool_result ───────────────────────────────────────── + +class TestMaybePersistToolResult: + def test_below_threshold_returns_unchanged(self): + content = "small result" + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_123", + env=None, + threshold=50_000, + ) + assert result == content + + def test_above_threshold_with_env_persists(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + content = "x" * 60_000 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_456", + env=env, + threshold=30_000, + ) + assert PERSISTED_OUTPUT_TAG in result + assert "tc_456.txt" in result + assert len(result) < len(content) + env.execute.assert_called_once() + + def test_persists_raw_output_not_json_wrapper(self): + """When content is JSON with 'output' key, file should contain raw output.""" + import json + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + raw = "line1\nline2\n" * 5_000 + content = json.dumps({"output": raw, "exit_code": 0, "error": 
None}) + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_json", + env=env, + threshold=30_000, + ) + assert PERSISTED_OUTPUT_TAG in result + # The heredoc written to sandbox should contain raw text, not JSON + cmd = env.execute.call_args[0][0] + assert "line1\nline2\n" in cmd + assert '"exit_code"' not in cmd + + def test_above_threshold_no_env_truncates_inline(self): + content = "x" * 60_000 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_789", + env=None, + threshold=30_000, + ) + assert PERSISTED_OUTPUT_TAG not in result + assert "Truncated" in result + assert len(result) < len(content) + + def test_env_write_failure_falls_back_to_truncation(self): + env = MagicMock() + env.execute.return_value = {"output": "disk full", "returncode": 1} + content = "x" * 60_000 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_fail", + env=env, + threshold=30_000, + ) + assert PERSISTED_OUTPUT_TAG not in result + assert "Truncated" in result + + def test_env_execute_exception_falls_back(self): + env = MagicMock() + env.execute.side_effect = RuntimeError("connection lost") + content = "x" * 60_000 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_exc", + env=env, + threshold=30_000, + ) + assert "Truncated" in result + + def test_read_file_never_persisted(self): + """read_file has threshold=inf, should never be persisted.""" + env = MagicMock() + content = "x" * 200_000 + result = maybe_persist_tool_result( + content=content, + tool_name="read_file", + tool_use_id="tc_rf", + env=env, + threshold=float("inf"), + ) + assert result == content + env.execute.assert_not_called() + + def test_uses_registry_threshold_when_not_provided(self): + """When threshold=None, looks up from registry.""" + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + content = "x" * 
60_000 + + mock_registry = MagicMock() + mock_registry.get_max_result_size.return_value = 30_000 + + with patch("tools.registry.registry", mock_registry): + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_reg", + env=env, + threshold=None, + ) + # Should have persisted since 60K > 30K + assert PERSISTED_OUTPUT_TAG in result or "Truncated" in result + + def test_unicode_content_survives(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + content = "日本語テスト " * 10_000 # ~60K chars of unicode + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_uni", + env=env, + threshold=30_000, + ) + assert PERSISTED_OUTPUT_TAG in result + # Preview should contain unicode + assert "日本語テスト" in result + + def test_empty_content_returns_unchanged(self): + result = maybe_persist_tool_result( + content="", + tool_name="terminal", + tool_use_id="tc_empty", + env=None, + threshold=30_000, + ) + assert result == "" + + def test_whitespace_only_below_threshold(self): + content = " " * 100 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_ws", + env=None, + threshold=30_000, + ) + assert result == content + + def test_file_path_uses_tool_use_id(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + content = "x" * 60_000 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="unique_id_abc", + env=env, + threshold=30_000, + ) + assert "unique_id_abc.txt" in result + + def test_preview_included_in_persisted_output(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + # Create content with a distinctive start + content = "DISTINCTIVE_START_MARKER" + "x" * 60_000 + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_prev", + env=env, + threshold=30_000, + ) + 
assert "DISTINCTIVE_START_MARKER" in result + + def test_threshold_zero_forces_persist(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + content = "even short content" + result = maybe_persist_tool_result( + content=content, + tool_name="terminal", + tool_use_id="tc_zero", + env=env, + threshold=0, + ) + # Any non-empty content with threshold=0 should be persisted + assert PERSISTED_OUTPUT_TAG in result + + +# ── enforce_turn_budget ─────────────────────────────────────────────── + +class TestEnforceTurnBudget: + def test_under_budget_no_changes(self): + msgs = [ + {"role": "tool", "tool_call_id": "t1", "content": "small"}, + {"role": "tool", "tool_call_id": "t2", "content": "also small"}, + ] + result = enforce_turn_budget(msgs, env=None, budget=200_000) + assert result[0]["content"] == "small" + assert result[1]["content"] == "also small" + + def test_over_budget_largest_persisted_first(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + msgs = [ + {"role": "tool", "tool_call_id": "t1", "content": "a" * 80_000}, + {"role": "tool", "tool_call_id": "t2", "content": "b" * 130_000}, + ] + # Total 210K > 200K budget + enforce_turn_budget(msgs, env=env, budget=200_000) + # The larger one (130K) should be persisted first + assert PERSISTED_OUTPUT_TAG in msgs[1]["content"] + + def test_already_persisted_results_skipped(self): + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + msgs = [ + {"role": "tool", "tool_call_id": "t1", + "content": f"{PERSISTED_OUTPUT_TAG}\nalready persisted\n{PERSISTED_OUTPUT_CLOSING_TAG}"}, + {"role": "tool", "tool_call_id": "t2", "content": "x" * 250_000}, + ] + enforce_turn_budget(msgs, env=env, budget=200_000) + # t1 should be untouched (already persisted) + assert msgs[0]["content"].startswith(PERSISTED_OUTPUT_TAG) + # t2 should be persisted + assert PERSISTED_OUTPUT_TAG in msgs[1]["content"] + + def 
test_medium_result_regression(self): + """6 results of 42K chars each (252K total) — each under 50K default + threshold but aggregate exceeds 200K budget. L3 should persist.""" + env = MagicMock() + env.execute.return_value = {"output": "", "returncode": 0} + msgs = [ + {"role": "tool", "tool_call_id": f"t{i}", "content": "x" * 42_000} + for i in range(6) + ] + enforce_turn_budget(msgs, env=env, budget=200_000) + # At least some results should be persisted to get under 200K + persisted_count = sum( + 1 for m in msgs if PERSISTED_OUTPUT_TAG in m["content"] + ) + assert persisted_count >= 2 # Need to shed at least ~52K + + def test_no_env_falls_back_to_truncation(self): + msgs = [ + {"role": "tool", "tool_call_id": "t1", "content": "x" * 250_000}, + ] + enforce_turn_budget(msgs, env=None, budget=200_000) + # Should be truncated (no sandbox available) + assert "Truncated" in msgs[0]["content"] or PERSISTED_OUTPUT_TAG in msgs[0]["content"] + + def test_returns_same_list(self): + msgs = [{"role": "tool", "tool_call_id": "t1", "content": "ok"}] + result = enforce_turn_budget(msgs, env=None, budget=200_000) + assert result is msgs + + def test_empty_messages(self): + result = enforce_turn_budget([], env=None, budget=200_000) + assert result == [] + + +# ── Per-tool threshold integration ──────────────────────────────────── + +class TestPerToolThresholds: + """Verify registry wiring for per-tool thresholds.""" + + def test_registry_has_get_max_result_size(self): + from tools.registry import registry + assert hasattr(registry, "get_max_result_size") + + def test_default_threshold(self): + from tools.registry import registry + # Unknown tool should return the default + val = registry.get_max_result_size("nonexistent_tool_xyz") + assert val == DEFAULT_MAX_RESULT_SIZE_CHARS + + def test_terminal_threshold(self): + from tools.registry import registry + # Trigger import of terminal_tool to register the tool + try: + import tools.terminal_tool # noqa: F401 + val = 
registry.get_max_result_size("terminal") + assert val == 30_000 + except ImportError: + pytest.skip("terminal_tool not importable in test env") + + def test_read_file_never_persisted(self): + from tools.registry import registry + try: + import tools.file_tools # noqa: F401 + val = registry.get_max_result_size("read_file") + assert val == float("inf") + except ImportError: + pytest.skip("file_tools not importable in test env") + + def test_search_files_threshold(self): + from tools.registry import registry + try: + import tools.file_tools # noqa: F401 + val = registry.get_max_result_size("search_files") + assert val == 20_000 + except ImportError: + pytest.skip("file_tools not importable in test env") diff --git a/tools/binary_extensions.py b/tools/binary_extensions.py new file mode 100644 index 00000000..f7e63bda --- /dev/null +++ b/tools/binary_extensions.py @@ -0,0 +1,42 @@ +"""Binary file extensions to skip for text-based operations. + +These files can't be meaningfully compared as text and are often large. +Ported from free-code src/constants/files.ts. 
+""" + +BINARY_EXTENSIONS = frozenset({ + # Images + ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".webp", ".tiff", ".tif", + # Videos + ".mp4", ".mov", ".avi", ".mkv", ".webm", ".wmv", ".flv", ".m4v", ".mpeg", ".mpg", + # Audio + ".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma", ".aiff", ".opus", + # Archives + ".zip", ".tar", ".gz", ".bz2", ".7z", ".rar", ".xz", ".z", ".tgz", ".iso", + # Executables/binaries + ".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".obj", ".lib", + ".app", ".msi", ".deb", ".rpm", + # Documents (PDF is here; read_file excludes it at the call site) + ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", + ".odt", ".ods", ".odp", + # Fonts + ".ttf", ".otf", ".woff", ".woff2", ".eot", + # Bytecode / VM artifacts + ".pyc", ".pyo", ".class", ".jar", ".war", ".ear", ".node", ".wasm", ".rlib", + # Database files + ".sqlite", ".sqlite3", ".db", ".mdb", ".idx", + # Design / 3D + ".psd", ".ai", ".eps", ".sketch", ".fig", ".xd", ".blend", ".3ds", ".max", + # Flash + ".swf", ".fla", + # Lock/profiling data + ".lockb", ".dat", ".data", +}) + + +def has_binary_extension(path: str) -> bool: + """Check if a file path has a binary extension. 
Pure string check, no I/O.""" + dot = path.rfind(".") + if dot == -1: + return False + return path[dot:].lower() in BINARY_EXTENSIONS diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index 77be5569..f48c4b99 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -1343,4 +1343,5 @@ registry.register( enabled_tools=kw.get("enabled_tools")), check_fn=check_sandbox_requirements, emoji="🐍", + max_result_size_chars=30_000, ) diff --git a/tools/file_tools.py b/tools/file_tools.py index 43e40315..265c9ed2 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -7,6 +7,7 @@ import logging import os import threading from pathlib import Path +from tools.binary_extensions import has_binary_extension from tools.file_operations import ShellFileOperations from agent.redact import redact_sensitive_text @@ -290,11 +291,24 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = ), }) + # Resolve path once for all guards below + import pathlib as _pathlib + _resolved = _pathlib.Path(path).expanduser().resolve() + + # ── Binary file guard ───────────────────────────────────────── + # Block binary files by extension (no I/O). + if has_binary_extension(str(_resolved)): + _ext = _resolved.suffix.lower() + return json.dumps({ + "error": ( + f"Cannot read binary file '{path}' ({_ext}). " + "Use vision_analyze for images, or terminal to inspect binary files." + ), + }) + # ── Hermes internal path guard ──────────────────────────────── # Prevent prompt injection via catalog or hub metadata files. 
- import pathlib as _pathlib from hermes_constants import get_hermes_home as _get_hh - _resolved = _pathlib.Path(path).expanduser().resolve() _hermes_home = _get_hh().resolve() _blocked_dirs = [ _hermes_home / "skills" / ".hub" / "index-cache", @@ -313,6 +327,27 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = except ValueError: pass + # ── Pre-read file size guard ────────────────────────────────── + # Stat the file before reading. If it's large and the model + # didn't request a narrow range, block and tell it to use + # offset/limit — cheaper than reading 200K chars then rejecting. + _PRE_READ_MAX_BYTES = 100_000 + _NARROW_LIMIT = 200 + try: + _fsize = os.path.getsize(str(_resolved)) + except OSError: + _fsize = 0 + if _fsize > _PRE_READ_MAX_BYTES and limit > _NARROW_LIMIT: + return json.dumps({ + "error": ( + f"File is too large to read in full ({_fsize:,} bytes). " + f"Use offset and limit parameters to read specific sections " + f"(e.g. offset=1, limit=100 for the first 100 lines)." + ), + "path": path, + "file_size": _fsize, + }, ensure_ascii=False) + # ── Dedup check ─────────────────────────────────────────────── # If we already read this exact (path, offset, limit) and the # file hasn't been modified since, return a lightweight stub @@ -726,7 +761,7 @@ def _check_file_reqs(): READ_FILE_SCHEMA = { "name": "read_file", - "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. NOTE: Cannot read images or binary files — use vision_analyze for images.", + "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. 
When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 100KB will be rejected unless you specify a narrow range (limit <= 200). NOTE: Cannot read images or binary files — use vision_analyze for images.", "parameters": { "type": "object", "properties": { @@ -817,7 +852,7 @@ def _handle_search_files(args, **kw): output_mode=args.get("output_mode", "content"), context=args.get("context", 0), task_id=tid) -registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖") -registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️") -registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧") -registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎") +registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=float('inf')) +registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️", max_result_size_chars=100_000) +registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧", max_result_size_chars=100_000) +registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=20_000) diff --git a/tools/registry.py b/tools/registry.py index 079052a3..c01c60c0 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -27,10 +27,12 @@ class ToolEntry: __slots__ = ( "name", "toolset", "schema", "handler", 
"check_fn", "requires_env", "is_async", "description", "emoji", + "max_result_size_chars", ) def __init__(self, name, toolset, schema, handler, check_fn, - requires_env, is_async, description, emoji): + requires_env, is_async, description, emoji, + max_result_size_chars=None): self.name = name self.toolset = toolset self.schema = schema @@ -40,6 +42,7 @@ class ToolEntry: self.is_async = is_async self.description = description self.emoji = emoji + self.max_result_size_chars = max_result_size_chars class ToolRegistry: @@ -64,6 +67,7 @@ class ToolRegistry: is_async: bool = False, description: str = "", emoji: str = "", + max_result_size_chars: int | float | None = None, ): """Register a tool. Called at module-import time by each tool file.""" existing = self._tools.get(name) @@ -83,6 +87,7 @@ class ToolRegistry: is_async=is_async, description=description or schema.get("description", ""), emoji=emoji, + max_result_size_chars=max_result_size_chars, ) if check_fn and toolset not in self._toolset_checks: self._toolset_checks[toolset] = check_fn @@ -164,6 +169,14 @@ class ToolRegistry: # Query helpers (replace redundant dicts in model_tools.py) # ------------------------------------------------------------------ + def get_max_result_size(self, name: str) -> int | float: + """Return per-tool max result size, or global default.""" + from tools.tool_result_storage import DEFAULT_MAX_RESULT_SIZE_CHARS + entry = self._tools.get(name) + if entry and entry.max_result_size_chars is not None: + return entry.max_result_size_chars + return DEFAULT_MAX_RESULT_SIZE_CHARS + def get_all_tool_names(self) -> List[str]: """Return sorted list of all registered tool names.""" return sorted(self._tools.keys()) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 32f1bd3b..ff9e064b 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -811,6 +811,12 @@ def _stop_cleanup_thread(): pass +def get_active_env(task_id: str): + """Return the active BaseEnvironment for 
*task_id*, or None.""" + with _env_lock: + return _active_environments.get(task_id) + + def get_active_environments_info() -> Dict[str, Any]: """Get information about currently active environments.""" info = { @@ -1617,4 +1623,5 @@ registry.register( handler=_handle_terminal, check_fn=check_terminal_requirements, emoji="💻", + max_result_size_chars=30_000, ) diff --git a/tools/tool_result_storage.py b/tools/tool_result_storage.py new file mode 100644 index 00000000..c478431b --- /dev/null +++ b/tools/tool_result_storage.py @@ -0,0 +1,223 @@ +"""Tool result persistence -- preserves large outputs instead of truncating. + +Defense against context-window overflow operates at three levels: + +1. **Per-tool output cap** (inside each tool): Tools like search_files + pre-truncate their own output before returning. This is the first line + of defense and the only one the tool author controls. + +2. **Per-result persistence** (maybe_persist_tool_result): After a tool + returns, if its output exceeds the tool's registered threshold + (registry.get_max_result_size), the full output is written INTO THE + SANDBOX at /tmp/hermes-results/{tool_use_id}.txt via env.execute(). + The in-context content is replaced with a preview + file path reference. + The model can read_file to access the full output on any backend. + +3. **Per-turn aggregate budget** (enforce_turn_budget): After all tool + results in a single assistant turn are collected, if the total exceeds + MAX_TURN_BUDGET_CHARS (200K), the largest non-persisted results are + spilled to disk until the aggregate is under budget. This catches cases + where many medium-sized results combine to overflow context. 
+""" + +import json +import logging +import uuid + +logger = logging.getLogger(__name__) + +DEFAULT_MAX_RESULT_SIZE_CHARS: int = 50_000 +MAX_TURN_BUDGET_CHARS: int = 200_000 +PREVIEW_SIZE_CHARS: int = 2_000 +PERSISTED_OUTPUT_TAG = "" +PERSISTED_OUTPUT_CLOSING_TAG = "" +STORAGE_DIR = "/tmp/hermes-results" +HEREDOC_MARKER = "HERMES_PERSIST_EOF" +_BUDGET_TOOL_NAME = "__budget_enforcement__" + + +def generate_preview(content: str, max_chars: int = PREVIEW_SIZE_CHARS) -> tuple[str, bool]: + """Truncate at last newline within max_chars. Returns (preview, has_more).""" + if len(content) <= max_chars: + return content, False + truncated = content[:max_chars] + last_nl = truncated.rfind("\n") + if last_nl > max_chars // 2: + truncated = truncated[:last_nl + 1] + return truncated, True + + +def _heredoc_marker(content: str) -> str: + """Return a heredoc delimiter that doesn't collide with content.""" + if HEREDOC_MARKER not in content: + return HEREDOC_MARKER + return f"HERMES_PERSIST_{uuid.uuid4().hex[:8]}" + + +def _extract_raw_output(content: str) -> str: + """Extract the 'output' field from JSON tool results for cleaner persistence. + + Tool handlers return json.dumps({"output": ..., "exit_code": ...}) for the + API, but persisted files should contain readable text, not a JSON blob. + """ + try: + data = json.loads(content) + if isinstance(data, dict) and "output" in data: + return data["output"] + except (json.JSONDecodeError, TypeError): + pass + return content + + +def _write_to_sandbox(content: str, remote_path: str, env) -> bool: + """Write content into the sandbox via env.execute(). 
Returns True on success.""" + marker = _heredoc_marker(content) + cmd = ( + f"mkdir -p {STORAGE_DIR} && cat > {remote_path} << '{marker}'\n" + f"{content}\n" + f"{marker}" + ) + result = env.execute(cmd, timeout=30) + return result.get("returncode", 1) == 0 + + +def _build_persisted_message( + preview: str, + has_more: bool, + original_size: int, + file_path: str, +) -> str: + """Build the replacement block.""" + size_kb = original_size / 1024 + if size_kb >= 1024: + size_str = f"{size_kb / 1024:.1f} MB" + else: + size_str = f"{size_kb:.1f} KB" + + msg = f"{PERSISTED_OUTPUT_TAG}\n" + msg += f"This tool result was too large ({original_size:,} characters, {size_str}).\n" + msg += f"Full output saved to: {file_path}\n" + msg += "Use the read_file tool with offset and limit to access specific sections of this output.\n\n" + msg += f"Preview (first {len(preview)} chars):\n" + msg += preview + if has_more: + msg += "\n..." + msg += f"\n{PERSISTED_OUTPUT_CLOSING_TAG}" + return msg + + +def maybe_persist_tool_result( + content: str, + tool_name: str, + tool_use_id: str, + env=None, + threshold: int | float | None = None, +) -> str: + """Layer 2: persist oversized result into the sandbox, return preview + path. + + Writes via env.execute() so the file is accessible from any backend + (local, Docker, SSH, Modal, Daytona). Falls back to inline truncation + if write fails or no env is available. + + Args: + content: Raw tool result string. + tool_name: Name of the tool (used for threshold lookup). + tool_use_id: Unique ID for this tool call (used as filename). + env: The active BaseEnvironment instance, or None. + threshold: Override threshold; if None, looked up from registry. + + Returns: + Original content if small, or replacement. + """ + if threshold is None: + from tools.registry import registry + threshold = registry.get_max_result_size(tool_name) + + # Infinity means never persist (e.g. 
read_file) + if threshold == float("inf"): + return content + + if len(content) <= threshold: + return content + + remote_path = f"{STORAGE_DIR}/{tool_use_id}.txt" + # Write raw output (not JSON wrapper) so read_file returns readable text + file_content = _extract_raw_output(content) + preview, has_more = generate_preview(file_content) + + # Try writing into the sandbox + if env is not None: + try: + if _write_to_sandbox(file_content, remote_path, env): + logger.info( + "Persisted large tool result: %s (%s, %d chars -> %s)", + tool_name, tool_use_id, len(content), remote_path, + ) + return _build_persisted_message(preview, has_more, len(content), remote_path) + except Exception as exc: + logger.warning("Sandbox write failed for %s: %s", tool_use_id, exc) + + # Fallback: inline truncation (no sandbox available or write failed) + logger.info( + "Inline-truncating large tool result: %s (%d chars, no sandbox write)", + tool_name, len(content), + ) + return ( + f"{preview}\n\n" + f"[Truncated: tool response was {len(content):,} chars. " + f"Full output could not be saved to sandbox.]" + ) + + +def enforce_turn_budget( + tool_messages: list[dict], + env=None, + budget: int = MAX_TURN_BUDGET_CHARS, +) -> list[dict]: + """Layer 3: enforce aggregate budget across all tool results in a turn. + + If total chars exceed budget, persist the largest non-persisted results + first (via sandbox write) until under budget. Already-persisted results + are skipped. + + Mutates the list in-place and returns it. 
+ """ + candidates = [] + total_size = 0 + for i, msg in enumerate(tool_messages): + content = msg.get("content", "") + size = len(content) + total_size += size + if PERSISTED_OUTPUT_TAG not in content: + candidates.append((i, size)) + + if total_size <= budget: + return tool_messages + + # Sort candidates by size descending — persist largest first + candidates.sort(key=lambda x: x[1], reverse=True) + + for idx, size in candidates: + if total_size <= budget: + break + msg = tool_messages[idx] + content = msg["content"] + tool_use_id = msg.get("tool_call_id", f"budget_{idx}") + + replacement = maybe_persist_tool_result( + content=content, + tool_name=_BUDGET_TOOL_NAME, + tool_use_id=tool_use_id, + env=env, + threshold=0, + ) + if replacement != content: + total_size -= size + total_size += len(replacement) + tool_messages[idx]["content"] = replacement + logger.info( + "Budget enforcement: persisted tool result %s (%d chars)", + tool_use_id, size, + ) + + return tool_messages diff --git a/tools/web_tools.py b/tools/web_tools.py index 803a09c0..f743c427 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -2085,6 +2085,7 @@ registry.register( check_fn=check_web_api_key, requires_env=_web_requires_env(), emoji="🔍", + max_result_size_chars=100_000, ) registry.register( name="web_extract", @@ -2096,4 +2097,5 @@ registry.register( requires_env=_web_requires_env(), is_async=True, emoji="📄", + max_result_size_chars=100_000, ) From 77c5bc9da9af185ba844ca079868fe8178247600 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Tue, 7 Apr 2026 22:31:06 -0700 Subject: [PATCH 125/154] feat(budget): make tool result persistence thresholds configurable Add BudgetConfig dataclass to centralize and make overridable the hardcoded constants (50K per-result, 200K per-turn, 2K preview) that control when tool outputs get persisted to sandbox. Configurable at the RL environment level via HermesAgentEnvConfig fields, threaded through HermesAgentLoop to the storage layer. 
Resolution: pinned (read_file=inf) > env config overrides > registry per-tool > default. CLI override: --env.turn_budget_chars 80000 --- environments/agent_loop.py | 15 +++++- environments/agentic_opd_env.py | 1 + .../terminalbench_2/terminalbench2_env.py | 2 + .../benchmarks/yc_bench/yc_bench_env.py | 1 + environments/hermes_base_env.py | 44 ++++++++++++++++ environments/web_research_env.py | 1 + tools/budget_config.py | 52 +++++++++++++++++++ tools/registry.py | 8 +-- tools/tool_result_storage.py | 16 ++++-- 9 files changed, 131 insertions(+), 9 deletions(-) create mode 100644 tools/budget_config.py diff --git a/environments/agent_loop.py b/environments/agent_loop.py index ba2db0b5..cbf9c774 100644 --- a/environments/agent_loop.py +++ b/environments/agent_loop.py @@ -140,6 +140,7 @@ class HermesAgentLoop: temperature: float = 1.0, max_tokens: Optional[int] = None, extra_body: Optional[Dict[str, Any]] = None, + budget_config: Optional["BudgetConfig"] = None, ): """ Initialize the agent loop. @@ -156,7 +157,11 @@ class HermesAgentLoop: extra_body: Extra parameters passed to the OpenAI client's create() call. Used for OpenRouter provider preferences, transforms, etc. e.g. {"provider": {"ignore": ["DeepInfra"]}} + budget_config: Tool result persistence budget. Controls per-tool + thresholds, per-turn aggregate budget, and preview size. + If None, uses DEFAULT_BUDGET (current hardcoded values). 
""" + from tools.budget_config import DEFAULT_BUDGET self.server = server self.tool_schemas = tool_schemas self.valid_tool_names = valid_tool_names @@ -165,6 +170,7 @@ class HermesAgentLoop: self.temperature = temperature self.max_tokens = max_tokens self.extra_body = extra_body + self.budget_config = budget_config or DEFAULT_BUDGET async def run(self, messages: List[Dict[str, Any]]) -> AgentResult: """ @@ -455,6 +461,8 @@ class HermesAgentLoop: tool_name=tool_name, tool_use_id=tc_id, env=get_active_env(self.task_id), + threshold=self.budget_config.resolve_threshold(tool_name), + preview_size=self.budget_config.preview_size, ) except Exception: pass # Persistence is best-effort in eval path @@ -470,7 +478,12 @@ class HermesAgentLoop: try: num_tcs = len(assistant_msg.tool_calls) if num_tcs > 0: - enforce_turn_budget(messages[-num_tcs:], env=get_active_env(self.task_id)) + enforce_turn_budget( + messages[-num_tcs:], + env=get_active_env(self.task_id), + budget=self.budget_config.turn_budget, + preview_size=self.budget_config.preview_size, + ) except Exception: pass diff --git a/environments/agentic_opd_env.py b/environments/agentic_opd_env.py index b9627123..44311f55 100644 --- a/environments/agentic_opd_env.py +++ b/environments/agentic_opd_env.py @@ -1048,6 +1048,7 @@ class AgenticOPDEnv(HermesAgentBaseEnv): temperature=0.0, max_tokens=self.config.max_token_length, extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) diff --git a/environments/benchmarks/terminalbench_2/terminalbench2_env.py b/environments/benchmarks/terminalbench_2/terminalbench2_env.py index 2f0d9262..c7eaff6c 100644 --- a/environments/benchmarks/terminalbench_2/terminalbench2_env.py +++ b/environments/benchmarks/terminalbench_2/terminalbench2_env.py @@ -541,6 +541,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv): temperature=self.config.agent_temperature, max_tokens=self.config.max_token_length, 
extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) else: @@ -553,6 +554,7 @@ class TerminalBench2EvalEnv(HermesAgentBaseEnv): temperature=self.config.agent_temperature, max_tokens=self.config.max_token_length, extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) diff --git a/environments/benchmarks/yc_bench/yc_bench_env.py b/environments/benchmarks/yc_bench/yc_bench_env.py index 5b6bf9ad..4247ae56 100644 --- a/environments/benchmarks/yc_bench/yc_bench_env.py +++ b/environments/benchmarks/yc_bench/yc_bench_env.py @@ -549,6 +549,7 @@ class YCBenchEvalEnv(HermesAgentBaseEnv): temperature=self.config.agent_temperature, max_tokens=self.config.max_token_length, extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) diff --git a/environments/hermes_base_env.py b/environments/hermes_base_env.py index 651722ff..ededab35 100644 --- a/environments/hermes_base_env.py +++ b/environments/hermes_base_env.py @@ -62,6 +62,11 @@ from atroposlib.type_definitions import Item from environments.agent_loop import AgentResult, HermesAgentLoop from environments.tool_context import ToolContext +from tools.budget_config import ( + DEFAULT_RESULT_SIZE_CHARS, + DEFAULT_TURN_BUDGET_CHARS, + DEFAULT_PREVIEW_SIZE_CHARS, +) # Import hermes-agent toolset infrastructure from model_tools import get_tool_definitions @@ -160,6 +165,32 @@ class HermesAgentEnvConfig(BaseEnvConfig): "Options: hermes, mistral, llama3_json, qwen, deepseek_v3, etc.", ) + # --- Tool result budget --- + # Defaults imported from tools.budget_config (single source of truth). + default_result_size_chars: int = Field( + default=DEFAULT_RESULT_SIZE_CHARS, + description="Default per-tool threshold (chars) for persisting large results " + "to sandbox. 
Results exceeding this are written to /tmp/hermes-results/ " + "and replaced with a preview. Per-tool registry values take precedence " + "unless overridden via tool_result_overrides.", + ) + turn_budget_chars: int = Field( + default=DEFAULT_TURN_BUDGET_CHARS, + description="Aggregate char budget per assistant turn. If all tool results " + "in a single turn exceed this, the largest are persisted to disk first.", + ) + preview_size_chars: int = Field( + default=DEFAULT_PREVIEW_SIZE_CHARS, + description="Size of the inline preview shown after a tool result is persisted.", + ) + tool_result_overrides: Optional[Dict[str, int]] = Field( + default=None, + description="Per-tool threshold overrides (chars). Keys are tool names, " + "values are char thresholds. Overrides both the default and registry " + "per-tool values. Example: {'terminal': 10000, 'search_files': 5000}. " + "Note: read_file is pinned to infinity and cannot be overridden.", + ) + # --- Provider-specific parameters --- # Passed as extra_body to the OpenAI client's chat.completions.create() call. # Useful for OpenRouter provider preferences, transforms, route settings, etc. 
@@ -176,6 +207,16 @@ class HermesAgentEnvConfig(BaseEnvConfig): "transforms, and other provider-specific settings.", ) + def build_budget_config(self): + """Build a BudgetConfig from env config fields.""" + from tools.budget_config import BudgetConfig + return BudgetConfig( + default_result_size=self.default_result_size_chars, + turn_budget=self.turn_budget_chars, + preview_size=self.preview_size_chars, + tool_overrides=dict(self.tool_result_overrides) if self.tool_result_overrides else {}, + ) + class HermesAgentBaseEnv(BaseEnv): """ @@ -490,6 +531,7 @@ class HermesAgentBaseEnv(BaseEnv): temperature=self.config.agent_temperature, max_tokens=self.config.max_token_length, extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) except NotImplementedError: @@ -507,6 +549,7 @@ class HermesAgentBaseEnv(BaseEnv): temperature=self.config.agent_temperature, max_tokens=self.config.max_token_length, extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) else: @@ -520,6 +563,7 @@ class HermesAgentBaseEnv(BaseEnv): temperature=self.config.agent_temperature, max_tokens=self.config.max_token_length, extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) diff --git a/environments/web_research_env.py b/environments/web_research_env.py index b234159f..c637a7cb 100644 --- a/environments/web_research_env.py +++ b/environments/web_research_env.py @@ -472,6 +472,7 @@ class WebResearchEnv(HermesAgentBaseEnv): temperature=0.0, # Deterministic for eval max_tokens=self.config.max_token_length, extra_body=self.config.extra_body, + budget_config=self.config.build_budget_config(), ) result = await agent.run(messages) diff --git a/tools/budget_config.py b/tools/budget_config.py new file mode 100644 index 00000000..52204cdf --- /dev/null +++ b/tools/budget_config.py @@ -0,0 +1,52 @@ 
+"""Configurable budget constants for tool result persistence. + +Overridable at the RL environment level via HermesAgentEnvConfig fields. +Per-tool resolution: pinned > config overrides > registry > default. +""" + +from dataclasses import dataclass, field +from typing import Dict + +# Tools whose thresholds must never be overridden. +# read_file=inf prevents infinite persist->read->persist loops. +PINNED_THRESHOLDS: Dict[str, float] = { + "read_file": float("inf"), +} + +# Defaults matching the current hardcoded values in tool_result_storage.py. +# Kept here as the single source of truth; tool_result_storage.py imports these. +DEFAULT_RESULT_SIZE_CHARS: int = 50_000 +DEFAULT_TURN_BUDGET_CHARS: int = 200_000 +DEFAULT_PREVIEW_SIZE_CHARS: int = 2_000 + + +@dataclass(frozen=True) +class BudgetConfig: + """Immutable budget constants for the 3-layer tool result persistence system. + + Layer 2 (per-result): resolve_threshold(tool_name) -> threshold in chars. + Layer 3 (per-turn): turn_budget -> aggregate char budget across all tool + results in a single assistant turn. + Preview: preview_size -> inline snippet size after persistence. + """ + + default_result_size: int = DEFAULT_RESULT_SIZE_CHARS + turn_budget: int = DEFAULT_TURN_BUDGET_CHARS + preview_size: int = DEFAULT_PREVIEW_SIZE_CHARS + tool_overrides: Dict[str, int] = field(default_factory=dict) + + def resolve_threshold(self, tool_name: str) -> int | float: + """Resolve the persistence threshold for a tool. + + Priority: pinned -> tool_overrides -> registry per-tool -> default. + """ + if tool_name in PINNED_THRESHOLDS: + return PINNED_THRESHOLDS[tool_name] + if tool_name in self.tool_overrides: + return self.tool_overrides[tool_name] + from tools.registry import registry + return registry.get_max_result_size(tool_name, default=self.default_result_size) + + +# Default config -- matches current hardcoded behavior exactly. 
+DEFAULT_BUDGET = BudgetConfig() diff --git a/tools/registry.py b/tools/registry.py index c01c60c0..9437a6b4 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -169,12 +169,14 @@ class ToolRegistry: # Query helpers (replace redundant dicts in model_tools.py) # ------------------------------------------------------------------ - def get_max_result_size(self, name: str) -> int | float: - """Return per-tool max result size, or global default.""" - from tools.tool_result_storage import DEFAULT_MAX_RESULT_SIZE_CHARS + def get_max_result_size(self, name: str, default: int | float | None = None) -> int | float: + """Return per-tool max result size, or *default* (or global default).""" entry = self._tools.get(name) if entry and entry.max_result_size_chars is not None: return entry.max_result_size_chars + if default is not None: + return default + from tools.tool_result_storage import DEFAULT_MAX_RESULT_SIZE_CHARS return DEFAULT_MAX_RESULT_SIZE_CHARS def get_all_tool_names(self) -> List[str]: diff --git a/tools/tool_result_storage.py b/tools/tool_result_storage.py index c478431b..8b2abb91 100644 --- a/tools/tool_result_storage.py +++ b/tools/tool_result_storage.py @@ -24,11 +24,13 @@ import json import logging import uuid -logger = logging.getLogger(__name__) +from tools.budget_config import ( + DEFAULT_RESULT_SIZE_CHARS as DEFAULT_MAX_RESULT_SIZE_CHARS, + DEFAULT_TURN_BUDGET_CHARS as MAX_TURN_BUDGET_CHARS, + DEFAULT_PREVIEW_SIZE_CHARS as PREVIEW_SIZE_CHARS, +) -DEFAULT_MAX_RESULT_SIZE_CHARS: int = 50_000 -MAX_TURN_BUDGET_CHARS: int = 200_000 -PREVIEW_SIZE_CHARS: int = 2_000 +logger = logging.getLogger(__name__) PERSISTED_OUTPUT_TAG = "" PERSISTED_OUTPUT_CLOSING_TAG = "" STORAGE_DIR = "/tmp/hermes-results" @@ -112,6 +114,7 @@ def maybe_persist_tool_result( tool_use_id: str, env=None, threshold: int | float | None = None, + preview_size: int = PREVIEW_SIZE_CHARS, ) -> str: """Layer 2: persist oversized result into the sandbox, return preview + path. 
@@ -125,6 +128,7 @@ def maybe_persist_tool_result( tool_use_id: Unique ID for this tool call (used as filename). env: The active BaseEnvironment instance, or None. threshold: Override threshold; if None, looked up from registry. + preview_size: Max chars for the inline preview after persistence. Returns: Original content if small, or replacement. @@ -143,7 +147,7 @@ def maybe_persist_tool_result( remote_path = f"{STORAGE_DIR}/{tool_use_id}.txt" # Write raw output (not JSON wrapper) so read_file returns readable text file_content = _extract_raw_output(content) - preview, has_more = generate_preview(file_content) + preview, has_more = generate_preview(file_content, max_chars=preview_size) # Try writing into the sandbox if env is not None: @@ -173,6 +177,7 @@ def enforce_turn_budget( tool_messages: list[dict], env=None, budget: int = MAX_TURN_BUDGET_CHARS, + preview_size: int = PREVIEW_SIZE_CHARS, ) -> list[dict]: """Layer 3: enforce aggregate budget across all tool results in a turn. @@ -210,6 +215,7 @@ def enforce_turn_budget( tool_use_id=tool_use_id, env=env, threshold=0, + preview_size=preview_size, ) if replacement != content: total_size -= size From bbcff8dcd05ef16c13e3ed03e021205f4274998b Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Wed, 8 Apr 2026 00:13:41 -0700 Subject: [PATCH 126/154] =?UTF-8?q?fix(tools):=20address=20PR=20review=20?= =?UTF-8?q?=E2=80=94=20remove=20=5Fextract=5Fraw=5Foutput,=20BudgetConfig?= =?UTF-8?q?=20everywhere,=20read=5Ffile=20hardening?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove _extract_raw_output: persist content verbatim (fixes size mismatch bug) - Drop import aliases: import from budget_config directly, one canonical name - BudgetConfig param on maybe_persist_tool_result and enforce_turn_budget - read_file: limit=None signature, pre-read guard fires only when limit omitted (256KB) - Unify binary extensions: file_operations.py imports from binary_extensions.py - Exclude 
.pdf and .svg from binary set (text-based, agents may inspect) - Remove redundant outer try/except in eval path (internal fallback handles it) - Fix broken tests: update assertion strings for new persistence format - Module-level constants: _PRE_READ_MAX_BYTES, _DEFAULT_READ_LIMIT - Remove redundant pathlib import (Path already at module level) - Update spec.md with IMPLEMENTED annotations and design decisions --- environments/agent_loop.py | 36 ++++++--------- tests/run_agent/test_run_agent.py | 8 ++-- tests/tools/test_tool_result_storage.py | 60 ++++++++----------------- tools/binary_extensions.py | 4 +- tools/file_operations.py | 21 +-------- tools/file_tools.py | 51 ++++++++++----------- tools/registry.py | 4 +- tools/tool_result_storage.py | 57 +++++++---------------- 8 files changed, 83 insertions(+), 158 deletions(-) diff --git a/environments/agent_loop.py b/environments/agent_loop.py index cbf9c774..891ce42f 100644 --- a/environments/agent_loop.py +++ b/environments/agent_loop.py @@ -455,17 +455,13 @@ class HermesAgentLoop: pass tc_id = tc.get("id", "") if isinstance(tc, dict) else tc.id - try: - tool_result = maybe_persist_tool_result( - content=tool_result, - tool_name=tool_name, - tool_use_id=tc_id, - env=get_active_env(self.task_id), - threshold=self.budget_config.resolve_threshold(tool_name), - preview_size=self.budget_config.preview_size, - ) - except Exception: - pass # Persistence is best-effort in eval path + tool_result = maybe_persist_tool_result( + content=tool_result, + tool_name=tool_name, + tool_use_id=tc_id, + env=get_active_env(self.task_id), + config=self.budget_config, + ) messages.append( { @@ -475,17 +471,13 @@ class HermesAgentLoop: } ) - try: - num_tcs = len(assistant_msg.tool_calls) - if num_tcs > 0: - enforce_turn_budget( - messages[-num_tcs:], - env=get_active_env(self.task_id), - budget=self.budget_config.turn_budget, - preview_size=self.budget_config.preview_size, - ) - except Exception: - pass + num_tcs = 
len(assistant_msg.tool_calls) + if num_tcs > 0: + enforce_turn_budget( + messages[-num_tcs:], + env=get_active_env(self.task_id), + config=self.budget_config, + ) turn_elapsed = _time.monotonic() - turn_start logger.info( diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 7f6ab4c3..104881a0 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1011,10 +1011,9 @@ class TestExecuteToolCalls: big_result = "x" * 150_000 with patch("run_agent.handle_function_call", return_value=big_result): agent._execute_tool_calls(mock_msg, messages, "task-1") - # Content should be replaced with preview + file path + # Content should be replaced with persisted-output or truncation assert len(messages[0]["content"]) < 150_000 - assert "Large tool response" in messages[0]["content"] - assert "Full output saved to:" in messages[0]["content"] + assert ("Truncated" in messages[0]["content"] or "" in messages[0]["content"]) class TestConcurrentToolExecution: @@ -1249,8 +1248,7 @@ class TestConcurrentToolExecution: assert len(messages) == 2 for m in messages: assert len(m["content"]) < 150_000 - assert "Large tool response" in m["content"] - assert "Full output saved to:" in m["content"] + assert ("Truncated" in m["content"] or "" in m["content"]) def test_invoke_tool_dispatches_to_handle_function_call(self, agent): """_invoke_tool should route regular tools through handle_function_call.""" diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py index 7c757027..96b904a5 100644 --- a/tests/tools/test_tool_result_storage.py +++ b/tests/tools/test_tool_result_storage.py @@ -3,16 +3,18 @@ import pytest from unittest.mock import MagicMock, patch +from tools.budget_config import ( + DEFAULT_RESULT_SIZE_CHARS, + DEFAULT_TURN_BUDGET_CHARS, + DEFAULT_PREVIEW_SIZE_CHARS, + BudgetConfig, +) from tools.tool_result_storage import ( - DEFAULT_MAX_RESULT_SIZE_CHARS, HEREDOC_MARKER, - 
MAX_TURN_BUDGET_CHARS, PERSISTED_OUTPUT_TAG, PERSISTED_OUTPUT_CLOSING_TAG, - PREVIEW_SIZE_CHARS, STORAGE_DIR, _build_persisted_message, - _extract_raw_output, _heredoc_marker, _write_to_sandbox, enforce_turn_budget, @@ -56,35 +58,12 @@ class TestGeneratePreview: assert has_more is False def test_exact_boundary(self): - text = "x" * PREVIEW_SIZE_CHARS + text = "x" * DEFAULT_PREVIEW_SIZE_CHARS preview, has_more = generate_preview(text) assert preview == text assert has_more is False -# ── _extract_raw_output ──────────────────────────────────────────────── - -class TestExtractRawOutput: - def test_extracts_output_from_terminal_json(self): - import json - content = json.dumps({"output": "hello world\nline2", "exit_code": 0, "error": None}) - assert _extract_raw_output(content) == "hello world\nline2" - - def test_passes_through_non_json(self): - assert _extract_raw_output("plain text output") == "plain text output" - - def test_passes_through_json_without_output_key(self): - import json - content = json.dumps({"result": "something", "status": "ok"}) - assert _extract_raw_output(content) == content - - def test_extracts_large_output(self): - import json - big = "x\n" * 30_000 - content = json.dumps({"output": big, "exit_code": 0, "error": None}) - assert _extract_raw_output(content) == big - - # ── _heredoc_marker ─────────────────────────────────────────────────── class TestHeredocMarker: @@ -206,8 +185,8 @@ class TestMaybePersistToolResult: assert len(result) < len(content) env.execute.assert_called_once() - def test_persists_raw_output_not_json_wrapper(self): - """When content is JSON with 'output' key, file should contain raw output.""" + def test_persists_full_content_as_is(self): + """Content is persisted verbatim — no JSON extraction.""" import json env = MagicMock() env.execute.return_value = {"output": "", "returncode": 0} @@ -221,10 +200,9 @@ class TestMaybePersistToolResult: threshold=30_000, ) assert PERSISTED_OUTPUT_TAG in result - # The heredoc written to 
sandbox should contain raw text, not JSON + # The heredoc written to sandbox should contain the full JSON blob cmd = env.execute.call_args[0][0] - assert "line1\nline2\n" in cmd - assert '"exit_code"' not in cmd + assert '"exit_code"' in cmd def test_above_threshold_no_env_truncates_inline(self): content = "x" * 60_000 @@ -386,7 +364,7 @@ class TestEnforceTurnBudget: {"role": "tool", "tool_call_id": "t1", "content": "small"}, {"role": "tool", "tool_call_id": "t2", "content": "also small"}, ] - result = enforce_turn_budget(msgs, env=None, budget=200_000) + result = enforce_turn_budget(msgs, env=None, config=BudgetConfig(turn_budget=200_000)) assert result[0]["content"] == "small" assert result[1]["content"] == "also small" @@ -398,7 +376,7 @@ class TestEnforceTurnBudget: {"role": "tool", "tool_call_id": "t2", "content": "b" * 130_000}, ] # Total 210K > 200K budget - enforce_turn_budget(msgs, env=env, budget=200_000) + enforce_turn_budget(msgs, env=env, config=BudgetConfig(turn_budget=200_000)) # The larger one (130K) should be persisted first assert PERSISTED_OUTPUT_TAG in msgs[1]["content"] @@ -410,7 +388,7 @@ class TestEnforceTurnBudget: "content": f"{PERSISTED_OUTPUT_TAG}\nalready persisted\n{PERSISTED_OUTPUT_CLOSING_TAG}"}, {"role": "tool", "tool_call_id": "t2", "content": "x" * 250_000}, ] - enforce_turn_budget(msgs, env=env, budget=200_000) + enforce_turn_budget(msgs, env=env, config=BudgetConfig(turn_budget=200_000)) # t1 should be untouched (already persisted) assert msgs[0]["content"].startswith(PERSISTED_OUTPUT_TAG) # t2 should be persisted @@ -425,7 +403,7 @@ class TestEnforceTurnBudget: {"role": "tool", "tool_call_id": f"t{i}", "content": "x" * 42_000} for i in range(6) ] - enforce_turn_budget(msgs, env=env, budget=200_000) + enforce_turn_budget(msgs, env=env, config=BudgetConfig(turn_budget=200_000)) # At least some results should be persisted to get under 200K persisted_count = sum( 1 for m in msgs if PERSISTED_OUTPUT_TAG in m["content"] @@ -436,17 
+414,17 @@ class TestEnforceTurnBudget: msgs = [ {"role": "tool", "tool_call_id": "t1", "content": "x" * 250_000}, ] - enforce_turn_budget(msgs, env=None, budget=200_000) + enforce_turn_budget(msgs, env=None, config=BudgetConfig(turn_budget=200_000)) # Should be truncated (no sandbox available) assert "Truncated" in msgs[0]["content"] or PERSISTED_OUTPUT_TAG in msgs[0]["content"] def test_returns_same_list(self): msgs = [{"role": "tool", "tool_call_id": "t1", "content": "ok"}] - result = enforce_turn_budget(msgs, env=None, budget=200_000) + result = enforce_turn_budget(msgs, env=None, config=BudgetConfig(turn_budget=200_000)) assert result is msgs def test_empty_messages(self): - result = enforce_turn_budget([], env=None, budget=200_000) + result = enforce_turn_budget([], env=None, config=BudgetConfig(turn_budget=200_000)) assert result == [] @@ -463,7 +441,7 @@ class TestPerToolThresholds: from tools.registry import registry # Unknown tool should return the default val = registry.get_max_result_size("nonexistent_tool_xyz") - assert val == DEFAULT_MAX_RESULT_SIZE_CHARS + assert val == DEFAULT_RESULT_SIZE_CHARS def test_terminal_threshold(self): from tools.registry import registry diff --git a/tools/binary_extensions.py b/tools/binary_extensions.py index f7e63bda..bd4bb8d1 100644 --- a/tools/binary_extensions.py +++ b/tools/binary_extensions.py @@ -16,8 +16,8 @@ BINARY_EXTENSIONS = frozenset({ # Executables/binaries ".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".obj", ".lib", ".app", ".msi", ".deb", ".rpm", - # Documents (PDF is here; read_file excludes it at the call site) - ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", + # Documents (exclude .pdf — text-based, agents may want to inspect) + ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt", ".ods", ".odp", # Fonts ".ttf", ".otf", ".woff", ".woff2", ".eot", diff --git a/tools/file_operations.py b/tools/file_operations.py index 052f77a8..f2b37505 100644 --- a/tools/file_operations.py +++ 
b/tools/file_operations.py @@ -33,6 +33,7 @@ from dataclasses import dataclass, field from typing import Optional, List, Dict, Any from pathlib import Path from hermes_constants import get_hermes_home +from tools.binary_extensions import BINARY_EXTENSIONS # --------------------------------------------------------------------------- @@ -280,26 +281,6 @@ class FileOperations(ABC): # Shell-based Implementation # ============================================================================= -# Binary file extensions (fast path check) -BINARY_EXTENSIONS = { - # Images - '.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico', '.tiff', '.tif', - '.svg', # SVG is text but often treated as binary - # Audio/Video - '.mp3', '.mp4', '.wav', '.avi', '.mov', '.mkv', '.flac', '.ogg', '.webm', - # Archives - '.zip', '.tar', '.gz', '.bz2', '.xz', '.7z', '.rar', - # Documents - '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', - # Compiled/Binary - '.exe', '.dll', '.so', '.dylib', '.o', '.a', '.pyc', '.pyo', '.class', - '.wasm', '.bin', - # Fonts - '.ttf', '.otf', '.woff', '.woff2', '.eot', - # Other - '.db', '.sqlite', '.sqlite3', -} - # Image extensions (subset of binary that we can return as base64) IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico'} diff --git a/tools/file_tools.py b/tools/file_tools.py index 265c9ed2..4ca10b2d 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -26,6 +26,8 @@ _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS} # Configurable via config.yaml: file_read_max_chars: 200000 # --------------------------------------------------------------------------- _DEFAULT_MAX_READ_CHARS = 100_000 +_PRE_READ_MAX_BYTES = 256_000 # reject full-file reads on files larger than this +_DEFAULT_READ_LIMIT = 500 _max_read_chars_cached: int | None = None @@ -277,7 +279,7 @@ def clear_file_ops_cache(task_id: str = None): _file_ops_cache.clear() -def read_file_tool(path: str, offset: int = 1, limit: int = 500, 
task_id: str = "default") -> str: +def read_file_tool(path: str, offset: int = 1, limit: int | None = None, task_id: str = "default") -> str: """Read a file with pagination and line numbers.""" try: # ── Device path guard ───────────────────────────────────────── @@ -291,9 +293,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = ), }) - # Resolve path once for all guards below - import pathlib as _pathlib - _resolved = _pathlib.Path(path).expanduser().resolve() + _resolved = Path(path).expanduser().resolve() # ── Binary file guard ───────────────────────────────────────── # Block binary files by extension (no I/O). @@ -328,25 +328,26 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = pass # ── Pre-read file size guard ────────────────────────────────── - # Stat the file before reading. If it's large and the model - # didn't request a narrow range, block and tell it to use - # offset/limit — cheaper than reading 200K chars then rejecting. - _PRE_READ_MAX_BYTES = 100_000 - _NARROW_LIMIT = 200 - try: - _fsize = os.path.getsize(str(_resolved)) - except OSError: - _fsize = 0 - if _fsize > _PRE_READ_MAX_BYTES and limit > _NARROW_LIMIT: - return json.dumps({ - "error": ( - f"File is too large to read in full ({_fsize:,} bytes). " - f"Use offset and limit parameters to read specific sections " - f"(e.g. offset=1, limit=100 for the first 100 lines)." - ), - "path": path, - "file_size": _fsize, - }, ensure_ascii=False) + # Guard only when the caller omits limit; an explicit limit means + # the caller knows what slice it wants. + if limit is None: + try: + _fsize = os.path.getsize(str(_resolved)) + except OSError: + _fsize = 0 + if _fsize > _PRE_READ_MAX_BYTES: + return json.dumps({ + "error": ( + f"File is too large to read in full ({_fsize:,} bytes). " + f"Use offset and limit parameters to read specific sections " + f"(e.g. offset=1, limit=100 for the first 100 lines)." 
+ ), + "path": path, + "file_size": _fsize, + }, ensure_ascii=False) + + if limit is None: + limit = _DEFAULT_READ_LIMIT # ── Dedup check ─────────────────────────────────────────────── # If we already read this exact (path, offset, limit) and the @@ -761,7 +762,7 @@ def _check_file_reqs(): READ_FILE_SCHEMA = { "name": "read_file", - "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 100KB will be rejected unless you specify a narrow range (limit <= 200). NOTE: Cannot read images or binary files — use vision_analyze for images.", + "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 256KB will be rejected unless you provide a limit parameter. 
NOTE: Cannot read images or binary files — use vision_analyze for images.", "parameters": { "type": "object", "properties": { @@ -825,7 +826,7 @@ SEARCH_FILES_SCHEMA = { def _handle_read_file(args, **kw): tid = kw.get("task_id") or "default" - return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit", 500), task_id=tid) + return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit"), task_id=tid) def _handle_write_file(args, **kw): diff --git a/tools/registry.py b/tools/registry.py index 9437a6b4..d3590a42 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -176,8 +176,8 @@ class ToolRegistry: return entry.max_result_size_chars if default is not None: return default - from tools.tool_result_storage import DEFAULT_MAX_RESULT_SIZE_CHARS - return DEFAULT_MAX_RESULT_SIZE_CHARS + from tools.budget_config import DEFAULT_RESULT_SIZE_CHARS + return DEFAULT_RESULT_SIZE_CHARS def get_all_tool_names(self) -> List[str]: """Return sorted list of all registered tool names.""" diff --git a/tools/tool_result_storage.py b/tools/tool_result_storage.py index 8b2abb91..076d37ae 100644 --- a/tools/tool_result_storage.py +++ b/tools/tool_result_storage.py @@ -20,14 +20,13 @@ Defense against context-window overflow operates at three levels: where many medium-sized results combine to overflow context. 
""" -import json import logging import uuid from tools.budget_config import ( - DEFAULT_RESULT_SIZE_CHARS as DEFAULT_MAX_RESULT_SIZE_CHARS, - DEFAULT_TURN_BUDGET_CHARS as MAX_TURN_BUDGET_CHARS, - DEFAULT_PREVIEW_SIZE_CHARS as PREVIEW_SIZE_CHARS, + DEFAULT_PREVIEW_SIZE_CHARS, + BudgetConfig, + DEFAULT_BUDGET, ) logger = logging.getLogger(__name__) @@ -38,7 +37,7 @@ HEREDOC_MARKER = "HERMES_PERSIST_EOF" _BUDGET_TOOL_NAME = "__budget_enforcement__" -def generate_preview(content: str, max_chars: int = PREVIEW_SIZE_CHARS) -> tuple[str, bool]: +def generate_preview(content: str, max_chars: int = DEFAULT_PREVIEW_SIZE_CHARS) -> tuple[str, bool]: """Truncate at last newline within max_chars. Returns (preview, has_more).""" if len(content) <= max_chars: return content, False @@ -56,21 +55,6 @@ def _heredoc_marker(content: str) -> str: return f"HERMES_PERSIST_{uuid.uuid4().hex[:8]}" -def _extract_raw_output(content: str) -> str: - """Extract the 'output' field from JSON tool results for cleaner persistence. - - Tool handlers return json.dumps({"output": ..., "exit_code": ...}) for the - API, but persisted files should contain readable text, not a JSON blob. - """ - try: - data = json.loads(content) - if isinstance(data, dict) and "output" in data: - return data["output"] - except (json.JSONDecodeError, TypeError): - pass - return content - - def _write_to_sandbox(content: str, remote_path: str, env) -> bool: """Write content into the sandbox via env.execute(). Returns True on success.""" marker = _heredoc_marker(content) @@ -113,8 +97,8 @@ def maybe_persist_tool_result( tool_name: str, tool_use_id: str, env=None, + config: BudgetConfig = DEFAULT_BUDGET, threshold: int | float | None = None, - preview_size: int = PREVIEW_SIZE_CHARS, ) -> str: """Layer 2: persist oversized result into the sandbox, return preview + path. @@ -127,32 +111,26 @@ def maybe_persist_tool_result( tool_name: Name of the tool (used for threshold lookup). 
tool_use_id: Unique ID for this tool call (used as filename). env: The active BaseEnvironment instance, or None. - threshold: Override threshold; if None, looked up from registry. - preview_size: Max chars for the inline preview after persistence. + config: BudgetConfig controlling thresholds and preview size. + threshold: Explicit override; takes precedence over config resolution. Returns: Original content if small, or replacement. """ - if threshold is None: - from tools.registry import registry - threshold = registry.get_max_result_size(tool_name) + effective_threshold = threshold if threshold is not None else config.resolve_threshold(tool_name) - # Infinity means never persist (e.g. read_file) - if threshold == float("inf"): + if effective_threshold == float("inf"): return content - if len(content) <= threshold: + if len(content) <= effective_threshold: return content remote_path = f"{STORAGE_DIR}/{tool_use_id}.txt" - # Write raw output (not JSON wrapper) so read_file returns readable text - file_content = _extract_raw_output(content) - preview, has_more = generate_preview(file_content, max_chars=preview_size) + preview, has_more = generate_preview(content, max_chars=config.preview_size) - # Try writing into the sandbox if env is not None: try: - if _write_to_sandbox(file_content, remote_path, env): + if _write_to_sandbox(content, remote_path, env): logger.info( "Persisted large tool result: %s (%s, %d chars -> %s)", tool_name, tool_use_id, len(content), remote_path, @@ -161,7 +139,6 @@ def maybe_persist_tool_result( except Exception as exc: logger.warning("Sandbox write failed for %s: %s", tool_use_id, exc) - # Fallback: inline truncation (no sandbox available or write failed) logger.info( "Inline-truncating large tool result: %s (%d chars, no sandbox write)", tool_name, len(content), @@ -176,8 +153,7 @@ def maybe_persist_tool_result( def enforce_turn_budget( tool_messages: list[dict], env=None, - budget: int = MAX_TURN_BUDGET_CHARS, - preview_size: int = 
PREVIEW_SIZE_CHARS, + config: BudgetConfig = DEFAULT_BUDGET, ) -> list[dict]: """Layer 3: enforce aggregate budget across all tool results in a turn. @@ -196,14 +172,13 @@ def enforce_turn_budget( if PERSISTED_OUTPUT_TAG not in content: candidates.append((i, size)) - if total_size <= budget: + if total_size <= config.turn_budget: return tool_messages - # Sort candidates by size descending — persist largest first candidates.sort(key=lambda x: x[1], reverse=True) for idx, size in candidates: - if total_size <= budget: + if total_size <= config.turn_budget: break msg = tool_messages[idx] content = msg["content"] @@ -214,8 +189,8 @@ def enforce_turn_budget( tool_name=_BUDGET_TOOL_NAME, tool_use_id=tool_use_id, env=env, + config=config, threshold=0, - preview_size=preview_size, ) if replacement != content: total_size -= size From 3696c74bfbd8ba1761fb6a5f192003a50e8b5623 Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 8 Apr 2026 01:45:51 -0700 Subject: [PATCH 127/154] fix: preserve existing thresholds, remove pre-read byte guard - DEFAULT_RESULT_SIZE_CHARS: 50K -> 100K (match current _LARGE_RESULT_CHARS) - DEFAULT_PREVIEW_SIZE_CHARS: 2K -> 1.5K (match current _LARGE_RESULT_PREVIEW_CHARS) - Per-tool overrides all set to 100K (terminal, execute_code, search_files) - Remove pre-read byte guard (no behavioral regression vs current main) - Revert limit signature change to int=500 (match current default) - Restore original read_file schema description - Update test assertions to match 100K thresholds --- tests/tools/test_tool_result_storage.py | 6 ++--- tools/budget_config.py | 4 ++-- tools/code_execution_tool.py | 2 +- tools/file_tools.py | 32 ++++--------------------- tools/terminal_tool.py | 2 +- 5 files changed, 11 insertions(+), 35 deletions(-) diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py index 96b904a5..4e51fe7b 100644 --- a/tests/tools/test_tool_result_storage.py +++ b/tests/tools/test_tool_result_storage.py @@ -395,7 
+395,7 @@ class TestEnforceTurnBudget: assert PERSISTED_OUTPUT_TAG in msgs[1]["content"] def test_medium_result_regression(self): - """6 results of 42K chars each (252K total) — each under 50K default + """6 results of 42K chars each (252K total) — each under 100K default threshold but aggregate exceeds 200K budget. L3 should persist.""" env = MagicMock() env.execute.return_value = {"output": "", "returncode": 0} @@ -449,7 +449,7 @@ class TestPerToolThresholds: try: import tools.terminal_tool # noqa: F401 val = registry.get_max_result_size("terminal") - assert val == 30_000 + assert val == 100_000 except ImportError: pytest.skip("terminal_tool not importable in test env") @@ -467,6 +467,6 @@ class TestPerToolThresholds: try: import tools.file_tools # noqa: F401 val = registry.get_max_result_size("search_files") - assert val == 20_000 + assert val == 100_000 except ImportError: pytest.skip("file_tools not importable in test env") diff --git a/tools/budget_config.py b/tools/budget_config.py index 52204cdf..577e5944 100644 --- a/tools/budget_config.py +++ b/tools/budget_config.py @@ -15,9 +15,9 @@ PINNED_THRESHOLDS: Dict[str, float] = { # Defaults matching the current hardcoded values in tool_result_storage.py. # Kept here as the single source of truth; tool_result_storage.py imports these. 
-DEFAULT_RESULT_SIZE_CHARS: int = 50_000 +DEFAULT_RESULT_SIZE_CHARS: int = 100_000 DEFAULT_TURN_BUDGET_CHARS: int = 200_000 -DEFAULT_PREVIEW_SIZE_CHARS: int = 2_000 +DEFAULT_PREVIEW_SIZE_CHARS: int = 1_500 @dataclass(frozen=True) diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index f48c4b99..aa4cd086 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -1343,5 +1343,5 @@ registry.register( enabled_tools=kw.get("enabled_tools")), check_fn=check_sandbox_requirements, emoji="🐍", - max_result_size_chars=30_000, + max_result_size_chars=100_000, ) diff --git a/tools/file_tools.py b/tools/file_tools.py index 4ca10b2d..186a9d05 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -26,8 +26,6 @@ _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS} # Configurable via config.yaml: file_read_max_chars: 200000 # --------------------------------------------------------------------------- _DEFAULT_MAX_READ_CHARS = 100_000 -_PRE_READ_MAX_BYTES = 256_000 # reject full-file reads on files larger than this -_DEFAULT_READ_LIMIT = 500 _max_read_chars_cached: int | None = None @@ -279,7 +277,7 @@ def clear_file_ops_cache(task_id: str = None): _file_ops_cache.clear() -def read_file_tool(path: str, offset: int = 1, limit: int | None = None, task_id: str = "default") -> str: +def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str: """Read a file with pagination and line numbers.""" try: # ── Device path guard ───────────────────────────────────────── @@ -327,28 +325,6 @@ def read_file_tool(path: str, offset: int = 1, limit: int | None = None, task_id except ValueError: pass - # ── Pre-read file size guard ────────────────────────────────── - # Guard only when the caller omits limit; an explicit limit means - # the caller knows what slice it wants. 
- if limit is None: - try: - _fsize = os.path.getsize(str(_resolved)) - except OSError: - _fsize = 0 - if _fsize > _PRE_READ_MAX_BYTES: - return json.dumps({ - "error": ( - f"File is too large to read in full ({_fsize:,} bytes). " - f"Use offset and limit parameters to read specific sections " - f"(e.g. offset=1, limit=100 for the first 100 lines)." - ), - "path": path, - "file_size": _fsize, - }, ensure_ascii=False) - - if limit is None: - limit = _DEFAULT_READ_LIMIT - # ── Dedup check ─────────────────────────────────────────────── # If we already read this exact (path, offset, limit) and the # file hasn't been modified since, return a lightweight stub @@ -762,7 +738,7 @@ def _check_file_reqs(): READ_FILE_SCHEMA = { "name": "read_file", - "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 256KB will be rejected unless you provide a limit parameter. NOTE: Cannot read images or binary files — use vision_analyze for images.", + "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. 
NOTE: Cannot read images or binary files — use vision_analyze for images.", "parameters": { "type": "object", "properties": { @@ -826,7 +802,7 @@ SEARCH_FILES_SCHEMA = { def _handle_read_file(args, **kw): tid = kw.get("task_id") or "default" - return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit"), task_id=tid) + return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit", 500), task_id=tid) def _handle_write_file(args, **kw): @@ -856,4 +832,4 @@ def _handle_search_files(args, **kw): registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=float('inf')) registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️", max_result_size_chars=100_000) registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧", max_result_size_chars=100_000) -registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=20_000) +registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=100_000) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index ff9e064b..6206c4aa 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1623,5 +1623,5 @@ registry.register( handler=_handle_terminal, check_fn=check_terminal_requirements, emoji="💻", - max_result_size_chars=30_000, + max_result_size_chars=100_000, ) From a18e5b95ad1f93102a5e29a72524a81e4a12b189 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 02:28:40 -0700 Subject: [PATCH 128/154] docs: add 
Hermes Mod visual skin editor section to skins page (#6095) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for cocktailpeanut's hermes-mod community tool — a web UI for creating and managing Hermes skins visually. Covers installation (Pinokio, npx, manual), usage walkthrough, and feature overview including ASCII art generation from images. Ref: https://github.com/cocktailpeanut/hermes-mod --- website/docs/user-guide/features/skins.md | 49 +++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/website/docs/user-guide/features/skins.md b/website/docs/user-guide/features/skins.md index 5aec20cd..e093a763 100644 --- a/website/docs/user-guide/features/skins.md +++ b/website/docs/user-guide/features/skins.md @@ -196,6 +196,55 @@ branding: tool_prefix: "▏" ``` +## Hermes Mod — Visual Skin Editor + +[Hermes Mod](https://github.com/cocktailpeanut/hermes-mod) is a community-built web UI for creating and managing skins visually. Instead of writing YAML by hand, you get a point-and-click editor with live preview. + +![Hermes Mod skin editor](https://raw.githubusercontent.com/cocktailpeanut/hermes-mod/master/nous.png) + +**What it does:** + +- Lists all built-in and custom skins +- Opens any skin into a visual editor with all Hermes skin fields (colors, spinner, branding, tool prefix, tool emojis) +- Generates `banner_logo` text art from a text prompt +- Converts uploaded images (PNG, JPG, GIF, WEBP) into `banner_hero` ASCII art with multiple render styles (braille, ASCII ramp, blocks, dots) +- Saves directly to `~/.hermes/skins/` +- Activates a skin by updating `~/.hermes/config.yaml` +- Shows the generated YAML and a live preview + +### Install + +**Option 1 — Pinokio (1-click):** + +Find it on [pinokio.computer](https://pinokio.computer) and install with one click. 
+ +**Option 2 — npx (quickest from terminal):** + +```bash +npx -y hermes-mod +``` + +**Option 3 — Manual:** + +```bash +git clone https://github.com/cocktailpeanut/hermes-mod.git +cd hermes-mod/app +npm install +npm start +``` + +### Usage + +1. Start the app (via Pinokio or terminal). +2. Open **Skin Studio**. +3. Choose a built-in or custom skin to edit. +4. Generate a logo from text and/or upload an image for hero art. Pick a render style and width. +5. Edit colors, spinner, branding, and other fields. +6. Click **Save** to write the skin YAML to `~/.hermes/skins/`. +7. Click **Activate** to set it as the current skin (updates `display.skin` in `config.yaml`). + +Hermes Mod respects the `HERMES_HOME` environment variable, so it works with [profiles](/docs/user-guide/profiles) too. + ## Operational notes - Built-in skins load from `hermes_cli/skin_engine.py`. From 085c1c6875c4459b93ac23db1bc80f412640b68c Mon Sep 17 00:00:00 2001 From: Vasanthdev2004 Date: Wed, 8 Apr 2026 13:53:51 +0530 Subject: [PATCH 129/154] fix(browser): preserve agent-browser paths with spaces --- tests/tools/test_browser_homebrew_paths.py | 103 +++++++++++++++++++++ tools/browser_tool.py | 6 +- 2 files changed, 108 insertions(+), 1 deletion(-) diff --git a/tests/tools/test_browser_homebrew_paths.py b/tests/tools/test_browser_homebrew_paths.py index 3e2e7666..33b72560 100644 --- a/tests/tools/test_browser_homebrew_paths.py +++ b/tests/tools/test_browser_homebrew_paths.py @@ -152,6 +152,109 @@ class TestFindAgentBrowser: class TestRunBrowserCommandPathConstruction: """Verify _run_browser_command() includes Homebrew node dirs in subprocess PATH.""" + def test_subprocess_preserves_executable_path_with_spaces(self, tmp_path): + """A local agent-browser path containing spaces must stay one argv entry.""" + captured_cmd = None + + mock_proc = MagicMock() + mock_proc.returncode = 0 + mock_proc.wait.return_value = 0 + + def capture_popen(cmd, **kwargs): + nonlocal captured_cmd + captured_cmd = cmd + 
return mock_proc + + fake_session = { + "session_name": "test-session", + "session_id": "test-id", + "cdp_url": None, + } + fake_json = json.dumps({"success": True}) + browser_path = "/Users/test/Library/Application Support/hermes/node_modules/.bin/agent-browser" + hermes_home = str(tmp_path / "hermes-home") + + with patch("tools.browser_tool._find_agent_browser", return_value=browser_path), \ + patch("tools.browser_tool._get_session_info", return_value=fake_session), \ + patch("tools.browser_tool._socket_safe_tmpdir", return_value=str(tmp_path)), \ + patch("tools.browser_tool._discover_homebrew_node_dirs", return_value=[]), \ + patch("hermes_constants.Path.home", return_value=tmp_path), \ + patch("subprocess.Popen", side_effect=capture_popen), \ + patch("os.open", return_value=99), \ + patch("os.close"), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch.dict( + os.environ, + { + "PATH": "/usr/bin:/bin", + "HOME": "/home/test", + "HERMES_HOME": hermes_home, + }, + clear=True, + ): + with patch("builtins.open", mock_open(read_data=fake_json)): + _run_browser_command("test-task", "navigate", ["https://example.com"]) + + assert captured_cmd is not None + assert captured_cmd[0] == browser_path + assert captured_cmd[1:5] == [ + "--session", + "test-session", + "--json", + "navigate", + ] + + def test_subprocess_splits_npx_fallback_into_command_and_package(self, tmp_path): + """The synthetic npx fallback should still expand into separate argv items.""" + captured_cmd = None + + mock_proc = MagicMock() + mock_proc.returncode = 0 + mock_proc.wait.return_value = 0 + + def capture_popen(cmd, **kwargs): + nonlocal captured_cmd + captured_cmd = cmd + return mock_proc + + fake_session = { + "session_name": "test-session", + "session_id": "test-id", + "cdp_url": None, + } + fake_json = json.dumps({"success": True}) + hermes_home = str(tmp_path / "hermes-home") + + with patch("tools.browser_tool._find_agent_browser", return_value="npx agent-browser"), \ + 
patch("tools.browser_tool._get_session_info", return_value=fake_session), \ + patch("tools.browser_tool._socket_safe_tmpdir", return_value=str(tmp_path)), \ + patch("tools.browser_tool._discover_homebrew_node_dirs", return_value=[]), \ + patch("hermes_constants.Path.home", return_value=tmp_path), \ + patch("subprocess.Popen", side_effect=capture_popen), \ + patch("os.open", return_value=99), \ + patch("os.close"), \ + patch("tools.interrupt.is_interrupted", return_value=False), \ + patch.dict( + os.environ, + { + "PATH": "/usr/bin:/bin", + "HOME": "/home/test", + "HERMES_HOME": hermes_home, + }, + clear=True, + ): + with patch("builtins.open", mock_open(read_data=fake_json)): + _run_browser_command("test-task", "navigate", ["https://example.com"]) + + assert captured_cmd is not None + assert captured_cmd[:2] == ["npx", "agent-browser"] + assert captured_cmd[2:6] == [ + "--session", + "test-session", + "--json", + "navigate", + ] + def test_subprocess_path_includes_homebrew_node_dirs(self, tmp_path): """When _discover_homebrew_node_dirs returns dirs, they should appear in the subprocess env PATH passed to Popen.""" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 7e52ed78..012b8eb0 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -877,7 +877,11 @@ def _run_browser_command( # Local mode — launch a headless Chromium instance backend_args = ["--session", session_info["session_name"]] - cmd_parts = browser_cmd.split() + backend_args + [ + # Keep concrete executable paths intact, even when they contain spaces. + # Only the synthetic npx fallback needs to expand into multiple argv items. 
+ cmd_prefix = ["npx", "agent-browser"] if browser_cmd == "npx agent-browser" else [browser_cmd] + + cmd_parts = cmd_prefix + backend_args + [ "--json", command ] + args From 55ac05692055295b6044ba0f9e468246d7f32b1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8F=B2=E5=AE=98?= Date: Wed, 8 Apr 2026 17:21:59 +0800 Subject: [PATCH 130/154] fix(hindsight): add missing get_hermes_home import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Import hermes_constants.get_hermes_home at module level so it is available in _start_daemon() when local mode starts the embedded daemon. Previously the import was only inside _load_config(), causing NameError when _start_daemon() referenced get_hermes_home(). Fixes #5993 Co-Authored-By: 史官 --- plugins/memory/hindsight/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index 51feb3cb..199a7dd5 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -23,6 +23,8 @@ import json import logging import os import threading + +from hermes_constants import get_hermes_home from typing import Any, Dict, List from agent.memory_provider import MemoryProvider @@ -142,7 +144,6 @@ def _load_config() -> dict: 3. 
Environment variables """ from pathlib import Path - from hermes_constants import get_hermes_home # Profile-scoped path (preferred) profile_path = get_hermes_home() / "hindsight" / "config.json" From 383db3592580a276dd55d3db1f8a879a5b686848 Mon Sep 17 00:00:00 2001 From: landy Date: Wed, 8 Apr 2026 16:40:07 +0800 Subject: [PATCH 131/154] fix: improve streaming fallback after edit failures --- gateway/stream_consumer.py | 115 ++++++++++++++++++++++++-- tests/gateway/test_stream_consumer.py | 88 ++++++++++++++++++++ 2 files changed, 196 insertions(+), 7 deletions(-) diff --git a/gateway/stream_consumer.py b/gateway/stream_consumer.py index 2cda3364..5522c631 100644 --- a/gateway/stream_consumer.py +++ b/gateway/stream_consumer.py @@ -74,6 +74,8 @@ class GatewayStreamConsumer: self._edit_supported = True # Disabled on first edit failure (Signal/Email/HA) self._last_edit_time = 0.0 self._last_sent_text = "" # Track last-sent text to skip redundant edits + self._fallback_final_send = False + self._fallback_prefix = "" @property def already_sent(self) -> bool: @@ -138,12 +140,19 @@ class GatewayStreamConsumer: while ( len(self._accumulated) > _safe_limit and self._message_id is not None + and self._edit_supported ): split_at = self._accumulated.rfind("\n", 0, _safe_limit) if split_at < _safe_limit // 2: split_at = _safe_limit chunk = self._accumulated[:split_at] await self._send_or_edit(chunk) + if self._fallback_final_send: + # Edit failed while attempting to split an oversized + # message. Keep the full accumulated text intact so + # the fallback final-send path can deliver the + # remaining continuation without dropping content. 
+ break self._accumulated = self._accumulated[split_at:].lstrip("\n") self._message_id = None self._last_sent_text = "" @@ -156,9 +165,17 @@ class GatewayStreamConsumer: self._last_edit_time = time.monotonic() if got_done: - # Final edit without cursor - if self._accumulated and self._message_id: - await self._send_or_edit(self._accumulated) + # Final edit without cursor. If progressive editing failed + # mid-stream, send a single continuation/fallback message + # here instead of letting the base gateway path send the + # full response again. + if self._accumulated: + if self._fallback_final_send: + await self._send_fallback_final(self._accumulated) + elif self._message_id: + await self._send_or_edit(self._accumulated) + elif not self._already_sent: + await self._send_or_edit(self._accumulated) return # Tool boundary: the should_edit block above already flushed @@ -169,6 +186,8 @@ class GatewayStreamConsumer: self._message_id = None self._accumulated = "" self._last_sent_text = "" + self._fallback_final_send = False + self._fallback_prefix = "" await asyncio.sleep(0.05) # Small yield to not busy-loop @@ -207,6 +226,86 @@ class GatewayStreamConsumer: # Strip trailing whitespace/newlines but preserve leading content return cleaned.rstrip() + def _visible_prefix(self) -> str: + """Return the visible text already shown in the streamed message.""" + prefix = self._last_sent_text or "" + if self.cfg.cursor and prefix.endswith(self.cfg.cursor): + prefix = prefix[:-len(self.cfg.cursor)] + return self._clean_for_display(prefix) + + def _continuation_text(self, final_text: str) -> str: + """Return only the part of final_text the user has not already seen.""" + prefix = self._fallback_prefix or self._visible_prefix() + if prefix and final_text.startswith(prefix): + return final_text[len(prefix):].lstrip() + return final_text + + @staticmethod + def _split_text_chunks(text: str, limit: int) -> list[str]: + """Split text into reasonably sized chunks for fallback sends.""" + if 
len(text) <= limit: + return [text] + chunks: list[str] = [] + remaining = text + while len(remaining) > limit: + split_at = remaining.rfind("\n", 0, limit) + if split_at < limit // 2: + split_at = limit + chunks.append(remaining[:split_at]) + remaining = remaining[split_at:].lstrip("\n") + if remaining: + chunks.append(remaining) + return chunks + + async def _send_fallback_final(self, text: str) -> None: + """Send the final continuation after streaming edits stop working.""" + final_text = self._clean_for_display(text) + continuation = self._continuation_text(final_text) + self._fallback_final_send = False + if not continuation.strip(): + # Nothing new to send — the visible partial already matches final text. + self._already_sent = True + return + + raw_limit = getattr(self.adapter, "MAX_MESSAGE_LENGTH", 4096) + safe_limit = max(500, raw_limit - 100) + chunks = self._split_text_chunks(continuation, safe_limit) + + last_message_id: Optional[str] = None + last_successful_chunk = "" + sent_any_chunk = False + for chunk in chunks: + result = await self.adapter.send( + chat_id=self.chat_id, + content=chunk, + metadata=self.metadata, + ) + if not result.success: + if sent_any_chunk: + # Some continuation text already reached the user. Suppress + # the base gateway final-send path so we don't resend the + # full response and create another duplicate. + self._already_sent = True + self._message_id = last_message_id + self._last_sent_text = last_successful_chunk + self._fallback_prefix = "" + return + # No fallback chunk reached the user — allow the normal gateway + # final-send path to try one more time. 
+ self._already_sent = False + self._message_id = None + self._last_sent_text = "" + self._fallback_prefix = "" + return + sent_any_chunk = True + last_successful_chunk = chunk + last_message_id = result.message_id or last_message_id + + self._message_id = last_message_id + self._already_sent = True + self._last_sent_text = chunks[-1] + self._fallback_prefix = "" + async def _send_or_edit(self, text: str) -> None: """Send or edit the streaming message.""" # Strip MEDIA: directives so they don't appear as visible text. @@ -232,14 +331,16 @@ class GatewayStreamConsumer: self._last_sent_text = text else: # If an edit fails mid-stream (especially Telegram flood control), - # stop progressive edits and let the normal final send path deliver - # the complete answer instead of leaving the user with a partial. + # stop progressive edits and send only the missing tail once the + # final response is available. logger.debug("Edit failed, disabling streaming for this adapter") + self._fallback_prefix = self._visible_prefix() + self._fallback_final_send = True self._edit_supported = False - self._already_sent = False + self._already_sent = True else: # Editing not supported — skip intermediate updates. - # The final response will be sent by the normal path. + # The final response will be sent by the fallback path. 
pass else: # First message — send new diff --git a/tests/gateway/test_stream_consumer.py b/tests/gateway/test_stream_consumer.py index 6c908bbe..ddc88fc2 100644 --- a/tests/gateway/test_stream_consumer.py +++ b/tests/gateway/test_stream_consumer.py @@ -324,3 +324,91 @@ class TestSegmentBreakOnToolBoundary: await consumer.run() assert consumer.already_sent + + @pytest.mark.asyncio + async def test_edit_failure_sends_only_unsent_tail_at_finish(self): + """If an edit fails mid-stream, send only the missing tail once at finish.""" + adapter = MagicMock() + send_results = [ + SimpleNamespace(success=True, message_id="msg_1"), + SimpleNamespace(success=True, message_id="msg_2"), + ] + adapter.send = AsyncMock(side_effect=send_results) + adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=False, error="flood_control:6")) + adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉") + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + consumer.on_delta("Hello") + task = asyncio.create_task(consumer.run()) + await asyncio.sleep(0.08) + consumer.on_delta(" world") + await asyncio.sleep(0.08) + consumer.finish() + await task + + assert adapter.send.call_count == 2 + first_text = adapter.send.call_args_list[0][1]["content"] + second_text = adapter.send.call_args_list[1][1]["content"] + assert "Hello" in first_text + assert second_text.strip() == "world" + assert consumer.already_sent + + @pytest.mark.asyncio + async def test_segment_break_clears_failed_edit_fallback_state(self): + """A tool boundary after edit failure must not duplicate the next segment.""" + adapter = MagicMock() + send_results = [ + SimpleNamespace(success=True, message_id="msg_1"), + SimpleNamespace(success=True, message_id="msg_2"), + ] + adapter.send = AsyncMock(side_effect=send_results) + adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=False, error="flood_control:6")) + 
adapter.MAX_MESSAGE_LENGTH = 4096 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉") + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + consumer.on_delta("Hello") + task = asyncio.create_task(consumer.run()) + await asyncio.sleep(0.08) + consumer.on_delta(" world") + await asyncio.sleep(0.08) + consumer.on_delta(None) + consumer.on_delta("Next segment") + consumer.finish() + await task + + sent_texts = [call[1]["content"] for call in adapter.send.call_args_list] + assert sent_texts == ["Hello ▉", "Next segment"] + + @pytest.mark.asyncio + async def test_fallback_final_splits_long_continuation_without_dropping_text(self): + """Long continuation tails should be chunked when fallback final-send runs.""" + adapter = MagicMock() + adapter.send = AsyncMock(side_effect=[ + SimpleNamespace(success=True, message_id="msg_1"), + SimpleNamespace(success=True, message_id="msg_2"), + SimpleNamespace(success=True, message_id="msg_3"), + ]) + adapter.edit_message = AsyncMock(return_value=SimpleNamespace(success=False, error="flood_control:6")) + adapter.MAX_MESSAGE_LENGTH = 610 + + config = StreamConsumerConfig(edit_interval=0.01, buffer_threshold=5, cursor=" ▉") + consumer = GatewayStreamConsumer(adapter, "chat_123", config) + + prefix = "abc" + tail = "x" * 620 + consumer.on_delta(prefix) + task = asyncio.create_task(consumer.run()) + await asyncio.sleep(0.08) + consumer.on_delta(tail) + await asyncio.sleep(0.08) + consumer.finish() + await task + + sent_texts = [call[1]["content"] for call in adapter.send.call_args_list] + assert len(sent_texts) == 3 + assert sent_texts[0].startswith(prefix) + assert sum(len(t) for t in sent_texts[1:]) == len(tail) From 19b0ddce408b33e3dcf6ce8e5628f028119ca65b Mon Sep 17 00:00:00 2001 From: mrshu Date: Wed, 8 Apr 2026 08:59:52 +0200 Subject: [PATCH 132/154] fix(process): correct detached crash recovery state Previously crash recovery recreated detached sessions as if they were fully managed, 
so polls and kills could lie about liveness and the checkpoint could forget recovered jobs after the next restart. This commit refreshes recovered host-backed sessions from real PID state, keeps checkpoint data durable, and preserves notify watcher metadata while treating sandbox-only PIDs as non-recoverable. - Persist `pid_scope` in `tools/process_registry.py` and skip recovering sandbox-backed entries without a host-visible PID handle - Refresh detached sessions on access so `get`/`poll`/`wait` and active session queries observe exited processes instead of hanging forever - Allow recovered host PIDs to be terminated honestly and requeue `notify_on_complete` watchers during checkpoint recovery - Add regression tests for durable checkpoints, detached exit/kill behavior, sandbox skip logic, and recovered notify watchers --- tests/tools/test_notify_on_complete.py | 20 ++++ tests/tools/test_process_registry.py | 123 +++++++++++++++++++++++++ tools/process_registry.py | 111 +++++++++++++++++++--- 3 files changed, 241 insertions(+), 13 deletions(-) diff --git a/tests/tools/test_notify_on_complete.py b/tests/tools/test_notify_on_complete.py index 88872190..8cf17bfb 100644 --- a/tests/tools/test_notify_on_complete.py +++ b/tests/tools/test_notify_on_complete.py @@ -197,6 +197,26 @@ class TestCheckpointNotify: s = registry.get("proc_live") assert s.notify_on_complete is True + def test_recover_requeues_notify_watchers(self, registry, tmp_path): + checkpoint = tmp_path / "procs.json" + checkpoint.write_text(json.dumps([{ + "session_id": "proc_live", + "command": "sleep 999", + "pid": os.getpid(), + "task_id": "t1", + "session_key": "sk1", + "watcher_platform": "telegram", + "watcher_chat_id": "123", + "watcher_thread_id": "42", + "watcher_interval": 5, + "notify_on_complete": True, + }])) + with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint): + recovered = registry.recover_from_checkpoint() + assert recovered == 1 + assert len(registry.pending_watchers) == 1 + 
assert registry.pending_watchers[0]["notify_on_complete"] is True + def test_recover_defaults_false(self, registry, tmp_path): """Old checkpoint entries without the field default to False.""" checkpoint = tmp_path / "procs.json" diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py index e6cfa40e..44e3a1bd 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -2,6 +2,9 @@ import json import os +import signal +import subprocess +import sys import time import pytest from pathlib import Path @@ -45,6 +48,23 @@ def _make_session( return s +def _spawn_python_sleep(seconds: float) -> subprocess.Popen: + """Spawn a portable short-lived Python sleep process.""" + return subprocess.Popen( + [sys.executable, "-c", f"import time; time.sleep({seconds})"], + ) + + +def _wait_until(predicate, timeout: float = 5.0, interval: float = 0.05) -> bool: + """Poll a predicate until it returns truthy or the timeout elapses.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + if predicate(): + return True + time.sleep(interval) + return False + + # ========================================================================= # Get / Poll # ========================================================================= @@ -349,6 +369,88 @@ class TestCheckpoint: assert recovered == 1 assert len(registry.pending_watchers) == 0 + def test_recovery_keeps_live_checkpoint_entries(self, registry, tmp_path): + checkpoint = tmp_path / "procs.json" + checkpoint.write_text(json.dumps([{ + "session_id": "proc_live", + "command": "sleep 999", + "pid": os.getpid(), + "task_id": "t1", + "session_key": "sk1", + }])) + + with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint): + recovered = registry.recover_from_checkpoint() + assert recovered == 1 + assert registry.get("proc_live") is not None + + data = json.loads(checkpoint.read_text()) + assert len(data) == 1 + assert data[0]["session_id"] == 
"proc_live" + assert data[0]["pid"] == os.getpid() + assert data != [] + + def test_recovery_skips_explicit_sandbox_backed_entries(self, registry, tmp_path): + checkpoint = tmp_path / "procs.json" + original = [{ + "session_id": "proc_remote", + "command": "sleep 999", + "pid": os.getpid(), + "task_id": "t1", + "pid_scope": "sandbox", + }] + checkpoint.write_text(json.dumps(original)) + + with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint): + recovered = registry.recover_from_checkpoint() + assert recovered == 0 + assert registry.get("proc_remote") is None + + data = json.loads(checkpoint.read_text()) + assert data == [] + + def test_detached_recovered_process_eventually_exits(self, registry, tmp_path): + proc = _spawn_python_sleep(0.4) + checkpoint = tmp_path / "procs.json" + checkpoint.write_text(json.dumps([{ + "session_id": "proc_live", + "command": "python -c 'import time; time.sleep(0.4)'", + "pid": proc.pid, + "task_id": "t1", + "session_key": "sk1", + }])) + + try: + with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint): + recovered = registry.recover_from_checkpoint() + assert recovered == 1 + + session = registry.get("proc_live") + assert session is not None + assert session.detached is True + + proc.wait(timeout=5) + + assert _wait_until( + lambda: registry.get("proc_live") is not None + and registry.get("proc_live").exited, + timeout=5, + ) + + poll_result = registry.poll("proc_live") + assert poll_result["status"] == "exited" + + wait_result = registry.wait("proc_live", timeout=1) + assert wait_result["status"] == "exited" + finally: + if proc.poll() is None: + proc.terminate() + try: + proc.wait(timeout=5) + except Exception: + proc.kill() + proc.wait(timeout=5) + # ========================================================================= # Kill process @@ -365,6 +467,27 @@ class TestKillProcess: result = registry.kill_process(s.id) assert result["status"] == "already_exited" + def test_kill_detached_session_uses_host_pid(self, 
registry): + s = _make_session(sid="proc_detached", command="sleep 999") + s.pid = 424242 + s.detached = True + registry._running[s.id] = s + + calls = [] + + def fake_kill(pid, sig): + calls.append((pid, sig)) + + try: + with patch("tools.process_registry.os.kill", side_effect=fake_kill): + result = registry.kill_process(s.id) + + assert result["status"] == "killed" + assert (424242, 0) in calls + assert (424242, signal.SIGTERM) in calls + finally: + registry._running.pop(s.id, None) + # ========================================================================= # Tool handler diff --git a/tools/process_registry.py b/tools/process_registry.py index 948f073a..b935f49c 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -76,6 +76,7 @@ class ProcessSession: output_buffer: str = "" # Rolling output (last MAX_OUTPUT_CHARS) max_output_chars: int = MAX_OUTPUT_CHARS detached: bool = False # True if recovered from crash (no pipe) + pid_scope: str = "host" # "host" for local/PTY PIDs, "sandbox" for env-local PIDs # Watcher/notification metadata (persisted for crash recovery) watcher_platform: str = "" watcher_chat_id: str = "" @@ -127,6 +128,48 @@ class ProcessRegistry: lines.pop(0) return "\n".join(lines) + @staticmethod + def _is_host_pid_alive(pid: Optional[int]) -> bool: + """Best-effort liveness check for host-visible PIDs.""" + if not pid: + return False + try: + os.kill(pid, 0) + return True + except (ProcessLookupError, PermissionError): + return False + + def _refresh_detached_session(self, session: Optional[ProcessSession]) -> Optional[ProcessSession]: + """Update recovered host-PID sessions when the underlying process has exited.""" + if session is None or session.exited or not session.detached or session.pid_scope != "host": + return session + + if self._is_host_pid_alive(session.pid): + return session + + with session._lock: + if session.exited: + return session + session.exited = True + # Recovered sessions no longer have a waitable handle, 
so the real + # exit code is unavailable once the original process object is gone. + session.exit_code = None + + self._move_to_finished(session) + return session + + @staticmethod + def _terminate_host_pid(pid: int) -> None: + """Terminate a host-visible PID without requiring the original process handle.""" + if _IS_WINDOWS: + os.kill(pid, signal.SIGTERM) + return + + try: + os.killpg(os.getpgid(pid), signal.SIGTERM) + except (OSError, ProcessLookupError, PermissionError): + os.kill(pid, signal.SIGTERM) + # ----- Spawn ----- def spawn_local( @@ -269,6 +312,7 @@ class ProcessRegistry: cwd=cwd, started_at=time.time(), env_ref=env, + pid_scope="sandbox", ) # Run the command in the sandbox with output capture @@ -439,7 +483,8 @@ class ProcessRegistry: def get(self, session_id: str) -> Optional[ProcessSession]: """Get a session by ID (running or finished).""" with self._lock: - return self._running.get(session_id) or self._finished.get(session_id) + session = self._running.get(session_id) or self._finished.get(session_id) + return self._refresh_detached_session(session) def poll(self, session_id: str) -> dict: """Check status and get new output for a background process.""" @@ -531,6 +576,7 @@ class ProcessRegistry: deadline = time.monotonic() + effective_timeout while time.monotonic() < deadline: + session = self._refresh_detached_session(session) if session.exited: result = { "status": "exited", @@ -596,6 +642,25 @@ class ProcessRegistry: elif session.env_ref and session.pid: # Non-local -- kill inside sandbox session.env_ref.execute(f"kill {session.pid} 2>/dev/null", timeout=5) + elif session.detached and session.pid_scope == "host" and session.pid: + if not self._is_host_pid_alive(session.pid): + with session._lock: + session.exited = True + session.exit_code = None + self._move_to_finished(session) + return { + "status": "already_exited", + "exit_code": session.exit_code, + } + self._terminate_host_pid(session.pid) + else: + return { + "status": "error", + "error": 
( + "Recovered process cannot be killed after restart because " + "its original runtime handle is no longer available" + ), + } session.exited = True session.exit_code = -15 # SIGTERM self._move_to_finished(session) @@ -640,6 +705,8 @@ class ProcessRegistry: with self._lock: all_sessions = list(self._running.values()) + list(self._finished.values()) + all_sessions = [self._refresh_detached_session(s) for s in all_sessions] + if task_id: all_sessions = [s for s in all_sessions if s.task_id == task_id] @@ -666,6 +733,12 @@ class ProcessRegistry: def has_active_processes(self, task_id: str) -> bool: """Check if there are active (running) processes for a task_id.""" + with self._lock: + sessions = list(self._running.values()) + + for session in sessions: + self._refresh_detached_session(session) + with self._lock: return any( s.task_id == task_id and not s.exited @@ -674,6 +747,12 @@ class ProcessRegistry: def has_active_for_session(self, session_key: str) -> bool: """Check if there are active processes for a gateway session key.""" + with self._lock: + sessions = list(self._running.values()) + + for session in sessions: + self._refresh_detached_session(session) + with self._lock: return any( s.session_key == session_key and not s.exited @@ -727,6 +806,7 @@ class ProcessRegistry: "session_id": s.id, "command": s.command, "pid": s.pid, + "pid_scope": s.pid_scope, "cwd": s.cwd, "started_at": s.started_at, "task_id": s.task_id, @@ -764,13 +844,21 @@ class ProcessRegistry: if not pid: continue + pid_scope = entry.get("pid_scope", "host") + if pid_scope != "host": + # Sandbox-backed processes keep only in-sandbox PIDs in the + # checkpoint, which are not meaningful to the restarted host + # process once the original environment handle is gone. 
+ logger.info( + "Skipping recovery for non-host process: %s (pid=%s, scope=%s)", + entry.get("command", "unknown")[:60], + pid, + pid_scope, + ) + continue + # Check if PID is still alive - alive = False - try: - os.kill(pid, 0) - alive = True - except (ProcessLookupError, PermissionError): - pass + alive = self._is_host_pid_alive(pid) if alive: session = ProcessSession( @@ -779,6 +867,7 @@ class ProcessRegistry: task_id=entry.get("task_id", ""), session_key=entry.get("session_key", ""), pid=pid, + pid_scope=pid_scope, cwd=entry.get("cwd"), started_at=entry.get("started_at", time.time()), detached=True, # Can't read output, but can report status + kill @@ -802,14 +891,10 @@ class ProcessRegistry: "platform": session.watcher_platform, "chat_id": session.watcher_chat_id, "thread_id": session.watcher_thread_id, + "notify_on_complete": session.notify_on_complete, }) - # Clear the checkpoint (will be rewritten as processes finish) - try: - from utils import atomic_json_write - atomic_json_write(CHECKPOINT_PATH, []) - except Exception as e: - logger.debug("Could not clear checkpoint file: %s", e, exc_info=True) + self._write_checkpoint() return recovered From 30ea423ce8f064a4dab42d93b5adc26a9c2240b1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 03:36:44 -0700 Subject: [PATCH 133/154] fix: unify reasoning_effort to config.yaml only, remove HERMES_REASONING_EFFORT env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gateway and cron had inconsistent reasoning_effort resolution: - CLI: config.yaml only (correct) - Gateway: config.yaml first, env var fallback - Cron: env var first, config.yaml fallback All three now read exclusively from agent.reasoning_effort in config.yaml. Removed HERMES_REASONING_EFFORT env var support entirely — .env is for secrets only, not behavioral config. 
--- cron/scheduler.py | 6 +-- gateway/run.py | 11 ++---- tests/gateway/test_reasoning_command.py | 52 ------------------------- 3 files changed, 6 insertions(+), 63 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index e164c1f3..33a9b899 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -585,11 +585,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: except Exception as e: logger.warning("Job '%s': failed to load config.yaml, using defaults: %s", job_id, e) - # Reasoning config from env or config.yaml + # Reasoning config from config.yaml from hermes_constants import parse_reasoning_effort - effort = os.getenv("HERMES_REASONING_EFFORT", "") - if not effort: - effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip() + effort = str(_cfg.get("agent", {}).get("reasoning_effort", "")).strip() reasoning_config = parse_reasoning_effort(effort) # Prefill messages from env or config.yaml diff --git a/gateway/run.py b/gateway/run.py index 99c71d91..149b1f59 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -921,12 +921,11 @@ class GatewayRunner: @staticmethod def _load_reasoning_config() -> dict | None: - """Load reasoning effort from config with env fallback. + """Load reasoning effort from config.yaml. - Checks agent.reasoning_effort in config.yaml first, then - HERMES_REASONING_EFFORT as a fallback. Valid: "xhigh", "high", - "medium", "low", "minimal", "none". Returns None to use default - (medium). + Reads agent.reasoning_effort from config.yaml. Valid: "xhigh", + "high", "medium", "low", "minimal", "none". Returns None to use + default (medium). 
""" from hermes_constants import parse_reasoning_effort effort = "" @@ -939,8 +938,6 @@ class GatewayRunner: effort = str(cfg.get("agent", {}).get("reasoning_effort", "") or "").strip() except Exception: pass - if not effort: - effort = os.getenv("HERMES_REASONING_EFFORT", "") result = parse_reasoning_effort(effort) if effort and effort.strip() and result is None: logger.warning("Unknown reasoning_effort '%s', using default (medium)", effort) diff --git a/tests/gateway/test_reasoning_command.py b/tests/gateway/test_reasoning_command.py index cb9e01f1..e39ed112 100644 --- a/tests/gateway/test_reasoning_command.py +++ b/tests/gateway/test_reasoning_command.py @@ -87,7 +87,6 @@ class TestReasoningCommand: ) monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home) - monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False) runner = _make_runner() runner._reasoning_config = {"enabled": True, "effort": "xhigh"} @@ -108,7 +107,6 @@ class TestReasoningCommand: config_path.write_text("agent:\n reasoning_effort: medium\n", encoding="utf-8") monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home) - monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False) runner = _make_runner() runner._reasoning_config = {"enabled": True, "effort": "medium"} @@ -138,7 +136,6 @@ class TestReasoningCommand: "api_key": "test-key", }, ) - monkeypatch.delenv("HERMES_REASONING_EFFORT", raising=False) fake_run_agent = types.ModuleType("run_agent") fake_run_agent.AIAgent = _CapturingAgent monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) @@ -170,55 +167,6 @@ class TestReasoningCommand: assert _CapturingAgent.last_init is not None assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": True, "effort": "low"} - def test_run_agent_prefers_config_over_stale_reasoning_env(self, tmp_path, monkeypatch): - hermes_home = tmp_path / "hermes" - hermes_home.mkdir() - (hermes_home / "config.yaml").write_text("agent:\n reasoning_effort: none\n", encoding="utf-8") - - 
monkeypatch.setattr(gateway_run, "_hermes_home", hermes_home) - monkeypatch.setattr(gateway_run, "_env_path", hermes_home / ".env") - monkeypatch.setattr(gateway_run, "load_dotenv", lambda *args, **kwargs: None) - monkeypatch.setattr( - gateway_run, - "_resolve_runtime_agent_kwargs", - lambda: { - "provider": "openrouter", - "api_mode": "chat_completions", - "base_url": "https://openrouter.ai/api/v1", - "api_key": "test-key", - }, - ) - monkeypatch.setenv("HERMES_REASONING_EFFORT", "low") - fake_run_agent = types.ModuleType("run_agent") - fake_run_agent.AIAgent = _CapturingAgent - monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) - - _CapturingAgent.last_init = None - runner = _make_runner() - - source = SessionSource( - platform=Platform.LOCAL, - chat_id="cli", - chat_name="CLI", - chat_type="dm", - user_id="user-1", - ) - - result = asyncio.run( - runner._run_agent( - message="ping", - context_prompt="", - history=[], - source=source, - session_id="session-1", - session_key="agent:main:local:dm", - ) - ) - - assert result["final_response"] == "ok" - assert _CapturingAgent.last_init is not None - assert _CapturingAgent.last_init["reasoning_config"] == {"enabled": False} - def test_run_agent_includes_enabled_mcp_servers_in_gateway_toolsets(self, tmp_path, monkeypatch): hermes_home = tmp_path / "hermes" hermes_home.mkdir() From 1368caf66f6a012947a386f29522b176e8a32dd1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 03:38:08 -0700 Subject: [PATCH 134/154] fix(anthropic): smart thinking block signature management (#6112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthropic signs thinking blocks against the full turn content. Any upstream mutation (context compression, session truncation, orphan stripping, message merging) invalidates the signature, causing HTTP 400 'Invalid signature in thinking block' — especially in long-lived gateway sessions. 
Strategy (following clawdbot/OpenClaw pattern): 1. Strip thinking/redacted_thinking from all assistant messages EXCEPT the last one — preserves reasoning continuity on the current tool-use chain while avoiding stale signature errors on older turns. 2. Downgrade unsigned thinking blocks to plain text — Anthropic can't validate them, but the reasoning content is preserved. 3. Strip cache_control from thinking/redacted_thinking blocks to prevent cache markers from interfering with signature validation. 4. Drop thinking blocks from the second message when merging consecutive assistant messages (role alternation enforcement). 5. Error recovery: on HTTP 400 mentioning 'signature' and 'thinking', strip all reasoning_details from the conversation and retry once. This is the safety net for edge cases the proactive stripping misses. Addresses the issue reported in PR #6086 by @mingginwan while preserving reasoning continuity (their PR stripped ALL thinking blocks unconditionally). Files changed: - agent/anthropic_adapter.py: thinking block management in convert_messages_to_anthropic (strip old turns, downgrade unsigned, strip cache_control, merge-time strip) - run_agent.py: one-shot signature error recovery in retry loop - tests/test_anthropic_adapter.py: 10 new tests covering all cases --- agent/anthropic_adapter.py | 72 +++++++- run_agent.py | 35 +++- tests/agent/test_anthropic_adapter.py | 252 ++++++++++++++++++++++++++ 3 files changed, 356 insertions(+), 3 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 3292f0c6..2d6c2dd8 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -1102,7 +1102,15 @@ def convert_messages_to_anthropic( curr_content = [{"type": "text", "text": curr_content}] fixed[-1]["content"] = prev_content + curr_content else: - # Consecutive assistant messages — merge text content + # Consecutive assistant messages — merge text content. 
+ # Drop thinking blocks from the *second* message: their + # signature was computed against a different turn boundary + # and becomes invalid once merged. + if isinstance(m["content"], list): + m["content"] = [ + b for b in m["content"] + if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")) + ] prev_blocks = fixed[-1]["content"] curr_blocks = m["content"] if isinstance(prev_blocks, list) and isinstance(curr_blocks, list): @@ -1120,6 +1128,68 @@ def convert_messages_to_anthropic( fixed.append(m) result = fixed + # ── Thinking block signature management ────────────────────────── + # Anthropic signs thinking blocks against the full turn content. + # Any upstream mutation (context compression, session truncation, + # orphan stripping, message merging) invalidates the signature, + # causing HTTP 400 "Invalid signature in thinking block". + # + # Strategy (following clawdbot/OpenClaw pattern): + # 1. Strip thinking/redacted_thinking from all assistant messages + # EXCEPT the last one — preserves reasoning continuity on the + # current tool-use chain while avoiding stale signature errors. + # 2. Downgrade unsigned thinking blocks (no signature) to text — + # Anthropic can't validate them and will reject them. + # 3. Strip cache_control from thinking/redacted_thinking blocks — + # cache markers can interfere with signature validation. 
+ _THINKING_TYPES = frozenset(("thinking", "redacted_thinking")) + + last_assistant_idx = None + for i in range(len(result) - 1, -1, -1): + if result[i].get("role") == "assistant": + last_assistant_idx = i + break + + for idx, m in enumerate(result): + if m.get("role") != "assistant" or not isinstance(m.get("content"), list): + continue + + if idx != last_assistant_idx: + # Strip ALL thinking blocks from non-latest assistant messages + stripped = [ + b for b in m["content"] + if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES) + ] + m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}] + else: + # Latest assistant: keep signed thinking blocks for reasoning + # continuity; downgrade unsigned ones to plain text. + new_content = [] + for b in m["content"]: + if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: + new_content.append(b) + continue + if b.get("type") == "redacted_thinking": + # Redacted blocks use 'data' for the signature payload + if b.get("data"): + new_content.append(b) + # else: drop — no data means it can't be validated + elif b.get("signature"): + # Signed thinking block — keep it + new_content.append(b) + else: + # Unsigned thinking — downgrade to text so it's not lost + thinking_text = b.get("thinking", "") + if thinking_text: + new_content.append({"type": "text", "text": thinking_text}) + m["content"] = new_content or [{"type": "text", "text": "(empty)"}] + + # Strip cache_control from any remaining thinking/redacted_thinking + # blocks — cache markers interfere with signature validation. 
+ for b in m["content"]: + if isinstance(b, dict) and b.get("type") in _THINKING_TYPES: + b.pop("cache_control", None) + return system, result diff --git a/run_agent.py b/run_agent.py index 49f36da4..a0ae15a1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -7288,6 +7288,7 @@ class AIAgent: codex_auth_retry_attempted=False anthropic_auth_retry_attempted=False nous_auth_retry_attempted=False + thinking_sig_retry_attempted = False has_retried_429 = False restart_with_compressed_messages = False restart_with_length_continuation = False @@ -7877,8 +7878,38 @@ class AIAgent: print(f"{self.log_prefix} • Check ANTHROPIC_API_KEY in {_dhh}/.env for API keys or legacy token values") print(f"{self.log_prefix} • For API keys: verify at https://console.anthropic.com/settings/keys") print(f"{self.log_prefix} • For Claude Code: run 'claude /login' to refresh, then retry") - print(f"{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_TOKEN \"\"") - print(f"{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_API_KEY \"\"") + print(f"{self.log_prefix} • Legacy cleanup: hermes config set ANTHROPIC_TOKEN \"\"") + print(f"{self.log_prefix} • Clear stale keys: hermes config set ANTHROPIC_API_KEY \"\"") + + # ── Thinking block signature recovery ───────────────── + # Anthropic signs thinking blocks against the full turn + # content. Any upstream mutation (context compression, + # session truncation, message merging) invalidates the + # signature → HTTP 400. Recovery: strip reasoning_details + # from all messages so the next retry sends no thinking + # blocks at all. One-shot — don't retry infinitely. 
+ if ( + self.api_mode == "anthropic_messages" + and status_code == 400 + and not thinking_sig_retry_attempted + ): + _err_msg_lower = str(api_error).lower() + if "signature" in _err_msg_lower and "thinking" in _err_msg_lower: + thinking_sig_retry_attempted = True + for _m in messages: + if isinstance(_m, dict): + _m.pop("reasoning_details", None) + self._vprint( + f"{self.log_prefix}⚠️ Thinking block signature invalid — " + f"stripped all thinking blocks, retrying...", + force=True, + ) + logging.warning( + "%sThinking block signature recovery: stripped " + "reasoning_details from %d messages", + self.log_prefix, len(messages), + ) + continue retry_count += 1 elapsed_time = time.time() - api_start_time diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 9aa8c10b..0024fac6 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -1276,6 +1276,258 @@ class TestRoleAlternation: assert [m["role"] for m in result] == ["user", "assistant", "user"] +# --------------------------------------------------------------------------- +# Thinking block signature management +# --------------------------------------------------------------------------- + + +class TestThinkingBlockSignatureManagement: + """Tests for the thinking block handling strategy: + strip from old turns, preserve latest signed, downgrade unsigned.""" + + def test_thinking_stripped_from_non_last_assistant(self): + """Thinking blocks are removed from all assistant messages except the last.""" + messages = [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "tc_1", "function": {"name": "tool1", "arguments": "{}"}}, + ], + "reasoning_details": [ + {"type": "thinking", "thinking": "Old reasoning.", "signature": "sig_old"}, + ], + }, + {"role": "tool", "tool_call_id": "tc_1", "content": "result 1"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "tc_2", "function": {"name": "tool2", 
"arguments": "{}"}}, + ], + "reasoning_details": [ + {"type": "thinking", "thinking": "Latest reasoning.", "signature": "sig_new"}, + ], + }, + {"role": "tool", "tool_call_id": "tc_2", "content": "result 2"}, + ] + _, result = convert_messages_to_anthropic(messages) + + # Find both assistant messages + assistants = [m for m in result if m["role"] == "assistant"] + assert len(assistants) == 2 + + # First (non-last) assistant: no thinking blocks + first_types = [b.get("type") for b in assistants[0]["content"]] + assert "thinking" not in first_types + assert "redacted_thinking" not in first_types + assert "tool_use" in first_types # tool_use should survive + + # Last assistant: thinking block preserved with signature + last_blocks = assistants[1]["content"] + thinking_blocks = [b for b in last_blocks if b.get("type") == "thinking"] + assert len(thinking_blocks) == 1 + assert thinking_blocks[0]["thinking"] == "Latest reasoning." + assert thinking_blocks[0]["signature"] == "sig_new" + + def test_signed_thinking_preserved_on_last_turn(self): + """A signed thinking block on the last assistant message is kept.""" + messages = [ + { + "role": "assistant", + "content": "The answer is 42.", + "reasoning_details": [ + {"type": "thinking", "thinking": "Deep thought.", "signature": "sig_valid"}, + ], + }, + ] + _, result = convert_messages_to_anthropic(messages) + blocks = result[0]["content"] + thinking = [b for b in blocks if b.get("type") == "thinking"] + assert len(thinking) == 1 + assert thinking[0]["signature"] == "sig_valid" + + def test_unsigned_thinking_downgraded_to_text_on_last_turn(self): + """Unsigned thinking blocks on the last turn become text blocks.""" + messages = [ + { + "role": "assistant", + "content": "Response text.", + "reasoning_details": [ + {"type": "thinking", "thinking": "Unsigned reasoning."}, + # No 'signature' field + ], + }, + ] + _, result = convert_messages_to_anthropic(messages) + blocks = result[0]["content"] + + # No thinking blocks should 
remain + assert not any(b.get("type") == "thinking" for b in blocks) + # The reasoning text should be preserved as a text block + text_contents = [b.get("text", "") for b in blocks if b.get("type") == "text"] + assert "Unsigned reasoning." in text_contents + + def test_redacted_thinking_with_data_preserved(self): + """Redacted thinking with 'data' field is kept on last turn.""" + messages = [ + { + "role": "assistant", + "content": "Response.", + "reasoning_details": [ + {"type": "redacted_thinking", "data": "opaque_signature_data"}, + ], + }, + ] + _, result = convert_messages_to_anthropic(messages) + blocks = result[0]["content"] + redacted = [b for b in blocks if b.get("type") == "redacted_thinking"] + assert len(redacted) == 1 + assert redacted[0]["data"] == "opaque_signature_data" + + def test_redacted_thinking_without_data_dropped(self): + """Redacted thinking without 'data' is dropped — can't be validated.""" + messages = [ + { + "role": "assistant", + "content": "Response.", + "reasoning_details": [ + {"type": "redacted_thinking"}, + # No 'data' field + ], + }, + ] + _, result = convert_messages_to_anthropic(messages) + blocks = result[0]["content"] + assert not any(b.get("type") == "redacted_thinking" for b in blocks) + + def test_cache_control_stripped_from_thinking_blocks(self): + """cache_control markers are removed from thinking/redacted_thinking blocks.""" + messages = [ + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"id": "tc_1", "function": {"name": "t", "arguments": "{}"}}, + ], + "reasoning_details": [ + { + "type": "thinking", + "thinking": "Reasoning.", + "signature": "sig_1", + "cache_control": {"type": "ephemeral"}, + }, + ], + }, + {"role": "tool", "tool_call_id": "tc_1", "content": "result"}, + ] + _, result = convert_messages_to_anthropic(messages) + assistant = next(m for m in result if m["role"] == "assistant") + for block in assistant["content"]: + if block.get("type") in ("thinking", "redacted_thinking"): + assert 
"cache_control" not in block + + def test_thinking_stripped_from_merged_consecutive_assistants(self): + """When consecutive assistants are merged, second one's thinking is dropped.""" + messages = [ + { + "role": "assistant", + "content": "First response.", + "reasoning_details": [ + {"type": "thinking", "thinking": "First thought.", "signature": "sig_1"}, + ], + }, + { + "role": "assistant", + "content": "Second response.", + "reasoning_details": [ + {"type": "thinking", "thinking": "Second thought.", "signature": "sig_2"}, + ], + }, + ] + _, result = convert_messages_to_anthropic(messages) + + # Should be merged into one assistant message + assistants = [m for m in result if m["role"] == "assistant"] + assert len(assistants) == 1 + + # Only the first thinking block should remain (signed, on the last/only assistant) + blocks = assistants[0]["content"] + thinking = [b for b in blocks if b.get("type") == "thinking"] + assert len(thinking) == 1 + assert thinking[0]["thinking"] == "First thought." + + def test_empty_content_after_strip_gets_placeholder(self): + """If stripping thinking leaves an empty message, a placeholder is added.""" + messages = [ + { + "role": "assistant", + "content": "", + "reasoning_details": [ + {"type": "thinking", "thinking": "Only thinking, no text."}, + # Unsigned — will be downgraded, but content was empty string + ], + }, + {"role": "user", "content": "Next message."}, + {"role": "assistant", "content": "Final."}, + ] + _, result = convert_messages_to_anthropic(messages) + # First assistant is non-last, so thinking is stripped completely. 
+ # The original content was empty and thinking was unsigned → placeholder + first_assistant = result[0] + assert first_assistant["role"] == "assistant" + assert len(first_assistant["content"]) >= 1 + + def test_multi_turn_conversation_preserves_only_last(self): + """Full multi-turn conversation: only last assistant keeps thinking.""" + messages = [ + {"role": "user", "content": "Question 1"}, + { + "role": "assistant", + "content": "Answer 1", + "reasoning_details": [ + {"type": "thinking", "thinking": "Thought 1", "signature": "sig_1"}, + ], + }, + {"role": "user", "content": "Question 2"}, + { + "role": "assistant", + "content": "Answer 2", + "reasoning_details": [ + {"type": "thinking", "thinking": "Thought 2", "signature": "sig_2"}, + ], + }, + {"role": "user", "content": "Question 3"}, + { + "role": "assistant", + "content": "Answer 3", + "reasoning_details": [ + {"type": "thinking", "thinking": "Thought 3", "signature": "sig_3"}, + ], + }, + ] + _, result = convert_messages_to_anthropic(messages) + + assistants = [m for m in result if m["role"] == "assistant"] + assert len(assistants) == 3 + + # First two: no thinking blocks + for a in assistants[:2]: + assert not any( + b.get("type") in ("thinking", "redacted_thinking") + for b in a["content"] + if isinstance(b, dict) + ) + + # Last one: thinking preserved + last_thinking = [ + b for b in assistants[2]["content"] + if isinstance(b, dict) and b.get("type") == "thinking" + ] + assert len(last_thinking) == 1 + assert last_thinking[0]["signature"] == "sig_3" + + # --------------------------------------------------------------------------- # Tool choice # --------------------------------------------------------------------------- From c8a5e36be8f59eba491d9b319a5842fc389a528b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 04:06:42 -0700 Subject: [PATCH 135/154] feat(prompting): self-optimized GPT/Codex tool-use guidance via automated behavioral 
benchmarking (#6120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hermes Agent identified and patched its own prompting blind spots through automated self-evaluation — running 64+ tool-use benchmarks across GPT-5.4 and Codex-5.3, diagnosing 5 failure modes, writing targeted prompt patches, and verifying the fix in a closed loop. Failure modes discovered and fixed: - Mental arithmetic (wrong answers: 39,152,053 vs correct 39,151,253) - User profile hallucination ('Windows 11' when running on Linux) - Time guessing without verification - Clarification-seeking instead of acting ('open where?' for port checks) - Hash computation from memory (SHA-256, encodings) - Confusing system RAM with agent's own persistent memory store Two new XML sections added to OPENAI_MODEL_EXECUTION_GUIDANCE: - : explicit categories that must always use tools - : default to action on obvious interpretations Results: gpt-5.4: 68.8% → 100% tool compliance (+31.2pp) gpt-5.3-codex: 62.5% → 100% tool compliance (+37.5pp) Regression: 0/8 conversational prompts over-tooled --- agent/prompt_builder.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index df5532e1..b1b0891f 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -204,6 +204,30 @@ OPENAI_MODEL_EXECUTION_GUIDANCE = ( "the result.\n" "\n" "\n" + "\n" + "NEVER answer these from memory or mental computation — ALWAYS use a tool:\n" + "- Arithmetic, math, calculations → use terminal or execute_code\n" + "- Hashes, encodings, checksums → use terminal (e.g. sha256sum, base64)\n" + "- Current time, date, timezone → use terminal (e.g. 
date)\n" + "- System state: OS, CPU, memory, disk, ports, processes → use terminal\n" + "- File contents, sizes, line counts → use read_file, search_files, or terminal\n" + "- Git history, branches, diffs → use terminal\n" + "- Current facts (weather, news, versions) → use web_search\n" + "Your memory and user profile describe the USER, not the system you are " + "running on. The execution environment may differ from what the user profile " + "says about their personal setup.\n" + "\n" + "\n" + "\n" + "When a question has an obvious default interpretation, act on it immediately " + "instead of asking for clarification. Examples:\n" + "- 'Is port 443 open?' → check THIS machine (don't ask 'open where?')\n" + "- 'What OS am I running?' → check the live system (don't use user profile)\n" + "- 'What time is it?' → run `date` (don't guess)\n" + "Only ask for clarification when the ambiguity genuinely changes what tool " + "you would call.\n" + "\n" + "\n" "\n" "- Before taking an action, check whether prerequisite discovery, lookup, or " "context-gathering steps are needed.\n" From bdc72ec355a77594d2849a97c290f10aab016db0 Mon Sep 17 00:00:00 2001 From: Felipe de Leon Date: Wed, 8 Apr 2026 03:47:40 +0400 Subject: [PATCH 136/154] feat(cli): add on_session_finalize and on_session_reset plugin hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Plugins can now subscribe to session boundary events via ctx.register_hook('on_session_finalize', ...) and ctx.register_hook('on_session_reset', ...). on_session_finalize — fires during CLI exit (/quit, Ctrl-C) and before /new or /reset, giving plugins a chance to flush or clean up. on_session_reset — fires after a new session is created via /new or /reset, so plugins can initialize per-session state. 
Closes #5592 --- cli.py | 26 +++++++++++ hermes_cli/plugins.py | 2 + tests/test_session_boundary_hooks.py | 66 ++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 tests/test_session_boundary_hooks.py diff --git a/cli.py b/cli.py index b4358a16..01ea17ff 100644 --- a/cli.py +++ b/cli.py @@ -612,6 +612,11 @@ def _run_cleanup(): pass # Shut down memory provider (on_session_end + shutdown_all) at actual # session boundary — NOT per-turn inside run_conversation(). + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook("on_session_finalize", session_id=_active_agent_ref.session_id if _active_agent_ref else None, platform="cli") + except Exception: + pass try: if _active_agent_ref and hasattr(_active_agent_ref, 'shutdown_memory_provider'): _active_agent_ref.shutdown_memory_provider( @@ -3314,6 +3319,22 @@ class HermesCLI: flush_tool_summary() print() + def _notify_session_boundary(self, event_type: str) -> None: + """Fire a session-boundary plugin hook (on_session_finalize or on_session_reset). + + Non-blocking — errors are caught and logged. Safe to call from any + lifecycle point (shutdown, /new, /reset). 
+ """ + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook( + event_type, + session_id=self.agent.session_id if self.agent else None, + platform=getattr(self, "platform", None) or "cli", + ) + except Exception: + pass + def new_session(self, silent=False): """Start a fresh session with a new session ID and cleared agent state.""" if self.agent and self.conversation_history: @@ -3321,6 +3342,10 @@ class HermesCLI: self.agent.flush_memories(self.conversation_history) except (Exception, KeyboardInterrupt): pass + self._notify_session_boundary("on_session_finalize") + elif self.agent: + # First session or empty history — still finalize the old session + self._notify_session_boundary("on_session_finalize") old_session_id = self.session_id if self._session_db and old_session_id: @@ -3365,6 +3390,7 @@ class HermesCLI: ) except Exception: pass + self._notify_session_boundary("on_session_reset") if not silent: print("(^_^)v New session started!") diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 23a655aa..7323bbd0 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -61,6 +61,8 @@ VALID_HOOKS: Set[str] = { "post_api_request", "on_session_start", "on_session_end", + "on_session_finalize", + "on_session_reset", } ENTRY_POINTS_GROUP = "hermes_agent.plugins" diff --git a/tests/test_session_boundary_hooks.py b/tests/test_session_boundary_hooks.py new file mode 100644 index 00000000..19de4cd9 --- /dev/null +++ b/tests/test_session_boundary_hooks.py @@ -0,0 +1,66 @@ +import pytest +from unittest.mock import MagicMock, patch +from hermes_cli.plugins import VALID_HOOKS, PluginManager +import os +import shutil +import tempfile +from cli import HermesCLI + + +def test_session_hooks_in_valid_hooks(): + """Verify on_session_finalize and on_session_reset are registered as valid hooks.""" + assert "on_session_finalize" in VALID_HOOKS + assert "on_session_reset" in VALID_HOOKS + + +@patch("hermes_cli.plugins.invoke_hook") +def 
test_session_finalize_on_reset(mock_invoke_hook): + """Verify on_session_finalize fires when /new or /reset is used.""" + cli = HermesCLI() + cli.agent = MagicMock() + cli.agent.session_id = "test-session-id" + + # Simulate /new command which triggers on_session_finalize for the old session + cli.new_session(silent=True) + + # Check if on_session_finalize was called for the old session + mock_invoke_hook.assert_any_call( + "on_session_finalize", session_id="test-session-id", platform="cli" + ) + # Check if on_session_reset was called for the new session + mock_invoke_hook.assert_any_call( + "on_session_reset", session_id=cli.session_id, platform="cli" + ) + + +@patch("hermes_cli.plugins.invoke_hook") +def test_session_finalize_on_cleanup(mock_invoke_hook): + """Verify on_session_finalize fires during CLI exit cleanup.""" + import cli as cli_mod + + mock_agent = MagicMock() + mock_agent.session_id = "cleanup-session-id" + cli_mod._active_agent_ref = mock_agent + cli_mod._cleanup_done = False + + cli_mod._run_cleanup() + + mock_invoke_hook.assert_any_call( + "on_session_finalize", session_id="cleanup-session-id", platform="cli" + ) + + +@patch("hermes_cli.plugins.invoke_hook") +def test_hook_errors_are_caught(mock_invoke_hook): + """Verify hook exceptions are caught and don't crash the agent.""" + mgr = PluginManager() + + # Register a hook that raises + def bad_callback(**kwargs): + raise Exception("Hook failed") + + mgr._hooks["on_session_finalize"] = [bad_callback] + + # This should not raise + results = mgr.invoke_hook("on_session_finalize", session_id="test", platform="cli") + assert results == [] From ab21fbfd89f4f168afcc024c3cf329140671ea98 Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 8 Apr 2026 04:22:55 -0700 Subject: [PATCH 137/154] fix: add gateway coverage for session boundary hooks, move test to tests/cli/ - Fire on_session_finalize and on_session_reset in gateway _handle_reset_command() - Fire on_session_finalize during gateway stop() for each 
active agent - Move CLI test from tests/ root to tests/cli/ (matches recent restructure) - Add 5 gateway tests covering reset hooks, ordering, shutdown, and error handling - Place on_session_reset after new session is guaranteed to exist (covers the get_or_create_session fallback path) --- gateway/run.py | 30 +++- .../{ => cli}/test_session_boundary_hooks.py | 0 tests/gateway/test_session_boundary_hooks.py | 158 ++++++++++++++++++ 3 files changed, 186 insertions(+), 2 deletions(-) rename tests/{ => cli}/test_session_boundary_hooks.py (100%) create mode 100644 tests/gateway/test_session_boundary_hooks.py diff --git a/gateway/run.py b/gateway/run.py index 149b1f59..7a551be1 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1481,6 +1481,14 @@ class GatewayRunner: logger.debug("Interrupted running agent for session %s during shutdown", session_key[:20]) except Exception as e: logger.debug("Failed interrupting agent during shutdown: %s", e) + # Fire plugin on_session_finalize hook before memory shutdown + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _invoke_hook("on_session_finalize", + session_id=getattr(agent, 'session_id', None), + platform="gateway") + except Exception: + pass # Shut down memory provider at actual session boundary try: if hasattr(agent, 'shutdown_memory_provider'): @@ -3274,6 +3282,15 @@ class GatewayRunner: # the configured default instead of the previously switched model. 
self._session_model_overrides.pop(session_key, None) + # Fire plugin on_session_finalize hook (session boundary) + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _old_sid = old_entry.session_id if old_entry else None + _invoke_hook("on_session_finalize", session_id=_old_sid, + platform=source.platform.value if source.platform else "") + except Exception: + pass + # Emit session:end hook (session is ending) await self.hooks.emit("session:end", { "platform": source.platform.value if source.platform else "", @@ -3287,7 +3304,7 @@ class GatewayRunner: "user_id": source.user_id, "session_key": session_key, }) - + # Resolve session config info to surface to the user try: session_info = self._format_session_info() @@ -3298,9 +3315,18 @@ class GatewayRunner: header = "✨ Session reset! Starting fresh." else: # No existing session, just create one - self.session_store.get_or_create_session(source, force_new=True) + new_entry = self.session_store.get_or_create_session(source, force_new=True) header = "✨ New session started!" 
+ # Fire plugin on_session_reset hook (new session guaranteed to exist) + try: + from hermes_cli.plugins import invoke_hook as _invoke_hook + _new_sid = new_entry.session_id if new_entry else None + _invoke_hook("on_session_reset", session_id=_new_sid, + platform=source.platform.value if source.platform else "") + except Exception: + pass + if session_info: return f"{header}\n\n{session_info}" return header diff --git a/tests/test_session_boundary_hooks.py b/tests/cli/test_session_boundary_hooks.py similarity index 100% rename from tests/test_session_boundary_hooks.py rename to tests/cli/test_session_boundary_hooks.py diff --git a/tests/gateway/test_session_boundary_hooks.py b/tests/gateway/test_session_boundary_hooks.py new file mode 100644 index 00000000..31e02980 --- /dev/null +++ b/tests/gateway/test_session_boundary_hooks.py @@ -0,0 +1,158 @@ +"""Tests that on_session_finalize and on_session_reset plugin hooks fire in the gateway.""" +from datetime import datetime +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.session import SessionEntry, SessionSource, build_session_key + + +def _make_source() -> SessionSource: + return SessionSource( + platform=Platform.TELEGRAM, + user_id="u1", + chat_id="c1", + user_name="tester", + chat_type="dm", + ) + + +def _make_event(text: str) -> MessageEvent: + return MessageEvent(text=text, source=_make_source(), message_id="m1") + + +def _make_runner(): + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + adapter = MagicMock() + adapter.send = AsyncMock() + runner.adapters = {Platform.TELEGRAM: adapter} + runner._voice_mode = {} + runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) 
+ runner._session_model_overrides = {} + runner._pending_model_notes = {} + runner._background_tasks = set() + + session_key = build_session_key(_make_source()) + session_entry = SessionEntry( + session_key=session_key, + session_id="sess-old", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + new_session_entry = SessionEntry( + session_key=session_key, + session_id="sess-new", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = new_session_entry + runner.session_store.reset_session.return_value = new_session_entry + runner.session_store._entries = {session_key: session_entry} + runner.session_store._generate_session_key.return_value = session_key + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner._session_db = None + runner._agent_cache_lock = None + runner._is_user_authorized = lambda _source: True + runner._format_session_info = lambda: "" + + return runner + + +@pytest.mark.asyncio +@patch("hermes_cli.plugins.invoke_hook") +async def test_reset_fires_finalize_hook(mock_invoke_hook): + """/new must fire on_session_finalize with the OLD session id.""" + runner = _make_runner() + + await runner._handle_reset_command(_make_event("/new")) + + mock_invoke_hook.assert_any_call( + "on_session_finalize", session_id="sess-old", platform="telegram" + ) + + +@pytest.mark.asyncio +@patch("hermes_cli.plugins.invoke_hook") +async def test_reset_fires_reset_hook(mock_invoke_hook): + """/new must fire on_session_reset with the NEW session id.""" + runner = _make_runner() + + await runner._handle_reset_command(_make_event("/new")) + + mock_invoke_hook.assert_any_call( + "on_session_reset", session_id="sess-new", platform="telegram" + ) + + +@pytest.mark.asyncio +@patch("hermes_cli.plugins.invoke_hook") +async def 
test_finalize_before_reset(mock_invoke_hook): + """on_session_finalize must fire before on_session_reset.""" + runner = _make_runner() + + await runner._handle_reset_command(_make_event("/new")) + + calls = [c for c in mock_invoke_hook.call_args_list + if c[0][0] in ("on_session_finalize", "on_session_reset")] + hook_names = [c[0][0] for c in calls] + assert hook_names == ["on_session_finalize", "on_session_reset"] + + +@pytest.mark.asyncio +@patch("hermes_cli.plugins.invoke_hook") +async def test_shutdown_fires_finalize_for_active_agents(mock_invoke_hook): + """Gateway stop() must fire on_session_finalize for each active agent.""" + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner._running = True + runner._background_tasks = set() + runner._pending_messages = {} + runner._pending_approvals = {} + runner._shutdown_event = MagicMock() + runner.adapters = {} + runner._exit_reason = "test" + + agent1 = MagicMock() + agent1.session_id = "sess-a" + agent2 = MagicMock() + agent2.session_id = "sess-b" + runner._running_agents = {"key-a": agent1, "key-b": agent2} + + with patch("gateway.status.remove_pid_file"), \ + patch("gateway.status.write_runtime_status"): + await runner.stop() + + finalize_calls = [ + c for c in mock_invoke_hook.call_args_list + if c[0][0] == "on_session_finalize" + ] + session_ids = {c[1]["session_id"] for c in finalize_calls} + assert session_ids == {"sess-a", "sess-b"} + + +@pytest.mark.asyncio +@patch("hermes_cli.plugins.invoke_hook", side_effect=Exception("boom")) +async def test_hook_error_does_not_break_reset(mock_invoke_hook): + """Plugin hook errors must not prevent /new from completing.""" + runner = _make_runner() + + result = await runner._handle_reset_command(_make_event("/new")) + + # Should still return a success message despite hook errors + assert "Session reset" in result or "New session" in result From 8b0afa0e5708c359503c15e903e063063d87d628 Mon Sep 17 00:00:00 2001 From: Teknium 
<127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 04:44:49 -0700 Subject: [PATCH 138/154] fix: aggressive worktree and branch cleanup to prevent accumulation (#6134) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: hermes -w sessions accumulated 37+ worktrees and 1200+ orphaned branches because: - _cleanup_worktree bailed on any dirty working tree, but agent sessions almost always leave untracked files/artifacts behind - _prune_stale_worktrees had the same dirty-check, so stale worktrees survived indefinitely - pr-* and hermes/* branches from PR review had zero cleanup mechanism Changes: - _cleanup_worktree: check for unpushed commits instead of dirty state. Agent work lives in pushed commits/PRs — dirty working tree without unpushed commits is just artifacts, safe to remove. - _prune_stale_worktrees: three-tier age system: - Under 24h: skip (session may be active) - 24h-72h: remove if no unpushed commits - Over 72h: force remove regardless - New _prune_orphaned_branches: on each -w startup, deletes local hermes/hermes-* and pr-* branches with no corresponding worktree. Protects main, checked-out branch, and active worktree branches. Tests: 42 pass (6 new covering unpushed-commit logic, force-prune tier, and orphaned branch cleanup). --- cli.py | 141 ++++++++++++++++---- tests/cli/test_worktree.py | 255 +++++++++++++++++++++++++++++++++---- 2 files changed, 346 insertions(+), 50 deletions(-) diff --git a/cli.py b/cli.py index 01ea17ff..f00e6b7f 100644 --- a/cli.py +++ b/cli.py @@ -760,7 +760,10 @@ def _setup_worktree(repo_root: str = None) -> Optional[Dict[str, str]]: def _cleanup_worktree(info: Dict[str, str] = None) -> None: """Remove a worktree and its branch on exit. - If the worktree has uncommitted changes, warn and keep it. + Preserves the worktree only if it has unpushed commits (real work + that hasn't been pushed to any remote). 
Uncommitted changes alone + (untracked files, test artifacts) are not enough to keep it — agent + work lives in commits/PRs, not the working tree. """ global _active_worktree info = info or _active_worktree @@ -776,23 +779,27 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None: if not Path(wt_path).exists(): return - # Check for uncommitted changes + # Check for unpushed commits — commits reachable from HEAD but not + # from any remote branch. These represent real work the agent did + # but didn't push. + has_unpushed = False try: - status = subprocess.run( - ["git", "status", "--porcelain"], + result = subprocess.run( + ["git", "log", "--oneline", "HEAD", "--not", "--remotes"], capture_output=True, text=True, timeout=10, cwd=wt_path, ) - has_changes = bool(status.stdout.strip()) + has_unpushed = bool(result.stdout.strip()) except Exception: - has_changes = True # Assume dirty on error — don't delete + has_unpushed = True # Assume unpushed on error — don't delete - if has_changes: - print(f"\n\033[33m⚠ Worktree has uncommitted changes, keeping: {wt_path}\033[0m") - print(f" To clean up manually: git worktree remove {wt_path}") + if has_unpushed: + print(f"\n\033[33m⚠ Worktree has unpushed commits, keeping: {wt_path}\033[0m") + print(f" To clean up manually: git worktree remove --force {wt_path}") _active_worktree = None return - # Remove worktree + # Remove worktree (even if working tree is dirty — uncommitted + # changes without unpushed commits are just artifacts) try: subprocess.run( ["git", "worktree", "remove", wt_path, "--force"], @@ -801,7 +808,7 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None: except Exception as e: logger.debug("Failed to remove worktree: %s", e) - # Delete the branch (only if it was never pushed / has no upstream) + # Delete the branch try: subprocess.run( ["git", "branch", "-D", branch], @@ -815,19 +822,27 @@ def _cleanup_worktree(info: Dict[str, str] = None) -> None: def _prune_stale_worktrees(repo_root: str, 
max_age_hours: int = 24) -> None: - """Remove worktrees older than max_age_hours that have no uncommitted changes. + """Remove stale worktrees and orphaned branches on startup. - Runs silently on startup to clean up after crashed/killed sessions. + Age-based tiers: + - Under max_age_hours (24h): skip — session may still be active. + - 24h–72h: remove if no unpushed commits. + - Over 72h: force remove regardless (nothing should sit this long). + + Also prunes orphaned ``hermes/*`` and ``pr-*`` local branches that + have no corresponding worktree. """ import subprocess import time worktrees_dir = Path(repo_root) / ".worktrees" if not worktrees_dir.exists(): + _prune_orphaned_branches(repo_root) return now = time.time() - cutoff = now - (max_age_hours * 3600) + soft_cutoff = now - (max_age_hours * 3600) # 24h default + hard_cutoff = now - (max_age_hours * 3 * 3600) # 72h default for entry in worktrees_dir.iterdir(): if not entry.is_dir() or not entry.name.startswith("hermes-"): @@ -836,21 +851,24 @@ def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None: # Check age try: mtime = entry.stat().st_mtime - if mtime > cutoff: + if mtime > soft_cutoff: continue # Too recent — skip except Exception: continue - # Check for uncommitted changes - try: - status = subprocess.run( - ["git", "status", "--porcelain"], - capture_output=True, text=True, timeout=5, cwd=str(entry), - ) - if status.stdout.strip(): - continue # Has changes — skip - except Exception: - continue # Can't check — skip + force = mtime <= hard_cutoff # Over 72h — force remove + + if not force: + # 24h–72h tier: only remove if no unpushed commits + try: + result = subprocess.run( + ["git", "log", "--oneline", "HEAD", "--not", "--remotes"], + capture_output=True, text=True, timeout=5, cwd=str(entry), + ) + if result.stdout.strip(): + continue # Has unpushed commits — skip + except Exception: + continue # Can't check — skip # Safe to remove try: @@ -869,10 +887,81 @@ def 
_prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None: ["git", "branch", "-D", branch], capture_output=True, text=True, timeout=10, cwd=repo_root, ) - logger.debug("Pruned stale worktree: %s", entry.name) + logger.debug("Pruned stale worktree: %s (force=%s)", entry.name, force) except Exception as e: logger.debug("Failed to prune worktree %s: %s", entry.name, e) + _prune_orphaned_branches(repo_root) + + +def _prune_orphaned_branches(repo_root: str) -> None: + """Delete local ``hermes/hermes-*`` and ``pr-*`` branches with no worktree. + + These are auto-generated by ``hermes -w`` sessions and PR review + workflows respectively. Once their worktree is gone they serve no + purpose and just accumulate. + """ + import subprocess + + try: + result = subprocess.run( + ["git", "branch", "--format=%(refname:short)"], + capture_output=True, text=True, timeout=10, cwd=repo_root, + ) + if result.returncode != 0: + return + all_branches = [b.strip() for b in result.stdout.strip().split("\n") if b.strip()] + except Exception: + return + + # Collect branches that are actively checked out in a worktree + active_branches: set = set() + try: + wt_result = subprocess.run( + ["git", "worktree", "list", "--porcelain"], + capture_output=True, text=True, timeout=10, cwd=repo_root, + ) + for line in wt_result.stdout.split("\n"): + if line.startswith("branch refs/heads/"): + active_branches.add(line.split("branch refs/heads/", 1)[-1].strip()) + except Exception: + return # Can't determine active branches — bail + + # Also protect the currently checked-out branch and main + try: + head_result = subprocess.run( + ["git", "branch", "--show-current"], + capture_output=True, text=True, timeout=5, cwd=repo_root, + ) + current = head_result.stdout.strip() + if current: + active_branches.add(current) + except Exception: + pass + active_branches.add("main") + + orphaned = [ + b for b in all_branches + if b not in active_branches + and (b.startswith("hermes/hermes-") or 
b.startswith("pr-")) + ] + + if not orphaned: + return + + # Delete in batches + for i in range(0, len(orphaned), 50): + batch = orphaned[i:i + 50] + try: + subprocess.run( + ["git", "branch", "-D"] + batch, + capture_output=True, text=True, timeout=30, cwd=repo_root, + ) + except Exception as e: + logger.debug("Failed to prune orphaned branches: %s", e) + + logger.debug("Pruned %d orphaned branches", len(orphaned)) + # ============================================================================ # ASCII Art & Branding # ============================================================================ diff --git a/tests/cli/test_worktree.py b/tests/cli/test_worktree.py index f545baa3..fece9cf6 100644 --- a/tests/cli/test_worktree.py +++ b/tests/cli/test_worktree.py @@ -33,6 +33,13 @@ def git_repo(tmp_path): ["git", "commit", "-m", "Initial commit"], cwd=repo, capture_output=True, ) + # Add a fake remote ref so cleanup logic sees the initial commit as + # "pushed". Without this, `git log HEAD --not --remotes` treats every + # commit as unpushed and cleanup refuses to delete worktrees. + subprocess.run( + ["git", "update-ref", "refs/remotes/origin/main", "HEAD"], + cwd=repo, capture_output=True, + ) return repo @@ -81,7 +88,11 @@ def _setup_worktree(repo_root): def _cleanup_worktree(info): - """Test version of _cleanup_worktree.""" + """Test version of _cleanup_worktree. + + Preserves the worktree only if it has unpushed commits. + Dirty working tree alone is not enough to keep it. 
+ """ wt_path = info["path"] branch = info["branch"] repo_root = info["repo_root"] @@ -89,15 +100,15 @@ def _cleanup_worktree(info): if not Path(wt_path).exists(): return - # Check for uncommitted changes - status = subprocess.run( - ["git", "status", "--porcelain"], + # Check for unpushed commits + result = subprocess.run( + ["git", "log", "--oneline", "HEAD", "--not", "--remotes"], capture_output=True, text=True, timeout=10, cwd=wt_path, ) - has_changes = bool(status.stdout.strip()) + has_unpushed = bool(result.stdout.strip()) - if has_changes: - return False # Did not clean up + if has_unpushed: + return False # Did not clean up — has unpushed commits subprocess.run( ["git", "worktree", "remove", wt_path, "--force"], @@ -204,20 +215,45 @@ class TestWorktreeCleanup: assert result is True assert not Path(info["path"]).exists() - def test_dirty_worktree_kept(self, git_repo): + def test_dirty_worktree_cleaned_when_no_unpushed(self, git_repo): + """Dirty working tree without unpushed commits is cleaned up. + + Agent sessions typically leave untracked files / artifacts behind. + Since all real work is in pushed commits, these don't warrant + keeping the worktree. + """ info = _setup_worktree(str(git_repo)) assert info is not None - # Make uncommitted changes + # Make uncommitted changes (untracked file) (Path(info["path"]) / "new-file.txt").write_text("uncommitted") subprocess.run( ["git", "add", "new-file.txt"], cwd=info["path"], capture_output=True, ) + # The git_repo fixture already has a fake remote ref so the initial + # commit is seen as "pushed". No unpushed commits → cleanup proceeds. 
result = _cleanup_worktree(info) - assert result is False - assert Path(info["path"]).exists() # Still there + assert result is True # Cleaned up despite dirty working tree + assert not Path(info["path"]).exists() + + def test_worktree_with_unpushed_commits_kept(self, git_repo): + """Worktree with unpushed commits is preserved.""" + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Make a commit that is NOT on any remote + (Path(info["path"]) / "work.txt").write_text("real work") + subprocess.run(["git", "add", "work.txt"], cwd=info["path"], capture_output=True) + subprocess.run( + ["git", "commit", "-m", "agent work"], + cwd=info["path"], capture_output=True, + ) + + result = _cleanup_worktree(info) + assert result is False # Kept — has unpushed commits + assert Path(info["path"]).exists() def test_branch_deleted_on_cleanup(self, git_repo): info = _setup_worktree(str(git_repo)) @@ -367,7 +403,7 @@ class TestMultipleWorktrees: lines = [l for l in result.stdout.strip().splitlines() if l.strip()] assert len(lines) == 11 - # Cleanup all + # Cleanup all (git_repo fixture has a fake remote ref so cleanup works) for info in worktrees: # Discard changes first so cleanup works subprocess.run( @@ -492,33 +528,77 @@ class TestStaleWorktreePruning: assert not pruned assert Path(info["path"]).exists() - def test_keeps_dirty_old_worktree(self, git_repo): - """Old worktrees with uncommitted changes should NOT be pruned.""" + def test_keeps_old_worktree_with_unpushed_commits(self, git_repo): + """Old worktrees (24-72h) with unpushed commits should NOT be pruned.""" import time info = _setup_worktree(str(git_repo)) assert info is not None - # Make it dirty - (Path(info["path"]) / "dirty.txt").write_text("uncommitted") + # Make an unpushed commit + (Path(info["path"]) / "work.txt").write_text("real work") + subprocess.run(["git", "add", "work.txt"], cwd=info["path"], capture_output=True) subprocess.run( - ["git", "add", "dirty.txt"], + ["git", "commit", "-m", 
"agent work"], cwd=info["path"], capture_output=True, ) - # Make it old + # Make it old (25h — in the 24-72h soft tier) old_time = time.time() - (25 * 3600) os.utime(info["path"], (old_time, old_time)) - # Check if it would be pruned - status = subprocess.run( - ["git", "status", "--porcelain"], + # Check for unpushed commits (simulates prune logic) + result = subprocess.run( + ["git", "log", "--oneline", "HEAD", "--not", "--remotes"], capture_output=True, text=True, cwd=info["path"], ) - has_changes = bool(status.stdout.strip()) - assert has_changes # Should be dirty → not pruned + has_unpushed = bool(result.stdout.strip()) + assert has_unpushed # Has unpushed commits → not pruned in soft tier assert Path(info["path"]).exists() + def test_force_prunes_very_old_worktree(self, git_repo): + """Worktrees older than 72h should be force-pruned regardless.""" + import time + + info = _setup_worktree(str(git_repo)) + assert info is not None + + # Make an unpushed commit (would normally protect it) + (Path(info["path"]) / "work.txt").write_text("stale work") + subprocess.run(["git", "add", "work.txt"], cwd=info["path"], capture_output=True) + subprocess.run( + ["git", "commit", "-m", "old agent work"], + cwd=info["path"], capture_output=True, + ) + + # Make it very old (73h — beyond the 72h hard threshold) + old_time = time.time() - (73 * 3600) + os.utime(info["path"], (old_time, old_time)) + + # Simulate the force-prune tier check + hard_cutoff = time.time() - (72 * 3600) + mtime = Path(info["path"]).stat().st_mtime + assert mtime <= hard_cutoff # Should qualify for force removal + + # Actually remove it (simulates _prune_stale_worktrees force path) + branch_result = subprocess.run( + ["git", "branch", "--show-current"], + capture_output=True, text=True, timeout=5, cwd=info["path"], + ) + branch = branch_result.stdout.strip() + + subprocess.run( + ["git", "worktree", "remove", info["path"], "--force"], + capture_output=True, text=True, timeout=15, cwd=str(git_repo), + ) + 
if branch: + subprocess.run( + ["git", "branch", "-D", branch], + capture_output=True, text=True, timeout=10, cwd=str(git_repo), + ) + + assert not Path(info["path"]).exists() + class TestEdgeCases: """Test edge cases for robustness.""" @@ -611,6 +691,133 @@ class TestTerminalCWDIntegration: assert result.stdout.strip() == "true" +class TestOrphanedBranchPruning: + """Test cleanup of orphaned hermes/* and pr-* branches.""" + + def test_prunes_orphaned_hermes_branch(self, git_repo): + """hermes/hermes-* branches with no worktree should be deleted.""" + # Create a branch that looks like a worktree branch but has no worktree + subprocess.run( + ["git", "branch", "hermes/hermes-deadbeef", "HEAD"], + cwd=str(git_repo), capture_output=True, + ) + + # Verify it exists + result = subprocess.run( + ["git", "branch", "--list", "hermes/hermes-deadbeef"], + capture_output=True, text=True, cwd=str(git_repo), + ) + assert "hermes/hermes-deadbeef" in result.stdout + + # Simulate _prune_orphaned_branches logic + result = subprocess.run( + ["git", "branch", "--format=%(refname:short)"], + capture_output=True, text=True, cwd=str(git_repo), + ) + all_branches = [b.strip() for b in result.stdout.strip().split("\n") if b.strip()] + + wt_result = subprocess.run( + ["git", "worktree", "list", "--porcelain"], + capture_output=True, text=True, cwd=str(git_repo), + ) + active_branches = {"main"} + for line in wt_result.stdout.split("\n"): + if line.startswith("branch refs/heads/"): + active_branches.add(line.split("branch refs/heads/", 1)[-1].strip()) + + orphaned = [ + b for b in all_branches + if b not in active_branches + and (b.startswith("hermes/hermes-") or b.startswith("pr-")) + ] + assert "hermes/hermes-deadbeef" in orphaned + + # Delete them + if orphaned: + subprocess.run( + ["git", "branch", "-D"] + orphaned, + capture_output=True, text=True, cwd=str(git_repo), + ) + + # Verify gone + result = subprocess.run( + ["git", "branch", "--list", "hermes/hermes-deadbeef"], + 
capture_output=True, text=True, cwd=str(git_repo), + ) + assert "hermes/hermes-deadbeef" not in result.stdout + + def test_prunes_orphaned_pr_branch(self, git_repo): + """pr-* branches should be deleted during pruning.""" + subprocess.run( + ["git", "branch", "pr-1234", "HEAD"], + cwd=str(git_repo), capture_output=True, + ) + subprocess.run( + ["git", "branch", "pr-5678", "HEAD"], + cwd=str(git_repo), capture_output=True, + ) + + result = subprocess.run( + ["git", "branch", "--format=%(refname:short)"], + capture_output=True, text=True, cwd=str(git_repo), + ) + all_branches = [b.strip() for b in result.stdout.strip().split("\n") if b.strip()] + + active_branches = {"main"} + orphaned = [ + b for b in all_branches + if b not in active_branches and b.startswith("pr-") + ] + assert "pr-1234" in orphaned + assert "pr-5678" in orphaned + + subprocess.run( + ["git", "branch", "-D"] + orphaned, + capture_output=True, text=True, cwd=str(git_repo), + ) + + # Verify gone + result = subprocess.run( + ["git", "branch", "--format=%(refname:short)"], + capture_output=True, text=True, cwd=str(git_repo), + ) + remaining = result.stdout.strip() + assert "pr-1234" not in remaining + assert "pr-5678" not in remaining + + def test_preserves_active_worktree_branch(self, git_repo): + """Branches with active worktrees should NOT be pruned.""" + info = _setup_worktree(str(git_repo)) + assert info is not None + + result = subprocess.run( + ["git", "worktree", "list", "--porcelain"], + capture_output=True, text=True, cwd=str(git_repo), + ) + active_branches = set() + for line in result.stdout.split("\n"): + if line.startswith("branch refs/heads/"): + active_branches.add(line.split("branch refs/heads/", 1)[-1].strip()) + + assert info["branch"] in active_branches # Protected + + def test_preserves_main_branch(self, git_repo): + """main branch should never be pruned.""" + result = subprocess.run( + ["git", "branch", "--format=%(refname:short)"], + capture_output=True, text=True, 
cwd=str(git_repo), + ) + all_branches = [b.strip() for b in result.stdout.strip().split("\n") if b.strip()] + active_branches = {"main"} + + orphaned = [ + b for b in all_branches + if b not in active_branches + and (b.startswith("hermes/hermes-") or b.startswith("pr-")) + ] + assert "main" not in orphaned + + class TestSystemPromptInjection: """Test that the agent gets worktree context in its system prompt.""" @@ -625,7 +832,7 @@ class TestSystemPromptInjection: f"{info['path']}. Your branch is `{info['branch']}`. " f"Changes here do not affect the main working tree or other agents. " f"Remember to commit and push your changes, and create a PR if appropriate. " - f"The original repo is at {info['repo_root']}.]" + f"The original repo is at {info['repo_root']}.]\n" ) assert info["path"] in wt_note From 86960cdbb0148145890e2ee90b4e157fa899f6e1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 04:56:20 -0700 Subject: [PATCH 139/154] chore: release v0.8.0 (2026.4.8) (#6135) --- RELEASE_v0.8.0.md | 342 +++++++++++++++++++++++++++++++++++++++++ hermes_cli/__init__.py | 4 +- pyproject.toml | 2 +- 3 files changed, 345 insertions(+), 3 deletions(-) create mode 100644 RELEASE_v0.8.0.md diff --git a/RELEASE_v0.8.0.md b/RELEASE_v0.8.0.md new file mode 100644 index 00000000..8cdea2b1 --- /dev/null +++ b/RELEASE_v0.8.0.md @@ -0,0 +1,342 @@ +# Hermes Agent v0.8.0 (v2026.4.8) + +**Release Date:** April 8, 2026 + +> The intelligence release — native Google AI Studio provider, live model switching across all platforms, self-optimized GPT/Codex guidance, smart inactivity timeouts, approval buttons, interactive model pickers, MCP OAuth 2.1, and 209 merged PRs with 82 resolved issues. + +--- + +## ✨ Highlights + +- **Google AI Studio (Gemini) Native Provider** — Direct access to Gemini models through Google's AI Studio API. 
Includes automatic models.dev registry integration for real-time context length detection across any provider. ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577)) + +- **Live Model Switching (`/model` Command)** — Switch models and providers mid-session from CLI, Telegram, Discord, Slack, or any gateway platform. Aggregator-aware resolution keeps you on OpenRouter/Nous when possible, with automatic cross-provider fallback when needed. Interactive model pickers on Telegram and Discord with inline buttons. ([#5181](https://github.com/NousResearch/hermes-agent/pull/5181), [#5742](https://github.com/NousResearch/hermes-agent/pull/5742)) + +- **Self-Optimized GPT/Codex Tool-Use Guidance** — The agent diagnosed and patched 5 failure modes in GPT and Codex tool calling through automated behavioral benchmarking, dramatically improving reliability on OpenAI models. Includes execution discipline guidance and thinking-only prefill continuation for structured reasoning. ([#6120](https://github.com/NousResearch/hermes-agent/pull/6120), [#5414](https://github.com/NousResearch/hermes-agent/pull/5414), [#5931](https://github.com/NousResearch/hermes-agent/pull/5931)) + +- **Inactivity-Based Agent Timeouts** — Gateway and cron timeouts now track actual tool activity instead of wall-clock time. Long-running tasks that are actively working will never be killed — only truly idle agents time out. ([#5389](https://github.com/NousResearch/hermes-agent/pull/5389), [#5440](https://github.com/NousResearch/hermes-agent/pull/5440)) + +- **Approval Buttons on Slack & Telegram** — Dangerous command approval via native platform buttons instead of typing `/approve`. Slack gets thread context preservation; Telegram gets emoji reactions for approval status. 
([#5890](https://github.com/NousResearch/hermes-agent/pull/5890), [#5975](https://github.com/NousResearch/hermes-agent/pull/5975)) + +- **MCP OAuth 2.1 PKCE + OSV Malware Scanning** — Full standards-compliant OAuth for MCP server authentication, plus automatic malware scanning of MCP extension packages via the OSV vulnerability database. ([#5420](https://github.com/NousResearch/hermes-agent/pull/5420), [#5305](https://github.com/NousResearch/hermes-agent/pull/5305)) + +- **Centralized Logging & Config Validation** — Structured logging to `~/.hermes/logs/` (agent.log + errors.log) with the `hermes logs` command for tailing and filtering. Config structure validation catches malformed YAML at startup before it causes cryptic failures. ([#5430](https://github.com/NousResearch/hermes-agent/pull/5430), [#5426](https://github.com/NousResearch/hermes-agent/pull/5426)) + +- **Plugin System Expansion** — Plugins can now register CLI subcommands, receive request-scoped API hooks with correlation IDs, prompt for required env vars during install, and hook into session lifecycle events (finalize/reset). ([#5295](https://github.com/NousResearch/hermes-agent/pull/5295), [#5427](https://github.com/NousResearch/hermes-agent/pull/5427), [#5470](https://github.com/NousResearch/hermes-agent/pull/5470), [#6129](https://github.com/NousResearch/hermes-agent/pull/6129)) + +- **Matrix Tier 1 & Platform Hardening** — Matrix gets reactions, read receipts, rich formatting, and room management. Discord adds channel controls and ignored channels. Signal gets full MEDIA: tag delivery. Mattermost gets file attachments. Comprehensive reliability fixes across all platforms. 
([#5275](https://github.com/NousResearch/hermes-agent/pull/5275), [#5975](https://github.com/NousResearch/hermes-agent/pull/5975), [#5602](https://github.com/NousResearch/hermes-agent/pull/5602)) + +- **Security Hardening Pass** — Consolidated SSRF protections, timing attack mitigations, tar traversal prevention, credential leakage guards, cron path traversal hardening, and cross-session isolation. Terminal workdir sanitization across all backends. ([#5944](https://github.com/NousResearch/hermes-agent/pull/5944), [#5613](https://github.com/NousResearch/hermes-agent/pull/5613), [#5629](https://github.com/NousResearch/hermes-agent/pull/5629)) + +--- + +## 🏗️ Core Agent & Architecture + +### Provider & Model Support +- **Native Google AI Studio (Gemini) provider** with models.dev integration for automatic context length detection ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577)) +- **`/model` command — full provider+model system overhaul** — live switching across CLI and all gateway platforms with aggregator-aware resolution ([#5181](https://github.com/NousResearch/hermes-agent/pull/5181)) +- **Interactive model picker for Telegram and Discord** — inline button-based model selection ([#5742](https://github.com/NousResearch/hermes-agent/pull/5742)) +- **Nous Portal free-tier model gating** with pricing display in model selection ([#5880](https://github.com/NousResearch/hermes-agent/pull/5880)) +- **Model pricing display** for OpenRouter and Nous Portal providers ([#5416](https://github.com/NousResearch/hermes-agent/pull/5416)) +- **xAI (Grok) prompt caching** via `x-grok-conv-id` header ([#5604](https://github.com/NousResearch/hermes-agent/pull/5604)) +- **Grok added to tool-use enforcement models** for direct xAI usage ([#5595](https://github.com/NousResearch/hermes-agent/pull/5595)) +- **MiniMax TTS provider** (speech-2.8) ([#4963](https://github.com/NousResearch/hermes-agent/pull/4963)) +- **Non-agentic model warning** — warns users when loading 
Hermes LLM models not designed for tool use ([#5378](https://github.com/NousResearch/hermes-agent/pull/5378)) +- **Ollama Cloud auth, /model switch persistence**, and alias tab completion ([#5269](https://github.com/NousResearch/hermes-agent/pull/5269)) +- **Preserve dots in OpenCode Go model names** (minimax-m2.7, glm-4.5, kimi-k2.5) ([#5597](https://github.com/NousResearch/hermes-agent/pull/5597)) +- **MiniMax models 404 fix** — strip /v1 from Anthropic base URL for OpenCode Go ([#4918](https://github.com/NousResearch/hermes-agent/pull/4918)) +- **Provider credential reset windows** honored in pooled failover ([#5188](https://github.com/NousResearch/hermes-agent/pull/5188)) +- **OAuth token sync** between credential pool and credentials file ([#4981](https://github.com/NousResearch/hermes-agent/pull/4981)) +- **Stale OAuth credentials** no longer block OpenRouter users on auto-detect ([#5746](https://github.com/NousResearch/hermes-agent/pull/5746)) +- **Codex OAuth credential pool disconnect** + expired token import fix ([#5681](https://github.com/NousResearch/hermes-agent/pull/5681)) +- **Codex pool entry sync** from `~/.codex/auth.json` on exhaustion — @GratefulDave ([#5610](https://github.com/NousResearch/hermes-agent/pull/5610)) +- **Auxiliary client payment fallback** — retry with next provider on 402 ([#5599](https://github.com/NousResearch/hermes-agent/pull/5599)) +- **Auxiliary client resolves named custom providers** and 'main' alias ([#5978](https://github.com/NousResearch/hermes-agent/pull/5978)) +- **Use mimo-v2-pro** for non-vision auxiliary tasks on Nous free tier ([#6018](https://github.com/NousResearch/hermes-agent/pull/6018)) +- **Vision auto-detection** tries main provider first ([#6041](https://github.com/NousResearch/hermes-agent/pull/6041)) +- **Provider re-ordering and Quick Install** — @austinpickett ([#4664](https://github.com/NousResearch/hermes-agent/pull/4664)) +- **Nous OAuth access_token** no longer used as inference API key — @SHL0MS 
([#5564](https://github.com/NousResearch/hermes-agent/pull/5564)) +- **HERMES_PORTAL_BASE_URL env var** respected during Nous login — @benbarclay ([#5745](https://github.com/NousResearch/hermes-agent/pull/5745)) +- **Env var overrides** for Nous portal/inference URLs ([#5419](https://github.com/NousResearch/hermes-agent/pull/5419)) +- **Z.AI endpoint auto-detect** via probe and cache ([#5763](https://github.com/NousResearch/hermes-agent/pull/5763)) +- **MiniMax context lengths, model catalog, thinking guard, aux model, and config base_url** corrections ([#6082](https://github.com/NousResearch/hermes-agent/pull/6082)) +- **Community provider/model resolution fixes** — salvaged 4 community PRs + MiniMax aux URL ([#5983](https://github.com/NousResearch/hermes-agent/pull/5983)) + +### Agent Loop & Conversation +- **Self-optimized GPT/Codex tool-use guidance** via automated behavioral benchmarking — agent self-diagnosed and patched 5 failure modes ([#6120](https://github.com/NousResearch/hermes-agent/pull/6120)) +- **GPT/Codex execution discipline guidance** in system prompts ([#5414](https://github.com/NousResearch/hermes-agent/pull/5414)) +- **Thinking-only prefill continuation** for structured reasoning responses ([#5931](https://github.com/NousResearch/hermes-agent/pull/5931)) +- **Accept reasoning-only responses** without retries — set content to "(empty)" instead of infinite retry ([#5278](https://github.com/NousResearch/hermes-agent/pull/5278)) +- **Jittered retry backoff** — exponential backoff with jitter for API retries ([#6048](https://github.com/NousResearch/hermes-agent/pull/6048)) +- **Smart thinking block signature management** — preserve and manage Anthropic thinking signatures across turns ([#6112](https://github.com/NousResearch/hermes-agent/pull/6112)) +- **Coerce tool call arguments** to match JSON Schema types — fixes models that send strings instead of numbers/booleans ([#5265](https://github.com/NousResearch/hermes-agent/pull/5265)) +- **Save 
oversized tool results to file** instead of destructive truncation ([#5210](https://github.com/NousResearch/hermes-agent/pull/5210)) +- **Sandbox-aware tool result persistence** ([#6085](https://github.com/NousResearch/hermes-agent/pull/6085)) +- **Streaming fallback** improved after edit failures ([#6110](https://github.com/NousResearch/hermes-agent/pull/6110)) +- **Codex empty-output gaps** covered in fallback + normalizer + auxiliary client ([#5724](https://github.com/NousResearch/hermes-agent/pull/5724), [#5730](https://github.com/NousResearch/hermes-agent/pull/5730), [#5734](https://github.com/NousResearch/hermes-agent/pull/5734)) +- **Codex stream output backfill** from output_item.done events ([#5689](https://github.com/NousResearch/hermes-agent/pull/5689)) +- **Stream consumer creates new message** after tool boundaries ([#5739](https://github.com/NousResearch/hermes-agent/pull/5739)) +- **Codex validation aligned** with normalization for empty stream output ([#5940](https://github.com/NousResearch/hermes-agent/pull/5940)) +- **Bridge tool-calls** in copilot-acp adapter ([#5460](https://github.com/NousResearch/hermes-agent/pull/5460)) +- **Filter transcript-only roles** from chat-completions payload ([#4880](https://github.com/NousResearch/hermes-agent/pull/4880)) +- **Context compaction failures fixed** on temperature-restricted models — @MadKangYu ([#5608](https://github.com/NousResearch/hermes-agent/pull/5608)) +- **Sanitize tool_calls for all strict APIs** (Fireworks, Mistral, etc.) 
— @lumethegreat ([#5183](https://github.com/NousResearch/hermes-agent/pull/5183)) + +### Memory & Sessions +- **Supermemory memory provider** — new memory plugin with multi-container, search_mode, identity template, and env var override ([#5737](https://github.com/NousResearch/hermes-agent/pull/5737), [#5933](https://github.com/NousResearch/hermes-agent/pull/5933)) +- **Shared thread sessions** by default — multi-user thread support across gateway platforms ([#5391](https://github.com/NousResearch/hermes-agent/pull/5391)) +- **Subagent sessions linked to parent** and hidden from session list ([#5309](https://github.com/NousResearch/hermes-agent/pull/5309)) +- **Profile-scoped memory isolation** and clone support ([#4845](https://github.com/NousResearch/hermes-agent/pull/4845)) +- **Thread gateway user_id to memory plugins** for per-user scoping ([#5895](https://github.com/NousResearch/hermes-agent/pull/5895)) +- **Honcho plugin drift overhaul** + plugin CLI registration system ([#5295](https://github.com/NousResearch/hermes-agent/pull/5295)) +- **Honcho holographic prompt and trust score** rendering preserved ([#4872](https://github.com/NousResearch/hermes-agent/pull/4872)) +- **Honcho doctor fix** — use recall_mode instead of memory_mode — @techguysimon ([#5645](https://github.com/NousResearch/hermes-agent/pull/5645)) +- **RetainDB** — API routes, write queue, dialectic, agent model, file tools fixes ([#5461](https://github.com/NousResearch/hermes-agent/pull/5461)) +- **Hindsight memory plugin overhaul** + memory setup wizard fixes ([#5094](https://github.com/NousResearch/hermes-agent/pull/5094)) +- **mem0 API v2 compat**, prefetch context fencing, secret redaction ([#5423](https://github.com/NousResearch/hermes-agent/pull/5423)) +- **mem0 env vars merged** with mem0.json instead of either/or ([#4939](https://github.com/NousResearch/hermes-agent/pull/4939)) +- **Clean user message** used for all memory provider operations 
([#4940](https://github.com/NousResearch/hermes-agent/pull/4940)) +- **Silent memory flush failure** on /new and /resume fixed — @ryanautomated ([#5640](https://github.com/NousResearch/hermes-agent/pull/5640)) +- **OpenViking atexit safety net** for session commit ([#5664](https://github.com/NousResearch/hermes-agent/pull/5664)) +- **OpenViking tenant-scoping headers** for multi-tenant servers ([#4936](https://github.com/NousResearch/hermes-agent/pull/4936)) +- **ByteRover brv query** runs synchronously before LLM call ([#4831](https://github.com/NousResearch/hermes-agent/pull/4831)) + +--- + +## 📱 Messaging Platforms (Gateway) + +### Gateway Core +- **Inactivity-based agent timeout** — replaces wall-clock timeout with smart activity tracking; long-running active tasks never killed ([#5389](https://github.com/NousResearch/hermes-agent/pull/5389)) +- **Approval buttons for Slack & Telegram** + Slack thread context preservation ([#5890](https://github.com/NousResearch/hermes-agent/pull/5890)) +- **Live-stream /update output** + forward interactive prompts to user ([#5180](https://github.com/NousResearch/hermes-agent/pull/5180)) +- **Infinite timeout support** + periodic notifications + actionable error messages ([#4959](https://github.com/NousResearch/hermes-agent/pull/4959)) +- **Duplicate message prevention** — gateway dedup + partial stream guard ([#4878](https://github.com/NousResearch/hermes-agent/pull/4878)) +- **Webhook delivery_info persistence** + full session id in /status ([#5942](https://github.com/NousResearch/hermes-agent/pull/5942)) +- **Tool preview truncation** respects tool_preview_length in all/new progress modes ([#5937](https://github.com/NousResearch/hermes-agent/pull/5937)) +- **Short preview truncation** restored for all/new tool progress modes ([#4935](https://github.com/NousResearch/hermes-agent/pull/4935)) +- **Update-pending state** written atomically to prevent corruption ([#4923](https://github.com/NousResearch/hermes-agent/pull/4923)) 
+- **Approval session key isolated** per turn ([#4884](https://github.com/NousResearch/hermes-agent/pull/4884)) +- **Active-session guard bypass** for /approve, /deny, /stop, /new ([#4926](https://github.com/NousResearch/hermes-agent/pull/4926), [#5765](https://github.com/NousResearch/hermes-agent/pull/5765)) +- **Typing indicator paused** during approval waits ([#5893](https://github.com/NousResearch/hermes-agent/pull/5893)) +- **Caption check** uses exact line-by-line match instead of substring (all platforms) ([#5939](https://github.com/NousResearch/hermes-agent/pull/5939)) +- **MEDIA: tags stripped** from streamed gateway messages ([#5152](https://github.com/NousResearch/hermes-agent/pull/5152)) +- **MEDIA: tags extracted** from cron delivery before sending ([#5598](https://github.com/NousResearch/hermes-agent/pull/5598)) +- **Profile-aware service units** + voice transcription cleanup ([#5972](https://github.com/NousResearch/hermes-agent/pull/5972)) +- **Thread-safe PairingStore** with atomic writes — @CharlieKerfoot ([#5656](https://github.com/NousResearch/hermes-agent/pull/5656)) +- **Sanitize media URLs** in base platform logs — @WAXLYY ([#5631](https://github.com/NousResearch/hermes-agent/pull/5631)) +- **Reduce Telegram fallback IP activation log noise** — @MadKangYu ([#5615](https://github.com/NousResearch/hermes-agent/pull/5615)) +- **Cron static method wrappers** to prevent self-binding ([#5299](https://github.com/NousResearch/hermes-agent/pull/5299)) +- **Stale 'hermes login' replaced** with 'hermes auth' + credential removal re-seeding fix ([#5670](https://github.com/NousResearch/hermes-agent/pull/5670)) + +### Telegram +- **Group topics skill binding** for supergroup forum topics ([#4886](https://github.com/NousResearch/hermes-agent/pull/4886)) +- **Emoji reactions** for approval status and notifications ([#5975](https://github.com/NousResearch/hermes-agent/pull/5975)) +- **Duplicate message delivery prevented** on send timeout 
([#5153](https://github.com/NousResearch/hermes-agent/pull/5153)) +- **Command names sanitized** to strip invalid characters ([#5596](https://github.com/NousResearch/hermes-agent/pull/5596)) +- **Per-platform disabled skills** respected in Telegram menu and gateway dispatch ([#4799](https://github.com/NousResearch/hermes-agent/pull/4799)) +- **/approve and /deny** routed through running-agent guard ([#4798](https://github.com/NousResearch/hermes-agent/pull/4798)) + +### Discord +- **Channel controls** — ignored_channels and no_thread_channels config options ([#5975](https://github.com/NousResearch/hermes-agent/pull/5975)) +- **Skills registered as native slash commands** via shared gateway logic ([#5603](https://github.com/NousResearch/hermes-agent/pull/5603)) +- **/approve, /deny, /queue, /background, /btw** registered as native slash commands ([#4800](https://github.com/NousResearch/hermes-agent/pull/4800), [#5477](https://github.com/NousResearch/hermes-agent/pull/5477)) +- **Unnecessary members intent** removed on startup + token lock leak fix ([#5302](https://github.com/NousResearch/hermes-agent/pull/5302)) + +### Slack +- **Thread engagement** — auto-respond in bot-started and mentioned threads ([#5897](https://github.com/NousResearch/hermes-agent/pull/5897)) +- **mrkdwn in edit_message** + thread replies without @mentions ([#5733](https://github.com/NousResearch/hermes-agent/pull/5733)) + +### Matrix +- **Tier 1 feature parity** — reactions, read receipts, rich formatting, room management ([#5275](https://github.com/NousResearch/hermes-agent/pull/5275)) +- **MATRIX_REQUIRE_MENTION and MATRIX_AUTO_THREAD** support ([#5106](https://github.com/NousResearch/hermes-agent/pull/5106)) +- **Comprehensive reliability** — encrypted media, auth recovery, cron E2EE, Synapse compat ([#5271](https://github.com/NousResearch/hermes-agent/pull/5271)) +- **CJK input, E2EE, and reconnect** fixes ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665)) + +### Signal +- 
**Full MEDIA: tag delivery** — send_image_file, send_voice, and send_video implemented ([#5602](https://github.com/NousResearch/hermes-agent/pull/5602)) + +### Mattermost +- **File attachments** — set message type to DOCUMENT when post has file attachments — @nericervin ([#5609](https://github.com/NousResearch/hermes-agent/pull/5609)) + +### Feishu +- **Interactive card approval buttons** ([#6043](https://github.com/NousResearch/hermes-agent/pull/6043)) +- **Reconnect and ACL** fixes ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665)) + +### Webhooks +- **`{__raw__}` template token** and thread_id passthrough for forum topics ([#5662](https://github.com/NousResearch/hermes-agent/pull/5662)) + +--- + +## 🖥️ CLI & User Experience + +### Interactive CLI +- **Defer response content** until reasoning block completes ([#5773](https://github.com/NousResearch/hermes-agent/pull/5773)) +- **Ghost status-bar lines cleared** on terminal resize ([#4960](https://github.com/NousResearch/hermes-agent/pull/4960)) +- **Normalise \r\n and \r line endings** in pasted text ([#4849](https://github.com/NousResearch/hermes-agent/pull/4849)) +- **ChatConsole errors, curses scroll, skin-aware banner, git state** banner fixes ([#5974](https://github.com/NousResearch/hermes-agent/pull/5974)) +- **Native Windows image paste** support ([#5917](https://github.com/NousResearch/hermes-agent/pull/5917)) +- **--yolo and other flags** no longer silently dropped when placed before 'chat' subcommand ([#5145](https://github.com/NousResearch/hermes-agent/pull/5145)) + +### Setup & Configuration +- **Config structure validation** — detect malformed YAML at startup with actionable error messages ([#5426](https://github.com/NousResearch/hermes-agent/pull/5426)) +- **Centralized logging** to `~/.hermes/logs/` — agent.log (INFO+), errors.log (WARNING+) with `hermes logs` command ([#5430](https://github.com/NousResearch/hermes-agent/pull/5430)) +- **Docs links added** to setup wizard sections 
([#5283](https://github.com/NousResearch/hermes-agent/pull/5283)) +- **Doctor diagnostics** — sync provider checks, config migration, WAL and mem0 diagnostics ([#5077](https://github.com/NousResearch/hermes-agent/pull/5077)) +- **Timeout debug logging** and user-facing diagnostics improved ([#5370](https://github.com/NousResearch/hermes-agent/pull/5370)) +- **Reasoning effort unified** to config.yaml only ([#6118](https://github.com/NousResearch/hermes-agent/pull/6118)) +- **Permanent command allowlist** loaded on startup ([#5076](https://github.com/NousResearch/hermes-agent/pull/5076)) +- **`hermes auth remove`** now clears env-seeded credentials permanently ([#5285](https://github.com/NousResearch/hermes-agent/pull/5285)) +- **Bundled skills synced to all profiles** during update ([#5795](https://github.com/NousResearch/hermes-agent/pull/5795)) +- **`hermes update` no longer kills** freshly-restarted gateway service ([#5448](https://github.com/NousResearch/hermes-agent/pull/5448)) +- **Subprocess.run() timeouts** added to all gateway CLI commands ([#5424](https://github.com/NousResearch/hermes-agent/pull/5424)) +- **Actionable error message** when Codex refresh token is reused — @tymrtn ([#5612](https://github.com/NousResearch/hermes-agent/pull/5612)) +- **Google-workspace skill scripts** can now run directly — @xinbenlv ([#5624](https://github.com/NousResearch/hermes-agent/pull/5624)) + +### Cron System +- **Inactivity-based cron timeout** — replaces wall-clock; active tasks run indefinitely ([#5440](https://github.com/NousResearch/hermes-agent/pull/5440)) +- **Pre-run script injection** for data collection and change detection ([#5082](https://github.com/NousResearch/hermes-agent/pull/5082)) +- **Delivery failure tracking** in job status ([#6042](https://github.com/NousResearch/hermes-agent/pull/6042)) +- **Delivery guidance** in cron prompts — stops send_message thrashing ([#5444](https://github.com/NousResearch/hermes-agent/pull/5444)) +- **MEDIA files 
delivered** as native platform attachments ([#5921](https://github.com/NousResearch/hermes-agent/pull/5921)) +- **[SILENT] suppression** works anywhere in response — @auspic7 ([#5654](https://github.com/NousResearch/hermes-agent/pull/5654)) +- **Cron path traversal** hardening ([#5147](https://github.com/NousResearch/hermes-agent/pull/5147)) + +--- + +## 🔧 Tool System + +### Terminal & Execution +- **Execute_code on remote backends** — code execution now works on Docker, SSH, Modal, and other remote terminal backends ([#5088](https://github.com/NousResearch/hermes-agent/pull/5088)) +- **Exit code context** for common CLI tools in terminal results — helps agent understand what went wrong ([#5144](https://github.com/NousResearch/hermes-agent/pull/5144)) +- **Progressive subdirectory hint discovery** — agent learns project structure as it navigates ([#5291](https://github.com/NousResearch/hermes-agent/pull/5291)) +- **notify_on_complete for background processes** — get notified when long-running tasks finish ([#5779](https://github.com/NousResearch/hermes-agent/pull/5779)) +- **Docker env config** — explicit container environment variables via docker_env config ([#4738](https://github.com/NousResearch/hermes-agent/pull/4738)) +- **Approval metadata included** in terminal tool results ([#5141](https://github.com/NousResearch/hermes-agent/pull/5141)) +- **Workdir parameter sanitized** in terminal tool across all backends ([#5629](https://github.com/NousResearch/hermes-agent/pull/5629)) +- **Detached process crash recovery** state corrected ([#6101](https://github.com/NousResearch/hermes-agent/pull/6101)) +- **Agent-browser paths with spaces** preserved — @Vasanthdev2004 ([#6077](https://github.com/NousResearch/hermes-agent/pull/6077)) +- **Portable base64 encoding** for image reading on macOS — @CharlieKerfoot ([#5657](https://github.com/NousResearch/hermes-agent/pull/5657)) + +### Browser +- **Switch managed browser provider** from Browserbase to Browser Use — 
@benbarclay ([#5750](https://github.com/NousResearch/hermes-agent/pull/5750)) +- **Firecrawl cloud browser** provider — @alt-glitch ([#5628](https://github.com/NousResearch/hermes-agent/pull/5628)) +- **JS evaluation** via browser_console expression parameter ([#5303](https://github.com/NousResearch/hermes-agent/pull/5303)) +- **Windows browser** fixes ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665)) + +### MCP +- **MCP OAuth 2.1 PKCE** — full standards-compliant OAuth client support ([#5420](https://github.com/NousResearch/hermes-agent/pull/5420)) +- **OSV malware check** for MCP extension packages ([#5305](https://github.com/NousResearch/hermes-agent/pull/5305)) +- **Prefer structuredContent over text** + no_mcp sentinel ([#5979](https://github.com/NousResearch/hermes-agent/pull/5979)) +- **Unknown toolsets warning suppressed** for MCP server names ([#5279](https://github.com/NousResearch/hermes-agent/pull/5279)) + +### Web & Files +- **.zip document support** + auto-mount cache dirs into remote backends ([#4846](https://github.com/NousResearch/hermes-agent/pull/4846)) +- **Redact query secrets** in send_message errors — @WAXLYY ([#5650](https://github.com/NousResearch/hermes-agent/pull/5650)) + +### Delegation +- **Credential pool sharing** + workspace path hints for subagents ([#5748](https://github.com/NousResearch/hermes-agent/pull/5748)) + +### ACP (VS Code / Zed / JetBrains) +- **Aggregate ACP improvements** — auth compat, protocol fixes, command ads, delegation, SSE events ([#5292](https://github.com/NousResearch/hermes-agent/pull/5292)) + +--- + +## 🧩 Skills Ecosystem + +### Skills System +- **Skill config interface** — skills can declare required config.yaml settings, prompted during setup, injected at load time ([#5635](https://github.com/NousResearch/hermes-agent/pull/5635)) +- **Plugin CLI registration system** — plugins register their own CLI subcommands without touching main.py 
([#5295](https://github.com/NousResearch/hermes-agent/pull/5295)) +- **Request-scoped API hooks** with tool call correlation IDs for plugins ([#5427](https://github.com/NousResearch/hermes-agent/pull/5427)) +- **Session lifecycle hooks** — on_session_finalize and on_session_reset for CLI + gateway ([#6129](https://github.com/NousResearch/hermes-agent/pull/6129)) +- **Prompt for required env vars** during plugin install — @kshitijk4poor ([#5470](https://github.com/NousResearch/hermes-agent/pull/5470)) +- **Plugin name validation** — reject names that resolve to plugins root ([#5368](https://github.com/NousResearch/hermes-agent/pull/5368)) +- **pre_llm_call plugin context** moved to user message to preserve prompt cache ([#5146](https://github.com/NousResearch/hermes-agent/pull/5146)) + +### New & Updated Skills +- **popular-web-designs** — 54 production website design systems ([#5194](https://github.com/NousResearch/hermes-agent/pull/5194)) +- **p5js creative coding** — @SHL0MS ([#5600](https://github.com/NousResearch/hermes-agent/pull/5600)) +- **manim-video** — mathematical and technical animations — @SHL0MS ([#4930](https://github.com/NousResearch/hermes-agent/pull/4930)) +- **llm-wiki** — Karpathy's LLM Wiki skill ([#5635](https://github.com/NousResearch/hermes-agent/pull/5635)) +- **gitnexus-explorer** — codebase indexing and knowledge serving ([#5208](https://github.com/NousResearch/hermes-agent/pull/5208)) +- **research-paper-writing** — AI-Scientist & GPT-Researcher patterns — @SHL0MS ([#5421](https://github.com/NousResearch/hermes-agent/pull/5421)) +- **blogwatcher** updated to JulienTant's fork ([#5759](https://github.com/NousResearch/hermes-agent/pull/5759)) +- **claude-code skill** comprehensive rewrite v2.0 + v2.2 ([#5155](https://github.com/NousResearch/hermes-agent/pull/5155), [#5158](https://github.com/NousResearch/hermes-agent/pull/5158)) +- **Code verification skills** consolidated into one 
([#4854](https://github.com/NousResearch/hermes-agent/pull/4854)) +- **Manim CE reference docs** expanded — geometry, animations, LaTeX — @leotrs ([#5791](https://github.com/NousResearch/hermes-agent/pull/5791)) +- **Manim-video references** — design thinking, updaters, paper explainer, decorations, production quality — @SHL0MS ([#5588](https://github.com/NousResearch/hermes-agent/pull/5588), [#5408](https://github.com/NousResearch/hermes-agent/pull/5408)) + +--- + +## 🔒 Security & Reliability + +### Security Hardening +- **Consolidated security** — SSRF protections, timing attack mitigations, tar traversal prevention, credential leakage guards ([#5944](https://github.com/NousResearch/hermes-agent/pull/5944)) +- **Cross-session isolation** + cron path traversal hardening ([#5613](https://github.com/NousResearch/hermes-agent/pull/5613)) +- **Workdir parameter sanitized** in terminal tool across all backends ([#5629](https://github.com/NousResearch/hermes-agent/pull/5629)) +- **Approval 'once' session escalation** prevented + cron delivery platform validation ([#5280](https://github.com/NousResearch/hermes-agent/pull/5280)) +- **Profile-scoped Google Workspace OAuth tokens** protected ([#4910](https://github.com/NousResearch/hermes-agent/pull/4910)) + +### Reliability +- **Aggressive worktree and branch cleanup** to prevent accumulation ([#6134](https://github.com/NousResearch/hermes-agent/pull/6134)) +- **O(n²) catastrophic backtracking** in redact regex fixed — 100x improvement on large outputs ([#4962](https://github.com/NousResearch/hermes-agent/pull/4962)) +- **Runtime stability fixes** across core, web, delegate, and browser tools ([#4843](https://github.com/NousResearch/hermes-agent/pull/4843)) +- **API server streaming fix** + conversation history support ([#5977](https://github.com/NousResearch/hermes-agent/pull/5977)) +- **OpenViking API endpoint paths** and response parsing corrected ([#5078](https://github.com/NousResearch/hermes-agent/pull/5078)) + +--- 
+ +## 🐛 Notable Bug Fixes + +- **9 community bugfixes salvaged** — gateway, cron, deps, macOS launchd in one batch ([#5288](https://github.com/NousResearch/hermes-agent/pull/5288)) +- **Batch core bug fixes** — model config, session reset, alias fallback, launchctl, delegation, atomic writes ([#5630](https://github.com/NousResearch/hermes-agent/pull/5630)) +- **Batch gateway/platform fixes** — matrix E2EE, CJK input, Windows browser, Feishu reconnect + ACL ([#5665](https://github.com/NousResearch/hermes-agent/pull/5665)) +- **Stale test skips removed**, regex backtracking, file search bug, and test flakiness ([#4969](https://github.com/NousResearch/hermes-agent/pull/4969)) +- **Nix flake** — read version, regen uv.lock, add hermes_logging — @alt-glitch ([#5651](https://github.com/NousResearch/hermes-agent/pull/5651)) +- **Lowercase variable redaction** regression tests ([#5185](https://github.com/NousResearch/hermes-agent/pull/5185)) + +--- + +## 🧪 Testing + +- **57 failing CI tests repaired** across 14 files ([#5823](https://github.com/NousResearch/hermes-agent/pull/5823)) +- **Test suite re-architecture** + CI failure fixes — @alt-glitch ([#5946](https://github.com/NousResearch/hermes-agent/pull/5946)) +- **Codebase-wide lint cleanup** — unused imports, dead code, and inefficient patterns ([#5821](https://github.com/NousResearch/hermes-agent/pull/5821)) +- **browser_close tool removed** — auto-cleanup handles it ([#5792](https://github.com/NousResearch/hermes-agent/pull/5792)) + +--- + +## 📚 Documentation + +- **Comprehensive documentation audit** — fix stale info, expand thin pages, add depth ([#5393](https://github.com/NousResearch/hermes-agent/pull/5393)) +- **40+ discrepancies fixed** between documentation and codebase ([#5818](https://github.com/NousResearch/hermes-agent/pull/5818)) +- **13 features documented** from last week's PRs ([#5815](https://github.com/NousResearch/hermes-agent/pull/5815)) +- **Guides section overhaul** — fix existing + add 3 new 
tutorials ([#5735](https://github.com/NousResearch/hermes-agent/pull/5735)) +- **Salvaged 4 docs PRs** — docker setup, post-update validation, local LLM guide, signal-cli install ([#5727](https://github.com/NousResearch/hermes-agent/pull/5727)) +- **Discord configuration reference** ([#5386](https://github.com/NousResearch/hermes-agent/pull/5386)) +- **Community FAQ entries** for common workflows and troubleshooting ([#4797](https://github.com/NousResearch/hermes-agent/pull/4797)) +- **WSL2 networking guide** for local model servers ([#5616](https://github.com/NousResearch/hermes-agent/pull/5616)) +- **Honcho CLI reference** + plugin CLI registration docs ([#5308](https://github.com/NousResearch/hermes-agent/pull/5308)) +- **Obsidian Headless setup** for servers in llm-wiki ([#5660](https://github.com/NousResearch/hermes-agent/pull/5660)) +- **Hermes Mod visual skin editor** added to skins page ([#6095](https://github.com/NousResearch/hermes-agent/pull/6095)) + +--- + +## 👥 Contributors + +### Core +- **@teknium1** — 179 PRs + +### Top Community Contributors +- **@SHL0MS** (7 PRs) — p5js creative coding skill, manim-video skill + 5 reference expansions, research-paper-writing, Nous OAuth fix, manim font fix +- **@alt-glitch** (3 PRs) — Firecrawl cloud browser provider, test re-architecture + CI fixes, Nix flake fixes +- **@benbarclay** (2 PRs) — Browser Use managed provider switch, Nous portal base URL fix +- **@CharlieKerfoot** (2 PRs) — macOS portable base64 encoding, thread-safe PairingStore +- **@WAXLYY** (2 PRs) — send_message secret redaction, gateway media URL sanitization +- **@MadKangYu** (2 PRs) — Telegram log noise reduction, context compaction fix for temperature-restricted models + +### All Contributors +@alt-glitch, @austinpickett, @auspic7, @benbarclay, @CharlieKerfoot, @GratefulDave, @kshitijk4poor, @leotrs, @lumethegreat, @MadKangYu, @nericervin, @ryanautomated, @SHL0MS, @techguysimon, @tymrtn, @Vasanthdev2004, @WAXLYY, @xinbenlv + +--- + +**Full 
Changelog**: [v2026.4.3...v2026.4.8](https://github.com/NousResearch/hermes-agent/compare/v2026.4.3...v2026.4.8) diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py index 0873d3d2..959332e8 100644 --- a/hermes_cli/__init__.py +++ b/hermes_cli/__init__.py @@ -11,5 +11,5 @@ Provides subcommands for: - hermes cron - Manage cron jobs """ -__version__ = "0.7.0" -__release_date__ = "2026.4.3" +__version__ = "0.8.0" +__release_date__ = "2026.4.8" diff --git a/pyproject.toml b/pyproject.toml index c35c94e2..8982e6e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "hermes-agent" -version = "0.7.0" +version = "0.8.0" description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere" readme = "README.md" requires-python = ">=3.11" From ff6a86cb529a372198b4b80d5e022e32a4a3f2cc Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 8 Apr 2026 04:59:45 -0700 Subject: [PATCH 140/154] =?UTF-8?q?docs:=20update=20v0.8.0=20highlights=20?= =?UTF-8?q?=E2=80=94=20notify=5Fon=5Fcomplete,=20MiMo=20v2=20Pro,=20reorde?= =?UTF-8?q?r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RELEASE_v0.8.0.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/RELEASE_v0.8.0.md b/RELEASE_v0.8.0.md index 8cdea2b1..57c8b05a 100644 --- a/RELEASE_v0.8.0.md +++ b/RELEASE_v0.8.0.md @@ -2,18 +2,22 @@ **Release Date:** April 8, 2026 -> The intelligence release — native Google AI Studio provider, live model switching across all platforms, self-optimized GPT/Codex guidance, smart inactivity timeouts, approval buttons, interactive model pickers, MCP OAuth 2.1, and 209 merged PRs with 82 resolved issues. 
+> The intelligence release — background task auto-notifications, free MiMo v2 Pro on Nous Portal, live model switching across all platforms, self-optimized GPT/Codex guidance, native Google AI Studio, smart inactivity timeouts, approval buttons, MCP OAuth 2.1, and 209 merged PRs with 82 resolved issues. --- ## ✨ Highlights -- **Google AI Studio (Gemini) Native Provider** — Direct access to Gemini models through Google's AI Studio API. Includes automatic models.dev registry integration for real-time context length detection across any provider. ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577)) +- **Background Process Auto-Notifications (`notify_on_complete`)** — Background tasks can now automatically notify the agent when they finish. Start a long-running process (AI model training, test suites, deployments, builds) and the agent gets notified on completion — no polling needed. The agent can keep working on other things and pick up results when they land. ([#5779](https://github.com/NousResearch/hermes-agent/pull/5779)) + +- **Free Xiaomi MiMo v2 Pro on Nous Portal** — Nous Portal now supports the free-tier Xiaomi MiMo v2 Pro model for auxiliary tasks (compression, vision, summarization), with free-tier model gating and pricing display in model selection. ([#6018](https://github.com/NousResearch/hermes-agent/pull/6018), [#5880](https://github.com/NousResearch/hermes-agent/pull/5880)) - **Live Model Switching (`/model` Command)** — Switch models and providers mid-session from CLI, Telegram, Discord, Slack, or any gateway platform. Aggregator-aware resolution keeps you on OpenRouter/Nous when possible, with automatic cross-provider fallback when needed. Interactive model pickers on Telegram and Discord with inline buttons. 
([#5181](https://github.com/NousResearch/hermes-agent/pull/5181), [#5742](https://github.com/NousResearch/hermes-agent/pull/5742)) - **Self-Optimized GPT/Codex Tool-Use Guidance** — The agent diagnosed and patched 5 failure modes in GPT and Codex tool calling through automated behavioral benchmarking, dramatically improving reliability on OpenAI models. Includes execution discipline guidance and thinking-only prefill continuation for structured reasoning. ([#6120](https://github.com/NousResearch/hermes-agent/pull/6120), [#5414](https://github.com/NousResearch/hermes-agent/pull/5414), [#5931](https://github.com/NousResearch/hermes-agent/pull/5931)) +- **Google AI Studio (Gemini) Native Provider** — Direct access to Gemini models through Google's AI Studio API. Includes automatic models.dev registry integration for real-time context length detection across any provider. ([#5577](https://github.com/NousResearch/hermes-agent/pull/5577)) + - **Inactivity-Based Agent Timeouts** — Gateway and cron timeouts now track actual tool activity instead of wall-clock time. Long-running tasks that are actively working will never be killed — only truly idle agents time out. ([#5389](https://github.com/NousResearch/hermes-agent/pull/5389), [#5440](https://github.com/NousResearch/hermes-agent/pull/5440)) - **Approval Buttons on Slack & Telegram** — Dangerous command approval via native platform buttons instead of typing `/approve`. Slack gets thread context preservation; Telegram gets emoji reactions for approval status. 
([#5890](https://github.com/NousResearch/hermes-agent/pull/5890), [#5975](https://github.com/NousResearch/hermes-agent/pull/5975)) From 4f467700d44d133c24ea1c6cc9819d8bfcb89c97 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 13:44:58 -0700 Subject: [PATCH 141/154] fix(doctor): only check the active memory provider, not all providers unconditionally (#6285) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tools): skip camofox auto-cleanup when managed persistence is enabled When managed_persistence is enabled, cleanup_browser() was calling camofox_close() which destroys the server-side browser context via DELETE /sessions/{userId}, killing login sessions across cron runs. Add camofox_soft_cleanup() — a public wrapper that drops only the in-memory session entry when managed persistence is on, returning True. When persistence is off it returns False so the caller falls back to the full camofox_close(). The inactivity reaper still handles idle resource cleanup. Also surface a logger.warning() when _managed_persistence_enabled() fails to load config, replacing a silent except-and-return-False. Salvaged from #6182 by el-analista (Eduardo Perea Fernandez). Added public API wrapper to avoid cross-module private imports, and test coverage for both persistence paths. Co-authored-by: Eduardo Perea Fernandez * fix(doctor): only check the active memory provider, not all providers unconditionally hermes doctor had hardcoded Honcho Memory and Mem0 Memory sections that always ran regardless of the user's memory.provider config setting. After the swappable memory provider update (#4623), users with leftover Honcho config but no active provider saw false 'broken' errors. Replaced both sections with a single Memory Provider section that reads memory.provider from config.yaml and only checks the configured provider. 
Users with no external provider see a green 'Built-in memory active' check. Reported by community user michaelruiz001, confirmed by Eri (Honcho). --------- Co-authored-by: Eduardo Perea Fernandez --- hermes_cli/doctor.py | 126 ++++++++++-------- tests/hermes_cli/test_doctor.py | 70 ++++++++++ .../tools/test_browser_camofox_persistence.py | 48 +++++++ tests/tools/test_browser_cleanup.py | 56 ++++++++ tools/browser_camofox.py | 19 ++- tools/browser_tool.py | 10 +- 6 files changed, 269 insertions(+), 60 deletions(-) diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 876ab15d..361e81d2 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -812,69 +812,83 @@ def run_doctor(args): check_warn("No GITHUB_TOKEN", f"(60 req/hr rate limit — set in {_DHH}/.env for better rates)") # ========================================================================= - # Honcho memory + # Memory Provider (only check the active provider, if any) # ========================================================================= print() - print(color("◆ Honcho Memory", Colors.CYAN, Colors.BOLD)) + print(color("◆ Memory Provider", Colors.CYAN, Colors.BOLD)) + _active_memory_provider = "" try: - from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path - hcfg = HonchoClientConfig.from_global_config() - _honcho_cfg_path = resolve_config_path() + import yaml as _yaml + _mem_cfg_path = HERMES_HOME / "config.yaml" + if _mem_cfg_path.exists(): + with open(_mem_cfg_path) as _f: + _raw_cfg = _yaml.safe_load(_f) or {} + _active_memory_provider = (_raw_cfg.get("memory") or {}).get("provider", "") + except Exception: + pass - if not _honcho_cfg_path.exists(): - check_warn("Honcho config not found", "run: hermes memory setup") - elif not hcfg.enabled: - check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)") - elif not (hcfg.api_key or hcfg.base_url): - check_fail("Honcho API key or base URL not set", "run: hermes memory setup") - 
issues.append("No Honcho API key — run 'hermes memory setup'") - else: - from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client - reset_honcho_client() - try: - get_honcho_client(hcfg) - check_ok( - "Honcho connected", - f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} freq={hcfg.write_frequency}", - ) - except Exception as _e: - check_fail("Honcho connection failed", str(_e)) - issues.append(f"Honcho unreachable: {_e}") - except ImportError: - check_warn("honcho-ai not installed", "pip install honcho-ai") - except Exception as _e: - check_warn("Honcho check failed", str(_e)) + if not _active_memory_provider: + check_ok("Built-in memory active", "(no external provider configured — this is fine)") + elif _active_memory_provider == "honcho": + try: + from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path + hcfg = HonchoClientConfig.from_global_config() + _honcho_cfg_path = resolve_config_path() - # ========================================================================= - # Mem0 memory - # ========================================================================= - print() - print(color("◆ Mem0 Memory", Colors.CYAN, Colors.BOLD)) - - try: - from plugins.memory.mem0 import _load_config as _load_mem0_config - mem0_cfg = _load_mem0_config() - mem0_key = mem0_cfg.get("api_key", "") - if mem0_key: - check_ok("Mem0 API key configured") - check_info(f"user_id={mem0_cfg.get('user_id', '?')} agent_id={mem0_cfg.get('agent_id', '?')}") - # Check if mem0.json exists but is missing api_key (the bug we fixed) - mem0_json = HERMES_HOME / "mem0.json" - if mem0_json.exists(): + if not _honcho_cfg_path.exists(): + check_warn("Honcho config not found", "run: hermes memory setup") + elif not hcfg.enabled: + check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)") + elif not (hcfg.api_key or hcfg.base_url): + check_fail("Honcho API key or base URL not set", "run: hermes memory setup") + 
issues.append("No Honcho API key — run 'hermes memory setup'") + else: + from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client + reset_honcho_client() try: - import json as _json - file_cfg = _json.loads(mem0_json.read_text()) - if not file_cfg.get("api_key") and mem0_key: - check_info("api_key from .env (not in mem0.json) — this is fine") - except Exception: - pass - else: - check_warn("Mem0 not configured", "(set MEM0_API_KEY in .env or run hermes memory setup)") - except ImportError: - check_warn("Mem0 plugin not loadable", "(optional)") - except Exception as _e: - check_warn("Mem0 check failed", str(_e)) + get_honcho_client(hcfg) + check_ok( + "Honcho connected", + f"workspace={hcfg.workspace_id} mode={hcfg.recall_mode} freq={hcfg.write_frequency}", + ) + except Exception as _e: + check_fail("Honcho connection failed", str(_e)) + issues.append(f"Honcho unreachable: {_e}") + except ImportError: + check_fail("honcho-ai not installed", "pip install honcho-ai") + issues.append("Honcho is set as memory provider but honcho-ai is not installed") + except Exception as _e: + check_warn("Honcho check failed", str(_e)) + elif _active_memory_provider == "mem0": + try: + from plugins.memory.mem0 import _load_config as _load_mem0_config + mem0_cfg = _load_mem0_config() + mem0_key = mem0_cfg.get("api_key", "") + if mem0_key: + check_ok("Mem0 API key configured") + check_info(f"user_id={mem0_cfg.get('user_id', '?')} agent_id={mem0_cfg.get('agent_id', '?')}") + else: + check_fail("Mem0 API key not set", "(set MEM0_API_KEY in .env or run hermes memory setup)") + issues.append("Mem0 is set as memory provider but API key is missing") + except ImportError: + check_fail("Mem0 plugin not loadable", "pip install mem0ai") + issues.append("Mem0 is set as memory provider but mem0ai is not installed") + except Exception as _e: + check_warn("Mem0 check failed", str(_e)) + else: + # Generic check for other memory providers (openviking, hindsight, etc.) 
+ try: + from plugins.memory import load_memory_provider + _provider = load_memory_provider(_active_memory_provider) + if _provider and _provider.is_available(): + check_ok(f"{_active_memory_provider} provider active") + elif _provider: + check_warn(f"{_active_memory_provider} configured but not available", "run: hermes memory status") + else: + check_warn(f"{_active_memory_provider} plugin not found", "run: hermes memory setup") + except Exception as _e: + check_warn(f"{_active_memory_provider} check failed", str(_e)) # ========================================================================= # Profiles diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py index d91cf3f6..f30fb483 100644 --- a/tests/hermes_cli/test_doctor.py +++ b/tests/hermes_cli/test_doctor.py @@ -136,3 +136,73 @@ def test_check_gateway_service_linger_skips_when_service_not_installed(monkeypat out = capsys.readouterr().out assert out == "" assert issues == [] + + +# ── Memory provider section (doctor should only check the *active* provider) ── + + +class TestDoctorMemoryProviderSection: + """The ◆ Memory Provider section should respect memory.provider config.""" + + def _make_hermes_home(self, tmp_path, provider=""): + """Create a minimal HERMES_HOME with config.yaml.""" + home = tmp_path / ".hermes" + home.mkdir(parents=True, exist_ok=True) + import yaml + config = {"memory": {"provider": provider}} if provider else {"memory": {}} + (home / "config.yaml").write_text(yaml.dump(config)) + return home + + def _run_doctor_and_capture(self, monkeypatch, tmp_path, provider=""): + """Run doctor and capture stdout.""" + home = self._make_hermes_home(tmp_path, provider) + monkeypatch.setattr(doctor_mod, "HERMES_HOME", home) + monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", tmp_path / "project") + monkeypatch.setattr(doctor_mod, "_DHH", str(home)) + (tmp_path / "project").mkdir(exist_ok=True) + + # Stub tool availability (returns empty) so doctor runs past it + fake_model_tools = 
types.SimpleNamespace( + check_tool_availability=lambda *a, **kw: ([], []), + TOOLSET_REQUIREMENTS={}, + ) + monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools) + + # Stub auth checks to avoid real API calls + try: + from hermes_cli import auth as _auth_mod + monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {}) + monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {}) + except Exception: + pass + + import io, contextlib + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + doctor_mod.run_doctor(Namespace(fix=False)) + return buf.getvalue() + + def test_no_provider_shows_builtin_ok(self, monkeypatch, tmp_path): + out = self._run_doctor_and_capture(monkeypatch, tmp_path, provider="") + assert "Memory Provider" in out + assert "Built-in memory active" in out + # Should NOT mention Honcho or Mem0 errors + assert "Honcho API key" not in out + assert "Mem0" not in out + + def test_honcho_provider_not_installed_shows_fail(self, monkeypatch, tmp_path): + # Make honcho import fail + monkeypatch.setitem( + sys.modules, "plugins.memory.honcho.client", None + ) + out = self._run_doctor_and_capture(monkeypatch, tmp_path, provider="honcho") + assert "Memory Provider" in out + # Should show failure since honcho is set but not importable + assert "Built-in memory active" not in out + + def test_mem0_provider_not_installed_shows_fail(self, monkeypatch, tmp_path): + # Make mem0 import fail + monkeypatch.setitem(sys.modules, "plugins.memory.mem0", None) + out = self._run_doctor_and_capture(monkeypatch, tmp_path, provider="mem0") + assert "Memory Provider" in out + assert "Built-in memory active" not in out diff --git a/tests/tools/test_browser_camofox_persistence.py b/tests/tools/test_browser_camofox_persistence.py index 0fa5723c..0e9c8637 100644 --- a/tests/tools/test_browser_camofox_persistence.py +++ b/tests/tools/test_browser_camofox_persistence.py @@ -16,6 +16,7 @@ from tools.browser_camofox import ( _managed_persistence_enabled, 
camofox_close, camofox_navigate, + camofox_soft_cleanup, check_camofox_available, cleanup_all_camofox_sessions, get_vnc_url, @@ -240,3 +241,50 @@ class TestVncUrlDiscovery: assert result["vnc_url"] == "http://localhost:6080" assert "vnc_hint" in result + + +class TestCamofoxSoftCleanup: + """camofox_soft_cleanup drops local state only when managed persistence is on.""" + + def test_returns_true_and_drops_session_when_enabled(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + + with _enable_persistence(): + _get_session("task-1") + result = camofox_soft_cleanup("task-1") + + assert result is True + # Session should have been dropped from in-memory store + import tools.browser_camofox as mod + with mod._sessions_lock: + assert "task-1" not in mod._sessions + + def test_returns_false_when_disabled(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + + _get_session("task-1") + config = {"browser": {"camofox": {"managed_persistence": False}}} + with patch("tools.browser_camofox.load_config", return_value=config): + result = camofox_soft_cleanup("task-1") + + assert result is False + # Session should still be present — not dropped + import tools.browser_camofox as mod + with mod._sessions_lock: + assert "task-1" in mod._sessions + + def test_does_not_call_server_delete(self, tmp_path, monkeypatch): + """Soft cleanup must never hit the Camofox /sessions DELETE endpoint.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377") + + with ( + _enable_persistence(), + patch("tools.browser_camofox.requests.delete") as mock_delete, + ): + _get_session("task-1") + camofox_soft_cleanup("task-1") + + mock_delete.assert_not_called() diff --git a/tests/tools/test_browser_cleanup.py b/tests/tools/test_browser_cleanup.py index df21f3a0..81792790 
100644 --- a/tests/tools/test_browser_cleanup.py +++ b/tests/tools/test_browser_cleanup.py @@ -65,6 +65,62 @@ class TestBrowserCleanup: mock_stop.assert_called_once_with("task-1") mock_run.assert_called_once_with("task-1", "close", [], timeout=10) + def test_cleanup_camofox_managed_persistence_skips_close(self): + """When camofox mode + managed persistence, soft_cleanup fires instead of close.""" + browser_tool = self.browser_tool + browser_tool._active_sessions["task-1"] = { + "session_name": "sess-1", + "bb_session_id": None, + } + browser_tool._session_last_activity["task-1"] = 123.0 + + with ( + patch("tools.browser_tool._is_camofox_mode", return_value=True), + patch("tools.browser_tool._maybe_stop_recording") as mock_stop, + patch( + "tools.browser_tool._run_browser_command", + return_value={"success": True}, + ), + patch("tools.browser_tool.os.path.exists", return_value=False), + patch( + "tools.browser_camofox.camofox_soft_cleanup", + return_value=True, + ) as mock_soft, + patch("tools.browser_camofox.camofox_close") as mock_close, + ): + browser_tool.cleanup_browser("task-1") + + mock_soft.assert_called_once_with("task-1") + mock_close.assert_not_called() + + def test_cleanup_camofox_no_persistence_calls_close(self): + """When camofox mode but managed persistence is off, camofox_close fires.""" + browser_tool = self.browser_tool + browser_tool._active_sessions["task-1"] = { + "session_name": "sess-1", + "bb_session_id": None, + } + browser_tool._session_last_activity["task-1"] = 123.0 + + with ( + patch("tools.browser_tool._is_camofox_mode", return_value=True), + patch("tools.browser_tool._maybe_stop_recording") as mock_stop, + patch( + "tools.browser_tool._run_browser_command", + return_value={"success": True}, + ), + patch("tools.browser_tool.os.path.exists", return_value=False), + patch( + "tools.browser_camofox.camofox_soft_cleanup", + return_value=False, + ) as mock_soft, + patch("tools.browser_camofox.camofox_close") as mock_close, + ): + 
browser_tool.cleanup_browser("task-1") + + mock_soft.assert_called_once_with("task-1") + mock_close.assert_called_once_with("task-1") + def test_emergency_cleanup_clears_all_tracking_state(self): browser_tool = self.browser_tool browser_tool._cleanup_done = False diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index 226e99b5..3a305bbc 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -101,7 +101,8 @@ def _managed_persistence_enabled() -> bool: """ try: camofox_cfg = load_config().get("browser", {}).get("camofox", {}) - except Exception: + except Exception as exc: + logger.warning("managed_persistence check failed, defaulting to disabled: %s", exc) return False return bool(camofox_cfg.get("managed_persistence")) @@ -172,6 +173,22 @@ def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]: return _sessions.pop(task_id, None) +def camofox_soft_cleanup(task_id: Optional[str] = None) -> bool: + """Release the in-memory session without destroying the server-side context. + + When managed persistence is enabled the browser profile (and its cookies) + must survive across agent tasks. This helper drops only the local tracking + entry and returns ``True``. When managed persistence is *not* enabled it + does nothing and returns ``False`` so the caller can fall back to + :func:`camofox_close`. 
+ """ + if _managed_persistence_enabled(): + _drop_session(task_id) + logger.debug("Camofox soft cleanup for task %s (managed persistence)", task_id) + return True + return False + + # --------------------------------------------------------------------------- # HTTP helpers # --------------------------------------------------------------------------- diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 012b8eb0..e62a586c 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1935,11 +1935,15 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: if task_id is None: task_id = "default" - # Also clean up Camofox session if running in Camofox mode + # Also clean up Camofox session if running in Camofox mode. + # Skip full close when managed persistence is enabled — the browser + # profile (and its session cookies) must survive across agent tasks. + # The inactivity reaper still frees idle resources. if _is_camofox_mode(): try: - from tools.browser_camofox import camofox_close - camofox_close(task_id) + from tools.browser_camofox import camofox_close, camofox_soft_cleanup + if not camofox_soft_cleanup(task_id): + camofox_close(task_id) except Exception as e: logger.debug("Camofox cleanup for task %s: %s", task_id, e) From 1631895d5a05d3489ec82be72d8f599b1d217065 Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 8 Apr 2026 13:45:04 -0700 Subject: [PATCH 142/154] docs(telegram): add proxy support section Documents the proxy env var support added in PR #3591 (salvage of #3411 by @kufufu9). Covers HTTPS_PROXY/HTTP_PROXY/ALL_PROXY precedence, configuration methods, and scope. 
--- website/docs/user-guide/messaging/telegram.md | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index a59b73ca..4e4495ad 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -463,6 +463,40 @@ platforms: You usually don't need to configure this manually. The auto-discovery via DoH handles most restricted-network scenarios. The `TELEGRAM_FALLBACK_IPS` env var is only needed if DoH is also blocked on your network. ::: +## Proxy Support + +If your network requires an HTTP proxy to reach the internet (common in corporate environments), the Telegram adapter automatically reads standard proxy environment variables and routes all connections through the proxy. + +### Supported variables + +The adapter checks these environment variables in order, using the first one that is set: + +1. `HTTPS_PROXY` +2. `HTTP_PROXY` +3. `ALL_PROXY` +4. `https_proxy` / `http_proxy` / `all_proxy` (lowercase variants) + +### Configuration + +Set the proxy in your environment before starting the gateway: + +```bash +export HTTPS_PROXY=http://proxy.example.com:8080 +hermes gateway +``` + +Or add it to `~/.hermes/.env`: + +```bash +HTTPS_PROXY=http://proxy.example.com:8080 +``` + +The proxy applies to both the primary transport and all fallback IP transports. No additional Hermes configuration is needed — if the environment variable is set, it's used automatically. + +:::note +This covers the custom fallback transport layer that Hermes uses for Telegram connections. The standard `httpx` client used elsewhere already respects proxy env vars natively. 
+::: + ## Message Reactions The bot can add emoji reactions to messages as visual processing feedback: From a1213d06bdffe9a631f0f94663a08de125a41498 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 13:46:14 -0700 Subject: [PATCH 143/154] fix(hindsight): correct config key mismatch and add base URL support (#6282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #6259. Three bugs fixed: 1. Config key mismatch: _get_client() and _start_daemon() read 'llmApiKey' (camelCase) but save_config() stores 'llm_api_key' (snake_case). The config value was never read — only the env var fallback worked. 2. Missing base URL support: users on OpenRouter or custom endpoints had no way to configure HINDSIGHT_API_LLM_BASE_URL through setup. Added llm_base_url to config schema with empty default, passed conditionally to HindsightEmbedded constructor. 3. Daemon config change detection: config_changed now also checks HINDSIGHT_API_LLM_BASE_URL, and the daemon profile .env includes the base URL when set. Keeps HINDSIGHT_API_LLM_API_KEY (with double API) in the daemon profile .env — this matches the upstream hindsight .env.example convention. --- plugins/memory/hindsight/README.md | 2 ++ plugins/memory/hindsight/__init__.py | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/plugins/memory/hindsight/README.md b/plugins/memory/hindsight/README.md index 34f5088f..3a1df59e 100644 --- a/plugins/memory/hindsight/README.md +++ b/plugins/memory/hindsight/README.md @@ -73,6 +73,7 @@ Config file: `~/.hermes/hindsight/config.json` |-----|---------|-------------| | `llm_provider` | `openai` | LLM provider: `openai`, `anthropic`, `gemini`, `groq`, `minimax`, `ollama` | | `llm_model` | per-provider | Model name (e.g. `gpt-4o-mini`, `openai/gpt-oss-120b`) | +| `llm_base_url` | — | LLM Base URL override (e.g. 
`https://openrouter.ai/api/v1`) | The LLM API key is stored in `~/.hermes/.env` as `HINDSIGHT_LLM_API_KEY`. @@ -92,6 +93,7 @@ Available in `hybrid` and `tools` memory modes: |----------|-------------| | `HINDSIGHT_API_KEY` | API key for Hindsight Cloud | | `HINDSIGHT_LLM_API_KEY` | LLM API key for local mode | +| `HINDSIGHT_API_LLM_BASE_URL` | LLM Base URL for local mode (e.g. OpenRouter) | | `HINDSIGHT_API_URL` | Override API endpoint | | `HINDSIGHT_BANK_ID` | Override bank name | | `HINDSIGHT_BUDGET` | Override recall budget | diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py index 199a7dd5..c8749774 100644 --- a/plugins/memory/hindsight/__init__.py +++ b/plugins/memory/hindsight/__init__.py @@ -235,6 +235,7 @@ class HindsightMemoryProvider(MemoryProvider): {"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://ui.hindsight.vectorize.io", "when": {"mode": "cloud"}}, {"key": "llm_provider", "description": "LLM provider for local mode", "default": "openai", "choices": ["openai", "anthropic", "gemini", "groq", "minimax", "ollama"], "when": {"mode": "local"}}, {"key": "llm_api_key", "description": "LLM API key for local Hindsight", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local"}}, + {"key": "llm_base_url", "description": "LLM Base URL (e.g. 
for OpenRouter)", "default": "", "env_var": "HINDSIGHT_API_LLM_BASE_URL", "when": {"mode": "local"}}, {"key": "llm_model", "description": "LLM model for local mode", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local"}}, {"key": "bank_id", "description": "Memory bank name", "default": "hermes"}, {"key": "budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]}, @@ -251,12 +252,16 @@ class HindsightMemoryProvider(MemoryProvider): # different loop" errors during GC — we handle cleanup in # shutdown() instead. HindsightEmbedded.__del__ = lambda self: None - self._client = HindsightEmbedded( + kwargs = dict( profile=self._config.get("profile", "hermes"), llm_provider=self._config.get("llm_provider", ""), - llm_api_key=self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", ""), + llm_api_key=self._config.get("llm_api_key") or os.environ.get("HINDSIGHT_LLM_API_KEY", ""), llm_model=self._config.get("llm_model", ""), ) + base_url = self._config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "") + if base_url: + kwargs["llm_base_url"] = base_url + self._client = HindsightEmbedded(**kwargs) else: from hindsight_client import Hindsight kwargs = {"base_url": self._api_url, "timeout": 30.0} @@ -311,9 +316,10 @@ class HindsightMemoryProvider(MemoryProvider): # If the config changed and the daemon is running, stop it. 
from pathlib import Path as _Path profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env" - current_key = self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", "") + current_key = self._config.get("llm_api_key") or os.environ.get("HINDSIGHT_LLM_API_KEY", "") current_provider = self._config.get("llm_provider", "") current_model = self._config.get("llm_model", "") + current_base_url = self._config.get("llm_base_url") or os.environ.get("HINDSIGHT_API_LLM_BASE_URL", "") # Read saved profile config saved = {} @@ -326,18 +332,22 @@ class HindsightMemoryProvider(MemoryProvider): config_changed = ( saved.get("HINDSIGHT_API_LLM_PROVIDER") != current_provider or saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or - saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key + saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key or + saved.get("HINDSIGHT_API_LLM_BASE_URL", "") != current_base_url ) if config_changed: # Write updated profile .env profile_env.parent.mkdir(parents=True, exist_ok=True) - profile_env.write_text( + env_lines = ( f"HINDSIGHT_API_LLM_PROVIDER={current_provider}\n" f"HINDSIGHT_API_LLM_API_KEY={current_key}\n" f"HINDSIGHT_API_LLM_MODEL={current_model}\n" f"HINDSIGHT_API_LOG_LEVEL=info\n" ) + if current_base_url: + env_lines += f"HINDSIGHT_API_LLM_BASE_URL={current_base_url}\n" + profile_env.write_text(env_lines) if client._manager.is_running(profile): with open(log_path, "a") as f: f.write("\n=== Config changed, restarting daemon ===\n") From 3377017eb4a0741d8887dd28d3bc35808b04d077 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Wed, 8 Apr 2026 20:48:21 +0530 Subject: [PATCH 144/154] feat(qwen): add Qwen OAuth provider with portal request support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on #6079 by @tunamitom with critical fixes and comprehensive tests. 
Changes from #6079: - Fix: sanitization overwrite bug — Qwen message prep now runs AFTER codex field sanitization, not before (was silently discarding Qwen transforms) - Fix: missing try/except AuthError in runtime_provider.py — stale Qwen credentials now fall through to next provider on auto-detect - Fix: 'qwen' alias conflict — bare 'qwen' stays mapped to 'alibaba' (DashScope); use 'qwen-portal' or 'qwen-cli' for the OAuth provider - Fix: hardcoded ['coder-model'] replaced with live API fetch + curated fallback list (qwen3-coder-plus, qwen3-coder) - Fix: extract _is_qwen_portal() helper + _qwen_portal_headers() to replace 5 inline 'portal.qwen.ai' string checks and share headers between init and credential swap - Fix: add Qwen branch to _apply_client_headers_for_base_url for mid-session credential swaps - Fix: remove suspicious TypeError catch blocks around _prompt_provider_choice - Fix: handle bare string items in content lists (were silently dropped) - Fix: remove redundant dict() copies after deepcopy in message prep - Revert: unrelated ai-gateway test mock removal and model_switch.py comment deletion New tests (30 test functions): - _qwen_cli_auth_path, _read_qwen_cli_tokens (success + 3 error paths) - _save_qwen_cli_tokens (roundtrip, parent creation, permissions) - _qwen_access_token_is_expiring (5 edge cases: fresh, expired, within skew, None, non-numeric) - _refresh_qwen_cli_tokens (success, preserve old refresh, 4 error paths, default expires_in, disk persistence) - resolve_qwen_runtime_credentials (fresh, auto-refresh, force-refresh, missing token, env override) - get_qwen_auth_status (logged in, not logged in) - Runtime provider resolution (direct, pool entry, alias) - _build_api_kwargs (metadata, vl_high_resolution_images, message formatting, max_tokens suppression) --- .env.example | 8 + agent/auxiliary_client.py | 1 + agent/model_metadata.py | 3 + agent/models_dev.py | 1 + hermes_cli/auth.py | 183 ++++++++ hermes_cli/auth_commands.py | 24 +- 
hermes_cli/main.py | 54 +++ hermes_cli/model_normalize.py | 1 + hermes_cli/models.py | 3 + hermes_cli/providers.py | 6 + hermes_cli/runtime_provider.py | 23 + hermes_cli/status.py | 19 +- run_agent.py | 112 ++++- tests/hermes_cli/test_auth_qwen_provider.py | 399 ++++++++++++++++++ .../test_runtime_provider_resolution.py | 76 ++++ tests/run_agent/test_run_agent.py | 46 ++ 16 files changed, 955 insertions(+), 4 deletions(-) create mode 100644 tests/hermes_cli/test_auth_qwen_provider.py diff --git a/.env.example b/.env.example index 02d05919..c8c4af9b 100644 --- a/.env.example +++ b/.env.example @@ -81,6 +81,14 @@ # HF_TOKEN= # OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1 # Override default base URL +# ============================================================================= +# LLM PROVIDER (Qwen OAuth) +# ============================================================================= +# Qwen OAuth reuses your local Qwen CLI login (qwen auth qwen-oauth). +# No API key needed — credentials come from ~/.qwen/oauth_creds.json. 
+# Optional base URL override: +# HERMES_QWEN_BASE_URL=https://portal.qwen.ai/v1 + # ============================================================================= # TOOL API KEYS # ============================================================================= diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 2b99ac07..b71c96ac 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -106,6 +106,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "opencode-zen": "gemini-3-flash", "opencode-go": "glm-5", "kilocode": "google/gemini-3-flash-preview", + "qwen-oauth": "qwen3-coder", } # OpenRouter app attribution headers diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 0a227118..14364a1e 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -26,12 +26,14 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({ "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", "gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek", "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba", + "qwen-oauth", "custom", "local", # Common aliases "google", "google-gemini", "google-ai-studio", "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot", "github-models", "kimi", "moonshot", "claude", "deep-seek", "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen", + "qwen-portal", }) @@ -187,6 +189,7 @@ _URL_TO_PROVIDER: Dict[str, str] = { "api.minimax": "minimax", "dashscope.aliyuncs.com": "alibaba", "dashscope-intl.aliyuncs.com": "alibaba", + "portal.qwen.ai": "qwen-oauth", "openrouter.ai": "openrouter", "generativelanguage.googleapis.com": "gemini", "inference-api.nousresearch.com": "nous", diff --git a/agent/models_dev.py b/agent/models_dev.py index d3de5061..cc360d77 100644 --- a/agent/models_dev.py +++ b/agent/models_dev.py @@ -153,6 +153,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = { "minimax-cn": "minimax-cn", "deepseek": "deepseek", "alibaba": 
"alibaba", + "qwen-oauth": "alibaba", "copilot": "github-copilot", "ai-gateway": "vercel", "opencode-zen": "opencode", diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 2025bbcc..b7360fdd 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -67,12 +67,16 @@ DEFAULT_AGENT_KEY_MIN_TTL_SECONDS = 30 * 60 # 30 minutes ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # refresh 2 min before expiry DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS = 1 # poll at most every 1s DEFAULT_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex" +DEFAULT_QWEN_BASE_URL = "https://portal.qwen.ai/v1" DEFAULT_GITHUB_MODELS_BASE_URL = "https://api.githubcopilot.com" DEFAULT_COPILOT_ACP_BASE_URL = "acp://copilot" DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai" CODEX_OAUTH_CLIENT_ID = "app_EMoamEEZ73f0CkXaXp7hrann" CODEX_OAUTH_TOKEN_URL = "https://auth.openai.com/oauth/token" CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 +QWEN_OAUTH_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56" +QWEN_OAUTH_TOKEN_URL = "https://chat.qwen.ai/api/v1/oauth2/token" +QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS = 120 # ============================================================================= @@ -112,6 +116,12 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { auth_type="oauth_external", inference_base_url=DEFAULT_CODEX_BASE_URL, ), + "qwen-oauth": ProviderConfig( + id="qwen-oauth", + name="Qwen OAuth", + auth_type="oauth_external", + inference_base_url=DEFAULT_QWEN_BASE_URL, + ), "copilot": ProviderConfig( id="copilot", name="GitHub Copilot", @@ -817,6 +827,7 @@ def resolve_provider( "github-copilot-acp": "copilot-acp", "copilot-acp-agent": "copilot-acp", "aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway", "opencode": "opencode-zen", "zen": "opencode-zen", + "qwen-portal": "qwen-oauth", "qwen-cli": "qwen-oauth", "qwen-oauth": "qwen-oauth", "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", "go": "opencode-go", 
"opencode-go-sub": "opencode-go", "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode", @@ -946,6 +957,176 @@ def _codex_access_token_is_expiring(access_token: Any, skew_seconds: int) -> boo return float(exp) <= (time.time() + max(0, int(skew_seconds))) +def _qwen_cli_auth_path() -> Path: + return Path.home() / ".qwen" / "oauth_creds.json" + + +def _read_qwen_cli_tokens() -> Dict[str, Any]: + auth_path = _qwen_cli_auth_path() + if not auth_path.exists(): + raise AuthError( + "Qwen CLI credentials not found. Run 'qwen auth qwen-oauth' first.", + provider="qwen-oauth", + code="qwen_auth_missing", + ) + try: + data = json.loads(auth_path.read_text(encoding="utf-8")) + except Exception as exc: + raise AuthError( + f"Failed to read Qwen CLI credentials from {auth_path}: {exc}", + provider="qwen-oauth", + code="qwen_auth_read_failed", + ) from exc + if not isinstance(data, dict): + raise AuthError( + f"Invalid Qwen CLI credentials in {auth_path}.", + provider="qwen-oauth", + code="qwen_auth_invalid", + ) + return data + + +def _save_qwen_cli_tokens(tokens: Dict[str, Any]) -> Path: + auth_path = _qwen_cli_auth_path() + auth_path.parent.mkdir(parents=True, exist_ok=True) + tmp_path = auth_path.with_suffix(".tmp") + tmp_path.write_text(json.dumps(tokens, indent=2, sort_keys=True) + "\n", encoding="utf-8") + os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR) + tmp_path.replace(auth_path) + return auth_path + + +def _qwen_access_token_is_expiring(expiry_date_ms: Any, skew_seconds: int = QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS) -> bool: + try: + expiry_ms = int(expiry_date_ms) + except Exception: + return True + return (time.time() + max(0, int(skew_seconds))) * 1000 >= expiry_ms + + +def _refresh_qwen_cli_tokens(tokens: Dict[str, Any], timeout_seconds: float = 20.0) -> Dict[str, Any]: + refresh_token = str(tokens.get("refresh_token", "") or "").strip() + if not refresh_token: + raise AuthError( + "Qwen OAuth refresh token missing. 
Re-run 'qwen auth qwen-oauth'.", + provider="qwen-oauth", + code="qwen_refresh_token_missing", + ) + + try: + response = httpx.post( + QWEN_OAUTH_TOKEN_URL, + headers={ + "Content-Type": "application/x-www-form-urlencoded", + "Accept": "application/json", + }, + data={ + "grant_type": "refresh_token", + "refresh_token": refresh_token, + "client_id": QWEN_OAUTH_CLIENT_ID, + }, + timeout=timeout_seconds, + ) + except Exception as exc: + raise AuthError( + f"Qwen OAuth refresh failed: {exc}", + provider="qwen-oauth", + code="qwen_refresh_failed", + ) from exc + + if response.status_code >= 400: + body = response.text.strip() + raise AuthError( + "Qwen OAuth refresh failed. Re-run 'qwen auth qwen-oauth'." + + (f" Response: {body}" if body else ""), + provider="qwen-oauth", + code="qwen_refresh_failed", + ) + + try: + payload = response.json() + except Exception as exc: + raise AuthError( + f"Qwen OAuth refresh returned invalid JSON: {exc}", + provider="qwen-oauth", + code="qwen_refresh_invalid_json", + ) from exc + + if not isinstance(payload, dict) or not str(payload.get("access_token", "") or "").strip(): + raise AuthError( + "Qwen OAuth refresh response missing access_token.", + provider="qwen-oauth", + code="qwen_refresh_invalid_response", + ) + + expires_in = payload.get("expires_in") + try: + expires_in_seconds = int(expires_in) + except Exception: + expires_in_seconds = 6 * 60 * 60 + + refreshed = { + "access_token": str(payload.get("access_token", "") or "").strip(), + "refresh_token": str(payload.get("refresh_token", refresh_token) or refresh_token).strip(), + "token_type": str(payload.get("token_type", tokens.get("token_type", "Bearer")) or "Bearer").strip() or "Bearer", + "resource_url": str(payload.get("resource_url", tokens.get("resource_url", "portal.qwen.ai")) or "portal.qwen.ai").strip(), + "expiry_date": int(time.time() * 1000) + max(1, expires_in_seconds) * 1000, + } + _save_qwen_cli_tokens(refreshed) + return refreshed + + +def 
resolve_qwen_runtime_credentials( + *, + force_refresh: bool = False, + refresh_if_expiring: bool = True, + refresh_skew_seconds: int = QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, +) -> Dict[str, Any]: + tokens = _read_qwen_cli_tokens() + access_token = str(tokens.get("access_token", "") or "").strip() + should_refresh = bool(force_refresh) + if not should_refresh and refresh_if_expiring: + should_refresh = _qwen_access_token_is_expiring(tokens.get("expiry_date"), refresh_skew_seconds) + if should_refresh: + tokens = _refresh_qwen_cli_tokens(tokens) + access_token = str(tokens.get("access_token", "") or "").strip() + if not access_token: + raise AuthError( + "Qwen OAuth access token missing. Re-run 'qwen auth qwen-oauth'.", + provider="qwen-oauth", + code="qwen_access_token_missing", + ) + + base_url = os.getenv("HERMES_QWEN_BASE_URL", "").strip().rstrip("/") or DEFAULT_QWEN_BASE_URL + return { + "provider": "qwen-oauth", + "base_url": base_url, + "api_key": access_token, + "source": "qwen-cli", + "expires_at_ms": tokens.get("expiry_date"), + "auth_file": str(_qwen_cli_auth_path()), + } + + +def get_qwen_auth_status() -> Dict[str, Any]: + auth_path = _qwen_cli_auth_path() + try: + creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False) + return { + "logged_in": True, + "auth_file": str(auth_path), + "source": creds.get("source"), + "api_key": creds.get("api_key"), + "expires_at_ms": creds.get("expires_at_ms"), + } + except AuthError as exc: + return { + "logged_in": False, + "auth_file": str(auth_path), + "error": str(exc), + } + + # ============================================================================= # SSH / remote session detection # ============================================================================= @@ -2072,6 +2253,8 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: return get_nous_auth_status() if target == "openai-codex": return get_codex_auth_status() + if target == "qwen-oauth": + return 
get_qwen_auth_status() if target == "copilot-acp": return get_external_process_provider_status(target) # API-key providers diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 97c2d850..eca6b292 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -32,7 +32,7 @@ from hermes_constants import OPENROUTER_BASE_URL # Providers that support OAuth login in addition to API keys. -_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex"} +_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"} def _get_custom_provider_names() -> list: @@ -147,7 +147,7 @@ def auth_add_command(args) -> None: if provider.startswith(CUSTOM_POOL_PREFIX): requested_type = AUTH_TYPE_API_KEY else: - requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex"} else AUTH_TYPE_API_KEY + requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex", "qwen-oauth"} else AUTH_TYPE_API_KEY pool = load_pool(provider) @@ -250,6 +250,26 @@ def auth_add_command(args) -> None: print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') return + if provider == "qwen-oauth": + creds = auth_mod.resolve_qwen_runtime_credentials(refresh_if_expiring=False) + label = (getattr(args, "label", None) or "").strip() or label_from_token( + creds["api_key"], + _oauth_default_label(provider, len(pool.entries()) + 1), + ) + entry = PooledCredential( + provider=provider, + id=uuid.uuid4().hex[:6], + label=label, + auth_type=AUTH_TYPE_OAUTH, + priority=0, + source=f"{SOURCE_MANUAL}:qwen_cli", + access_token=creds["api_key"], + base_url=creds.get("base_url"), + ) + pool.add_entry(entry) + print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"') + return + raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.") diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 3d1e2847..5b180fc2 100644 --- 
a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -918,6 +918,7 @@ def select_provider_and_model(args=None): "openrouter": "OpenRouter", "nous": "Nous Portal", "openai-codex": "OpenAI Codex", + "qwen-oauth": "Qwen OAuth", "copilot-acp": "GitHub Copilot ACP", "copilot": "GitHub Copilot", "anthropic": "Anthropic", @@ -947,6 +948,7 @@ def select_provider_and_model(args=None): ("openrouter", "OpenRouter (100+ models, pay-per-use)"), ("anthropic", "Anthropic (Claude models — API key or Claude Code)"), ("openai-codex", "OpenAI Codex"), + ("qwen-oauth", "Qwen OAuth (reuses local Qwen CLI login)"), ("copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"), ("huggingface", "Hugging Face Inference Providers (20+ open models)"), ] @@ -1043,6 +1045,8 @@ def select_provider_and_model(args=None): _model_flow_nous(config, current_model, args=args) elif selected_provider == "openai-codex": _model_flow_openai_codex(config, current_model) + elif selected_provider == "qwen-oauth": + _model_flow_qwen_oauth(config, current_model) elif selected_provider == "copilot-acp": _model_flow_copilot_acp(config, current_model) elif selected_provider == "copilot": @@ -1359,6 +1363,56 @@ def _model_flow_openai_codex(config, current_model=""): +_DEFAULT_QWEN_PORTAL_MODELS = [ + "qwen3-coder-plus", + "qwen3-coder", +] + + +def _model_flow_qwen_oauth(_config, current_model=""): + """Qwen OAuth provider: reuse local Qwen CLI login, then pick model.""" + from hermes_cli.auth import ( + get_qwen_auth_status, + resolve_qwen_runtime_credentials, + _prompt_model_selection, + _save_model_choice, + _update_config_for_provider, + DEFAULT_QWEN_BASE_URL, + ) + from hermes_cli.models import fetch_api_models + + status = get_qwen_auth_status() + if not status.get("logged_in"): + print("Not logged into Qwen CLI OAuth.") + print("Run: qwen auth qwen-oauth") + auth_file = status.get("auth_file") + if auth_file: + print(f"Expected credentials file: {auth_file}") + if status.get("error"): + print(f"Error: 
{status.get('error')}") + return + + # Try live model discovery, fall back to curated list. + models = None + try: + creds = resolve_qwen_runtime_credentials(refresh_if_expiring=True) + models = fetch_api_models(creds["api_key"], creds["base_url"]) + except Exception: + pass + if not models: + models = list(_DEFAULT_QWEN_PORTAL_MODELS) + + default = current_model or (models[0] if models else "qwen3-coder-plus") + selected = _prompt_model_selection(models, current_model=default) + if selected: + _save_model_choice(selected) + _update_config_for_provider("qwen-oauth", DEFAULT_QWEN_BASE_URL) + print(f"Default model set to: {selected} (via Qwen OAuth)") + else: + print("No change.") + + + def _model_flow_custom(config): """Custom endpoint: collect URL, API key, and model name. diff --git a/hermes_cli/model_normalize.py b/hermes_cli/model_normalize.py index 378e1e19..7b541363 100644 --- a/hermes_cli/model_normalize.py +++ b/hermes_cli/model_normalize.py @@ -84,6 +84,7 @@ _PASSTHROUGH_PROVIDERS: frozenset[str] = frozenset({ "minimax", "minimax-cn", "alibaba", + "qwen-oauth", "huggingface", "openai-codex", "custom", diff --git a/hermes_cli/models.py b/hermes_cli/models.py index aa68f877..ce89bdea 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -483,6 +483,7 @@ _PROVIDER_LABELS = { "ai-gateway": "AI Gateway", "kilocode": "Kilo Code", "alibaba": "Alibaba Cloud (DashScope)", + "qwen-oauth": "Qwen OAuth (Portal)", "huggingface": "Hugging Face", "custom": "Custom endpoint", } @@ -522,6 +523,7 @@ _PROVIDER_ALIASES = { "aliyun": "alibaba", "qwen": "alibaba", "alibaba-cloud": "alibaba", + "qwen-portal": "qwen-oauth", "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface", @@ -767,6 +769,7 @@ def list_available_providers() -> list[dict[str, str]]: "openrouter", "nous", "openai-codex", "copilot", "copilot-acp", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba", + "qwen-oauth", 
"opencode-zen", "opencode-go", "ai-gateway", "deepseek", "custom", ] diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 5cd02380..18109e6e 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -58,6 +58,12 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { auth_type="oauth_external", base_url_override="https://chatgpt.com/backend-api/codex", ), + "qwen-oauth": HermesOverlay( + transport="openai_chat", + auth_type="oauth_external", + base_url_override="https://portal.qwen.ai/v1", + base_url_env_var="HERMES_QWEN_BASE_URL", + ), "copilot-acp": HermesOverlay( transport="codex_responses", auth_type="external_process", diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index fa9d4939..4457a735 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -14,11 +14,13 @@ from agent.credential_pool import CredentialPool, PooledCredential, get_custom_p from hermes_cli.auth import ( AuthError, DEFAULT_CODEX_BASE_URL, + DEFAULT_QWEN_BASE_URL, PROVIDER_REGISTRY, format_auth_error, resolve_provider, resolve_nous_runtime_credentials, resolve_codex_runtime_credentials, + resolve_qwen_runtime_credentials, resolve_api_key_provider_credentials, resolve_external_process_provider_credentials, has_usable_secret, @@ -148,6 +150,9 @@ def _resolve_runtime_from_pool_entry( if provider == "openai-codex": api_mode = "codex_responses" base_url = base_url or DEFAULT_CODEX_BASE_URL + elif provider == "qwen-oauth": + api_mode = "chat_completions" + base_url = base_url or DEFAULT_QWEN_BASE_URL elif provider == "anthropic": api_mode = "anthropic_messages" cfg_provider = str(model_cfg.get("provider") or "").strip().lower() @@ -691,6 +696,24 @@ def resolve_runtime_provider( logger.info("Auto-detected Codex provider but credentials failed; " "falling through to next provider.") + if provider == "qwen-oauth": + try: + creds = resolve_qwen_runtime_credentials() + return { + "provider": "qwen-oauth", + "api_mode": 
"chat_completions", + "base_url": creds.get("base_url", "").rstrip("/"), + "api_key": creds.get("api_key", ""), + "source": creds.get("source", "qwen-cli"), + "expires_at_ms": creds.get("expires_at_ms"), + "requested_provider": requested_provider, + } + except AuthError: + if requested_provider != "auto": + raise + logger.info("Qwen OAuth credentials failed; " + "falling through to next provider.") + if provider == "copilot-acp": creds = resolve_external_process_provider_credentials(provider) return { diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 77a3e0ef..6fe8f7df 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -153,12 +153,14 @@ def show_status(args): print(color("◆ Auth Providers", Colors.CYAN, Colors.BOLD)) try: - from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status + from hermes_cli.auth import get_nous_auth_status, get_codex_auth_status, get_qwen_auth_status nous_status = get_nous_auth_status() codex_status = get_codex_auth_status() + qwen_status = get_qwen_auth_status() except Exception: nous_status = {} codex_status = {} + qwen_status = {} nous_logged_in = bool(nous_status.get("logged_in")) print( @@ -189,6 +191,21 @@ def show_status(args): if codex_status.get("error") and not codex_logged_in: print(f" Error: {codex_status.get('error')}") + qwen_logged_in = bool(qwen_status.get("logged_in")) + print( + f" {'Qwen OAuth':<12} {check_mark(qwen_logged_in)} " + f"{'logged in' if qwen_logged_in else 'not logged in (run: qwen auth qwen-oauth)'}" + ) + qwen_auth_file = qwen_status.get("auth_file") + if qwen_auth_file: + print(f" Auth file: {qwen_auth_file}") + qwen_exp = qwen_status.get("expires_at_ms") + if qwen_exp: + from datetime import datetime, timezone + print(f" Access exp: {datetime.fromtimestamp(int(qwen_exp) / 1000, tz=timezone.utc).isoformat()}") + if qwen_status.get("error") and not qwen_logged_in: + print(f" Error: {qwen_status.get('error')}") + # 
========================================================================= # Nous Subscription Features # ========================================================================= diff --git a/run_agent.py b/run_agent.py index a0ae15a1..dc423532 100644 --- a/run_agent.py +++ b/run_agent.py @@ -413,6 +413,27 @@ def _strip_budget_warnings_from_history(messages: list) -> None: # ========================================================================= +# ========================================================================= +# Qwen Portal headers — mimics QwenCode CLI for portal.qwen.ai compatibility. +# Extracted as a module-level helper so both __init__ and +# _apply_client_headers_for_base_url can share it. +# ========================================================================= +_QWEN_CODE_VERSION = "0.14.1" + + +def _qwen_portal_headers() -> dict: + """Return default HTTP headers required by Qwen Portal API.""" + import platform as _plat + + _ua = f"QwenCode/{_QWEN_CODE_VERSION} ({_plat.system().lower()}; {_plat.machine()})" + return { + "User-Agent": _ua, + "X-DashScope-CacheControl": "enable", + "X-DashScope-UserAgent": _ua, + "X-DashScope-AuthType": "qwen-oauth", + } + + class AIAgent: """ AI Agent with tool calling capabilities. 
@@ -756,6 +777,8 @@ class AIAgent: client_kwargs["default_headers"] = { "User-Agent": "KimiCLI/1.3", } + elif "portal.qwen.ai" in effective_base.lower(): + client_kwargs["default_headers"] = _qwen_portal_headers() else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client @@ -4080,6 +4103,8 @@ class AIAgent: self._client_kwargs["default_headers"] = copilot_default_headers() elif "api.kimi.com" in normalized: self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"} + elif "portal.qwen.ai" in normalized: + self._client_kwargs["default_headers"] = _qwen_portal_headers() else: self._client_kwargs.pop("default_headers", None) @@ -5226,6 +5251,71 @@ class AIAgent: base = (getattr(self, "base_url", "") or "").lower() return "dashscope" in base or "aliyuncs" in base or "opencode.ai/zen/go" in base + def _is_qwen_portal(self) -> bool: + """Return True when the base URL targets Qwen Portal.""" + return "portal.qwen.ai" in self._base_url_lower + + def _qwen_prepare_chat_messages(self, api_messages: list) -> list: + prepared = copy.deepcopy(api_messages) + if not prepared: + return prepared + + for msg in prepared: + if not isinstance(msg, dict): + continue + content = msg.get("content") + if isinstance(content, str): + msg["content"] = [{"type": "text", "text": content}] + elif isinstance(content, list): + # Normalize: convert bare strings to text dicts, keep dicts as-is. + # deepcopy already created independent copies, no need for dict(). + normalized_parts = [] + for part in content: + if isinstance(part, str): + normalized_parts.append({"type": "text", "text": part}) + elif isinstance(part, dict): + normalized_parts.append(part) + if normalized_parts: + msg["content"] = normalized_parts + + # Inject cache_control on the last part of the system message. 
+ for msg in prepared: + if isinstance(msg, dict) and msg.get("role") == "system": + content = msg.get("content") + if isinstance(content, list) and content and isinstance(content[-1], dict): + content[-1]["cache_control"] = {"type": "ephemeral"} + break + + return prepared + + def _qwen_prepare_chat_messages_inplace(self, messages: list) -> None: + """In-place variant — mutates an already-copied message list.""" + if not messages: + return + + for msg in messages: + if not isinstance(msg, dict): + continue + content = msg.get("content") + if isinstance(content, str): + msg["content"] = [{"type": "text", "text": content}] + elif isinstance(content, list): + normalized_parts = [] + for part in content: + if isinstance(part, str): + normalized_parts.append({"type": "text", "text": part}) + elif isinstance(part, dict): + normalized_parts.append(part) + if normalized_parts: + msg["content"] = normalized_parts + + for msg in messages: + if isinstance(msg, dict) and msg.get("role") == "system": + content = msg.get("content") + if isinstance(content, list) and content and isinstance(content[-1], dict): + content[-1]["cache_control"] = {"type": "ephemeral"} + break + def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "anthropic_messages": @@ -5337,6 +5427,17 @@ class AIAgent: tool_call.pop("call_id", None) tool_call.pop("response_item_id", None) + # Qwen portal: normalize content to list-of-dicts, inject cache_control. + # Must run AFTER codex sanitization so we transform the final messages. + # If sanitization already deepcopied, reuse that copy (in-place). + if self._is_qwen_portal(): + if sanitized_messages is api_messages: + # No sanitization was done — we need our own copy. + sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages) + else: + # Already a deepcopy — transform in place to avoid a second deepcopy. 
+ self._qwen_prepare_chat_messages_inplace(sanitized_messages) + # GPT-5 and Codex models respond better to 'developer' than 'system' # for instruction-following. Swap the role at the API boundary so # internal message representation stays uniform ("system"). @@ -5369,11 +5470,17 @@ class AIAgent: "messages": sanitized_messages, "timeout": float(os.getenv("HERMES_API_TIMEOUT", 1800.0)), } + if self._is_qwen_portal(): + api_kwargs["metadata"] = { + "sessionId": self.session_id or "hermes", + "promptId": str(uuid.uuid4()), + } if self.tools: api_kwargs["tools"] = self.tools if self.max_tokens is not None: - api_kwargs.update(self._max_tokens_param(self.max_tokens)) + if not self._is_qwen_portal(): + api_kwargs.update(self._max_tokens_param(self.max_tokens)) elif self._is_openrouter_url() and "claude" in (self.model or "").lower(): # OpenRouter translates requests to Anthropic's Messages API, # which requires max_tokens as a mandatory field. When we omit @@ -5438,6 +5545,9 @@ class AIAgent: options["num_ctx"] = self._ollama_num_ctx extra_body["options"] = options + if self._is_qwen_portal(): + extra_body["vl_high_resolution_images"] = True + if extra_body: api_kwargs["extra_body"] = extra_body diff --git a/tests/hermes_cli/test_auth_qwen_provider.py b/tests/hermes_cli/test_auth_qwen_provider.py new file mode 100644 index 00000000..f1943d84 --- /dev/null +++ b/tests/hermes_cli/test_auth_qwen_provider.py @@ -0,0 +1,399 @@ +"""Tests for Qwen OAuth provider authentication (hermes_cli/auth.py). + +Covers: _qwen_cli_auth_path, _read_qwen_cli_tokens, _save_qwen_cli_tokens, +_qwen_access_token_is_expiring, _refresh_qwen_cli_tokens, +resolve_qwen_runtime_credentials, get_qwen_auth_status. 
+""" + +import json +import os +import stat +import time +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from hermes_cli.auth import ( + AuthError, + DEFAULT_QWEN_BASE_URL, + QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, + _qwen_cli_auth_path, + _read_qwen_cli_tokens, + _save_qwen_cli_tokens, + _qwen_access_token_is_expiring, + _refresh_qwen_cli_tokens, + resolve_qwen_runtime_credentials, + get_qwen_auth_status, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_qwen_tokens( + access_token="test-access-token", + refresh_token="test-refresh-token", + expiry_date=None, + **extra, +): + """Create a minimal Qwen CLI OAuth credential dict.""" + if expiry_date is None: + # 1 hour from now in milliseconds + expiry_date = int((time.time() + 3600) * 1000) + data = { + "access_token": access_token, + "refresh_token": refresh_token, + "token_type": "Bearer", + "expiry_date": expiry_date, + "resource_url": "portal.qwen.ai", + } + data.update(extra) + return data + + +def _write_qwen_creds(tmp_path, tokens=None): + """Write tokens to the Qwen CLI credentials file and return the path.""" + qwen_dir = tmp_path / ".qwen" + qwen_dir.mkdir(parents=True, exist_ok=True) + creds_path = qwen_dir / "oauth_creds.json" + if tokens is None: + tokens = _make_qwen_tokens() + creds_path.write_text(json.dumps(tokens), encoding="utf-8") + return creds_path + + +@pytest.fixture() +def qwen_env(tmp_path, monkeypatch): + """Redirect _qwen_cli_auth_path to tmp_path/.qwen/oauth_creds.json.""" + creds_path = tmp_path / ".qwen" / "oauth_creds.json" + monkeypatch.setattr( + "hermes_cli.auth._qwen_cli_auth_path", lambda: creds_path + ) + return tmp_path + + +# --------------------------------------------------------------------------- +# _qwen_cli_auth_path +# 
--------------------------------------------------------------------------- + +def test_qwen_cli_auth_path_returns_expected_location(): + path = _qwen_cli_auth_path() + assert path == Path.home() / ".qwen" / "oauth_creds.json" + + +# --------------------------------------------------------------------------- +# _read_qwen_cli_tokens +# --------------------------------------------------------------------------- + +def test_read_qwen_cli_tokens_success(qwen_env): + tokens = _make_qwen_tokens(access_token="my-access") + _write_qwen_creds(qwen_env, tokens) + result = _read_qwen_cli_tokens() + assert result["access_token"] == "my-access" + assert result["refresh_token"] == "test-refresh-token" + + +def test_read_qwen_cli_tokens_missing_file(qwen_env): + with pytest.raises(AuthError) as exc: + _read_qwen_cli_tokens() + assert exc.value.code == "qwen_auth_missing" + + +def test_read_qwen_cli_tokens_invalid_json(qwen_env): + creds_path = qwen_env / ".qwen" / "oauth_creds.json" + creds_path.parent.mkdir(parents=True, exist_ok=True) + creds_path.write_text("not json{{{", encoding="utf-8") + with pytest.raises(AuthError) as exc: + _read_qwen_cli_tokens() + assert exc.value.code == "qwen_auth_read_failed" + + +def test_read_qwen_cli_tokens_non_dict(qwen_env): + creds_path = qwen_env / ".qwen" / "oauth_creds.json" + creds_path.parent.mkdir(parents=True, exist_ok=True) + creds_path.write_text(json.dumps(["a", "b"]), encoding="utf-8") + with pytest.raises(AuthError) as exc: + _read_qwen_cli_tokens() + assert exc.value.code == "qwen_auth_invalid" + + +# --------------------------------------------------------------------------- +# _save_qwen_cli_tokens +# --------------------------------------------------------------------------- + +def test_save_qwen_cli_tokens_roundtrip(qwen_env): + tokens = _make_qwen_tokens(access_token="saved-token") + saved_path = _save_qwen_cli_tokens(tokens) + assert saved_path.exists() + loaded = json.loads(saved_path.read_text(encoding="utf-8")) + assert 
loaded["access_token"] == "saved-token" + + +def test_save_qwen_cli_tokens_creates_parent(qwen_env): + tokens = _make_qwen_tokens() + saved_path = _save_qwen_cli_tokens(tokens) + assert saved_path.parent.exists() + + +def test_save_qwen_cli_tokens_permissions(qwen_env): + tokens = _make_qwen_tokens() + saved_path = _save_qwen_cli_tokens(tokens) + mode = saved_path.stat().st_mode + assert mode & stat.S_IRUSR # owner read + assert mode & stat.S_IWUSR # owner write + assert not (mode & stat.S_IRGRP) # no group read + assert not (mode & stat.S_IROTH) # no other read + + +# --------------------------------------------------------------------------- +# _qwen_access_token_is_expiring +# --------------------------------------------------------------------------- + +def test_expiring_token_not_expired(): + # 1 hour from now in milliseconds + future_ms = int((time.time() + 3600) * 1000) + assert not _qwen_access_token_is_expiring(future_ms) + + +def test_expiring_token_already_expired(): + # 1 hour ago in milliseconds + past_ms = int((time.time() - 3600) * 1000) + assert _qwen_access_token_is_expiring(past_ms) + + +def test_expiring_token_within_skew(): + # Just inside the default skew window + near_ms = int((time.time() + QWEN_ACCESS_TOKEN_REFRESH_SKEW_SECONDS - 5) * 1000) + assert _qwen_access_token_is_expiring(near_ms) + + +def test_expiring_token_none_returns_true(): + assert _qwen_access_token_is_expiring(None) + + +def test_expiring_token_non_numeric_returns_true(): + assert _qwen_access_token_is_expiring("not-a-number") + + +# --------------------------------------------------------------------------- +# _refresh_qwen_cli_tokens +# --------------------------------------------------------------------------- + +def test_refresh_qwen_cli_tokens_success(qwen_env): + tokens = _make_qwen_tokens(refresh_token="old-refresh") + + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = { + "access_token": "new-access", + "refresh_token": "new-refresh", + 
"expires_in": 7200, + } + + with patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.return_value = resp + result = _refresh_qwen_cli_tokens(tokens) + + assert result["access_token"] == "new-access" + assert result["refresh_token"] == "new-refresh" + assert "expiry_date" in result + + +def test_refresh_qwen_cli_tokens_preserves_old_refresh_if_not_in_response(qwen_env): + tokens = _make_qwen_tokens(refresh_token="keep-me") + + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = { + "access_token": "new-access", + # No refresh_token in response — should keep old one + "expires_in": 3600, + } + + with patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.return_value = resp + result = _refresh_qwen_cli_tokens(tokens) + + assert result["refresh_token"] == "keep-me" + + +def test_refresh_qwen_cli_tokens_missing_refresh_token(): + tokens = {"access_token": "at", "refresh_token": ""} + with pytest.raises(AuthError) as exc: + _refresh_qwen_cli_tokens(tokens) + assert exc.value.code == "qwen_refresh_token_missing" + + +def test_refresh_qwen_cli_tokens_http_error(qwen_env): + tokens = _make_qwen_tokens() + + resp = MagicMock() + resp.status_code = 401 + resp.text = "unauthorized" + + with patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.return_value = resp + with pytest.raises(AuthError) as exc: + _refresh_qwen_cli_tokens(tokens) + assert exc.value.code == "qwen_refresh_failed" + + +def test_refresh_qwen_cli_tokens_network_error(qwen_env): + tokens = _make_qwen_tokens() + + with patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.side_effect = ConnectionError("timeout") + with pytest.raises(AuthError) as exc: + _refresh_qwen_cli_tokens(tokens) + assert exc.value.code == "qwen_refresh_failed" + + +def test_refresh_qwen_cli_tokens_invalid_json_response(qwen_env): + tokens = _make_qwen_tokens() + + resp = MagicMock() + resp.status_code = 200 + resp.json.side_effect = ValueError("bad json") + + with 
patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.return_value = resp + with pytest.raises(AuthError) as exc: + _refresh_qwen_cli_tokens(tokens) + assert exc.value.code == "qwen_refresh_invalid_json" + + +def test_refresh_qwen_cli_tokens_missing_access_token_in_response(qwen_env): + tokens = _make_qwen_tokens() + + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = {"something": "but no access_token"} + + with patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.return_value = resp + with pytest.raises(AuthError) as exc: + _refresh_qwen_cli_tokens(tokens) + assert exc.value.code == "qwen_refresh_invalid_response" + + +def test_refresh_qwen_cli_tokens_default_expires_in(qwen_env): + """When expires_in is missing, default to 6 hours.""" + tokens = _make_qwen_tokens() + + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = {"access_token": "new"} + + with patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.return_value = resp + result = _refresh_qwen_cli_tokens(tokens) + + # Verify expiry_date is roughly now + 6h (within 60s tolerance) + expected_ms = int(time.time() * 1000) + 6 * 60 * 60 * 1000 + assert abs(result["expiry_date"] - expected_ms) < 60_000 + + +def test_refresh_qwen_cli_tokens_saves_to_disk(qwen_env): + tokens = _make_qwen_tokens() + + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = { + "access_token": "disk-check", + "expires_in": 3600, + } + + with patch("hermes_cli.auth.httpx") as mock_httpx: + mock_httpx.post.return_value = resp + _refresh_qwen_cli_tokens(tokens) + + # Verify it was persisted + creds_path = qwen_env / ".qwen" / "oauth_creds.json" + assert creds_path.exists() + saved = json.loads(creds_path.read_text(encoding="utf-8")) + assert saved["access_token"] == "disk-check" + + +# --------------------------------------------------------------------------- +# resolve_qwen_runtime_credentials +# 
--------------------------------------------------------------------------- + +def test_resolve_qwen_runtime_credentials_fresh_token(qwen_env): + tokens = _make_qwen_tokens(access_token="fresh-at") + _write_qwen_creds(qwen_env, tokens) + + creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False) + assert creds["provider"] == "qwen-oauth" + assert creds["api_key"] == "fresh-at" + assert creds["base_url"] == DEFAULT_QWEN_BASE_URL + assert creds["source"] == "qwen-cli" + + +def test_resolve_qwen_runtime_credentials_triggers_refresh(qwen_env): + # Write an expired token + expired_ms = int((time.time() - 3600) * 1000) + tokens = _make_qwen_tokens(access_token="old", expiry_date=expired_ms) + _write_qwen_creds(qwen_env, tokens) + + refreshed = _make_qwen_tokens(access_token="refreshed-at") + + with patch( + "hermes_cli.auth._refresh_qwen_cli_tokens", return_value=refreshed + ) as mock_refresh: + creds = resolve_qwen_runtime_credentials() + mock_refresh.assert_called_once() + assert creds["api_key"] == "refreshed-at" + + +def test_resolve_qwen_runtime_credentials_force_refresh(qwen_env): + tokens = _make_qwen_tokens(access_token="old-at") + _write_qwen_creds(qwen_env, tokens) + + refreshed = _make_qwen_tokens(access_token="force-refreshed") + + with patch( + "hermes_cli.auth._refresh_qwen_cli_tokens", return_value=refreshed + ) as mock_refresh: + creds = resolve_qwen_runtime_credentials(force_refresh=True) + mock_refresh.assert_called_once() + assert creds["api_key"] == "force-refreshed" + + +def test_resolve_qwen_runtime_credentials_missing_access_token(qwen_env): + tokens = _make_qwen_tokens(access_token="") + _write_qwen_creds(qwen_env, tokens) + + with pytest.raises(AuthError) as exc: + resolve_qwen_runtime_credentials(refresh_if_expiring=False) + assert exc.value.code == "qwen_access_token_missing" + + +def test_resolve_qwen_runtime_credentials_base_url_env_override(qwen_env, monkeypatch): + tokens = _make_qwen_tokens(access_token="at") + 
_write_qwen_creds(qwen_env, tokens) + monkeypatch.setenv("HERMES_QWEN_BASE_URL", "https://custom.qwen.ai/v1") + + creds = resolve_qwen_runtime_credentials(refresh_if_expiring=False) + assert creds["base_url"] == "https://custom.qwen.ai/v1" + + +# --------------------------------------------------------------------------- +# get_qwen_auth_status +# --------------------------------------------------------------------------- + +def test_get_qwen_auth_status_logged_in(qwen_env): + tokens = _make_qwen_tokens(access_token="status-at") + _write_qwen_creds(qwen_env, tokens) + + status = get_qwen_auth_status() + assert status["logged_in"] is True + assert status["api_key"] == "status-at" + + +def test_get_qwen_auth_status_not_logged_in(qwen_env): + # No credentials file + status = get_qwen_auth_status() + assert status["logged_in"] is False + assert "error" in status diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index 0abc8196..f46b2dd1 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -143,6 +143,82 @@ def test_resolve_runtime_provider_codex(monkeypatch): assert resolved["requested_provider"] == "openai-codex" +def test_resolve_runtime_provider_qwen_oauth(monkeypatch): + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "qwen-oauth") + monkeypatch.setattr( + rp, + "resolve_qwen_runtime_credentials", + lambda: { + "provider": "qwen-oauth", + "base_url": "https://portal.qwen.ai/v1", + "api_key": "qwen-token", + "source": "qwen-cli", + "expires_at_ms": 1775640710946, + }, + ) + + resolved = rp.resolve_runtime_provider(requested="qwen-oauth") + + assert resolved["provider"] == "qwen-oauth" + assert resolved["api_mode"] == "chat_completions" + assert resolved["base_url"] == "https://portal.qwen.ai/v1" + assert resolved["api_key"] == "qwen-token" + assert resolved["requested_provider"] == "qwen-oauth" + + +def 
test_resolve_runtime_provider_uses_qwen_pool_entry(monkeypatch): + class _Entry: + access_token = "pool-qwen-token" + source = "manual:qwen_cli" + base_url = "https://portal.qwen.ai/v1" + + class _Pool: + def has_credentials(self): + return True + + def select(self): + return _Entry() + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "qwen-oauth") + monkeypatch.setattr(rp, "load_pool", lambda provider: _Pool()) + monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "qwen-oauth", "default": "coder-model"}) + + resolved = rp.resolve_runtime_provider(requested="qwen-oauth") + + assert resolved["provider"] == "qwen-oauth" + assert resolved["api_mode"] == "chat_completions" + assert resolved["base_url"] == "https://portal.qwen.ai/v1" + assert resolved["api_key"] == "pool-qwen-token" + assert resolved["source"] == "manual:qwen_cli" + + +def test_resolve_provider_alias_qwen(monkeypatch): + monkeypatch.setattr(rp.auth_mod, "_load_auth_store", lambda: {}) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + assert rp.resolve_provider("qwen-portal") == "qwen-oauth" + assert rp.resolve_provider("qwen-cli") == "qwen-oauth" + + +def test_qwen_oauth_auto_fallthrough_on_auth_failure(monkeypatch): + """When requested_provider is 'auto' and Qwen creds fail, fall through.""" + from hermes_cli.auth import AuthError + + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "qwen-oauth") + monkeypatch.setattr( + rp, + "resolve_qwen_runtime_credentials", + lambda **kw: (_ for _ in ()).throw(AuthError("stale", provider="qwen-oauth", code="qwen_auth_missing")), + ) + monkeypatch.setattr(rp, "_get_model_config", lambda: {}) + monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key") + + # Should NOT raise — falls through to OpenRouter + resolved = rp.resolve_runtime_provider(requested="auto") + # The fallthrough means it won't be qwen-oauth + assert resolved["provider"] != "qwen-oauth" + + def 
test_resolve_runtime_provider_ai_gateway(monkeypatch): monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "ai-gateway") monkeypatch.setattr(rp, "_get_model_config", lambda: {}) diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 104881a0..59f88601 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -872,6 +872,52 @@ class TestBuildApiKwargs: kwargs = agent._build_api_kwargs(messages) assert kwargs["max_tokens"] == 4096 + def test_qwen_portal_formats_messages_and_metadata(self, agent): + agent.base_url = "https://portal.qwen.ai/v1" + agent._base_url_lower = agent.base_url.lower() + agent.session_id = "sess-123" + messages = [ + {"role": "system", "content": "You are helpful"}, + {"role": "assistant", "content": "Got it"}, + {"role": "user", "content": "hi"}, + ] + kwargs = agent._build_api_kwargs(messages) + assert kwargs["metadata"]["sessionId"] == "sess-123" + assert kwargs["extra_body"]["vl_high_resolution_images"] is True + assert isinstance(kwargs["messages"][0]["content"], list) + assert kwargs["messages"][0]["content"][0]["cache_control"] == {"type": "ephemeral"} + assert kwargs["messages"][2]["content"][0]["text"] == "hi" + + def test_qwen_portal_normalizes_bare_string_content_parts(self, agent): + agent.base_url = "https://portal.qwen.ai/v1" + agent._base_url_lower = agent.base_url.lower() + messages = [ + {"role": "system", "content": [{"type": "text", "text": "system"}]}, + {"role": "user", "content": ["hello", {"type": "text", "text": "world"}]}, + ] + kwargs = agent._build_api_kwargs(messages) + user_content = kwargs["messages"][1]["content"] + assert user_content[0] == {"type": "text", "text": "hello"} + assert user_content[1] == {"type": "text", "text": "world"} + + def test_qwen_portal_no_system_message(self, agent): + agent.base_url = "https://portal.qwen.ai/v1" + agent._base_url_lower = agent.base_url.lower() + messages = [{"role": "user", "content": "hi"}] + kwargs 
= agent._build_api_kwargs(messages) + # Should not crash even without a system message + assert kwargs["messages"][0]["content"][0]["text"] == "hi" + assert "cache_control" not in kwargs["messages"][0]["content"][0] + + def test_qwen_portal_omits_max_tokens(self, agent): + agent.base_url = "https://portal.qwen.ai/v1" + agent._base_url_lower = agent.base_url.lower() + agent.max_tokens = 4096 + messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "max_tokens" not in kwargs + assert "max_completion_tokens" not in kwargs + class TestBuildAssistantMessage: def test_basic_message(self, agent): From 5d2fc6d928d4d027473a4bf7e0548d505558fbbe Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 8 Apr 2026 13:39:57 -0700 Subject: [PATCH 145/154] fix: cleanup Qwen OAuth provider gaps - Add HERMES_QWEN_BASE_URL to OPTIONAL_ENV_VARS in config.py (was missing despite being referenced in code) - Remove redundant qwen-oauth entry from _API_KEY_PROVIDER_AUX_MODELS (non-aggregator providers use their main model for aux tasks automatically) --- agent/auxiliary_client.py | 1 - hermes_cli/config.py | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index b71c96ac..2b99ac07 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -106,7 +106,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { "opencode-zen": "gemini-3-flash", "opencode-go": "glm-5", "kilocode": "google/gemini-3-flash-preview", - "qwen-oauth": "qwen3-coder", } # OpenRouter app attribution headers diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 3338a13c..4f114204 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -724,6 +724,14 @@ OPTIONAL_ENV_VARS = { "category": "provider", "advanced": True, }, + "HERMES_QWEN_BASE_URL": { + "description": "Qwen Portal base URL override (default: https://portal.qwen.ai/v1)", + 
"prompt": "Qwen Portal base URL (leave empty for default)", + "url": None, + "password": False, + "category": "provider", + "advanced": True, + }, "OPENCODE_ZEN_API_KEY": { "description": "OpenCode Zen API key (pay-as-you-go access to curated models)", "prompt": "OpenCode Zen API key", From 5f4b93c20f41f302e57800868f6a324928db5e69 Mon Sep 17 00:00:00 2001 From: jjovalle99 Date: Mon, 6 Apr 2026 17:38:25 +0100 Subject: [PATCH 146/154] feat(tools): add Voxtral Transcribe STT provider (Mistral AI) --- cli-config.yaml.example | 6 +- hermes_cli/config.py | 5 +- pyproject.toml | 2 + tests/tools/test_transcription_tools.py | 181 ++++++++++++++++++++++++ tools/transcription_tools.py | 65 ++++++++- uv.lock | 99 +++++++++---- website/docs/user-guide/features/tts.md | 7 +- 7 files changed, 331 insertions(+), 34 deletions(-) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 73bff981..14d764d7 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -644,10 +644,14 @@ platform_toolsets: # Voice Transcription (Speech-to-Text) # ============================================================================= # Automatically transcribe voice messages on messaging platforms. -# Requires OPENAI_API_KEY in .env (uses OpenAI Whisper API directly). +# Providers: local (free, faster-whisper) | groq (free tier) | openai (Whisper API) | mistral (Voxtral Transcribe) +# Set the corresponding API key in .env: GROQ_API_KEY, OPENAI_API_KEY, or MISTRAL_API_KEY. 
stt: enabled: true + # provider: "local" # auto-detected if omitted model: "whisper-1" # whisper-1 (cheapest) | gpt-4o-mini-transcribe | gpt-4o-transcribe + # mistral: + # model: "voxtral-mini-latest" # voxtral-mini-latest | voxtral-mini-2602 # ============================================================================= # Response Pacing (Messaging Platforms) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 4f114204..350d99cf 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -413,7 +413,7 @@ DEFAULT_CONFIG = { "stt": { "enabled": True, - "provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) + "provider": "local", # "local" (free, faster-whisper) | "groq" | "openai" (Whisper API) | "mistral" (Voxtral Transcribe) "local": { "model": "base", # tiny, base, small, medium, large-v3 "language": "", # auto-detect by default; set to "en", "es", "fr", etc. to force @@ -421,6 +421,9 @@ DEFAULT_CONFIG = { "openai": { "model": "whisper-1", # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe }, + "mistral": { + "model": "voxtral-mini-latest", # voxtral-mini-latest, voxtral-mini-2602 + }, }, "voice": { diff --git a/pyproject.toml b/pyproject.toml index 8982e6e4..de0e6106 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,6 +62,7 @@ mcp = ["mcp>=1.2.0,<2"] homeassistant = ["aiohttp>=3.9.0,<4"] sms = ["aiohttp>=3.9.0,<4"] acp = ["agent-client-protocol>=0.9.0,<1.0"] +mistral = ["mistralai>=2.3.0,<3"] dingtalk = ["dingtalk-stream>=0.1.0,<1"] feishu = ["lark-oapi>=1.5.3,<2"] rl = [ @@ -94,6 +95,7 @@ all = [ "hermes-agent[voice]", "hermes-agent[dingtalk]", "hermes-agent[feishu]", + "hermes-agent[mistral]", ] [project.scripts] diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index 0cd4c8e3..9f5fab62 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -48,6 +48,7 @@ def clean_env(monkeypatch): 
monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) monkeypatch.delenv("OPENAI_API_KEY", raising=False) monkeypatch.delenv("GROQ_API_KEY", raising=False) + monkeypatch.delenv("MISTRAL_API_KEY", raising=False) monkeypatch.delenv("HERMES_LOCAL_STT_COMMAND", raising=False) monkeypatch.delenv("HERMES_LOCAL_STT_LANGUAGE", raising=False) @@ -858,3 +859,183 @@ class TestGetSttModelFromConfig: from tools.transcription_tools import get_stt_model_from_config assert get_stt_model_from_config() is None + + +# ============================================================================ +# _transcribe_mistral +# ============================================================================ + + +@pytest.fixture +def mock_mistral_module(): + """Inject a fake mistralai module into sys.modules for testing.""" + mock_client = MagicMock() + mock_client.__enter__ = MagicMock(return_value=mock_client) + mock_client.__exit__ = MagicMock(return_value=False) + mock_mistral_cls = MagicMock(return_value=mock_client) + fake_module = MagicMock() + fake_module.Mistral = mock_mistral_cls + with patch.dict("sys.modules", {"mistralai": fake_module}): + yield mock_client + + +class TestTranscribeMistral: + def test_no_key(self, monkeypatch): + monkeypatch.delenv("MISTRAL_API_KEY", raising=False) + from tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral("/tmp/test.ogg", "voxtral-mini-latest") + assert result["success"] is False + assert "MISTRAL_API_KEY" in result["error"] + + def test_successful_transcription(self, monkeypatch, sample_ogg, mock_mistral_module): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + + mock_result = MagicMock() + mock_result.text = "hello from mistral" + mock_mistral_module.audio.transcriptions.complete.return_value = mock_result + + from tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral(sample_ogg, "voxtral-mini-latest") + + assert result["success"] is True + assert result["transcript"] == 
"hello from mistral" + assert result["provider"] == "mistral" + mock_mistral_module.audio.transcriptions.complete.assert_called_once() + mock_mistral_module.__exit__.assert_called_once() + + def test_api_error_returns_failure(self, monkeypatch, sample_ogg, mock_mistral_module): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + mock_mistral_module.audio.transcriptions.complete.side_effect = RuntimeError("secret-key-leaked") + + from tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral(sample_ogg, "voxtral-mini-latest") + + assert result["success"] is False + assert "RuntimeError" in result["error"] + assert "secret-key-leaked" not in result["error"] + + def test_permission_error(self, monkeypatch, sample_ogg, mock_mistral_module): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + mock_mistral_module.audio.transcriptions.complete.side_effect = PermissionError("denied") + + from tools.transcription_tools import _transcribe_mistral + result = _transcribe_mistral(sample_ogg, "voxtral-mini-latest") + + assert result["success"] is False + assert "Permission denied" in result["error"] + + +# ============================================================================ +# _get_provider — Mistral +# ============================================================================ + +class TestGetProviderMistral: + """Mistral-specific provider selection tests.""" + + def test_mistral_when_key_and_sdk_available(self, monkeypatch): + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({"provider": "mistral"}) == "mistral" + + def test_mistral_explicit_no_key_returns_none(self, monkeypatch): + """Explicit mistral with no key returns none — no cross-provider fallback.""" + monkeypatch.delenv("MISTRAL_API_KEY", raising=False) + with patch("tools.transcription_tools._HAS_MISTRAL", True): + from 
tools.transcription_tools import _get_provider + assert _get_provider({"provider": "mistral"}) == "none" + + def test_mistral_explicit_no_sdk_returns_none(self, monkeypatch): + """Explicit mistral with key but no SDK returns none.""" + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_MISTRAL", False): + from tools.transcription_tools import _get_provider + assert _get_provider({"provider": "mistral"}) == "none" + + def test_auto_detect_mistral_after_openai(self, monkeypatch): + """Auto-detect: mistral is tried after openai when both are unavailable.""" + monkeypatch.delenv("GROQ_API_KEY", raising=False) + monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ + patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", False), \ + patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "mistral" + + def test_auto_detect_openai_preferred_over_mistral(self, monkeypatch): + """Auto-detect: openai is preferred over mistral (both paid, openai more common).""" + monkeypatch.setenv("VOICE_TOOLS_OPENAI_KEY", "sk-test") + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + monkeypatch.delenv("GROQ_API_KEY", raising=False) + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ + patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", True), \ + patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "openai" + + def test_auto_detect_groq_preferred_over_mistral(self, monkeypatch): + """Auto-detect: groq (free) is preferred over mistral 
(paid).""" + monkeypatch.setenv("GROQ_API_KEY", "gsk-test") + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ + patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", True), \ + patch("tools.transcription_tools._HAS_MISTRAL", True): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "groq" + + def test_auto_detect_skips_mistral_without_sdk(self, monkeypatch): + """Auto-detect: mistral skipped when key is set but SDK is not installed.""" + monkeypatch.delenv("GROQ_API_KEY", raising=False) + monkeypatch.delenv("VOICE_TOOLS_OPENAI_KEY", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.setenv("MISTRAL_API_KEY", "test-key") + with patch("tools.transcription_tools._HAS_FASTER_WHISPER", False), \ + patch("tools.transcription_tools._has_local_command", return_value=False), \ + patch("tools.transcription_tools._HAS_OPENAI", False), \ + patch("tools.transcription_tools._HAS_MISTRAL", False): + from tools.transcription_tools import _get_provider + assert _get_provider({}) == "none" + + +# ============================================================================ +# transcribe_audio — Mistral dispatch +# ============================================================================ + +class TestTranscribeAudioMistralDispatch: + def test_dispatches_to_mistral(self, sample_ogg): + with patch("tools.transcription_tools._load_stt_config", return_value={"provider": "mistral"}), \ + patch("tools.transcription_tools._get_provider", return_value="mistral"), \ + patch("tools.transcription_tools._transcribe_mistral", + return_value={"success": True, "transcript": "hi", "provider": "mistral"}) as mock_mistral: + from tools.transcription_tools import transcribe_audio + result = transcribe_audio(sample_ogg) + + assert result["success"] is True + assert result["provider"] == 
"mistral" + mock_mistral.assert_called_once() + + def test_config_mistral_model_used(self, sample_ogg): + config = {"provider": "mistral", "mistral": {"model": "voxtral-mini-2602"}} + with patch("tools.transcription_tools._load_stt_config", return_value=config), \ + patch("tools.transcription_tools._get_provider", return_value="mistral"), \ + patch("tools.transcription_tools._transcribe_mistral", + return_value={"success": True, "transcript": "hi"}) as mock_mistral: + from tools.transcription_tools import transcribe_audio + transcribe_audio(sample_ogg, model=None) + + assert mock_mistral.call_args[0][1] == "voxtral-mini-2602" + + def test_model_override_passed_to_mistral(self, sample_ogg): + with patch("tools.transcription_tools._load_stt_config", return_value={}), \ + patch("tools.transcription_tools._get_provider", return_value="mistral"), \ + patch("tools.transcription_tools._transcribe_mistral", + return_value={"success": True, "transcript": "hi"}) as mock_mistral: + from tools.transcription_tools import transcribe_audio + transcribe_audio(sample_ogg, model="voxtral-mini-2602") + + assert mock_mistral.call_args[0][1] == "voxtral-mini-2602" diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 4f07e5c4..296c74a2 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -57,6 +57,7 @@ def _safe_find_spec(module_name: str) -> bool: _HAS_FASTER_WHISPER = _safe_find_spec("faster_whisper") _HAS_OPENAI = _safe_find_spec("openai") +_HAS_MISTRAL = _safe_find_spec("mistralai") # --------------------------------------------------------------------------- # Constants @@ -67,6 +68,7 @@ DEFAULT_LOCAL_MODEL = "base" DEFAULT_LOCAL_STT_LANGUAGE = "en" DEFAULT_STT_MODEL = os.getenv("STT_OPENAI_MODEL", "whisper-1") DEFAULT_GROQ_STT_MODEL = os.getenv("STT_GROQ_MODEL", "whisper-large-v3-turbo") +DEFAULT_MISTRAL_STT_MODEL = os.getenv("STT_MISTRAL_MODEL", "voxtral-mini-latest") LOCAL_STT_COMMAND_ENV = "HERMES_LOCAL_STT_COMMAND" 
LOCAL_STT_LANGUAGE_ENV = "HERMES_LOCAL_STT_LANGUAGE" COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin") @@ -74,7 +76,7 @@ COMMON_LOCAL_BIN_DIRS = ("/opt/homebrew/bin", "/usr/local/bin") GROQ_BASE_URL = os.getenv("GROQ_BASE_URL", "https://api.groq.com/openai/v1") OPENAI_BASE_URL = os.getenv("STT_OPENAI_BASE_URL", "https://api.openai.com/v1") -SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac"} +SUPPORTED_FORMATS = {".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm", ".ogg", ".aac", ".flac"} LOCAL_NATIVE_AUDIO_FORMATS = {".wav", ".aiff", ".aif"} MAX_FILE_SIZE = 25 * 1024 * 1024 # 25 MB @@ -227,9 +229,18 @@ def _get_provider(stt_config: dict) -> str: ) return "none" + if provider == "mistral": + if _HAS_MISTRAL and os.getenv("MISTRAL_API_KEY"): + return "mistral" + logger.warning( + "STT provider 'mistral' configured but mistralai package " + "not installed or MISTRAL_API_KEY not set" + ) + return "none" + return provider # Unknown — let it fail downstream - # --- Auto-detect (no explicit provider): local > groq > openai --------- + # --- Auto-detect (no explicit provider): local > groq > openai > mistral - if _HAS_FASTER_WHISPER: return "local" @@ -241,6 +252,9 @@ def _get_provider(stt_config: dict) -> str: if _HAS_OPENAI and _has_openai_audio_backend(): logger.info("No local STT available, using OpenAI Whisper API") return "openai" + if _HAS_MISTRAL and os.getenv("MISTRAL_API_KEY"): + logger.info("No local STT available, using Mistral Voxtral Transcribe API") + return "mistral" return "none" # --------------------------------------------------------------------------- @@ -516,6 +530,45 @@ def _transcribe_openai(file_path: str, model_name: str) -> Dict[str, Any]: logger.error("OpenAI transcription failed: %s", e, exc_info=True) return {"success": False, "transcript": "", "error": f"Transcription failed: {e}"} +# --------------------------------------------------------------------------- +# Provider: 
mistral (Voxtral Transcribe API) +# --------------------------------------------------------------------------- + + +def _transcribe_mistral(file_path: str, model_name: str) -> Dict[str, Any]: + """Transcribe using Mistral Voxtral Transcribe API. + + Uses the ``mistralai`` Python SDK to call ``/v1/audio/transcriptions``. + Requires ``MISTRAL_API_KEY`` environment variable. + """ + api_key = os.getenv("MISTRAL_API_KEY") + if not api_key: + return {"success": False, "transcript": "", "error": "MISTRAL_API_KEY not set"} + + try: + from mistralai import Mistral + + with Mistral(api_key=api_key) as client: + with open(file_path, "rb") as audio_file: + result = client.audio.transcriptions.complete( + model=model_name, + file={"content": audio_file, "file_name": Path(file_path).name}, + ) + + transcript_text = _extract_transcript_text(result) + logger.info( + "Transcribed %s via Mistral API (%s, %d chars)", + Path(file_path).name, model_name, len(transcript_text), + ) + return {"success": True, "transcript": transcript_text, "provider": "mistral"} + + except PermissionError: + return {"success": False, "transcript": "", "error": f"Permission denied: {file_path}"} + except Exception as e: + logger.error("Mistral transcription failed: %s", e, exc_info=True) + return {"success": False, "transcript": "", "error": f"Mistral transcription failed: {type(e).__name__}"} + + # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- @@ -577,6 +630,11 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A model_name = model or openai_cfg.get("model", DEFAULT_STT_MODEL) return _transcribe_openai(file_path, model_name) + if provider == "mistral": + mistral_cfg = stt_config.get("mistral", {}) + model_name = model or mistral_cfg.get("model", DEFAULT_MISTRAL_STT_MODEL) + return _transcribe_mistral(file_path, model_name) + # No provider available 
return { "success": False, @@ -584,7 +642,8 @@ def transcribe_audio(file_path: str, model: Optional[str] = None) -> Dict[str, A "error": ( "No STT provider available. Install faster-whisper for free local " f"transcription, configure {LOCAL_STT_COMMAND_ENV} or install a local whisper CLI, " - "set GROQ_API_KEY for free Groq Whisper, or set VOICE_TOOLS_OPENAI_KEY " + "set GROQ_API_KEY for free Groq Whisper, set MISTRAL_API_KEY for Mistral " + "Voxtral Transcribe, or set VOICE_TOOLS_OPENAI_KEY " "or OPENAI_API_KEY for the OpenAI Whisper API." ), } diff --git a/uv.lock b/uv.lock index 8a5db543..a3df304c 100644 --- a/uv.lock +++ b/uv.lock @@ -1158,6 +1158,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/97/a8/c070e1340636acb38d4e6a7e45c46d168a462b48b9b3257e14ca0e5af79b/environs-14.6.0-py3-none-any.whl", hash = "sha256:f8fb3d6c6a55872b0c6db077a28f5a8c7b8984b7c32029613d44cef95cfc0812", size = 17205, upload-time = "2026-02-20T04:02:07.299Z" }, ] +[[package]] +name = "eval-type-backport" +version = "0.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/a3/cafafb4558fd638aadfe4121dc6cefb8d743368c085acb2f521df0f3d9d7/eval_type_backport-0.3.1.tar.gz", hash = "sha256:57e993f7b5b69d271e37482e62f74e76a0276c82490cf8e4f0dffeb6b332d5ed", size = 9445, upload-time = "2025-12-02T11:51:42.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/22/fdc2e30d43ff853720042fa15baa3e6122722be1a7950a98233ebb55cd71/eval_type_backport-0.3.1-py3-none-any.whl", hash = "sha256:279ab641905e9f11129f56a8a78f493518515b83402b860f6f06dd7c011fdfa8", size = 6063, upload-time = "2025-12-02T11:51:41.665Z" }, +] + [[package]] name = "exa-py" version = "2.10.2" @@ -1683,6 +1692,7 @@ all = [ { name = "honcho-ai" }, { name = "lark-oapi" }, { name = "mcp" }, + { name = "mistralai" }, { name = "modal" }, { name = "numpy" }, { name = "ptyprocess", marker = "sys_platform != 'win32'" }, @@ -1738,6 +1748,9 @@ messaging 
= [ { name = "slack-bolt" }, { name = "slack-sdk" }, ] +mistral = [ + { name = "mistralai" }, +] modal = [ { name = "modal" }, ] @@ -1803,6 +1816,7 @@ requires-dist = [ { name = "hermes-agent", extras = ["honcho"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["mcp"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["messaging"], marker = "extra == 'all'" }, + { name = "hermes-agent", extras = ["mistral"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["modal"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["pty"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["slack"], marker = "extra == 'all'" }, @@ -1817,6 +1831,7 @@ requires-dist = [ { name = "matrix-nio", extras = ["e2e"], marker = "extra == 'matrix'", specifier = ">=0.24.0,<1" }, { name = "mcp", marker = "extra == 'dev'", specifier = ">=1.2.0,<2" }, { name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.2.0,<2" }, + { name = "mistralai", marker = "extra == 'mistral'", specifier = ">=2.3.0,<3" }, { name = "modal", marker = "extra == 'modal'", specifier = ">=1.0.0,<2" }, { name = "numpy", marker = "extra == 'voice'", specifier = ">=1.24.0,<3" }, { name = "openai", specifier = ">=2.21.0,<3" }, @@ -1846,7 +1861,7 @@ requires-dist = [ { name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" }, { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git" }, ] -provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "dingtalk", "feishu", "rl", "yc-bench", "all"] +provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "dingtalk", "feishu", "rl", "yc-bench", "all"] [[package]] name = "hf-transfer" 
@@ -2191,6 +2206,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload-time = "2023-09-01T12:34:42.563Z" }, ] +[[package]] +name = "jsonpath-python" +version = "1.1.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/db/2f4ecc24da35c6142b39c353d5b7c16eef955cc94b35a48d3fa47996d7c3/jsonpath_python-1.1.5.tar.gz", hash = "sha256:ceea2efd9e56add09330a2c9631ea3d55297b9619348c1055e5bfb9cb0b8c538", size = 87352, upload-time = "2026-03-17T06:16:40.597Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/50/1a313fb700526b134c71eb8a225d8b83be0385dbb0204337b4379c698cef/jsonpath_python-1.1.5-py3-none-any.whl", hash = "sha256:a60315404d70a65e76c9a782c84e50600480221d94a58af47b7b4d437351cb4b", size = 14090, upload-time = "2026-03-17T06:16:39.152Z" }, +] + [[package]] name = "jsonschema" version = "4.26.0" @@ -2616,6 +2640,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mistralai" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "eval-type-backport" }, + { name = "httpx" }, + { name = "jsonpath-python" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "pydantic" }, + { name = "python-dateutil" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/05/40c38c8893f0ec858756b30f4a939378fc62cf33565af538a843497f3f24/mistralai-2.3.0.tar.gz", hash = 
"sha256:eb371a9b3b62552f3d4a274ecf5b2c48b90fd3439ecd1425e7f5163cdd87e29a", size = 387145, upload-time = "2026-04-03T15:06:48.927Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/57/d06cbfd96ec6dc45d5c1fe9456f7fcfcb9549c9fa91e213561d1d88729e7/mistralai-2.3.0-py3-none-any.whl", hash = "sha256:22111747c215f1632141660151924f06579f87cd8db2649e0b1f87721d076851", size = 925544, upload-time = "2026-04-03T15:06:47.593Z" }, +] + [[package]] name = "modal" version = "1.3.4" @@ -3073,32 +3116,32 @@ wheels = [ [[package]] name = "opentelemetry-api" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2c/1d/4049a9e8698361cc1a1aa03a6c59e4fa4c71e0c0f94a30f988a6876a2ae6/opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f", size = 70851, upload-time = "2026-03-04T14:17:21.555Z" } +sdist = { url = "https://files.pythonhosted.org/packages/97/b9/3161be15bb8e3ad01be8be5a968a9237c3027c5be504362ff800fca3e442/opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c", size = 65767, upload-time = "2025-12-11T13:32:39.182Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/bf/93795954016c522008da367da292adceed71cca6ee1717e1d64c83089099/opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9", size = 68676, upload-time = "2026-03-04T14:17:01.24Z" }, + { url = "https://files.pythonhosted.org/packages/cf/df/d3f1ddf4bb4cb50ed9b1139cc7b1c54c34a1e7ce8fd1b9a37c0d1551a6bd/opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950", size = 66356, upload-time = "2025-12-11T13:32:17.304Z" }, ] [[package]] name = 
"opentelemetry-exporter-otlp-proto-common" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-proto" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/51/bc/1559d46557fe6eca0b46c88d4c2676285f1f3be2e8d06bb5d15fbffc814a/opentelemetry_exporter_otlp_proto_common-1.40.0.tar.gz", hash = "sha256:1cbee86a4064790b362a86601ee7934f368b81cd4cc2f2e163902a6e7818a0fa", size = 20416, upload-time = "2026-03-04T14:17:23.801Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e9/9d/22d241b66f7bbde88a3bfa6847a351d2c46b84de23e71222c6aae25c7050/opentelemetry_exporter_otlp_proto_common-1.39.1.tar.gz", hash = "sha256:763370d4737a59741c89a67b50f9e39271639ee4afc999dadfe768541c027464", size = 20409, upload-time = "2025-12-11T13:32:40.885Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/ca/8f122055c97a932311a3f640273f084e738008933503d0c2563cd5d591fc/opentelemetry_exporter_otlp_proto_common-1.40.0-py3-none-any.whl", hash = "sha256:7081ff453835a82417bf38dccf122c827c3cbc94f2079b03bba02a3165f25149", size = 18369, upload-time = "2026-03-04T14:17:04.796Z" }, + { url = "https://files.pythonhosted.org/packages/8c/02/ffc3e143d89a27ac21fd557365b98bd0653b98de8a101151d5805b5d4c33/opentelemetry_exporter_otlp_proto_common-1.39.1-py3-none-any.whl", hash = "sha256:08f8a5862d64cc3435105686d0216c1365dc5701f86844a8cd56597d0c764fde", size = 18366, upload-time = "2025-12-11T13:32:20.2Z" }, ] [[package]] name = "opentelemetry-exporter-otlp-proto-http" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "googleapis-common-protos" }, @@ -3109,14 +3152,14 @@ dependencies = [ { name = "requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2e/fa/73d50e2c15c56be4d000c98e24221d494674b0cc95524e2a8cb3856d95a4/opentelemetry_exporter_otlp_proto_http-1.40.0.tar.gz", hash = 
"sha256:db48f5e0f33217588bbc00274a31517ba830da576e59503507c839b38fa0869c", size = 17772, upload-time = "2026-03-04T14:17:25.324Z" } +sdist = { url = "https://files.pythonhosted.org/packages/80/04/2a08fa9c0214ae38880df01e8bfae12b067ec0793446578575e5080d6545/opentelemetry_exporter_otlp_proto_http-1.39.1.tar.gz", hash = "sha256:31bdab9745c709ce90a49a0624c2bd445d31a28ba34275951a6a362d16a0b9cb", size = 17288, upload-time = "2025-12-11T13:32:42.029Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/3a/8865d6754e61c9fb170cdd530a124a53769ee5f740236064816eb0ca7301/opentelemetry_exporter_otlp_proto_http-1.40.0-py3-none-any.whl", hash = "sha256:a8d1dab28f504c5d96577d6509f80a8150e44e8f45f82cdbe0e34c99ab040069", size = 19960, upload-time = "2026-03-04T14:17:07.153Z" }, + { url = "https://files.pythonhosted.org/packages/95/f1/b27d3e2e003cd9a3592c43d099d2ed8d0a947c15281bf8463a256db0b46c/opentelemetry_exporter_otlp_proto_http-1.39.1-py3-none-any.whl", hash = "sha256:d9f5207183dd752a412c4cd564ca8875ececba13be6e9c6c370ffb752fd59985", size = 19641, upload-time = "2025-12-11T13:32:22.248Z" }, ] [[package]] name = "opentelemetry-instrumentation" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, @@ -3124,14 +3167,14 @@ dependencies = [ { name = "packaging" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/37/6bf8e66bfcee5d3c6515b79cb2ee9ad05fe573c20f7ceb288d0e7eeec28c/opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7", size = 32606, upload-time = "2026-03-04T14:20:16.825Z" } +sdist = { url = "https://files.pythonhosted.org/packages/41/0f/7e6b713ac117c1f5e4e3300748af699b9902a2e5e34c9cf443dde25a01fa/opentelemetry_instrumentation-0.60b1.tar.gz", hash = "sha256:57ddc7974c6eb35865af0426d1a17132b88b2ed8586897fee187fd5b8944bd6a", size = 31706, upload-time = 
"2025-12-11T13:36:42.515Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d8/3e/f6f10f178b6316de67f0dfdbbb699a24fbe8917cf1743c1595fb9dcdd461/opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63", size = 33448, upload-time = "2026-03-04T14:19:02.447Z" }, + { url = "https://files.pythonhosted.org/packages/77/d2/6788e83c5c86a2690101681aeef27eeb2a6bf22df52d3f263a22cee20915/opentelemetry_instrumentation-0.60b1-py3-none-any.whl", hash = "sha256:04480db952b48fb1ed0073f822f0ee26012b7be7c3eac1a3793122737c78632d", size = 33096, upload-time = "2025-12-11T13:35:33.067Z" }, ] [[package]] name = "opentelemetry-instrumentation-aiohttp-client" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, @@ -3140,57 +3183,57 @@ dependencies = [ { name = "opentelemetry-util-http" }, { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/24fed4de661de107f2426b28bbd87b51eaab28a2339b62f269a36ae24505/opentelemetry_instrumentation_aiohttp_client-0.61b0.tar.gz", hash = "sha256:c53ab3b88efcb7ce98c1129cc0389f0a1f214eb3675269b6c157770adcf47877", size = 19292, upload-time = "2026-03-04T14:20:18.408Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/79/95be90c555fd7efde79dcba36ea5c668815aa2d0a4250b63687e0f91c74a/opentelemetry_instrumentation_aiohttp_client-0.60b1.tar.gz", hash = "sha256:d0e7d5aa057791ca4d9090b0d3c9982f253c1a24b6bc78a734fc18d8dd97927b", size = 15907, upload-time = "2025-12-11T13:36:44.434Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/f3/1edc42716521a3f754ac32ffb908f102e0f131f8e43fcd9ab29cab286723/opentelemetry_instrumentation_aiohttp_client-0.61b0-py3-none-any.whl", hash = "sha256:09bc47514c162507b357366ce15578743fd6305078cf7d872db1c99c13fa6972", size = 14534, upload-time = "2026-03-04T14:19:05.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/ca/f4/1a1ec632c86269750ae833c8fbdd4c8d15316eb1c21e3544e34791c805ee/opentelemetry_instrumentation_aiohttp_client-0.60b1-py3-none-any.whl", hash = "sha256:34c5097256a30b16c5a2a88a409ed82b92972a494c43212c85632d204a78c2a1", size = 12694, upload-time = "2025-12-11T13:35:35.034Z" }, ] [[package]] name = "opentelemetry-proto" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4c/77/dd38991db037fdfce45849491cb61de5ab000f49824a00230afb112a4392/opentelemetry_proto-1.40.0.tar.gz", hash = "sha256:03f639ca129ba513f5819810f5b1f42bcb371391405d99c168fe6937c62febcd", size = 45667, upload-time = "2026-03-04T14:17:31.194Z" } +sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/b2/189b2577dde745b15625b3214302605b1353436219d42b7912e77fa8dc24/opentelemetry_proto-1.40.0-py3-none-any.whl", hash = "sha256:266c4385d88923a23d63e353e9761af0f47a6ed0d486979777fe4de59dc9b25f", size = 72073, upload-time = "2026-03-04T14:17:16.673Z" }, + { url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, ] [[package]] name = "opentelemetry-sdk" -version = "1.40.0" +version = "1.39.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/58/fd/3c3125b20ba18ce2155ba9ea74acb0ae5d25f8cd39cfd37455601b7955cc/opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2", size = 184252, upload-time = "2026-03-04T14:17:31.87Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/fb/c76080c9ba07e1e8235d24cdcc4d125ef7aa3edf23eb4e497c2e50889adc/opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6", size = 171460, upload-time = "2025-12-11T13:32:49.369Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/c5/6a852903d8bfac758c6dc6e9a68b015d3c33f2f1be5e9591e0f4b69c7e0a/opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1", size = 141951, upload-time = "2026-03-04T14:17:17.961Z" }, + { url = "https://files.pythonhosted.org/packages/7c/98/e91cf858f203d86f4eccdf763dcf01cf03f1dae80c3750f7e635bfa206b6/opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c", size = 132565, upload-time = "2025-12-11T13:32:35.069Z" }, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6d/c0/4ae7973f3c2cfd2b6e321f1675626f0dab0a97027cc7a297474c9c8f3d04/opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a", size = 145755, upload-time = "2026-03-04T14:17:32.664Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/df/553f93ed38bf22f4b999d9be9c185adb558982214f33eae539d3b5cd0858/opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = 
"sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953", size = 137935, upload-time = "2025-12-11T13:32:50.487Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/37/cc6a55e448deaa9b27377d087da8615a3416d8ad523d5960b78dbeadd02a/opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2", size = 231621, upload-time = "2026-03-04T14:17:19.33Z" }, + { url = "https://files.pythonhosted.org/packages/7a/5e/5958555e09635d09b75de3c4f8b9cae7335ca545d77392ffe7331534c402/opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb", size = 219982, upload-time = "2025-12-11T13:32:36.955Z" }, ] [[package]] name = "opentelemetry-util-http" -version = "0.61b0" +version = "0.60b1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/3c/f0196223efc5c4ca19f8fad3d5462b171ac6333013335ce540c01af419e9/opentelemetry_util_http-0.61b0.tar.gz", hash = "sha256:1039cb891334ad2731affdf034d8fb8b48c239af9b6dd295e5fabd07f1c95572", size = 11361, upload-time = "2026-03-04T14:20:57.01Z" } +sdist = { url = "https://files.pythonhosted.org/packages/50/fc/c47bb04a1d8a941a4061307e1eddfa331ed4d0ab13d8a9781e6db256940a/opentelemetry_util_http-0.60b1.tar.gz", hash = "sha256:0d97152ca8c8a41ced7172d29d3622a219317f74ae6bb3027cfbdcf22c3cc0d6", size = 11053, upload-time = "2025-12-11T13:37:25.115Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/e5/c08aaaf2f64288d2b6ef65741d2de5454e64af3e050f34285fb1907492fe/opentelemetry_util_http-0.61b0-py3-none-any.whl", hash = "sha256:8e715e848233e9527ea47e275659ea60a57a75edf5206a3b937e236a6da5fc33", size = 9281, upload-time = "2026-03-04T14:20:08.364Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/5c/d3f1733665f7cd582ef0842fb1d2ed0bc1fba10875160593342d22bba375/opentelemetry_util_http-0.60b1-py3-none-any.whl", hash = "sha256:66381ba28550c91bee14dcba8979ace443444af1ed609226634596b4b0faf199", size = 8947, upload-time = "2025-12-11T13:36:37.151Z" }, ] [[package]] diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md index ca64170d..0cd4ed69 100644 --- a/website/docs/user-guide/features/tts.md +++ b/website/docs/user-guide/features/tts.md @@ -102,11 +102,13 @@ Local transcription works out of the box when `faster-whisper` is installed. If ```yaml # In ~/.hermes/config.yaml stt: - provider: "local" # "local" | "groq" | "openai" + provider: "local" # "local" | "groq" | "openai" | "mistral" local: model: "base" # tiny, base, small, medium, large-v3 openai: model: "whisper-1" # whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe + mistral: + model: "voxtral-mini-latest" # voxtral-mini-latest, voxtral-mini-2602 ``` ### Provider Details @@ -125,6 +127,8 @@ stt: **OpenAI API** — Accepts `VOICE_TOOLS_OPENAI_KEY` first and falls back to `OPENAI_API_KEY`. Supports `whisper-1`, `gpt-4o-mini-transcribe`, and `gpt-4o-transcribe`. +**Mistral API (Voxtral Transcribe)** — Requires `MISTRAL_API_KEY`. Uses Mistral's [Voxtral Transcribe](https://docs.mistral.ai/capabilities/audio/speech_to_text/) models. Supports 13 languages, speaker diarization, and word-level timestamps. Install with `pip install hermes-agent[mistral]`. + **Custom local CLI fallback** — Set `HERMES_LOCAL_STT_COMMAND` if you want Hermes to call a local transcription command directly. The command template supports `{input_path}`, `{output_dir}`, `{language}`, and `{model}` placeholders. 
### Fallback Behavior @@ -133,4 +137,5 @@ If your configured provider isn't available, Hermes automatically falls back: - **Local faster-whisper unavailable** → Tries a local `whisper` CLI or `HERMES_LOCAL_STT_COMMAND` before cloud providers - **Groq key not set** → Falls back to local transcription, then OpenAI - **OpenAI key not set** → Falls back to local transcription, then Groq +- **Mistral key/SDK not set** → Skipped in auto-detect; falls through to next available provider - **Nothing available** → Voice messages pass through with an accurate note to the user From d46db0a1b45b3f56e4fbb1f788f70d68482d1533 Mon Sep 17 00:00:00 2001 From: jjovalle99 Date: Mon, 6 Apr 2026 18:27:56 +0100 Subject: [PATCH 147/154] fix(tools): use correct import path for mistralai SDK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mistralai v2.x is a namespace package — `Mistral` class lives at `mistralai.client`, not at the top-level `mistralai` module. The previous `from mistralai import Mistral` raises ImportError at runtime. Update both production code and test fixture to use the correct path. 
--- tests/tools/test_transcription_tools.py | 2 +- tools/transcription_tools.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index 9f5fab62..f781c32b 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -875,7 +875,7 @@ def mock_mistral_module(): mock_mistral_cls = MagicMock(return_value=mock_client) fake_module = MagicMock() fake_module.Mistral = mock_mistral_cls - with patch.dict("sys.modules", {"mistralai": fake_module}): + with patch.dict("sys.modules", {"mistralai": fake_module, "mistralai.client": fake_module}): yield mock_client diff --git a/tools/transcription_tools.py b/tools/transcription_tools.py index 296c74a2..d4f9145c 100644 --- a/tools/transcription_tools.py +++ b/tools/transcription_tools.py @@ -546,7 +546,7 @@ def _transcribe_mistral(file_path: str, model_name: str) -> Dict[str, Any]: return {"success": False, "transcript": "", "error": "MISTRAL_API_KEY not set"} try: - from mistralai import Mistral + from mistralai.client import Mistral with Mistral(api_key=api_key) as client: with open(file_path, "rb") as audio_file: From 105caa001bfd8d2c856c776220bd1ed98e98c6f6 Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 8 Apr 2026 13:41:47 -0700 Subject: [PATCH 148/154] chore: regenerate uv.lock against current main --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index a3df304c..8bad8b38 100644 --- a/uv.lock +++ b/uv.lock @@ -1652,7 +1652,7 @@ wheels = [ [[package]] name = "hermes-agent" -version = "0.7.0" +version = "0.8.0" source = { editable = "." 
} dependencies = [ { name = "anthropic" }, From 8385f54e9842fae583cabf2f22e0c6184ef41066 Mon Sep 17 00:00:00 2001 From: yyovil Date: Thu, 9 Apr 2026 03:39:39 +0530 Subject: [PATCH 149/154] fix(nix): preserve voice deps on aarch64-darwin via nixpkgs (#5079) * Fixes the nix profile installation for hermes agent (cherry picked from commit c822a082a8c0ce33f3d406e6b2ae1b2833071df0) * Update nix/python.nix Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Applied gating for aarch64-darwin platform Entire-Checkpoint: 1ab2074bd4f1 --------- Co-authored-by: yyovil Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- nix/python.nix | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/nix/python.nix b/nix/python.nix index 406e7aee..160b4ee7 100644 --- a/nix/python.nix +++ b/nix/python.nix @@ -6,14 +6,68 @@ uv2nix, pyproject-nix, pyproject-build-systems, + stdenv, }: let workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; }; + hacks = callPackage pyproject-nix.build.hacks { }; overlay = workspace.mkPyprojectOverlay { sourcePreference = "wheel"; }; + isAarch64Darwin = stdenv.hostPlatform.system == "aarch64-darwin"; + + # Keep the workspace locked through uv2nix, but supply the local voice stack + # from nixpkgs so wheel-only transitive artifacts do not break evaluation. 
+ mkPrebuiltPassthru = dependencies: { + inherit dependencies; + optional-dependencies = { }; + dependency-groups = { }; + }; + + mkPrebuiltOverride = final: from: dependencies: + hacks.nixpkgsPrebuilt { + inherit from; + prev = { + nativeBuildInputs = [ final.pyprojectHook ]; + passthru = mkPrebuiltPassthru dependencies; + }; + }; + + pythonPackageOverrides = final: _prev: + if isAarch64Darwin then { + numpy = mkPrebuiltOverride final python311.pkgs.numpy { }; + + av = mkPrebuiltOverride final python311.pkgs.av { }; + + humanfriendly = mkPrebuiltOverride final python311.pkgs.humanfriendly { }; + + coloredlogs = mkPrebuiltOverride final python311.pkgs.coloredlogs { + humanfriendly = [ ]; + }; + + onnxruntime = mkPrebuiltOverride final python311.pkgs.onnxruntime { + coloredlogs = [ ]; + numpy = [ ]; + packaging = [ ]; + }; + + ctranslate2 = mkPrebuiltOverride final python311.pkgs.ctranslate2 { + numpy = [ ]; + pyyaml = [ ]; + }; + + faster-whisper = mkPrebuiltOverride final python311.pkgs.faster-whisper { + av = [ ]; + ctranslate2 = [ ]; + huggingface-hub = [ ]; + onnxruntime = [ ]; + tokenizers = [ ]; + tqdm = [ ]; + }; + } else {}; + pythonSet = (callPackage pyproject-nix.build.packages { python = python311; @@ -21,6 +75,7 @@ let (lib.composeManyExtensions [ pyproject-build-systems.overlays.default overlay + pythonPackageOverrides ]); in pythonSet.mkVirtualEnv "hermes-agent-env" { From 8de91ce9d22fd979f6ab61a4e5ff9da5e1c69647 Mon Sep 17 00:00:00 2001 From: Siddharth Balyan <52913345+alt-glitch@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:39:53 -0700 Subject: [PATCH 150/154] fix(nix): make addToSystemPackages fully functional for interactive CLI (#6317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(nix): export HERMES_HOME system-wide when addToSystemPackages is true The `addToSystemPackages` option's documentation (and the `:::tip` block in `website/docs/getting-started/nix-setup.md`) promises that enabling 
it both puts the `hermes` CLI on PATH and sets `HERMES_HOME` system-wide so interactive shells share state with the gateway service. The module only did the former, so running `hermes` in a user shell silently created a separate `~/.hermes/` directory instead of the managed `${stateDir}/.hermes`. Implement the documented behavior by also setting `environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes"` in the same mkIf block, and update the option description to match. Fixes #6044 * fix(nix): preserve group-readable permissions in managed mode The NixOS module sets HERMES_HOME directories to 0750 and files to 0640 so interactive users in the hermes group can share state with the gateway service. Two issues prevented this from working: 1. hermes_cli/config.py: _secure_dir() unconditionally chmod'd HERMES_HOME to 0700 on every startup, overwriting the NixOS module's 0750. Similarly, _secure_file() forced 0600 on config files. Both now skip in managed mode (detected via .managed marker or HERMES_MANAGED env var). 2. nix/nixosModules.nix: the .env file was created with 0600 (owner-only), while config.yaml was already 0640 (group-readable). Changed to 0640 for consistency — users granted hermes group membership should be able to read the managed .env. Verified with a NixOS VM integration test: a normal user in the hermes group can now run `hermes version` and `hermes config` against the managed HERMES_HOME without PermissionError. 
Co-Authored-By: Claude Opus 4.6 (1M context) --------- Co-authored-by: zerone0x Co-authored-by: Claude Opus 4.6 (1M context) --- hermes_cli/config.py | 17 +++++++++++++++-- nix/nixosModules.nix | 12 ++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 350d99cf..6e86886b 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -157,7 +157,14 @@ def get_project_root() -> Path: return Path(__file__).parent.parent.resolve() def _secure_dir(path): - """Set directory to owner-only access (0700). No-op on Windows.""" + """Set directory to owner-only access (0700). No-op on Windows. + + Skipped in managed mode — the NixOS module sets group-readable + permissions (0750) so interactive users in the hermes group can + share state with the gateway service. + """ + if is_managed(): + return try: os.chmod(path, 0o700) except (OSError, NotImplementedError): @@ -165,7 +172,13 @@ def _secure_dir(path): def _secure_file(path): - """Set file to owner-only read/write (0600). No-op on Windows.""" + """Set file to owner-only read/write (0600). No-op on Windows. + + Skipped in managed mode — the NixOS activation script sets + group-readable permissions (0640) on config files. + """ + if is_managed(): + return try: if os.path.exists(str(path)): os.chmod(path, 0o600) diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix index acf9a6e9..c961aa61 100644 --- a/nix/nixosModules.nix +++ b/nix/nixosModules.nix @@ -464,7 +464,11 @@ addToSystemPackages = mkOption { type = types.bool; default = false; - description = "Add hermes CLI to environment.systemPackages."; + description = '' + Add the hermes CLI to environment.systemPackages and export + HERMES_HOME system-wide (via environment.variables) so interactive + shells share state with the gateway service. 
+ ''; }; # ── OCI Container (opt-in) ────────────────────────────────────────── @@ -545,8 +549,12 @@ }) # ── Host CLI ────────────────────────────────────────────────────── + # Add the hermes CLI to system PATH and export HERMES_HOME system-wide + # so interactive shells share state (sessions, skills, cron) with the + # gateway service instead of creating a separate ~/.hermes/. (lib.mkIf cfg.addToSystemPackages { environment.systemPackages = [ cfg.package ]; + environment.variables.HERMES_HOME = "${cfg.stateDir}/.hermes"; }) # ── Directories ─────────────────────────────────────────────────── @@ -601,7 +609,7 @@ # so this is the single source of truth for both native and container mode. ${lib.optionalString (cfg.environment != {} || cfg.environmentFiles != []) '' ENV_FILE="${cfg.stateDir}/.hermes/.env" - install -o ${cfg.user} -g ${cfg.group} -m 0600 /dev/null "$ENV_FILE" + install -o ${cfg.user} -g ${cfg.group} -m 0640 /dev/null "$ENV_FILE" cat > "$ENV_FILE" <<'HERMES_NIX_ENV_EOF' ${envFileContent} HERMES_NIX_ENV_EOF From 7156f8d866a1f064b96fcf2c7f05fa64ed74d238 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:37:05 -0700 Subject: [PATCH 151/154] =?UTF-8?q?fix:=20CI=20test=20failures=20=E2=80=94?= =?UTF-8?q?=20metadata=20key,=20cli=20console,=20docker=20env,=20vision=20?= =?UTF-8?q?order=20(#6294)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes 9 test failures on current main, incorporating ideas from PR stack #6219-#6222 by xinbenlv with corrections: - model_metadata: sync HF context length key casing (minimaxai/minimax-m2.5 → MiniMaxAI/MiniMax-M2.5) - cli.py: route quick command error output through self.console instead of creating a new ChatConsole() instance - docker.py: explicit docker_forward_env entries now bypass the Hermes secret blocklist (intentional opt-in wins over generic filter) - auxiliary_client: revert _read_main_provider() to simple 
provider.strip().lower() — the _normalize_aux_provider() call introduced in 5c03f2e7 stripped the custom: prefix, breaking named custom provider resolution - auxiliary_client: flip vision auto-detection order to active provider → OpenRouter → Nous → stop (was OR → Nous → active) - test: update vision priority test to match new order Based on PR #6219-#6222 by xinbenlv. --- agent/auxiliary_client.py | 73 +++++++++++++++++----------- agent/model_metadata.py | 2 +- cli.py | 12 ++--- tests/agent/test_auxiliary_client.py | 11 +++-- tools/environments/docker.py | 11 +++-- 5 files changed, 64 insertions(+), 45 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 2b99ac07..f743a64e 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -834,7 +834,7 @@ def _read_main_provider() -> str: if isinstance(model_cfg, dict): provider = model_cfg.get("provider", "") if isinstance(provider, str) and provider.strip(): - return _normalize_aux_provider(provider) + return provider.strip().lower() except Exception: pass return "" @@ -1470,19 +1470,25 @@ def _preferred_main_vision_provider() -> Optional[str]: def get_available_vision_backends() -> List[str]: """Return the currently available vision backends in auto-selection order. - Order: OpenRouter → Nous → active provider. This is the single source - of truth for setup, tool gating, and runtime auto-routing of vision tasks. + Order: active provider → OpenRouter → Nous → stop. This is the single + source of truth for setup, tool gating, and runtime auto-routing of + vision tasks. """ - available = [p for p in _VISION_AUTO_PROVIDER_ORDER - if _strict_vision_backend_available(p)] - # Also check the user's active provider (may be DeepSeek, Alibaba, named - # custom, etc.) — resolve_provider_client handles all provider types. + available: List[str] = [] + # 1. Active provider — if the user configured a provider, try it first. 
main_provider = _read_main_provider() - if (main_provider and main_provider not in ("auto", "") - and main_provider not in available): - client, _ = resolve_provider_client(main_provider, _read_main_model()) - if client is not None: - available.append(main_provider) + if main_provider and main_provider not in ("auto", ""): + if main_provider in _VISION_AUTO_PROVIDER_ORDER: + if _strict_vision_backend_available(main_provider): + available.append(main_provider) + else: + client, _ = resolve_provider_client(main_provider, _read_main_model()) + if client is not None: + available.append(main_provider) + # 2. OpenRouter, 3. Nous — skip if already covered by main provider. + for p in _VISION_AUTO_PROVIDER_ORDER: + if p not in available and _strict_vision_backend_available(p): + available.append(p) return available @@ -1529,28 +1535,37 @@ def resolve_vision_provider_client( if requested == "auto": # Vision auto-detection order: - # 1. OpenRouter (known vision-capable default model) - # 2. Nous Portal (known vision-capable default model) - # 3. Active provider + model (user's main chat config) + # 1. Active provider + model (user's main chat config) + # 2. OpenRouter (known vision-capable default model) + # 3. Nous Portal (known vision-capable default model) # 4. Stop - for candidate in _VISION_AUTO_PROVIDER_ORDER: - sync_client, default_model = _resolve_strict_vision_backend(candidate) - if sync_client is not None: - return _finalize(candidate, sync_client, default_model) - - # Fall back to the user's active provider + model. main_provider = _read_main_provider() main_model = _read_main_model() if main_provider and main_provider not in ("auto", ""): - sync_client, resolved_model = resolve_provider_client( - main_provider, main_model) + if main_provider in _VISION_AUTO_PROVIDER_ORDER: + # Known strict backend — use its defaults. 
+ sync_client, default_model = _resolve_strict_vision_backend(main_provider) + if sync_client is not None: + return _finalize(main_provider, sync_client, default_model) + else: + # Exotic provider (DeepSeek, Alibaba, named custom, etc.) + rpc_client, rpc_model = resolve_provider_client( + main_provider, main_model) + if rpc_client is not None: + logger.info( + "Vision auto-detect: using active provider %s (%s)", + main_provider, rpc_model or main_model, + ) + return _finalize( + main_provider, rpc_client, rpc_model or main_model) + + # Fall back through aggregators. + for candidate in _VISION_AUTO_PROVIDER_ORDER: + if candidate == main_provider: + continue # already tried above + sync_client, default_model = _resolve_strict_vision_backend(candidate) if sync_client is not None: - logger.info( - "Vision auto-detect: using active provider %s (%s)", - main_provider, resolved_model or main_model, - ) - return _finalize( - main_provider, sync_client, resolved_model or main_model) + return _finalize(candidate, sync_client, default_model) logger.debug("Auxiliary vision client: none available") return None, None, None diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 14364a1e..5b1d3376 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -136,7 +136,7 @@ DEFAULT_CONTEXT_LENGTHS = { "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, "moonshotai/Kimi-K2-Thinking": 262144, - "minimaxai/minimax-m2.5": 1048576, + "MiniMaxAI/MiniMax-M2.5": 1048576, "XiaomiMiMo/MiMo-V2-Flash": 32768, "mimo-v2-pro": 1048576, "mimo-v2-omni": 1048576, diff --git a/cli.py b/cli.py index f00e6b7f..f0edf67e 100644 --- a/cli.py +++ b/cli.py @@ -4668,13 +4668,13 @@ class HermesCLI: if output: self.console.print(_rich_text_from_ansi(output)) else: - ChatConsole().print("[dim]Command returned no output[/]") + self.console.print("[dim]Command returned no output[/]") except subprocess.TimeoutExpired: - ChatConsole().print("[bold red]Quick command timed out 
(30s)[/]") + self.console.print("[bold red]Quick command timed out (30s)[/]") except Exception as e: - ChatConsole().print(f"[bold red]Quick command error: {e}[/]") + self.console.print(f"[bold red]Quick command error: {e}[/]") else: - ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") + self.console.print(f"[bold red]Quick command '{base_cmd}' has no command defined[/]") elif qcmd.get("type") == "alias": target = qcmd.get("target", "").strip() if target: @@ -4683,9 +4683,9 @@ class HermesCLI: aliased_command = f"{target} {user_args}".strip() return self.process_command(aliased_command) else: - ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") + self.console.print(f"[bold red]Quick command '{base_cmd}' has no target defined[/]") else: - ChatConsole().print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") + self.console.print(f"[bold red]Quick command '{base_cmd}' has unsupported type (supported: 'exec', 'alias')[/]") # Check for plugin-registered slash commands elif base_cmd.lstrip("/") in _get_plugin_cmd_handler_names(): from hermes_cli.plugins import get_plugin_command_handler diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index c7cd12ae..dd02ad23 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -737,8 +737,8 @@ class TestAuxiliaryPoolAwareness: assert client is not None assert client.__class__.__name__ == "AnthropicAuxiliaryClient" - def test_vision_auto_prefers_openrouter_over_active_provider(self, monkeypatch): - """OpenRouter is tried before the active provider in vision auto.""" + def test_vision_auto_prefers_active_provider_over_openrouter(self, monkeypatch): + """Active provider is tried before OpenRouter in vision auto.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") monkeypatch.setenv("ANTHROPIC_API_KEY", "***") @@ -746,12 +746,13 @@ class 
TestAuxiliaryPoolAwareness: patch("agent.auxiliary_client._read_nous_auth", return_value=None), patch("agent.auxiliary_client._read_main_provider", return_value="anthropic"), patch("agent.auxiliary_client._read_main_model", return_value="claude-sonnet-4"), - patch("agent.auxiliary_client.OpenAI") as mock_openai, + patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="***"), ): provider, client, model = resolve_vision_provider_client() - # OpenRouter should win over anthropic active provider - assert provider == "openrouter" + # Active provider should win over OpenRouter + assert provider == "anthropic" def test_vision_auto_uses_named_custom_as_active_provider(self, monkeypatch): """Named custom provider works as active provider fallback in vision auto.""" diff --git a/tools/environments/docker.py b/tools/environments/docker.py index 4d3b6f50..b97040d4 100644 --- a/tools/environments/docker.py +++ b/tools/environments/docker.py @@ -505,14 +505,17 @@ class DockerEnvironment(BaseEnvironment): # (dynamic from host process). Forward values take precedence. exec_env: dict[str, str] = dict(self._env) - forward_keys = set(self._forward_env) + explicit_forward_keys = set(self._forward_env) + passthrough_keys: set[str] = set() try: from tools.env_passthrough import get_all_passthrough - forward_keys |= get_all_passthrough() + passthrough_keys = set(get_all_passthrough()) except Exception: pass - # Strip Hermes-managed secrets so they never leak into the container. - forward_keys -= _HERMES_PROVIDER_ENV_BLOCKLIST + # Explicit docker_forward_env entries are an intentional opt-in and must + # win over the generic Hermes secret blocklist. Only implicit passthrough + # keys are filtered. 
+ forward_keys = explicit_forward_keys | (passthrough_keys - _HERMES_PROVIDER_ENV_BLOCKLIST) hermes_env = _load_hermes_env_vars() if forward_keys else {} for key in sorted(forward_keys): value = os.getenv(key) From 20a5e589c66ad4e0f456d807b67cd7b14c8e220f Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:39:17 -0700 Subject: [PATCH 152/154] docs: clarify that provider "main" is for auxiliary tasks only (#6291) Users were setting model.provider to "main" after reading the auxiliary provider docs, causing "Unknown provider" errors. The "main" alias is only valid inside auxiliary:, compression:, and fallback_model: configs where it means "use the same provider as my main agent chat." Added warning admonitions and inline clarifications to: - configuration.md: Auxiliary Models provider list and Provider Options table - fallback-providers.md: Provider Options for Auxiliary Tasks table Reported by community member cn on Discord. --- website/docs/user-guide/configuration.md | 10 ++++++++-- website/docs/user-guide/features/fallback-providers.md | 4 +++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 468806b8..a31fb700 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -553,7 +553,11 @@ Every model slot in Hermes — auxiliary tasks, compression, fallback — uses t When `base_url` is set, Hermes ignores the provider and calls that endpoint directly (using `api_key` or `OPENAI_API_KEY` for auth). When only `provider` is set, Hermes uses that provider's built-in auth and base URL. -Available providers: `auto`, `openrouter`, `nous`, `codex`, `copilot`, `anthropic`, `main`, `zai`, `kimi-coding`, `minimax`, any provider registered in the [provider registry](/docs/reference/environment-variables), or any named custom provider from your `custom_providers` list (e.g. 
`provider: "beans"`). +Available providers for auxiliary tasks: `auto`, `openrouter`, `nous`, `codex`, `copilot`, `anthropic`, `main`, `zai`, `kimi-coding`, `minimax`, any provider registered in the [provider registry](/docs/reference/environment-variables), or any named custom provider from your `custom_providers` list (e.g. `provider: "beans"`). + +:::warning `"main"` is for auxiliary tasks only +The `"main"` provider option means "use whatever provider my main agent uses" — it's only valid inside `auxiliary:`, `compression:`, and `fallback_model:` configs. It is **not** a valid value for your top-level `model.provider` setting. If you use a custom OpenAI-compatible endpoint, set `provider: custom` in your `model:` section. See [AI Providers](/docs/integrations/providers) for all main model provider options. +::: ### Full auxiliary config reference @@ -647,13 +651,15 @@ AUXILIARY_VISION_MODEL=openai/gpt-4o ### Provider Options +These options apply to **auxiliary task configs** (`auxiliary:`, `compression:`, `fallback_model:`), not to your main `model.provider` setting. + | Provider | Description | Requirements | |----------|-------------|-------------| | `"auto"` | Best available (default). Vision tries OpenRouter → Nous → Codex. | — | | `"openrouter"` | Force OpenRouter — routes to any model (Gemini, GPT-4o, Claude, etc.) | `OPENROUTER_API_KEY` | | `"nous"` | Force Nous Portal | `hermes auth` | | `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex | -| `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. Works with OpenAI, local models, or any OpenAI-compatible API. | Custom endpoint credentials + base URL | +| `"main"` | Use your active custom/main endpoint. This can come from `OPENAI_BASE_URL` + `OPENAI_API_KEY` or from a custom endpoint saved via `hermes model` / `config.yaml`. 
Works with OpenAI, local models, or any OpenAI-compatible API. **Auxiliary tasks only — not valid for `model.provider`.** | Custom endpoint credentials + base URL | ### Common Setups diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index 8868162e..39c907c7 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -240,13 +240,15 @@ All three — auxiliary, compression, fallback — work the same way: set `provi ### Provider Options for Auxiliary Tasks +These options apply to `auxiliary:`, `compression:`, and `fallback_model:` configs only — `"main"` is **not** a valid value for your top-level `model.provider`. For custom endpoints, use `provider: custom` in your `model:` section (see [AI Providers](/docs/integrations/providers)). + | Provider | Description | Requirements | |----------|-------------|-------------| | `"auto"` | Try providers in order until one works (default) | At least one provider configured | | `"openrouter"` | Force OpenRouter | `OPENROUTER_API_KEY` | | `"nous"` | Force Nous Portal | `hermes auth` | | `"codex"` | Force Codex OAuth | `hermes model` → Codex | -| `"main"` | Use whatever provider the main agent uses | Active main provider configured | +| `"main"` | Use whatever provider the main agent uses (auxiliary tasks only) | Active main provider configured | | `"anthropic"` | Force Anthropic native | `ANTHROPIC_API_KEY` or Claude Code credentials | ### Direct Endpoint Override From 875a72e4c86aa3b522fcb97b194042e4264dd076 Mon Sep 17 00:00:00 2001 From: kshitijk4poor Date: Wed, 8 Apr 2026 13:51:41 -0700 Subject: [PATCH 153/154] fix: normalize httpx.URL base_url + strip thinking signatures for third-party endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two linked fixes for MiniMax Anthropic-compatible fallback: 1. 
Normalize httpx.URL to str before calling .rstrip() in auth/provider detection helpers. Some client objects expose base_url as httpx.URL, not str — crashed with AttributeError in _requires_bearer_auth() and _is_third_party_anthropic_endpoint(). Also fixes _try_activate_fallback() to use the already-stringified fb_base_url instead of raw httpx.URL. 2. Strip Anthropic-proprietary thinking block signatures when targeting third-party Anthropic-compatible endpoints (MiniMax, Azure AI Foundry, self-hosted proxies). These endpoints cannot validate Anthropic's signatures and reject them with HTTP 400 'Invalid signature in thinking block'. Now threads base_url through convert_messages_to_anthropic() → build_anthropic_kwargs() so signature management is endpoint-aware. Based on PR #4945 by kshitijk4poor (rstrip fix). Fixes #4944. --- agent/anthropic_adapter.py | 63 +++++++++++++++++++++++++++++--------- run_agent.py | 3 +- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 2d6c2dd8..fa5e391a 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -163,6 +163,17 @@ def _is_oauth_token(key: str) -> bool: return True +def _normalize_base_url_text(base_url) -> str: + """Normalize SDK/base transport URL values to a plain string for inspection. + + Some client objects expose ``base_url`` as an ``httpx.URL`` instead of a raw + string. Provider/auth detection should accept either shape. + """ + if not base_url: + return "" + return str(base_url).strip() + + def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool: """Return True for non-Anthropic endpoints using the Anthropic Messages API. @@ -170,9 +181,10 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool: with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth detection should be skipped for these endpoints. 
""" - if not base_url: + normalized = _normalize_base_url_text(base_url) + if not normalized: return False # No base_url = direct Anthropic API - normalized = base_url.rstrip("/").lower() + normalized = normalized.rstrip("/").lower() if "anthropic.com" in normalized: return False # Direct Anthropic API — OAuth applies return True # Any other endpoint is a third-party proxy @@ -182,12 +194,13 @@ def _requires_bearer_auth(base_url: str | None) -> bool: """Return True for Anthropic-compatible providers that require Bearer auth. Some third-party /anthropic endpoints implement Anthropic's Messages API but - require Authorization: Bearer instead of Anthropic's native x-api-key header. + require Authorization: Bearer *** of Anthropic's native x-api-key header. MiniMax's global and China Anthropic-compatible endpoints follow this pattern. """ - if not base_url: + normalized = _normalize_base_url_text(base_url) + if not normalized: return False - normalized = base_url.rstrip("/").lower() + normalized = normalized.rstrip("/").lower() return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic")) @@ -203,13 +216,14 @@ def build_anthropic_client(api_key: str, base_url: str = None): ) from httpx import Timeout + normalized_base_url = _normalize_base_url_text(base_url) kwargs = { "timeout": Timeout(timeout=900.0, connect=10.0), } - if base_url: - kwargs["base_url"] = base_url + if normalized_base_url: + kwargs["base_url"] = normalized_base_url - if _requires_bearer_auth(base_url): + if _requires_bearer_auth(normalized_base_url): # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in # Authorization: Bearer even for regular API keys. Route those endpoints # through auth_token so the SDK sends Bearer auth instead of x-api-key. 
@@ -942,12 +956,18 @@ def _convert_content_to_anthropic(content: Any) -> Any: def convert_messages_to_anthropic( messages: List[Dict], + base_url: str | None = None, ) -> Tuple[Optional[Any], List[Dict]]: """Convert OpenAI-format messages to Anthropic format. Returns (system_prompt, anthropic_messages). System messages are extracted since Anthropic takes them as a separate param. system_prompt is a string or list of content blocks (when cache_control present). + + When *base_url* is provided and points to a third-party Anthropic-compatible + endpoint, all thinking block signatures are stripped. Signatures are + Anthropic-proprietary — third-party endpoints cannot validate them and will + reject them with HTTP 400 "Invalid signature in thinking block". """ system = None result = [] @@ -1134,7 +1154,14 @@ def convert_messages_to_anthropic( # orphan stripping, message merging) invalidates the signature, # causing HTTP 400 "Invalid signature in thinking block". # - # Strategy (following clawdbot/OpenClaw pattern): + # Signatures are Anthropic-proprietary. Third-party endpoints + # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate + # them and will reject them outright. When targeting a third-party + # endpoint, strip ALL thinking/redacted_thinking blocks from every + # assistant message — the third-party will generate its own + # thinking blocks if it supports extended thinking. + # + # For direct Anthropic (strategy following clawdbot/OpenClaw): # 1. Strip thinking/redacted_thinking from all assistant messages # EXCEPT the last one — preserves reasoning continuity on the # current tool-use chain while avoiding stale signature errors. @@ -1143,6 +1170,7 @@ def convert_messages_to_anthropic( # 3. Strip cache_control from thinking/redacted_thinking blocks — # cache markers can interfere with signature validation. 
_THINKING_TYPES = frozenset(("thinking", "redacted_thinking")) + _is_third_party = _is_third_party_anthropic_endpoint(base_url) last_assistant_idx = None for i in range(len(result) - 1, -1, -1): @@ -1154,16 +1182,19 @@ def convert_messages_to_anthropic( if m.get("role") != "assistant" or not isinstance(m.get("content"), list): continue - if idx != last_assistant_idx: - # Strip ALL thinking blocks from non-latest assistant messages + if _is_third_party or idx != last_assistant_idx: + # Third-party endpoint: strip ALL thinking blocks from every + # assistant message — signatures are Anthropic-proprietary. + # Direct Anthropic: strip from non-latest assistant messages only. stripped = [ b for b in m["content"] if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES) ] m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}] else: - # Latest assistant: keep signed thinking blocks for reasoning - # continuity; downgrade unsigned ones to plain text. + # Latest assistant on direct Anthropic: keep signed thinking + # blocks for reasoning continuity; downgrade unsigned ones to + # plain text. new_content = [] for b in m["content"]: if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: @@ -1203,6 +1234,7 @@ def build_anthropic_kwargs( is_oauth: bool = False, preserve_dots: bool = False, context_length: Optional[int] = None, + base_url: str | None = None, ) -> Dict[str, Any]: """Build kwargs for anthropic.messages.create(). @@ -1216,8 +1248,11 @@ def build_anthropic_kwargs( When *preserve_dots* is True, model name dots are not converted to hyphens (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus). + + When *base_url* points to a third-party Anthropic-compatible endpoint, + thinking block signatures are stripped (they are Anthropic-proprietary). 
""" - system, anthropic_messages = convert_messages_to_anthropic(messages) + system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url) anthropic_tools = convert_tools_to_anthropic(tools) if tools else [] model = normalize_model_name(model, preserve_dots=preserve_dots) diff --git a/run_agent.py b/run_agent.py index dc423532..f57072e9 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4895,7 +4895,7 @@ class AIAgent: effective_key = (fb_client.api_key or resolve_anthropic_token() or "") if fb_provider == "anthropic" else (fb_client.api_key or "") self.api_key = effective_key self._anthropic_api_key = effective_key - self._anthropic_base_url = getattr(fb_client, "base_url", None) + self._anthropic_base_url = fb_base_url self._anthropic_client = build_anthropic_client(effective_key, self._anthropic_base_url) self._is_anthropic_oauth = _is_oauth_token(effective_key) self.client = None @@ -5334,6 +5334,7 @@ class AIAgent: is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots(), context_length=ctx_len, + base_url=getattr(self, "_anthropic_base_url", None), ) if self.api_mode == "codex_responses": From 7d26feb9a3327204447af9dc0045c44ca942c1e9 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Wed, 8 Apr 2026 17:08:40 -0700 Subject: [PATCH 154/154] feat(discord): add DISCORD_REPLY_TO_MODE setting (#6333) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add configurable reply-reference behavior for Discord, matching the existing Telegram (TELEGRAM_REPLY_TO_MODE) and Mattermost (MATTERMOST_REPLY_MODE) implementations. Modes: - 'off': never reply-reference the original message - 'first': reply-reference on first chunk only (default, current behavior) - 'all': reply-reference on every chunk Set DISCORD_REPLY_TO_MODE=off in .env to disable reply-to messages. 
Changes: - gateway/config.py: parse DISCORD_REPLY_TO_MODE env var - gateway/platforms/discord.py: read reply_to_mode from config, respect it in send() — skip fetch_message entirely when 'off' - hermes_cli/config.py: add to OPTIONAL_ENV_VARS for hermes setup - 23 tests covering config, send behavior, env var override - docs: discord.md env var table + environment-variables.md reference Closes community request from Stuart on Discord. --- gateway/config.py | 7 + gateway/platforms/discord.py | 10 +- hermes_cli/config.py | 7 + tests/gateway/test_discord_reply_mode.py | 277 ++++++++++++++++++ .../docs/reference/environment-variables.md | 1 + website/docs/user-guide/messaging/discord.md | 1 + 6 files changed, 301 insertions(+), 2 deletions(-) create mode 100644 tests/gateway/test_discord_reply_mode.py diff --git a/gateway/config.py b/gateway/config.py index ab0d7c11..047ad542 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -712,6 +712,13 @@ def _apply_env_overrides(config: GatewayConfig) -> None: name=os.getenv("DISCORD_HOME_CHANNEL_NAME", "Home"), ) + # Reply threading mode for Discord (off/first/all) + discord_reply_mode = os.getenv("DISCORD_REPLY_TO_MODE", "").lower() + if discord_reply_mode in ("off", "first", "all"): + if Platform.DISCORD not in config.platforms: + config.platforms[Platform.DISCORD] = PlatformConfig() + config.platforms[Platform.DISCORD].reply_to_mode = discord_reply_mode + # WhatsApp (typically uses different auth mechanism) whatsapp_enabled = os.getenv("WHATSAPP_ENABLED", "").lower() in ("true", "1", "yes") if whatsapp_enabled: diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 703c7549..b802f571 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -455,6 +455,9 @@ class DiscordAdapter(BasePlatformAdapter): self._seen_messages: Dict[str, float] = {} self._SEEN_TTL = 300 # 5 minutes self._SEEN_MAX = 2000 # prune threshold + # Reply threading mode: "off" (no replies), "first" (reply on 
first + # chunk only, default), "all" (reply-reference on every chunk). + self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first' async def connect(self) -> bool: """Connect to Discord and start receiving events.""" @@ -774,7 +777,7 @@ class DiscordAdapter(BasePlatformAdapter): message_ids = [] reference = None - if reply_to: + if reply_to and self._reply_to_mode != "off": try: ref_msg = await channel.fetch_message(int(reply_to)) reference = ref_msg @@ -782,7 +785,10 @@ class DiscordAdapter(BasePlatformAdapter): logger.debug("Could not fetch reply-to message: %s", e) for i, chunk in enumerate(chunks): - chunk_reference = reference if i == 0 else None + if self._reply_to_mode == "all": + chunk_reference = reference + else: # "first" (default) or "off" + chunk_reference = reference if i == 0 else None try: msg = await channel.send( content=chunk, diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 6e86886b..7c860f15 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -999,6 +999,13 @@ OPTIONAL_ENV_VARS = { "password": False, "category": "messaging", }, + "DISCORD_REPLY_TO_MODE": { + "description": "Discord reply threading mode: 'off' (no reply references), 'first' (reply on first message only, default), 'all' (reply on every chunk)", + "prompt": "Discord reply mode (off/first/all)", + "url": None, + "password": False, + "category": "messaging", + }, "SLACK_BOT_TOKEN": { "description": "Slack bot token (xoxb-). Get from OAuth & Permissions after installing your app. " "Required scopes: chat:write, app_mentions:read, channels:history, groups:history, " diff --git a/tests/gateway/test_discord_reply_mode.py b/tests/gateway/test_discord_reply_mode.py new file mode 100644 index 00000000..5a9bb9cd --- /dev/null +++ b/tests/gateway/test_discord_reply_mode.py @@ -0,0 +1,277 @@ +"""Tests for Discord reply_to_mode functionality. 
+ +Covers the threading behavior control for multi-chunk replies: +- "off": Never reply-reference to original message +- "first": Only first chunk uses reply reference (default) +- "all": All chunks reply-reference the original message +""" +import os +import sys +from types import SimpleNamespace +from unittest.mock import MagicMock, AsyncMock, patch + +import pytest + +from gateway.config import PlatformConfig, GatewayConfig, Platform, _apply_env_overrides + + +def _ensure_discord_mock(): + """Install a mock discord module when discord.py isn't available.""" + if "discord" in sys.modules and hasattr(sys.modules["discord"], "__file__"): + return + + discord_mod = MagicMock() + discord_mod.Intents.default.return_value = MagicMock() + discord_mod.Client = MagicMock + discord_mod.File = MagicMock + discord_mod.DMChannel = type("DMChannel", (), {}) + discord_mod.Thread = type("Thread", (), {}) + discord_mod.ForumChannel = type("ForumChannel", (), {}) + discord_mod.ui = SimpleNamespace(View=object, button=lambda *a, **k: (lambda fn: fn), Button=object) + discord_mod.ButtonStyle = SimpleNamespace(success=1, primary=2, secondary=2, danger=3, green=1, grey=2, blurple=2, red=3) + discord_mod.Color = SimpleNamespace(orange=lambda: 1, green=lambda: 2, blue=lambda: 3, red=lambda: 4, purple=lambda: 5) + discord_mod.Interaction = object + discord_mod.Embed = MagicMock + discord_mod.app_commands = SimpleNamespace( + describe=lambda **kwargs: (lambda fn: fn), + choices=lambda **kwargs: (lambda fn: fn), + Choice=lambda **kwargs: SimpleNamespace(**kwargs), + ) + + ext_mod = MagicMock() + commands_mod = MagicMock() + commands_mod.Bot = MagicMock + ext_mod.commands = commands_mod + + sys.modules.setdefault("discord", discord_mod) + sys.modules.setdefault("discord.ext", ext_mod) + sys.modules.setdefault("discord.ext.commands", commands_mod) + + +_ensure_discord_mock() + +from gateway.platforms.discord import DiscordAdapter # noqa: E402 + + +@pytest.fixture() +def adapter_factory(): + 
"""Factory to create DiscordAdapter with custom reply_to_mode.""" + def create(reply_to_mode: str = "first"): + config = PlatformConfig(enabled=True, token="test-token", reply_to_mode=reply_to_mode) + return DiscordAdapter(config) + return create + + +class TestReplyToModeConfig: + """Tests for reply_to_mode configuration loading.""" + + def test_default_mode_is_first(self, adapter_factory): + adapter = adapter_factory() + assert adapter._reply_to_mode == "first" + + def test_off_mode(self, adapter_factory): + adapter = adapter_factory(reply_to_mode="off") + assert adapter._reply_to_mode == "off" + + def test_first_mode(self, adapter_factory): + adapter = adapter_factory(reply_to_mode="first") + assert adapter._reply_to_mode == "first" + + def test_all_mode(self, adapter_factory): + adapter = adapter_factory(reply_to_mode="all") + assert adapter._reply_to_mode == "all" + + def test_invalid_mode_stored_as_is(self, adapter_factory): + """Invalid modes are stored but send() handles them gracefully.""" + adapter = adapter_factory(reply_to_mode="invalid") + assert adapter._reply_to_mode == "invalid" + + def test_none_mode_defaults_to_first(self): + config = PlatformConfig(enabled=True, token="test-token") + adapter = DiscordAdapter(config) + assert adapter._reply_to_mode == "first" + + def test_empty_string_mode_defaults_to_first(self): + config = PlatformConfig(enabled=True, token="test-token", reply_to_mode="") + adapter = DiscordAdapter(config) + assert adapter._reply_to_mode == "first" + + +def _make_discord_adapter(reply_to_mode: str = "first"): + """Create a DiscordAdapter with mocked client and channel for send() tests.""" + config = PlatformConfig(enabled=True, token="test-token", reply_to_mode=reply_to_mode) + adapter = DiscordAdapter(config) + + # Mock the Discord client and channel + mock_channel = AsyncMock() + ref_message = MagicMock() + mock_channel.fetch_message = AsyncMock(return_value=ref_message) + + sent_msg = MagicMock() + sent_msg.id = 42 + 
mock_channel.send = AsyncMock(return_value=sent_msg) + + mock_client = MagicMock() + mock_client.get_channel = MagicMock(return_value=mock_channel) + + adapter._client = mock_client + return adapter, mock_channel, ref_message + + +class TestSendWithReplyToMode: + """Tests for send() method respecting reply_to_mode.""" + + @pytest.mark.asyncio + async def test_off_mode_no_reply_reference(self): + adapter, channel, ref_msg = _make_discord_adapter("off") + adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"] + + await adapter.send("12345", "test content", reply_to="999") + + # Should never try to fetch the reference message + channel.fetch_message.assert_not_called() + # All chunks sent without reference + for call in channel.send.call_args_list: + assert call.kwargs.get("reference") is None + + @pytest.mark.asyncio + async def test_first_mode_only_first_chunk_references(self): + adapter, channel, ref_msg = _make_discord_adapter("first") + adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"] + + await adapter.send("12345", "test content", reply_to="999") + + # Should fetch the reference message + channel.fetch_message.assert_called_once_with(999) + calls = channel.send.call_args_list + assert len(calls) == 3 + assert calls[0].kwargs.get("reference") is ref_msg + assert calls[1].kwargs.get("reference") is None + assert calls[2].kwargs.get("reference") is None + + @pytest.mark.asyncio + async def test_all_mode_all_chunks_reference(self): + adapter, channel, ref_msg = _make_discord_adapter("all") + adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"] + + await adapter.send("12345", "test content", reply_to="999") + + channel.fetch_message.assert_called_once_with(999) + calls = channel.send.call_args_list + assert len(calls) == 3 + for call in calls: + assert call.kwargs.get("reference") is ref_msg + + @pytest.mark.asyncio + async def test_no_reply_to_param_no_reference(self): + 
adapter, channel, ref_msg = _make_discord_adapter("all") + adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"] + + await adapter.send("12345", "test content", reply_to=None) + + channel.fetch_message.assert_not_called() + for call in channel.send.call_args_list: + assert call.kwargs.get("reference") is None + + @pytest.mark.asyncio + async def test_single_chunk_respects_first_mode(self): + adapter, channel, ref_msg = _make_discord_adapter("first") + adapter.truncate_message = lambda content, max_len: ["single chunk"] + + await adapter.send("12345", "test", reply_to="999") + + calls = channel.send.call_args_list + assert len(calls) == 1 + assert calls[0].kwargs.get("reference") is ref_msg + + @pytest.mark.asyncio + async def test_single_chunk_off_mode(self): + adapter, channel, ref_msg = _make_discord_adapter("off") + adapter.truncate_message = lambda content, max_len: ["single chunk"] + + await adapter.send("12345", "test", reply_to="999") + + channel.fetch_message.assert_not_called() + calls = channel.send.call_args_list + assert len(calls) == 1 + assert calls[0].kwargs.get("reference") is None + + @pytest.mark.asyncio + async def test_invalid_mode_falls_back_to_first_behavior(self): + """Invalid mode behaves like 'first' — only first chunk gets reference.""" + adapter, channel, ref_msg = _make_discord_adapter("banana") + adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"] + + await adapter.send("12345", "test", reply_to="999") + + calls = channel.send.call_args_list + assert len(calls) == 2 + assert calls[0].kwargs.get("reference") is ref_msg + assert calls[1].kwargs.get("reference") is None + + +class TestConfigSerialization: + """Tests for reply_to_mode serialization (shared with Telegram).""" + + def test_to_dict_includes_reply_to_mode(self): + config = PlatformConfig(enabled=True, token="test", reply_to_mode="all") + result = config.to_dict() + assert result["reply_to_mode"] == "all" + + def 
test_from_dict_loads_reply_to_mode(self): + data = {"enabled": True, "token": "***", "reply_to_mode": "off"} + config = PlatformConfig.from_dict(data) + assert config.reply_to_mode == "off" + + def test_from_dict_defaults_to_first(self): + data = {"enabled": True, "token": "***"} + config = PlatformConfig.from_dict(data) + assert config.reply_to_mode == "first" + + +class TestEnvVarOverride: + """Tests for DISCORD_REPLY_TO_MODE environment variable override.""" + + def _make_config(self): + config = GatewayConfig() + config.platforms[Platform.DISCORD] = PlatformConfig(enabled=True, token="test") + return config + + def test_env_var_sets_off_mode(self): + config = self._make_config() + with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "off"}, clear=False): + _apply_env_overrides(config) + assert config.platforms[Platform.DISCORD].reply_to_mode == "off" + + def test_env_var_sets_all_mode(self): + config = self._make_config() + with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "all"}, clear=False): + _apply_env_overrides(config) + assert config.platforms[Platform.DISCORD].reply_to_mode == "all" + + def test_env_var_case_insensitive(self): + config = self._make_config() + with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "ALL"}, clear=False): + _apply_env_overrides(config) + assert config.platforms[Platform.DISCORD].reply_to_mode == "all" + + def test_env_var_invalid_value_ignored(self): + config = self._make_config() + with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "banana"}, clear=False): + _apply_env_overrides(config) + assert config.platforms[Platform.DISCORD].reply_to_mode == "first" + + def test_env_var_empty_value_ignored(self): + config = self._make_config() + with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": ""}, clear=False): + _apply_env_overrides(config) + assert config.platforms[Platform.DISCORD].reply_to_mode == "first" + + def test_env_var_creates_platform_config_if_missing(self): + """DISCORD_REPLY_TO_MODE creates PlatformConfig 
even without DISCORD_BOT_TOKEN.""" + config = GatewayConfig() + assert Platform.DISCORD not in config.platforms + with patch.dict(os.environ, {"DISCORD_REPLY_TO_MODE": "off"}, clear=False): + _apply_env_overrides(config) + assert Platform.DISCORD in config.platforms + assert config.platforms[Platform.DISCORD].reply_to_mode == "off" diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index beacb8c1..00b42869 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -175,6 +175,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI | `DISCORD_REACTIONS` | Enable emoji reactions on messages during processing (default: `true`) | | `DISCORD_IGNORED_CHANNELS` | Comma-separated channel IDs where the bot never responds | | `DISCORD_NO_THREAD_CHANNELS` | Comma-separated channel IDs where bot responds without auto-threading | +| `DISCORD_REPLY_TO_MODE` | Reply-reference behavior: `off`, `first` (default), or `all` | | `SLACK_BOT_TOKEN` | Slack bot token (`xoxb-...`) | | `SLACK_APP_TOKEN` | Slack app-level token (`xapp-...`, required for Socket Mode) | | `SLACK_ALLOWED_USERS` | Comma-separated Slack user IDs | diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index a015dbb9..111bea59 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -282,6 +282,7 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede | `DISCORD_REACTIONS` | No | `true` | When `true`, the bot adds emoji reactions to messages during processing (👀 when starting, ✅ on success, ❌ on error). Set to `false` to disable reactions entirely. | | `DISCORD_IGNORED_CHANNELS` | No | — | Comma-separated channel IDs where the bot **never** responds, even when `@mentioned`. Takes priority over all other channel settings. 
| | `DISCORD_NO_THREAD_CHANNELS` | No | — | Comma-separated channel IDs where the bot responds directly in the channel instead of creating a thread. Only relevant when `DISCORD_AUTO_THREAD` is `true`. | +| `DISCORD_REPLY_TO_MODE` | No | `"first"` | Controls reply-reference behavior: `"off"` — never reply to the original message, `"first"` — reply-reference on the first message chunk only (default), `"all"` — reply-reference on every chunk. | ### Config File (`config.yaml`)