fix(mcp): move discovery out of model_tools import side effect (#16856) (#16899)

model_tools.py ran discover_mcp_tools() as a module-level side effect.
discover_mcp_tools() uses a blocking 120s wait internally (via
_run_on_mcp_loop -> future.result(timeout=120)).

The gateway lazy-imports run_agent -> model_tools on the first user
message, which happens inside the asyncio event loop thread.  A slow or
unreachable MCP server therefore froze Discord shard heartbeats and
Telegram polling for up to 120s on the first message after gateway
start.

Fix: remove the module-level call.  Every entry point now runs
discovery explicitly at its own startup, using the context-appropriate
blocking/non-blocking pattern:

- gateway/run.py:       loop.run_in_executor(None, discover_mcp_tools)
                        before platforms start accepting traffic
- hermes_cli/main.py:   inline (no event loop at CLI startup)
- tui_gateway/entry.py: inline (sync stdin loop, no event loop)
- acp_adapter/entry.py: inline before asyncio.run()

Closes #16856.
This commit is contained in:
Teknium 2026-04-28 01:17:58 -07:00 committed by GitHub
parent c8ef786926
commit dd789a4fdf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 58 additions and 6 deletions

View File

@ -112,6 +112,17 @@ def main() -> None:
import acp
from .server import HermesACPAgent
# MCP tool discovery from config.yaml — run before asyncio.run() so
# it's safe to use blocking waits. (ACP also registers per-session
# MCP servers dynamically via asyncio.to_thread inside the event
# loop; that path is unaffected.) Moved from model_tools.py module
# scope to avoid freezing the gateway's loop on lazy import (#16856).
try:
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
except Exception:
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
agent = HermesACPAgent()
try:
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))

View File

@ -11663,6 +11663,19 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
atexit.register(remove_pid_file)
atexit.register(release_gateway_runtime_lock)
# MCP tool discovery — run in an executor so the asyncio event loop
# stays responsive even when a configured MCP server is slow or
# unreachable. discover_mcp_tools() uses a blocking 120s wait
# internally; calling it from the loop thread would freeze platform
# heartbeats (Discord shard, Telegram polling) until it returned.
# See #16856.
try:
from tools.mcp_tool import discover_mcp_tools
_loop = asyncio.get_running_loop()
await _loop.run_in_executor(None, discover_mcp_tools)
except Exception as e:
logger.debug("MCP tool discovery failed: %s", e)
# Start the gateway
success = await runner.start()
if not success:

View File

@ -10193,6 +10193,17 @@ Examples:
logger.debug(
"plugin discovery failed at CLI startup", exc_info=True,
)
try:
# MCP tool discovery — no event loop running in CLI/TUI startup,
# so inline is safe. Moved here from model_tools.py module scope
# to avoid freezing the gateway's event loop on its first message
# via the same lazy import path (#16856).
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
except Exception:
logger.debug(
"MCP tool discovery failed at CLI startup", exc_info=True,
)
try:
from hermes_cli.config import load_config
from agent.shell_hooks import register_from_config

View File

@ -138,12 +138,18 @@ def _run_async(coro):
discover_builtin_tools()
# MCP tool discovery (external MCP servers from config)
try:
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
except Exception as e:
logger.debug("MCP tool discovery failed: %s", e)
# MCP tool discovery (external MCP servers from config) used to run here as
# a module-level side effect. It was removed because discover_mcp_tools()
# internally uses a blocking future.result(timeout=120) wait, and the
# gateway lazy-imports this module from inside the asyncio event loop on
# the first user message — freezing Discord/Telegram heartbeats for up to
# 120s whenever any configured MCP server was slow or unreachable (#16856).
#
# Each entry point now runs discovery explicitly at its own startup:
# - gateway/run.py -> start_gateway() uses run_in_executor
# - cli.py, hermes_cli/* -> inline on startup (no event loop)
# - tui_gateway/server.py -> inline on startup (no event loop)
# - acp_adapter/server.py -> asyncio.to_thread on session init
# Plugin tool discovery (user/project/pip plugins)
try:

View File

@ -105,6 +105,17 @@ def _log_exit(reason: str) -> None:
def main():
_install_sidecar_publisher()
# MCP tool discovery — inline is safe here: TUI entry is a plain
# sync loop with no asyncio event loop to block. Previously ran as
# a model_tools.py module-level side effect; moved to explicit
# startup calls to avoid freezing the gateway's loop on lazy import
# (#16856).
try:
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
except Exception:
pass
if not write_json({
"jsonrpc": "2.0",
"method": "event",