model_tools.py ran discover_mcp_tools() as a module-level side effect.
discover_mcp_tools() uses a blocking 120s wait internally (via
_run_on_mcp_loop -> future.result(timeout=120)).
The gateway lazy-imports run_agent -> model_tools on the first user
message, which happens inside the asyncio event loop thread. A slow or
unreachable MCP server therefore froze Discord shard heartbeats and
Telegram polling for up to 120s on the first message after gateway
start.
Fix: remove the module-level call. Every entry point now runs
discovery explicitly at its own startup, using the context-appropriate
blocking/non-blocking pattern:
- gateway/run.py: loop.run_in_executor(None, discover_mcp_tools)
before platforms start accepting traffic
- hermes_cli/main.py: inline (no event loop at CLI startup)
- tui_gateway/entry.py: inline (sync stdin loop, no event loop)
- acp_adapter/entry.py: inline before asyncio.run()
Closes #16856.
This commit is contained in:
parent
c8ef786926
commit
dd789a4fdf
@ -112,6 +112,17 @@ def main() -> None:
|
||||
import acp
|
||||
from .server import HermesACPAgent
|
||||
|
||||
# MCP tool discovery from config.yaml — run before asyncio.run() so
|
||||
# it's safe to use blocking waits. (ACP also registers per-session
|
||||
# MCP servers dynamically via asyncio.to_thread inside the event
|
||||
# loop; that path is unaffected.) Moved from model_tools.py module
|
||||
# scope to avoid freezing the gateway's loop on lazy import (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug("MCP tool discovery failed at ACP startup", exc_info=True)
|
||||
|
||||
agent = HermesACPAgent()
|
||||
try:
|
||||
asyncio.run(acp.run_agent(agent, use_unstable_protocol=True))
|
||||
|
||||
@ -11663,6 +11663,19 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
||||
atexit.register(remove_pid_file)
|
||||
atexit.register(release_gateway_runtime_lock)
|
||||
|
||||
# MCP tool discovery — run in an executor so the asyncio event loop
|
||||
# stays responsive even when a configured MCP server is slow or
|
||||
# unreachable. discover_mcp_tools() uses a blocking 120s wait
|
||||
# internally; calling it from the loop thread would freeze platform
|
||||
# heartbeats (Discord shard, Telegram polling) until it returned.
|
||||
# See #16856.
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
_loop = asyncio.get_running_loop()
|
||||
await _loop.run_in_executor(None, discover_mcp_tools)
|
||||
except Exception as e:
|
||||
logger.debug("MCP tool discovery failed: %s", e)
|
||||
|
||||
# Start the gateway
|
||||
success = await runner.start()
|
||||
if not success:
|
||||
|
||||
@ -10193,6 +10193,17 @@ Examples:
|
||||
logger.debug(
|
||||
"plugin discovery failed at CLI startup", exc_info=True,
|
||||
)
|
||||
try:
|
||||
# MCP tool discovery — no event loop running in CLI/TUI startup,
|
||||
# so inline is safe. Moved here from model_tools.py module scope
|
||||
# to avoid freezing the gateway's event loop on its first message
|
||||
# via the same lazy import path (#16856).
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
logger.debug(
|
||||
"MCP tool discovery failed at CLI startup", exc_info=True,
|
||||
)
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
from agent.shell_hooks import register_from_config
|
||||
|
||||
@ -138,12 +138,18 @@ def _run_async(coro):
|
||||
|
||||
discover_builtin_tools()
|
||||
|
||||
# MCP tool discovery (external MCP servers from config)
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception as e:
|
||||
logger.debug("MCP tool discovery failed: %s", e)
|
||||
# MCP tool discovery (external MCP servers from config) used to run here as
|
||||
# a module-level side effect. It was removed because discover_mcp_tools()
|
||||
# internally uses a blocking future.result(timeout=120) wait, and the
|
||||
# gateway lazy-imports this module from inside the asyncio event loop on
|
||||
# the first user message — freezing Discord/Telegram heartbeats for up to
|
||||
# 120s whenever any configured MCP server was slow or unreachable (#16856).
|
||||
#
|
||||
# Each entry point now runs discovery explicitly at its own startup:
|
||||
# - gateway/run.py -> start_gateway() uses run_in_executor
|
||||
# - cli.py, hermes_cli/* -> inline on startup (no event loop)
|
||||
# - tui_gateway/entry.py -> inline on startup (no event loop)
|
||||
# - acp_adapter/entry.py -> inline before asyncio.run() (per-session MCP servers still register via asyncio.to_thread)
|
||||
|
||||
# Plugin tool discovery (user/project/pip plugins)
|
||||
try:
|
||||
|
||||
@ -105,6 +105,17 @@ def _log_exit(reason: str) -> None:
|
||||
def main():
|
||||
_install_sidecar_publisher()
|
||||
|
||||
# MCP tool discovery — inline is safe here: TUI entry is a plain
|
||||
# sync loop with no asyncio event loop to block. Previously ran as
|
||||
# a model_tools.py module-level side effect; moved to explicit
|
||||
# startup calls to avoid freezing the gateway's loop on lazy import
|
||||
# (#16856).
|
||||
try:
|
||||
from tools.mcp_tool import discover_mcp_tools
|
||||
discover_mcp_tools()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not write_json({
|
||||
"jsonrpc": "2.0",
|
||||
"method": "event",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user