fix(discord): /reload-skills now refreshes the /skill autocomplete live (#18754)

`_register_skill_group` captured the skill catalog in closure variables
(`entries` and `skill_lookup`) so the single `tree.add_command` call at
startup owned the only live copy. The closure is never re-entered after
startup, so `/reload-skills` — which rescans the on-disk skills dir and
refreshes the in-process `_skill_commands` registry — had no way to
propagate results into the `/skill` autocomplete on Discord. New skills
stayed invisible in the dropdown, and deleted skills returned
"Unknown skill" when the stale autocomplete entry was clicked.

The fix is purely a dataflow change: promote `entries` and `skill_lookup`
to instance attributes (`_skill_entries`, `_skill_lookup`), split the
collector-driven rebuild into a helper (`_refresh_skill_catalog_state`),
and add a public `refresh_skill_group()` method that re-runs the helper
and is safe to call at any point after the initial registration.

The gateway's `_handle_reload_skills_command` then iterates
`self.adapters` and calls `refresh_skill_group()` on any adapter that
exposes it (currently only Discord). Both sync and async implementations
are supported; adapters that don't override the method (Telegram's
BotCommand menu, Slack subcommand map, etc.) are silently skipped — the
in-process `reload_skills()` call covers them.

No `tree.sync()` is required because Discord fetches autocomplete
options dynamically on every keystroke — mutating the instance state the
callbacks already read from is sufficient. That sidesteps the per-app
command-bucket rate limit (~5 writes / 20 s) that made the previous
bulk-sync-on-reload approach unusable (#16713 context).

Tests: tests/gateway/test_reload_skills_discord_resync.py — five cases
covering (1) refresh replaces entries, (2) entries stay sorted after
refresh, (3) collector exception leaves cached state intact, (4)
`_refresh_skill_catalog_state` populates the instance attrs, (5)
orchestrator calls `refresh_skill_group()` on sync + async adapters and
skips adapters that don't expose it.
This commit is contained in:
Teknium 2026-05-02 02:00:11 -07:00 committed by GitHub
parent 6ec74aec07
commit 10297fa23c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 348 additions and 28 deletions

View File

@ -2584,40 +2584,32 @@ class DiscordAdapter(BasePlatformAdapter):
hidden skills. The slash picker also becomes more discoverable
Discord live-filters by the user's typed prefix against both the
skill name and its description.
The entries list and lookup dict are stored on ``self`` rather
than captured in closure variables so :meth:`refresh_skill_group`
can repopulate them when the user runs ``/reload-skills`` without
needing to touch the Discord slash-command tree or trigger a
``tree.sync()`` call.
"""
try:
from hermes_cli.commands import discord_skill_commands_by_category
existing_names = set()
try:
existing_names = {cmd.name for cmd in tree.get_commands()}
except Exception:
pass
# Reuse the existing collector for consistent filtering
# (per-platform disabled, hub-excluded, name clamping), then
# flatten — the category grouping was only useful for the
# nested layout.
categories, uncategorized, hidden = discord_skill_commands_by_category(
reserved_names=existing_names,
)
entries: list[tuple[str, str, str]] = list(uncategorized)
for cat_skills in categories.values():
entries.extend(cat_skills)
# Populate the instance-level entries/lookup so the
# autocomplete + handler callbacks below always read the
# freshest state. refresh_skill_group() re-runs the same
# collector and mutates these two attributes in place.
self._skill_entries: list[tuple[str, str, str]] = []
self._skill_lookup: dict[str, tuple[str, str]] = {}
self._skill_group_reserved_names: set[str] = set(existing_names)
self._refresh_skill_catalog_state()
if not entries:
if not self._skill_entries:
return
# Stable alphabetical order so the autocomplete suggestion
# list is predictable across restarts.
entries.sort(key=lambda t: t[0])
# name -> (description, cmd_key) — used by both the autocomplete
# callback and the handler for O(1) dispatch.
skill_lookup: dict[str, tuple[str, str]] = {
n: (d, k) for n, d, k in entries
}
async def _autocomplete_name(
interaction: "discord.Interaction", current: str,
) -> list:
@ -2627,10 +2619,13 @@ class DiscordAdapter(BasePlatformAdapter):
"/skill pdf" surfaces skills whose description mentions
PDFs even if the name doesn't. Discord caps this list at
25 entries per query.
Reads ``self._skill_entries`` so a ``/reload-skills`` run
since process start shows up on the very next keystroke.
"""
q = (current or "").strip().lower()
choices: list = []
for name, desc, _key in entries:
for name, desc, _key in self._skill_entries:
if not q or q in name.lower() or (desc and q in desc.lower()):
if desc:
label = f"{name}{desc}"
@ -2654,7 +2649,7 @@ class DiscordAdapter(BasePlatformAdapter):
async def _skill_handler(
interaction: "discord.Interaction", name: str, args: str = "",
):
entry = skill_lookup.get(name)
entry = self._skill_lookup.get(name)
if not entry:
await interaction.response.send_message(
f"Unknown skill: `{name}`. Start typing for "
@ -2676,16 +2671,74 @@ class DiscordAdapter(BasePlatformAdapter):
logger.info(
"[%s] Registered /skill command with %d skill(s) via autocomplete",
self.name, len(entries),
self.name, len(self._skill_entries),
)
if hidden:
if self._skill_group_hidden_count:
logger.info(
"[%s] %d skill(s) filtered out of /skill (name clamp / reserved)",
self.name, hidden,
self.name, self._skill_group_hidden_count,
)
except Exception as exc:
logger.warning("[%s] Failed to register /skill command: %s", self.name, exc)
def _refresh_skill_catalog_state(self) -> None:
"""Re-scan disk for skills and repopulate ``self._skill_entries``.
Called once from :meth:`_register_skill_group` at startup and
again from :meth:`refresh_skill_group` whenever the user runs
``/reload-skills``. No Discord API calls are made autocomplete
and the handler both read from these instance attributes
directly, so an in-place mutation is sufficient.
"""
from hermes_cli.commands import discord_skill_commands_by_category
reserved = getattr(self, "_skill_group_reserved_names", set())
categories, uncategorized, hidden = discord_skill_commands_by_category(
reserved_names=set(reserved),
)
entries: list[tuple[str, str, str]] = list(uncategorized)
for cat_skills in categories.values():
entries.extend(cat_skills)
# Stable alphabetical order so the autocomplete suggestion
# list is predictable across restarts.
entries.sort(key=lambda t: t[0])
self._skill_entries = entries
self._skill_lookup = {n: (d, k) for n, d, k in entries}
self._skill_group_hidden_count = hidden
def refresh_skill_group(self) -> tuple[int, int]:
"""Rescan skills and update the live ``/skill`` autocomplete state.
Invoked by :meth:`gateway.run.GatewayOrchestrator._handle_reload_skills_command`
after :func:`agent.skill_commands.reload_skills` has refreshed
the in-process skill-command registry. Without this call, the
``/skill`` autocomplete dropdown keeps showing the list captured
at process start new skills stay invisible and deleted skills
return an "Unknown skill" error when clicked.
Because autocomplete options are fetched dynamically by Discord,
we only need to mutate the entries/lookup attributes read by the
callbacks no ``tree.sync()`` is required.
Returns ``(new_count, hidden_count)``.
"""
try:
self._refresh_skill_catalog_state()
except Exception as exc:
logger.warning(
"[%s] Failed to refresh /skill autocomplete after reload: %s",
self.name, exc,
)
return (len(getattr(self, "_skill_entries", [])), 0)
logger.info(
"[%s] Refreshed /skill autocomplete: %d skill(s) available (%d filtered)",
self.name,
len(self._skill_entries),
self._skill_group_hidden_count,
)
return (len(self._skill_entries), self._skill_group_hidden_count)
def _build_slash_event(self, interaction: discord.Interaction, text: str) -> MessageEvent:
"""Build a MessageEvent from a Discord slash command interaction."""
is_dm = isinstance(interaction.channel, discord.DMChannel)

View File

@ -15,6 +15,7 @@ Usage:
import asyncio
import dataclasses
import inspect
import json
import logging
import os
@ -9687,6 +9688,28 @@ class GatewayRunner:
removed = result.get("removed", []) # [{"name", "description"}, ...]
total = result.get("total", 0)
# Let each connected adapter refresh any platform-side state
# that cached the skill list at startup. Today that's the
# Discord /skill autocomplete (registered once per connect);
# without this call, new skills stay invisible in the
# dropdown and deleted skills error out when clicked. Other
# adapters that don't override refresh_skill_group (Telegram's
# BotCommand menu, Slack subcommand map, etc.) are silently
# skipped — the in-process reload above is enough for them.
for adapter in list(self.adapters.values()):
refresh = getattr(adapter, "refresh_skill_group", None)
if not callable(refresh):
continue
try:
maybe = refresh()
if inspect.isawaitable(maybe):
await maybe
except Exception as exc:
logger.warning(
"Adapter %s refresh_skill_group raised: %s",
getattr(adapter, "name", adapter), exc,
)
lines = ["🔄 **Skills Reloaded**\n"]
if not added and not removed:
lines.append("No new skills detected.")

View File

@ -0,0 +1,244 @@
"""Tests for `/reload-skills` resyncing the Discord ``/skill`` autocomplete.
Before this change, ``_register_skill_group`` captured the skill catalog
in closure variables (``entries`` and ``skill_lookup``) so that the one
``tree.add_command`` call at startup owned the only live copy of the
skill list. The closure is never re-entered after startup, so
``/reload-skills`` (which rescans the on-disk skill dir and refreshes
the in-process registry) had no way to propagate its results into the
autocomplete new skills stayed invisible in the dropdown and deleted
skills returned an "Unknown skill" error when the stale autocomplete
entry was clicked.
The fix promotes those two variables to instance attributes
(``_skill_entries`` / ``_skill_lookup``) and exposes a
``refresh_skill_group()`` method that rescans and mutates them in
place. The gateway ``_handle_reload_skills_command`` iterates its
connected adapters and calls the method on any that expose it.
No ``tree.sync()`` is required because Discord fetches autocomplete
options dynamically on every keystroke we only need to rebind the
data the live callbacks already read from.
"""
from __future__ import annotations
from unittest.mock import MagicMock
def _make_adapter():
"""Construct a DiscordAdapter without going through __init__ / token checks."""
from gateway.platforms.discord import DiscordAdapter
from gateway.platforms.base import Platform
adapter = object.__new__(DiscordAdapter)
adapter.config = MagicMock()
adapter.config.extra = {}
# ``platform`` is set by BasePlatformAdapter.__init__, which we skip
# above; the inherited ``.name`` property dereferences it for log
# formatting, so set it explicitly.
adapter.platform = Platform.DISCORD
return adapter
class TestRefreshSkillGroup:
def test_refresh_repopulates_entries_after_catalog_change(
self, monkeypatch
) -> None:
"""The initial catalog is replaced wholesale on refresh.
Mirrors the observable /reload-skills case: a user adds a new
skill to ~/.hermes/skills/, runs /reload-skills, and expects
the autocomplete to surface it on the very next keystroke.
"""
adapter = _make_adapter()
# Start-of-process state: /register built the catalog from the
# original collector output.
adapter._skill_entries = [
("old-skill", "Pre-existing skill", "/old-skill"),
]
adapter._skill_lookup = {"old-skill": ("Pre-existing skill", "/old-skill")}
adapter._skill_group_reserved_names = set()
adapter._skill_group_hidden_count = 0
# User adds new-skill to disk and removes old-skill.
def fake_collector(*, reserved_names):
return (
{"creative": [("new-skill", "Fresh skill", "/new-skill")]}, # categories
[], # uncategorized
0, # hidden
)
monkeypatch.setattr(
"hermes_cli.commands.discord_skill_commands_by_category",
fake_collector,
)
new_count, hidden = adapter.refresh_skill_group()
assert new_count == 1
assert hidden == 0
# Old skill is gone, new skill is present.
names = [n for n, _d, _k in adapter._skill_entries]
assert names == ["new-skill"]
assert "old-skill" not in adapter._skill_lookup
assert adapter._skill_lookup["new-skill"] == ("Fresh skill", "/new-skill")
def test_refresh_sorts_entries_alphabetically(self, monkeypatch) -> None:
"""Autocomplete order must be stable and predictable across refreshes."""
adapter = _make_adapter()
adapter._skill_entries = []
adapter._skill_lookup = {}
adapter._skill_group_reserved_names = set()
adapter._skill_group_hidden_count = 0
def fake_collector(*, reserved_names):
# Intentionally unsorted — the fix must resort.
return (
{"zzz": [("zebra", "", "/zebra")]},
[("alpha", "", "/alpha")],
0,
)
monkeypatch.setattr(
"hermes_cli.commands.discord_skill_commands_by_category",
fake_collector,
)
adapter.refresh_skill_group()
names = [n for n, _d, _k in adapter._skill_entries]
assert names == sorted(names) == ["alpha", "zebra"]
def test_refresh_handles_collector_exception_gracefully(
self, monkeypatch
) -> None:
"""A broken collector must not take down /reload-skills."""
adapter = _make_adapter()
adapter._skill_entries = [("keep", "kept", "/keep")]
adapter._skill_lookup = {"keep": ("kept", "/keep")}
adapter._skill_group_reserved_names = set()
adapter._skill_group_hidden_count = 0
def boom(*, reserved_names):
raise RuntimeError("simulated collector failure")
monkeypatch.setattr(
"hermes_cli.commands.discord_skill_commands_by_category",
boom,
)
new_count, hidden = adapter.refresh_skill_group()
# Returns previously-cached count, no crash, existing entries
# preserved so the live autocomplete keeps working.
assert new_count == 1
assert hidden == 0
assert adapter._skill_entries == [("keep", "kept", "/keep")]
class TestRegisterSkillGroupUsesInstanceState:
"""The closure-based ``entries`` / ``skill_lookup`` must be gone.
If the callbacks in ``_register_skill_group`` still close over
local variables instead of reading from ``self``, the refresh
method is useless autocomplete will keep serving the stale list.
The full slash-command registration path pulls in ``discord.app_commands``
decorators (``@describe`` / ``@autocomplete`` / ``Command``), which
are unstubbed in the hermetic test env. We assert the data-shaped
side-effects instead: after ``_register_skill_group`` returns
(successfully or not), ``_skill_entries`` and ``_skill_lookup`` must
be populated from the collector output, because
``_refresh_skill_catalog_state`` runs before any decorator evaluation.
"""
def test_refresh_catalog_state_populates_instance_attrs(
self, monkeypatch
) -> None:
adapter = _make_adapter()
adapter._skill_group_reserved_names = set()
def fake_collector(*, reserved_names):
return (
{"creative": [("ascii-art", "Make ASCII", "/ascii-art")]},
[],
0,
)
monkeypatch.setattr(
"hermes_cli.commands.discord_skill_commands_by_category",
fake_collector,
)
adapter._refresh_skill_catalog_state()
# Instance-level state populated — the autocomplete + handler
# callbacks both read from these, so `refresh_skill_group`
# mutating them in place is enough to pick up new skills.
assert adapter._skill_entries == [
("ascii-art", "Make ASCII", "/ascii-art"),
]
assert adapter._skill_lookup == {
"ascii-art": ("Make ASCII", "/ascii-art"),
}
assert adapter._skill_group_hidden_count == 0
class TestHandleReloadSkillsCallsRefreshSkillGroup:
"""Gateway-side integration: /reload-skills must call refresh on adapters."""
def test_orchestrator_calls_refresh_skill_group_on_every_adapter(self):
"""Sync + async refresh_skill_group implementations both get awaited/called.
The orchestrator iterates ``self.adapters`` and calls
``refresh_skill_group`` if it exists. Adapters that don't
implement it (today: everything except Discord) are silently
skipped without raising.
"""
import asyncio
from unittest.mock import patch, MagicMock
# Import without constructing a real runner — test the method
# directly against an ``object.__new__`` instance.
from gateway.run import GatewayRunner
runner = object.__new__(GatewayRunner)
sync_refresh = MagicMock(return_value=(5, 0))
async_called = {"flag": False}
class AsyncAdapter:
name = "async-platform"
async def refresh_skill_group(self):
async_called["flag"] = True
return (3, 0)
class SyncAdapter:
name = "sync-platform"
refresh_skill_group = sync_refresh
class NoOpAdapter:
name = "other"
# No refresh_skill_group — must not crash.
runner.adapters = {
"discord": AsyncAdapter(),
"slack": SyncAdapter(),
"telegram": NoOpAdapter(),
}
# Mock reload_skills itself so no disk scan runs.
fake_result = {"added": [], "removed": [], "total": 7}
with patch(
"agent.skill_commands.reload_skills", return_value=fake_result
):
event = MagicMock()
event.source = MagicMock()
# _session_key_for_source may be called — make it safe.
runner._session_key_for_source = lambda src: None
runner._pending_skills_reload_notes = {}
result = asyncio.get_event_loop().run_until_complete(
runner._handle_reload_skills_command(event)
)
assert "Skills Reloaded" in result
assert sync_refresh.called, "sync adapter refresh must be invoked"
assert async_called["flag"], "async adapter refresh must be awaited"