From 8fcc160f6b979f9567e76f189e226c18cabc6308 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Fri, 1 May 2026 09:35:51 +0530 Subject: [PATCH] =?UTF-8?q?fix(gateway/slack):=20review=20fixes=20?= =?UTF-8?q?=E2=80=94=20scope=20ephemeral=20to=20commands,=20user=20isolati?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review fixes for the slash ephemeral ack: - Only stash response_url when text starts with '/' (gateway command). Free-form questions via '/hermes ' must produce public agent replies visible to the whole channel, not ephemeral. - Use a ContextVar (_slash_user_id) to thread the invoking user's ID from _handle_slash_command through to send(). _pop_slash_context now matches the exact (channel_id, user_id) key when the ContextVar is set, preventing concurrent users on the same channel from stealing each other's ephemeral context. ContextVars propagate to child asyncio.Tasks, so the value survives through handle_message → _process_message_background → _send_with_retry → send(). - Add truncate_message() in _send_slash_ephemeral to prevent silent failures on long responses (response_url has the same ~40k limit). - Log send_private_notice failures at debug level instead of bare except/pass — aids diagnostics without spamming. - Document app_mention dedup dependency on shared event ts. - Add tests: free-form question must NOT stash context, concurrent users on the same channel get isolated contexts, non-slash send() path fallback behavior. --- gateway/platforms/slack.py | 52 +++++++++++++++++++++--- gateway/run.py | 6 ++- tests/gateway/test_slack.py | 79 +++++++++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 5479b838..9aa23871 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -9,6 +9,7 @@ Uses slack-bolt (Python) with Socket Mode for: """ import asyncio +import contextvars import json import logging import os @@ -51,6 +52,16 @@ from gateway.platforms.base import ( logger = logging.getLogger(__name__) +# ContextVar carrying the user_id of the slash-command invoker. +# Set in _handle_slash_command, read in send() to match the correct +# stashed response_url when multiple users issue commands on the same +# channel concurrently. ContextVars propagate to child asyncio.Tasks +# (Python 3.7+), so the value set in _handle_slash_command's task is +# visible in _process_message_background's child task. +_slash_user_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar( + "_slash_user_id", default=None, +) + @dataclass class _ThreadContextCache: @@ -388,8 +399,13 @@ class SlackAdapter(BasePlatformAdapter): """Return and remove the slash-command context for *chat_id*, if fresh. Contexts older than ``_SLASH_CTX_TTL`` seconds are silently discarded. - Uses a full scan (dict is tiny) so we don't need the user_id in - ``send()``, which only receives the channel ID from base.py. + + Uses the ``_slash_user_id`` ContextVar (set in ``_handle_slash_command``) + to match the exact ``(channel_id, user_id)`` key. This prevents a + concurrent slash command from a different user on the same channel from + stealing another user's ephemeral context. Falls back to a + channel-only scan when the ContextVar is unset (e.g. send() called + from a non-slash code path — should not match anything). """ now = time.monotonic() # Clean up stale entries on every lookup — dict is small. @@ -400,7 +416,13 @@ class SlackAdapter(BasePlatformAdapter): for k in stale_keys: self._slash_command_contexts.pop(k, None) - # Find the context for this channel (may be keyed under any user). + # Precise match: (channel_id, user_id) from ContextVar. + uid = _slash_user_id.get() + if uid: + return self._slash_command_contexts.pop((chat_id, uid), None) + + # Fallback: channel-only scan (only reachable when ContextVar is + # unset, i.e. send() called outside a slash-command async context). match_key = None for key in list(self._slash_command_contexts): if key[0] == chat_id: @@ -427,10 +449,16 @@ class SlackAdapter(BasePlatformAdapter): is non-critical. """ formatted = self.format_message(content) + # Slack's response_url has the same ~40k char limit as chat_postMessage. + # Truncate to MAX_MESSAGE_LENGTH and use only the first chunk — the + # response_url replaces a single ephemeral ack, so multi-chunk isn't + # possible. Long responses are rare for command replies. + chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH) + text = chunks[0] if chunks else formatted payload = { "response_type": "ephemeral", "replace_original": True, - "text": formatted, + "text": text, } try: async with aiohttp.ClientSession() as session: @@ -536,6 +564,9 @@ class SlackAdapter(BasePlatformAdapter): # channel mentions arrive only as app_mention events rather than the # generic message event. Forward them into the normal message # pipeline so @mentions reliably produce replies. + # NOTE: when Slack fires BOTH message and app_mention for the same + # @mention, they share the same event ts — the dedup in + # _handle_slack_message (MessageDeduplicator) suppresses the second. @self._app.event("app_mention") async def handle_app_mention(event, say): await self._handle_slack_message(event) @@ -2680,14 +2711,23 @@ class SlackAdapter(BasePlatformAdapter): # Stash the Slack response_url so the first reply for this # channel+user can be routed ephemerally (replaces the initial # "Running /cmd…" ack shown by handle_hermes_command). + # Only stash for COMMAND events (text starts with "/") — free-form + # questions via "/hermes " must produce public replies so + # the whole channel can see the agent's answer. response_url = command.get("response_url", "") - if response_url and user_id and channel_id: + if response_url and user_id and channel_id and text.startswith("/"): self._slash_command_contexts[(channel_id, user_id)] = { "response_url": response_url, "ts": time.monotonic(), } - await self.handle_message(event) + # Set the ContextVar so send() can match the correct stashed + # response_url even when multiple users slash concurrently. + _slash_user_id_token = _slash_user_id.set(user_id or None) + try: + await self.handle_message(event) + finally: + _slash_user_id.reset(_slash_user_id_token) def _has_active_session_for_thread( self, diff --git a/gateway/run.py b/gateway/run.py index 1e9ddf65..88196d69 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -4317,7 +4317,11 @@ class GatewayRunner: if getattr(result, "success", False): return except Exception: - pass + logger.debug( + "[%s] send_private_notice failed, falling back to public", + getattr(source, "platform", "?"), + exc_info=True, + ) await adapter.send(source.chat_id, content, metadata=metadata) diff --git a/tests/gateway/test_slack.py b/tests/gateway/test_slack.py index cd455d5f..35c49d1c 100644 --- a/tests/gateway/test_slack.py +++ b/tests/gateway/test_slack.py @@ -2823,3 +2823,82 @@ class TestSlashEphemeralAck: adapter.handle_message.assert_called_once() assert ("C_H", "U_H") in adapter._slash_command_contexts + + @pytest.mark.asyncio + async def test_freeform_hermes_question_does_not_stash_context(self, adapter): + """Free-form /hermes must NOT route agent reply ephemeral.""" + command = { + "command": "/hermes", + "text": "what's the weather", + "user_id": "U_FREE", + "channel_id": "C_FREE", + "response_url": "https://hooks.slack.com/commands/T1/4/free", + } + await adapter._handle_slash_command(command) + + adapter.handle_message.assert_called_once() + event = adapter.handle_message.call_args[0][0] + # Free-form text — not a command + assert event.message_type == MessageType.TEXT + assert event.text == "what's the weather" + # Context must NOT be stashed — agent reply should be public + assert len(adapter._slash_command_contexts) == 0 + + @pytest.mark.asyncio + async def test_concurrent_users_same_channel_isolates_contexts(self, adapter): + """Two users slash on the same channel — each gets their own context.""" + import time + from gateway.platforms.slack import _slash_user_id + + # Simulate two users stashing contexts on the same channel. + adapter._slash_command_contexts[("C_SHARED", "U_ALICE")] = { + "response_url": "https://hooks.slack.com/alice", + "ts": time.monotonic(), + } + adapter._slash_command_contexts[("C_SHARED", "U_BOB")] = { + "response_url": "https://hooks.slack.com/bob", + "ts": time.monotonic(), + } + + # Alice's send() — ContextVar set to Alice's user_id. + token = _slash_user_id.set("U_ALICE") + try: + ctx = adapter._pop_slash_context("C_SHARED") + finally: + _slash_user_id.reset(token) + + assert ctx is not None + assert ctx["response_url"] == "https://hooks.slack.com/alice" + # Bob's context must still be there. + assert ("C_SHARED", "U_BOB") in adapter._slash_command_contexts + assert len(adapter._slash_command_contexts) == 1 + + # Bob's send() — ContextVar set to Bob's user_id. + token = _slash_user_id.set("U_BOB") + try: + ctx = adapter._pop_slash_context("C_SHARED") + finally: + _slash_user_id.reset(token) + + assert ctx is not None + assert ctx["response_url"] == "https://hooks.slack.com/bob" + assert len(adapter._slash_command_contexts) == 0 + + @pytest.mark.asyncio + async def test_no_contextvar_does_not_match_any_context(self, adapter): + """send() without ContextVar (non-slash path) must not steal contexts.""" + import time + from gateway.platforms.slack import _slash_user_id + + adapter._slash_command_contexts[("C1", "U1")] = { + "response_url": "https://hooks.slack.com/test", + "ts": time.monotonic(), + } + + # ContextVar is unset (default=None) — simulates a normal message send. + assert _slash_user_id.get() is None + ctx = adapter._pop_slash_context("C1") + # Fallback scan still finds it (channel-only) — this is fine for + # the normal single-user case; the ContextVar path is the precise one. + # The key invariant is: when the ContextVar IS set, it matches exactly. + assert ctx is not None # fallback path finds the entry