fix(gateway/slack): review fixes — scope ephemeral to commands, user isolation

Self-review fixes for the slash ephemeral ack:

- Only stash response_url when text starts with '/' (gateway command).
  Free-form questions via '/hermes <question>' must produce public agent
  replies visible to the whole channel, not ephemeral.
- Use a ContextVar (_slash_user_id) to thread the invoking user's ID
  from _handle_slash_command through to send().  _pop_slash_context now
  matches the exact (channel_id, user_id) key when the ContextVar is
  set, preventing concurrent users on the same channel from stealing
  each other's ephemeral context.  ContextVars propagate to child
  asyncio.Tasks, so the value survives through handle_message →
  _process_message_background → _send_with_retry → send().
- Add truncate_message() in _send_slash_ephemeral to prevent silent
  failures on long responses (response_url has the same ~40k limit).
- Log send_private_notice failures at debug level instead of bare
  except/pass — aids diagnostics without spamming.
- Document app_mention dedup dependency on shared event ts.
- Add tests: free-form question must NOT stash context, concurrent
  users on the same channel get isolated contexts, non-slash send()
  path fallback behavior.
This commit is contained in:
kshitijk4poor 2026-05-01 09:35:51 +05:30 committed by kshitij
parent f34d298495
commit 8fcc160f6b
3 changed files with 130 additions and 7 deletions

View File

@ -9,6 +9,7 @@ Uses slack-bolt (Python) with Socket Mode for:
"""
import asyncio
import contextvars
import json
import logging
import os
@ -51,6 +52,16 @@ from gateway.platforms.base import (
logger = logging.getLogger(__name__)
# ContextVar carrying the user_id of the slash-command invoker.
# Set in _handle_slash_command, read in send() to match the correct
# stashed response_url when multiple users issue commands on the same
# channel concurrently. ContextVars propagate to child asyncio.Tasks
# (Python 3.7+), so the value set in _handle_slash_command's task is
# visible in _process_message_background's child task.
_slash_user_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
"_slash_user_id", default=None,
)
@dataclass
class _ThreadContextCache:
@ -388,8 +399,13 @@ class SlackAdapter(BasePlatformAdapter):
"""Return and remove the slash-command context for *chat_id*, if fresh.
Contexts older than ``_SLASH_CTX_TTL`` seconds are silently discarded.
Uses a full scan (dict is tiny) so we don't need the user_id in
``send()``, which only receives the channel ID from base.py.
Uses the ``_slash_user_id`` ContextVar (set in ``_handle_slash_command``)
to match the exact ``(channel_id, user_id)`` key. This prevents a
concurrent slash command from a different user on the same channel from
stealing another user's ephemeral context. Falls back to a
channel-only scan when the ContextVar is unset (e.g. send() called
from a non-slash code path should not match anything).
"""
now = time.monotonic()
# Clean up stale entries on every lookup — dict is small.
@ -400,7 +416,13 @@ class SlackAdapter(BasePlatformAdapter):
for k in stale_keys:
self._slash_command_contexts.pop(k, None)
# Find the context for this channel (may be keyed under any user).
# Precise match: (channel_id, user_id) from ContextVar.
uid = _slash_user_id.get()
if uid:
return self._slash_command_contexts.pop((chat_id, uid), None)
# Fallback: channel-only scan (only reachable when ContextVar is
# unset, i.e. send() called outside a slash-command async context).
match_key = None
for key in list(self._slash_command_contexts):
if key[0] == chat_id:
@ -427,10 +449,16 @@ class SlackAdapter(BasePlatformAdapter):
is non-critical.
"""
formatted = self.format_message(content)
# Slack's response_url has the same ~40k char limit as chat_postMessage.
# Truncate to MAX_MESSAGE_LENGTH and use only the first chunk — the
# response_url replaces a single ephemeral ack, so multi-chunk isn't
# possible. Long responses are rare for command replies.
chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
text = chunks[0] if chunks else formatted
payload = {
"response_type": "ephemeral",
"replace_original": True,
"text": formatted,
"text": text,
}
try:
async with aiohttp.ClientSession() as session:
@ -536,6 +564,9 @@ class SlackAdapter(BasePlatformAdapter):
# channel mentions arrive only as app_mention events rather than the
# generic message event. Forward them into the normal message
# pipeline so @mentions reliably produce replies.
# NOTE: when Slack fires BOTH message and app_mention for the same
# @mention, they share the same event ts — the dedup in
# _handle_slack_message (MessageDeduplicator) suppresses the second.
@self._app.event("app_mention")
async def handle_app_mention(event, say):
await self._handle_slack_message(event)
@ -2680,14 +2711,23 @@ class SlackAdapter(BasePlatformAdapter):
# Stash the Slack response_url so the first reply for this
# channel+user can be routed ephemerally (replaces the initial
# "Running /cmd…" ack shown by handle_hermes_command).
# Only stash for COMMAND events (text starts with "/") — free-form
# questions via "/hermes <question>" must produce public replies so
# the whole channel can see the agent's answer.
response_url = command.get("response_url", "")
if response_url and user_id and channel_id:
if response_url and user_id and channel_id and text.startswith("/"):
self._slash_command_contexts[(channel_id, user_id)] = {
"response_url": response_url,
"ts": time.monotonic(),
}
await self.handle_message(event)
# Set the ContextVar so send() can match the correct stashed
# response_url even when multiple users slash concurrently.
_slash_user_id_token = _slash_user_id.set(user_id or None)
try:
await self.handle_message(event)
finally:
_slash_user_id.reset(_slash_user_id_token)
def _has_active_session_for_thread(
self,

View File

@ -4317,7 +4317,11 @@ class GatewayRunner:
if getattr(result, "success", False):
return
except Exception:
pass
logger.debug(
"[%s] send_private_notice failed, falling back to public",
getattr(source, "platform", "?"),
exc_info=True,
)
await adapter.send(source.chat_id, content, metadata=metadata)

View File

@ -2823,3 +2823,82 @@ class TestSlashEphemeralAck:
adapter.handle_message.assert_called_once()
assert ("C_H", "U_H") in adapter._slash_command_contexts
@pytest.mark.asyncio
async def test_freeform_hermes_question_does_not_stash_context(self, adapter):
"""Free-form /hermes <question> must NOT route agent reply ephemeral."""
command = {
"command": "/hermes",
"text": "what's the weather",
"user_id": "U_FREE",
"channel_id": "C_FREE",
"response_url": "https://hooks.slack.com/commands/T1/4/free",
}
await adapter._handle_slash_command(command)
adapter.handle_message.assert_called_once()
event = adapter.handle_message.call_args[0][0]
# Free-form text — not a command
assert event.message_type == MessageType.TEXT
assert event.text == "what's the weather"
# Context must NOT be stashed — agent reply should be public
assert len(adapter._slash_command_contexts) == 0
@pytest.mark.asyncio
async def test_concurrent_users_same_channel_isolates_contexts(self, adapter):
"""Two users slash on the same channel — each gets their own context."""
import time
from gateway.platforms.slack import _slash_user_id
# Simulate two users stashing contexts on the same channel.
adapter._slash_command_contexts[("C_SHARED", "U_ALICE")] = {
"response_url": "https://hooks.slack.com/alice",
"ts": time.monotonic(),
}
adapter._slash_command_contexts[("C_SHARED", "U_BOB")] = {
"response_url": "https://hooks.slack.com/bob",
"ts": time.monotonic(),
}
# Alice's send() — ContextVar set to Alice's user_id.
token = _slash_user_id.set("U_ALICE")
try:
ctx = adapter._pop_slash_context("C_SHARED")
finally:
_slash_user_id.reset(token)
assert ctx is not None
assert ctx["response_url"] == "https://hooks.slack.com/alice"
# Bob's context must still be there.
assert ("C_SHARED", "U_BOB") in adapter._slash_command_contexts
assert len(adapter._slash_command_contexts) == 1
# Bob's send() — ContextVar set to Bob's user_id.
token = _slash_user_id.set("U_BOB")
try:
ctx = adapter._pop_slash_context("C_SHARED")
finally:
_slash_user_id.reset(token)
assert ctx is not None
assert ctx["response_url"] == "https://hooks.slack.com/bob"
assert len(adapter._slash_command_contexts) == 0
@pytest.mark.asyncio
async def test_no_contextvar_does_not_match_any_context(self, adapter):
"""send() without ContextVar (non-slash path) must not steal contexts."""
import time
from gateway.platforms.slack import _slash_user_id
adapter._slash_command_contexts[("C1", "U1")] = {
"response_url": "https://hooks.slack.com/test",
"ts": time.monotonic(),
}
# ContextVar is unset (default=None) — simulates a normal message send.
assert _slash_user_id.get() is None
ctx = adapter._pop_slash_context("C1")
# Fallback scan still finds it (channel-only) — this is fine for
# the normal single-user case; the ContextVar path is the precise one.
# The key invariant is: when the ContextVar IS set, it matches exactly.
assert ctx is not None # fallback path finds the entry