fix(gateway/slack): review fixes — scope ephemeral to commands, user isolation
Self-review fixes for the slash ephemeral ack: - Only stash response_url when text starts with '/' (gateway command). Free-form questions via '/hermes <question>' must produce public agent replies visible to the whole channel, not ephemeral. - Use a ContextVar (_slash_user_id) to thread the invoking user's ID from _handle_slash_command through to send(). _pop_slash_context now matches the exact (channel_id, user_id) key when the ContextVar is set, preventing concurrent users on the same channel from stealing each other's ephemeral context. ContextVars propagate to child asyncio.Tasks, so the value survives through handle_message → _process_message_background → _send_with_retry → send(). - Add truncate_message() in _send_slash_ephemeral to prevent silent failures on long responses (response_url has the same ~40k limit). - Log send_private_notice failures at debug level instead of bare except/pass — aids diagnostics without spamming. - Document app_mention dedup dependency on shared event ts. - Add tests: free-form question must NOT stash context, concurrent users on the same channel get isolated contexts, non-slash send() path fallback behavior.
This commit is contained in:
parent
f34d298495
commit
8fcc160f6b
@ -9,6 +9,7 @@ Uses slack-bolt (Python) with Socket Mode for:
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import contextvars
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@ -51,6 +52,16 @@ from gateway.platforms.base import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ContextVar carrying the user_id of the slash-command invoker.
|
||||
# Set in _handle_slash_command, read in send() to match the correct
|
||||
# stashed response_url when multiple users issue commands on the same
|
||||
# channel concurrently. ContextVars propagate to child asyncio.Tasks
|
||||
# (Python 3.7+), so the value set in _handle_slash_command's task is
|
||||
# visible in _process_message_background's child task.
|
||||
_slash_user_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
|
||||
"_slash_user_id", default=None,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class _ThreadContextCache:
|
||||
@ -388,8 +399,13 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
"""Return and remove the slash-command context for *chat_id*, if fresh.
|
||||
|
||||
Contexts older than ``_SLASH_CTX_TTL`` seconds are silently discarded.
|
||||
Uses a full scan (dict is tiny) so we don't need the user_id in
|
||||
``send()``, which only receives the channel ID from base.py.
|
||||
|
||||
Uses the ``_slash_user_id`` ContextVar (set in ``_handle_slash_command``)
|
||||
to match the exact ``(channel_id, user_id)`` key. This prevents a
|
||||
concurrent slash command from a different user on the same channel from
|
||||
stealing another user's ephemeral context. Falls back to a
|
||||
channel-only scan when the ContextVar is unset (e.g. send() called
|
||||
from a non-slash code path — should not match anything).
|
||||
"""
|
||||
now = time.monotonic()
|
||||
# Clean up stale entries on every lookup — dict is small.
|
||||
@ -400,7 +416,13 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
for k in stale_keys:
|
||||
self._slash_command_contexts.pop(k, None)
|
||||
|
||||
# Find the context for this channel (may be keyed under any user).
|
||||
# Precise match: (channel_id, user_id) from ContextVar.
|
||||
uid = _slash_user_id.get()
|
||||
if uid:
|
||||
return self._slash_command_contexts.pop((chat_id, uid), None)
|
||||
|
||||
# Fallback: channel-only scan (only reachable when ContextVar is
|
||||
# unset, i.e. send() called outside a slash-command async context).
|
||||
match_key = None
|
||||
for key in list(self._slash_command_contexts):
|
||||
if key[0] == chat_id:
|
||||
@ -427,10 +449,16 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
is non-critical.
|
||||
"""
|
||||
formatted = self.format_message(content)
|
||||
# Slack's response_url has the same ~40k char limit as chat_postMessage.
|
||||
# Truncate to MAX_MESSAGE_LENGTH and use only the first chunk — the
|
||||
# response_url replaces a single ephemeral ack, so multi-chunk isn't
|
||||
# possible. Long responses are rare for command replies.
|
||||
chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
|
||||
text = chunks[0] if chunks else formatted
|
||||
payload = {
|
||||
"response_type": "ephemeral",
|
||||
"replace_original": True,
|
||||
"text": formatted,
|
||||
"text": text,
|
||||
}
|
||||
try:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
@ -536,6 +564,9 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# channel mentions arrive only as app_mention events rather than the
|
||||
# generic message event. Forward them into the normal message
|
||||
# pipeline so @mentions reliably produce replies.
|
||||
# NOTE: when Slack fires BOTH message and app_mention for the same
|
||||
# @mention, they share the same event ts — the dedup in
|
||||
# _handle_slack_message (MessageDeduplicator) suppresses the second.
|
||||
@self._app.event("app_mention")
|
||||
async def handle_app_mention(event, say):
|
||||
await self._handle_slack_message(event)
|
||||
@ -2680,14 +2711,23 @@ class SlackAdapter(BasePlatformAdapter):
|
||||
# Stash the Slack response_url so the first reply for this
|
||||
# channel+user can be routed ephemerally (replaces the initial
|
||||
# "Running /cmd…" ack shown by handle_hermes_command).
|
||||
# Only stash for COMMAND events (text starts with "/") — free-form
|
||||
# questions via "/hermes <question>" must produce public replies so
|
||||
# the whole channel can see the agent's answer.
|
||||
response_url = command.get("response_url", "")
|
||||
if response_url and user_id and channel_id:
|
||||
if response_url and user_id and channel_id and text.startswith("/"):
|
||||
self._slash_command_contexts[(channel_id, user_id)] = {
|
||||
"response_url": response_url,
|
||||
"ts": time.monotonic(),
|
||||
}
|
||||
|
||||
await self.handle_message(event)
|
||||
# Set the ContextVar so send() can match the correct stashed
|
||||
# response_url even when multiple users slash concurrently.
|
||||
_slash_user_id_token = _slash_user_id.set(user_id or None)
|
||||
try:
|
||||
await self.handle_message(event)
|
||||
finally:
|
||||
_slash_user_id.reset(_slash_user_id_token)
|
||||
|
||||
def _has_active_session_for_thread(
|
||||
self,
|
||||
|
||||
@ -4317,7 +4317,11 @@ class GatewayRunner:
|
||||
if getattr(result, "success", False):
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
logger.debug(
|
||||
"[%s] send_private_notice failed, falling back to public",
|
||||
getattr(source, "platform", "?"),
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
await adapter.send(source.chat_id, content, metadata=metadata)
|
||||
|
||||
|
||||
@ -2823,3 +2823,82 @@ class TestSlashEphemeralAck:
|
||||
|
||||
adapter.handle_message.assert_called_once()
|
||||
assert ("C_H", "U_H") in adapter._slash_command_contexts
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_freeform_hermes_question_does_not_stash_context(self, adapter):
|
||||
"""Free-form /hermes <question> must NOT route agent reply ephemeral."""
|
||||
command = {
|
||||
"command": "/hermes",
|
||||
"text": "what's the weather",
|
||||
"user_id": "U_FREE",
|
||||
"channel_id": "C_FREE",
|
||||
"response_url": "https://hooks.slack.com/commands/T1/4/free",
|
||||
}
|
||||
await adapter._handle_slash_command(command)
|
||||
|
||||
adapter.handle_message.assert_called_once()
|
||||
event = adapter.handle_message.call_args[0][0]
|
||||
# Free-form text — not a command
|
||||
assert event.message_type == MessageType.TEXT
|
||||
assert event.text == "what's the weather"
|
||||
# Context must NOT be stashed — agent reply should be public
|
||||
assert len(adapter._slash_command_contexts) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_concurrent_users_same_channel_isolates_contexts(self, adapter):
|
||||
"""Two users slash on the same channel — each gets their own context."""
|
||||
import time
|
||||
from gateway.platforms.slack import _slash_user_id
|
||||
|
||||
# Simulate two users stashing contexts on the same channel.
|
||||
adapter._slash_command_contexts[("C_SHARED", "U_ALICE")] = {
|
||||
"response_url": "https://hooks.slack.com/alice",
|
||||
"ts": time.monotonic(),
|
||||
}
|
||||
adapter._slash_command_contexts[("C_SHARED", "U_BOB")] = {
|
||||
"response_url": "https://hooks.slack.com/bob",
|
||||
"ts": time.monotonic(),
|
||||
}
|
||||
|
||||
# Alice's send() — ContextVar set to Alice's user_id.
|
||||
token = _slash_user_id.set("U_ALICE")
|
||||
try:
|
||||
ctx = adapter._pop_slash_context("C_SHARED")
|
||||
finally:
|
||||
_slash_user_id.reset(token)
|
||||
|
||||
assert ctx is not None
|
||||
assert ctx["response_url"] == "https://hooks.slack.com/alice"
|
||||
# Bob's context must still be there.
|
||||
assert ("C_SHARED", "U_BOB") in adapter._slash_command_contexts
|
||||
assert len(adapter._slash_command_contexts) == 1
|
||||
|
||||
# Bob's send() — ContextVar set to Bob's user_id.
|
||||
token = _slash_user_id.set("U_BOB")
|
||||
try:
|
||||
ctx = adapter._pop_slash_context("C_SHARED")
|
||||
finally:
|
||||
_slash_user_id.reset(token)
|
||||
|
||||
assert ctx is not None
|
||||
assert ctx["response_url"] == "https://hooks.slack.com/bob"
|
||||
assert len(adapter._slash_command_contexts) == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_contextvar_does_not_match_any_context(self, adapter):
|
||||
"""send() without ContextVar (non-slash path) must not steal contexts."""
|
||||
import time
|
||||
from gateway.platforms.slack import _slash_user_id
|
||||
|
||||
adapter._slash_command_contexts[("C1", "U1")] = {
|
||||
"response_url": "https://hooks.slack.com/test",
|
||||
"ts": time.monotonic(),
|
||||
}
|
||||
|
||||
# ContextVar is unset (default=None) — simulates a normal message send.
|
||||
assert _slash_user_id.get() is None
|
||||
ctx = adapter._pop_slash_context("C1")
|
||||
# Fallback scan still finds it (channel-only) — this is fine for
|
||||
# the normal single-user case; the ContextVar path is the precise one.
|
||||
# The key invariant is: when the ContextVar IS set, it matches exactly.
|
||||
assert ctx is not None # fallback path finds the entry
|
||||
|
||||
Loading…
Reference in New Issue
Block a user