fix(runtime+canvas): surface actionable provider error reason instead of opaque "Agent error (Exception)" #1420
@@ -67,9 +67,21 @@ export function useChatSocket(
|
||||
const own = (targetId || msg.workspace_id) === workspaceId;
|
||||
if (own) {
|
||||
callbacksRef.current.onSendComplete?.();
|
||||
callbacksRef.current.onSendError?.(
|
||||
"Agent error (Exception) — see workspace logs for details.",
|
||||
);
|
||||
// internal#211/#212: surface the runtime's curated,
|
||||
// user-actionable reason (provider HTTP status + error
|
||||
// code + the provider's own guidance, e.g. a 403 "org
|
||||
// disabled · use an API key / ask your admin"). The
|
||||
// server now includes error_detail in the ACTIVITY_LOGGED
|
||||
// broadcast; fall back to summary, and only as a last
|
||||
// resort to a generic line. The old hardcoded
|
||||
// "Agent error (Exception) — see workspace logs for
|
||||
// details." string pointed at a logs UI that does not
|
||||
// exist and discarded the actionable reason entirely.
|
||||
const detail =
|
||||
(p.error_detail as string) ||
|
||||
(p.summary as string) ||
|
||||
"The agent turn failed but the runtime reported no detail. Retry once; if it repeats the workspace runtime may need a restart.";
|
||||
callbacksRef.current.onSendError?.(detail);
|
||||
}
|
||||
}
|
||||
} else if (type === "a2a_send") {
|
||||
|
||||
@@ -62,6 +62,7 @@ TOP_LEVEL_MODULES = {
|
||||
"a2a_tools_memory",
|
||||
"a2a_tools_messaging",
|
||||
"a2a_tools_rbac",
|
||||
"a2a_tools_identity",
|
||||
"adapter_base",
|
||||
"agent",
|
||||
"agents_md",
|
||||
|
||||
@@ -691,6 +691,19 @@ func logActivityExec(ctx context.Context, exec activityExecutor, broadcaster eve
|
||||
if respStr != nil {
|
||||
payload["response_body"] = json.RawMessage(respJSON)
|
||||
}
|
||||
// internal#211/#212: error_detail carries the runtime's curated,
|
||||
// user-actionable, secret-safe failure reason (provider HTTP
|
||||
// status + error code + the provider's own guidance, e.g. a 403
|
||||
// "org disabled · use an API key / ask your admin"). It is
|
||||
// already persisted to the DB column above and capped by the
|
||||
// runtime's report_activity helper (4096 chars). Previously it
|
||||
// was dropped from the LIVE broadcast, so the canvas had nothing
|
||||
// to render and fell back to a hardcoded opaque
|
||||
// "Agent error (Exception) — see workspace logs" string. Include
|
||||
// it so the chat bubble shows the real reason in real time.
|
||||
if params.ErrorDetail != nil && *params.ErrorDetail != "" {
|
||||
payload["error_detail"] = *params.ErrorDetail
|
||||
}
|
||||
}
|
||||
|
||||
return func() {
|
||||
|
||||
@@ -599,6 +599,28 @@ def _sanitize_for_external(msg: str) -> str:
|
||||
import re as _re
|
||||
|
||||
msg = _re.sub(r"(?i)(?:bearer|token|api[_-]?key|sk-)[ :=]+[A-Za-z0-9_/.-]{20,}", "[REDACTED]", msg)
|
||||
# Bare provider key with NO separator after the prefix — a real
|
||||
# `sk-ant-api03-…` / `sk-…` key uses `-` (not `[ :=]`) so the rule
|
||||
# above misses it. Require ≥24 key-ish chars after the `sk-`/`sk-ant-`
|
||||
# prefix so curated examples like `sk-ant-EXAMPLE-SHORT` (13 chars
|
||||
# after `sk-ant-`) still pass through un-redacted.
|
||||
msg = _re.sub(r"(?i)\bsk-(?:ant-)?[A-Za-z0-9_-]{24,}", "[REDACTED]", msg)
|
||||
# JSON-quoted credential values: {"token": "…"} / {"apiKey": "…"} /
|
||||
# {"secret": "…"} / {"password": "…"}. Redact only the value, and only
|
||||
# when it is ≥24 chars so a short curated sample like
|
||||
# `"api_key": "sk-ant-EXAMPLE-SHORT"` (20-char value) still passes.
|
||||
msg = _re.sub(
|
||||
r'(?i)("(?:token|api[_-]?key|secret|password)"\s*:\s*")[^"]{24,}(")',
|
||||
r"\1[REDACTED]\2",
|
||||
msg,
|
||||
)
|
||||
# AWS secret access key in `aws_secret_access_key=…` form (env dumps,
|
||||
# boto tracebacks). The base64-ish value runs until whitespace/quote.
|
||||
msg = _re.sub(
|
||||
r"(?i)(aws_secret_access_key\s*[:=]\s*)\S+",
|
||||
r"\1[REDACTED]",
|
||||
msg,
|
||||
)
|
||||
# Absolute paths: /etc/shadow, /home/user/.aws/credentials, etc.
|
||||
msg = _re.sub(r"(?:/[^/\s]+){2,}", lambda m: m.group(0) if len(m.group(0)) < 60 else "[REDACTED_PATH]", msg)
|
||||
return msg
|
||||
@@ -608,6 +630,7 @@ def sanitize_agent_error(
|
||||
exc: BaseException | None = None,
|
||||
category: str | None = None,
|
||||
stderr: str | None = None,
|
||||
reason: str | None = None,
|
||||
) -> str:
|
||||
"""Render an agent-side failure into a user-safe error message.
|
||||
|
||||
@@ -615,6 +638,18 @@ def sanitize_agent_error(
|
||||
category string (e.g. from `classify_subprocess_error`). If both are
|
||||
given, `category` wins. If neither, the tag defaults to "unknown".
|
||||
|
||||
When ``reason`` is provided (internal#211/#212), it is a *pre-curated,
|
||||
user-actionable, secret-safe* explanation built by the caller from a
|
||||
provider-side failure — e.g. a 403 "Your organization has disabled
|
||||
Claude subscription access · Use an Anthropic API key instead, or ask
|
||||
your admin to enable access" with error code ``oauth_org_not_allowed``.
|
||||
This text is exactly what the user needs to self-serve, so it is
|
||||
surfaced VERBATIM as the message instead of being collapsed to the
|
||||
opaque exception class name. It still passes through the
|
||||
key/token/bearer/path scrubber as a belt-and-braces second pass so a
|
||||
buggy caller can't leak a credential that snuck into the reason.
|
||||
``reason`` wins over ``stderr``; both lose to neither being set.
|
||||
|
||||
When ``stderr`` is provided (e.g. the first ~1 KB of a subprocess stderr
|
||||
or HTTP error body), it is sanitized and appended to the output so the
|
||||
A2A caller gets actionable context without needing to dig through workspace
|
||||
@@ -629,6 +664,13 @@ def sanitize_agent_error(
|
||||
else:
|
||||
tag = "unknown"
|
||||
|
||||
if reason:
|
||||
# Curated, user-actionable reason — surface it as the message.
|
||||
# Still scrub: a 403/auth/quota message is safe, but the scrubber
|
||||
# is cheap insurance against a caller that didn't curate cleanly.
|
||||
clean = _sanitize_for_external(reason[:_MAX_STDERR_PREVIEW])
|
||||
return f"Agent error ({tag}): {clean}"
|
||||
|
||||
if stderr:
|
||||
# Truncate and sanitize before including — prevents DoS via
|
||||
# a malicious or buggy peer injecting a huge error body, and
|
||||
|
||||
@@ -788,6 +788,123 @@ def test_sanitize_agent_error_stderr_combined_with_existing_tests():
|
||||
assert "workspace logs" in out
|
||||
|
||||
|
||||
# ─── reason passthrough (internal#211/#212: surface actionable provider error) ───
|
||||
|
||||
|
||||
def test_sanitize_agent_error_reason_surfaced_verbatim():
|
||||
"""A curated provider reason is shown to the user, not collapsed to the
|
||||
exception class name. This is the internal#211 regression: a 403
|
||||
org-disabled message must reach the canvas."""
|
||||
reason = (
|
||||
"provider HTTP 403 — oauth_org_not_allowed — Your organization has "
|
||||
"disabled Claude subscription access for Claude Code · Use an "
|
||||
"Anthropic API key instead, or ask your admin to enable access"
|
||||
)
|
||||
|
||||
class _ResultErr(Exception):
|
||||
pass
|
||||
|
||||
out = sanitize_agent_error(exc=_ResultErr("opaque"), reason=reason)
|
||||
# The actionable provider guidance and status code must be visible.
|
||||
assert "403" in out
|
||||
assert "oauth_org_not_allowed" in out
|
||||
assert "disabled Claude subscription access" in out
|
||||
assert "ask your admin to enable access" in out
|
||||
# NOT the old opaque form.
|
||||
assert "see workspace logs" not in out
|
||||
|
||||
|
||||
def test_sanitize_agent_error_reason_still_scrubs_secrets():
|
||||
"""Even on the reason path the key/token scrubber runs — a buggy caller
|
||||
that lets a bearer token into the reason still gets it redacted."""
|
||||
leaky = (
|
||||
"provider HTTP 401 — auth failed — Authorization: Bearer "
|
||||
"PLACEHOLDER_LONG_TOKEN_0123456789abcdefghijklm please re-auth"
|
||||
)
|
||||
out = sanitize_agent_error(reason=leaky)
|
||||
assert "[REDACTED]" in out
|
||||
assert "PLACEHOLDER_LONG_TOKEN_0123456789abcdefghijklm" not in out
|
||||
# The non-secret guidance still survives the scrub.
|
||||
assert "401" in out
|
||||
assert "please re-auth" in out
|
||||
|
||||
|
||||
def test_sanitize_agent_error_reason_scrubs_all_secret_formats():
|
||||
"""The scrubber must redact every realistic credential shape — not just
|
||||
the `Bearer <tok>` form the original test happened to exercise
|
||||
(internal#212 review finding: bare `sk-ant-api03-…` keys, JSON-quoted
|
||||
"token"/"apiKey" values, and `aws_secret_access_key=` all leaked).
|
||||
All curated/actionable guidance must still survive the scrub.
|
||||
"""
|
||||
# 1. Bare sk-ant-api03 key — no `[ :=]` separator after the prefix
|
||||
# (a real Anthropic key uses `-`), so the legacy regex missed it.
|
||||
bare = (
|
||||
"provider HTTP 401 — auth failed — invalid key "
|
||||
"sk-FAKEPLACEHOLDERabcdefghijklmnopqrstuvwxy0123456789 "
|
||||
"please re-auth"
|
||||
)
|
||||
out = sanitize_agent_error(reason=bare)
|
||||
assert "sk-FAKEPLACEHOLDERabcdefghijklmnopqrstuvwxy0123456789" not in out
|
||||
assert "[REDACTED]" in out
|
||||
assert "401" in out # actionable status survives
|
||||
assert "please re-auth" in out # actionable guidance survives
|
||||
|
||||
# 2. JSON-quoted "token" / "apiKey" values.
|
||||
jblob = (
|
||||
'provider error — config dump {"token": '
|
||||
'"abcDEF0123456789ghIJKL0123456789mnopQRST", "apiKey": '
|
||||
'"anon_fakefakefakefakefakefakefakefakefakefake"} — '
|
||||
"use an API key instead"
|
||||
)
|
||||
out = sanitize_agent_error(reason=jblob)
|
||||
assert "abcDEF0123456789ghIJKL0123456789mnopQRST" not in out
|
||||
assert "anon_fakefakefakefakefakefakefakefakefakefake" not in out
|
||||
assert "[REDACTED]" in out
|
||||
assert "use an API key instead" in out # actionable guidance survives
|
||||
|
||||
# 3. aws_secret_access_key=… form.
|
||||
awsblob = (
|
||||
"provider HTTP 403 — boto credential error "
|
||||
"aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY — "
|
||||
"ask your admin to enable access"
|
||||
)
|
||||
out = sanitize_agent_error(reason=awsblob)
|
||||
assert "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" not in out
|
||||
assert "[REDACTED]" in out
|
||||
assert "403" in out # actionable status survives
|
||||
assert "ask your admin to enable access" in out # guidance survives
|
||||
|
||||
# 4. Regression: the original Bearer form still redacts.
|
||||
# Uses PLACEHOLDER_LONG_TOKEN (>=40 chars, no sk-ant- prefix) to avoid
|
||||
# triggering the secret-scan workflow pattern
|
||||
# `sk-ant-[A-Za-z0-9_-]{40,}`.
|
||||
bearer = (
|
||||
"provider HTTP 401 — Authorization: Bearer "
|
||||
"PLACEHOLDER_LONG_TOKEN_9876543210abcdefghij re-auth"
|
||||
)
|
||||
out = sanitize_agent_error(reason=bearer)
|
||||
assert "PLACEHOLDER_LONG_TOKEN_9876543210abcdefghij" not in out
|
||||
assert "[REDACTED]" in out
|
||||
assert "re-auth" in out
|
||||
|
||||
|
||||
def test_sanitize_agent_error_reason_wins_over_stderr():
|
||||
"""When both reason and stderr are passed, the curated reason wins."""
|
||||
out = sanitize_agent_error(
|
||||
reason="provider HTTP 403 — use an API key",
|
||||
stderr="raw subprocess noise that should not be shown",
|
||||
)
|
||||
assert "use an API key" in out
|
||||
assert "raw subprocess noise" not in out
|
||||
|
||||
|
||||
def test_sanitize_agent_error_no_reason_unchanged():
|
||||
"""Omitting reason preserves the original generic behavior."""
|
||||
out = sanitize_agent_error(exc=ValueError("boom"))
|
||||
assert "ValueError" in out
|
||||
assert "workspace logs" in out
|
||||
|
||||
|
||||
|
||||
# ======================================================================
|
||||
# classify_subprocess_error
|
||||
|
||||
Reference in New Issue
Block a user