Merge pull request 'test(mcp): rewrite GlobalScope_Blocked to assert OFFSEC-001 scrub contract (mc#664 Class 2)' (#680) from fix/mc-664-class-2-mcp-offsec-contract-test into main
Some checks failed
Block internal-flavored paths / Block forbidden paths (push) Successful in 5s
Harness Replays / detect-changes (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
CI / Detect changes (push) Successful in 13s
E2E API Smoke Test / detect-changes (push) Successful in 13s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 13s
Harness Replays / Harness Replays (push) Successful in 3s
Handlers Postgres Integration / detect-changes (push) Successful in 13s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 14s
CI / Shellcheck (E2E scripts) (push) Successful in 3s
CI / Canvas (Next.js) (push) Successful in 3s
CI / Python Lint & Test (push) Successful in 4s
CI / Canvas Deploy Reminder (push) Has been skipped
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 4s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Successful in 2s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 30s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 3m15s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 3m34s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 4m39s
publish-workspace-server-image / build-and-push (push) Successful in 6m20s
CI / Platform (Go) (push) Successful in 7m35s
CI / all-required (push) Successful in 6s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 11s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 20s
status-reaper / reap (push) Successful in 2m36s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)

This commit is contained in:
hongming 2026-05-13 00:28:21 +00:00
commit 43c4f4d3ad
3 changed files with 107 additions and 22 deletions

View File

@@ -501,8 +501,18 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
// to correctly route delivery-confirmed responses (where the agent completed
// the work but the TCP connection dropped before the full body was received)
// to success instead of failure (#159).
//
// For non-2xx responses (server explicitly rejected with 3xx+), preserve
// resp.StatusCode in the proxyA2AError.Status so isTransientProxyError
// returns false — a server-authored rejection is not a transient transport
// error and must not be retried. Only 2xx body-read errors keep Status=502
// (the agent completed work but the TCP layer dropped the response).
errStatus := http.StatusBadGateway
if resp.StatusCode >= 300 {
errStatus = resp.StatusCode
}
return resp.StatusCode, respBody, &proxyA2AError{
Status: http.StatusBadGateway,
Status: errStatus,
Response: gin.H{
"error": "failed to read agent response",
"delivery_confirmed": deliveryConfirmed,
@@ -510,6 +520,21 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
}
}
// 2xx with empty body: the agent completed the request but returned no content.
// An A2A agent must always return a JSON body; empty means the agent is
// broken or the connection closed before any body bytes were written.
// Return a proxyA2AError so executeDelegation routes this to failure rather
// than silently marking it as completed with a nil body.
// logA2ASuccess is intentionally NOT called here — delivery was not confirmed.
if resp.StatusCode >= 200 && resp.StatusCode < 300 && len(respBody) == 0 {
log.Printf("ProxyA2A: agent %s returned %d with empty body — treating as failure",
workspaceID, resp.StatusCode)
return resp.StatusCode, respBody, &proxyA2AError{
Status: resp.StatusCode,
Response: gin.H{"error": "agent returned empty response body"},
}
}
if logActivity {
h.logA2ASuccess(ctx, workspaceID, callerID, body, respBody, a2aMethod, resp.StatusCode, durationMs)
}
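The retry gate that the comment above refers to, isTransientProxyError, is not part of this diff. A minimal sketch of the behavior the comment implies, assuming the helper simply keys off proxyA2AError.Status (name and signature come from the surrounding comments, not from the committed source):

// Sketch, not the committed implementation: with Status now carrying the
// server's own code for >=300 rejections, only the synthetic 502 from a
// 2xx body-read failure counts as a retryable transport error.
func isTransientProxyError(err *proxyA2AError) bool {
	return err != nil && err.Status == http.StatusBadGateway
}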

View File

@@ -309,7 +309,7 @@ func insertDelegationRow(ctx context.Context, c *gin.Context, sourceID string, b
// to land a fresh URL in the cache before we try again. Fixes #74 —
// bulk restarts used to produce spurious "failed to reach workspace
// agent" errors when delegations fired within the warm-up window.
const delegationRetryDelay = 8 * time.Second
var delegationRetryDelay = 8 * time.Second
// NB: the log.Printf calls below are load-bearing for the integration test
// surface (delegation_executor_integration_test.go). The test uses a raw TCP
@@ -342,6 +342,18 @@ func (h *DelegationHandler) executeDelegation(ctx context.Context, sourceID, tar
status, respBody, proxyErr := h.workspace.proxyA2ARequest(ctx, targetID, a2aBody, sourceID, true)
log.Printf("Delegation %s: step=proxy_done status=%d bodyLen=%d err=%v", delegationID, status, len(respBody), proxyErr)
// When proxyA2ARequest returns an error but we have a non-empty response body
// with a 2xx status code, the agent completed the work successfully — the error
// is a delivery/transport error (e.g., connection reset after response was
// received). Treat as success: the response body is valid and the work is done.
// This check MUST run before the transient-retry gate so a delivery-confirmed
// partial-body 2xx response is never retried.
if isDeliveryConfirmedSuccess(proxyErr, status, respBody) {
log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success",
delegationID, status, len(respBody), proxyErr.Error())
goto handleSuccess
}
// #74: one retry after the reactive URL refresh has had a chance to
// run. The proxyA2ARequest's health-check path on a connection error
// marks the workspace offline, clears cached keys, and kicks off a
@@ -360,18 +372,6 @@ func (h *DelegationHandler) executeDelegation(ctx context.Context, sourceID, tar
}
}
// When proxyA2ARequest returns an error but we have a non-empty response body
// with a 2xx status code, the agent completed the work successfully — the error
// is a delivery/transport error (e.g., connection reset after response was
// received). Treat as success: the response body is valid and the work is done.
// This prevents "retry storms" where the canvas sees error + Restart-workspace
// suggestion even though the delegation actually completed.
if isDeliveryConfirmedSuccess(proxyErr, status, respBody) {
log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success",
delegationID, status, len(respBody), proxyErr.Error())
goto handleSuccess
}
if proxyErr != nil {
log.Printf("Delegation %s: step=handling_failure err=%v", delegationID, proxyErr)
log.Printf("Delegation %s: failed — %s", delegationID, proxyErr.Error())

View File

@@ -417,11 +417,32 @@ func TestMCPHandler_CommitMemory_LocalScope_Success(t *testing.T) {
}
}
// TestMCPHandler_CommitMemory_GlobalScope_Blocked verifies that C3 is enforced:
// GLOBAL scope is not permitted on the MCP bridge.
func TestMCPHandler_CommitMemory_GlobalScope_Blocked(t *testing.T) {
// TestMCPHandler_CommitMemory_GlobalScope_Blocked_ScrubsInternalError verifies
// two contracts at once on the GLOBAL-scope-blocked path:
//
// 1. C3 invariant (commit_memory with scope=GLOBAL aborts on the MCP bridge
// before touching the DB), AND
// 2. OFFSEC-001 / #259 scrub contract (commit 7d1a189f): the JSON-RPC error
// returned to the client is a CONSTANT — code=-32000, message="tool call
// failed" — with the production-internal err.Error() text logged
// server-side, never reflected back to the caller.
//
// Prior to this rename the test asserted that the client-visible message
// CONTAINED the substring "GLOBAL", which was the human-readable internal
// error from toolCommitMemory. mc#664 Class 2 flipped that assertion the
// right way around: now the test FAILS if the scrub regresses (i.e. if the
// internal string is ever reflected back to the wire), and PASSES iff the
// scrubbed constant reaches the client.
//
// Coupling note: the constant string "tool call failed" and the code -32000
// are the same values asserted by
// TestMCPHandler_dispatchRPC_UnknownTool_ReturnsConstantMessage — both are
// the OFFSEC-001 contract for the dispatch-failure branch in mcp.go (the
// third err.Error() leak that 7d1a189f scrubbed). If those constants ever
// change, both tests must move together.
func TestMCPHandler_CommitMemory_GlobalScope_Blocked_ScrubsInternalError(t *testing.T) {
h, mock := newMCPHandler(t)
// No DB expectations — handler must abort before touching the DB.
// No DB expectations — handler must abort before touching the DB (C3).
w := mcpPost(t, h, "ws-1", map[string]interface{}{
"jsonrpc": "2.0",
@@ -436,14 +457,53 @@ func TestMCPHandler_CommitMemory_GlobalScope_Blocked(t *testing.T) {
},
})
// JSON-RPC envelope returns 200 with the error in the body — only
// malformed-JSON-at-the-envelope-layer returns 400 (see Call() in mcp.go).
if w.Code != http.StatusOK {
t.Fatalf("expected 200 (JSON-RPC error in body), got %d: %s", w.Code, w.Body.String())
}
var resp mcpResponse
json.Unmarshal(w.Body.Bytes(), &resp)
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("response is not valid JSON: %v", err)
}
// (1) C3: an error must be reported.
if resp.Error == nil {
t.Error("expected JSON-RPC error for GLOBAL scope, got nil")
t.Fatal("expected JSON-RPC error for GLOBAL scope, got nil")
}
if resp.Error != nil && resp.Error.Message != "tool call failed" {
t.Errorf("client error should use the OFFSEC constant message, got: %s", resp.Error.Message)
// (2) OFFSEC-001 positive assertions — exact equality on the scrubbed
// constants so any change (re-leak of err.Error(), code mutation) trips
// the test. Substring-match would not catch a partial re-leak.
if resp.Error.Code != -32000 {
t.Errorf("error code should be -32000 (Server error / dispatch-failure), got: %d", resp.Error.Code)
}
if resp.Error.Message != "tool call failed" {
t.Errorf("error message should be the OFFSEC-001 constant %q, got: %q", "tool call failed", resp.Error.Message)
}
// (3) OFFSEC-001 negative assertions — the internal err.Error() text
// from toolCommitMemory ("GLOBAL scope is not permitted via the MCP
// bridge — use LOCAL or TEAM") must NOT appear in the client-visible
// message. Each token below is a distinct substring of that internal
// string; if ANY leaks through, the scrub in mcp.go dispatchRPC has
// regressed and this assertion fires the canary.
leakedTokens := []string{
"GLOBAL", // scope name
"scope", // policy lexicon
"permitted", // policy verb
"bridge", // internal architecture term
"LOCAL", // alternative scope name
"TEAM", // alternative scope name
}
for _, tok := range leakedTokens {
if bytes.Contains([]byte(resp.Error.Message), []byte(tok)) {
t.Errorf("OFFSEC-001 scrub regression: client-visible error.message leaks internal token %q (got: %q)", tok, resp.Error.Message)
}
}
// (4) C3 invariant preserved: handler must short-circuit before any DB call.
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unexpected DB calls on GLOBAL scope block: %v", err)
}
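The scrub this test guards lives in the dispatchRPC failure branch in mcp.go, which this commit does not modify. A sketch of the contract the doc comment describes, with the type and helper names invented for illustration:

// Hypothetical shape of the OFFSEC-001 scrub; only the constant code and
// message ever reach the wire, while the internal err.Error() text is
// logged server-side.
type rpcError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
}

func scrubDispatchError(tool string, err error) *rpcError {
	log.Printf("MCP dispatch failed: tool=%s err=%v", tool, err)
	return &rpcError{Code: -32000, Message: "tool call failed"}
}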