Merge pull request 'test(mcp): rewrite GlobalScope_Blocked to assert OFFSEC-001 scrub contract (mc#664 Class 2)' (#680) from fix/mc-664-class-2-mcp-offsec-contract-test into main
Some checks failed
Block internal-flavored paths / Block forbidden paths (push) Successful in 5s
Harness Replays / detect-changes (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
CI / Detect changes (push) Successful in 13s
E2E API Smoke Test / detect-changes (push) Successful in 13s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 13s
Harness Replays / Harness Replays (push) Successful in 3s
Handlers Postgres Integration / detect-changes (push) Successful in 13s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 14s
CI / Shellcheck (E2E scripts) (push) Successful in 3s
CI / Canvas (Next.js) (push) Successful in 3s
CI / Python Lint & Test (push) Successful in 4s
CI / Canvas Deploy Reminder (push) Has been skipped
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 4s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Successful in 2s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 30s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 3m15s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 3m34s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 4m39s
publish-workspace-server-image / build-and-push (push) Successful in 6m20s
CI / Platform (Go) (push) Successful in 7m35s
CI / all-required (push) Successful in 6s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 11s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 20s
status-reaper / reap (push) Successful in 2m36s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Some checks failed
Block internal-flavored paths / Block forbidden paths (push) Successful in 5s
Harness Replays / detect-changes (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
CI / Detect changes (push) Successful in 13s
E2E API Smoke Test / detect-changes (push) Successful in 13s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 13s
Harness Replays / Harness Replays (push) Successful in 3s
Handlers Postgres Integration / detect-changes (push) Successful in 13s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 14s
CI / Shellcheck (E2E scripts) (push) Successful in 3s
CI / Canvas (Next.js) (push) Successful in 3s
CI / Python Lint & Test (push) Successful in 4s
CI / Canvas Deploy Reminder (push) Has been skipped
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 4s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Successful in 2s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 30s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 3m15s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 3m34s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 4m39s
publish-workspace-server-image / build-and-push (push) Successful in 6m20s
CI / Platform (Go) (push) Successful in 7m35s
CI / all-required (push) Successful in 6s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 11s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 20s
status-reaper / reap (push) Successful in 2m36s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
This commit is contained in:
commit
43c4f4d3ad
@ -501,8 +501,18 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
|
||||
// to correctly route delivery-confirmed responses (where the agent completed
|
||||
// the work but the TCP connection dropped before the full body was received)
|
||||
// to success instead of failure (#159).
|
||||
//
|
||||
// For non-2xx responses (server explicitly rejected with 3xx+), preserve
|
||||
// resp.StatusCode in the proxyA2AError.Status so isTransientProxyError
|
||||
// returns false — a server-authored rejection is not a transient transport
|
||||
// error and must not be retried. Only 2xx body-read errors keep Status=502
|
||||
// (the agent completed work but the TCP layer dropped the response).
|
||||
errStatus := http.StatusBadGateway
|
||||
if resp.StatusCode >= 300 {
|
||||
errStatus = resp.StatusCode
|
||||
}
|
||||
return resp.StatusCode, respBody, &proxyA2AError{
|
||||
Status: http.StatusBadGateway,
|
||||
Status: errStatus,
|
||||
Response: gin.H{
|
||||
"error": "failed to read agent response",
|
||||
"delivery_confirmed": deliveryConfirmed,
|
||||
@ -510,6 +520,21 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
|
||||
}
|
||||
}
|
||||
|
||||
// 2xx with empty body: the agent completed the request but returned no content.
|
||||
// An A2A agent must always return a JSON body; empty means the agent is
|
||||
// broken or the connection closed before any body bytes were written.
|
||||
// Return a proxyA2AError so executeDelegation routes this to failure rather
|
||||
// than silently marking it as completed with a nil body.
|
||||
// logA2ASuccess is intentionally NOT called here — delivery was not confirmed.
|
||||
if resp.StatusCode >= 200 && resp.StatusCode < 300 && len(respBody) == 0 {
|
||||
log.Printf("ProxyA2A: agent %s returned %d with empty body — treating as failure",
|
||||
workspaceID, resp.StatusCode)
|
||||
return resp.StatusCode, respBody, &proxyA2AError{
|
||||
Status: resp.StatusCode,
|
||||
Response: gin.H{"error": "agent returned empty response body"},
|
||||
}
|
||||
}
|
||||
|
||||
if logActivity {
|
||||
h.logA2ASuccess(ctx, workspaceID, callerID, body, respBody, a2aMethod, resp.StatusCode, durationMs)
|
||||
}
|
||||
|
||||
@ -309,7 +309,7 @@ func insertDelegationRow(ctx context.Context, c *gin.Context, sourceID string, b
|
||||
// to land a fresh URL in the cache before we try again. Fixes #74 —
|
||||
// bulk restarts used to produce spurious "failed to reach workspace
|
||||
// agent" errors when delegations fired within the warm-up window.
|
||||
const delegationRetryDelay = 8 * time.Second
|
||||
var delegationRetryDelay = 8 * time.Second
|
||||
|
||||
// NB: the log.Printf calls below are load-bearing for the integration test
|
||||
// surface (delegation_executor_integration_test.go). The test uses a raw TCP
|
||||
@ -342,6 +342,18 @@ func (h *DelegationHandler) executeDelegation(ctx context.Context, sourceID, tar
|
||||
status, respBody, proxyErr := h.workspace.proxyA2ARequest(ctx, targetID, a2aBody, sourceID, true)
|
||||
log.Printf("Delegation %s: step=proxy_done status=%d bodyLen=%d err=%v", delegationID, status, len(respBody), proxyErr)
|
||||
|
||||
// When proxyA2ARequest returns an error but we have a non-empty response body
|
||||
// with a 2xx status code, the agent completed the work successfully — the error
|
||||
// is a delivery/transport error (e.g., connection reset after response was
|
||||
// received). Treat as success: the response body is valid and the work is done.
|
||||
// This check MUST run before the transient-retry gate so a delivery-confirmed
|
||||
// partial-body 2xx response is never retried.
|
||||
if isDeliveryConfirmedSuccess(proxyErr, status, respBody) {
|
||||
log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success",
|
||||
delegationID, status, len(respBody), proxyErr.Error())
|
||||
goto handleSuccess
|
||||
}
|
||||
|
||||
// #74: one retry after the reactive URL refresh has had a chance to
|
||||
// run. The proxyA2ARequest's health-check path on a connection error
|
||||
// marks the workspace offline, clears cached keys, and kicks off a
|
||||
@ -360,18 +372,6 @@ func (h *DelegationHandler) executeDelegation(ctx context.Context, sourceID, tar
|
||||
}
|
||||
}
|
||||
|
||||
// When proxyA2ARequest returns an error but we have a non-empty response body
|
||||
// with a 2xx status code, the agent completed the work successfully — the error
|
||||
// is a delivery/transport error (e.g., connection reset after response was
|
||||
// received). Treat as success: the response body is valid and the work is done.
|
||||
// This prevents "retry storms" where the canvas sees error + Restart-workspace
|
||||
// suggestion even though the delegation actually completed.
|
||||
if isDeliveryConfirmedSuccess(proxyErr, status, respBody) {
|
||||
log.Printf("Delegation %s: completed with delivery error (status=%d, respBody=%d bytes, proxyErr=%v) — treating as success",
|
||||
delegationID, status, len(respBody), proxyErr.Error())
|
||||
goto handleSuccess
|
||||
}
|
||||
|
||||
if proxyErr != nil {
|
||||
log.Printf("Delegation %s: step=handling_failure err=%v", delegationID, proxyErr)
|
||||
log.Printf("Delegation %s: failed — %s", delegationID, proxyErr.Error())
|
||||
|
||||
@ -417,11 +417,32 @@ func TestMCPHandler_CommitMemory_LocalScope_Success(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMCPHandler_CommitMemory_GlobalScope_Blocked verifies that C3 is enforced:
|
||||
// GLOBAL scope is not permitted on the MCP bridge.
|
||||
func TestMCPHandler_CommitMemory_GlobalScope_Blocked(t *testing.T) {
|
||||
// TestMCPHandler_CommitMemory_GlobalScope_Blocked_ScrubsInternalError verifies
|
||||
// two contracts at once on the GLOBAL-scope-blocked path:
|
||||
//
|
||||
// 1. C3 invariant (commit_memory with scope=GLOBAL aborts on the MCP bridge
|
||||
// before touching the DB), AND
|
||||
// 2. OFFSEC-001 / #259 scrub contract (commit 7d1a189f): the JSON-RPC error
|
||||
// returned to the client is a CONSTANT — code=-32000, message="tool call
|
||||
// failed" — with the production-internal err.Error() text logged
|
||||
// server-side, never reflected back to the caller.
|
||||
//
|
||||
// Prior to this rename the test asserted that the client-visible message
|
||||
// CONTAINED the substring "GLOBAL", which was the human-readable internal
|
||||
// error from toolCommitMemory. mc#664 Class 2 flipped that assertion the
|
||||
// right way around: now the test FAILS if the scrub regresses (i.e. if the
|
||||
// internal string is ever reflected back to the wire), and PASSES iff the
|
||||
// scrubbed constant reaches the client.
|
||||
//
|
||||
// Coupling note: the constant string "tool call failed" and the code -32000
|
||||
// are the same values asserted by
|
||||
// TestMCPHandler_dispatchRPC_UnknownTool_ReturnsConstantMessage — both are
|
||||
// the OFFSEC-001 contract for the dispatch-failure branch in mcp.go (the
|
||||
// third err.Error() leak that 7d1a189f scrubbed). If those constants ever
|
||||
// change, both tests must move together.
|
||||
func TestMCPHandler_CommitMemory_GlobalScope_Blocked_ScrubsInternalError(t *testing.T) {
|
||||
h, mock := newMCPHandler(t)
|
||||
// No DB expectations — handler must abort before touching the DB.
|
||||
// No DB expectations — handler must abort before touching the DB (C3).
|
||||
|
||||
w := mcpPost(t, h, "ws-1", map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
@ -436,14 +457,53 @@ func TestMCPHandler_CommitMemory_GlobalScope_Blocked(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
// JSON-RPC envelope returns 200 with the error in the body — only
|
||||
// malformed-JSON-at-the-envelope-layer returns 400 (see Call() in mcp.go).
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 (JSON-RPC error in body), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
var resp mcpResponse
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("response is not valid JSON: %v", err)
|
||||
}
|
||||
|
||||
// (1) C3: an error must be reported.
|
||||
if resp.Error == nil {
|
||||
t.Error("expected JSON-RPC error for GLOBAL scope, got nil")
|
||||
t.Fatal("expected JSON-RPC error for GLOBAL scope, got nil")
|
||||
}
|
||||
if resp.Error != nil && resp.Error.Message != "tool call failed" {
|
||||
t.Errorf("client error should use the OFFSEC constant message, got: %s", resp.Error.Message)
|
||||
|
||||
// (2) OFFSEC-001 positive assertions — exact equality on the scrubbed
|
||||
// constants so any change (re-leak of err.Error(), code mutation) trips
|
||||
// the test. Substring-match would not catch a partial re-leak.
|
||||
if resp.Error.Code != -32000 {
|
||||
t.Errorf("error code should be -32000 (Server error / dispatch-failure), got: %d", resp.Error.Code)
|
||||
}
|
||||
if resp.Error.Message != "tool call failed" {
|
||||
t.Errorf("error message should be the OFFSEC-001 constant %q, got: %q", "tool call failed", resp.Error.Message)
|
||||
}
|
||||
|
||||
// (3) OFFSEC-001 negative assertions — the internal err.Error() text
|
||||
// from toolCommitMemory ("GLOBAL scope is not permitted via the MCP
|
||||
// bridge — use LOCAL or TEAM") must NOT appear in the client-visible
|
||||
// message. Each token below is a distinct substring of that internal
|
||||
// string; if ANY leaks through, the scrub in mcp.go dispatchRPC has
|
||||
// regressed and this assertion fires the canary.
|
||||
leakedTokens := []string{
|
||||
"GLOBAL", // scope name
|
||||
"scope", // policy lexicon
|
||||
"permitted", // policy verb
|
||||
"bridge", // internal architecture term
|
||||
"LOCAL", // alternative scope name
|
||||
"TEAM", // alternative scope name
|
||||
}
|
||||
for _, tok := range leakedTokens {
|
||||
if bytes.Contains([]byte(resp.Error.Message), []byte(tok)) {
|
||||
t.Errorf("OFFSEC-001 scrub regression: client-visible error.message leaks internal token %q (got: %q)", tok, resp.Error.Message)
|
||||
}
|
||||
}
|
||||
|
||||
// (4) C3 invariant preserved: handler must short-circuit before any DB call.
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unexpected DB calls on GLOBAL scope block: %v", err)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user