From 741737dfadde6a2b9f5185067cb11e9c2f0d6d50 Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 27 May 2026 11:46:14 +0000 Subject: [PATCH 1/3] a2a_proxy: canvas-user requests bypass token validation regardless of live tokens (fixes #1673) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit validateCallerToken checked HasAnyLiveToken before IsSameOriginCanvas. When a canvas-user identity workspace (RFC#637) acquired live tokens, canvas requests fell into the hasLive=true branch, which demands a bearer token the canvas frontend never sends. This produced a silent 401 that dropped the message before logA2AReceiveQueued could write the activity_logs row — breaking canvas chat for all poll-mode workspaces using that identity. Fix: move IsSameOriginCanvas to the top of validateCallerToken so same-origin canvas requests always bypass token validation. Test: add subprocess-based handler test that runs with CANVAS_PROXY_URL set (so canvasProxyActive is true at init) and verifies poll-mode canvas messages are queued even when the caller workspace has live tokens. Closes #1673 Co-Authored-By: Claude Opus 4.7 --- .../internal/handlers/a2a_proxy_helpers.go | 18 +++-- .../internal/handlers/a2a_proxy_test.go | 77 +++++++++++++++++++ 2 files changed, 90 insertions(+), 5 deletions(-) diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go index ef6317a64..a483b4889 100644 --- a/workspace-server/internal/handlers/a2a_proxy_helpers.go +++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go @@ -436,6 +436,17 @@ func nilIfEmpty(s string) *string { // On auth failure this writes the 401 via c and returns an error so the // handler aborts without running the proxy. func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (isCanvasUser bool, err error) { + // Same-origin canvas requests are always from the browser UI. They must + // bypass workspace token validation regardless of whether the caller's + // identity workspace has live tokens (RFC#637 canvas-user identity). + // Without this guard, a canvas-user workspace that acquires a token + // falls into the hasLive=true branch below, which demands a bearer + // token the canvas frontend never sends — producing a 401 that silently + // drops the message before it reaches activity_logs (issue #1673). + if middleware.IsSameOriginCanvas(c) { + return true, nil + } + hasLive, dbErr := wsauth.HasAnyLiveToken(ctx, db.DB, callerID) if dbErr != nil { // Fail-open here matches the heartbeat path — A2A caller auth is @@ -446,11 +457,8 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) ( return false, nil } if !hasLive { - // Tokenless workspace — could be legacy/pre-upgrade caller or - // canvas-user identity. Distinguish by request auth signals. - if middleware.IsSameOriginCanvas(c) { - return true, nil - } + // Tokenless workspace — could be legacy/pre-upgrade caller. + // Admin and org tokens are still accepted as canvas-user signals. tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) if tok != "" { adminSecret := os.Getenv("ADMIN_TOKEN") diff --git a/workspace-server/internal/handlers/a2a_proxy_test.go b/workspace-server/internal/handlers/a2a_proxy_test.go index 947b6f572..26e97a895 100644 --- a/workspace-server/internal/handlers/a2a_proxy_test.go +++ b/workspace-server/internal/handlers/a2a_proxy_test.go @@ -11,6 +11,7 @@ import ( "net/http" "net/http/httptest" "os" + "os/exec" "strings" "testing" "time" @@ -2341,6 +2342,82 @@ func TestProxyA2A_PollMode_ShortCircuits_NoSSRF_NoDispatch(t *testing.T) { } } +// TestProxyA2A_PollMode_CanvasUserWithLiveToken verifies the fix for issue +// #1673: when a canvas-user identity workspace (RFC#637) has live tokens, +// validateCallerToken must still treat same-origin canvas requests as canvas +// users. Previously the hasLive=true branch demanded a bearer token the canvas +// frontend never sends, causing a 401 that silently dropped the message before +// logA2AReceiveQueued could write the activity row. +// +// This test runs in a subprocess with CANVAS_PROXY_URL set so that +// middleware.canvasProxyActive is true at package init time. +func TestProxyA2A_PollMode_CanvasUserWithLiveToken(t *testing.T) { + if os.Getenv("CANVAS_PROXY_URL") == "" { + cmd := exec.Command(os.Args[0], "-test.run=^TestProxyA2A_PollMode_CanvasUserWithLiveToken$") + cmd.Env = append(os.Environ(), "CANVAS_PROXY_URL=http://localhost") + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("subprocess test failed: %v\n%s", err, out) + } + return + } + + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + const wsTarget = "ws-poll-canvas" + const wsCanvasUser = "ws-canvas-user-344a" + + expectBudgetCheck(mock, wsTarget) + + // The caller workspace has live tokens, but because IsSameOriginCanvas + // is true the fix short-circuits BEFORE HasAnyLiveToken is queried. + // No SELECT COUNT(*) expectation here — that is the regression path. + + mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id"). + WithArgs(wsTarget). + WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll")) + + // Activity log: the queued receive must still fire. + mock.ExpectExec("INSERT INTO activity_logs"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: wsTarget}} + + body := `{"jsonrpc":"2.0","id":"canvas-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hello from canvas"}]}}}` + req := httptest.NewRequest("POST", "/workspaces/"+wsTarget+"/a2a", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Workspace-ID", wsCanvasUser) + // Same-origin headers so IsSameOriginCanvas returns true. + req.Host = "localhost" + req.Header.Set("Referer", "https://localhost/") + c.Request = req + + handler.ProxyA2A(c) + + time.Sleep(50 * time.Millisecond) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200 (queued), got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("response is not valid JSON: %v", err) + } + if resp["status"] != "queued" { + t.Errorf("response.status = %v, want %q", resp["status"], "queued") + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // TestProxyA2A_PushMode_NoShortCircuit verifies the symmetric contract: // a push-mode workspace (default) is NOT affected by the new short-circuit. // It still proceeds to resolveAgentURL + dispatch. Without this guard, a -- 2.52.0 From f390d1026d55c384b065dff3c2eaedd507f4479b Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 27 May 2026 13:43:15 +0000 Subject: [PATCH 2/3] fix(a2a): classify canvas-user by delivery_mode, not forgeable Origin (#1944) Reverts the top-level IsSameOriginCanvas bypass in validateCallerToken that allowed any push-mode peer to skip bearer-token checks with a forged Origin/Referer header (SECURITY HOLD). Option (c) per PM recommendation: root-fix canvas-user identity classification. - lookupDeliveryMode is now called BEFORE HasAnyLiveToken. - Poll-mode workspaces are canvas-user identities; they bypass the hasLive+bearer gate (canvas never sends bearer tokens). - Push-mode workspaces always go through the standard bearer-token validation, regardless of Origin/Referer. Fixes #1673 without opening the bypass vulnerability. Co-Authored-By: Claude Opus 4.7 --- .../internal/handlers/a2a_proxy_helpers.go | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go index a483b4889..3e4e61376 100644 --- a/workspace-server/internal/handlers/a2a_proxy_helpers.go +++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go @@ -436,15 +436,35 @@ func nilIfEmpty(s string) *string { // On auth failure this writes the 401 via c and returns an error so the // handler aborts without running the proxy. func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (isCanvasUser bool, err error) { - // Same-origin canvas requests are always from the browser UI. They must - // bypass workspace token validation regardless of whether the caller's - // identity workspace has live tokens (RFC#637 canvas-user identity). - // Without this guard, a canvas-user workspace that acquires a token - // falls into the hasLive=true branch below, which demands a bearer - // token the canvas frontend never sends — producing a 401 that silently - // drops the message before it reaches activity_logs (issue #1673). - if middleware.IsSameOriginCanvas(c) { - return true, nil + // RFC#637 canvas-user identity classification: poll-mode workspaces are + // human operators in the browser, not peer agents. They must bypass + // workspace token validation regardless of whether the identity workspace + // has live tokens, because the canvas frontend never sends a bearer token. + // This check MUST happen before HasAnyLiveToken so a token-acquired + // poll-mode workspace doesn't fall into the hasLive=true branch and 401 + // (issue #1673). Security: only poll-mode workspaces get this bypass; + // push-mode peers always go through the standard bearer-token gate below. + // A forgeable Origin/Referer on a push-mode peer can no longer skip auth. + deliveryMode, _ := lookupDeliveryMode(ctx, callerID) + if deliveryMode == models.DeliveryModePoll { + // Poll-mode canvas-user — validate same-origin, admin, or org token. + if middleware.IsSameOriginCanvas(c) { + return true, nil + } + tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) + if tok != "" { + adminSecret := os.Getenv("ADMIN_TOKEN") + if adminSecret != "" && subtle.ConstantTimeCompare([]byte(tok), []byte(adminSecret)) == 1 { + return true, nil + } + if _, _, _, err := orgtoken.Validate(ctx, db.DB, tok); err == nil { + return true, nil + } + } + // Poll-mode without canvas or admin/org auth: treat as legacy + // tokenless caller (same as the pre-upgrade path). This preserves + // backward compatibility for operators using direct API calls. + return false, nil } hasLive, dbErr := wsauth.HasAnyLiveToken(ctx, db.DB, callerID) -- 2.52.0 From 7c0f1bc12884f50c180ab8c9a9eff939112c6a8c Mon Sep 17 00:00:00 2001 From: "Molecule AI Dev Engineer A (Kimi)" Date: Wed, 27 May 2026 13:58:35 +0000 Subject: [PATCH 3/3] test(a2a_proxy): update poll-mode canvas-user test for delivery_mode classification TestProxyA2A_PollMode_CanvasUserWithLiveToken now mocks the caller's delivery_mode lookup (validateCallerToken queries it before HasAnyLiveToken per the option-c security fix). Without this mock, lookupDeliveryMode fails open to push, the caller falls into the hasLive branch, and the test gets 403 instead of 200 queued. Co-Authored-By: Claude Opus 4.7 --- workspace-server/internal/handlers/a2a_proxy_test.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/workspace-server/internal/handlers/a2a_proxy_test.go b/workspace-server/internal/handlers/a2a_proxy_test.go index 26e97a895..e15eda6c2 100644 --- a/workspace-server/internal/handlers/a2a_proxy_test.go +++ b/workspace-server/internal/handlers/a2a_proxy_test.go @@ -2370,12 +2370,16 @@ func TestProxyA2A_PollMode_CanvasUserWithLiveToken(t *testing.T) { const wsTarget = "ws-poll-canvas" const wsCanvasUser = "ws-canvas-user-344a" + // validateCallerToken now classifies by delivery_mode BEFORE HasAnyLiveToken. + // The caller is a poll-mode canvas-user identity, so it must bypass the + // hasLive+bearer gate entirely (issue #1673 / option-c security fix). + mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id"). + WithArgs(wsCanvasUser). + WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll")) + expectBudgetCheck(mock, wsTarget) - // The caller workspace has live tokens, but because IsSameOriginCanvas - // is true the fix short-circuits BEFORE HasAnyLiveToken is queried. - // No SELECT COUNT(*) expectation here — that is the regression path. - + // Target workspace is also poll-mode → short-circuit to queued receive. mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id"). WithArgs(wsTarget). WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll")) -- 2.52.0