diff --git a/workspace-server/internal/handlers/a2a_proxy_helpers.go b/workspace-server/internal/handlers/a2a_proxy_helpers.go index ef6317a64..3e4e61376 100644 --- a/workspace-server/internal/handlers/a2a_proxy_helpers.go +++ b/workspace-server/internal/handlers/a2a_proxy_helpers.go @@ -436,6 +436,37 @@ func nilIfEmpty(s string) *string { // On auth failure this writes the 401 via c and returns an error so the // handler aborts without running the proxy. func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (isCanvasUser bool, err error) { + // RFC#637 canvas-user identity classification: poll-mode workspaces are + // human operators in the browser, not peer agents. They must bypass + // workspace token validation regardless of whether the identity workspace + // has live tokens, because the canvas frontend never sends a bearer token. + // This check MUST happen before HasAnyLiveToken so a token-acquired + // poll-mode workspace doesn't fall into the hasLive=true branch and 401 + // (issue #1673). Security: only poll-mode workspaces get this bypass; + // push-mode peers always go through the standard bearer-token gate below. + // A forgeable Origin/Referer on a push-mode peer can no longer skip auth. + deliveryMode, _ := lookupDeliveryMode(ctx, callerID) + if deliveryMode == models.DeliveryModePoll { + // Poll-mode canvas-user — validate same-origin, admin, or org token. + if middleware.IsSameOriginCanvas(c) { + return true, nil + } + tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) + if tok != "" { + adminSecret := os.Getenv("ADMIN_TOKEN") + if adminSecret != "" && subtle.ConstantTimeCompare([]byte(tok), []byte(adminSecret)) == 1 { + return true, nil + } + if _, _, _, err := orgtoken.Validate(ctx, db.DB, tok); err == nil { + return true, nil + } + } + // Poll-mode without canvas or admin/org auth: treat as legacy + // tokenless caller (same as the pre-upgrade path). This preserves + // backward compatibility for operators using direct API calls. + return false, nil + } + hasLive, dbErr := wsauth.HasAnyLiveToken(ctx, db.DB, callerID) if dbErr != nil { // Fail-open here matches the heartbeat path — A2A caller auth is @@ -446,11 +477,8 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) ( return false, nil } if !hasLive { - // Tokenless workspace — could be legacy/pre-upgrade caller or - // canvas-user identity. Distinguish by request auth signals. - if middleware.IsSameOriginCanvas(c) { - return true, nil - } + // Tokenless workspace — could be legacy/pre-upgrade caller. + // Admin and org tokens are still accepted as canvas-user signals. tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")) if tok != "" { adminSecret := os.Getenv("ADMIN_TOKEN") diff --git a/workspace-server/internal/handlers/a2a_proxy_test.go b/workspace-server/internal/handlers/a2a_proxy_test.go index 947b6f572..e15eda6c2 100644 --- a/workspace-server/internal/handlers/a2a_proxy_test.go +++ b/workspace-server/internal/handlers/a2a_proxy_test.go @@ -11,6 +11,7 @@ import ( "net/http" "net/http/httptest" "os" + "os/exec" "strings" "testing" "time" @@ -2341,6 +2342,86 @@ func TestProxyA2A_PollMode_ShortCircuits_NoSSRF_NoDispatch(t *testing.T) { } } +// TestProxyA2A_PollMode_CanvasUserWithLiveToken verifies the fix for issue +// #1673: when a canvas-user identity workspace (RFC#637) has live tokens, +// validateCallerToken must still treat same-origin canvas requests as canvas +// users. Previously the hasLive=true branch demanded a bearer token the canvas +// frontend never sends, causing a 401 that silently dropped the message before +// logA2AReceiveQueued could write the activity row. +// +// This test runs in a subprocess with CANVAS_PROXY_URL set so that +// middleware.canvasProxyActive is true at package init time. +func TestProxyA2A_PollMode_CanvasUserWithLiveToken(t *testing.T) { + if os.Getenv("CANVAS_PROXY_URL") == "" { + cmd := exec.Command(os.Args[0], "-test.run=^TestProxyA2A_PollMode_CanvasUserWithLiveToken$") + cmd.Env = append(os.Environ(), "CANVAS_PROXY_URL=http://localhost") + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("subprocess test failed: %v\n%s", err, out) + } + return + } + + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir()) + + const wsTarget = "ws-poll-canvas" + const wsCanvasUser = "ws-canvas-user-344a" + + // validateCallerToken now classifies by delivery_mode BEFORE HasAnyLiveToken. + // The caller is a poll-mode canvas-user identity, so it must bypass the + // hasLive+bearer gate entirely (issue #1673 / option-c security fix). + mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id"). + WithArgs(wsCanvasUser). + WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll")) + + expectBudgetCheck(mock, wsTarget) + + // Target workspace is also poll-mode → short-circuit to queued receive. + mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id"). + WithArgs(wsTarget). + WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll")) + + // Activity log: the queued receive must still fire. + mock.ExpectExec("INSERT INTO activity_logs"). + WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: wsTarget}} + + body := `{"jsonrpc":"2.0","id":"canvas-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hello from canvas"}]}}}` + req := httptest.NewRequest("POST", "/workspaces/"+wsTarget+"/a2a", bytes.NewBufferString(body)) + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Workspace-ID", wsCanvasUser) + // Same-origin headers so IsSameOriginCanvas returns true. + req.Host = "localhost" + req.Header.Set("Referer", "https://localhost/") + c.Request = req + + handler.ProxyA2A(c) + + time.Sleep(50 * time.Millisecond) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200 (queued), got %d: %s", w.Code, w.Body.String()) + } + + var resp map[string]interface{} + if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil { + t.Fatalf("response is not valid JSON: %v", err) + } + if resp["status"] != "queued" { + t.Errorf("response.status = %v, want %q", resp["status"], "queued") + } + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + // TestProxyA2A_PushMode_NoShortCircuit verifies the symmetric contract: // a push-mode workspace (default) is NOT affected by the new short-circuit. // It still proceeds to resolveAgentURL + dispatch. Without this guard, a