a2a_proxy: canvas-user requests bypass token validation regardless of live tokens (fixes #1673) #1944

Closed
agent-pm wants to merge 3 commits from fix/canvas-chat-poll-mode-1673 into main
2 changed files with 114 additions and 5 deletions
@@ -436,6 +436,37 @@ func nilIfEmpty(s string) *string {
// On auth failure this writes the 401 via c and returns an error so the
// handler aborts without running the proxy.
func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (isCanvasUser bool, err error) {
// RFC#637 canvas-user identity classification: poll-mode workspaces are
// human operators in the browser, not peer agents. They must bypass
// workspace token validation regardless of whether the identity workspace
// has live tokens, because the canvas frontend never sends a bearer token.
// This check MUST happen before HasAnyLiveToken so a token-acquired
// poll-mode workspace doesn't fall into the hasLive=true branch and 401
// (issue #1673). Security: only poll-mode workspaces get this bypass;
// push-mode peers always go through the standard bearer-token gate below.
// A forgeable Origin/Referer on a push-mode peer can no longer skip auth.
deliveryMode, _ := lookupDeliveryMode(ctx, callerID)
if deliveryMode == models.DeliveryModePoll {
// Poll-mode canvas-user — validate same-origin, admin, or org token.
if middleware.IsSameOriginCanvas(c) {
return true, nil
}
tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
if tok != "" {
adminSecret := os.Getenv("ADMIN_TOKEN")
if adminSecret != "" && subtle.ConstantTimeCompare([]byte(tok), []byte(adminSecret)) == 1 {
return true, nil
}
if _, _, _, err := orgtoken.Validate(ctx, db.DB, tok); err == nil {
return true, nil
}
}
// Poll-mode without canvas or admin/org auth: treat as legacy
// tokenless caller (same as the pre-upgrade path). This preserves
// backward compatibility for operators using direct API calls.
return false, nil
}
hasLive, dbErr := wsauth.HasAnyLiveToken(ctx, db.DB, callerID)
if dbErr != nil {
// Fail-open here matches the heartbeat path — A2A caller auth is
@@ -446,11 +477,8 @@ func validateCallerToken(ctx context.Context, c *gin.Context, callerID string) (
return false, nil
}
if !hasLive {
// Tokenless workspace — could be legacy/pre-upgrade caller or
// canvas-user identity. Distinguish by request auth signals.
if middleware.IsSameOriginCanvas(c) {
return true, nil
}
// Tokenless workspace — could be legacy/pre-upgrade caller.
// Admin and org tokens are still accepted as canvas-user signals.
tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization"))
if tok != "" {
adminSecret := os.Getenv("ADMIN_TOKEN")
@@ -11,6 +11,7 @@ import (
"net/http"
"net/http/httptest"
"os"
"os/exec"
"strings"
"testing"
"time"
@@ -2341,6 +2342,86 @@ func TestProxyA2A_PollMode_ShortCircuits_NoSSRF_NoDispatch(t *testing.T) {
}
}
// TestProxyA2A_PollMode_CanvasUserWithLiveToken verifies the fix for issue
// #1673: when a canvas-user identity workspace (RFC#637) has live tokens,
// validateCallerToken must still treat same-origin canvas requests as canvas
// users. Previously the hasLive=true branch demanded a bearer token the canvas
// frontend never sends, causing a 401 that silently dropped the message before
// logA2AReceiveQueued could write the activity row.
//
// This test runs in a subprocess with CANVAS_PROXY_URL set so that
// middleware.canvasProxyActive is true at package init time.
func TestProxyA2A_PollMode_CanvasUserWithLiveToken(t *testing.T) {
if os.Getenv("CANVAS_PROXY_URL") == "" {
cmd := exec.Command(os.Args[0], "-test.run=^TestProxyA2A_PollMode_CanvasUserWithLiveToken$")
cmd.Env = append(os.Environ(), "CANVAS_PROXY_URL=http://localhost")
out, err := cmd.CombinedOutput()
if err != nil {
t.Fatalf("subprocess test failed: %v\n%s", err, out)
}
return
}
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
const wsTarget = "ws-poll-canvas"
const wsCanvasUser = "ws-canvas-user-344a"
// validateCallerToken now classifies by delivery_mode BEFORE HasAnyLiveToken.
// The caller is a poll-mode canvas-user identity, so it must bypass the
// hasLive+bearer gate entirely (issue #1673 / option-c security fix).
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
WithArgs(wsCanvasUser).
WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll"))
expectBudgetCheck(mock, wsTarget)
// Target workspace is also poll-mode → short-circuit to queued receive.
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
WithArgs(wsTarget).
WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll"))
// Activity log: the queued receive must still fire.
mock.ExpectExec("INSERT INTO activity_logs").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: wsTarget}}
body := `{"jsonrpc":"2.0","id":"canvas-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"hello from canvas"}]}}}`
req := httptest.NewRequest("POST", "/workspaces/"+wsTarget+"/a2a", bytes.NewBufferString(body))
req.Header.Set("Content-Type", "application/json")
req.Header.Set("X-Workspace-ID", wsCanvasUser)
// Same-origin headers so IsSameOriginCanvas returns true.
req.Host = "localhost"
req.Header.Set("Referer", "https://localhost/")
c.Request = req
handler.ProxyA2A(c)
time.Sleep(50 * time.Millisecond)
if w.Code != http.StatusOK {
t.Fatalf("expected 200 (queued), got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("response is not valid JSON: %v", err)
}
if resp["status"] != "queued" {
t.Errorf("response.status = %v, want %q", resp["status"], "queued")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestProxyA2A_PushMode_NoShortCircuit verifies the symmetric contract:
// a push-mode workspace (default) is NOT affected by the new short-circuit.
// It still proceeds to resolveAgentURL + dispatch. Without this guard, a