From 07448d132f3ae75682d8182657e63802a8b1992f Mon Sep 17 00:00:00 2001
From: "Molecule AI Dev Engineer A (Kimi)"
Date: Mon, 15 Jun 2026 22:21:58 +0000
Subject: [PATCH] fix(registry#2970): fail-closed platform-agent register gate
 on missing MODEL secret

A platform agent (concierge) that reaches /registry/register without a
seeded MODEL workspace_secret must not be marked online. The MISSING_MODEL
gate in prepareProvisionContext is the primary defense, but if a model-less/
identity-less concierge somehow boots on a path that bypasses that gate,
this second-layer guard marks the workspace failed instead of letting it
serve users as generic Claude Code.

- Add platformAgentHasModelSecret + markWorkspaceFailed helpers.
- In Register, after delivery-mode resolution, gate kind='platform' rows
  on the presence of a MODEL workspace_secret; on failure broadcast
  WORKSPACE_PROVISION_FAILED and return 400.
- Use existingState.ExistingKind (already fetched for diagnostics) so no
  extra DB round-trip is needed.
- Add/update tests.

Refs #2970 track 2. Does not close the deployment/identity track; that is
handled by the #2955 image-entrypoint work.

Co-Authored-By: Claude
---
 .../internal/handlers/registry.go | 54 ++++++++++++++++++
 .../internal/handlers/registry_test.go | 57 +++++++++++++++++++
 2 files changed, 111 insertions(+)

diff --git a/workspace-server/internal/handlers/registry.go b/workspace-server/internal/handlers/registry.go
index 026dd74b3..918ede892 100644
--- a/workspace-server/internal/handlers/registry.go
+++ b/workspace-server/internal/handlers/registry.go
@@ -353,6 +353,33 @@ func isPlatformTunnelHostname(h string) bool {
 	return strings.HasSuffix(h, "."+domain)
 }
 
+// platformAgentHasModelSecret reports whether workspace_secrets holds a row with
+// key 'MODEL' for workspaceID; the query/scan error is returned unchanged. The
+// concierge's declared model is seeded by ensureConciergeModel before every
+// platform-agent provision, so a platform agent that reaches registration
+// without this secret has not received its identity and must not go online.
+func (h *RegistryHandler) platformAgentHasModelSecret(ctx context.Context, workspaceID string) (bool, error) {
+	var exists bool
+	err := db.DB.QueryRowContext(ctx,
+		`SELECT EXISTS(SELECT 1 FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL')`,
+		workspaceID).Scan(&exists)
+	return exists, err
+}
+
+// markWorkspaceFailed broadcasts WORKSPACE_PROVISION_FAILED, then sets the row to
+// status='failed' with msg as last_sample_error; a failed UPDATE is only logged.
+// It is the RegistryHandler-local fallback for the fail-closed platform-agent
+// identity gate; WorkspaceHandler's markProvisionFailed is the primary path.
+func (h *RegistryHandler) markWorkspaceFailed(ctx context.Context, workspaceID, msg string) {
+	extra := map[string]interface{}{"error": msg, "code": "PLATFORM_AGENT_IDENTITY_GATE"}
+	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), workspaceID, extra)
+	if _, dbErr := db.DB.ExecContext(ctx,
+		`UPDATE workspaces SET status = $3, last_sample_error = $2, updated_at = now() WHERE id = $1`,
+		workspaceID, msg, models.StatusFailed); dbErr != nil {
+		log.Printf("markWorkspaceFailed: db update failed for %s: %v", workspaceID, dbErr)
+	}
+}
+
 // Register handles POST /registry/register
 // Upserts workspace, sets Redis TTL, broadcasts WORKSPACE_ONLINE.
 func (h *RegistryHandler) Register(c *gin.Context) {
@@ -484,6 +511,33 @@ func (h *RegistryHandler) Register(c *gin.Context) {
 		return
 	}
 
+	// Issue #2970: fail CLOSED if a platform agent reaches registration without
+	// the seeded MODEL workspace_secret. The MISSING_MODEL gate in
+	// prepareProvisionContext is the primary defense, but if a model-less or
+	// identity-less concierge boots on a path that bypasses that gate (e.g. an old
+	// or generic image), this second-layer guard keeps it from going online: a
+	// missing secret marks the workspace failed and returns 400, so the canvas shows
+	// a provision failure instead of a generic Claude Code; a failed lookup returns 500.
+	//
+	// existingState.ExistingKind is populated by fetchExistingWorkspaceStateForDiagnostics
+	// (best-effort). We treat "platform" literally; any other value (including "(new)"
+	// or "(unavailable)") means the gate does not apply unless payload.Kind itself is
+	// "platform" (covered by the privilege-escalation precheck above).
+	if payload.Kind == models.KindPlatform || existingState.ExistingKind == models.KindPlatform {
+		if hasModel, mErr := h.platformAgentHasModelSecret(ctx, payload.ID); mErr != nil {
+			log.Printf("Registry register: model secret lookup failed for %s: %v", payload.ID, mErr)
+			c.JSON(http.StatusInternalServerError, gin.H{"error": "registration failed"})
+			return
+		} else if !hasModel {
+			msg := "platform agent registered without a seeded MODEL secret; refusing online"
+			log.Printf("Registry register: %s (workspace=%s)", msg, payload.ID)
+			h.markWorkspaceFailed(ctx, payload.ID, msg)
+			logRegister400Reason("platform_agent_model_missing", payload.ID, payload, existingState, msg)
+			c.JSON(http.StatusBadRequest, gin.H{"error": "platform agent identity incomplete"})
+			return
+		}
+	}
+
 	// URL handling diverges by mode:
 	// push: URL is required and must pass the SSRF safety check —
 	// same as pre-#2339 behavior (the workspace must be reachable for
diff --git a/workspace-server/internal/handlers/registry_test.go b/workspace-server/internal/handlers/registry_test.go
index 4f6fb6bac..252333f0d 100644
--- a/workspace-server/internal/handlers/registry_test.go
+++ b/workspace-server/internal/handlers/registry_test.go
@@ -2014,6 +2014,12 @@ func TestRegister_AllowsAlreadyPlatformReRegister(t *testing.T) {
 		WithArgs(wsID).
 		WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
 
+	// Issue #2970: platform-agent identity gate — payload.kind="platform", so we
+	// verify the seeded MODEL workspace_secret exists before marking online.
+	mock.ExpectQuery("SELECT EXISTS\\(SELECT 1 FROM workspace_secrets WHERE workspace_id = \\$1 AND key = 'MODEL'\\)").
+		WithArgs(wsID).
+		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
+
 	// delivery_mode="push" is set explicitly, so resolveDeliveryMode
 	// short-circuits (no SELECT delivery_mode lookup). The upsert MUST carry
 	// kind="platform" as the 6th arg.
@@ -2130,6 +2136,57 @@ func TestRegister_RejectsPlatformPromotion(t *testing.T) {
 	}
 }
 
+// TestRegister_PlatformAgentMissingModelSecret_FailsClosed guards issue #2970:
+// a platform agent that reaches /registry/register without a seeded MODEL
+// workspace_secret must NOT be marked online. Instead the workspace is marked
+// 'failed' and the register call returns 400, so a generic/model-less concierge
+// cannot serve users.
+func TestRegister_PlatformAgentMissingModelSecret_FailsClosed(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewRegistryHandler(broadcaster)
+
+	const wsID = "ws-platform-no-model"
+
+	// Bootstrap path — no live tokens.
+	mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
+		WithArgs(wsID).
+		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
+
+	// kind precheck: existing row is kind="platform".
+	mock.ExpectQuery("SELECT kind FROM workspaces WHERE id").
+		WithArgs(wsID).
+		WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
+
+	// Identity gate: payload.kind="platform" → check MODEL secret → absent.
+	mock.ExpectQuery("SELECT EXISTS\\(SELECT 1 FROM workspace_secrets WHERE workspace_id = \\$1 AND key = 'MODEL'\\)").
+		WithArgs(wsID).
+		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
+
+	// Gate failure broadcasts WORKSPACE_PROVISION_FAILED and marks the row failed.
+	mock.ExpectExec("INSERT INTO structure_events").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec("UPDATE workspaces SET status = \\$3, last_sample_error = \\$2, updated_at = now\\(\\) WHERE id = \\$1").
+		WithArgs(wsID, sqlmock.AnyArg(), models.StatusFailed).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Request = httptest.NewRequest("POST", "/registry/register",
+		bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","delivery_mode":"push","kind":"platform","agent_card":{"name":"concierge"}}`))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	handler.Register(c)
+
+	if w.Code != http.StatusBadRequest {
+		t.Fatalf("platform agent missing MODEL secret: expected 400, got %d: %s", w.Code, w.Body.String())
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet expectations: %v", err)
+	}
+}
+
 // TestRegister_PollMode_PreservesExistingValue: when the row already
 // has delivery_mode=poll and the payload doesn't set it, the resolved
 // mode should be poll — i.e. "absent payload mode" must NOT silently
-- 
2.52.0