fix(registry#2970): fail-closed platform-agent register gate on missing MODEL secret #2973

Merged
devops-engineer merged 1 commit from fix/2970-concierge-register-model-gate into main 2026-06-15 22:30:15 +00:00
2 changed files with 111 additions and 0 deletions
@@ -353,6 +353,33 @@ func isPlatformTunnelHostname(h string) bool {
return strings.HasSuffix(h, "."+domain)
}
// platformAgentHasModelSecret checks whether workspace_secrets holds a row
// with key 'MODEL' for workspaceID.
//
// The concierge's declared model is seeded by ensureConciergeModel ahead of
// every platform-agent provision, so a platform agent that arrives at
// registration without this secret never received its identity and must not
// be marked online.
//
// It returns the scanned EXISTS result together with any error from the
// query or scan; callers should look at the error before trusting the bool.
func (h *RegistryHandler) platformAgentHasModelSecret(ctx context.Context, workspaceID string) (bool, error) {
	const modelSecretQuery = `SELECT EXISTS(SELECT 1 FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL')`

	row := db.DB.QueryRowContext(ctx, modelSecretQuery, workspaceID)

	var found bool
	scanErr := row.Scan(&found)
	return found, scanErr
}
// markWorkspaceFailed broadcasts WORKSPACE_PROVISION_FAILED for workspaceID
// and then flips the workspace row to the failed status, storing msg in
// last_sample_error.
//
// This is the RegistryHandler-local fallback used by the fail-closed
// platform-agent identity gate; during provisioning the WorkspaceHandler's
// markProvisionFailed remains the primary path.
//
// The database update is best-effort: a failure is logged and not returned.
func (h *RegistryHandler) markWorkspaceFailed(ctx context.Context, workspaceID, msg string) {
	const failStmt = `UPDATE workspaces SET status = $3, last_sample_error = $2, updated_at = now() WHERE id = $1`

	// The event is emitted before the row is updated.
	payload := map[string]interface{}{
		"error": msg,
		"code":  "PLATFORM_AGENT_IDENTITY_GATE",
	}
	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), workspaceID, payload)

	_, execErr := db.DB.ExecContext(ctx, failStmt, workspaceID, msg, models.StatusFailed)
	if execErr == nil {
		return
	}
	log.Printf("markWorkspaceFailed: db update failed for %s: %v", workspaceID, execErr)
}
// Register handles POST /registry/register
// Upserts workspace, sets Redis TTL, broadcasts WORKSPACE_ONLINE.
func (h *RegistryHandler) Register(c *gin.Context) {
@@ -484,6 +511,33 @@ func (h *RegistryHandler) Register(c *gin.Context) {
return
}
// Issue #2970: fail CLOSED if a platform agent reaches registration without
// the seeded MODEL workspace_secret. The MISSING_MODEL gate in
// prepareProvisionContext is the primary defense, but if a model-less/identity-
// less concierge somehow boots on a path that bypasses that gate (e.g. an old
// or generic image), this second-layer guard prevents it from ever marking
// itself online-routable. Instead we mark the workspace failed so the canvas
// surfaces a provision failure rather than serving users a generic Claude Code.
//
// existingState.ExistingKind is populated by fetchExistingWorkspaceStateForDiagnostics
// (best-effort). We treat "platform" literally; any other value (including "(new)"
// or "(unavailable)") means the gate does not apply unless payload.Kind itself is
// "platform" (covered by the privilege-escalation precheck above).
if payload.Kind == models.KindPlatform || existingState.ExistingKind == models.KindPlatform {
if hasModel, mErr := h.platformAgentHasModelSecret(ctx, payload.ID); mErr != nil {
log.Printf("Registry register: model secret lookup failed for %s: %v", payload.ID, mErr)
c.JSON(http.StatusInternalServerError, gin.H{"error": "registration failed"})
return
} else if !hasModel {
msg := "platform agent registered without a seeded MODEL secret; refusing online"
log.Printf("Registry register: %s (workspace=%s)", msg, payload.ID)
h.markWorkspaceFailed(ctx, payload.ID, msg)
logRegister400Reason("platform_agent_model_missing", payload.ID, payload, existingState, msg)
c.JSON(http.StatusBadRequest, gin.H{"error": "platform agent identity incomplete"})
return
}
}
// URL handling diverges by mode:
// push: URL is required and must pass the SSRF safety check —
// same as pre-#2339 behavior (the workspace must be reachable for
@@ -2014,6 +2014,12 @@ func TestRegister_AllowsAlreadyPlatformReRegister(t *testing.T) {
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
// Issue #2970: platform-agent identity gate — payload.kind="platform", so we
// verify the seeded MODEL workspace_secret exists before marking online.
mock.ExpectQuery("SELECT EXISTS\\(SELECT 1 FROM workspace_secrets WHERE workspace_id = \\$1 AND key = 'MODEL'\\)").
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
// delivery_mode="push" is set explicitly, so resolveDeliveryMode
// short-circuits (no SELECT delivery_mode lookup). The upsert MUST carry
// kind="platform" as the 6th arg.
@@ -2130,6 +2136,57 @@ func TestRegister_RejectsPlatformPromotion(t *testing.T) {
}
}
// TestRegister_PlatformAgentMissingModelSecret_FailsClosed guards issue #2970:
// a platform agent that reaches /registry/register without a seeded MODEL
// workspace_secret must NOT be marked online. Instead the workspace is marked
// 'failed' and the register call returns 400 with the identity-gate error
// body, so a generic/model-less concierge cannot serve users.
//
// The test pins three things: the 400 status, the identity-gate error message
// in the response body (so the 400 is attributable to this gate and not to
// some other rejection), and that every mocked DB expectation was consumed.
func TestRegister_PlatformAgentMissingModelSecret_FailsClosed(t *testing.T) {
	mock := setupTestDB(t)
	setupTestRedis(t)
	broadcaster := newTestBroadcaster()
	handler := NewRegistryHandler(broadcaster)

	const wsID = "ws-platform-no-model"

	// Bootstrap path — no live tokens.
	mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
		WithArgs(wsID).
		WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))

	// kind precheck: existing row is kind="platform".
	mock.ExpectQuery("SELECT kind FROM workspaces WHERE id").
		WithArgs(wsID).
		WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))

	// Identity gate: payload.kind="platform" → check MODEL secret → absent.
	mock.ExpectQuery("SELECT EXISTS\\(SELECT 1 FROM workspace_secrets WHERE workspace_id = \\$1 AND key = 'MODEL'\\)").
		WithArgs(wsID).
		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))

	// Gate failure broadcasts WORKSPACE_PROVISION_FAILED and marks the row failed.
	mock.ExpectExec("INSERT INTO structure_events").
		WillReturnResult(sqlmock.NewResult(0, 1))
	mock.ExpectExec("UPDATE workspaces SET status = \\$3, last_sample_error = \\$2, updated_at = now\\(\\) WHERE id = \\$1").
		WithArgs(wsID, sqlmock.AnyArg(), models.StatusFailed).
		WillReturnResult(sqlmock.NewResult(0, 1))

	w := httptest.NewRecorder()
	c, _ := gin.CreateTestContext(w)
	c.Request = httptest.NewRequest("POST", "/registry/register",
		bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","delivery_mode":"push","kind":"platform","agent_card":{"name":"concierge"}}`))
	c.Request.Header.Set("Content-Type", "application/json")

	handler.Register(c)

	if w.Code != http.StatusBadRequest {
		t.Fatalf("platform agent missing MODEL secret: expected 400, got %d: %s", w.Code, w.Body.String())
	}
	// The status alone does not identify which rejection fired; require the
	// identity-gate error message in the body as well.
	if !bytes.Contains(w.Body.Bytes(), []byte("platform agent identity incomplete")) {
		t.Errorf("platform agent missing MODEL secret: expected identity-gate error body, got: %s", w.Body.String())
	}
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("unmet expectations: %v", err)
	}
}
// TestRegister_PollMode_PreservesExistingValue: when the row already
// has delivery_mode=poll and the payload doesn't set it, the resolved
// mode should be poll — i.e. "absent payload mode" must NOT silently