fix(registry#2970): fail-closed platform-agent register gate on missing MODEL secret #2973
@@ -353,6 +353,33 @@ func isPlatformTunnelHostname(h string) bool {
|
||||
return strings.HasSuffix(h, "."+domain)
|
||||
}
|
||||
|
||||
// platformAgentHasModelSecret reports whether the workspace has a MODEL
|
||||
// workspace_secret. The concierge's declared model is seeded by
|
||||
// ensureConciergeModel before every platform-agent provision; a platform agent
|
||||
// that reaches registration without this secret has not received its identity
|
||||
// and must not be marked online.
|
||||
func (h *RegistryHandler) platformAgentHasModelSecret(ctx context.Context, workspaceID string) (bool, error) {
|
||||
var exists bool
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT EXISTS(SELECT 1 FROM workspace_secrets WHERE workspace_id = $1 AND key = 'MODEL')`,
|
||||
workspaceID).Scan(&exists)
|
||||
return exists, err
|
||||
}
|
||||
|
||||
// markWorkspaceFailed updates a workspace row to status='failed' and broadcasts
|
||||
// WORKSPACE_PROVISION_FAILED. It is a RegistryHandler-local fallback for the
|
||||
// fail-closed platform-agent identity gate; the WorkspaceHandler's
|
||||
// markProvisionFailed is the primary path during provisioning.
|
||||
func (h *RegistryHandler) markWorkspaceFailed(ctx context.Context, workspaceID, msg string) {
|
||||
extra := map[string]interface{}{"error": msg, "code": "PLATFORM_AGENT_IDENTITY_GATE"}
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), workspaceID, extra)
|
||||
if _, dbErr := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET status = $3, last_sample_error = $2, updated_at = now() WHERE id = $1`,
|
||||
workspaceID, msg, models.StatusFailed); dbErr != nil {
|
||||
log.Printf("markWorkspaceFailed: db update failed for %s: %v", workspaceID, dbErr)
|
||||
}
|
||||
}
|
||||
|
||||
// Register handles POST /registry/register
|
||||
// Upserts workspace, sets Redis TTL, broadcasts WORKSPACE_ONLINE.
|
||||
func (h *RegistryHandler) Register(c *gin.Context) {
|
||||
@@ -484,6 +511,33 @@ func (h *RegistryHandler) Register(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Issue #2970: fail CLOSED if a platform agent reaches registration without
|
||||
// the seeded MODEL workspace_secret. The MISSING_MODEL gate in
|
||||
// prepareProvisionContext is the primary defense, but if a model-less/identity-
|
||||
// less concierge somehow boots on a path that bypasses that gate (e.g. an old
|
||||
// or generic image), this second-layer guard prevents it from ever marking
|
||||
// itself online-routable. Instead we mark the workspace failed so the canvas
|
||||
// surfaces a provision failure rather than serving users a generic Claude Code.
|
||||
//
|
||||
// existingState.ExistingKind is populated by fetchExistingWorkspaceStateForDiagnostics
|
||||
// (best-effort). We treat "platform" literally; any other value (including "(new)"
|
||||
// or "(unavailable)") means the gate does not apply unless payload.Kind itself is
|
||||
// "platform" (covered by the privilege-escalation precheck above).
|
||||
if payload.Kind == models.KindPlatform || existingState.ExistingKind == models.KindPlatform {
|
||||
if hasModel, mErr := h.platformAgentHasModelSecret(ctx, payload.ID); mErr != nil {
|
||||
log.Printf("Registry register: model secret lookup failed for %s: %v", payload.ID, mErr)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "registration failed"})
|
||||
return
|
||||
} else if !hasModel {
|
||||
msg := "platform agent registered without a seeded MODEL secret; refusing online"
|
||||
log.Printf("Registry register: %s (workspace=%s)", msg, payload.ID)
|
||||
h.markWorkspaceFailed(ctx, payload.ID, msg)
|
||||
logRegister400Reason("platform_agent_model_missing", payload.ID, payload, existingState, msg)
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "platform agent identity incomplete"})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// URL handling diverges by mode:
|
||||
// push: URL is required and must pass the SSRF safety check —
|
||||
// same as pre-#2339 behavior (the workspace must be reachable for
|
||||
|
||||
@@ -2014,6 +2014,12 @@ func TestRegister_AllowsAlreadyPlatformReRegister(t *testing.T) {
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
|
||||
|
||||
// Issue #2970: platform-agent identity gate — payload.kind="platform", so we
|
||||
// verify the seeded MODEL workspace_secret exists before marking online.
|
||||
mock.ExpectQuery("SELECT EXISTS\\(SELECT 1 FROM workspace_secrets WHERE workspace_id = \\$1 AND key = 'MODEL'\\)").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
|
||||
// delivery_mode="push" is set explicitly, so resolveDeliveryMode
|
||||
// short-circuits (no SELECT delivery_mode lookup). The upsert MUST carry
|
||||
// kind="platform" as the 6th arg.
|
||||
@@ -2130,6 +2136,57 @@ func TestRegister_RejectsPlatformPromotion(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegister_PlatformAgentMissingModelSecret_FailsClosed guards issue #2970:
|
||||
// a platform agent that reaches /registry/register without a seeded MODEL
|
||||
// workspace_secret must NOT be marked online. Instead the workspace is marked
|
||||
// 'failed' and the register call returns 400, so a generic/model-less concierge
|
||||
// cannot serve users.
|
||||
func TestRegister_PlatformAgentMissingModelSecret_FailsClosed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
const wsID = "ws-platform-no-model"
|
||||
|
||||
// Bootstrap path — no live tokens.
|
||||
mock.ExpectQuery("SELECT COUNT\\(\\*\\) FROM workspace_auth_tokens").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
|
||||
// kind precheck: existing row is kind="platform".
|
||||
mock.ExpectQuery("SELECT kind FROM workspaces WHERE id").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
|
||||
|
||||
// Identity gate: payload.kind="platform" → check MODEL secret → absent.
|
||||
mock.ExpectQuery("SELECT EXISTS\\(SELECT 1 FROM workspace_secrets WHERE workspace_id = \\$1 AND key = 'MODEL'\\)").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(false))
|
||||
|
||||
// Gate failure broadcasts WORKSPACE_PROVISION_FAILED and marks the row failed.
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec("UPDATE workspaces SET status = \\$3, last_sample_error = \\$2, updated_at = now\\(\\) WHERE id = \\$1").
|
||||
WithArgs(wsID, sqlmock.AnyArg(), models.StatusFailed).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/registry/register",
|
||||
bytes.NewBufferString(`{"id":"`+wsID+`","url":"http://localhost:9100","delivery_mode":"push","kind":"platform","agent_card":{"name":"concierge"}}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Register(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Fatalf("platform agent missing MODEL secret: expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegister_PollMode_PreservesExistingValue: when the row already
|
||||
// has delivery_mode=poll and the payload doesn't set it, the resolved
|
||||
// mode should be poll — i.e. "absent payload mode" must NOT silently
|
||||
|
||||
Reference in New Issue
Block a user