fix(core#2566): refuse concierge/agent set_workspace_secret self-writes on org root #2756
@@ -144,6 +144,57 @@ func NewSecretsHandler(restartFunc func(string)) *SecretsHandler {
|
||||
return &SecretsHandler{restartFunc: restartFunc}
|
||||
}
|
||||
|
||||
// conciergeSelfSecretWriteBlocked (core#2566) reports whether a Set call
|
||||
// from the concierge/agent surface (AdminAuth ADMIN_TOKEN) targeting the
|
||||
// org's kind='platform' concierge workspace should be refused outright
|
||||
// (self-DoS guard). Returns a non-empty reason when blocked.
|
||||
//
|
||||
// Threat model (core#2566, 2026-06-11 live incident):
|
||||
// - The concierge's management MCP authenticates with the tenant
|
||||
// ADMIN token. On a Set call against the concierge's own workspace
|
||||
// (kind='platform'), #2573's auto-restart skip *does* fire (skip 2
|
||||
// covers kind='platform') — but that guard is best-effort and there
|
||||
// is a second, earlier failure path: the secret write itself
|
||||
// triggers env-var reload side effects inside the live container
|
||||
// mid-turn, and any path that later invokes a restart (operator
|
||||
// click, restart-on-failure watchdog, the next Set/Delete) tears
|
||||
// the concierge down. The org root going offline has already cost
|
||||
// a multi-hour outage once.
|
||||
// - Approval gating (#2574) is necessary but NOT sufficient: an
|
||||
// approval can be issued by a sleepy operator, and the concierge
|
||||
// consuming it still self-DoSes. The only safe posture is to
|
||||
// refuse the self-targeted write and force a human to apply the
|
||||
// change through a non-agent path (canvas Secrets tab, operator
|
||||
// session with explicit operator-action audit).
|
||||
//
|
||||
// Scope: AdminAuth admin-token callers ONLY. Session-cookie
|
||||
// (cp_session_actor) and ordinary workspace-token callers are NOT
|
||||
// blocked — they are human operators / non-concierge agents and may
|
||||
// legitimately need to write the concierge's own secrets.
|
||||
//
|
||||
// Fail-closed: if the kind lookup errors, we refuse the write (and
|
||||
// log) — a wrongly-fired write on the org root is exactly the outage
|
||||
// this guard exists to prevent, while a refused write is just a retry
|
||||
// after the DB hiccup clears.
|
||||
func conciergeSelfSecretWriteBlocked(c *gin.Context, targetWorkspaceID string) (bool, string) {
|
||||
if !callerIsAdminToken(c) {
|
||||
return false, ""
|
||||
}
|
||||
ctx := c.Request.Context()
|
||||
var kind string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT COALESCE(kind, 'workspace') FROM workspaces WHERE id = $1`, targetWorkspaceID).
|
||||
Scan(&kind); err != nil {
|
||||
// Fail closed — see doc comment.
|
||||
log.Printf("secrets: blocking admin-token set_workspace_secret on %s (kind lookup failed, fail-closed: %v)", targetWorkspaceID, err)
|
||||
return true, "self-DoS guard: workspace kind lookup failed; refusing to write to a workspace whose kind cannot be verified"
|
||||
}
|
||||
if kind == "platform" {
|
||||
return true, "concierge cannot set_workspace_secret on its own platform-root workspace (self-DoS guard, core#2566); apply this secret through the canvas Secrets tab as a human operator"
|
||||
}
|
||||
return false, ""
|
||||
}
|
||||
|
||||
// autoRestartAllowed (core#2573) decides whether a secret change on
|
||||
// workspaceID may fire the auto-restart. Two skips:
|
||||
//
|
||||
@@ -412,6 +463,28 @@ func (h *SecretsHandler) Set(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// core#2566: refuse concierge/agent self-targeted secret writes on the
|
||||
// org-root (kind='platform') workspace. Fires BEFORE the approval gate so
|
||||
// a refused write does not even create a pending approval — the operator
|
||||
// is told to apply the change through the canvas Secrets tab instead, and
|
||||
// the audit trail records the refused attempt.
|
||||
if blocked, reason := conciergeSelfSecretWriteBlocked(c, workspaceID); blocked {
|
||||
log.Printf("secrets: refusing admin-token set_workspace_secret on %s key=%s (core#2566 self-DoS guard): %s", workspaceID, body.Key, reason)
|
||||
c.JSON(http.StatusForbidden, gin.H{
|
||||
"error": reason,
|
||||
"workspace_id": workspaceID,
|
||||
"key": body.Key,
|
||||
"code": "CONCIERGE_SELF_WRITE_BLOCKED",
|
||||
})
|
||||
audit.Emit(c.Request.Context(), "secret.set.refused", map[string]any{
|
||||
"workspace_id": workspaceID,
|
||||
"key": body.Key,
|
||||
"reason": "concierge_self_write_blocked",
|
||||
"issue": "core#2566",
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
// RFC platform-agent Phase 4b: gate org-token (platform-agent) secret writes
|
||||
// behind human approval. The context includes the key so an approval for one
|
||||
// secret cannot authorise writing another. No-op for ordinary callers and
|
||||
|
||||
@@ -1297,6 +1297,15 @@ func TestSecretsSet_AdminToken_GatedByApproval(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
// core#2566 self-DoS guard: the guard runs BEFORE the approval gate
|
||||
// for admin-token callers. The target here is a regular workspace
|
||||
// (kind='workspace'), so the guard passes and the approval gate
|
||||
// takes over. Mock the kind lookup so the guard does not fail-closed
|
||||
// on an unmocked query.
|
||||
mock.ExpectQuery(`SELECT COALESCE\(kind`).
|
||||
WithArgs("550e8400-e29b-41d4-a716-446655440000").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("workspace"))
|
||||
|
||||
// requireApproval sequence for an admin-token caller (gated action,
|
||||
// no pre-existing approval). The gate's requireApproval runs THREE
|
||||
// queries: UPDATE (consume), INSERT (new pending), SELECT (parent_id).
|
||||
@@ -1941,3 +1950,227 @@ func TestPlatformManagedLLMModeForWorkspace_GatesOnModelNotResolvedMode(t *testi
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// core#2566: concierge self-DoS guard for set_workspace_secret
|
||||
// =============================================================================
|
||||
//
|
||||
// The org-root concierge authenticates its management MCP with the tenant
|
||||
// ADMIN_TOKEN (AdminAuth, caller_is_admin_token=true). When the concierge
|
||||
// calls set_workspace_secret against its own workspace (kind='platform'),
|
||||
// any of (a) a future restart on the container, (b) env-var reload side
|
||||
// effects, or (c) the next concierge turn starting on a half-applied env
|
||||
// can terminate the org root mid-task — the live 2026-06-11 incident took
|
||||
// the concierge offline and required operator hand-repair.
|
||||
//
|
||||
// The guard: for admin-token callers, look up the target's kind and refuse
|
||||
// the write when it is 'platform'. Fail-closed on DB error. Session-cookie
|
||||
// (cp_session_actor) and ordinary workspace-token callers are NOT blocked —
|
||||
// they are human operators / non-concierge agents and may legitimately
|
||||
// need to write the concierge's own secrets.
|
||||
|
||||
// TestSecretsSet_AdminTokenSelfWrite_Refused (#2566): an admin-token caller
|
||||
// writing a secret to the org-root (kind='platform') workspace MUST be
|
||||
// refused with 403. No secret row is written, no approval is created, and
|
||||
// the response carries a structured code so the caller can surface a
|
||||
// self-DoS-safe error to the user.
|
||||
func TestSecretsSet_AdminTokenSelfWrite_Refused(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
// Guard's kind lookup. If the guard were bypassed (the bug), the
|
||||
// handler would proceed to the approval gate and sqlmock would NOT see
|
||||
// this query, failing the test. Mock it now to drive the guard.
|
||||
mock.ExpectQuery(`SELECT COALESCE\(kind`).
|
||||
WithArgs("550e8400-e29b-41d4-a716-446655440000").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("platform"))
|
||||
|
||||
// NOTE: deliberately NO approval-gate queries, NO INSERT mock setup.
|
||||
// A refused write must short-circuit BEFORE the gate — so neither is
|
||||
// reached.
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "550e8400-e29b-41d4-a716-446655440000"}}
|
||||
body := `{"key":"SELF_DOS_TEST","value":"x"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/550e8400-e29b-41d4-a716-446655440000/secrets", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
// core#2566: the concierge's MCP authenticates with the tenant
|
||||
// ADMIN_TOKEN. AdminAuth sets these context keys.
|
||||
c.Set("caller_is_admin_token", true)
|
||||
c.Set("caller_credential_class", "admin-token")
|
||||
|
||||
handler.Set(c)
|
||||
|
||||
if w.Code != http.StatusForbidden {
|
||||
t.Fatalf("admin-token self-write to kind='platform' MUST be refused with 403, got %d: %s",
|
||||
w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if resp["code"] != "CONCIERGE_SELF_WRITE_BLOCKED" {
|
||||
t.Errorf("code = %v, want CONCIERGE_SELF_WRITE_BLOCKED", resp["code"])
|
||||
}
|
||||
if resp["workspace_id"] != "550e8400-e29b-41d4-a716-446655440000" {
|
||||
t.Errorf("workspace_id = %v, want 550e8400-e29b-41d4-a716-446655440000", resp["workspace_id"])
|
||||
}
|
||||
if resp["key"] != "SELF_DOS_TEST" {
|
||||
t.Errorf("key = %v, want SELF_DOS_TEST", resp["key"])
|
||||
}
|
||||
if errMsg, _ := resp["error"].(string); !strings.Contains(errMsg, "self-DoS") {
|
||||
t.Errorf("error %q should mention self-DoS", errMsg)
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsSet_AdminTokenOnOtherWorkspace_NotBlocked (#2566): the guard
|
||||
// is scoped to the concierge's OWN workspace. An admin-token write to a
|
||||
// regular workspace must NOT be blocked by this guard (it is still gated
|
||||
// by the Phase-4 approval gate in #2574, which returns 202).
|
||||
func TestSecretsSet_AdminTokenOnOtherWorkspace_NotBlocked(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
// Guard's kind lookup: target is a regular workspace, so the guard
|
||||
// returns false and the approval gate takes over.
|
||||
mock.ExpectQuery(`SELECT COALESCE\(kind`).
|
||||
WithArgs("550e8400-e29b-41d4-a716-446655440000").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"kind"}).AddRow("workspace"))
|
||||
|
||||
// Approval gate (admin-token caller → always gated, regardless of
|
||||
// rollout flag — see TestSecretsSet_AdminToken_GatedByApproval).
|
||||
mock.ExpectQuery(`UPDATE approval_requests SET consumed_at`).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
mock.ExpectQuery(`WITH existing AS`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("appr-core2566-allow"))
|
||||
mock.ExpectQuery(`SELECT parent_id FROM workspaces WHERE id`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "550e8400-e29b-41d4-a716-446655440000"}}
|
||||
body := `{"key":"NOT_SELF","value":"v"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/550e8400-e29b-41d4-a716-446655440000/secrets", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Set("caller_is_admin_token", true)
|
||||
c.Set("caller_credential_class", "admin-token")
|
||||
|
||||
handler.Set(c)
|
||||
|
||||
// Guard did NOT block → approval gate fires → 202 (admin-token
|
||||
// always gated). This proves the guard only fires on the platform
|
||||
// root and lets ordinary writes fall through to the gate.
|
||||
if w.Code != http.StatusAccepted {
|
||||
t.Fatalf("admin-token write to a regular workspace must fall through to the approval gate (202), got %d: %s",
|
||||
w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsSet_AdminTokenKindLookupError_FailsClosed (#2566): when the
|
||||
// DB hiccup prevents verifying the target's kind, the guard refuses
|
||||
// the write (fail-closed). This is the safe default: a wrongly-fired
|
||||
// self-DoS is exactly the outage this guard exists to prevent, while
|
||||
// a refused write is just a retry after the DB hiccup clears.
|
||||
func TestSecretsSet_AdminTokenKindLookupError_FailsClosed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT COALESCE\(kind`).
|
||||
WithArgs("550e8400-e29b-41d4-a716-446655440000").
|
||||
WillReturnError(sql.ErrConnDone) // arbitrary transient DB error
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "550e8400-e29b-41d4-a716-446655440000"}}
|
||||
body := `{"key":"FAIL_CLOSED","value":"v"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/550e8400-e29b-41d4-a716-446655440000/secrets", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Set("caller_is_admin_token", true)
|
||||
c.Set("caller_credential_class", "admin-token")
|
||||
|
||||
handler.Set(c)
|
||||
|
||||
// Fail-closed: 403 with CONCIERGE_SELF_WRITE_BLOCKED. We chose 403
|
||||
// (not 500) because the guard is making a policy decision, not a
|
||||
// transient error report — the operator sees the structured code
|
||||
// and knows exactly why the write was refused. A 500 would invite
|
||||
// a retry loop on a path that is fundamentally unsafe to retry.
|
||||
if w.Code != http.StatusForbidden {
|
||||
t.Fatalf("admin-token write with kind-lookup failure must fail closed with 403, got %d: %s",
|
||||
w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("failed to parse response: %v", err)
|
||||
}
|
||||
if resp["code"] != "CONCIERGE_SELF_WRITE_BLOCKED" {
|
||||
t.Errorf("code = %v, want CONCIERGE_SELF_WRITE_BLOCKED", resp["code"])
|
||||
}
|
||||
if errMsg, _ := resp["error"].(string); !strings.Contains(errMsg, "kind lookup failed") {
|
||||
t.Errorf("error %q should mention the kind-lookup failure", errMsg)
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsSet_SessionCookieCaller_NotBlocked (#2566): a human operator
|
||||
// using the canvas session cookie (cp_session_actor, NOT admin-token) MUST
|
||||
// NOT be blocked by this guard. The guard is scoped to AdminAuth
|
||||
// admin-token callers only — the concierge's surface. Operators must
|
||||
// retain the ability to write the concierge's own secrets through the
|
||||
// canvas Secrets tab, which uses the session tier.
|
||||
func TestSecretsSet_SessionCookieCaller_NotBlocked(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
// Guard's kind lookup: should NOT happen, because the caller is
|
||||
// session-cookie, not admin-token. If the test fails with an unmet
|
||||
// expectation here, the guard is over-firing.
|
||||
//
|
||||
// (No `SELECT COALESCE(kind` mock.)
|
||||
//
|
||||
// The handler proceeds past the (skipped) guard to the approval
|
||||
// gate. Session cookies are not in the gated set, so the gate
|
||||
// passes (returns true → handler INSERTs and 200s).
|
||||
mock.ExpectExec("INSERT INTO workspace_secrets").
|
||||
WithArgs("550e8400-e29b-41d4-a716-446655440000", "OPERATOR_SECRET", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "550e8400-e29b-41d4-a716-446655440000"}}
|
||||
body := `{"key":"OPERATOR_SECRET","value":"v"}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/550e8400-e29b-41d4-a716-446655440000/secrets", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
// Session-cookie caller: cp_session_actor set, caller_is_admin_token
|
||||
// is NOT set (AdminAuth's session tier does not flip that flag).
|
||||
c.Set("cp_session_actor", "user-op-1234")
|
||||
|
||||
handler.Set(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("session-cookie caller must NOT be blocked by the self-DoS guard, got %d: %s",
|
||||
w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user