Merge branch 'main' of https://git.moleculesai.app/molecule-ai/molecule-core into ci/146-lint-no-tenant-gitea-token

2026-05-19 04:12:42 +00:00
parent 23506ab751 cf1438a525
commit f2161bdad0
33 changed files with 565 additions and 74 deletions
@@ -401,7 +401,7 @@ jobs:

  canvas-deploy-reminder:
    name: Canvas Deploy Reminder
-    runs-on: ubuntu-latest
+    runs-on: docker-host
    # mc#774 root-fix: added job-level `if:` so ci-required-drift.py's
    # ci_job_names() detects this as github.ref-gated and skips it from F1.
    # The step-level exit 0 handles the "not main push" case; the job-level
@@ -164,7 +164,7 @@ jobs:
  # bp-required: pending #1296
  peer-visibility-local:
    name: E2E Peer Visibility (local)
-    runs-on: ubuntu-latest
+    runs-on: docker-host
    timeout-minutes: 30
    env:
      # Per-run names + ephemeral ports — same collision-avoidance as
@@ -88,17 +88,6 @@ jobs:
                  with open(path) as f:
                      raw_lines = f.readlines()

-                  # Strip pure-comment lines for docker-exec detection so that
-                  # documentation comments don't trigger the lint.
-                  scan_text = ''.join(
-                      l for l in raw_lines
-                      if not re.match(r'^\s*#', l)
-                  )
-
-                  has_docker = bool(DOCKER_EXEC.search(scan_text)) or bool(DOCKER_ACTION.search(scan_text))
-                  if not has_docker:
-                      continue
-
                  # Parse job headers + their runs-on. Simple line scan; relies on
                  # 2-space job indent + 4-space runs-on indent under `jobs:`.
                  jobs = []
@@ -113,8 +102,9 @@ jobs:
                      mh = JOB_HEADER.match(line)
                      if mh:
                          if current:
+                              current['end'] = i - 1
                              jobs.append(current)
-                          current = {'name': mh.group(2), 'line': i, 'runs_on': None}
+                          current = {'name': mh.group(2), 'line': i, 'end': len(raw_lines), 'runs_on': None}
                          continue
                      mr = RUNS_ON.match(line)
                      if mr and current and current['runs_on'] is None:
@@ -123,6 +113,18 @@ jobs:
                      jobs.append(current)

                  for j in jobs:
+                      # Strip pure-comment lines for docker-exec detection so
+                      # documentation comments don't trigger the lint. Scan the
+                      # current job body only: a workflow may contain one
+                      # docker-bound job and several harmless metadata jobs.
+                      job_lines = raw_lines[j['line'] - 1:j['end']]
+                      scan_text = ''.join(
+                          l for l in job_lines
+                          if not re.match(r'^\s*#', l)
+                      )
+                      has_docker = bool(DOCKER_EXEC.search(scan_text)) or bool(DOCKER_ACTION.search(scan_text))
+                      if not has_docker:
+                          continue
                      ro = j['runs_on']
                      if ro is None:
                          # Reusable workflow caller (`uses:` instead of `runs-on:`) —
@@ -134,9 +136,11 @@ jobs:
                      labels = [t.strip().strip('"\'') for t in ro_norm.split(',') if t.strip()]
                      if any(lbl in ALLOWED_LABELS for lbl in labels):
                          continue
-                      # Allow caller-supplied label expression `${{ ... }}` — caller
-                      # is responsible.
-                      if any('${{' in lbl for lbl in labels):
+                      # Allow caller-supplied label expressions; spell the
+                      # marker indirectly so Gitea's expression parser does
+                      # not try to parse this Python heredoc.
+                      expression_marker = '$' + '{{'
+                      if any(expression_marker in lbl for lbl in labels):
                          continue
                      fails.append(
                          f"{path}:{j['line']}: job `{j['name']}` uses docker but runs-on={ro!r} "
@@ -300,7 +300,7 @@ jobs:

  canvas-deploy-reminder:
    name: Canvas Deploy Reminder
-    runs-on: ubuntu-latest
+    runs-on: docker-host
    needs: [changes, canvas-build]
    # Only fires on direct pushes to main (i.e. after staging→main promotion).
    if: needs.changes.outputs.canvas == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main'
@@ -440,4 +440,3 @@ jobs:

      # SDK + plugin validation moved to standalone repo:
      # github.com/molecule-ai/molecule-sdk-python
-
@@ -128,7 +128,7 @@ jobs:
  e2e-api:
    needs: detect-changes
    name: E2E API Smoke Test
-    runs-on: ubuntu-latest
+    runs-on: docker-host
    timeout-minutes: 15
    env:
      # Unique per-run container names so concurrent runs on the host-
@@ -88,7 +88,7 @@ jobs:
  integration:
    name: Handlers Postgres Integration
    needs: detect-changes
-    runs-on: ubuntu-latest
+    runs-on: docker-host
    env:
      # Unique name per run so concurrent jobs don't collide on the
      # bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
@@ -249,4 +249,3 @@ jobs:
          # already gone (e.g. concurrent rerun race), don't fail the job.
          docker rm -f "${PG_NAME}" >/dev/null 2>&1 || true
          echo "Cleaned up ${PG_NAME}"
-
@@ -102,7 +102,7 @@ jobs:
  harness-replays:
    needs: detect-changes
    name: Harness Replays
-    runs-on: ubuntu-latest
+    runs-on: docker-host
    timeout-minutes: 30
    steps:
      - name: No-op pass (paths filter excluded this commit)
@@ -39,7 +39,7 @@ env:
 jobs:
  build-and-push:
    name: Build & push canvas image
-    runs-on: ubuntu-latest
+    runs-on: publish
    steps:
      - name: Checkout
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -69,7 +69,7 @@ or other removed paths — open against `molecule-ai/docs` instead.
 | OG images, visual assets | `molecule-ai/docs` → `app/` or `marketing/` |
 | SEO briefs | `molecule-ai/docs` → `marketing/` |
 | DevRel demos (runnable code) | Standalone repo under `molecule-ai/`, OR embedded in `molecule-ai/docs` |
-| Launch checklists, internal tracking | GitHub Issues — **not** committed files |
+| Launch checklists, internal tracking | Gitea Issues — **not** committed files |
 | Engineering docs (`docs/adr/`, `docs/architecture/`, `docs/incidents/`) | This repo (internal, not published) |
 | Live product pages (e.g. `canvas/src/app/pricing/page.tsx`) | This repo (these are app code, not marketing copy) |

@@ -106,7 +106,7 @@ causing a render loop when any node position changed.

 #### Auto-merge & the "extra commit" trap

-**Two system guards protect against pushing commits after auto-merge has been enabled.** Don't try to work around them — they exist because we shipped a half-merged PR on 2026-04-27 (`#2174` merged with only its first commit; the second was orphaned on a branch GitHub had already deleted).
+**Two system guards protect against pushing commits after auto-merge has been enabled.** Don't try to work around them — they exist because we shipped a half-merged PR on 2026-04-27 (`#2174` merged with only its first commit; the second was orphaned on a branch the host had already deleted).

 1. **Repo-wide:** "Automatically delete head branches" is on. Once a PR merges, the branch is deleted server-side. Any subsequent `git push` to that branch fails with `remote rejected — no such branch`.

@@ -145,7 +145,7 @@ Fix violations before committing — the hook will reject the commit.

 ### CI Pipeline

-CI runs on GitHub Actions with a self-hosted runner. External contributors:
+CI runs on Gitea Actions with self-hosted runners. External contributors:
 PRs from forks will not trigger CI automatically. A maintainer will review
 and run CI manually.

@@ -192,7 +192,7 @@ live in their own repos:

 - [`molecule-ai/molecule-ai-workspace-runtime`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-runtime) — Python adapter SDK (`molecule_runtime`) that runs inside containerized Molecule workspaces. Bridges Claude Code SDK / hermes / langgraph / etc. → A2A queue.
 - [`molecule-ai/molecule-sdk-python`](https://git.moleculesai.app/molecule-ai/molecule-sdk-python) — `A2AServer` + `RemoteAgentClient` for external agents that register over the public `/registry/register` flow.
- [`molecule-ai/molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel) — Claude Code channel plugin. Bridges A2A traffic into a running Claude Code session via MCP `notifications/claude/channel`. Polling-based (no tunnel required); install inside Claude Code via `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels --channels plugin:molecule@molecule-channel`.
+- [`molecule-ai/molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel) — Claude Code channel plugin. Bridges A2A traffic into a running Claude Code session via MCP `notifications/claude/channel`. Polling-based (no tunnel required); install inside Claude Code via `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels=plugin:molecule@molecule-channel`.

 When extending the **A2A surface** in molecule-core (`workspace-server/internal/handlers/a2a_proxy.go` etc.), consider whether the change has a downstream impact on the runtime SDK or the channel plugin — they're versioned independently but share the wire shape.

@@ -206,7 +206,7 @@ See `CLAUDE.md` for detailed architecture documentation, including:

 ## Reporting Issues

-Use GitHub Issues with a clear title and reproduction steps. Include:
+Use Gitea Issues with a clear title and reproduction steps. Include:
 - What you expected
 - What actually happened
 - Platform/OS version
@@ -214,8 +214,9 @@ Use GitHub Issues with a clear title and reproduction steps. Include:

 ## Security

-If you discover a security vulnerability, please report it privately via
-GitHub Security Advisories rather than opening a public issue.
+If you discover a security vulnerability, please report it privately by
+opening an issue against `molecule-ai/internal` (a private repo only
+maintainers can see) rather than filing a public issue here.

 ## License

@@ -238,7 +238,7 @@ The result is not just “an agent that learns.” It is **an organization that
 - subscribe to one or more workspaces; peer messages surface as conversation turns; replies route back through Molecule's A2A
 - no tunnel, no public endpoint — the plugin self-registers each watched workspace as `delivery_mode=poll` and long-polls `/activity?since_id=…`
 - multi-tenant friendly: one plugin install can watch workspaces across multiple Molecule tenants (`MOLECULE_PLATFORM_URLS` per-workspace)
- install via the standard marketplace flow: `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels --channels plugin:molecule@molecule-channel`
+- install via the standard marketplace flow: `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels=plugin:molecule@molecule-channel`

 ## Built For Teams That Need More Than A Demo

@@ -237,7 +237,7 @@ Molecule AI 并不是要替代下面这些 framework，而是把它们纳入更
 - 订阅一个或多个 workspace；peer 的消息会以 user-turn 出现，回复会经 Molecule A2A 路由出去
 - 无需公网隧道、无需公开端点 —— 插件启动时自动把每个 watched workspace 注册成 `delivery_mode=poll`，长轮询 `/activity?since_id=…`
 - 多租户友好：单次安装即可同时 watch 跨多个 Molecule 租户的 workspace（`MOLECULE_PLATFORM_URLS` 按 workspace 配置）
- 通过标准 marketplace 流程安装：`/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`，然后用 `claude --dangerously-load-development-channels --channels plugin:molecule@molecule-channel` 启动
+- 通过标准 marketplace 流程安装：`/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`，然后用 `claude --dangerously-load-development-channels=plugin:molecule@molecule-channel` 启动

 ## 适合什么团队

@@ -153,11 +153,13 @@ print('OK:found=%d/%d' % (len(found), len(expect)))
      # Caller bug, not a runtime regression — surface loudly so a
      # mis-wired backend can't mint a false green.
      echo "  ✗ $rt: no expected peers were configured for this caller"
+      # shellcheck disable=SC2034 # exported verdict is read by the caller's map plumbing.
      PV_VERDICT="FAIL(rpc=NO_EXPECTED_PEERS_CONFIGURED)"
      return 1
      ;;
    *)
      echo "  ✗ $rt: unexpected verdict '$parse'"
+      # shellcheck disable=SC2034 # exported verdict is read by the caller's map plumbing.
      PV_VERDICT="FAIL(unknown)"
      return 1
      ;;
@@ -208,7 +208,9 @@ log "    PARENT_ID=$PARENT_ID"
 # box (bash 3.2, no associative arrays) per feedback_local_must_mimic_
 # production. WS_IDS / VERDICT are kept as newline-delimited "rt<TAB>val"
 # maps with tiny get/set helpers (portable to bash 3.2+ AND ubuntu CI).
+# shellcheck disable=SC2034 # map values are updated through portable eval-based helpers.
 WS_IDS_MAP=""
+# shellcheck disable=SC2034 # map values are updated through portable eval-based helpers.
 VERDICT_MAP=""
 _map_set() { # _map_set <mapvarname> <key> <value>
  local __m="$1" __k="$2" __v="$3" __cur
@@ -556,7 +556,14 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
 	// Track LLM token usage for cost transparency (#593).
 	// Fires in a detached goroutine so token accounting never adds latency
 	// to the critical A2A path.
-	go extractAndUpsertTokenUsage(context.WithoutCancel(ctx), workspaceID, respBody)
+	// RFC internal#524 Layer 1: extractAndUpsertTokenUsage reads db.DB
+	// (INSERT INTO llm_token_usage). Without globalGoAsync, the detached
+	// write races a subsequent test's db.DB swap exactly like the
+	// maybeMarkContainerDead path that 69d9b4e3 fixed.
+	tokCtx := context.WithoutCancel(ctx)
+	wsID := workspaceID
+	tokBody := respBody
+	globalGoAsync(func() { extractAndUpsertTokenUsage(tokCtx, wsID, tokBody) })

 	// Non-2xx agent response: the agent received the request but returned an
 	// error status. Return a proxyErr so the caller (DrainQueueForWorkspace)
@@ -931,6 +938,12 @@ func applyIdleTimeout(parent context.Context, b *events.Broadcaster, workspaceID
 	}
 	ctx, cancel := context.WithCancel(parent)
 	sub, unsub := b.SubscribeSSE(workspaceID)
+	// goAsync-exempt (RFC internal#524 Layer 2.2 annotation): this
+	// goroutine owns the parent ctx's cancel and exits only on
+	// ctx.Done() / sub-channel close — wrapping it in globalGoAsync would
+	// deadlock drainTestAsync because the request that owns ctx hasn't
+	// completed when t.Cleanup fires. Does NOT read db.DB; idle-timer
+	// management only.
 	go func() {
 		defer unsub()
 		timer := time.NewTimer(idle)
@@ -189,13 +189,16 @@ func (h *AdminPluginDriftHandler) Apply(c *gin.Context) {
 	// at construction. Trigger it asynchronously so the HTTP response returns
 	// immediately after the install; the restart is best-effort.
 	if h.pluginsHandler != nil {
-		go func() {
+		// RFC internal#524 Layer 1: globalGoAsync so the detached restart
+		// is drained before db.DB swap (see workspace.go:globalGoAsync).
+		wsID := entry.WorkspaceID
+		globalGoAsync(func() {
 			// We can't use result.PluginName as a restart key since the
 			// restartFunc takes a workspaceID. Pass the workspaceID.
 			if restart := h.pluginsHandler.GetRestartFunc(); restart != nil {
-				restart(entry.WorkspaceID)
+				restart(wsID)
 			}
-		}()
+		})
 	}

 	log.Printf("AdminPluginDrift: applied drift update for %s/%s (queue_id=%s)",
@@ -556,13 +556,16 @@ func (h *ChannelHandler) Webhook(c *gin.Context) {
 		return
 	}

-	// Process asynchronously — don't block the webhook response
-	go func() {
+	// Process asynchronously — don't block the webhook response.
+	// RFC internal#524 Layer 1: globalGoAsync — HandleInbound traverses
+	// db.DB to resolve workspace + record the channel event; drained by
+	// drainTestAsync before db.DB swap.
+	globalGoAsync(func() {
 		bgCtx := context.Background()
 		if err := h.manager.HandleInbound(bgCtx, ch, msg); err != nil {
 			log.Printf("Channels: async HandleInbound error for workspace %s: %v", ch.WorkspaceID[:12], err)
 		}
-	}()
+	})

 	c.JSON(http.StatusOK, gin.H{"status": "accepted"})
 }
@@ -185,10 +185,15 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
 	delegationCtx, cancelDelegation := context.WithTimeout(
 		context.WithoutCancel(ctx), 30*time.Minute,
 	)
-	go func() {
+	// RFC internal#524 Layer 1: route through workspace.goAsync so the
+	// detached executeDelegation (which writes A2A status rows to db.DB
+	// across multiple stages) is drained before db.DB is restored in a
+	// later test's t.Cleanup. Tracked via the parent workspace handler's
+	// asyncWG.
+	h.workspace.goAsync(func() {
 		defer cancelDelegation()
 		h.executeDelegation(delegationCtx, sourceID, body.TargetID, delegationID, a2aBody)
-	}()
+	})

 	// Broadcast event so canvas shows delegation in real-time
 	h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
@@ -129,6 +129,14 @@ var (

 // getEICTunnelPool returns the singleton pool, lazy-initialising on
 // first call. Idempotent.
+//
+// goAsync-exempt (RFC internal#524 Layer 2.2): every `go` in this file
+// is pool-internal lifecycle (janitor + per-entry cleanup closures).
+// None reads db.DB — the pool tracks SSH tunnels, not workspace state.
+// The janitor exits on close(p.stopJanitor); cleanups exit when the
+// captured tunnel's resources are released. Wrapping in globalGoAsync
+// would block test cleanup on the singleton janitor that intentionally
+// runs forever.
 func getEICTunnelPool() *eicTunnelPool {
 	globalEICTunnelPoolOnce.Do(func() {
 		globalEICTunnelPool = newEICTunnelPool()
@@ -48,6 +48,14 @@ func init() {
 // finish. Called from setupTestDB's cleanup before db.DB is restored so
 // no detached restart/provision goroutine is mid-read of db.DB when the
 // pointer is swapped.
+//
+// Also drains the package-level globalAsync WaitGroup (RFC internal#524
+// Layer 1 deliverable 2) so sibling handlers (SecretsHandler /
+// PluginsHandler / etc.) that route through globalGoAsync rather than
+// h.goAsync are likewise drained before db.DB is swapped. Without this
+// drain a SecretsHandler.Set's restartFunc-via-globalGoAsync could race
+// the db.DB restore exactly the same way maybeMarkContainerDead did
+// before commit 69d9b4e3.
 func drainTestAsync() {
 	liveTestHandlersMu.Lock()
 	handlers := make([]*WorkspaceHandler, len(liveTestHandlers))
@@ -56,6 +64,7 @@ func drainTestAsync() {
 	for _, h := range handlers {
 		h.waitAsyncForTest()
 	}
+	waitGlobalAsyncForTest()
 }

 // setupTestDB creates a sqlmock DB and assigns it to the global db.DB.
@@ -278,7 +278,10 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,

 	// Fire and forget in a detached goroutine. Use a background context so
 	// the call is not cancelled when the HTTP request completes.
-	go func() {
+	// RFC internal#524 Layer 1: globalGoAsync — the detached call reads
+	// db.DB (mcpResolveURL + updateMCPDelegationStatus) and must be
+	// drained by drainTestAsync before any t.Cleanup-driven db.DB swap.
+	globalGoAsync(func() {
 		bgCtx, cancel := context.WithTimeout(context.Background(), mcpAsyncCallTimeout)
 		defer cancel()

@@ -322,7 +325,7 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
 		defer func() { _ = resp.Body.Close() }()
 		// Drain response so the connection can be reused.
 		_, _ = io.Copy(io.Discard, resp.Body)
-	}()
+	})

 	return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, delegationID, targetID), nil
 }
@@ -534,10 +534,14 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 		// Docker-mode otherwise; the org-import call site doesn't need
 		// to know which.
 		provisionSem <- struct{}{} // acquire
-		go func(wID, tPath string, cFiles map[string][]byte, p models.CreateWorkspacePayload) {
+		// RFC internal#524 Layer 1: route through workspace.goAsync —
+		// provisionWorkspaceAuto inserts/updates the workspaces row in
+		// db.DB and must be drained before any test cleanup swap.
+		wID, tPath, cFiles, p := id, templatePath, configFiles, payload
+		h.workspace.goAsync(func() {
 			defer func() { <-provisionSem }() // release
 			h.workspace.provisionWorkspaceAuto(wID, tPath, cFiles, p)
-		}(id, templatePath, configFiles, payload)
+		})
 	}

 	// Insert schedules if defined. Resolve each schedule's prompt body from
@@ -198,12 +198,16 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
 		log.Printf("Plugin uninstall: failed to delete workspace_plugins row for %s: %v (container cleanup succeeded)", pluginName, err)
 	}

-	// Auto-restart (small delay to ensure fs writes are flushed)
+	// Auto-restart (small delay to ensure fs writes are flushed).
+	// RFC internal#524 Layer 1: globalGoAsync so the detached restart
+	// goroutine is drained by drainTestAsync before db.DB swap. See
+	// workspace.go:globalGoAsync for the contract.
 	if h.restartFunc != nil {
-		go func() {
+		wsID := workspaceID
+		globalGoAsync(func() {
 			time.Sleep(2 * time.Second)
-			h.restartFunc(workspaceID)
-		}()
+			h.restartFunc(wsID)
+		})
 	}

 	log.Printf("Plugin uninstall: %s from workspace %s (restarting)", pluginName, workspaceID)
@@ -260,10 +264,12 @@ func (h *PluginsHandler) uninstallViaEIC(ctx context.Context, c *gin.Context, wo
 	}

 	if h.restartFunc != nil {
-		go func() {
+		// RFC internal#524 Layer 1: see uninstallViaDocker above.
+		wsID := workspaceID
+		globalGoAsync(func() {
 			time.Sleep(2 * time.Second)
-			h.restartFunc(workspaceID)
-		}()
+			h.restartFunc(wsID)
+		})
 	}

 	log.Printf("Plugin uninstall: %s from workspace %s (restarting via SaaS path)", pluginName, workspaceID)
@@ -320,7 +320,10 @@ func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID str
 			if kind == classifyKindSkillContentOnly {
 				log.Printf("Plugin install: %s → workspace %s — SKILL-content-only update, SKIPPING restart", r.PluginName, workspaceID)
 			} else {
-				go h.restartFunc(workspaceID)
+				// RFC internal#524 Layer 1: drain via globalGoAsync (see
+				// workspace.go:globalGoAsync).
+				wsID := workspaceID
+				globalGoAsync(func() { h.restartFunc(wsID) })
 			}
 		}
 		return nil
@@ -334,7 +337,9 @@ func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID str
 			})
 		}
 		if h.restartFunc != nil {
-			go h.restartFunc(workspaceID)
+			// RFC internal#524 Layer 1: see Docker path above.
+			wsID := workspaceID
+			globalGoAsync(func() { h.restartFunc(wsID) })
 		}
 		return nil
 	}
@@ -819,8 +819,11 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
 		if payload.ActiveTasks < maxConcurrent {
 			// context.WithoutCancel: heartbeat handler's ctx is about to
 			// expire as soon as we return. The drain needs to outlive it.
+			// RFC internal#524 Layer 1: drainQueue reads db.DB; route
+			// through globalGoAsync so test cleanup waits for it.
 			drainCtx := context.WithoutCancel(ctx)
-			go h.drainQueue(drainCtx, payload.WorkspaceID)
+			wsID := payload.WorkspaceID
+			globalGoAsync(func() { h.drainQueue(drainCtx, wsID) })
 		}
 	}
 }
@@ -0,0 +1,93 @@
+package handlers
+
+// rfc524_layer1_async_drain_test.go — regression test for RFC internal#524
+// Layer 1 forward-port. Asserts:
+//
+//   1. globalGoAsync goroutines are drained by drainTestAsync before the
+//      test cleanup chain returns control.
+//   2. Routing through globalGoAsync (rather than bare `go ...`) ensures
+//      a sibling-handler's detached goroutine cannot outlive a test's
+//      db.DB swap.
+//
+// Companion of handlers_test.go:drainTestAsync (canonical 69d9b4e3 fix
+// extended to non-*WorkspaceHandler call sites). If either property
+// regresses, this test fails fast.
+
+import (
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// TestRFC524_GlobalGoAsync_DrainsBeforeCleanup asserts that goroutines
+// scheduled via globalGoAsync run to completion before drainTestAsync
+// returns. Concretely: schedule a globalGoAsync that flips a counter
+// after a short sleep, then call drainTestAsync; the counter must
+// already be 1 when the call returns.
+func TestRFC524_GlobalGoAsync_DrainsBeforeCleanup(t *testing.T) {
+	var ran int32
+	globalGoAsync(func() {
+		time.Sleep(20 * time.Millisecond)
+		atomic.StoreInt32(&ran, 1)
+	})
+
+	// drainTestAsync drains per-handler asyncWG + the package-level
+	// globalAsync WG. After it returns the goroutine MUST have run.
+	drainTestAsync()
+
+	if atomic.LoadInt32(&ran) != 1 {
+		t.Fatalf("drainTestAsync returned before globalGoAsync goroutine finished — regression of RFC internal#524 Layer 1 drain coupling")
+	}
+}
+
+// TestRFC524_GlobalGoAsync_MultipleConcurrent asserts the drain is
+// O(n)-correct: schedule a fan-out of globalGoAsync calls (like
+// restartAllAffectedByGlobalKey does on a large global secret rotation)
+// and confirm every one completes before drainTestAsync returns.
+func TestRFC524_GlobalGoAsync_MultipleConcurrent(t *testing.T) {
+	const n = 32
+	var completed int32
+	for i := 0; i < n; i++ {
+		globalGoAsync(func() {
+			// Short, random-ish work; the point is they're all in flight
+			// at the same time when drainTestAsync is called.
+			time.Sleep(5 * time.Millisecond)
+			atomic.AddInt32(&completed, 1)
+		})
+	}
+
+	drainTestAsync()
+
+	got := atomic.LoadInt32(&completed)
+	if got != n {
+		t.Fatalf("drainTestAsync returned with %d/%d globalGoAsync goroutines incomplete — fan-out drain broken", n-got, n)
+	}
+}
+
+// TestRFC524_HandlerGoAsync_AndGlobalAsync_BothDrained asserts that
+// drainTestAsync waits for BOTH the per-handler asyncWG (the original
+// 69d9b4e3 primitive) AND the package-level globalAsync (the Layer 1
+// extension). Schedules one of each and confirms both finish.
+func TestRFC524_HandlerGoAsync_AndGlobalAsync_BothDrained(t *testing.T) {
+	setupTestDB(t) // registers handlers + arms the drain
+
+	var perHandlerDone, globalDone int32
+	wh := NewWorkspaceHandler(nil, nil, "", t.TempDir())
+	wh.goAsync(func() {
+		time.Sleep(15 * time.Millisecond)
+		atomic.StoreInt32(&perHandlerDone, 1)
+	})
+	globalGoAsync(func() {
+		time.Sleep(15 * time.Millisecond)
+		atomic.StoreInt32(&globalDone, 1)
+	})
+
+	drainTestAsync()
+
+	if atomic.LoadInt32(&perHandlerDone) != 1 {
+		t.Errorf("per-handler asyncWG drain regressed (RFC internal#524 Layer 1 expects 69d9b4e3 to remain wired)")
+	}
+	if atomic.LoadInt32(&globalDone) != 1 {
+		t.Errorf("global async drain not wired (RFC internal#524 Layer 1 extension missing)")
+	}
+}
@@ -262,9 +262,13 @@ func (h *SecretsHandler) Set(c *gin.Context) {
 		return
 	}

-	// Auto-restart workspace to pick up new secret
+	// Auto-restart workspace to pick up new secret.
+	// RFC internal#524 Layer 1: route through globalGoAsync so tests can
+	// drain the detached restart goroutine before db.DB is swapped — see
+	// drainTestAsync in handlers_test.go and the canonical 69d9b4e3 fix.
 	if h.restartFunc != nil {
-		go h.restartFunc(workspaceID)
+		wsID := workspaceID
+		globalGoAsync(func() { h.restartFunc(wsID) })
 	}

 	c.JSON(http.StatusOK, gin.H{"status": "saved", "key": body.Key})
@@ -297,9 +301,11 @@ func (h *SecretsHandler) Delete(c *gin.Context) {
 		return
 	}

-	// Auto-restart workspace to pick up removed secret
+	// Auto-restart workspace to pick up removed secret.
+	// RFC internal#524 Layer 1: see Set() above for the drain rationale.
 	if h.restartFunc != nil {
-		go h.restartFunc(workspaceID)
+		wsID := workspaceID
+		globalGoAsync(func() { h.restartFunc(wsID) })
 	}

 	c.JSON(http.StatusOK, gin.H{"status": "deleted", "key": key})
@@ -379,7 +385,13 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) {
 	// reach existing workspaces until the container is recreated. Auto-restart
 	// every workspace whose env is affected — i.e. those WITHOUT a
 	// workspace-level override of the same key.
-	go h.restartAllAffectedByGlobalKey(body.Key)
+	//
+	// RFC internal#524 Layer 1: globalGoAsync so tests drain the fan-out
+	// (which itself spawns N more globalGoAsync restart calls below) before
+	// db.DB swap. Without this, the SELECT for affected workspaces races a
+	// subsequent test's db.DB restore.
+	key := body.Key
+	globalGoAsync(func() { h.restartAllAffectedByGlobalKey(key) })

 	c.JSON(http.StatusOK, gin.H{"status": "saved", "key": body.Key, "scope": "global"})
 }
@@ -423,7 +435,11 @@ func (h *SecretsHandler) restartAllAffectedByGlobalKey(key string) {
 	}
 	log.Printf("Global secret %s changed: auto-restarting %d workspace(s) to refresh env", key, len(ids))
 	for _, id := range ids {
-		go h.restartFunc(id)
+		// RFC internal#524 Layer 1: per-workspace restart via globalGoAsync
+		// so each restart goroutine is drained before db.DB is swapped in
+		// the test cleanup chain.
+		wsID := id
+		globalGoAsync(func() { h.restartFunc(wsID) })
 	}
 }

@@ -450,7 +466,10 @@ func (h *SecretsHandler) DeleteGlobal(c *gin.Context) {

 	// Issue #15: propagate deletion to running containers — otherwise they
 	// keep the stale env var until manual restart.
-	go h.restartAllAffectedByGlobalKey(key)
+	// RFC internal#524 Layer 1: globalGoAsync for the same drain rationale
+	// as SetGlobal above.
+	k := key
+	globalGoAsync(func() { h.restartAllAffectedByGlobalKey(k) })

 	c.JSON(http.StatusOK, gin.H{"status": "deleted", "key": key, "scope": "global"})
 }
@@ -552,7 +571,9 @@ func (h *SecretsHandler) SetModel(c *gin.Context) {
 	}

 	if h.restartFunc != nil {
-		go h.restartFunc(workspaceID)
+		// RFC internal#524 Layer 1: globalGoAsync (see Set()).
+		wsID := workspaceID
+		globalGoAsync(func() { h.restartFunc(wsID) })
 	}
 	if body.Model == "" {
 		c.JSON(http.StatusOK, gin.H{"status": "cleared"})
@@ -669,7 +690,9 @@ func (h *SecretsHandler) SetProvider(c *gin.Context) {
 	}

 	if h.restartFunc != nil {
-		go h.restartFunc(workspaceID)
+		// RFC internal#524 Layer 1: globalGoAsync (see Set()).
+		wsID := workspaceID
+		globalGoAsync(func() { h.restartFunc(wsID) })
 	}
 	if body.Provider == "" {
 		c.JSON(http.StatusOK, gin.H{"status": "cleared"})
@@ -88,6 +88,11 @@ func (h *SocketHandler) HandleConnect(c *gin.Context) {

 	// Wrap WritePump and ReadPump so the gauge is decremented exactly once
 	// when the client's write goroutine exits (WritePump owns conn lifetime).
+	// goAsync-exempt (RFC internal#524 Layer 2.2): WebSocket pumps live
+	// for the duration of the client connection (minutes-hours), not a
+	// single request. Wrapping them in globalGoAsync would block every
+	// test's t.Cleanup until every connected WS client disconnects. No
+	// db.DB access in either pump.
 	go func() {
 		ws.WritePump(client)
 		metrics.TrackWSDisconnect()
@@ -234,7 +234,9 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
 			"source":    "ec2-ssh",
 		})
 		if h.wh != nil {
-			go h.wh.RestartByID(workspaceID)
+			// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+			wsID := workspaceID
+			h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 		}
 		return
 	}
@@ -268,7 +270,9 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
 			"source":    "container",
 		})
 		if h.wh != nil {
-			go h.wh.RestartByID(workspaceID)
+			// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+			wsID := workspaceID
+			h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 		}
 		return
 	}
@@ -288,6 +292,8 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {

 	c.JSON(http.StatusOK, gin.H{"status": "replaced", "workspace": workspaceID, "files": len(body.Files), "source": "volume"})
 	if h.wh != nil {
-		go h.wh.RestartByID(workspaceID)
+		// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+		wsID := workspaceID
+		h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 	}
 }
@@ -570,7 +570,9 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
 		}
 		c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
 		if h.wh != nil {
-			go h.wh.RestartByID(workspaceID)
+			// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+			wsID := workspaceID
+			h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 		}
 		return
 	}
@@ -584,7 +586,9 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
 		}
 		c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
 		if h.wh != nil {
-			go h.wh.RestartByID(workspaceID)
+			// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+			wsID := workspaceID
+			h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 		}
 		return
 	}
@@ -598,7 +602,9 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
 	}
 	c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
 	if h.wh != nil {
-		go h.wh.RestartByID(workspaceID)
+		// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+		wsID := workspaceID
+		h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 	}
 }

@@ -651,7 +657,9 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
 		}
 		c.JSON(http.StatusOK, gin.H{"status": "deleted", "path": filePath})
 		if h.wh != nil {
-			go h.wh.RestartByID(workspaceID)
+			// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+			wsID := workspaceID
+			h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 		}
 		return
 	}
@@ -669,7 +677,9 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
 		}
 		c.JSON(http.StatusOK, gin.H{"status": "deleted", "path": filePath})
 		if h.wh != nil {
-			go h.wh.RestartByID(workspaceID)
+			// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+			wsID := workspaceID
+			h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 		}
 		return
 	}
@@ -682,6 +692,8 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
 	}
 	c.JSON(http.StatusOK, gin.H{"status": "deleted", "path": filePath})
 	if h.wh != nil {
-		go h.wh.RestartByID(workspaceID)
+		// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
+		wsID := workspaceID
+		h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
 	}
 }
@@ -211,6 +211,10 @@ func (h *TerminalHandler) handleLocalConnect(c *gin.Context, workspaceID string)
 	}

 	// Bridge: container stdout → WebSocket
+	// goAsync-exempt (RFC internal#524 Layer 2.2): per-WebSocket I/O
+	// bridge — lifetime is the connection, not a request. The handler
+	// blocks on `done` below, so the goroutine is already drained
+	// synchronously. No db.DB access on this path.
 	done := make(chan struct{})
 	go func() {
 		defer close(done)
@@ -433,6 +437,8 @@ func (h *TerminalHandler) handleRemoteConnect(c *gin.Context, workspaceID, insta
 	done := make(chan struct{})

 	// PTY → WebSocket
+	// goAsync-exempt (RFC internal#524 Layer 2.2): WebSocket-lifetime
+	// I/O bridge; handler blocks on `done` below. No db.DB access.
 	go func() {
 		defer close(done)
 		buf := make([]byte, 4096)
@@ -455,6 +461,7 @@ func (h *TerminalHandler) handleRemoteConnect(c *gin.Context, workspaceID, insta
 	}()

 	// WebSocket → PTY (stdin)
+	// goAsync-exempt (RFC internal#524 Layer 2.2): see above.
 	go func() {
 		for {
 			_, msg, rErr := conn.ReadMessage()
@@ -101,6 +101,47 @@ func (h *WorkspaceHandler) waitAsyncForTest() {
 	h.asyncWG.Wait()
 }

+// globalAsync tracks goroutines launched by globalGoAsync — the
+// equivalent of WorkspaceHandler.goAsync for sibling handlers that
+// don't carry a *WorkspaceHandler reference (SecretsHandler /
+// PluginsHandler / AdminPluginDriftHandler / ChannelHandler /
+// MCPHandler / RegistryHandler), and for callers of package-level
+// free functions (a2a_proxy_helpers extractAndUpsertTokenUsage).
+//
+// Forward-port of RFC internal#524 Layer 1 deliverable 2: the
+// canonical db.DB race fix lives at workspace.go:goAsync / asyncWG,
+// but ~25 sibling bare-`go` sites still write to db.DB outside any
+// WorkspaceHandler's drain window. globalAsync gives them the same
+// drain hook (waitGlobalAsyncForTest, drained from drainTestAsync)
+// so a test's t.Cleanup db.DB restore cannot race a detached
+// goroutine spawned by any sibling handler.
+//
+// Zero-cost in production (a single sync.WaitGroup Add/Done per
+// fire-and-forget call, no test-only branching).
+var globalAsync sync.WaitGroup
+
+// globalGoAsync schedules fn on a detached goroutine tracked by
+// globalAsync. Use this in package-internal callers that don't have
+// a *WorkspaceHandler receiver to thread h.goAsync through.
+//
+// When a *WorkspaceHandler IS available, prefer h.goAsync — it lets
+// per-handler tests (waitAsyncForTest) wait without disturbing
+// unrelated handlers' inflight work.
+func globalGoAsync(fn func()) {
+	globalAsync.Add(1)
+	go func() {
+		defer globalAsync.Done()
+		fn()
+	}()
+}
+
+// waitGlobalAsyncForTest blocks until every globalGoAsync goroutine
+// finishes. Called from drainTestAsync's cleanup chain in the test
+// harness; production code never calls it.
+func waitGlobalAsyncForTest() {
+	globalAsync.Wait()
+}
+
 func NewWorkspaceHandler(b events.EventEmitter, p *provisioner.Provisioner, platformURL, configsDir string) *WorkspaceHandler {
 	h := &WorkspaceHandler{
 		broadcaster: b,
@@ -0,0 +1,70 @@
+-- Reverse of 20260518000000_seed_production_team_agent_cards.up.sql.
+--
+-- Clears the identity fields back to the gap state that the up
+-- migration was designed to fix. After this down migration, the PR
+-- #1427 reconcile has nothing to substitute again: name reverts to the
+-- workspace UUID (the runtime's fallback), role to NULL, agent_card
+-- description/skills to empty. This is the pre-#1427 + pre-this-seed
+-- behaviour.
+--
+-- Match strategy mirrors the up migration (id::text LIKE prefix for 5,
+-- exact UUID for CEO-Assistant) so any down-roll touches the exact
+-- same rows.
+
+BEGIN;
+
+-- PM
+UPDATE workspaces
+SET name = id::text,
+    role = NULL,
+    agent_card = (agent_card - 'description' - 'skills' - 'role') ||
+        jsonb_build_object('name', id::text),
+    updated_at = now()
+WHERE id::text LIKE '8a71d4d4-%';
+
+-- Reviewer
+UPDATE workspaces
+SET name = id::text,
+    role = NULL,
+    agent_card = (agent_card - 'description' - 'skills' - 'role') ||
+        jsonb_build_object('name', id::text),
+    updated_at = now()
+WHERE id::text LIKE '27e66b5a-%';
+
+-- Researcher
+UPDATE workspaces
+SET name = id::text,
+    role = NULL,
+    agent_card = (agent_card - 'description' - 'skills' - 'role') ||
+        jsonb_build_object('name', id::text),
+    updated_at = now()
+WHERE id::text LIKE '5773bd5f-%';
+
+-- Dev-A
+UPDATE workspaces
+SET name = id::text,
+    role = NULL,
+    agent_card = (agent_card - 'description' - 'skills' - 'role') ||
+        jsonb_build_object('name', id::text),
+    updated_at = now()
+WHERE id::text LIKE '4ca4c06c-%';
+
+-- Dev-B
+UPDATE workspaces
+SET name = id::text,
+    role = NULL,
+    agent_card = (agent_card - 'description' - 'skills' - 'role') ||
+        jsonb_build_object('name', id::text),
+    updated_at = now()
+WHERE id::text LIKE '31eb65ed-%';
+
+-- CEO-Assistant
+UPDATE workspaces
+SET name = id::text,
+    role = NULL,
+    agent_card = (agent_card - 'description' - 'skills' - 'role') ||
+        jsonb_build_object('name', id::text),
+    updated_at = now()
+WHERE id = '30ba7f0b-b303-4a20-aefe-3a4a675b8aa4'::uuid;
+
+COMMIT;
@@ -0,0 +1,165 @@
+-- Seed identity (name + role + agent_card description/skills) for the
+-- 6 production-team workspaces. Pairs with the PR #1427 server-side
+-- reconcile (internal#492): #1427 added the platform-side backfill that
+-- pulls workspaces.name and workspaces.role into the stored agent_card
+-- on /registry/register; this migration populates the trusted DB row
+-- those reads consume.
+--
+-- Without this seed, the reconcile has nothing to substitute and the
+-- card stays at name=UUID / description="" / role=null for the prod
+-- team agents — the exact gap internal#492 is filed against.
+--
+-- Identity stays platform-controlled — the agent runtime cannot
+-- self-write these fields. The 6 workspace UUIDs are the CTO-locked
+-- production-team topology (see project_production_agent_team_topology):
+--
+--   PM             8a71d4d4... — Claude Code on Opus, read-only,
+--                                A2A-delegate-only coordinator
+--   Reviewer       27e66b5a... — codex on openai-subscription,
+--                                5-axis non-author review
+--   Researcher     5773bd5f... — codex on openai-subscription,
+--                                root-cause investigation
+--   Dev-A          4ca4c06c... — Claude Code on Kimi K2.6
+--                                (api.kimi.com/coding base + ANTHROPIC_API_KEY)
+--   Dev-B          31eb65ed... — Claude Code on MiniMax
+--                                (api.minimax.io/anthropic base + sk-cp-* key)
+--   CEO-Assistant  30ba7f0b-b303-4a20-aefe-3a4a675b8aa4 — Claude Code,
+--                                orchestrator-side operations + canvas relay
+--
+-- Match strategy: 5 of 6 production UUIDs were provided to me by the CTO
+-- as 8-char prefixes only (the full UUIDs live in the prod tenant DB).
+-- We match those 5 with `id::text LIKE '<prefix>-%'` so this migration
+-- is unambiguous when reviewed without DB access — the CTO will confirm
+-- on review that each prefix resolves to a single row. CEO-Assistant
+-- (30ba7f0b-b303-4a20-aefe-3a4a675b8aa4) is known in full from
+-- chat_files_test.go and is matched exactly.
+--
+-- Idempotent: each UPDATE only touches the three identity fields. Re-
+-- running rewrites the same values. UUIDs not present in a given tenant
+-- DB match zero rows and are silently skipped — the migration never
+-- INSERTs rows it doesn't own.
+--
+-- All names obey validateWorkspaceFields (workspace_crud.go:526):
+-- <=255 chars, no newline/CR, no YAML-special chars `{}[]|>*&!`.
+-- All roles obey the same contract <=1000 chars. Per-skill description
+-- <=120 chars matches the discovery card surface shown on the canvas
+-- Agent Card view and the mobile peer chip.
+
+BEGIN;
+
+-- PM — read-only A2A coordinator
+UPDATE workspaces
+SET name = 'Production Manager',
+    role = 'product manager',
+    agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
+        'name', 'Production Manager',
+        'description', 'Read-only A2A coordinator that plans work and delegates to Dev/Reviewer/Researcher peers; never writes code itself.',
+        'role', 'product manager',
+        'skills', jsonb_build_array(
+            jsonb_build_object('id','planning','name','planning','description','Decompose CTO directives into peer-delegable units','tags',jsonb_build_array('planning'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','delegation','name','delegation','description','Route work to Dev-A / Dev-B / Reviewer / Researcher via A2A','tags',jsonb_build_array('delegation'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','coordination','name','coordination','description','Track peer activity and surface blockers back to the CTO','tags',jsonb_build_array('coordination'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','read-only','name','read-only','description','Never edits code or merges; proposes only','tags',jsonb_build_array('read-only','safety'),'examples',jsonb_build_array())
+        ),
+        'updated_at', now()::text
+    ),
+    updated_at = now()
+WHERE id::text LIKE '8a71d4d4-%';
+
+-- Reviewer — codex/openai, 5-axis non-author review
+UPDATE workspaces
+SET name = 'Code Reviewer',
+    role = 'code reviewer',
+    agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
+        'name', 'Code Reviewer',
+        'description', 'Non-author 5-axis review on codex/openai-subscription; runs the merge gate, never approves PRs it authored.',
+        'role', 'code reviewer',
+        'skills', jsonb_build_array(
+            jsonb_build_object('id','code-review','name','code-review','description','Five-axis PR review against the merge gate','tags',jsonb_build_array('review'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','security-axis','name','security-axis','description','Trust-boundary, secret-handling, injection surface checks','tags',jsonb_build_array('security'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','correctness-axis','name','correctness-axis','description','Logic, error-handling, race and boundary case checks','tags',jsonb_build_array('correctness'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','non-author-approve','name','non-author-approve','description','Approves only PRs the reviewer did not author','tags',jsonb_build_array('two-eyes'),'examples',jsonb_build_array())
+        ),
+        'updated_at', now()::text
+    ),
+    updated_at = now()
+WHERE id::text LIKE '27e66b5a-%';
+
+-- Researcher — codex/openai, root-cause investigation
+UPDATE workspaces
+SET name = 'Root-Cause Researcher',
+    role = 'researcher',
+    agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
+        'name', 'Root-Cause Researcher',
+        'description', 'Diagnostic investigation on codex/openai-subscription; obs-first, source-as-corroboration, no drive-by fixes.',
+        'role', 'researcher',
+        'skills', jsonb_build_array(
+            jsonb_build_object('id','root-cause','name','root-cause','description','Diagnose the underlying cause, never patch symptoms','tags',jsonb_build_array('investigation'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','obs-first','name','obs-first','description','Grafana/Loki query before source-guessing','tags',jsonb_build_array('observability'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','log-correlation','name','log-correlation','description','Cross-service step= / Delegation uuid tracing','tags',jsonb_build_array('observability'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','source-archaeology','name','source-archaeology','description','Git blame and prior-art recall across repos','tags',jsonb_build_array('git'),'examples',jsonb_build_array())
+        ),
+        'updated_at', now()::text
+    ),
+    updated_at = now()
+WHERE id::text LIKE '5773bd5f-%';
+
+-- Dev-A — Claude Code on Kimi K2.6
+UPDATE workspaces
+SET name = 'Dev Engineer A (Kimi)',
+    role = 'dev engineer',
+    agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
+        'name', 'Dev Engineer A (Kimi)',
+        'description', 'Claude Code routed to Kimi K2.6 via api.kimi.com/coding; implements PRs against the dev-tree protected branches.',
+        'role', 'dev engineer',
+        'skills', jsonb_build_array(
+            jsonb_build_object('id','implementation','name','implementation','description','Write code to merge gate (tests, lint, types)','tags',jsonb_build_array('coding'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','test-driven','name','test-driven','description','Failing test first, then minimal fix','tags',jsonb_build_array('tdd'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','bug-fixing','name','bug-fixing','description','Root-caused bug fixes with regression test','tags',jsonb_build_array('debugging'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','refactoring','name','refactoring','description','In-scope, behavior-preserving refactors only','tags',jsonb_build_array('refactor'),'examples',jsonb_build_array())
+        ),
+        'updated_at', now()::text
+    ),
+    updated_at = now()
+WHERE id::text LIKE '4ca4c06c-%';
+
+-- Dev-B — Claude Code on MiniMax
+UPDATE workspaces
+SET name = 'Dev Engineer B (MiniMax)',
+    role = 'dev engineer',
+    agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
+        'name', 'Dev Engineer B (MiniMax)',
+        'description', 'Claude Code routed to MiniMax via api.minimax.io/anthropic; parallel dev capacity to Dev-A on the same gate.',
+        'role', 'dev engineer',
+        'skills', jsonb_build_array(
+            jsonb_build_object('id','implementation','name','implementation','description','Write code to merge gate (tests, lint, types)','tags',jsonb_build_array('coding'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','test-driven','name','test-driven','description','Failing test first, then minimal fix','tags',jsonb_build_array('tdd'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','bug-fixing','name','bug-fixing','description','Root-caused bug fixes with regression test','tags',jsonb_build_array('debugging'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','refactoring','name','refactoring','description','In-scope, behavior-preserving refactors only','tags',jsonb_build_array('refactor'),'examples',jsonb_build_array())
+        ),
+        'updated_at', now()::text
+    ),
+    updated_at = now()
+WHERE id::text LIKE '31eb65ed-%';
+
+-- CEO-Assistant — Claude Code, orchestrator + canvas relay
+-- Full UUID known from chat_files_test.go:286 — match exactly.
+UPDATE workspaces
+SET name = 'CEO Assistant',
+    role = 'operator orchestrator',
+    agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
+        'name', 'CEO Assistant',
+        'description', 'Orchestrator-side Claude Code that runs the triage loop, relays canvas and Telegram, dispatches non-author reviewers.',
+        'role', 'operator orchestrator',
+        'skills', jsonb_build_array(
+            jsonb_build_object('id','triage-loop','name','triage-loop','description','Run the CI/PR triage loop; fix-what-you-find','tags',jsonb_build_array('orchestration'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','review-routing','name','review-routing','description','Dispatch non-author reviewers via delegate_task','tags',jsonb_build_array('routing'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','canvas-relay','name','canvas-relay','description','Relay CTO canvas/Telegram messages to peers','tags',jsonb_build_array('relay'),'examples',jsonb_build_array()),
+            jsonb_build_object('id','ops','name','ops','description','Direct hands-on ops on operator host and Neon','tags',jsonb_build_array('ops','direct-action'),'examples',jsonb_build_array())
+        ),
+        'updated_at', now()::text
+    ),
+    updated_at = now()
+WHERE id = '30ba7f0b-b303-4a20-aefe-3a4a675b8aa4'::uuid;
+
+COMMIT;