Merge branch 'main' of https://git.moleculesai.app/molecule-ai/molecule-core into ci/146-lint-no-tenant-gitea-token
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
CI / Detect changes (pull_request) Successful in 6s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 22s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 8s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 5s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 6s
Lint no tenant GITEA/GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Failing after 28s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Successful in 1m11s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 4s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 21s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 8s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 3s
CI / Platform (Go) (pull_request) Successful in 5m19s
gate-check-v3 / gate-check (pull_request) Successful in 4s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m20s
security-review / approved (pull_request) Successful in 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
qa-review / approved (pull_request) Failing after 5s
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-tier-check / tier-check (pull_request) Successful in 5s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Failing after 1m9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 3s
E2E Chat / E2E Chat (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Successful in 1s
CI / Canvas (Next.js) (pull_request) Successful in 7m10s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / Python Lint & Test (pull_request) Successful in 7m6s
CI / all-required (pull_request) Successful in 6m44s
audit-force-merge / audit (pull_request) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
CI / Detect changes (pull_request) Successful in 6s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 22s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 8s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 5s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 6s
Lint no tenant GITEA/GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Failing after 28s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Successful in 1m11s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 4s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 21s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 8s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 3s
CI / Platform (Go) (pull_request) Successful in 5m19s
gate-check-v3 / gate-check (pull_request) Successful in 4s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m20s
security-review / approved (pull_request) Successful in 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
qa-review / approved (pull_request) Failing after 5s
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-tier-check / tier-check (pull_request) Successful in 5s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Failing after 1m9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 3s
E2E Chat / E2E Chat (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Successful in 1s
CI / Canvas (Next.js) (pull_request) Successful in 7m10s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / Python Lint & Test (pull_request) Successful in 7m6s
CI / all-required (pull_request) Successful in 6m44s
audit-force-merge / audit (pull_request) Successful in 7s
This commit is contained in:
@@ -401,7 +401,7 @@ jobs:
|
||||
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker-host
|
||||
# mc#774 root-fix: added job-level `if:` so ci-required-drift.py's
|
||||
# ci_job_names() detects this as github.ref-gated and skips it from F1.
|
||||
# The step-level exit 0 handles the "not main push" case; the job-level
|
||||
|
||||
@@ -164,7 +164,7 @@ jobs:
|
||||
# bp-required: pending #1296
|
||||
peer-visibility-local:
|
||||
name: E2E Peer Visibility (local)
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker-host
|
||||
timeout-minutes: 30
|
||||
env:
|
||||
# Per-run names + ephemeral ports — same collision-avoidance as
|
||||
|
||||
@@ -88,17 +88,6 @@ jobs:
|
||||
with open(path) as f:
|
||||
raw_lines = f.readlines()
|
||||
|
||||
# Strip pure-comment lines for docker-exec detection so that
|
||||
# documentation comments don't trigger the lint.
|
||||
scan_text = ''.join(
|
||||
l for l in raw_lines
|
||||
if not re.match(r'^\s*#', l)
|
||||
)
|
||||
|
||||
has_docker = bool(DOCKER_EXEC.search(scan_text)) or bool(DOCKER_ACTION.search(scan_text))
|
||||
if not has_docker:
|
||||
continue
|
||||
|
||||
# Parse job headers + their runs-on. Simple line scan; relies on
|
||||
# 2-space job indent + 4-space runs-on indent under `jobs:`.
|
||||
jobs = []
|
||||
@@ -113,8 +102,9 @@ jobs:
|
||||
mh = JOB_HEADER.match(line)
|
||||
if mh:
|
||||
if current:
|
||||
current['end'] = i - 1
|
||||
jobs.append(current)
|
||||
current = {'name': mh.group(2), 'line': i, 'runs_on': None}
|
||||
current = {'name': mh.group(2), 'line': i, 'end': len(raw_lines), 'runs_on': None}
|
||||
continue
|
||||
mr = RUNS_ON.match(line)
|
||||
if mr and current and current['runs_on'] is None:
|
||||
@@ -123,6 +113,18 @@ jobs:
|
||||
jobs.append(current)
|
||||
|
||||
for j in jobs:
|
||||
# Strip pure-comment lines for docker-exec detection so
|
||||
# documentation comments don't trigger the lint. Scan the
|
||||
# current job body only: a workflow may contain one
|
||||
# docker-bound job and several harmless metadata jobs.
|
||||
job_lines = raw_lines[j['line'] - 1:j['end']]
|
||||
scan_text = ''.join(
|
||||
l for l in job_lines
|
||||
if not re.match(r'^\s*#', l)
|
||||
)
|
||||
has_docker = bool(DOCKER_EXEC.search(scan_text)) or bool(DOCKER_ACTION.search(scan_text))
|
||||
if not has_docker:
|
||||
continue
|
||||
ro = j['runs_on']
|
||||
if ro is None:
|
||||
# Reusable workflow caller (`uses:` instead of `runs-on:`) —
|
||||
@@ -134,9 +136,11 @@ jobs:
|
||||
labels = [t.strip().strip('"\'') for t in ro_norm.split(',') if t.strip()]
|
||||
if any(lbl in ALLOWED_LABELS for lbl in labels):
|
||||
continue
|
||||
# Allow caller-supplied label expression `${{ ... }}` — caller
|
||||
# is responsible.
|
||||
if any('${{' in lbl for lbl in labels):
|
||||
# Allow caller-supplied label expressions; spell the
|
||||
# marker indirectly so Gitea's expression parser does
|
||||
# not try to parse this Python heredoc.
|
||||
expression_marker = '$' + '{{'
|
||||
if any(expression_marker in lbl for lbl in labels):
|
||||
continue
|
||||
fails.append(
|
||||
f"{path}:{j['line']}: job `{j['name']}` uses docker but runs-on={ro!r} "
|
||||
|
||||
@@ -300,7 +300,7 @@ jobs:
|
||||
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker-host
|
||||
needs: [changes, canvas-build]
|
||||
# Only fires on direct pushes to main (i.e. after staging→main promotion).
|
||||
if: needs.changes.outputs.canvas == 'true' && github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
@@ -440,4 +440,3 @@ jobs:
|
||||
|
||||
# SDK + plugin validation moved to standalone repo:
|
||||
# github.com/molecule-ai/molecule-sdk-python
|
||||
|
||||
|
||||
@@ -128,7 +128,7 @@ jobs:
|
||||
e2e-api:
|
||||
needs: detect-changes
|
||||
name: E2E API Smoke Test
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker-host
|
||||
timeout-minutes: 15
|
||||
env:
|
||||
# Unique per-run container names so concurrent runs on the host-
|
||||
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
integration:
|
||||
name: Handlers Postgres Integration
|
||||
needs: detect-changes
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker-host
|
||||
env:
|
||||
# Unique name per run so concurrent jobs don't collide on the
|
||||
# bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
|
||||
@@ -249,4 +249,3 @@ jobs:
|
||||
# already gone (e.g. concurrent rerun race), don't fail the job.
|
||||
docker rm -f "${PG_NAME}" >/dev/null 2>&1 || true
|
||||
echo "Cleaned up ${PG_NAME}"
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ jobs:
|
||||
harness-replays:
|
||||
needs: detect-changes
|
||||
name: Harness Replays
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: docker-host
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: No-op pass (paths filter excluded this commit)
|
||||
|
||||
@@ -39,7 +39,7 @@ env:
|
||||
jobs:
|
||||
build-and-push:
|
||||
name: Build & push canvas image
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: publish
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
+8
-7
@@ -69,7 +69,7 @@ or other removed paths — open against `molecule-ai/docs` instead.
|
||||
| OG images, visual assets | `molecule-ai/docs` → `app/` or `marketing/` |
|
||||
| SEO briefs | `molecule-ai/docs` → `marketing/` |
|
||||
| DevRel demos (runnable code) | Standalone repo under `molecule-ai/`, OR embedded in `molecule-ai/docs` |
|
||||
| Launch checklists, internal tracking | GitHub Issues — **not** committed files |
|
||||
| Launch checklists, internal tracking | Gitea Issues — **not** committed files |
|
||||
| Engineering docs (`docs/adr/`, `docs/architecture/`, `docs/incidents/`) | This repo (internal, not published) |
|
||||
| Live product pages (e.g. `canvas/src/app/pricing/page.tsx`) | This repo (these are app code, not marketing copy) |
|
||||
|
||||
@@ -106,7 +106,7 @@ causing a render loop when any node position changed.
|
||||
|
||||
#### Auto-merge & the "extra commit" trap
|
||||
|
||||
**Two system guards protect against pushing commits after auto-merge has been enabled.** Don't try to work around them — they exist because we shipped a half-merged PR on 2026-04-27 (`#2174` merged with only its first commit; the second was orphaned on a branch GitHub had already deleted).
|
||||
**Two system guards protect against pushing commits after auto-merge has been enabled.** Don't try to work around them — they exist because we shipped a half-merged PR on 2026-04-27 (`#2174` merged with only its first commit; the second was orphaned on a branch the host had already deleted).
|
||||
|
||||
1. **Repo-wide:** "Automatically delete head branches" is on. Once a PR merges, the branch is deleted server-side. Any subsequent `git push` to that branch fails with `remote rejected — no such branch`.
|
||||
|
||||
@@ -145,7 +145,7 @@ Fix violations before committing — the hook will reject the commit.
|
||||
|
||||
### CI Pipeline
|
||||
|
||||
CI runs on GitHub Actions with a self-hosted runner. External contributors:
|
||||
CI runs on Gitea Actions with self-hosted runners. External contributors:
|
||||
PRs from forks will not trigger CI automatically. A maintainer will review
|
||||
and run CI manually.
|
||||
|
||||
@@ -192,7 +192,7 @@ live in their own repos:
|
||||
|
||||
- [`molecule-ai/molecule-ai-workspace-runtime`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-runtime) — Python adapter SDK (`molecule_runtime`) that runs inside containerized Molecule workspaces. Bridges Claude Code SDK / hermes / langgraph / etc. → A2A queue.
|
||||
- [`molecule-ai/molecule-sdk-python`](https://git.moleculesai.app/molecule-ai/molecule-sdk-python) — `A2AServer` + `RemoteAgentClient` for external agents that register over the public `/registry/register` flow.
|
||||
- [`molecule-ai/molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel) — Claude Code channel plugin. Bridges A2A traffic into a running Claude Code session via MCP `notifications/claude/channel`. Polling-based (no tunnel required); install inside Claude Code via `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels --channels plugin:molecule@molecule-channel`.
|
||||
- [`molecule-ai/molecule-mcp-claude-channel`](https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel) — Claude Code channel plugin. Bridges A2A traffic into a running Claude Code session via MCP `notifications/claude/channel`. Polling-based (no tunnel required); install inside Claude Code via `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels=plugin:molecule@molecule-channel`.
|
||||
|
||||
When extending the **A2A surface** in molecule-core (`workspace-server/internal/handlers/a2a_proxy.go` etc.), consider whether the change has a downstream impact on the runtime SDK or the channel plugin — they're versioned independently but share the wire shape.
|
||||
|
||||
@@ -206,7 +206,7 @@ See `CLAUDE.md` for detailed architecture documentation, including:
|
||||
|
||||
## Reporting Issues
|
||||
|
||||
Use GitHub Issues with a clear title and reproduction steps. Include:
|
||||
Use Gitea Issues with a clear title and reproduction steps. Include:
|
||||
- What you expected
|
||||
- What actually happened
|
||||
- Platform/OS version
|
||||
@@ -214,8 +214,9 @@ Use GitHub Issues with a clear title and reproduction steps. Include:
|
||||
|
||||
## Security
|
||||
|
||||
If you discover a security vulnerability, please report it privately via
|
||||
GitHub Security Advisories rather than opening a public issue.
|
||||
If you discover a security vulnerability, please report it privately by
|
||||
opening an issue against `molecule-ai/internal` (a private repo only
|
||||
maintainers can see) rather than filing a public issue here.
|
||||
|
||||
## License
|
||||
|
||||
|
||||
@@ -238,7 +238,7 @@ The result is not just “an agent that learns.” It is **an organization that
|
||||
- subscribe to one or more workspaces; peer messages surface as conversation turns; replies route back through Molecule's A2A
|
||||
- no tunnel, no public endpoint — the plugin self-registers each watched workspace as `delivery_mode=poll` and long-polls `/activity?since_id=…`
|
||||
- multi-tenant friendly: one plugin install can watch workspaces across multiple Molecule tenants (`MOLECULE_PLATFORM_URLS` per-workspace)
|
||||
- install via the standard marketplace flow: `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels --channels plugin:molecule@molecule-channel`
|
||||
- install via the standard marketplace flow: `/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`, then launch with `claude --dangerously-load-development-channels=plugin:molecule@molecule-channel`
|
||||
|
||||
## Built For Teams That Need More Than A Demo
|
||||
|
||||
|
||||
+1
-1
@@ -237,7 +237,7 @@ Molecule AI 并不是要替代下面这些 framework,而是把它们纳入更
|
||||
- 订阅一个或多个 workspace;peer 的消息会以 user-turn 出现,回复会经 Molecule A2A 路由出去
|
||||
- 无需公网隧道、无需公开端点 —— 插件启动时自动把每个 watched workspace 注册成 `delivery_mode=poll`,长轮询 `/activity?since_id=…`
|
||||
- 多租户友好:单次安装即可同时 watch 跨多个 Molecule 租户的 workspace(`MOLECULE_PLATFORM_URLS` 按 workspace 配置)
|
||||
- 通过标准 marketplace 流程安装:`/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`,然后用 `claude --dangerously-load-development-channels --channels plugin:molecule@molecule-channel` 启动
|
||||
- 通过标准 marketplace 流程安装:`/plugin marketplace add https://git.moleculesai.app/molecule-ai/molecule-mcp-claude-channel.git` → `/plugin install molecule@molecule-channel`,然后用 `claude --dangerously-load-development-channels=plugin:molecule@molecule-channel` 启动
|
||||
|
||||
## 适合什么团队
|
||||
|
||||
|
||||
@@ -153,11 +153,13 @@ print('OK:found=%d/%d' % (len(found), len(expect)))
|
||||
# Caller bug, not a runtime regression — surface loudly so a
|
||||
# mis-wired backend can't mint a false green.
|
||||
echo " ✗ $rt: no expected peers were configured for this caller"
|
||||
# shellcheck disable=SC2034 # exported verdict is read by the caller's map plumbing.
|
||||
PV_VERDICT="FAIL(rpc=NO_EXPECTED_PEERS_CONFIGURED)"
|
||||
return 1
|
||||
;;
|
||||
*)
|
||||
echo " ✗ $rt: unexpected verdict '$parse'"
|
||||
# shellcheck disable=SC2034 # exported verdict is read by the caller's map plumbing.
|
||||
PV_VERDICT="FAIL(unknown)"
|
||||
return 1
|
||||
;;
|
||||
|
||||
@@ -208,7 +208,9 @@ log " PARENT_ID=$PARENT_ID"
|
||||
# box (bash 3.2, no associative arrays) per feedback_local_must_mimic_
|
||||
# production. WS_IDS / VERDICT are kept as newline-delimited "rt<TAB>val"
|
||||
# maps with tiny get/set helpers (portable to bash 3.2+ AND ubuntu CI).
|
||||
# shellcheck disable=SC2034 # map values are updated through portable eval-based helpers.
|
||||
WS_IDS_MAP=""
|
||||
# shellcheck disable=SC2034 # map values are updated through portable eval-based helpers.
|
||||
VERDICT_MAP=""
|
||||
_map_set() { # _map_set <mapvarname> <key> <value>
|
||||
local __m="$1" __k="$2" __v="$3" __cur
|
||||
|
||||
@@ -556,7 +556,14 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
|
||||
// Track LLM token usage for cost transparency (#593).
|
||||
// Fires in a detached goroutine so token accounting never adds latency
|
||||
// to the critical A2A path.
|
||||
go extractAndUpsertTokenUsage(context.WithoutCancel(ctx), workspaceID, respBody)
|
||||
// RFC internal#524 Layer 1: extractAndUpsertTokenUsage reads db.DB
|
||||
// (INSERT INTO llm_token_usage). Without globalGoAsync, the detached
|
||||
// write races a subsequent test's db.DB swap exactly like the
|
||||
// maybeMarkContainerDead path that 69d9b4e3 fixed.
|
||||
tokCtx := context.WithoutCancel(ctx)
|
||||
wsID := workspaceID
|
||||
tokBody := respBody
|
||||
globalGoAsync(func() { extractAndUpsertTokenUsage(tokCtx, wsID, tokBody) })
|
||||
|
||||
// Non-2xx agent response: the agent received the request but returned an
|
||||
// error status. Return a proxyErr so the caller (DrainQueueForWorkspace)
|
||||
@@ -931,6 +938,12 @@ func applyIdleTimeout(parent context.Context, b *events.Broadcaster, workspaceID
|
||||
}
|
||||
ctx, cancel := context.WithCancel(parent)
|
||||
sub, unsub := b.SubscribeSSE(workspaceID)
|
||||
// goAsync-exempt (RFC internal#524 Layer 2.2 annotation): this
|
||||
// goroutine owns the parent ctx's cancel and exits only on
|
||||
// ctx.Done() / sub-channel close — wrapping it in globalGoAsync would
|
||||
// deadlock drainTestAsync because the request that owns ctx hasn't
|
||||
// completed when t.Cleanup fires. Does NOT read db.DB; idle-timer
|
||||
// management only.
|
||||
go func() {
|
||||
defer unsub()
|
||||
timer := time.NewTimer(idle)
|
||||
|
||||
@@ -189,13 +189,16 @@ func (h *AdminPluginDriftHandler) Apply(c *gin.Context) {
|
||||
// at construction. Trigger it asynchronously so the HTTP response returns
|
||||
// immediately after the install; the restart is best-effort.
|
||||
if h.pluginsHandler != nil {
|
||||
go func() {
|
||||
// RFC internal#524 Layer 1: globalGoAsync so the detached restart
|
||||
// is drained before db.DB swap (see workspace.go:globalGoAsync).
|
||||
wsID := entry.WorkspaceID
|
||||
globalGoAsync(func() {
|
||||
// We can't use result.PluginName as a restart key since the
|
||||
// restartFunc takes a workspaceID. Pass the workspaceID.
|
||||
if restart := h.pluginsHandler.GetRestartFunc(); restart != nil {
|
||||
restart(entry.WorkspaceID)
|
||||
restart(wsID)
|
||||
}
|
||||
}()
|
||||
})
|
||||
}
|
||||
|
||||
log.Printf("AdminPluginDrift: applied drift update for %s/%s (queue_id=%s)",
|
||||
|
||||
@@ -556,13 +556,16 @@ func (h *ChannelHandler) Webhook(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Process asynchronously — don't block the webhook response
|
||||
go func() {
|
||||
// Process asynchronously — don't block the webhook response.
|
||||
// RFC internal#524 Layer 1: globalGoAsync — HandleInbound traverses
|
||||
// db.DB to resolve workspace + record the channel event; drained by
|
||||
// drainTestAsync before db.DB swap.
|
||||
globalGoAsync(func() {
|
||||
bgCtx := context.Background()
|
||||
if err := h.manager.HandleInbound(bgCtx, ch, msg); err != nil {
|
||||
log.Printf("Channels: async HandleInbound error for workspace %s: %v", ch.WorkspaceID[:12], err)
|
||||
}
|
||||
}()
|
||||
})
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "accepted"})
|
||||
}
|
||||
|
||||
@@ -185,10 +185,15 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
|
||||
delegationCtx, cancelDelegation := context.WithTimeout(
|
||||
context.WithoutCancel(ctx), 30*time.Minute,
|
||||
)
|
||||
go func() {
|
||||
// RFC internal#524 Layer 1: route through workspace.goAsync so the
|
||||
// detached executeDelegation (which writes A2A status rows to db.DB
|
||||
// across multiple stages) is drained before db.DB is restored in a
|
||||
// later test's t.Cleanup. Tracked via the parent workspace handler's
|
||||
// asyncWG.
|
||||
h.workspace.goAsync(func() {
|
||||
defer cancelDelegation()
|
||||
h.executeDelegation(delegationCtx, sourceID, body.TargetID, delegationID, a2aBody)
|
||||
}()
|
||||
})
|
||||
|
||||
// Broadcast event so canvas shows delegation in real-time
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventDelegationSent), sourceID, map[string]interface{}{
|
||||
|
||||
@@ -129,6 +129,14 @@ var (
|
||||
|
||||
// getEICTunnelPool returns the singleton pool, lazy-initialising on
|
||||
// first call. Idempotent.
|
||||
//
|
||||
// goAsync-exempt (RFC internal#524 Layer 2.2): every `go` in this file
|
||||
// is pool-internal lifecycle (janitor + per-entry cleanup closures).
|
||||
// None reads db.DB — the pool tracks SSH tunnels, not workspace state.
|
||||
// The janitor exits on close(p.stopJanitor); cleanups exit when the
|
||||
// captured tunnel's resources are released. Wrapping in globalGoAsync
|
||||
// would block test cleanup on the singleton janitor that intentionally
|
||||
// runs forever.
|
||||
func getEICTunnelPool() *eicTunnelPool {
|
||||
globalEICTunnelPoolOnce.Do(func() {
|
||||
globalEICTunnelPool = newEICTunnelPool()
|
||||
|
||||
@@ -48,6 +48,14 @@ func init() {
|
||||
// finish. Called from setupTestDB's cleanup before db.DB is restored so
|
||||
// no detached restart/provision goroutine is mid-read of db.DB when the
|
||||
// pointer is swapped.
|
||||
//
|
||||
// Also drains the package-level globalAsync WaitGroup (RFC internal#524
|
||||
// Layer 1 deliverable 2) so sibling handlers (SecretsHandler /
|
||||
// PluginsHandler / etc.) that route through globalGoAsync rather than
|
||||
// h.goAsync are likewise drained before db.DB is swapped. Without this
|
||||
// drain a SecretsHandler.Set's restartFunc-via-globalGoAsync could race
|
||||
// the db.DB restore exactly the same way maybeMarkContainerDead did
|
||||
// before commit 69d9b4e3.
|
||||
func drainTestAsync() {
|
||||
liveTestHandlersMu.Lock()
|
||||
handlers := make([]*WorkspaceHandler, len(liveTestHandlers))
|
||||
@@ -56,6 +64,7 @@ func drainTestAsync() {
|
||||
for _, h := range handlers {
|
||||
h.waitAsyncForTest()
|
||||
}
|
||||
waitGlobalAsyncForTest()
|
||||
}
|
||||
|
||||
// setupTestDB creates a sqlmock DB and assigns it to the global db.DB.
|
||||
|
||||
@@ -278,7 +278,10 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
|
||||
// Fire and forget in a detached goroutine. Use a background context so
|
||||
// the call is not cancelled when the HTTP request completes.
|
||||
go func() {
|
||||
// RFC internal#524 Layer 1: globalGoAsync — the detached call reads
|
||||
// db.DB (mcpResolveURL + updateMCPDelegationStatus) and must be
|
||||
// drained by drainTestAsync before any t.Cleanup-driven db.DB swap.
|
||||
globalGoAsync(func() {
|
||||
bgCtx, cancel := context.WithTimeout(context.Background(), mcpAsyncCallTimeout)
|
||||
defer cancel()
|
||||
|
||||
@@ -322,7 +325,7 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
// Drain response so the connection can be reused.
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
}()
|
||||
})
|
||||
|
||||
return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, delegationID, targetID), nil
|
||||
}
|
||||
|
||||
@@ -534,10 +534,14 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
// Docker-mode otherwise; the org-import call site doesn't need
|
||||
// to know which.
|
||||
provisionSem <- struct{}{} // acquire
|
||||
go func(wID, tPath string, cFiles map[string][]byte, p models.CreateWorkspacePayload) {
|
||||
// RFC internal#524 Layer 1: route through workspace.goAsync —
|
||||
// provisionWorkspaceAuto inserts/updates the workspaces row in
|
||||
// db.DB and must be drained before any test cleanup swap.
|
||||
wID, tPath, cFiles, p := id, templatePath, configFiles, payload
|
||||
h.workspace.goAsync(func() {
|
||||
defer func() { <-provisionSem }() // release
|
||||
h.workspace.provisionWorkspaceAuto(wID, tPath, cFiles, p)
|
||||
}(id, templatePath, configFiles, payload)
|
||||
})
|
||||
}
|
||||
|
||||
// Insert schedules if defined. Resolve each schedule's prompt body from
|
||||
|
||||
@@ -198,12 +198,16 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
|
||||
log.Printf("Plugin uninstall: failed to delete workspace_plugins row for %s: %v (container cleanup succeeded)", pluginName, err)
|
||||
}
|
||||
|
||||
// Auto-restart (small delay to ensure fs writes are flushed)
|
||||
// Auto-restart (small delay to ensure fs writes are flushed).
|
||||
// RFC internal#524 Layer 1: globalGoAsync so the detached restart
|
||||
// goroutine is drained by drainTestAsync before db.DB swap. See
|
||||
// workspace.go:globalGoAsync for the contract.
|
||||
if h.restartFunc != nil {
|
||||
go func() {
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() {
|
||||
time.Sleep(2 * time.Second)
|
||||
h.restartFunc(workspaceID)
|
||||
}()
|
||||
h.restartFunc(wsID)
|
||||
})
|
||||
}
|
||||
|
||||
log.Printf("Plugin uninstall: %s from workspace %s (restarting)", pluginName, workspaceID)
|
||||
@@ -260,10 +264,12 @@ func (h *PluginsHandler) uninstallViaEIC(ctx context.Context, c *gin.Context, wo
|
||||
}
|
||||
|
||||
if h.restartFunc != nil {
|
||||
go func() {
|
||||
// RFC internal#524 Layer 1: see uninstallViaDocker above.
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() {
|
||||
time.Sleep(2 * time.Second)
|
||||
h.restartFunc(workspaceID)
|
||||
}()
|
||||
h.restartFunc(wsID)
|
||||
})
|
||||
}
|
||||
|
||||
log.Printf("Plugin uninstall: %s from workspace %s (restarting via SaaS path)", pluginName, workspaceID)
|
||||
|
||||
@@ -320,7 +320,10 @@ func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID str
|
||||
if kind == classifyKindSkillContentOnly {
|
||||
log.Printf("Plugin install: %s → workspace %s — SKILL-content-only update, SKIPPING restart", r.PluginName, workspaceID)
|
||||
} else {
|
||||
go h.restartFunc(workspaceID)
|
||||
// RFC internal#524 Layer 1: drain via globalGoAsync (see
|
||||
// workspace.go:globalGoAsync).
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
}
|
||||
return nil
|
||||
@@ -334,7 +337,9 @@ func (h *PluginsHandler) deliverToContainer(ctx context.Context, workspaceID str
|
||||
})
|
||||
}
|
||||
if h.restartFunc != nil {
|
||||
go h.restartFunc(workspaceID)
|
||||
// RFC internal#524 Layer 1: see Docker path above.
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -819,8 +819,11 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
if payload.ActiveTasks < maxConcurrent {
|
||||
// context.WithoutCancel: heartbeat handler's ctx is about to
|
||||
// expire as soon as we return. The drain needs to outlive it.
|
||||
// RFC internal#524 Layer 1: drainQueue reads db.DB; route
|
||||
// through globalGoAsync so test cleanup waits for it.
|
||||
drainCtx := context.WithoutCancel(ctx)
|
||||
go h.drainQueue(drainCtx, payload.WorkspaceID)
|
||||
wsID := payload.WorkspaceID
|
||||
globalGoAsync(func() { h.drainQueue(drainCtx, wsID) })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
package handlers
|
||||
|
||||
// rfc524_layer1_async_drain_test.go — regression test for RFC internal#524
|
||||
// Layer 1 forward-port. Asserts:
|
||||
//
|
||||
// 1. globalGoAsync goroutines are drained by drainTestAsync before the
|
||||
// test cleanup chain returns control.
|
||||
// 2. Routing through globalGoAsync (rather than bare `go ...`) ensures
|
||||
// a sibling-handler's detached goroutine cannot outlive a test's
|
||||
// db.DB swap.
|
||||
//
|
||||
// Companion of handlers_test.go:drainTestAsync (canonical 69d9b4e3 fix
|
||||
// extended to non-*WorkspaceHandler call sites). If either property
|
||||
// regresses, this test fails fast.
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestRFC524_GlobalGoAsync_DrainsBeforeCleanup asserts that goroutines
|
||||
// scheduled via globalGoAsync run to completion before drainTestAsync
|
||||
// returns. Concretely: schedule a globalGoAsync that flips a counter
|
||||
// after a short sleep, then call drainTestAsync; the counter must
|
||||
// already be 1 when the call returns.
|
||||
func TestRFC524_GlobalGoAsync_DrainsBeforeCleanup(t *testing.T) {
|
||||
var ran int32
|
||||
globalGoAsync(func() {
|
||||
time.Sleep(20 * time.Millisecond)
|
||||
atomic.StoreInt32(&ran, 1)
|
||||
})
|
||||
|
||||
// drainTestAsync drains per-handler asyncWG + the package-level
|
||||
// globalAsync WG. After it returns the goroutine MUST have run.
|
||||
drainTestAsync()
|
||||
|
||||
if atomic.LoadInt32(&ran) != 1 {
|
||||
t.Fatalf("drainTestAsync returned before globalGoAsync goroutine finished — regression of RFC internal#524 Layer 1 drain coupling")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRFC524_GlobalGoAsync_MultipleConcurrent asserts the drain is
|
||||
// O(n)-correct: schedule a fan-out of globalGoAsync calls (like
|
||||
// restartAllAffectedByGlobalKey does on a large global secret rotation)
|
||||
// and confirm every one completes before drainTestAsync returns.
|
||||
func TestRFC524_GlobalGoAsync_MultipleConcurrent(t *testing.T) {
|
||||
const n = 32
|
||||
var completed int32
|
||||
for i := 0; i < n; i++ {
|
||||
globalGoAsync(func() {
|
||||
// Short, random-ish work; the point is they're all in flight
|
||||
// at the same time when drainTestAsync is called.
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
atomic.AddInt32(&completed, 1)
|
||||
})
|
||||
}
|
||||
|
||||
drainTestAsync()
|
||||
|
||||
got := atomic.LoadInt32(&completed)
|
||||
if got != n {
|
||||
t.Fatalf("drainTestAsync returned with %d/%d globalGoAsync goroutines incomplete — fan-out drain broken", n-got, n)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRFC524_HandlerGoAsync_AndGlobalAsync_BothDrained asserts that
|
||||
// drainTestAsync waits for BOTH the per-handler asyncWG (the original
|
||||
// 69d9b4e3 primitive) AND the package-level globalAsync (the Layer 1
|
||||
// extension). Schedules one of each and confirms both finish.
|
||||
func TestRFC524_HandlerGoAsync_AndGlobalAsync_BothDrained(t *testing.T) {
|
||||
setupTestDB(t) // registers handlers + arms the drain
|
||||
|
||||
var perHandlerDone, globalDone int32
|
||||
wh := NewWorkspaceHandler(nil, nil, "", t.TempDir())
|
||||
wh.goAsync(func() {
|
||||
time.Sleep(15 * time.Millisecond)
|
||||
atomic.StoreInt32(&perHandlerDone, 1)
|
||||
})
|
||||
globalGoAsync(func() {
|
||||
time.Sleep(15 * time.Millisecond)
|
||||
atomic.StoreInt32(&globalDone, 1)
|
||||
})
|
||||
|
||||
drainTestAsync()
|
||||
|
||||
if atomic.LoadInt32(&perHandlerDone) != 1 {
|
||||
t.Errorf("per-handler asyncWG drain regressed (RFC internal#524 Layer 1 expects 69d9b4e3 to remain wired)")
|
||||
}
|
||||
if atomic.LoadInt32(&globalDone) != 1 {
|
||||
t.Errorf("global async drain not wired (RFC internal#524 Layer 1 extension missing)")
|
||||
}
|
||||
}
|
||||
@@ -262,9 +262,13 @@ func (h *SecretsHandler) Set(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Auto-restart workspace to pick up new secret
|
||||
// Auto-restart workspace to pick up new secret.
|
||||
// RFC internal#524 Layer 1: route through globalGoAsync so tests can
|
||||
// drain the detached restart goroutine before db.DB is swapped — see
|
||||
// drainTestAsync in handlers_test.go and the canonical 69d9b4e3 fix.
|
||||
if h.restartFunc != nil {
|
||||
go h.restartFunc(workspaceID)
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "key": body.Key})
|
||||
@@ -297,9 +301,11 @@ func (h *SecretsHandler) Delete(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Auto-restart workspace to pick up removed secret
|
||||
// Auto-restart workspace to pick up removed secret.
|
||||
// RFC internal#524 Layer 1: see Set() above for the drain rationale.
|
||||
if h.restartFunc != nil {
|
||||
go h.restartFunc(workspaceID)
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "deleted", "key": key})
|
||||
@@ -379,7 +385,13 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) {
|
||||
// reach existing workspaces until the container is recreated. Auto-restart
|
||||
// every workspace whose env is affected — i.e. those WITHOUT a
|
||||
// workspace-level override of the same key.
|
||||
go h.restartAllAffectedByGlobalKey(body.Key)
|
||||
//
|
||||
// RFC internal#524 Layer 1: globalGoAsync so tests drain the fan-out
|
||||
// (which itself spawns N more globalGoAsync restart calls below) before
|
||||
// db.DB swap. Without this, the SELECT for affected workspaces races a
|
||||
// subsequent test's db.DB restore.
|
||||
key := body.Key
|
||||
globalGoAsync(func() { h.restartAllAffectedByGlobalKey(key) })
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "key": body.Key, "scope": "global"})
|
||||
}
|
||||
@@ -423,7 +435,11 @@ func (h *SecretsHandler) restartAllAffectedByGlobalKey(key string) {
|
||||
}
|
||||
log.Printf("Global secret %s changed: auto-restarting %d workspace(s) to refresh env", key, len(ids))
|
||||
for _, id := range ids {
|
||||
go h.restartFunc(id)
|
||||
// RFC internal#524 Layer 1: per-workspace restart via globalGoAsync
|
||||
// so each restart goroutine is drained before db.DB is swapped in
|
||||
// the test cleanup chain.
|
||||
wsID := id
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -450,7 +466,10 @@ func (h *SecretsHandler) DeleteGlobal(c *gin.Context) {
|
||||
|
||||
// Issue #15: propagate deletion to running containers — otherwise they
|
||||
// keep the stale env var until manual restart.
|
||||
go h.restartAllAffectedByGlobalKey(key)
|
||||
// RFC internal#524 Layer 1: globalGoAsync for the same drain rationale
|
||||
// as SetGlobal above.
|
||||
k := key
|
||||
globalGoAsync(func() { h.restartAllAffectedByGlobalKey(k) })
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "deleted", "key": key, "scope": "global"})
|
||||
}
|
||||
@@ -552,7 +571,9 @@ func (h *SecretsHandler) SetModel(c *gin.Context) {
|
||||
}
|
||||
|
||||
if h.restartFunc != nil {
|
||||
go h.restartFunc(workspaceID)
|
||||
// RFC internal#524 Layer 1: globalGoAsync (see Set()).
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
if body.Model == "" {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "cleared"})
|
||||
@@ -669,7 +690,9 @@ func (h *SecretsHandler) SetProvider(c *gin.Context) {
|
||||
}
|
||||
|
||||
if h.restartFunc != nil {
|
||||
go h.restartFunc(workspaceID)
|
||||
// RFC internal#524 Layer 1: globalGoAsync (see Set()).
|
||||
wsID := workspaceID
|
||||
globalGoAsync(func() { h.restartFunc(wsID) })
|
||||
}
|
||||
if body.Provider == "" {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "cleared"})
|
||||
|
||||
@@ -88,6 +88,11 @@ func (h *SocketHandler) HandleConnect(c *gin.Context) {
|
||||
|
||||
// Wrap WritePump and ReadPump so the gauge is decremented exactly once
|
||||
// when the client's write goroutine exits (WritePump owns conn lifetime).
|
||||
// goAsync-exempt (RFC internal#524 Layer 2.2): WebSocket pumps live
|
||||
// for the duration of the client connection (minutes-hours), not a
|
||||
// single request. Wrapping them in globalGoAsync would block every
|
||||
// test's t.Cleanup until every connected WS client disconnects. No
|
||||
// db.DB access in either pump.
|
||||
go func() {
|
||||
ws.WritePump(client)
|
||||
metrics.TrackWSDisconnect()
|
||||
|
||||
@@ -234,7 +234,9 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
"source": "ec2-ssh",
|
||||
})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -268,7 +270,9 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
"source": "container",
|
||||
})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -288,6 +292,8 @@ func (h *TemplatesHandler) ReplaceFiles(c *gin.Context) {
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": "replaced", "workspace": workspaceID, "files": len(body.Files), "source": "volume"})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -570,7 +570,9 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -584,7 +586,9 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -598,7 +602,9 @@ func (h *TemplatesHandler) WriteFile(c *gin.Context) {
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "saved", "path": filePath})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -651,7 +657,9 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "deleted", "path": filePath})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -669,7 +677,9 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "deleted", "path": filePath})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -682,6 +692,8 @@ func (h *TemplatesHandler) DeleteFile(c *gin.Context) {
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{"status": "deleted", "path": filePath})
|
||||
if h.wh != nil {
|
||||
go h.wh.RestartByID(workspaceID)
|
||||
// RFC internal#524 Layer 1: per-handler goAsync (drains via h.wh.waitAsyncForTest)
|
||||
wsID := workspaceID
|
||||
h.wh.goAsync(func() { h.wh.RestartByID(wsID) })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,6 +211,10 @@ func (h *TerminalHandler) handleLocalConnect(c *gin.Context, workspaceID string)
|
||||
}
|
||||
|
||||
// Bridge: container stdout → WebSocket
|
||||
// goAsync-exempt (RFC internal#524 Layer 2.2): per-WebSocket I/O
|
||||
// bridge — lifetime is the connection, not a request. The handler
|
||||
// blocks on `done` below, so the goroutine is already drained
|
||||
// synchronously. No db.DB access on this path.
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
defer close(done)
|
||||
@@ -433,6 +437,8 @@ func (h *TerminalHandler) handleRemoteConnect(c *gin.Context, workspaceID, insta
|
||||
done := make(chan struct{})
|
||||
|
||||
// PTY → WebSocket
|
||||
// goAsync-exempt (RFC internal#524 Layer 2.2): WebSocket-lifetime
|
||||
// I/O bridge; handler blocks on `done` below. No db.DB access.
|
||||
go func() {
|
||||
defer close(done)
|
||||
buf := make([]byte, 4096)
|
||||
@@ -455,6 +461,7 @@ func (h *TerminalHandler) handleRemoteConnect(c *gin.Context, workspaceID, insta
|
||||
}()
|
||||
|
||||
// WebSocket → PTY (stdin)
|
||||
// goAsync-exempt (RFC internal#524 Layer 2.2): see above.
|
||||
go func() {
|
||||
for {
|
||||
_, msg, rErr := conn.ReadMessage()
|
||||
|
||||
@@ -101,6 +101,47 @@ func (h *WorkspaceHandler) waitAsyncForTest() {
|
||||
h.asyncWG.Wait()
|
||||
}
|
||||
|
||||
// globalAsync tracks goroutines launched by globalGoAsync — the
|
||||
// equivalent of WorkspaceHandler.goAsync for sibling handlers that
|
||||
// don't carry a *WorkspaceHandler reference (SecretsHandler /
|
||||
// PluginsHandler / AdminPluginDriftHandler / ChannelHandler /
|
||||
// MCPHandler / RegistryHandler), and for callers of package-level
|
||||
// free functions (a2a_proxy_helpers extractAndUpsertTokenUsage).
|
||||
//
|
||||
// Forward-port of RFC internal#524 Layer 1 deliverable 2: the
|
||||
// canonical db.DB race fix lives at workspace.go:goAsync / asyncWG,
|
||||
// but ~25 sibling bare-`go` sites still write to db.DB outside any
|
||||
// WorkspaceHandler's drain window. globalAsync gives them the same
|
||||
// drain hook (waitGlobalAsyncForTest, drained from drainTestAsync)
|
||||
// so a test's t.Cleanup db.DB restore cannot race a detached
|
||||
// goroutine spawned by any sibling handler.
|
||||
//
|
||||
// Zero-cost in production (a single sync.WaitGroup Add/Done per
|
||||
// fire-and-forget call, no test-only branching).
|
||||
var globalAsync sync.WaitGroup
|
||||
|
||||
// globalGoAsync schedules fn on a detached goroutine tracked by
|
||||
// globalAsync. Use this in package-internal callers that don't have
|
||||
// a *WorkspaceHandler receiver to thread h.goAsync through.
|
||||
//
|
||||
// When a *WorkspaceHandler IS available, prefer h.goAsync — it lets
|
||||
// per-handler tests (waitAsyncForTest) wait without disturbing
|
||||
// unrelated handlers' inflight work.
|
||||
func globalGoAsync(fn func()) {
|
||||
globalAsync.Add(1)
|
||||
go func() {
|
||||
defer globalAsync.Done()
|
||||
fn()
|
||||
}()
|
||||
}
|
||||
|
||||
// waitGlobalAsyncForTest blocks until every globalGoAsync goroutine
|
||||
// finishes. Called from drainTestAsync's cleanup chain in the test
|
||||
// harness; production code never calls it.
|
||||
func waitGlobalAsyncForTest() {
|
||||
globalAsync.Wait()
|
||||
}
|
||||
|
||||
func NewWorkspaceHandler(b events.EventEmitter, p *provisioner.Provisioner, platformURL, configsDir string) *WorkspaceHandler {
|
||||
h := &WorkspaceHandler{
|
||||
broadcaster: b,
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
-- Reverse of 20260518000000_seed_production_team_agent_cards.up.sql.
|
||||
--
|
||||
-- Clears the identity fields back to the gap state that the up
|
||||
-- migration was designed to fix. After this down migration, the PR
|
||||
-- #1427 reconcile has nothing to substitute again: name reverts to the
|
||||
-- workspace UUID (the runtime's fallback), role to NULL, agent_card
|
||||
-- description/skills to empty. This is the pre-#1427 + pre-this-seed
|
||||
-- behaviour.
|
||||
--
|
||||
-- Match strategy mirrors the up migration (id::text LIKE prefix for 5,
|
||||
-- exact UUID for CEO-Assistant) so any down-roll touches the exact
|
||||
-- same rows.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- PM
|
||||
UPDATE workspaces
|
||||
SET name = id::text,
|
||||
role = NULL,
|
||||
agent_card = (agent_card - 'description' - 'skills' - 'role') ||
|
||||
jsonb_build_object('name', id::text),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '8a71d4d4-%';
|
||||
|
||||
-- Reviewer
|
||||
UPDATE workspaces
|
||||
SET name = id::text,
|
||||
role = NULL,
|
||||
agent_card = (agent_card - 'description' - 'skills' - 'role') ||
|
||||
jsonb_build_object('name', id::text),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '27e66b5a-%';
|
||||
|
||||
-- Researcher
|
||||
UPDATE workspaces
|
||||
SET name = id::text,
|
||||
role = NULL,
|
||||
agent_card = (agent_card - 'description' - 'skills' - 'role') ||
|
||||
jsonb_build_object('name', id::text),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '5773bd5f-%';
|
||||
|
||||
-- Dev-A
|
||||
UPDATE workspaces
|
||||
SET name = id::text,
|
||||
role = NULL,
|
||||
agent_card = (agent_card - 'description' - 'skills' - 'role') ||
|
||||
jsonb_build_object('name', id::text),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '4ca4c06c-%';
|
||||
|
||||
-- Dev-B
|
||||
UPDATE workspaces
|
||||
SET name = id::text,
|
||||
role = NULL,
|
||||
agent_card = (agent_card - 'description' - 'skills' - 'role') ||
|
||||
jsonb_build_object('name', id::text),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '31eb65ed-%';
|
||||
|
||||
-- CEO-Assistant
|
||||
UPDATE workspaces
|
||||
SET name = id::text,
|
||||
role = NULL,
|
||||
agent_card = (agent_card - 'description' - 'skills' - 'role') ||
|
||||
jsonb_build_object('name', id::text),
|
||||
updated_at = now()
|
||||
WHERE id = '30ba7f0b-b303-4a20-aefe-3a4a675b8aa4'::uuid;
|
||||
|
||||
COMMIT;
|
||||
@@ -0,0 +1,165 @@
|
||||
-- Seed identity (name + role + agent_card description/skills) for the
|
||||
-- 6 production-team workspaces. Pairs with the PR #1427 server-side
|
||||
-- reconcile (internal#492): #1427 added the platform-side backfill that
|
||||
-- pulls workspaces.name and workspaces.role into the stored agent_card
|
||||
-- on /registry/register; this migration populates the trusted DB row
|
||||
-- those reads consume.
|
||||
--
|
||||
-- Without this seed, the reconcile has nothing to substitute and the
|
||||
-- card stays at name=UUID / description="" / role=null for the prod
|
||||
-- team agents — the exact gap internal#492 is filed against.
|
||||
--
|
||||
-- Identity stays platform-controlled — the agent runtime cannot
|
||||
-- self-write these fields. The 6 workspace UUIDs are the CTO-locked
|
||||
-- production-team topology (see project_production_agent_team_topology):
|
||||
--
|
||||
-- PM 8a71d4d4... — Claude Code on Opus, read-only,
|
||||
-- A2A-delegate-only coordinator
|
||||
-- Reviewer 27e66b5a... — codex on openai-subscription,
|
||||
-- 5-axis non-author review
|
||||
-- Researcher 5773bd5f... — codex on openai-subscription,
|
||||
-- root-cause investigation
|
||||
-- Dev-A 4ca4c06c... — Claude Code on Kimi K2.6
|
||||
-- (api.kimi.com/coding base + ANTHROPIC_API_KEY)
|
||||
-- Dev-B 31eb65ed... — Claude Code on MiniMax
|
||||
-- (api.minimax.io/anthropic base + sk-cp-* key)
|
||||
-- CEO-Assistant 30ba7f0b-b303-4a20-aefe-3a4a675b8aa4 — Claude Code,
|
||||
-- orchestrator-side operations + canvas relay
|
||||
--
|
||||
-- Match strategy: 5 of 6 production UUIDs were provided to me by the CTO
|
||||
-- as 8-char prefixes only (the full UUIDs live in the prod tenant DB).
|
||||
-- We match those 5 with `id::text LIKE '<prefix>-%'` so this migration
|
||||
-- is unambiguous when reviewed without DB access — the CTO will confirm
|
||||
-- on review that each prefix resolves to a single row. CEO-Assistant
|
||||
-- (30ba7f0b-b303-4a20-aefe-3a4a675b8aa4) is known in full from
|
||||
-- chat_files_test.go and is matched exactly.
|
||||
--
|
||||
-- Idempotent: each UPDATE only touches the three identity fields. Re-
|
||||
-- running rewrites the same values. UUIDs not present in a given tenant
|
||||
-- DB match zero rows and are silently skipped — the migration never
|
||||
-- INSERTs rows it doesn't own.
|
||||
--
|
||||
-- All names obey validateWorkspaceFields (workspace_crud.go:526):
|
||||
-- <=255 chars, no newline/CR, no YAML-special chars `{}[]|>*&!`.
|
||||
-- All roles obey the same contract <=1000 chars. Per-skill description
|
||||
-- <=120 chars matches the discovery card surface shown on the canvas
|
||||
-- Agent Card view and the mobile peer chip.
|
||||
|
||||
BEGIN;
|
||||
|
||||
-- PM — read-only A2A coordinator
|
||||
UPDATE workspaces
|
||||
SET name = 'Production Manager',
|
||||
role = 'product manager',
|
||||
agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
|
||||
'name', 'Production Manager',
|
||||
'description', 'Read-only A2A coordinator that plans work and delegates to Dev/Reviewer/Researcher peers; never writes code itself.',
|
||||
'role', 'product manager',
|
||||
'skills', jsonb_build_array(
|
||||
jsonb_build_object('id','planning','name','planning','description','Decompose CTO directives into peer-delegable units','tags',jsonb_build_array('planning'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','delegation','name','delegation','description','Route work to Dev-A / Dev-B / Reviewer / Researcher via A2A','tags',jsonb_build_array('delegation'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','coordination','name','coordination','description','Track peer activity and surface blockers back to the CTO','tags',jsonb_build_array('coordination'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','read-only','name','read-only','description','Never edits code or merges; proposes only','tags',jsonb_build_array('read-only','safety'),'examples',jsonb_build_array())
|
||||
),
|
||||
'updated_at', now()::text
|
||||
),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '8a71d4d4-%';
|
||||
|
||||
-- Reviewer — codex/openai, 5-axis non-author review
|
||||
UPDATE workspaces
|
||||
SET name = 'Code Reviewer',
|
||||
role = 'code reviewer',
|
||||
agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
|
||||
'name', 'Code Reviewer',
|
||||
'description', 'Non-author 5-axis review on codex/openai-subscription; runs the merge gate, never approves PRs it authored.',
|
||||
'role', 'code reviewer',
|
||||
'skills', jsonb_build_array(
|
||||
jsonb_build_object('id','code-review','name','code-review','description','Five-axis PR review against the merge gate','tags',jsonb_build_array('review'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','security-axis','name','security-axis','description','Trust-boundary, secret-handling, injection surface checks','tags',jsonb_build_array('security'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','correctness-axis','name','correctness-axis','description','Logic, error-handling, race and boundary case checks','tags',jsonb_build_array('correctness'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','non-author-approve','name','non-author-approve','description','Approves only PRs the reviewer did not author','tags',jsonb_build_array('two-eyes'),'examples',jsonb_build_array())
|
||||
),
|
||||
'updated_at', now()::text
|
||||
),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '27e66b5a-%';
|
||||
|
||||
-- Researcher — codex/openai, root-cause investigation
|
||||
UPDATE workspaces
|
||||
SET name = 'Root-Cause Researcher',
|
||||
role = 'researcher',
|
||||
agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
|
||||
'name', 'Root-Cause Researcher',
|
||||
'description', 'Diagnostic investigation on codex/openai-subscription; obs-first, source-as-corroboration, no drive-by fixes.',
|
||||
'role', 'researcher',
|
||||
'skills', jsonb_build_array(
|
||||
jsonb_build_object('id','root-cause','name','root-cause','description','Diagnose the underlying cause, never patch symptoms','tags',jsonb_build_array('investigation'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','obs-first','name','obs-first','description','Grafana/Loki query before source-guessing','tags',jsonb_build_array('observability'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','log-correlation','name','log-correlation','description','Cross-service step= / Delegation uuid tracing','tags',jsonb_build_array('observability'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','source-archaeology','name','source-archaeology','description','Git blame and prior-art recall across repos','tags',jsonb_build_array('git'),'examples',jsonb_build_array())
|
||||
),
|
||||
'updated_at', now()::text
|
||||
),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '5773bd5f-%';
|
||||
|
||||
-- Dev-A — Claude Code on Kimi K2.6
|
||||
UPDATE workspaces
|
||||
SET name = 'Dev Engineer A (Kimi)',
|
||||
role = 'dev engineer',
|
||||
agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
|
||||
'name', 'Dev Engineer A (Kimi)',
|
||||
'description', 'Claude Code routed to Kimi K2.6 via api.kimi.com/coding; implements PRs against the dev-tree protected branches.',
|
||||
'role', 'dev engineer',
|
||||
'skills', jsonb_build_array(
|
||||
jsonb_build_object('id','implementation','name','implementation','description','Write code to merge gate (tests, lint, types)','tags',jsonb_build_array('coding'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','test-driven','name','test-driven','description','Failing test first, then minimal fix','tags',jsonb_build_array('tdd'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','bug-fixing','name','bug-fixing','description','Root-caused bug fixes with regression test','tags',jsonb_build_array('debugging'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','refactoring','name','refactoring','description','In-scope, behavior-preserving refactors only','tags',jsonb_build_array('refactor'),'examples',jsonb_build_array())
|
||||
),
|
||||
'updated_at', now()::text
|
||||
),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '4ca4c06c-%';
|
||||
|
||||
-- Dev-B — Claude Code on MiniMax
|
||||
UPDATE workspaces
|
||||
SET name = 'Dev Engineer B (MiniMax)',
|
||||
role = 'dev engineer',
|
||||
agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
|
||||
'name', 'Dev Engineer B (MiniMax)',
|
||||
'description', 'Claude Code routed to MiniMax via api.minimax.io/anthropic; parallel dev capacity to Dev-A on the same gate.',
|
||||
'role', 'dev engineer',
|
||||
'skills', jsonb_build_array(
|
||||
jsonb_build_object('id','implementation','name','implementation','description','Write code to merge gate (tests, lint, types)','tags',jsonb_build_array('coding'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','test-driven','name','test-driven','description','Failing test first, then minimal fix','tags',jsonb_build_array('tdd'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','bug-fixing','name','bug-fixing','description','Root-caused bug fixes with regression test','tags',jsonb_build_array('debugging'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','refactoring','name','refactoring','description','In-scope, behavior-preserving refactors only','tags',jsonb_build_array('refactor'),'examples',jsonb_build_array())
|
||||
),
|
||||
'updated_at', now()::text
|
||||
),
|
||||
updated_at = now()
|
||||
WHERE id::text LIKE '31eb65ed-%';
|
||||
|
||||
-- CEO-Assistant — Claude Code, orchestrator + canvas relay
|
||||
-- Full UUID known from chat_files_test.go:286 — match exactly.
|
||||
UPDATE workspaces
|
||||
SET name = 'CEO Assistant',
|
||||
role = 'operator orchestrator',
|
||||
agent_card = COALESCE(agent_card, '{}'::jsonb) || jsonb_build_object(
|
||||
'name', 'CEO Assistant',
|
||||
'description', 'Orchestrator-side Claude Code that runs the triage loop, relays canvas and Telegram, dispatches non-author reviewers.',
|
||||
'role', 'operator orchestrator',
|
||||
'skills', jsonb_build_array(
|
||||
jsonb_build_object('id','triage-loop','name','triage-loop','description','Run the CI/PR triage loop; fix-what-you-find','tags',jsonb_build_array('orchestration'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','review-routing','name','review-routing','description','Dispatch non-author reviewers via delegate_task','tags',jsonb_build_array('routing'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','canvas-relay','name','canvas-relay','description','Relay CTO canvas/Telegram messages to peers','tags',jsonb_build_array('relay'),'examples',jsonb_build_array()),
|
||||
jsonb_build_object('id','ops','name','ops','description','Direct hands-on ops on operator host and Neon','tags',jsonb_build_array('ops','direct-action'),'examples',jsonb_build_array())
|
||||
),
|
||||
'updated_at', now()::text
|
||||
),
|
||||
updated_at = now()
|
||||
WHERE id = '30ba7f0b-b303-4a20-aefe-3a4a675b8aa4'::uuid;
|
||||
|
||||
COMMIT;
|
||||
Reference in New Issue
Block a user