Compare commits

..

66 Commits

Author SHA1 Message Date
devops-engineer 1eb1327ad5 feat(providers): regenerate registry_gen + repin canonical sha for MiniMax-M3 sync
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
E2E API Smoke Test / detect-changes (pull_request) Failing after 0s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Has been skipped
Harness Replays / detect-changes (pull_request) Failing after 0s
CI / Python Lint & Test (pull_request) Successful in 3s
Harness Replays / Harness Replays (pull_request) Has been skipped
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (pull_request) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 6s
qa-review / approved (pull_request_target) Failing after 6s
security-review / approved (pull_request_target) Failing after 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 16s
CI / Detect changes (pull_request) Successful in 18s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 0s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 1s
E2E Chat / detect-changes (pull_request) Successful in 30s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 29s
sop-tier-check / tier-check (pull_request_review) Successful in 4s
CI / Canvas (Next.js) (pull_request) Successful in 10s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 16s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 41s
E2E Chat / E2E Chat (pull_request) Successful in 19s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m14s
security-review / approved (pull_request) Refired via /security-recheck by unknown
CI / Platform (Go) (pull_request) Successful in 3m52s
CI / all-required (pull_request) Successful in 4m23s
sop-checklist / review-refire (pull_request_target) Has been skipped
sop-checklist / all-items-acked (pull_request) acked: 7/7
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-tier-check / tier-check (pull_request_target) Successful in 3s
sop-checklist / all-items-acked (pull_request_target) Successful in 5s
gate-check-v3 / gate-check (pull_request_target) Successful in 5s
audit-force-merge / audit (pull_request_target) Successful in 5s
2026-06-01 09:57:23 +00:00
devops-engineer a407c8d079 feat(providers): sync MiniMax-M3 from controlplane SSOT (providers.yaml + regenerated registry_gen + golden test) 2026-06-01 09:55:51 +00:00
claude-ceo-assistant 53efcb5c46 Merge pull request 'fix(provision): fail loud on runtime-seed mismatch instead of silent claude-code fallback (#2027)' (#2028) from fix/provision-fail-loud-runtime-seed-2027 into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 14s
Block internal-flavored paths / Block forbidden paths (push) Successful in 9s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 6s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 8s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Successful in 5s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 46s
publish-workspace-server-image / build-and-push (push) Successful in 3m3s
publish-workspace-server-image / Production auto-deploy (push) Failing after 37m20s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 8s
Harness Replays / Harness Replays (push) Successful in 9s
Harness Replays / detect-changes (push) Successful in 8s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 3m1s
E2E API Smoke Test / detect-changes (push) Successful in 11s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 3m32s
Handlers Postgres Integration / detect-changes (push) Successful in 8s
E2E Chat / E2E Chat (push) Successful in 6m50s
E2E Chat / detect-changes (push) Successful in 10s
CI / Detect changes (push) Successful in 19s
CI / Python Lint & Test (push) Successful in 16s
CI / all-required (push) Failing after 40m31s
CI / Canvas (Next.js) (push) Successful in 21s
CI / Shellcheck (E2E scripts) (push) Successful in 9s
CI / Platform (Go) (push) Successful in 5m28s
CI / Canvas Deploy Reminder (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
2026-06-01 04:22:55 +00:00
claude-ceo-assistant ea3bae5068 fix(provision): fail loud on runtime-seed mismatch instead of silent claude-code fallback (#2027)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
sop-checklist / na-declarations (pull_request) N/A: (none)
Block internal-flavored paths / Block forbidden paths (pull_request) Waiting to run
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Waiting to run
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Waiting to run
lint-required-no-paths / lint-required-no-paths (pull_request) Waiting to run
Secret scan / Scan diff for credential-shaped strings (pull_request) Waiting to run
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Waiting to run
gate-check-v3 / gate-check (pull_request) Waiting to run
qa-review / approved (pull_request) Waiting to run
sop-checklist / all-items-acked (pull_request) Waiting to run
sop-checklist / review-refire (pull_request) Waiting to run
sop-tier-check / tier-check (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 14s
security-review / approved (pull_request) Refired via /security-recheck by unknown
audit-force-merge / audit (pull_request) Successful in 11s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Has been cancelled
E2E API Smoke Test / E2E API Smoke Test (pull_request) Has been cancelled
CI / Platform (Go) (pull_request) Has been cancelled
CI / Canvas (Next.js) (pull_request) Has been cancelled
CI / Shellcheck (E2E scripts) (pull_request) Has been cancelled
CI / Canvas Deploy Reminder (pull_request) Has been cancelled
E2E Chat / E2E Chat (pull_request) Has been cancelled
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Has been cancelled
Harness Replays / Harness Replays (pull_request) Has been cancelled
Handlers Postgres Integration / detect-changes (pull_request) Has been cancelled
E2E API Smoke Test / detect-changes (pull_request) Has been cancelled
CI / all-required (pull_request) Failing after 40m25s
CI / Detect changes (pull_request) Has been cancelled
CI / Python Lint & Test (pull_request) Has been cancelled
E2E Chat / detect-changes (pull_request) Has been cancelled
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Has been cancelled
Harness Replays / detect-changes (pull_request) Has been cancelled
When a workspace NAMES a runtime but the config.yaml about to be seeded
declares a different top-level runtime, refuse to launch and surface
WORKSPACE_PROVISION_FAILED — the symmetric counterpart to selectImage's
ErrUnresolvableRuntime guard, on the config/template side.

Pre-fix: if a runtime's workspace template wasn't in the tenant cache at
provision time (or sanitizeRuntime coerced an unknown runtime), config
seeding silently fell back to claude-code-default. The image+env said
e.g. google-adk but the seeded config said claude-code, so the agent
booted mislabeled and personaless yet looked 'online' and returned canned
non-answers (hit the molecule-adk-demo hackathon org: 4 google-adk agents).

The guard is in prepareProvisionContext (shared by Docker + SaaS paths).
Empty requested runtime (org-template default path) and an indeterminate
seeded runtime (CP mode, no local config bytes) are both allowed — it only
fails on a concrete, contradictory signal.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 20:05:35 -07:00
devops-engineer 774a8c2a6a Merge pull request 'fix(providers): sync registry to controlplane SSOT — codex→openai-subscription byok' (#2025) from fix/providers-ssot-sync-codex-subscription into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 11s
Block internal-flavored paths / Block forbidden paths (push) Successful in 13s
CI / Python Lint & Test (push) Successful in 11s
CI / Detect changes (push) Successful in 12s
Handlers Postgres Integration / detect-changes (push) Successful in 8s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 10s
E2E API Smoke Test / detect-changes (push) Successful in 12s
E2E Chat / detect-changes (push) Successful in 11s
Harness Replays / detect-changes (push) Successful in 7s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 6s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 8s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Successful in 5s
CI / Canvas (Next.js) (push) Successful in 5s
CI / Shellcheck (E2E scripts) (push) Successful in 5s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 1m10s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 30s
publish-workspace-server-image / build-and-push (push) Successful in 3m20s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m42s
Harness Replays / Harness Replays (push) Successful in 20s
CI / Canvas Deploy Reminder (push) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m7s
E2E Chat / E2E Chat (push) Successful in 4m55s
CI / Platform (Go) (push) Successful in 5m23s
CI / all-required (push) Successful in 6m1s
publish-workspace-server-image / Production auto-deploy (push) Failing after 6m34s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
main-red-watchdog / watchdog (push) Successful in 2m7s
gate-check-v3 / gate-check (push) Successful in 1m5s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
ci-required-drift / drift (push) Successful in 1m14s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 4s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 6s
lint-bp-context-emit-match / lint-bp-context-emit-match (push) Successful in 1m33s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Weekly Platform-Go Surface / Weekly Platform-Go Surface (push) Successful in 5m13s
2026-05-31 23:50:53 +00:00
Hongming Wang cb660fc0b4 fix(providers): sync registry to controlplane SSOT — codex→openai-subscription byok
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 19s
CI / Detect changes (pull_request) Successful in 12s
CI / Python Lint & Test (pull_request) Successful in 8s
E2E API Smoke Test / detect-changes (pull_request) Successful in 15s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 16s
E2E Chat / detect-changes (pull_request) Successful in 18s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 5s
Harness Replays / detect-changes (pull_request) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 7s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 8s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (pull_request) Successful in 13s
gate-check-v3 / gate-check (pull_request) Successful in 16s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 33s
sop-checklist / review-refire (pull_request) Has been skipped
security-review / approved (pull_request) Successful in 10s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m8s
sop-tier-check / tier-check (pull_request) Successful in 11s
sop-checklist / all-items-acked (pull_request) acked: 7/7
sop-checklist / na-declarations (pull_request) N/A: (none)
CI / Canvas (Next.js) (pull_request) Successful in 24s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 17s
E2E Chat / E2E Chat (pull_request) Successful in 16s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 14s
Harness Replays / Harness Replays (pull_request) Successful in 3s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m56s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m10s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / Platform (Go) (pull_request) Successful in 5m26s
CI / all-required (pull_request) Successful in 13m31s
qa-review / approved (pull_request) Refired via /qa-recheck by unknown
audit-force-merge / audit (pull_request) Successful in 15s
molecule-core's synced copy of the provider registry was stale relative to
controlplane cp#423/#426, which split `openai`→`openai-subscription`
(auth_env CODEX_AUTH_JSON, IsPlatform false) / `openai-api` (OPENAI_API_KEY).
The stale copy derived codex→`openai` (and got band-aided to platform_managed),
producing "OpenAI requires OPENAI_API_KEY" + "codex adapter: no platform
provider" RuntimeError.

Sync to CP SSOT (CP HEAD fa44dc8), verbatim:
- providers.yaml, derive_provider.go, providers.go, and the
  derive/providers/runtimes tests copied byte-exact from controlplane.
- regenerated gen/registry_gen.go via `go generate` (now carries the
  openai-subscription entry: AuthEnv CODEX_AUTH_JSON, IsPlatform false).
- bumped canonicalProvidersYAMLSHA256 to the new synced-copy sha
  (dedbb8cc…f76187) so the hermetic drift gate stays green.

Core-only manual edit (CP has no such map):
- secrets.go: add CODEX_AUTH_JSON to platformManagedDirectLLMBypassKeys so the
  byok credential check counts the global CODEX_AUTH_JSON (codex byok now
  provisions with the shared subscription token) and strips it under
  platform-managed.

With the synced derive, codex+CODEX_AUTH_JSON → openai-subscription →
IsPlatform false → byok automatically via the existing billing resolver;
no derive logic was hand-edited and llm_billing_mode.go is untouched.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 16:05:55 -07:00
devops-engineer 446b8c78fd Merge pull request 'fix(workspace-server): central codex OAuth refresher (single-owner, anti-burn)' (#2023) from fix/codex-central-refresher into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 12s
Block internal-flavored paths / Block forbidden paths (push) Successful in 6s
CI / Detect changes (push) Successful in 12s
CI / Python Lint & Test (push) Successful in 3s
E2E API Smoke Test / detect-changes (push) Successful in 7s
E2E Chat / detect-changes (push) Successful in 12s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 9s
Handlers Postgres Integration / detect-changes (push) Successful in 3s
Harness Replays / detect-changes (push) Successful in 3s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 8s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 6s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 11s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 1m18s
CI / Canvas (Next.js) (push) Successful in 10s
CI / Shellcheck (E2E scripts) (push) Successful in 9s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 35s
publish-workspace-server-image / build-and-push (push) Successful in 3m9s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m44s
Harness Replays / Harness Replays (push) Successful in 6s
CI / Canvas Deploy Reminder (push) Successful in 3s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m44s
CI / Platform (Go) (push) Successful in 5m40s
CI / all-required (push) Successful in 7m1s
E2E Chat / E2E Chat (push) Successful in 4m46s
publish-workspace-server-image / Production auto-deploy (push) Failing after 7m31s
main-red-watchdog / watchdog (push) Successful in 1m59s
gate-check-v3 / gate-check (push) Successful in 43s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 27s
ci-required-drift / drift (push) Successful in 1m7s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 6s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 4s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
2026-05-31 19:52:21 +00:00
hongming-personal df972a85e2 fix(workspace-server): central codex OAuth refresher (single-owner, anti-burn)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
CI / Detect changes (pull_request) Successful in 27s
CI / Python Lint & Test (pull_request) Successful in 27s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 13s
E2E API Smoke Test / detect-changes (pull_request) Successful in 16s
E2E Chat / detect-changes (pull_request) Successful in 15s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 14s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 7s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
gate-check-v3 / gate-check (pull_request) Successful in 11s
sop-checklist / review-refire (pull_request) Has been skipped
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 29s
sop-tier-check / tier-check (pull_request) Successful in 13s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m7s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
CI / Canvas (Next.js) (pull_request) Successful in 2s
E2E Chat / E2E Chat (pull_request) Successful in 23s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2s
Harness Replays / Harness Replays (pull_request) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m56s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
sop-checklist / all-items-acked (pull_request) acked: 7/7
sop-checklist / na-declarations (pull_request) N/A: (none)
CI / Platform (Go) (pull_request) Successful in 5m28s
CI / all-required (pull_request) Successful in 7m16s
qa-review / approved (pull_request) Refired via /qa-recheck by unknown
security-review / approved (pull_request) Refired via /security-recheck by unknown
audit-force-merge / audit (pull_request) Successful in 9s
Multiple codex workspaces share ONE ChatGPT-Pro OAuth token (global_secrets
key CODEX_AUTH_JSON). OpenAI's refresh_token is single-use, so letting each
per-agent codex app-server refresh on its own 401 burned the shared seed within
seconds (a refresh storm → token_invalidated + "refresh token already used").

This adds a single platform-side owner of the refresh:
- internal/codexauth/refresher.go: one background goroutine, structurally
  single-flight (one goroutine + package mutex). Reads the global
  CODEX_AUTH_JSON, decodes the access_token JWT exp, and only within a safety
  margin of expiry POSTs the refresh_token ONCE per due cycle, then re-encrypts
  and writes the rotated blob back to global_secrets. Inert when the secret is
  absent; on a permanent failure (invalid_grant / "already used") it logs once
  and does NOT hot-loop. Billing-mode resolution + byok are untouched.
- cmd/server/main.go: wired under supervised.RunWithRecover like the other
  background sweeps.

Pairs with the codex template's codex_auth_sync.sh (GET-only re-sync; per-agent
OAuth POST disabled) so workspaces only consume the current token and never
rotate it themselves.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 12:38:13 -07:00
core-be e45033e15c Merge pull request 'fix(canvas): SSOT-drive runtime picker so google-adk shows correctly' (#2016) from feat/google-adk-runtime-ssot into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Block internal-flavored paths / Block forbidden paths (push) Successful in 7s
CI / Python Lint & Test (push) Successful in 9s
CI / Detect changes (push) Successful in 15s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 14s
E2E Chat / detect-changes (push) Successful in 12s
E2E API Smoke Test / detect-changes (push) Successful in 13s
Handlers Postgres Integration / detect-changes (push) Successful in 6s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 7s
Harness Replays / detect-changes (push) Successful in 9s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 5s
CI / Shellcheck (E2E scripts) (push) Successful in 2s
publish-canvas-image / Build & push canvas image (push) Successful in 1m24s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 1m4s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m5s
E2E Chat / E2E Chat (push) Successful in 4m32s
publish-workspace-server-image / build-and-push (push) Successful in 7m1s
CI / Platform (Go) (push) Successful in 7m12s
Harness Replays / Harness Replays (push) Successful in 2s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m52s
CI / Canvas (Next.js) (push) Successful in 7m58s
CI / all-required (push) Successful in 8m44s
publish-workspace-server-image / Production auto-deploy (push) Successful in 4m1s
CI / Canvas Deploy Reminder (push) Successful in 2s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Failing after 16m56s
Railway pin audit (drift detection) / Audit Railway env vars for drift-prone pins (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m15s
main-red-watchdog / watchdog (push) Successful in 2m20s
gate-check-v3 / gate-check (push) Successful in 25s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
ci-required-drift / drift (push) Successful in 1m10s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 11s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 9s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 8s
2026-05-31 09:46:31 +00:00
core-be 418db083ff ci: re-trigger after gitea restart task desync (no-op)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 5s
CI / Detect changes (pull_request) Successful in 5s
CI / Python Lint & Test (pull_request) Successful in 4s
E2E API Smoke Test / detect-changes (pull_request) Successful in 6s
E2E Chat / detect-changes (pull_request) Successful in 6s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 4s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 7s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 8s
gate-check-v3 / gate-check (pull_request) Successful in 12s
qa-review / approved (pull_request) Successful in 5s
security-review / approved (pull_request) Successful in 6s
sop-checklist / na-declarations (pull_request) N/A: (none)
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 35s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / all-items-acked (pull_request) Successful in 7s
sop-tier-check / tier-check (pull_request) Successful in 6s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m12s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 6s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m42s
E2E Chat / E2E Chat (pull_request) Successful in 8s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 3s
Harness Replays / Harness Replays (pull_request) Successful in 3s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m28s
CI / Platform (Go) (pull_request) Successful in 7m53s
CI / Canvas (Next.js) (pull_request) Successful in 7m24s
CI / all-required (pull_request) Successful in 22m29s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 7s
Empty commit on the PR branch to get a clean CI run; the prior run's
tasks were orphaned by the 2026-05-31 08:30 gitea restart (task-not-found).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 02:17:20 -07:00
core-be b611b1a9bf fix(canvas): SSOT-drive runtime picker so google-adk shows correctly
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
CI / all-required (pull_request) Failing after 1s
Block internal-flavored paths / Block forbidden paths (pull_request) Failing after 1s
CI / Detect changes (pull_request) Failing after 0s
CI / Python Lint & Test (pull_request) Failing after 0s
CI / Platform (Go) (pull_request) Has been skipped
E2E API Smoke Test / detect-changes (pull_request) Failing after 1s
CI / Canvas (Next.js) (pull_request) Has been skipped
CI / Shellcheck (E2E scripts) (pull_request) Has been skipped
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E Chat / detect-changes (pull_request) Failing after 0s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Failing after 0s
Handlers Postgres Integration / detect-changes (pull_request) Failing after 0s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Has been skipped
E2E Chat / E2E Chat (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Has been skipped
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Has been skipped
Harness Replays / detect-changes (pull_request) Failing after 1s
Harness Replays / Harness Replays (pull_request) Has been skipped
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Failing after 2s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Failing after 1s
lint-required-no-paths / lint-required-no-paths (pull_request) Failing after 1s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Failing after 1s
Secret scan / Scan diff for credential-shaped strings (pull_request) Failing after 1s
gate-check-v3 / gate-check (pull_request) Failing after 1s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-tier-check / tier-check (pull_request) Successful in 32s
sop-checklist / all-items-acked (pull_request) Successful in 1m16s
qa-review / approved (pull_request) Refired via /qa-recheck by unknown
security-review / approved (pull_request) Refired via /security-recheck by unknown
The ConfigTab runtime dropdown filtered GET /templates through a hardcoded
SUPPORTED_RUNTIME_VALUES allowlist (claude-code/codex/openclaw/hermes).
google-adk shipped in manifest.json + the workspace-server knownRuntimes
registry but was dropped by this frontend Set, so a google-adk workspace's
Config tab rendered the wrong runtime option and a Save would clobber the
runtime to the wrong value.

Make the picker trust the backend SSOT: /templates is already gated to the
manifest maintained set by loadRuntimesFromManifest. Remove the allowlist;
hide a runtime only when its template declares displayable:false (new
optional flag plumbed manifest config.yaml -> templateSummary -> /templates).

- canvas/ConfigTab.tsx: drop SUPPORTED_RUNTIME_VALUES; filter on
  r.displayable===false; add google-adk to offline FALLBACK list.
- workspace-server templates.go: add Displayable *bool (yaml+json,
  omitempty) so a template can opt out of the picker declaratively.
- tests: ConfigTab.googleAdk.test.tsx (google-adk selected + displayable
  hidden) + TestTemplatesList_DisplayableFlag (nil/true/false + JSON contract).

Refs project_canvas_runtime_dropdown_ssot_fix.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 01:07:29 -07:00
hongming 5fce77aac9 Merge pull request 'feat(workspace): per-workspace data_persistence choice (internal#734 PR-2)' (#2014) from feat/workspace-data-persistence into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 12s
publish-canvas-image / Build & push canvas image (push) Successful in 1m39s
publish-workspace-server-image / build-and-push (push) Successful in 3m30s
Block internal-flavored paths / Block forbidden paths (push) Successful in 15s
CI / Detect changes (push) Successful in 9s
CI / Python Lint & Test (push) Successful in 5s
E2E API Smoke Test / detect-changes (push) Successful in 6s
Handlers Postgres Integration / detect-changes (push) Successful in 8s
Harness Replays / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 3s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 46s
CI / Platform (Go) (push) Successful in 5m1s
CI / Canvas (Next.js) (push) Successful in 4m52s
CI / Shellcheck (E2E scripts) (push) Successful in 1s
CI / all-required (push) Successful in 20m0s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m4s
publish-workspace-server-image / Production auto-deploy (push) Successful in 18m42s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 6m13s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m27s
Harness Replays / Harness Replays (push) Successful in 2s
CI / Canvas Deploy Reminder (push) Successful in 1s
lint-bp-context-emit-match / lint-bp-context-emit-match (push) Successful in 1m21s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Failing after 3s
SECRET_PATTERNS drift lint / Detect SECRET_PATTERNS drift (push) Successful in 43s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 1s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Failing after 0s
E2E Staging External Runtime / E2E Staging External Runtime (push) Failing after 0s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Failing after 0s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Failing after 1s
E2E Staging Canvas (Playwright) / detect-changes (push) Failing after 0s
E2E Chat / detect-changes (push) Successful in 11s
E2E Chat / E2E Chat (push) Successful in 4m26s
main-red-watchdog / watchdog (push) Successful in 2m13s
gate-check-v3 / gate-check (push) Successful in 1m14s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 10s
E2E Legacy Advisory / Legacy local-platform E2E (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
ci-required-drift / drift (push) Successful in 1m1s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 14s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 3s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
2026-05-30 18:57:55 +00:00
hongming-ceo-delegated 257a61672b feat(canvas): per-workspace data persistence + erase-on-delete UI (internal#734)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 5s
CI / Detect changes (pull_request) Successful in 9s
CI / Python Lint & Test (pull_request) Successful in 5s
E2E API Smoke Test / detect-changes (pull_request) Successful in 8s
E2E Chat / detect-changes (pull_request) Successful in 10s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 13s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 51s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 5s
Harness Replays / detect-changes (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m13s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 35s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m16s
qa-review / approved (pull_request) Successful in 4s
security-review / approved (pull_request) Successful in 4s
CI / Platform (Go) (pull_request) Successful in 4m30s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m12s
E2E Chat / E2E Chat (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 5s
CI / Canvas (Next.js) (pull_request) Successful in 5m24s
CI / all-required (pull_request) Successful in 28m22s
gate-check-v3 / gate-check (pull_request) Successful in 8s
sop-checklist / all-items-acked (pull_request) Successful in 13s
sop-tier-check / tier-check (pull_request) Successful in 11s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / review-refire (pull_request) Has been skipped
Harness Replays / Harness Replays (pull_request) Successful in 2s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m34s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 48m58s
audit-force-merge / audit (pull_request) Successful in 21s
The user-facing choice for the prune/persistence backend:
- ContainerConfigTab: a 'Saved data' selector (Auto / Always keep / Don't keep)
  → compute.data_persistence (omitted when Auto = unchanged wire/default).
- DetailsTab delete: an 'also erase saved data' checkbox → DELETE
  ?erase_data=true (default off keeps it for the orphan-sweeper grace).
- WorkspaceCompute.data_persistence type.

+test: erase checkbox sends erase_data=true; default delete unchanged. The 37
ContainerConfigTab+DetailsTab tests pass; my files typecheck clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 09:22:20 -07:00
hongming-ceo-delegated db6f5b2e93 feat(workspace): prune-on-delete caller wiring (internal#734 F1 — pairs with cp#415)
Block internal-flavored paths / Block forbidden paths (pull_request) Waiting to run
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
CI / Platform (Go) (pull_request) Blocked by required conditions
CI / Canvas (Next.js) (pull_request) Blocked by required conditions
CI / Shellcheck (E2E scripts) (pull_request) Blocked by required conditions
E2E API Smoke Test / detect-changes (pull_request) Waiting to run
E2E API Smoke Test / E2E API Smoke Test (pull_request) Blocked by required conditions
E2E Chat / detect-changes (pull_request) Waiting to run
E2E Chat / E2E Chat (pull_request) Blocked by required conditions
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Waiting to run
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Blocked by required conditions
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Waiting to run
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Waiting to run
Handlers Postgres Integration / detect-changes (pull_request) Waiting to run
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Blocked by required conditions
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Waiting to run
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Waiting to run
Secret scan / Scan diff for credential-shaped strings (pull_request) Waiting to run
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Waiting to run
gate-check-v3 / gate-check (pull_request) Waiting to run
qa-review / approved (pull_request) Waiting to run
security-review / approved (pull_request) Waiting to run
sop-checklist / all-items-acked (pull_request) Waiting to run
sop-checklist / review-refire (pull_request) Waiting to run
sop-tier-check / tier-check (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Waiting to run
Harness Replays / detect-changes (pull_request) Waiting to run
Harness Replays / Harness Replays (pull_request) Blocked by required conditions
lint-required-no-paths / lint-required-no-paths (pull_request) Waiting to run
CI / Detect changes (pull_request) Successful in 10s
CI / Python Lint & Test (pull_request) Successful in 3s
CI / all-required (pull_request) Failing after 40m13s
CI / Canvas Deploy Reminder (pull_request) Has been cancelled
The caller side of the recreate-safe prune (cp#415 Five-Axis F1): the prune
signal reaches CP ONLY on a permanent user-delete-with-erase, NEVER on
restart/recreate/reconcile.

- CPProvisionerAPI.StopAndPrune (CPProvisioner builds DELETE with &prune=true;
  Stop never does — shared stopInternal).
- cpStopWithRetryErr(...prune): restart/hibernate pass false; delete passes the
  user choice.
- stopWorkspaceForDelete(...erase) → CascadeDelete(...erase): HTTP Delete reads
  ?erase_data=true (opt-in; default keeps data for the orphan-sweeper grace);
  org-import reconcile passes false.

Discriminating test: Stop sends NO prune=true (recreate-safety), StopAndPrune
sends it. All CPProvisionerAPI mocks gain StopAndPrune. Full handlers+provisioner
suite + vet + gofmt green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-30 09:16:47 -07:00
hongming-ceo-delegated acecb16d22 feat(workspace): per-workspace data_persistence choice (internal#734 PR-2)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 11s
CI / Detect changes (pull_request) Successful in 8s
CI / Python Lint & Test (pull_request) Successful in 3s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 6s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 3s
Harness Replays / detect-changes (pull_request) Successful in 3s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 33s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 32s
gate-check-v3 / gate-check (pull_request) Successful in 13s
qa-review / approved (pull_request) Failing after 7s
security-review / approved (pull_request) Failing after 5s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 7s
sop-tier-check / tier-check (pull_request) Successful in 8s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m13s
CI / Canvas (Next.js) (pull_request) Successful in 9s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
E2E Chat / E2E Chat (pull_request) Successful in 22s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2m7s
Harness Replays / Harness Replays (pull_request) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m41s
CI / Platform (Go) (pull_request) Successful in 6m17s
CI / all-required (pull_request) Successful in 10m14s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 50m1s
Threads the user's durable-data choice from the workspace Compute config
through to CP's provision request, so a user can pick persist vs ephemeral
per workspace (the caller side of cp#410's data_persistence support).

- models.WorkspaceCompute.DataPersistence (persisted in the compute JSONB)
- validateWorkspaceCompute: enum guard (persist|ephemeral|"") → clear 400
  before the CP round-trip; CP re-validates at its edge (defense in depth)
- WorkspaceConfig.DataPersistence + workspace_provision build site
- cpProvisionRequest.data_persistence (omitempty → ""=auto omitted on wire)

Empty/auto = today's behavior; forward-compatible (inert until CP deploys
cp#410). +validator enum test. build/vet/test/gofmt green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 23:49:37 -07:00
hongming 82bc28a098 Merge pull request 'test(e2e): wire google-adk into the runtime e2e suite' (#2012) from e2e/google-adk-ci-wiring into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
publish-workspace-server-image / build-and-push (push) Successful in 2m59s
Block internal-flavored paths / Block forbidden paths (push) Successful in 7s
CI / Python Lint & Test (push) Successful in 5s
CI / Detect changes (push) Successful in 7s
E2E API Smoke Test / detect-changes (push) Successful in 11s
Handlers Postgres Integration / detect-changes (push) Successful in 4s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 3s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (push) Successful in 3s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (push) Successful in 1m19s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 3s
Ops Scripts Tests / Ops scripts (unittest) (push) Successful in 1m6s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 54s
CI / Platform (Go) (push) Successful in 2s
CI / Canvas (Next.js) (push) Successful in 2s
CI / Shellcheck (E2E scripts) (push) Successful in 10s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2s
CI / all-required (push) Successful in 13m33s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m58s
publish-workspace-server-image / Production auto-deploy (push) Successful in 12m33s
CI / Canvas Deploy Reminder (push) Successful in 1s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
lint-bp-context-emit-match / lint-bp-context-emit-match (push) Successful in 1m28s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Successful in 3s
SECRET_PATTERNS drift lint / Detect SECRET_PATTERNS drift (push) Successful in 56s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 45s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 35m15s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 1m0s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m13s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 6s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 5m58s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Has been skipped
E2E Chat / detect-changes (push) Successful in 7s
E2E Chat / E2E Chat (push) Successful in 4m4s
E2E Legacy Advisory / Legacy local-platform E2E (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Railway pin audit (drift detection) / Audit Railway env vars for drift-prone pins (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m4s
main-red-watchdog / watchdog (push) Successful in 1m57s
gate-check-v3 / gate-check (push) Successful in 25s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
ci-required-drift / drift (push) Successful in 1m7s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 5s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 4s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 26s
2026-05-29 23:17:02 +00:00
core-devops 947cc730ba test(e2e): give google-adk a hermes-class online window
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Detect changes (pull_request) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
CI / Python Lint & Test (pull_request) Successful in 3s
E2E API Smoke Test / detect-changes (pull_request) Successful in 6s
E2E Chat / detect-changes (pull_request) Successful in 6s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 6s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 51s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 5s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Failing after 1m18s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m12s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 1m27s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 3s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m7s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m29s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m27s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 37s
gate-check-v3 / gate-check (pull_request) Successful in 9s
security-review / approved (pull_request) Failing after 5s
qa-review / approved (pull_request) Failing after 5s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 3s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 7s
CI / Platform (Go) (pull_request) Successful in 2s
CI / Canvas (Next.js) (pull_request) Successful in 1s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 3s
E2E Chat / E2E Chat (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 9s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
CI / all-required (pull_request) Successful in 14m30s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 38m40s
audit-force-merge / audit (pull_request) Successful in 7s
First cold boot of a google-adk workspace pulls a large fresh ADK image;
the default 300s online wait can read a slow first pull as "failed".
Bump google-adk's wait to 180 iters (900s), matching the rationale for
hermes' extended window. No behavior change for other runtimes.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 14:51:58 -07:00
core-devops 1ee864d523 test(e2e): wire google-adk into the runtime e2e suite
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Detect changes (pull_request) Successful in 5s
CI / Python Lint & Test (pull_request) Successful in 5s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 6s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 34s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 3s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Failing after 1m19s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m12s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 1m17s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m13s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 3s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m38s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 3s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m9s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 48s
gate-check-v3 / gate-check (pull_request) Successful in 3s
qa-review / approved (pull_request) Failing after 4s
security-review / approved (pull_request) Failing after 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 4s
CI / Platform (Go) (pull_request) Successful in 2s
CI / Canvas (Next.js) (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2s
E2E Chat / E2E Chat (pull_request) Successful in 2s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1s
CI / all-required (pull_request) Successful in 10m30s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 48m20s
google-adk was registered (manifest, provisioner, canvas, CP pin +
allowlist) but had no e2e coverage. Add it everywhere the other
runtimes sit so it is exercised "like other runtimes":

- scripts/test-all-runtimes-a2a-e2e.sh: provision + provider-key +
  online + A2A round-trip + session-continuity loops now include
  google-adk (5 runtimes). AI-Studio key via GOOGLE_API_KEY → workspace
  secret; SKIP_GOOGLE_ADK guard mirrors the other SKIP_* flags.
- e2e-staging-saas.yml + continuous-synth-e2e.yml: add the
  `google-adk)` per-runtime LLM-key case (expects
  MOLECULE_STAGING_GOOGLE_API_KEY) + E2E_GOOGLE_API_KEY env + the
  gemini model slug. Same dispatch-gated shape as codex/hermes/langgraph
  (Gitea drops workflow_dispatch.inputs, so E2E_RUNTIME-driven).

Auth note: PROD disallows API keys (Vertex+ADC there); CI uses the
keyed AI-Studio path (config model google_genai:gemini-2.5-pro). Vertex
stays the supported prod path. The MOLECULE_STAGING_GOOGLE_API_KEY
secret must be set for a green google-adk run (documented in-file).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 14:17:51 -07:00
hongming f84f9a5572 Merge pull request 'feat: register google-adk runtime (manifest + knownRuntimes + canvas)' (#2003) from feat/register-google-adk-runtime into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Block internal-flavored paths / Block forbidden paths (push) Successful in 7s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 38m14s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
CI / Detect changes (push) Successful in 9s
CI / Python Lint & Test (push) Successful in 5s
E2E API Smoke Test / detect-changes (push) Successful in 8s
E2E Chat / detect-changes (push) Successful in 8s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 6s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 38s
Handlers Postgres Integration / detect-changes (push) Successful in 5s
Harness Replays / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 7s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 4s
publish-canvas-image / Build & push canvas image (push) Successful in 2m2s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 32s
publish-workspace-server-image / build-and-push (push) Successful in 3m12s
CI / Platform (Go) (push) Successful in 4m22s
CI / Shellcheck (E2E scripts) (push) Successful in 2s
E2E Chat / E2E Chat (push) Successful in 3m32s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m31s
CI / Canvas (Next.js) (push) Successful in 5m8s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
CI / all-required (push) Successful in 11m48s
publish-workspace-server-image / Production auto-deploy (push) Successful in 10m48s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m32s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
Harness Replays / Harness Replays (push) Successful in 1s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 6m52s
CI / Canvas Deploy Reminder (push) Successful in 1s
ci-required-drift / drift (push) Successful in 1m16s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 6s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 4s
main-red-watchdog / watchdog (push) Successful in 2m13s
gate-check-v3 / gate-check (push) Successful in 37s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 15s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 15s
2026-05-29 18:51:46 +00:00
core-devops f82a980a79 test(canvas): add Google ADK to CreateWorkspaceDialog runtime-options assertion
audit-force-merge / audit (pull_request) Successful in 7s
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 6s
Check migration collisions / Migration version collision check (pull_request) Successful in 6s
CI / Detect changes (pull_request) Successful in 8s
CI / Python Lint & Test (pull_request) Successful in 3s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 8s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 44s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 37s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m18s
CI / Platform (Go) (pull_request) Successful in 5m1s
CI / Canvas (Next.js) (pull_request) Successful in 4m54s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 3s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
qa-review / approved (pull_request) Failing after 4s
security-review / approved (pull_request) Failing after 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
CI / Shellcheck (E2E scripts) (pull_request) Successful in 1s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
CI / all-required (pull_request) Successful in 25m28s
E2E Chat / E2E Chat (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m11s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m22s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 32s
gate-check-v3 / gate-check (pull_request) Failing after 4s
sop-checklist / all-items-acked (pull_request) Successful in 3s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 4s
Harness Replays / Harness Replays (pull_request) Successful in 1s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 37m39s
RUNTIME_OPTIONS gained 'Google ADK' but the test's hardcoded expected array
(separate-selectors test) still listed 4 → Canvas (Next.js) CI red (5 vs 4).
Add it in component order (after OpenAI Codex CLI). Caught by comprehensive
pre-merge review — a real regression from this PR's own diff, not the
staging-E2E infra flake.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 11:02:14 -07:00
hongming 64b7ecfb70 Merge pull request 'feat(budget): multi-period per-workspace LLM budget (hourly/daily/weekly/monthly)' (#2009) from feat/mc-multiperiod-workspace-budget into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
Block internal-flavored paths / Block forbidden paths (push) Successful in 4s
CI / Detect changes (push) Successful in 5s
CI / Python Lint & Test (push) Successful in 6s
E2E API Smoke Test / detect-changes (push) Successful in 7s
E2E Chat / detect-changes (push) Successful in 5s
publish-canvas-image / Build & push canvas image (push) Successful in 1m32s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 1m6s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 9s
publish-workspace-server-image / build-and-push (push) Successful in 3m50s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m9s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 1m5s
Handlers Postgres Integration / detect-changes (push) Successful in 3s
Harness Replays / detect-changes (push) Successful in 3s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 3s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 5s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 1m5s
CI / Platform (Go) (push) Successful in 5m12s
CI / Shellcheck (E2E scripts) (push) Successful in 3s
CI / Canvas (Next.js) (push) Successful in 5m58s
CI / all-required (push) Successful in 22m41s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m37s
publish-workspace-server-image / Production auto-deploy (push) Successful in 20m53s
E2E Chat / E2E Chat (push) Successful in 3m5s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 7m0s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m44s
Harness Replays / Harness Replays (push) Successful in 1s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Failing after 32m28s
CI / Canvas Deploy Reminder (push) Successful in 1s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 35m52s
Railway pin audit (drift detection) / Audit Railway env vars for drift-prone pins (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m48s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
main-red-watchdog / watchdog (push) Successful in 2m5s
gate-check-v3 / gate-check (push) Successful in 25s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 6s
ci-required-drift / drift (push) Successful in 1m18s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 35s
2026-05-29 11:45:32 +00:00
core-be cf7b587f16 feat(budget): multi-period per-workspace LLM budget (hourly/daily/weekly/monthly)
audit-force-merge / audit (pull_request) Successful in 4s
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 9s
CI / Detect changes (pull_request) Successful in 8s
CI / Python Lint & Test (pull_request) Successful in 3s
Check migration collisions / Migration version collision check (pull_request) Successful in 16s
E2E API Smoke Test / detect-changes (pull_request) Successful in 5s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 8s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 59s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 1m10s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m15s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 58s
CI / Platform (Go) (pull_request) Successful in 5m4s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 3s
gate-check-v3 / gate-check (pull_request) Successful in 6s
sop-checklist / na-declarations (pull_request) N/A: (none)
CI / Canvas (Next.js) (pull_request) Successful in 5m6s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 3s
CI / all-required (pull_request) Successful in 19m21s
Harness Replays / detect-changes (pull_request) Successful in 4s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m30s
sop-checklist / all-items-acked (pull_request) Successful in 3s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 6s
E2E Chat / E2E Chat (pull_request) Successful in 7s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m28s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 5s
Harness Replays / Harness Replays (pull_request) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m24s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
qa-review / approved (pull_request) Refired via /qa-recheck by unknown
security-review / approved (pull_request) Refired via /security-recheck by unknown
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 30m49s
Extends the single monthly per-workspace budget to four independent ROLLING
windows so a workspace can be capped per hour/day/week/month (#49 — gives the
canvas Budget tab a real lever against runaway LLM spend, e.g. the reno-stars
opus drain). SSOT design:

- budget_periods.go = single source of truth: the period set + rolling windows,
  one FILTERed per-period spend query over the ledger, and the PURE
  parse/encode/exceededPeriods logic. Add a period = one line here.
- migration: workspaces.budget_limits jsonb (canonical config, backfilled from
  the legacy monthly budget_limit) + workspace_spend_events ledger.
- heartbeat (registry.go): derive the spend INCREMENT from the agent's existing
  cumulative report (delta vs prev; reset-aware) → ledger row. Server owns
  windowing; NO runtime change.
- budget.go GET/PATCH: per-period limit/spend/remaining; accepts the new
  {budget_limits:{...}} shape AND the legacy {budget_limit} (→ monthly); legacy
  response fields still emitted + budget_limit kept synced (rollout back-compat).
  A limit of 0 = block-all (preserved); null/absent = no limit.
- a2a_proxy.go checkWorkspaceBudget: 402 if ANY configured period's rolling
  window spend >= its limit; fail-open on DB error.
- canvas BudgetSection: four period rows (USD limit input + spend/limit + bar).

Tests: pure SSOT (parse/encode/exceededPeriods); GET/PATCH + multi-period +
A2A enforcement (sqlmock, migrated to the new two-query flow); shared
expectBudgetCheck helpers updated; canvas behavioral + per-period progress/aria.
go build + vet + full handlers suite + migrations + canvas vitest all green.

NOTE: the duplicate components/__tests__/BudgetSection.test.tsx (old single-limit
UI) was repurposed to a focused per-period progress/aria suite — behavioral
coverage now lives in tabs/__tests__/BudgetSection.test.tsx (one component, no
parallel identical suites).

Refs #49.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-29 04:11:21 -07:00
hongming ffbd1a7ff0 Merge pull request 'feat(admin-schedules): orphan monitor + cleaner endpoints (internal#2006 backstops)' (#2008) from feat/schedule-orphan-monitor-cleaner into main
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
main-red-watchdog / watchdog (push) Successful in 58s
gate-check-v3 / gate-check (push) Successful in 24s
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 10s
Block internal-flavored paths / Block forbidden paths (push) Successful in 4s
CI / Detect changes (push) Successful in 6s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 10s
CI / Python Lint & Test (push) Successful in 6s
E2E API Smoke Test / detect-changes (push) Successful in 5s
E2E Chat / detect-changes (push) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 6s
Harness Replays / detect-changes (push) Successful in 4s
Handlers Postgres Integration / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 3s
ci-required-drift / drift (push) Successful in 1m34s
E2E Chat / E2E Chat (push) Successful in 3m23s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 55s
CI / Canvas (Next.js) (push) Successful in 2s
Harness Replays / Harness Replays (push) Successful in 4s
CI / Shellcheck (E2E scripts) (push) Successful in 3s
CI / Canvas Deploy Reminder (push) Successful in 2s
publish-workspace-server-image / build-and-push (push) Successful in 3m9s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m40s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 8s
CI / Platform (Go) (push) Successful in 4m51s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m38s
CI / all-required (push) Successful in 6m15s
publish-workspace-server-image / Production auto-deploy (push) Successful in 5m11s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 12s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
2026-05-29 09:38:34 +00:00
hongming feb2b8cfb8 Merge pull request 'fix(org-import): migrate runtime schedules from removed predecessor on recreate' (#2007) from fix/schedule-migration-on-recreate into main
ci-arm64-advisory / fast-checks (push) Waiting to run
CI / Platform (Go) (push) Blocked by required conditions
CI / Canvas (Next.js) (push) Blocked by required conditions
CI / Shellcheck (E2E scripts) (push) Blocked by required conditions
CI / Canvas Deploy Reminder (push) Blocked by required conditions
E2E API Smoke Test / E2E API Smoke Test (push) Blocked by required conditions
E2E Chat / E2E Chat (push) Blocked by required conditions
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Blocked by required conditions
Handlers Postgres Integration / Handlers Postgres Integration (push) Blocked by required conditions
Harness Replays / Harness Replays (push) Blocked by required conditions
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 9s
publish-workspace-server-image / build-and-push (push) Successful in 6m54s
Block internal-flavored paths / Block forbidden paths (push) Successful in 7s
CI / Detect changes (push) Successful in 11s
CI / Python Lint & Test (push) Successful in 5s
E2E API Smoke Test / detect-changes (push) Successful in 5s
E2E Chat / detect-changes (push) Successful in 13s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 12s
Handlers Postgres Integration / detect-changes (push) Successful in 5s
Harness Replays / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 7s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 5s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 13s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 49s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
CI / all-required (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
publish-workspace-server-image / Production auto-deploy (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
2026-05-29 09:28:59 +00:00
hongming-personal 4bee6cb4a7 feat(admin-schedules): orphan monitor + cleaner endpoints (backstops)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 3s
CI / Detect changes (pull_request) Successful in 6s
CI / Python Lint & Test (pull_request) Successful in 4s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 6s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 7s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 3s
Harness Replays / detect-changes (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 3s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m15s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 3s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 44s
gate-check-v3 / gate-check (pull_request) Successful in 6s
qa-review / approved (pull_request) Failing after 4s
security-review / approved (pull_request) Failing after 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-tier-check / tier-check (pull_request) Successful in 4s
CI / Canvas (Next.js) (pull_request) Successful in 25s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 5s
E2E Chat / E2E Chat (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 6s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 3m14s
Harness Replays / Harness Replays (pull_request) Successful in 5s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m53s
CI / Platform (Go) (pull_request) Successful in 8m18s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / all-required (pull_request) Successful in 10m45s
audit-force-merge / audit (pull_request) Successful in 5s
internal#2006 — backstops for the recreate-orphans-schedules class. The
primary fix is migration-on-recreate (separate PR); these are defense-in-depth
so a future regression is detected + recoverable instead of silent.

GET /admin/schedules/health reports only LIVE workspaces' schedules
(JOIN … WHERE status != 'removed'), so a schedule stranded on a
removed/recreated workspace silently stops firing and never shows there —
which is exactly why tonight's orphans went unnoticed.

- GET /admin/schedules/orphans (Orphans): the monitor surface — lists every
  schedule bound to a removed OR missing workspace (id, name, source, enabled,
  ws_status). A monitor polls this and pages on non-empty.
- POST /admin/schedules/reap-orphans (ReapOrphans): the cleaner — re-points
  runtime schedules onto the live successor agent (matched by role+parent),
  then disables any remaining dead-bound schedules so the scheduler stops
  firing into removed workspaces. Idempotent; returns {repointed, disabled}.

Health() is unchanged (no churn to its tests). +2 tests, +2 routes. Build +
handler tests green.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 09:08:40 +00:00
hongming-personal a44a110d60 fix(org-import): migrate runtime schedules from removed predecessor on recreate
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
CI / Canvas Deploy Reminder (pull_request) Blocked by required conditions
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 10s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Detect changes (pull_request) Successful in 6s
CI / Python Lint & Test (pull_request) Successful in 4s
E2E API Smoke Test / detect-changes (pull_request) Successful in 6s
E2E Chat / detect-changes (pull_request) Successful in 10s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 7s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 3s
Harness Replays / detect-changes (pull_request) Successful in 3s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 6s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m44s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 3s
gate-check-v3 / gate-check (pull_request) Successful in 6s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 28s
qa-review / approved (pull_request) Failing after 9s
security-review / approved (pull_request) Failing after 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 5s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 5s
CI / Canvas (Next.js) (pull_request) Successful in 3s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m22s
CI / Platform (Go) (pull_request) Successful in 4m58s
E2E Chat / E2E Chat (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 7s
CI / all-required (pull_request) Successful in 23m53s
Harness Replays / Harness Replays (pull_request) Successful in 2s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m25s
audit-force-merge / audit (pull_request) Successful in 7s
internal#2006 — recreating an agent orphans its schedules.

Root cause: createWorkspaceTree's INSERT … ON CONFLICT (parent_id,name)
WHERE status != 'removed' only matches NON-removed rows, so when an agent
is recreated after its prior workspace was marked removed, a brand-new
workspace id is minted. Reconcile then re-derives template-sourced state
(MODEL, template schedules via the upsert loop), but schedules a user added
at runtime (source='runtime', via the canvas/API) bind to the ephemeral
workspace_id and are abandoned on the removed row — they silently stop
firing (the 2026-05-29 agents-team incident: all 5 *-autonomous-tick
schedules, source=runtime, orphaned on removed ids; canvas showed
"missing schedulers").

Fix: after a fresh insert, migrate runtime-created schedules from the
most-recent removed predecessor of the same agent onto the new workspace.
The predecessor is matched by the stable `role` (survives the name
auto-suffixing that yields "Agent (2)"), falling back to name+parent.
Template-sourced schedules are NOT migrated (reconcile re-derives those);
runs before the template upsert loop so a same-named template schedule
still wins; skips names already present on the new workspace; best-effort
(logs, never errors the import).

Tests: predecessor-found re-points; no-predecessor (first create) does NOT
run the UPDATE; name-fallback branch.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 09:03:27 +00:00
core-lead 02917decc6 Merge pull request 'docs: correct fabricated google-adk + gemini-cli runtime tutorials' (#2004) from fix/google-adk-runtime-doc-accuracy into main
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 5s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
E2E Chat / detect-changes (push) Successful in 6s
E2E Chat / E2E Chat (push) Successful in 4m48s
ci-arm64-advisory / fast-checks (push) Waiting to run
gate-check-v3 / gate-check (push) Successful in 26s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 11s
main-red-watchdog / watchdog (push) Successful in 2m30s
Block internal-flavored paths / Block forbidden paths (push) Successful in 10s
CI / Detect changes (push) Successful in 10s
CI / Python Lint & Test (push) Successful in 5s
E2E API Smoke Test / detect-changes (push) Successful in 7s
CI / all-required (push) Successful in 2m42s
Handlers Postgres Integration / detect-changes (push) Successful in 6s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 5s
CI / Platform (Go) (push) Successful in 3s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 51s
CI / Canvas (Next.js) (push) Successful in 4s
CI / Shellcheck (E2E scripts) (push) Successful in 7s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 8s
CI / Canvas Deploy Reminder (push) Successful in 3s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 16s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 14s
publish-workspace-server-image / build-and-push (push) Successful in 5m12s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m7s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
publish-workspace-server-image / Production auto-deploy (push) Successful in 2m20s
E2E Legacy Advisory / Legacy local-platform E2E (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
ci-required-drift / drift (push) Successful in 1m25s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 42s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 43m56s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 1m27s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m9s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 9s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Failing after 32m15s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 8m4s
2026-05-29 06:26:26 +00:00
core-devops 1b543d8582 docs: correct fabricated google-adk + gemini-cli runtime tutorials
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Python Lint & Test (pull_request) Successful in 4s
CI / Detect changes (pull_request) Successful in 6s
E2E API Smoke Test / detect-changes (pull_request) Successful in 9s
E2E Chat / detect-changes (pull_request) Successful in 8s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 5s
CI / all-required (pull_request) Successful in 38s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 7s
Harness Replays / detect-changes (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 43s
gate-check-v3 / gate-check (pull_request) Successful in 3s
qa-review / approved (pull_request) Successful in 4s
security-review / approved (pull_request) Successful in 4s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m7s
sop-tier-check / tier-check (pull_request) Successful in 6s
CI / Platform (Go) (pull_request) Successful in 4s
CI / Canvas (Next.js) (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2s
E2E Chat / E2E Chat (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 5s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2s
Harness Replays / Harness Replays (pull_request) Successful in 2s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 6s
Both tutorials cited misattributed PRs and claimed shipped runtimes that
didn't exist (RFC internal#730 finding):
- google-adk-runtime.md: cited 'PR #550' (actually a MemoryTab test suite) +
  'already first-class'. Rewritten to the REAL implementation — ADK engine-only
  (google-adk[mcp]==2.1.0, no [a2a]), Vertex AI via ADC (keyless), a2a-1.x
  bridge — with correct PR refs (template PR #1, core #2003, ci #26) + a
  landing-status banner.
- gemini-cli-runtime.md: cited 'PR #379' (actually CI cleanup); no gemini-cli
  runtime exists in manifest/knownRuntimes. Added a correction banner pointing
  to the real google-adk runtime.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-28 23:07:13 -07:00
hongming 3f15c1448d Merge pull request 'fix(secrets): drop retired org-level guard from SetGlobal (global vendor keys are tenant-owned)' (#2002) from fix/setglobal-drop-retired-org-billing-guard into main
gate-check-v3 / gate-check (push) Successful in 29s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 20s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
ci-required-drift / drift (push) Successful in 1m1s
publish-workspace-server-image / build-and-push (push) Successful in 4m10s
CI / Detect changes (push) Successful in 5s
E2E API Smoke Test / detect-changes (push) Successful in 6s
Handlers Postgres Integration / detect-changes (push) Successful in 3s
Block internal-flavored paths / Block forbidden paths (push) Successful in 4s
CI / Python Lint & Test (push) Successful in 3s
E2E Chat / detect-changes (push) Successful in 6s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 6s
Harness Replays / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 3s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 3s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 3s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 38s
ci-arm64-advisory / fast-checks (push) Waiting to run
CI / Canvas (Next.js) (push) Successful in 3s
CI / Shellcheck (E2E scripts) (push) Successful in 9s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 12s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m55s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 8s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m0s
Harness Replays / Harness Replays (push) Successful in 1s
CI / Platform (Go) (push) Successful in 5m38s
CI / all-required (push) Successful in 27m42s
E2E Chat / E2E Chat (push) Successful in 4m2s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
SECRET_PATTERNS drift lint / Detect SECRET_PATTERNS drift (push) Successful in 34s
CI / Canvas Deploy Reminder (push) Successful in 3s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
publish-workspace-server-image / Production auto-deploy (push) Successful in 25m46s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 8s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
main-red-watchdog / watchdog (push) Successful in 28s
2026-05-29 04:38:13 +00:00
core-devops 0359912d06 feat: register google-adk runtime (manifest + knownRuntimes + canvas)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 18s
CI / Python Lint & Test (pull_request) Successful in 7s
CI / Detect changes (pull_request) Successful in 16s
E2E API Smoke Test / detect-changes (pull_request) Successful in 8s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 7s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 4s
Harness Replays / detect-changes (pull_request) Successful in 3s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 34s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 9s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 6s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 55s
gate-check-v3 / gate-check (pull_request) Successful in 4s
qa-review / approved (pull_request) Failing after 6s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m26s
security-review / approved (pull_request) Failing after 10s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-tier-check / tier-check (pull_request) Successful in 8s
sop-checklist / all-items-acked (pull_request) Successful in 9s
CI / Platform (Go) (pull_request) Successful in 4m30s
CI / Canvas (Next.js) (pull_request) Failing after 4m54s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 1s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / all-required (pull_request) Failing after 18m25s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m18s
E2E Chat / E2E Chat (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 3s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2s
Harness Replays / Harness Replays (pull_request) Successful in 1s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 42m20s
Platform-side registration for the google-adk workspace runtime (RFC
internal#730). Required so a workspace with runtime: google-adk provisions
(Docker path) and is creatable from the canvas:
- manifest.json: workspace_templates entry → handler allowlist (loadRuntimesFromManifest)
- provisioner/registry.go: knownRuntimes += google-adk (else ErrUnresolvableRuntime); test count 4→5
- canvas CreateWorkspaceDialog: RUNTIME_OPTIONS + BASE_RUNTIME_TEMPLATE_IDS
- canvas runtime-names.ts: display name

Depends on molecule-ai-workspace-template-google-adk (image build/publish) +
controlplane runtime_image_pins (SaaS path) — tracked in RFC #730.
Verified: go build + provisioner/handlers tests green; manifest.json valid.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-28 21:30:36 -07:00
hongming-personal 2cf7d006a9 fix(secrets): drop retired org-level guard from SetGlobal — global vendor keys are tenant-owned
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 5s
CI / Python Lint & Test (pull_request) Successful in 7s
CI / Detect changes (pull_request) Successful in 7s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 8s
Harness Replays / detect-changes (pull_request) Successful in 7s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 11s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 6s
qa-review / approved (pull_request) Failing after 15s
gate-check-v3 / gate-check (pull_request) Successful in 15s
security-review / approved (pull_request) Failing after 7s
sop-checklist / na-declarations (pull_request) N/A: (none)
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 31s
sop-checklist / all-items-acked (pull_request) Successful in 6s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 4s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m3s
CI / Canvas (Next.js) (pull_request) Successful in 6s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 4s
E2E Chat / E2E Chat (pull_request) Successful in 12s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2m4s
Harness Replays / Harness Replays (pull_request) Successful in 15s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 6s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m29s
CI / Platform (Go) (pull_request) Successful in 7m12s
CI / all-required (pull_request) Successful in 11m6s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 4s
internal#718 retired the org-level LLM billing rung (billing is resolved
per-workspace now). SetGlobal still called the legacy org-env guard
rejectPlatformManagedDirectLLMBypass, which reads MOLECULE_LLM_BILLING_MODE and
400s any vendor/oauth key write when the (legacy) org default is
platform_managed. That blocked setting a tenant's own MINIMAX_API_KEY (or any
custom-provider key) at global scope on a byok tenant — agents-team hit "direct
Hermes custom provider secrets are blocked for platform-managed LLM workspaces".

A global secret is the tenant's OWN shared credential. The provision-time
provider-matched strip (workspace_provision, core#2000) already removes any
global cred a given workspace's resolved provider does not accept, and the
platform-managed path strips bypass keys at provision too — so a platform-managed
workspace can never USE a non-matching global vendor/oauth key. The SetGlobal
org-env gate was redundant belt-and-suspenders keyed off the retired rung.

- SetGlobal: remove the org-level guard call.
- Delete the now-dead legacy helpers platformManagedLLMMode +
  rejectPlatformManagedDirectLLMBypass (org-env shims; the per-workspace
  successors rejectPlatformManagedDirectLLMBypassForWorkspace /
  platformManagedLLMModeForWorkspace remain and still gate per-workspace writes).
- Tests: convert the obsolete platform-managed rejection test into
  TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv (asserts the global
  write SUCCEEDS even with the legacy env still set to platform_managed).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 04:26:28 +00:00
hongming c99b0e3601 Merge pull request 'fix(workspace-server): provider-matched byok credential injection (internal#728 Bug 1) [BEHAVIOR-AFFECTING — CTO merge-go]' (#2000) from fix/internal-728-provider-matched-cred-injection into main
lint-bp-context-emit-match / lint-bp-context-emit-match (push) Successful in 1m15s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 4s
main-red-watchdog / watchdog (push) Successful in 2m5s
gate-check-v3 / gate-check (push) Successful in 25s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 31s
Handlers Postgres Integration / detect-changes (push) Successful in 7s
Harness Replays / detect-changes (push) Successful in 9s
publish-workspace-server-image / build-and-push (push) Successful in 6m21s
ci-arm64-advisory / fast-checks (push) Waiting to run
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 46s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 9s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m40s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
CI / Platform (Go) (push) Successful in 5m52s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 9s
CI / Canvas (Next.js) (push) Successful in 5s
CI / Shellcheck (E2E scripts) (push) Successful in 15s
CI / all-required (push) Successful in 7m54s
CI / Canvas Deploy Reminder (push) Successful in 2s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 42s
Harness Replays / Harness Replays (push) Successful in 7s
ci-required-drift / drift (push) Successful in 1m5s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m31s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 4m47s
E2E Chat / E2E Chat (push) Successful in 4m23s
publish-workspace-server-image / Production auto-deploy (push) Successful in 3m41s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 14s
CI / Detect changes (push) Successful in 9s
Block internal-flavored paths / Block forbidden paths (push) Successful in 8s
CI / Python Lint & Test (push) Successful in 4s
E2E API Smoke Test / detect-changes (push) Successful in 14s
E2E Chat / detect-changes (push) Successful in 11s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Successful in 4s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 7s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Has started running
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 11s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Has started running
2026-05-29 00:29:07 +00:00
hongming 4414c92a87 fix(workspace-server): provider-matched byok credential injection — strip stray non-matching global-origin LLM creds (internal#728 Bug 1) [BEHAVIOR-AFFECTING — CTO merge-go]
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 5m37s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
CI / Canvas (Next.js) (pull_request) Successful in 2s
Harness Replays / detect-changes (pull_request) Successful in 5s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 6s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 6s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 5s
E2E Chat / E2E Chat (pull_request) Successful in 4s
CI / Python Lint & Test (pull_request) Successful in 9s
CI / Detect changes (pull_request) Successful in 11s
gate-check-v3 / gate-check (pull_request) Successful in 5s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 10s
CI / Platform (Go) (pull_request) Successful in 4m37s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
qa-review / approved (pull_request) Failing after 6s
security-review / approved (pull_request) Failing after 10s
sop-checklist / all-items-acked (pull_request) Successful in 5s
sop-checklist / review-refire (pull_request) Has been skipped
Harness Replays / Harness Replays (pull_request) Successful in 3s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 25s
sop-checklist / na-declarations (pull_request) N/A: (none)
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m10s
sop-tier-check / tier-check (pull_request) Successful in 5s
CI / all-required (pull_request) Successful in 7m16s
E2E API Smoke Test / detect-changes (pull_request) Successful in 12s
E2E Chat / detect-changes (pull_request) Successful in 12s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 7s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 31s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m33s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m42s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 7s
#1995 removed the blanket global-LLM-cred strip on the byok branch (correct for
the platform-key co-mingling it targeted), but left EVERY claude-code workspace
inheriting the tenant-global CLAUDE_CODE_OAUTH_TOKEN. The claude-code runtime
greedily prefers that oauth (llm-auth: detected oauth -> api.anthropic.com), so
a workspace whose RESOLVED provider is NOT anthropic-oauth (minimax, kimi-byok)
routes its non-Anthropic model to Anthropic -> "Claude Code returned an error
result" (agents-team Dev Engineer B, MiniMax-M2.7; live-confirmed 2026-05-28 via
SSM container logs, internal#728 comment 52493).

Fix: provider-AWARE replacement for the over-removed strip. On the byok/disabled
branch, keep ONLY the global-origin LLM bypass creds whose env-var name is in
the RESOLVED provider's auth_env; strip the rest.
- minimax auth_env MINIMAX_API_KEY/ANTHROPIC_AUTH_TOKEN/ANTHROPIC_API_KEY ->
  stray global CLAUDE_CODE_OAUTH_TOKEN is non-matching -> stripped (fixes DevB).
- anthropic-oauth auth_env CLAUDE_CODE_OAUTH_TOKEN -> matches -> kept (PM opus +
  reno opus-byok NOT regressed; #1994 ByokGlobalScopeOAuthSurvives guard holds).
NOT a return to the blanket strip (which would re-break the byok-anthropic-oauth
case #1994 fixed) — keyed off DeriveProvider's resolved provider.

Provenance-scoped: only operator-store (global_secrets) origin keys are
provider-gated. User-authored workspace_secrets (provenance flag cleared by
loadWorkspaceSecrets) are NEVER stripped — JRS kimi workspace-key, reno's own
oauth are exempt. Fail-OPEN: an underivable provider / unavailable registry
strips nothing (keep-first; worst case is a kept stray, never removing the only
usable cred -> never fail-closes a legitimate byok workspace).

Threads loadWorkspaceSecrets's globalKeys provenance side-channel into
applyPlatformManagedLLMEnv (signature +map[string]struct{}); caller
prepareProvisionContext already has it.

Tests (llm_billing_mode_provision_parity_test.go):
- MinimaxStripsStrayGlobalOAuth — DevB repro: minimax-resolving ws strips the
  stray global oauth + keeps MINIMAX_API_KEY routing.
- WorkspaceOriginCredExemptFromStrip — user-authored ws_secrets cred survives
  even when non-matching.
- ByokGlobalScopeOAuthSurvives (strengthened) — global-origin oauth on opus
  SURVIVES via provider match (PM/reno regression guard).
Mutation-load-bearing (verified RED): (1) remove strip -> blanket-keep regresses
DevB; (2) empty keep set (provider-unaware) -> minimax routing + reno oauth
stripped; (3) iterate all bypass keys (provenance-unaware) -> user-authored cred
stripped.

build ok; build -tags=integration ok; go test ./internal/handlers/ ok;
golangci-lint ./internal/handlers/ -> 0 issues. Refs internal#728.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-29 00:05:21 +00:00
hongming efa60621f3 Merge pull request 'fix(prod-auto-deploy): fail on tenants not verified on target build (internal#724)' (#1998) from fix/internal-724-prod-auto-deploy-straggler-surfacing into main
CI / Detect changes (push) Successful in 19s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m27s
CI / Python Lint & Test (push) Successful in 27s
E2E Chat / detect-changes (push) Successful in 19s
publish-workspace-server-image / Production auto-deploy (push) Successful in 2m30s
E2E API Smoke Test / detect-changes (push) Successful in 21s
CI / all-required (push) Successful in 2m0s
Handlers Postgres Integration / detect-changes (push) Successful in 23s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 27s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 9s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (push) Successful in 10s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (push) Successful in 6s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m20s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 9s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 40s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (push) Successful in 1m36s
Ops Scripts Tests / Ops scripts (unittest) (push) Successful in 1m21s
publish-workspace-server-image / build-and-push (push) Successful in 5m25s
CI / Platform (Go) (push) Successful in 2s
CI / Canvas (Next.js) (push) Successful in 3s
CI / Shellcheck (E2E scripts) (push) Successful in 2s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 4s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 2s
E2E Chat / E2E Chat (push) Successful in 3s
CI / Canvas Deploy Reminder (push) Successful in 2s
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
Block internal-flavored paths / Block forbidden paths (push) Successful in 16s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 16s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 7s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
main-red-watchdog / watchdog (push) Successful in 2m1s
gate-check-v3 / gate-check (push) Successful in 24s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 4s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 10s
ci-required-drift / drift (push) Successful in 1m28s
2026-05-28 21:58:31 +00:00
hongming-personal 367bc1f7fc fix(prod-auto-deploy): fail on tenants not verified on target build (internal#724)
audit-force-merge / audit (pull_request) Successful in 17s
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
CI / Python Lint & Test (pull_request) Successful in 9s
CI / Detect changes (pull_request) Successful in 11s
E2E Chat / detect-changes (pull_request) Successful in 20s
CI / all-required (pull_request) Successful in 2m42s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 18s
E2E API Smoke Test / detect-changes (pull_request) Successful in 20s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 12s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 12s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 8s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 8s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Failing after 1m4s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 3s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m12s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m21s
gate-check-v3 / gate-check (pull_request) Successful in 8s
qa-review / approved (pull_request) Failing after 5s
security-review / approved (pull_request) Failing after 7s
sop-checklist / review-refire (pull_request) Has been skipped
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 1m31s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 35s
sop-tier-check / tier-check (pull_request) Successful in 4s
sop-checklist / all-items-acked (pull_request) acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, local-postgres-e2
sop-checklist / na-declarations (pull_request) N/A: (none)
CI / Platform (Go) (pull_request) Successful in 5s
CI / Canvas (Next.js) (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 2s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
E2E Chat / E2E Chat (pull_request) Successful in 6s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m34s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m7s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 4s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
The production auto-deploy aggregated per-tenant redeploy-fleet results
but never asserted fleet COVERAGE: a tenant that was enumerated but
silently skipped, or that SSM-succeeded onto the old image, passed as a
clean deploy. That is how agents-team stayed 46h behind the fleet with no
straggler reported.

Pairs with the controlplane fix that adds per-tenant verified_on_target
(docker-inspect proof the container is on the target tag). This change:

- rollout_stragglers(): every enumerated tenant NOT proven on the target
  build is a straggler — errored, skipped (no result row, the agents-team
  class), or verified_on_target=false. Backward-compatible: a missing key
  (pre-fix CP) is treated as verified so the gate degrades to the old
  ok-based behavior against an un-upgraded CP rather than failing spuriously.
- assert_full_coverage(): raises RolloutFailed (→ non-zero exit, response
  JSON written with ok=false + stragglers) when any straggler remains
  after a non-dry-run rollout. A dry run asserts nothing (it proves
  nothing landed).
- publish-workspace-server-image.yml: per-tenant summary gains an
  "On target" column and a loud ⚠ Stragglers section; the step emits a
  ::error:: naming the off-target tenants before failing.

Tests: straggler detection (off-target, no-result, dry-run-skip,
backward-compat missing key) + end-to-end execute_scoped_rollout fail/pass
— mutation-verified RED with the coverage gate removed. All existing
prod-auto-deploy tests still pass; ruff + py_compile clean; workflow YAML
validates.

Refs: internal#724

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-28 14:41:12 -07:00
hongming c2c6501a67 Merge pull request 'fix(workspace-server): provision-time billing derives from EFFECTIVE model, not raw payload.Model (#1994) [BEHAVIOR-AFFECTING — CTO merge-go]' (#1995) from fix/1994-provision-billing-model-passthrough into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 10s
Block internal-flavored paths / Block forbidden paths (push) Successful in 8s
CI / Detect changes (push) Successful in 8s
CI / Python Lint & Test (push) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 10s
E2E API Smoke Test / detect-changes (push) Successful in 11s
E2E Chat / detect-changes (push) Successful in 10s
Handlers Postgres Integration / detect-changes (push) Successful in 5s
Harness Replays / detect-changes (push) Successful in 4s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 28s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 16s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (push) Successful in 4s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m15s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 5s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 26s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (push) Successful in 1m37s
CI / Canvas (Next.js) (push) Successful in 14s
publish-workspace-server-image / build-and-push (push) Successful in 4m41s
CI / Shellcheck (E2E scripts) (push) Successful in 39s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 5m40s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 3m42s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 3s
Harness Replays / Harness Replays (push) Successful in 8s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m53s
CI / Canvas Deploy Reminder (push) Successful in 2s
CI / Platform (Go) (push) Successful in 8m22s
CI / all-required (push) Successful in 11m47s
E2E Chat / E2E Chat (push) Successful in 4m58s
publish-workspace-server-image / Production auto-deploy (push) Successful in 9m9s
main-red-watchdog / watchdog (push) Successful in 1m58s
gate-check-v3 / gate-check (push) Successful in 24s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 8s
ci-required-drift / drift (push) Successful in 1m7s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 5s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 3s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
2026-05-28 20:00:59 +00:00
hongming-ceo-delegated bbb445b956 fix(workspace-server): byok runs on the tenant's own global-scope LLM cred; stop stripping it (molecule-core#1994)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Detect changes (pull_request) Successful in 9s
CI / Python Lint & Test (pull_request) Successful in 8s
E2E API Smoke Test / detect-changes (pull_request) Successful in 10s
E2E Chat / detect-changes (pull_request) Successful in 8s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 12s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 14s
Harness Replays / detect-changes (pull_request) Successful in 7s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 3s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 33s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 11s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Failing after 1m1s
lint-mask-pr-atomicity / lint-mask-pr-atomicity (pull_request) Successful in 1m13s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m11s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 3s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 56s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 1m29s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 5s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 33s
gate-check-v3 / gate-check (pull_request) Successful in 3s
qa-review / approved (pull_request) Failing after 4s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m26s
security-review / approved (pull_request) Failing after 5s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 5s
sop-tier-check / tier-check (pull_request) Successful in 6s
CI / Canvas (Next.js) (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 15s
E2E Chat / E2E Chat (pull_request) Successful in 15s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 10s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 5m23s
Harness Replays / Harness Replays (pull_request) Successful in 5s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2m18s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m27s
CI / Platform (Go) (pull_request) Successful in 6m4s
CI / all-required (pull_request) Successful in 9m54s
audit-force-merge / audit (pull_request) Successful in 8s
Corrected-model credential fix (CTO-confirmed). `global_secrets` is the
TENANT's own secret store (shared across that tenant's workspaces), NOT the
platform's. The platform's own LLM credential is the CP proxy usage token,
injected separately on the platform_managed path; it is never stored in a
tenant's global_secrets.

The internal#711 provider-aware strip rested on the inverted premise that a
global-scope LLM credential was "the platform's own". On the byok/disabled
branch it stripped the tenant's OWN oauth when that oauth lived at global
scope, leaving the workspace credential-less -> MISSING_BYOK_CREDENTIAL ->
dead (Reno Stars Marketing/SEO byok agents, live-confirmed 2026-05-28).

Changes:
- workspace_provision.go: remove the stripGlobalOriginLLMCreds call on the
  byok/disabled branch; delete the now-dead function; drop the unused
  globalKeys parameter from applyPlatformManagedLLMEnv.
- secrets.go: remove the symmetric byok strip on the remote-pull path
  (GET /workspaces/:id/secrets/values) + its now-unused globalKeys tracking;
  the bundle is the tenant's merged secrets served verbatim.
- platform_managed path UNCHANGED: still strips direct oauth + forces the CP
  proxy usage token (metered). Only byok/disabled stop being stripped.
- Fail-closed UNCHANGED in spirit: a byok workspace with no LLM credential at
  ANY scope still aborts MISSING_BYOK_CREDENTIAL; the trigger narrowed from
  "no workspace-scoped cred" to "no cred at any scope".

Guard (co-mingling prevention at the write boundary):
- SetGlobal still rejects bypass-list keys for a platform_managed tenant
  (keeps a platform-shaped credential out of global_secrets going forward);
  added a regression test pinning it.

Tests: inverted the strip-asserting unit + e2e tests to the corrected model
(global-scope oauth survives, byok runs direct, no proxy); added genuinely-
credential-less byok fail-closed coverage; all three behavior changes are
mutation-load-bearing (re-adding either strip / dropping the SetGlobal guard
turns the respective test RED). build + vet + golangci-lint + the full
integration-tagged handlers suite green. The #1994 model-passthrough fix and
the MiniMax A2A e2e on this branch are untouched.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-28 19:45:55 +00:00
hongming-ceo-delegated 3269e93216 test(e2e): add real-completion + per-provider liveness + byok-routing A2A gate (#1994 follow-on)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 22s
CI / Python Lint & Test (pull_request) Successful in 16s
CI / Detect changes (pull_request) Successful in 38s
E2E API Smoke Test / detect-changes (pull_request) Successful in 22s
E2E Chat / detect-changes (pull_request) Successful in 13s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 10s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 7s
Harness Replays / detect-changes (pull_request) Successful in 5s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 12s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 3s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 34s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 4s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Failing after 1m7s
lint-mask-pr-atomicity / lint-mask-pr-atomicity (pull_request) Successful in 1m14s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m11s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 8s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 1m24s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m3s
gate-check-v3 / gate-check (pull_request) Successful in 5s
qa-review / approved (pull_request) Failing after 8s
security-review / approved (pull_request) Failing after 12s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 27s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 6s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m25s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 4m31s
CI / Canvas (Next.js) (pull_request) Successful in 6s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 17s
E2E Chat / E2E Chat (pull_request) Successful in 4s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 8s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2m12s
Harness Replays / Harness Replays (pull_request) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m16s
CI / Platform (Go) (pull_request) Successful in 6m10s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / all-required (pull_request) Successful in 13m28s
The A2A e2e historically asserted only response SHAPE (test_a2a_e2e.sh
checked '"kind":"text"' only). A broken agent returns its error AS a
text part -- {"kind":"text","text":"Agent error (Exception) ..."} --
which STILL matches the shape check, so it PASSED on a fully broken
agent. That is why the 2026-05-2x drained-key / byok-misroute failures
(agents-team PM + reno marketing erroring on every LLM call) sailed
through CI. "Channel returns text shape" is not "agent completed an LLM
round-trip."

Adds, ADDITIVELY (no existing assertion weakened or removed):

- tests/e2e/lib/completion_assert.sh -- reusable gates:
  * a2a_assert_real_completion: deterministic known-answer round-trip;
    asserts CONTAINS the expected token AND NOT an error-as-text marker
    (Agent error / Exception / error result / MISSING_BYOK_CREDENTIAL).
  * provider_liveness_matrix + offered_platform_models_for_runtime:
    per-offered-provider cheap (max_tokens:4) probe; the offered set is
    read from the providers.yaml SSOT (runtimes.<rt>.providers[platform]
    .models) -- not a hardcoded list -- so the matrix tracks the SSOT.
  * assert_byok_not_platform_proxy: #1994 regression guard -- a
    byok-resolving workspace must NOT resolve platform_managed (reads the
    same derived resolver GET /admin/workspaces/:id/llm-billing-mode the
    provision strip gate uses).

- tests/e2e/test_staging_full_saas.sh (the live-agent lane, MiniMax
  primary): new stanzas 8b (PINEAPPLE known-answer, the core gate),
  8c (byok-routing guard), 8d (SSOT-driven per-provider liveness matrix).

- tests/e2e/test_a2a_e2e.sh: added check_no_error_as_text on Echo + SEO
  replies so the brief's literal shape-only example now FAILS on an
  error-as-text payload.

- tests/e2e/test_completion_assert_unit.sh: offline fail-direction proof
  (16 cases) that the negative gates are load-bearing -- error-as-text
  MUST fail, platform_managed MUST trip the #1994 guard. Wired into
  ci.yml "Run E2E bash unit tests (no live infra)" (required, per-PR +
  main). e2e-staging-saas.yml paths filter extended to re-trigger the
  live lane on lib changes.

No #1994 fix code touched -- tests/e2e + workflow wiring only.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-28 18:58:13 +00:00
hongming 442f79a987 fix(workspace-server): provision-time billing derives from EFFECTIVE model, not raw payload.Model (molecule-core#1994)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 5s
CI / Python Lint & Test (pull_request) Successful in 7s
CI / Detect changes (pull_request) Successful in 9s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 8s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 8s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 5s
Harness Replays / detect-changes (pull_request) Successful in 3s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 9s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 45s
gate-check-v3 / gate-check (pull_request) Successful in 5s
qa-review / approved (pull_request) Failing after 10s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 39s
security-review / approved (pull_request) Failing after 5s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 6s
CI / Canvas (Next.js) (pull_request) Successful in 3s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 6s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m7s
E2E Chat / E2E Chat (pull_request) Successful in 23s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 6s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2m2s
sop-checklist / all-items-acked (pull_request) acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, local-postgres-e2
sop-checklist / na-declarations (pull_request) N/A: (none)
Harness Replays / Harness Replays (pull_request) Successful in 3s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m37s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 5m10s
CI / Platform (Go) (pull_request) Successful in 5m54s
CI / all-required (pull_request) Successful in 7m19s
The provision-time LLM billing resolver diverged from the read endpoint:
a byok workspace (claude-code, opus) was provisioned platform_managed and
routed through the platform LLM proxy, billing the platform Anthropic key
for the customer own usage (Reno Stars Marketing 6b66de8d; live-confirmed
2026-05-28).

Root cause: applyPlatformManagedLLMEnv passed the RAW payload.Model to
ResolveLLMBillingModeDerived. On a re-provision (restart/resume/
auto-restart) the payload is rebuilt from the DB with Name+Tier+Runtime
only (workspace_restart.go:333/844/1017 via withStoredCompute, which
backfills Compute but NOT Model), so payload.Model == "". DeriveProvider
errors on an empty model, the resolver defaults closed to platform_managed
and bakes ANTHROPIC_BASE_URL=<platform proxy>. The read endpoint
(ResolveLLMBillingMode -> readWorkspaceDeriveInputs) reads MODEL from
workspace_secrets, derives opus -> anthropic-oauth -> byok. Divergence,
deterministic on every re-provision.

Fix: extract effectiveModelForBilling (the fallback chain
applyRuntimeModelEnv already used: explicit -> MOLECULE_MODEL -> MODEL)
into a shared helper and have the billing resolver consult it, so the
provision-path derive inputs match the read-path. The stored model already
lives in the merged envVars (loadWorkspaceSecrets) — no new DB query. The
byok branch (no proxy override; strip only global-origin platform creds;
fail-closed on missing own cred, internal#711) is preserved unchanged;
genuinely-platform and no-model workspaces still default platform_managed
(CTO: default stays platform).

Tests (mutation-load-bearing): re-provision-uses-stored-model byok repro,
read/provision parity guard, default-preservation, and the #711 global-
only-oauth fail-closed guard. Reverting the envVars fallback turns the
repro + parity + #711 tests RED; default-preservation stays GREEN.

BEHAVIOR-AFFECTING (provisioning hot path) — needs CTO merge-go.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-28 18:45:58 +00:00
hongming 03aa69f46f Merge pull request 'P3 internal#718: canvas consumes registry-served /templates, retire hardcoded provider vocab #4/#5 (PR-B; NOT merged)' (#1978) from feat/internal-718-p3b-canvas-consume-registry into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
publish-canvas-image / Build & push canvas image (push) Successful in 3m11s
publish-workspace-server-image / build-and-push (push) Successful in 6m19s
Block internal-flavored paths / Block forbidden paths (push) Successful in 11s
CI / Python Lint & Test (push) Successful in 11s
CI / Detect changes (push) Successful in 12s
E2E API Smoke Test / detect-changes (push) Successful in 12s
Handlers Postgres Integration / detect-changes (push) Successful in 7s
Harness Replays / detect-changes (push) Successful in 10s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 11s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 9s
CI / Platform (Go) (push) Successful in 4s
CI / Shellcheck (E2E scripts) (push) Successful in 3s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 3s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Failing after 11m15s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m55s
Harness Replays / Harness Replays (push) Successful in 5s
CI / Canvas (Next.js) (push) Successful in 7m18s
CI / all-required (push) Successful in 21m18s
CI / Canvas Deploy Reminder (push) Successful in 7s
publish-workspace-server-image / Production auto-deploy (push) Successful in 46m7s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 30s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Successful in 5m51s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 1m3s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Has been skipped
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m11s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 6s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 5m53s
E2E Chat / detect-changes (push) Successful in 7s
E2E Chat / E2E Chat (push) Successful in 4m19s
E2E Legacy Advisory / Legacy local-platform E2E (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
Railway pin audit (drift detection) / Audit Railway env vars for drift-prone pins (push) Compensated by status-reaper (workflow has no push: trigger; Gitea 1.22.6 hardcoded-suffix bug — see .gitea/scripts/status-reaper.py)
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m24s
gate-check-v3 / gate-check (push) Successful in 34s
main-red-watchdog / watchdog (push) Successful in 2m16s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 18s
ci-required-drift / drift (push) Successful in 1m3s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 7s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 4m16s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 3s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 6m36s
2026-05-28 05:59:07 +00:00
hongming-personal 8546502ab8 test(canvas): make registryBilling test discriminate registry-vs-hardcoded billing precedence (#1978 review)
CI / Canvas Deploy Reminder (pull_request) Has been cancelled
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
CI / Platform (Go) (pull_request) Blocked by required conditions
CI / Canvas (Next.js) (pull_request) Blocked by required conditions
CI / Shellcheck (E2E scripts) (pull_request) Blocked by required conditions
CI / all-required (pull_request) Waiting to run
E2E API Smoke Test / E2E API Smoke Test (pull_request) Blocked by required conditions
E2E Chat / E2E Chat (pull_request) Blocked by required conditions
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Blocked by required conditions
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Blocked by required conditions
Harness Replays / Harness Replays (pull_request) Blocked by required conditions
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
qa-review / approved (pull_request) Failing after 9s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Detect changes (pull_request) Successful in 8s
CI / Python Lint & Test (pull_request) Successful in 4s
E2E Chat / detect-changes (pull_request) Successful in 6s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 8s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Harness Replays / detect-changes (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 12s
gate-check-v3 / gate-check (pull_request) Successful in 11s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 33s
security-review / approved (pull_request) Failing after 7s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 5s
sop-tier-check / tier-check (pull_request) Successful in 11s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m14s
audit-force-merge / audit (pull_request) Successful in 11s
agent-reviewer #7790 (blocking) found that ConfigTab.registryBilling.test.tsx
did not actually pin retire-list #5's core claim — both existing assertions
("platform"→platform_managed, "anthropic-oauth"→byok) return the SAME value
under both the registry-authoritative impl and a regression to the old
hardcoded billingModeForProvider rule, so the test was tautological and a
regression would still pass. The misleading comment on the anthropic-oauth
case claimed it was "a case the hardcoded rule gets WRONG" but the hardcoded
rule actually agrees there too.

This commit adds a genuine disagreement case: a registry provider
"managed-federated" whose registry-served billing_mode is "platform_managed"
even though its name is not "" / "platform" (so the legacy
billingModeForProvider rule would return "byok"). The new test asserts the
two rules disagree on this input (sanity) and then asserts
billingModeForSelectedProvider returns the REGISTRY value
("platform_managed"), which is only reachable by honoring the catalog.

Load-bearing proof: with the registry-first impl, the new test PASSES; when
billingModeForSelectedProvider is temporarily forced to fall through to the
hardcoded rule, the new test (and only the new test) FAILS with
expected 'platform_managed' / received 'byok' — proving it pins the
registry-wins contract.

Also fixes the misleading "hardcoded rule gets WRONG" comment on the
anthropic-oauth case (explicitly annotates it as non-discriminating and
points to the new disagreement case as the registry-WINS proof).

Implementation (billingModeForSelectedProvider) untouched — confirmed
byte-identical to PR #1978 HEAD (f2d7f1da).

Verification:
  - targeted: 5 passed (was 4 — adds the discriminating case)
  - regressed-impl: only the new test fails, others pass (= they are
    non-discriminating as the review found)
  - full canvas vitest: 223 files / 3381 passed | 1 skipped (3382) — +1
    vs the 3380/1 baseline
  - tsc: 0 new errors (touched file clean; pre-existing 223 baseline
    unchanged with my diff stashed)
  - eslint on touched file: 0

Refs: #1978, review #7790, internal#718 P3 retire-list #5.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 05:06:25 +00:00
hongming-personal f5c2882acb feat(canvas): P3 internal#718 — consume registry-served /templates list, retire hardcoded provider vocab (#4/#5)
P3 item 2. The canvas Provider/Model selector + Config-tab billing-mode now
consume the registry-served GET /templates fields (registry_backed /
registry_providers / registry_models from PR-A) instead of re-deriving provider
knowledge client-side. Retires the hardcoded vocabularies as the PRIMARY path:

- ProviderModelSelector (#4): new buildProviderCatalogFromRegistry(providers,
  models) builds the dropdown catalog from the registry payload — provider
  label = registry display_name, bucket = DERIVED provider, billing + auth_env
  from the registry — instead of inferVendor / VENDOR_LABELS /
  BARE_VENDOR_PATTERNS. The selector takes an optional pre-built `catalog`
  prop and uses it verbatim when supplied. inferVendor/buildProviderCatalog
  remain ONLY as the fallback for non-registry runtimes / older backends.
- ConfigTab (#5): when the selected runtime is registry-backed, the provider
  catalog + selector models come from registry_providers/registry_models, and
  billingModeForSelectedProvider(provider, catalog) reads the DERIVED provider's
  billing_mode off the registry catalog. The hardcoded billingModeForProvider
  ('' | 'platform' → platform_managed else byok) stays as the fallback only.
  So the billing-mode the UI shows/sends reflects the DERIVED provider
  (folds in the closed #1931's canvas intent).

Federation/back-compat preserved: a non-registry runtime (external/mock/kimi/
future third-party) or an older backend that doesn't serve the registry fields
yields registry_backed=false → the canvas keeps the template-served models +
its heuristic, unchanged. NO hard-reject (the canvas just can't render an
option the registry didn't serve for registry-backed runtimes).

Out of scope (per brief): the manifest runtime allowlist
(SUPPORTED_RUNTIME_VALUES / FALLBACK_RUNTIME_OPTIONS) is NOT a provider
vocabulary and is untouched; PUT /workspaces/:id/provider is NOT retired (that
CTO #3 follow-through is a later phase).

Stacked on PR-A (workspace-server registry-served /templates); re-target to
main after PR-A merges.

TDD: ProviderModelSelector.registry.test.tsx (catalog bucketed by derived
provider, labelled from display_name, carries billing_mode + auth_env, no empty
buckets), ConfigTab.registryBilling.test.tsx (billing reads registry catalog;
falls back to the legacy rule with no catalog / unknown provider). Full canvas
suite green (3380 passed / 1 skipped), tsc clean for touched files, eslint 0.

internal#718 P3 — not merged; CTO merge-go after Five-Axis (UI-affecting).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 05:06:25 +00:00
hongming 3dd7108cb4 Merge pull request 'P4 closure follow-up internal#718: retire LLM_PROVIDER + PUT/GET /provider + deriveProviderFromModelSlug (core; BEHAVIOR-AFFECTING; NOT MERGED)' (#1984) from feat/internal-718-p4-followup-llm-provider-removal into main
ci-arm64-advisory / fast-checks (push) Waiting to run
CI / Canvas Deploy Reminder (push) Blocked by required conditions
Handlers Postgres Integration / Handlers Postgres Integration (push) Blocked by required conditions
Harness Replays / Harness Replays (push) Blocked by required conditions
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 11s
publish-canvas-image / Build & push canvas image (push) Successful in 1m41s
publish-workspace-server-image / build-and-push (push) Successful in 3m26s
Block internal-flavored paths / Block forbidden paths (push) Successful in 7s
CI / Python Lint & Test (push) Successful in 16s
CI / Detect changes (push) Successful in 29s
E2E API Smoke Test / detect-changes (push) Successful in 18s
E2E Chat / detect-changes (push) Successful in 24s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 30s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 1m26s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 34s
Handlers Postgres Integration / detect-changes (push) Successful in 4s
Harness Replays / detect-changes (push) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 8s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 48s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m24s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Successful in 5m25s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Successful in 7m51s
SECRET_PATTERNS drift lint / Detect SECRET_PATTERNS drift (push) Successful in 36s
main-red-watchdog / watchdog (push) Successful in 49s
CI / Shellcheck (E2E scripts) (push) Successful in 11s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m9s
gate-check-v3 / gate-check (push) Successful in 58s
CI / Platform (Go) (push) Successful in 5m39s
CI / Canvas (Next.js) (push) Successful in 6m40s
E2E Chat / E2E Chat (push) Successful in 3m59s
CI / all-required (push) Successful in 26m41s
publish-workspace-server-image / Production auto-deploy (push) Successful in 54m26s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 6m33s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 44s
ci-required-drift / drift (push) Successful in 1m48s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 10s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 9s
2026-05-28 04:46:27 +00:00
hongming add37f35b0 Merge pull request 'P4 PR-2 internal#718: flip only-registered (runtime, model) gate from WARN to HARD-REJECT 422 (BEHAVIOR-AFFECTING)' (#1981) from feat/internal-718-p4-pr2-hard-reject-unregistered into main
ci-arm64-advisory / fast-checks (push) Waiting to run
CI / Canvas Deploy Reminder (push) Blocked by required conditions
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Blocked by required conditions
Handlers Postgres Integration / Handlers Postgres Integration (push) Blocked by required conditions
Harness Replays / Harness Replays (push) Blocked by required conditions
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 12s
publish-workspace-server-image / build-and-push (push) Successful in 3m17s
Block internal-flavored paths / Block forbidden paths (push) Successful in 8s
CI / Python Lint & Test (push) Successful in 6s
CI / Detect changes (push) Successful in 10s
E2E API Smoke Test / detect-changes (push) Successful in 6s
E2E Chat / detect-changes (push) Successful in 7s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 53s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 6s
Handlers Postgres Integration / detect-changes (push) Successful in 5s
Harness Replays / detect-changes (push) Successful in 3s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 10s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 30s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Successful in 5m45s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Successful in 4s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m4s
CI / Canvas (Next.js) (push) Successful in 5s
CI / Shellcheck (E2E scripts) (push) Successful in 6s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m16s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 19s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 5m13s
CI / Platform (Go) (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
CI / all-required (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
publish-workspace-server-image / Production auto-deploy (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
E2E Chat / E2E Chat (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 7m11s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 4s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 9s
2026-05-28 04:19:18 +00:00
claude-ceo-assistant 73871e7ade internal#718 P4 closure: retire LLM_PROVIDER + PUT/GET /provider + deriveProviderFromModelSlug
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
CI / Python Lint & Test (pull_request) Successful in 10s
CI / Detect changes (pull_request) Successful in 15s
E2E API Smoke Test / detect-changes (pull_request) Successful in 19s
E2E Chat / detect-changes (pull_request) Successful in 18s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Has been skipped
Check migration collisions / Migration version collision check (pull_request) Successful in 39s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 15s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 56s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 47s
Harness Replays / detect-changes (pull_request) Successful in 6s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 6s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 6s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 5s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 58s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m5s
qa-review / approved (pull_request) Failing after 4s
security-review / approved (pull_request) Failing after 4s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m16s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 4s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 5m53s
CI / Platform (Go) (pull_request) Successful in 6m15s
CI / Canvas (Next.js) (pull_request) Successful in 6m46s
CI / all-required (pull_request) Successful in 11m36s
E2E Chat / E2E Chat (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 23s
Harness Replays / Harness Replays (pull_request) Successful in 7s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m47s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m50s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
gate-check-v3 / gate-check (pull_request) Successful in 9s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 14s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 8s
audit-force-merge / audit (pull_request) Successful in 10s
The provider-SSOT closure: with the registry-derived provider model
(P0-P4) flowing through every decision point — proxy (P1), billing
(P2-B), templates (P3 PR-A/B), provisioner (P3 PR-C) — the
LLM_PROVIDER workspace_secret has no reader left on core. This PR
retires:

  - WorkspaceHandler.Create's setProviderSecret writes (the
    payload.LLMProvider and deriveProviderFromModelSlug-derived
    write paths). payload.LLMProvider is preserved on the request
    struct for backwards-compat with older canvases that still send
    it; the value is intentionally ignored. Coverage moved to
    TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL (asserts only
    the MODEL secret is written, even on a slug-prefixed model that
    pre-P4 would have triggered an LLM_PROVIDER write).

  - SecretsHandler.SetProvider / GetProvider gin handlers + the
    setProviderSecret helper. Both route registrations now point at
    handlers.ProviderEndpointGone, which returns 410 Gone with a
    structured PROVIDER_ENDPOINT_RETIRED body so older canvases that
    still call PUT /provider on Save fail loud rather than silently
    writing into a vanished row. Coverage: TestPutProvider_410Gone +
    TestGetProvider_410Gone + TestProviderEndpointGone_BodyShape.

  - deriveProviderFromModelSlug (retire-list #3) — the hand-rolled
    35-arm slug-prefix→provider switch in workspace_provision.go.
    Its only caller was Create's setProviderSecret write; the
    derivation now flows through providers.Manifest.DeriveProvider
    against the registry SSOT at every decision point. The drift
    test (derive_provider_drift_test.go) that pinned parity with the
    hermes template's derive-provider.sh is deleted with it. The
    shell script remains the in-container fallback; its byte-identity
    with the registry view of hermes is a P4 follow-up gated on
    registry data growth (see codegen of hermes config.yaml from the
    registry).

  - loadWorkspaceSecrets LLM_PROVIDER drop (defence-in-depth):
    any straggler workspace_secrets or global_secrets row keyed
    LLM_PROVIDER is filtered out before envVars is built, so a
    rolling deploy (new code, old DB) cannot re-emit the retired key
    into the CP-side provisioner env.

  - Canvas: ConfigTab.tsx no longer GETs or PUTs
    /workspaces/:id/provider, and the provider→billing-mode linkage
    (internal#703 Gap 2) is retired together — P2-B moved the
    platform-vs-byok decision to ResolveLLMBillingModeDerived, which
    derives the provider from (runtime, model). The provider
    dropdown still renders for display so users can preview the
    derived value locally. The two retired vitest suites
    (ConfigTab.provider, ConfigTab.billingMode) are replaced with
    documentation files pointing at the new coverage.

  - Migration 20260528000000_drop_llm_provider_workspace_secret
    removes any straggler rows from workspace_secrets. Idempotent:
    a fresh tenant with zero LLM_PROVIDER rows produces a 0-row
    delete. The .down.sql is a documented no-op (the rows cannot
    be reconstituted from a soft-delete, and the writers are gone).

Behavior delta — explicitly tested:

  - Registered (runtime, model) workspace → 201, provider derived,
    no LLM_PROVIDER stored. UNCHANGED for the runtime-visible
    `provider:` in /configs/config.yaml (CP-side commit derives it
    from the same registry).
  - PUT /workspaces/:id/provider → 410 Gone {code:
    PROVIDER_ENDPOINT_RETIRED, error, issue: internal#718}. Was 200
    with a workspace_secrets write.
  - GET /workspaces/:id/provider → 410 Gone. Was 200 + {provider,
    source}.
  - WorkspaceHandler.Create with a slug-prefixed model (e.g.
    minimax/MiniMax-M2.7) + an explicit llm_provider in the payload
    → only the MODEL workspace_secret is written. Pre-P4 both rows
    were written.
  - Existing workspace with an LLM_PROVIDER row → migration drops
    it at next deploy; loadWorkspaceSecrets filters it defensively
    in the interim.

Five-Axis review notes:

  - Correctness: the four readers of stored LLM_PROVIDER (core
    GetProvider, core loadWorkspaceSecrets, CP resolveModelAndProvider,
    CP ValidateProviderEnv) are all migrated in this PR + the
    CP-side commit. Audit query trail in the brief; the empirical
    finding is that no fifth reader exists (verified across both
    repos via grep of LLM_PROVIDER, setProviderSecret, SetProvider,
    GetProvider, llm_provider).
  - Tests: TDD red→green for the 410 Gone shape; SQL-mock for the
    "no LLM_PROVIDER write on Create" contract; existing P2-B
    billing tests confirm the derived-provider billing path is
    untouched (the regression risk this PR could have created).
  - Backward-compat: payload.LLMProvider preserved on the
    CreateWorkspacePayload struct; the canvas still sends it; the
    server ignores it. Older canvases that PUT /provider get a loud
    410 with a recognizable code so they can stop calling.
  - Rollback: revert the migration + revert this commit; the
    LLM_PROVIDER workspace_secret writers stay gone (the PUT route
    has no handler symbol to wire back without a separate revert).
  - Observability: provider derivation is logged in
    applyPlatformManagedLLMEnv (existing P2-B emission); no new
    structured-event surface added — the retirement is silent at
    the request boundary and the 410 Gone surface is the
    operator-facing signal.

cp#362 anthropic passthrough untouched. P1 proxy ResolveUpstream
untouched. P2-B billing derives via DeriveProvider — still reads
the same derivation, never the stored LLM_PROVIDER. P3 PR-A
templates-from-registry + P3 PR-C ValidateProviderEnv-from-registry
untouched. P4 PR-2 hard-reject 422 untouched.

NOT MERGED.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 21:12:55 -07:00
hongming 930f8753a9 Merge pull request 'P4 PR-1 internal#718 (sync): re-sync canonical providers.yaml with the colon-vocab reconcile (no behavior change)' (#1980) from feat/internal-718-p4-pr1-reconcile-colon-vocab-sync into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
Block internal-flavored paths / Block forbidden paths (push) Successful in 15s
CI / Detect changes (push) Successful in 16s
CI / Python Lint & Test (push) Successful in 6s
E2E API Smoke Test / detect-changes (push) Successful in 10s
E2E Chat / detect-changes (push) Successful in 10s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 8s
Handlers Postgres Integration / detect-changes (push) Successful in 8s
Harness Replays / detect-changes (push) Successful in 6s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 6s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Successful in 5s
publish-workspace-server-image / build-and-push (push) Successful in 8m29s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 49s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 6s
CI / Canvas (Next.js) (push) Successful in 3s
CI / Shellcheck (E2E scripts) (push) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 8s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m9s
Harness Replays / Harness Replays (push) Successful in 12s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m38s
E2E Chat / E2E Chat (push) Successful in 4m42s
CI / Platform (Go) (push) Successful in 6m6s
CI / Canvas Deploy Reminder (push) Successful in 2s
CI / all-required (push) Successful in 22m3s
publish-workspace-server-image / Production auto-deploy (push) Successful in 15m49s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 4m31s
main-red-watchdog / watchdog (push) Successful in 30s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 5m23s
gate-check-v3 / gate-check (push) Successful in 1m13s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 6s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 15s
ci-required-drift / drift (push) Successful in 1m26s
2026-05-28 03:41:48 +00:00
claude-ceo-assistant eacb8183c3 P4 PR-2 internal#718: flip only-registered (runtime, model) gate from WARN to HARD-REJECT 422 (BEHAVIOR-AFFECTING)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 5s
CI / Detect changes (pull_request) Successful in 10s
CI / Python Lint & Test (pull_request) Successful in 4s
E2E API Smoke Test / detect-changes (pull_request) Successful in 6s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 8s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 50s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m18s
gate-check-v3 / gate-check (pull_request) Successful in 7s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 28s
qa-review / approved (pull_request) Successful in 9s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 5s
security-review / approved (pull_request) Failing after 6s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 4s
CI / Canvas (Next.js) (pull_request) Successful in 6s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 4s
E2E Chat / E2E Chat (pull_request) Successful in 5s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 14s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m20s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m51s
Harness Replays / Harness Replays (pull_request) Successful in 6s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m30s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / Platform (Go) (pull_request) Successful in 6m10s
CI / all-required (pull_request) Successful in 12m14s
audit-force-merge / audit (pull_request) Successful in 5s
WorkspaceHandler.Create now returns 422 UNREGISTERED_MODEL_FOR_RUNTIME when the provider registry knows the runtime but the (runtime, model) pair is not in its native model set. Was the P2-B WARN-mode signal (X-Molecule-Model-Unregistered header + log; create proceeds); now a hard rejection at the boundary with no DB rows touched.

Behavior delta (under test):
  * Workspace with a REGISTERED (runtime, model) → 201, unchanged.
  * Workspace with an UNREGISTERED (runtime, model) → 422 with body
    {code:UNREGISTERED_MODEL_FOR_RUNTIME, error, runtime, model}, no DB writes (mock ExpectationsWereMet asserts zero unexpected DB calls).
  * Workspace with the legacy colon-form anthropic:claude-opus-4-7 for runtime=claude-code → 201 (P4 PR-1 reconciled the colon-vocab into the registry, making this a first-class registered model alongside the slash form).
  * Workspace with runtime NOT in the registry (langgraph/external/kimi/mock/federated) → unchanged (fails OPEN — federation-ready, the registry can not speak to non-first-party runtimes).
  * External workspaces (external=true or external-like runtime) → unchanged (URL is the contract, not the model).

Why P4 vs P2-B: P2-B kept WARN-mode because the legacy colon-namespaced BYOK vocabulary (anthropic:claude-opus-4-7 etc.) was live across the create/import/template corpus and not yet in the registry. P4 PR-1 reconciled that vocab into the per-runtime native sets (each runtime now lists bare + slash + colon forms for the BYOK ids in the live corpus). With the reconcile landed, an unregistered pair is a real misconfiguration and the gate flips loud — the codex anthropic:claude-opus-4-7 wedge class (the MODEL_REQUIRED gate targets the same failure mode) now fails AT THE BOUNDARY instead of provisioning a workspace that will wedge at adapter init.

Test surface (workspace_test.go):
  * TestWorkspaceCreate_718_P4_UnregisteredModelHardReject422 (NEW) — explicit 422 + body code + no DB writes
  * TestWorkspaceCreate_718_P4_RegisteredModelProceeds (renamed from _RegisteredModelNoWarnHeader) — 201 + no legacy WARN header
  * TestWorkspaceCreate_718_P4_LegacyColonVocabAccepted (NEW) — anthropic:claude-opus-4-7 on claude-code proceeds (the central regression guard for the PR-1 reconcile + PR-2 flip combo)
  * TestWorkspaceCreate_718_NonRegistryRuntimeFailsOpen — unchanged (federation path)

Fixture updates for the flip (tests that previously used an unregistered model as a fixture for OTHER gate paths; updated to a valid model so those gates can actually fire):
  * TestWorkspaceCreate_WithInvalidCompute_ReturnsBadRequest — gpt-4 (no runtime owns it) → claude-opus-4-7 (so the compute-validation 400 path tests what it should)
  * TestWorkspaceCreate_TemplateDefaultsMissingRuntimeAndModel — hermes/nousresearch/hermes-4-70b → hermes/moonshot/kimi-k2.6 (hermes native set per the CTO matrix)
  * TestWorkspaceCreate_TemplateDefaultsLegacyTopLevelModel — hermes/anthropic:claude-sonnet-4-5 → hermes/moonshot/kimi-k2.5
  * TestWorkspaceCreate_CallerModelOverridesTemplateDefault — hermes override minimax/MiniMax-M2.7 → moonshot/kimi-k2.5 (still tests the caller-overrides-template-default mechanic, just with a hermes-valid pair)

Phase-1 falsification + Phase-2 design were established in PR-1. Phase-3 TDD: each new behavior assertion mapped to a discriminating test (422 vs 201 vs unchanged WARN-header absence). Phase-4 Five-Axis to follow in PR review.

NOT regressed (verified via -short + -tags=integration -short for handlers + providers):
  * cp#362 anthropic passthrough (proxy layer; unaffected).
  * P1 proxy ResolveUpstream (registry resolution by namespace token; unaffected).
  * P2-B billing-derive (DeriveProvider semantics unchanged by the reconcile).
  * P3 templates-from-registry (GET /templates still serves ModelsForRuntime; PR-1 enlarges the set, this PR rejects calls outside it).

Stacked on feat/internal-718-p4-pr1-reconcile-colon-vocab-sync (PR-1 must merge first; this PR's tests would 422 the legacy colon vocab otherwise).

Refs internal#718.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 03:21:39 +00:00
claude-ceo-assistant 7bc52017ed P4 PR-1 sync internal#718: re-sync canonical providers.yaml from molecule-controlplane (colon-vocab reconcile)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 12s
CI / Python Lint & Test (pull_request) Successful in 20s
CI / Detect changes (pull_request) Successful in 21s
E2E API Smoke Test / detect-changes (pull_request) Successful in 17s
E2E Chat / detect-changes (pull_request) Successful in 17s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 18s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 11s
Harness Replays / detect-changes (pull_request) Successful in 9s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 8s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 8s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (pull_request) Failing after 7s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 7s
qa-review / approved (pull_request) Failing after 10s
gate-check-v3 / gate-check (pull_request) Successful in 10s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 11s
sop-checklist / review-refire (pull_request) Has been skipped
security-review / approved (pull_request) Failing after 11s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 31s
sop-tier-check / tier-check (pull_request) Successful in 6s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m9s
CI / Canvas (Next.js) (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 6s
E2E Chat / E2E Chat (pull_request) Successful in 7s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 3s
Harness Replays / Harness Replays (pull_request) Successful in 4s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m55s
CI / Platform (Go) (pull_request) Successful in 5m7s
CI / all-required (pull_request) Successful in 11m56s
audit-force-merge / audit (pull_request) Successful in 25s
Mirrors the canonical change in molecule-controlplane PR feat/internal-718-p4-pr1-reconcile-colon-vocab:
adds the legacy colon-namespaced BYOK model ids (anthropic:claude-*, moonshot:kimi-k2.*, minimax:MiniMax-M2*) to each runtime native set so DeriveProvider / Manifest.ModelsForRuntime returns true for every legitimate model in the live workspace-create corpus (canvas/ConfigTab default + ~44 test files + openclaw template precedent).

Per the sync_canonical_test.go header procedure:
  1. Copied molecule-controlplane/internal/providers/providers.yaml verbatim.
  2. Regenerated internal/providers/gen/registry_gen.go via go run ./cmd/gen-providers.
  3. Bumped canonicalProvidersYAMLSHA256 to the new canonical sha (73e8003062edaa4ce75bfb324be615b6e2b380f07487e3af4dc16cb644dc12bc).
  4. Synced runtimes_test.go to match CP's expanded claude-code expectation set.

ZERO behavior change in core: the WARN-mode validateRegisteredModelForRuntime gate (workspace.go:451-456) just goes silent for the now-registered colon-form models; the X-Molecule-Model-Unregistered response header stops being emitted for legitimate colon-form workspaces. No new rejection path; no proxy/billing-derive change.

Stacked atop molecule-controlplane PR-1 — merge order: CP PR-1 → core PR-1 sync. The cross-repo sync-providers-yaml CI gate stays green once the canonical lands.

Refs internal#718.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 03:16:05 +00:00
hongming 753e0f569d Merge pull request 'P3 internal#718: serve GET /templates selectable provider/model list FROM the registry (PR-A backend; NOT merged)' (#1977) from feat/internal-718-p3a-templates-from-registry into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Block internal-flavored paths / Block forbidden paths (push) Successful in 6s
CI / Python Lint & Test (push) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
CI / Detect changes (push) Successful in 9s
E2E API Smoke Test / detect-changes (push) Successful in 7s
E2E Chat / detect-changes (push) Successful in 6s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 11s
Handlers Postgres Integration / detect-changes (push) Successful in 16s
Harness Replays / detect-changes (push) Successful in 12s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 8s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 6s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 9s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 51s
publish-workspace-server-image / build-and-push (push) Successful in 3m10s
CI / Canvas (Next.js) (push) Successful in 12s
CI / Shellcheck (E2E scripts) (push) Successful in 3s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m26s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 9s
Harness Replays / Harness Replays (push) Successful in 5s
main-red-watchdog / watchdog (push) Successful in 55s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m5s
E2E Chat / E2E Chat (push) Successful in 5m4s
CI / Canvas Deploy Reminder (push) Successful in 2s
gate-check-v3 / gate-check (push) Successful in 39s
CI / Platform (Go) (push) Successful in 6m23s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 11s
CI / all-required (push) Successful in 13m4s
publish-workspace-server-image / Production auto-deploy (push) Successful in 11m54s
ci-required-drift / drift (push) Successful in 1m16s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 15s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 11s
lint-bp-context-emit-match / lint-bp-context-emit-match (push) Successful in 1m26s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 4m17s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 7m34s
2026-05-28 03:02:47 +00:00
hongming-personal 2d0d070040 feat(workspace-server): P3 internal#718 — serve GET /templates selectable provider/model list from the registry
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 10s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 6s
CI / Detect changes (pull_request) Successful in 12s
CI / Python Lint & Test (pull_request) Successful in 10s
E2E API Smoke Test / detect-changes (pull_request) Successful in 14s
E2E Chat / detect-changes (pull_request) Successful in 13s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 10s
Harness Replays / detect-changes (pull_request) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 10s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 31s
qa-review / approved (pull_request) Successful in 11s
security-review / approved (pull_request) Failing after 7s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m11s
gate-check-v3 / gate-check (pull_request) Successful in 27s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 12s
sop-tier-check / tier-check (pull_request) Successful in 15s
CI / Canvas (Next.js) (pull_request) Successful in 11s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 5s
E2E Chat / E2E Chat (pull_request) Successful in 4s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 7s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m22s
Harness Replays / Harness Replays (pull_request) Successful in 6s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m1s
CI / Platform (Go) (pull_request) Successful in 5m50s
CI / all-required (pull_request) Successful in 10m13s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 8s
P3 item 1 (retire-list #1 surface). GET /templates (templates.go List) now
ANNOTATES each registry-known runtime's template with an authoritative
registry-served selectable list, sourced from the provider registry
(workspace-server/internal/providers, the P2-A synced SSOT) instead of the
template's hand-authored config.yaml providers:/runtime_config.models block:

- registry_backed: true when the runtime is in the registry runtimes: block.
- registry_providers: the runtime's NATIVE provider set (ProvidersForRuntime),
  each with display_name + auth_env + billing_mode (platform_managed if the
  registry IsPlatform predicate holds, else byok) — the SSOT the canvas
  Provider dropdown consumes instead of its hardcoded VENDOR_LABELS map.
- registry_models: the runtime's NATIVE model ids (ModelsForRuntime), each
  annotated with its DERIVED provider (DeriveProvider) + the billing_mode that
  provider implies — so the canvas shows the billing source of the DERIVED
  provider (folds in #1931 intent) and can render no model the registry did
  not list for the runtime ("only registered selectable").

Additive + federation-ready + fail-OPEN: the existing template-served
Models/Providers/ProviderRegistry fields are UNCHANGED, so non-registry
runtimes (external/mock/kimi/future third-party) and older canvases keep
working — a runtime absent from the registry yields registry_backed=false and
no synthesized block. NO hard-reject: templates whose model isn't
registry-derivable are still served (WARN-level only; legacy-vocab reconcile
is P4).

Reuses the package-level providerRegistry() accessor + LLMBillingModePlatformManaged/
LLMBillingModeBYOK constants from llm_billing_mode.go (P2-B / #1972, now on
main) — one accessor + one constant set for the package; both the billing
derivation and this templates projection wrap the same providers.LoadManifest()
SSOT and the same wire strings.

Proxy ResolveUpstream / billing DeriveProvider untouched (P1/P2). Templates'
own config.yaml providers: codegen untouched (P4).

TDD: TestTemplatesList_RegistryServesSelectableModels (a template's bogus model
id never leaks into the registry-served list; native ids present),
TestTemplatesList_RegistryAnnotatesDerivedProviderAndBilling (derived
provider + platform_managed/byok per model; provider display_name/auth_env/
billing from the registry), TestTemplatesList_NonRegistryRuntimeFallsOpenToTemplate
(mock runtime: registry_backed=false, template fields untouched). All existing
TestTemplatesList_* stay green (template-served fields unchanged). Rebased onto
main after P2-B (#1972) landed; full handlers+providers suites green alongside it.

internal#718 P3 — not merged; CTO merge-go after Five-Axis (UI/API-affecting).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 19:21:04 -07:00
hongming 1e783ff6a2 Merge pull request 'provider-SSOT P2-B -> main: billing derives from provider (re-target #1971)' (#1972) from feat/internal-718-p2a-registry-codegen-distribution into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Block internal-flavored paths / Block forbidden paths (push) Successful in 8s
CI / Python Lint & Test (push) Successful in 11s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 12s
CI / Detect changes (push) Successful in 14s
E2E API Smoke Test / detect-changes (push) Successful in 13s
E2E Chat / detect-changes (push) Successful in 14s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 9s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 40s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 1m13s
Handlers Postgres Integration / detect-changes (push) Successful in 6s
Harness Replays / detect-changes (push) Successful in 2s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 3s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 39s
publish-workspace-server-image / build-and-push (push) Successful in 4m33s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Successful in 4m34s
CI / Canvas (Next.js) (push) Successful in 2s
E2E Staging External Runtime / E2E Staging External Runtime (push) Successful in 5m20s
CI / Shellcheck (E2E scripts) (push) Successful in 2s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Successful in 5m57s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 6s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m47s
CI / Platform (Go) (push) Successful in 5m19s
Harness Replays / Harness Replays (push) Successful in 15s
CI / Canvas Deploy Reminder (push) Successful in 2s
CI / all-required (push) Successful in 8m8s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m4s
E2E Chat / E2E Chat (push) Successful in 3m49s
publish-workspace-server-image / Production auto-deploy (push) Successful in 5m43s
ci-required-drift / drift (push) Successful in 1m15s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 11s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 16s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 12s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 4s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 4m14s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 5m24s
2026-05-28 02:09:09 +00:00
hongming 924dfa5598 test(workspace-server): remove unused wantWhy field in model_registry_validation_test (golangci-lint unused) — internal#718 P2-B
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
CI / Python Lint & Test (pull_request) Successful in 6s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 6s
CI / Detect changes (pull_request) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
E2E API Smoke Test / detect-changes (pull_request) Successful in 8s
E2E Chat / detect-changes (pull_request) Successful in 7s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 7s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 38s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 50s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 3s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m7s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 3s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 40s
gate-check-v3 / gate-check (pull_request) Successful in 3s
qa-review / approved (pull_request) Successful in 4s
security-review / approved (pull_request) Failing after 4s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 3s
CI / Canvas (Next.js) (pull_request) Successful in 5s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 3s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m24s
E2E Chat / E2E Chat (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 5s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m33s
Harness Replays / Harness Replays (pull_request) Successful in 2s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 6m37s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m48s
CI / Platform (Go) (pull_request) Successful in 5m9s
CI / all-required (pull_request) Successful in 9m1s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 7s
2026-05-28 01:39:27 +00:00
hongming 3ab690c273 Merge pull request 'P2-B internal#718: billing/credential derives from provider + only-registered validation (BEHAVIOR-AFFECTING; supersedes #1966)' (#1971) from feat/internal-718-p2b-billing-derives-from-provider into feat/internal-718-p2a-registry-codegen-distribution
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
CI / Canvas Deploy Reminder (pull_request) Blocked by required conditions
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Python Lint & Test (pull_request) Successful in 5s
E2E API Smoke Test / detect-changes (pull_request) Successful in 8s
CI / Detect changes (pull_request) Successful in 10s
E2E Chat / detect-changes (pull_request) Successful in 9s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 12s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 13s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 31s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 4s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 50s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 5s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 1m1s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m10s
gate-check-v3 / gate-check (pull_request) Successful in 9s
qa-review / approved (pull_request) Failing after 6s
security-review / approved (pull_request) Failing after 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 4s
CI / Canvas (Next.js) (pull_request) Successful in 8s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 3s
CI / Platform (Go) (pull_request) Failing after 2m5s
E2E Chat / E2E Chat (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 3s
CI / all-required (pull_request) Failing after 4m41s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m51s
Harness Replays / Harness Replays (pull_request) Successful in 3s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 4m50s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m12s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m32s
2026-05-28 01:22:20 +00:00
hongming 866a71777f Merge pull request 'P2-A internal#718: bring provider registry to molecule-core via codegen + verify-CI (NO behavior change)' (#1970) from feat/internal-718-p2a-registry-codegen-distribution into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 13s
Block internal-flavored paths / Block forbidden paths (push) Successful in 6s
CI / Python Lint & Test (push) Successful in 6s
CI / Detect changes (push) Successful in 8s
E2E API Smoke Test / detect-changes (push) Successful in 10s
E2E Chat / detect-changes (push) Successful in 18s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 18s
Harness Replays / detect-changes (push) Successful in 16s
Handlers Postgres Integration / detect-changes (push) Successful in 16s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (push) Successful in 12s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 10s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 11s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (push) Successful in 15s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 14s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (push) Successful in 7s
verify-providers-gen / Regenerate providers artifact and fail on drift (push) Successful in 32s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (push) Failing after 1m15s
CI / Canvas (Next.js) (push) Successful in 12s
CI / Shellcheck (E2E scripts) (push) Successful in 2s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (push) Successful in 1m28s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 13s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m12s
Harness Replays / Harness Replays (push) Successful in 6s
CI / Canvas Deploy Reminder (push) Successful in 3s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 16s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m41s
publish-workspace-server-image / build-and-push (push) Successful in 5m44s
E2E Chat / E2E Chat (push) Successful in 4m36s
ci-required-drift / drift (push) Successful in 1m6s
CI / Platform (Go) (push) Successful in 6m32s
CI / all-required (push) Successful in 8m48s
publish-workspace-server-image / Production auto-deploy (push) Successful in 5m12s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 6s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 7s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 3s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 4m5s
main-red-watchdog / watchdog (push) Successful in 2m2s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 5m22s
gate-check-v3 / gate-check (push) Successful in 27s
2026-05-28 01:10:25 +00:00
hongming-personal 11b0646b37 fix(ci): sync-providers-yaml gate fetch canonical via /raw not /contents
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 5s
CI / Detect changes (pull_request) Successful in 7s
CI / Python Lint & Test (pull_request) Successful in 6s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 10s
E2E API Smoke Test / detect-changes (pull_request) Successful in 8s
E2E Chat / detect-changes (pull_request) Successful in 8s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 9s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 10s
Harness Replays / detect-changes (pull_request) Successful in 7s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 4s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 7s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Failing after 1m3s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 5s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Failing after 1m18s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m21s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m17s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 6s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (pull_request) Successful in 4s
gate-check-v3 / gate-check (pull_request) Failing after 10s
qa-review / approved (pull_request) Failing after 9s
sop-checklist / na-declarations (pull_request) N/A: (none)
security-review / approved (pull_request) Failing after 13s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / all-items-acked (pull_request) Successful in 14s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 31s
sop-tier-check / tier-check (pull_request) Successful in 7s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m22s
CI / Canvas (Next.js) (pull_request) Successful in 4s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 6s
E2E Chat / E2E Chat (pull_request) Successful in 8s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 9s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 14s
Harness Replays / Harness Replays (pull_request) Successful in 11s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m20s
CI / Platform (Go) (pull_request) Successful in 5m10s
CI / all-required (pull_request) Successful in 8m18s
audit-force-merge / audit (pull_request) Successful in 7s
The cross-repo drift gate fetched controlplane providers.yaml from the
Gitea /contents endpoint with Accept: application/vnd.gitea.raw. On this
Gitea (1.22.6) that header is NOT honored on /contents -- it returns the
JSON+base64 envelope ({"name":"providers.yaml","content":"<base64>"...},
~45.6 KB), not raw bytes. So diff -u compared JSON-vs-YAML and exited 1
(RED) on every run even when byte-identical, making the gate inert
(detected neither sync nor real drift).

Switch the fetch to the /raw endpoint, which returns the file bytes
directly (33319 B, sha256 48a66921...), byte-identical to core's synced
copy. diff now exits 0 on the in-sync state and goes RED on real drift.
Authorization: token header kept; soft-fail backstop and the hermetic
sha-pin in sync_canonical_test.go are untouched.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-28 00:55:08 +00:00
core-devops 3165b98cc8 fix(workspace-server): P2-B internal#718 — billing/credential decision DERIVES the provider; supersede #1966 stored-read; retire org rung; only-registered validation (BEHAVIOR-AFFECTING)
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 11s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 7s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 7s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m11s
gate-check-v3 / gate-check (pull_request) Successful in 11s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 43s
qa-review / approved (pull_request) Successful in 10s
security-review / approved (pull_request) Successful in 8s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 5s
sop-checklist / all-items-acked (pull_request) acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, local-postgres-e2
sop-checklist / na-declarations (pull_request) N/A: (none)
audit-force-merge / audit (pull_request) Successful in 4s
Re-points the platform-vs-BYOK billing/credential decision to DERIVE the provider
from (runtime, model) via the registry SSOT, per the CTO directive (internal#718
comment, 2026-05-27): "the billing read must DERIVE the provider, not read a
stored LLM_PROVIDER", "remove LLM_PROVIDER entirely as a billing source", "retire
organizations.llm_billing_mode as a billing source".

## BEHAVIOR DELTA (this PR changes behavior — tested explicitly)
- platform-derived (or unset → platform default) → platform_managed → platform
  creds. UNCHANGED.
- non-platform-derived → byok → the already-merged #1963 strips platform
  scope:global LLM creds + FAIL-CLOSES if the workspace has no own cred. THIS IS
  THE INTENDED FIX (the Reno billing-leak class: Reno Stars SEO 352e3c2b /
  Marketing 6b66de8d ran on the platform's Anthropic credits because the never-
  written org rung always resolved platform_managed).
- unset model → platform default (CTO-confirmed).

## What changed
- `ResolveLLMBillingModeDerived(ctx, ws, runtime, model, authEnv)` — the new SSOT
  resolver: explicit `workspaces.llm_billing_mode` override (precedence 1, the
  only stored billing signal that survives — operator escape hatch) → else
  DeriveProvider + IsPlatform → else default-closed platform_managed.
- `ResolveLLMBillingMode(ctx, ws, orgMode)` legacy signature retained for callers
  without (runtime, model) (admin route, secrets remote-pull): reads the stored
  runtime + MODEL + auth-env names from DB and delegates to the derived resolver.
  `orgMode` is RETIRED/ignored; the org rung is gone.
- `applyPlatformManagedLLMEnv` calls the derived resolver directly (it has
  runtime + model + the workspace env) — no stored LLM_PROVIDER read. Feeds
  #1963's strip + fail-closed the correct DERIVED signal.
- SUPERSEDES core#1966: that PR made the billing read consult a stored
  LLM_PROVIDER first; this reworks the decision onto derive-from-provider. #1966
  should be closed in favor of this.
- Removed the now-dead org-default normalization (normalizeOrgDefault).
- ONLY-REGISTERED validation at create (model_registry_validation.go +
  WorkspaceHandler.Create): a (runtime, model) not in the registry's
  ModelsForRuntime for a REGISTRY-known runtime is flagged
  (X-Molecule-Model-Unregistered header + warning log). P2 = WARN mode (NOT hard
  422) because the legacy colon-namespaced model vocabulary ("anthropic:claude-
  opus-4-7") is still live across the create/import/template corpus and is not
  yet reconciled into the registry — hard-reject is a one-line flip gated on
  P3/P4 vocabulary convergence. Fails OPEN for non-registry runtimes
  (langgraph/external/kimi/mock/federated) so those flows are unchanged.

## Tests (TDD; behavior delta explicit)
- llm_billing_mode_derived_test.go — platform/non-platform/unset/override/
  unregistered/auth-env-disambiguation table + DB-error default-closed + empty-id.
- workspace_provision_shared_test.go — DERIVED platform→unchanged,
  non-platform→byok+strip+fail-closed (the FIX), unset→platform default, through
  the real applyPlatformManagedLLMEnv path. Existing #1963 override-byok strip +
  fail-closed tests unchanged (still pass).
- model_registry_validation_test.go + workspace_test.go — only-registered warn +
  registered-no-warn + non-registry-fail-open.
- Reworked the legacy resolver/admin/secrets tests off the retired org rung.

## Build/CI
go build ./... (+ -tags=integration) green; full `go test ./...` (43 pkgs) green
incl. -race on handlers; vet clean; changed files gofmt-clean. cp#362 anthropic
passthrough untouched (CP repo); merged #1963 strip+fail-closed reused unchanged.

internal#718 P2-B. BEHAVIOR-AFFECTING. Supersedes #1966. Not merged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 17:39:26 -07:00
core-devops 71c68e44f2 feat(providers): P2-A internal#718 — bring the provider registry to molecule-core via codegen + verify-CI (additive, zero behavior change)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 4s
CI / Python Lint & Test (pull_request) Successful in 5s
CI / Detect changes (pull_request) Successful in 7s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
E2E Chat / detect-changes (pull_request) Successful in 10s
E2E API Smoke Test / detect-changes (pull_request) Successful in 10s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 9s
Harness Replays / detect-changes (pull_request) Successful in 6s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 8s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 10s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m10s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m3s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Successful in 1m21s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 5s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 5s
sync-providers-yaml / Compare synced providers.yaml against controlplane canonical (pull_request) Failing after 5s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Failing after 1m36s
gate-check-v3 / gate-check (pull_request) Successful in 12s
verify-providers-gen / Regenerate providers artifact and fail on drift (pull_request) Successful in 38s
qa-review / approved (pull_request) Failing after 6s
security-review / approved (pull_request) Failing after 7s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 7s
CI / Canvas (Next.js) (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 3s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 5s
E2E Chat / E2E Chat (pull_request) Successful in 3s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 14s
Harness Replays / Harness Replays (pull_request) Successful in 7s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m34s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m34s
CI / Platform (Go) (pull_request) Successful in 5m44s
CI / all-required (pull_request) Successful in 8m39s
sop-checklist / all-items-acked (pull_request) [info tier:low] acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, l
sop-checklist / na-declarations (pull_request) N/A: (none)
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Distributes the provider-registry SSOT into molecule-core per the CTO-decided
shape (internal#718 comment, 2026-05-27): "Distribution = SDK via codegen +
verify-CI", multi-repo branch "codegen-checked-into-each-repo + verify-CI".

molecule-core has no Go module dependency on molecule-controlplane, so this
lands a SYNCED COPY of the canonical providers.yaml plus the loader,
DeriveProvider/IsPlatform/ResolveUpstream, the generated Go projection
(cmd/gen-providers), and the drift gates — a byte-faithful mirror of the
controlplane P0/P1 machinery. Canonical SSOT stays in controlplane
internal/providers/providers.yaml.

ZERO behavior change (additive, like P0): NO production code path imports the
new package yet. P2-B wires the billing/credential decision onto the loader.

What lands:
- internal/providers/{providers.go,derive_provider.go,providers.yaml} — mirror
  of the controlplane loader + canonical YAML (synced copy).
- internal/providers/gen/registry_gen.go — generated projection; fingerprint
  faffcbe59bb9f38c matches controlplane.
- cmd/gen-providers — the generator (go generate + -check drift mode).
- .gitea/workflows/verify-providers-gen.yml — artifact ↔ synced-copy drift gate
  (mirror of the controlplane workflow; standalone, not in branch protection
  yet — same soak-then-promote posture).
- .gitea/workflows/sync-providers-yaml.yml — NEW cross-repo gate: fetches the
  controlplane canonical providers.yaml and byte-compares against core's synced
  copy (RED on canonical drift). Read-only AUTO_SYNC_TOKEN; degrades to a
  warning if the token is absent.
- internal/providers/sync_canonical_test.go — hermetic sha pin of the synced
  copy (the always-on backstop; catches a hand-edit even with no network).
- internal/providers/gen_import_boundary_test.go — arch-lint-equivalent AST gate
  (core has no go-arch-lint): no production package may import the raw gen
  projection. Proven load-bearing.

Build/test: go build ./... (+ -tags=integration) green; providers/gen/
gen-providers suites pass (incl. -race); gen -check in sync; gofmt + vet clean.

internal#718 P2-A. NO behavior change. Not merged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 17:10:12 -07:00
hongming 7cfec2d61f Merge pull request 'fix(workspace-server): provider-aware gate on platform scope:global LLM creds (internal#711)' (#1963) from fix/byok-global-llm-cred-leak-internal-711 into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 12s
Block internal-flavored paths / Block forbidden paths (push) Successful in 11s
CI / Python Lint & Test (push) Successful in 5s
CI / Detect changes (push) Successful in 14s
E2E API Smoke Test / detect-changes (push) Successful in 11s
E2E Chat / detect-changes (push) Successful in 14s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 8s
Handlers Postgres Integration / detect-changes (push) Successful in 8s
Harness Replays / detect-changes (push) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 7s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 34s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 5s
CI / Canvas (Next.js) (push) Successful in 3s
CI / Shellcheck (E2E scripts) (push) Successful in 2s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m49s
publish-workspace-server-image / build-and-push (push) Successful in 3m13s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 13s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m50s
Harness Replays / Harness Replays (push) Successful in 4s
CI / Canvas Deploy Reminder (push) Successful in 2s
E2E Chat / E2E Chat (push) Successful in 3m54s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Successful in 5m55s
CI / Platform (Go) (push) Successful in 5m54s
CI / all-required (push) Successful in 7m12s
publish-workspace-server-image / Production auto-deploy (push) Successful in 6m21s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
ci-required-drift / drift (push) Successful in 1m14s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 6s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 12s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 8s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 4m44s
main-red-watchdog / watchdog (push) Successful in 27s
gate-check-v3 / gate-check (push) Successful in 23s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 5m57s
2026-05-27 20:18:18 +00:00
agent-platform-engineer 585b3d6ed0 fix(workspace-server): provider-aware gate on platform scope:global LLM creds (internal#711)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 7s
CI / Python Lint & Test (pull_request) Successful in 6s
CI / Detect changes (pull_request) Successful in 14s
E2E API Smoke Test / detect-changes (pull_request) Successful in 20s
E2E Chat / detect-changes (pull_request) Successful in 15s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 17s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 15s
Harness Replays / detect-changes (pull_request) Successful in 13s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 9s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 8s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 18s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 57s
gate-check-v3 / gate-check (pull_request) Successful in 7s
qa-review / approved (pull_request) Failing after 7s
sop-checklist / na-declarations (pull_request) N/A: (none)
security-review / approved (pull_request) Failing after 7s
sop-checklist / all-items-acked (pull_request) Successful in 7s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 5s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m9s
CI / Canvas (Next.js) (pull_request) Successful in 3s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 4s
E2E Chat / E2E Chat (pull_request) Successful in 19s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m39s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 7m40s
Harness Replays / Harness Replays (pull_request) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m7s
CI / Platform (Go) (pull_request) Successful in 5m52s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / all-required (pull_request) Successful in 15m9s
audit-force-merge / audit (pull_request) Successful in 9s
A workspace whose resolved LLM billing mode is NOT platform_managed
(byok / subscription) was still being injected with the platform's
scope:global CLAUDE_CODE_OAUTH_TOKEN and ran on the platform's Anthropic
credits. Confirmed live 2026-05-27 on the Reno Stars tenant: the SEO
(352e3c2b-...) and Marketing (6b66de8d-...) claude-code agents had no
workspace-scoped LLM credential, yet ran MODEL=opus directly on
api.anthropic.com using the platform's global OAuth token.

Root cause: loadWorkspaceSecrets merges ALL global_secrets into every
workspace's env provenance-blind. applyPlatformManagedLLMEnv's
non-platform (byok/disabled) path then early-returned WITHOUT stripping
those inherited platform globals — so a workspace with no LLM credential
of its own kept the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN.
The same leak existed on the remote-pull path (GET
/workspaces/:id/secrets/values), which also merged globals unconditionally.

Fix (provider-aware, both injection vectors):
- applyPlatformManagedLLMEnv now takes the global-provenance key set and,
  on the non-platform path, strips every platform-managed LLM bypass key
  (CLAUDE_CODE_OAUTH_TOKEN + the rest) that originated from global_secrets.
  A workspace's OWN LLM cred (a workspace_secrets row — provenance flag
  dropped by loadWorkspaceSecrets) is NOT in the global set and survives.
- secrets.Values applies the same provenance-aware gate before returning
  the merged bundle to a remote agent.
- Fail closed: a byok workspace left with no usable LLM credential aborts
  provision with code MISSING_BYOK_CREDENTIAL instead of starting on the
  (now-stripped) platform creds. Scoped to byok; disabled mode strips but
  still boots (no-LLM workspaces are legitimate).
- platform_managed path is unchanged (it still receives + force-routes the
  platform creds via the CP proxy), and the LLM-proxy anthropic path is
  untouched.

Tests (all green; go build/test ./... + -tags=integration build pass):
- ByokStripsGlobalOriginOAuthToken — platform global token stripped, no cred.
- ByokKeepsWorkspaceOwnOAuthEvenWithGlobal — workspace's own token survives.
- DisabledStripsGlobalButReportsNoCred — disabled strips but does not abort.
- PlatformManagedStillReceivesGlobalCreds — no regression on platform path.
- PrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed — e2e abort.
- SecretsValues_ByokStripsGlobalLLMCred — remote-pull path gated.

Note: open PR #1930 (refactor/drop-org-tier-llm-billing-mode, internal#691
follow-up) changes ResolveLLMBillingMode's signature in the same files.
This change is built on current main and is orthogonal in intent; whichever
merges second needs a mechanical 1-line resolver-call adjustment (drop the
orgMode arg). #1930 does NOT fix this leak.

Refs internal#711

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 12:55:58 -07:00
hongming 9deb8e9ea6 Merge pull request 'fix(security): scope peer discovery + a2a routing to caller org (#1953)' (#1954) from fix/1953-scope-peer-discovery-a2a-to-org into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 17s
Block internal-flavored paths / Block forbidden paths (push) Successful in 9s
CI / Python Lint & Test (push) Successful in 9s
CI / Detect changes (push) Successful in 17s
E2E API Smoke Test / detect-changes (push) Successful in 11s
E2E Chat / detect-changes (push) Successful in 15s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 11s
publish-workspace-server-image / build-and-push (push) Successful in 3m23s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (push) Successful in 54s
E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 38s
Handlers Postgres Integration / detect-changes (push) Successful in 7s
Harness Replays / detect-changes (push) Successful in 6s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 4s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 5s
CI / Canvas (Next.js) (push) Successful in 5s
CI / Shellcheck (E2E scripts) (push) Successful in 20s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 1m31s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Successful in 5m28s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 3s
CI / Platform (Go) (push) Successful in 4m55s
Harness Replays / Harness Replays (push) Successful in 9s
CI / all-required (push) Successful in 9m15s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (push) Successful in 6m44s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m48s
E2E Chat / E2E Chat (push) Successful in 4m13s
publish-workspace-server-image / Production auto-deploy (push) Successful in 8m4s
CI / Canvas Deploy Reminder (push) Successful in 1s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 10s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 5s
main-red-watchdog / watchdog (push) Successful in 44s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 6m45s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 5m46s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 3s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 9s
gate-check-v3 / gate-check (push) Successful in 41s
ci-required-drift / drift (push) Successful in 1m3s
2026-05-27 17:51:46 +00:00
core-be 69391595f3 fix(e2e): delete child before parent in test_api delete/round-trip (#1953)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 6s
CI / Python Lint & Test (pull_request) Successful in 5s
E2E API Smoke Test / detect-changes (pull_request) Successful in 13s
CI / Detect changes (pull_request) Successful in 13s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 13s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 10s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 27s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 12s
Harness Replays / detect-changes (pull_request) Successful in 6s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 47s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
review-check-tests / review-check.sh regression tests (pull_request) Successful in 9s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 1m8s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 6s
gate-check-v3 / gate-check (pull_request) Successful in 7s
qa-review / approved (pull_request) Failing after 7s
security-review / approved (pull_request) Failing after 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 4s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 5s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m5s
CI / Canvas (Next.js) (pull_request) Successful in 2s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 20s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m12s
E2E Chat / E2E Chat (pull_request) Successful in 5s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 6s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2m3s
Harness Replays / Harness Replays (pull_request) Successful in 4s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m4s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 6m38s
CI / Platform (Go) (pull_request) Successful in 5m56s
CI / all-required (pull_request) Successful in 8m1s
audit-force-merge / audit (pull_request) Successful in 10s
The #1953 fixture re-seed made Summarizer a CHILD of Echo (same-org) so
the peer-discovery assertions exercise legit same-org enumeration. But
Test 21 still deleted the PARENT (Echo) first and asserted the other
workspace survives (count=1). CascadeDelete walks the recursive parent_id
CTE, so deleting Echo also removed its child Summarizer -> "List after
delete" saw 0, and Test 22 then hit 410 Gone deleting an already-removed
Summarizer ("got: {error: workspace removed}").

Fix: capture Summarizer's bundle, delete the CHILD (Summarizer) first
(child delete does not cascade upward so Echo survives -> count=1), then
delete the parent Echo in the round-trip block and re-import the captured
bundle. Cross-tenant isolation and the same-org parent/child relationship
are unchanged; only the delete ordering is corrected.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 17:42:44 +00:00
hongming 46606801c6 Merge pull request 'fix(ci): add explicit utf-8 encoding to Python open() calls' (#1920) from fix/python-open-encoding into main
ci-arm64-advisory / fast-checks (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 11s
publish-workspace-server-image / build-and-push (push) Successful in 6m8s
Block internal-flavored paths / Block forbidden paths (push) Successful in 5s
CI / Detect changes (push) Successful in 8s
CI / Python Lint & Test (push) Successful in 4s
E2E Chat / detect-changes (push) Successful in 9s
E2E API Smoke Test / detect-changes (push) Successful in 9s
Handlers Postgres Integration / detect-changes (push) Successful in 4s
CI / all-required (push) Successful in 8m28s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 7s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Successful in 5s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 6s
review-check-tests / review-check.sh regression tests (push) Successful in 6s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s
Ops Scripts Tests / Ops scripts (unittest) (push) Successful in 1m5s
publish-workspace-server-image / Production auto-deploy (push) Successful in 4m52s
main-red-watchdog / watchdog (push) Successful in 57s
CI / Platform (Go) (push) Successful in 6s
CI / Canvas (Next.js) (push) Successful in 5s
CI / Shellcheck (E2E scripts) (push) Successful in 5s
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 6s
E2E Chat / E2E Chat (push) Successful in 6s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 6s
gate-check-v3 / gate-check (push) Successful in 1m12s
Sweep stale Cloudflare DNS records / Sweep CF orphans (push) Successful in 15s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 2m2s
ci-required-drift / drift (push) Successful in 1m10s
CI / Canvas Deploy Reminder (push) Successful in 5s
Sweep stale AWS Secrets Manager secrets / Sweep AWS Secrets Manager (push) Successful in 6s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 4m31s
Continuous synthetic E2E (staging) / Synthetic E2E against staging (push) Successful in 7m11s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 5s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 17s
2026-05-27 17:01:54 +00:00
hongming cd671e1263 Merge pull request 'fix(memory): upsert namespace before v2 commit' (#1925) from fix/memory-v2-upsert-namespace-20260526 into main
Block internal-flavored paths / Block forbidden paths (push) Waiting to run
ci-arm64-advisory / fast-checks (push) Waiting to run
CI / Detect changes (push) Waiting to run
CI / Platform (Go) (push) Blocked by required conditions
CI / Canvas (Next.js) (push) Blocked by required conditions
CI / Shellcheck (E2E scripts) (push) Blocked by required conditions
CI / Canvas Deploy Reminder (push) Blocked by required conditions
CI / Python Lint & Test (push) Waiting to run
E2E API Smoke Test / detect-changes (push) Waiting to run
E2E API Smoke Test / E2E API Smoke Test (push) Blocked by required conditions
E2E Chat / detect-changes (push) Waiting to run
E2E Chat / E2E Chat (push) Blocked by required conditions
E2E Staging Canvas (Playwright) / detect-changes (push) Waiting to run
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Blocked by required conditions
Handlers Postgres Integration / detect-changes (push) Waiting to run
Handlers Postgres Integration / Handlers Postgres Integration (push) Blocked by required conditions
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Waiting to run
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (push) Waiting to run
Secret scan / Scan diff for credential-shaped strings (push) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Successful in 12s
publish-workspace-server-image / build-and-push (push) Successful in 3m2s
Harness Replays / detect-changes (push) Successful in 3s
Sweep stale Cloudflare Tunnels / Sweep CF tunnels (push) Successful in 10s
Sweep stale e2e-* orgs (staging) / Sweep e2e orgs (push) Successful in 20s
Staging SaaS smoke (every 30 min) / Staging SaaS smoke (push) Successful in 5m52s
Harness Replays / Harness Replays (push) Successful in 4s
CI / all-required (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
publish-workspace-server-image / Production auto-deploy (push) Compensated by status-reaper (push run was cancelled/superseded; Gitea 1.22.6 reports cancelled runs as failure statuses)
2026-05-27 16:43:49 +00:00
core-be 51f74e9d8a fix(security): correct org-root CTE so same-org a2a routing works (#1953)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 34s
CI / Python Lint & Test (pull_request) Successful in 20s
CI / Detect changes (pull_request) Successful in 20s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Has been skipped
E2E API Smoke Test / detect-changes (pull_request) Successful in 28s
E2E Chat / detect-changes (pull_request) Successful in 28s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 43s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 10s
Harness Replays / detect-changes (pull_request) Successful in 8s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 1m2s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 7s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 1m30s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 18s
gate-check-v3 / gate-check (pull_request) Successful in 8s
qa-review / approved (pull_request) Successful in 12s
security-review / approved (pull_request) Failing after 10s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 9s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 5s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m11s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 7m21s
CI / Canvas (Next.js) (pull_request) Successful in 3s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 11s
E2E Chat / E2E Chat (pull_request) Successful in 25s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 12s
Harness Replays / Harness Replays (pull_request) Successful in 4s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Failing after 1m36s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m37s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / Platform (Go) (pull_request) Successful in 5m46s
CI / all-required (pull_request) Successful in 28m17s
The #1953 sameOrg() guard over-blocked legitimate SAME-ORG a2a routing:
orgRootSubtreeCTE carried `id AS root_id` from the recursive SEED, so a
non-root workspace resolved to ITSELF instead of its topmost ancestor.
sameOrg(child, root) therefore compared child-id vs root-id, reported the
pair as DIFFERENT orgs, and 403'd a legitimate same-org delegation. The
cross-org case was unaffected (two distinct roots already resolve to
different ids), so isolation stayed closed — but real same-org delegation
broke. Caught only by the real-Postgres integration suite: the sqlmock
unit tests hand-feed sameOrg() a root_id row and so structurally cannot
exercise the CTE.

Fix: select the parentless chain row's own `id` (aliased root_id) instead
of the seed-carried value. A node that already IS an org root has a
one-row chain and still resolves to itself.

Why the two required checks were red:

- handlers-postgres-integration (real CTE): the executeDelegation
  success-path fixtures seeded source AND target both parent_id=NULL —
  two DISTINCT org roots, i.e. a CROSS-tenant pair that only ever
  "communicated" via the OLD leaky root-sibling behavior #1953 closes.
  Re-seeded target as a CHILD of source (same org). With the same-org
  fixture, the CTE bug surfaced and is now fixed; all 5 ExecuteDelegation
  tests pass (success + failure paths). Added
  TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain as the real-SQL
  regression gate for root→child→grandchild resolution + cross-org denial.

- e2e-api (test_api.sh): created Echo + Summarizer both as org roots and
  asserted they appear in each other's /registry/:id/peers — that
  enumeration WAS the cross-tenant leak (org root seeing another org
  root). Re-created Summarizer as a child of Echo so the peer assertions
  exercise legitimate same-org parent/child enumeration.

Cross-tenant isolation remains closed (all cross-org negative tests pass);
same-org peers + a2a now work. go build ./... + go test ./internal/handlers/...
green; integration suite green.

Refs #1953

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 09:41:44 -07:00
core-be 6211d27bc7 fix(security): scope peer discovery + a2a routing to caller org (#1953)
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 5s
CI / Python Lint & Test (pull_request) Successful in 4s
CI / Detect changes (pull_request) Successful in 7s
E2E API Smoke Test / detect-changes (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 8s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Has been skipped
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 14s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 13s
Harness Replays / detect-changes (pull_request) Successful in 5s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 40s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 10s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 5s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 56s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 10s
gate-check-v3 / gate-check (pull_request) Successful in 6s
qa-review / approved (pull_request) Successful in 7s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 5s
security-review / approved (pull_request) Failing after 5s
sop-checklist / review-refire (pull_request) Has been skipped
sop-tier-check / tier-check (pull_request) Successful in 5s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m2s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 8m6s
CI / Canvas (Next.js) (pull_request) Successful in 4s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 8s
E2E Chat / E2E Chat (pull_request) Successful in 6s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 9s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Failing after 1m36s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Failing after 2m6s
Harness Replays / Harness Replays (pull_request) Successful in 1s
CI / Platform (Go) (pull_request) Successful in 5m39s
CI / all-required (pull_request) Successful in 6m4s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
Three workspace-server paths computed an "org-root sibling set" as
`WHERE parent_id IS NULL`, which matches EVERY tenant's org root (the
workspaces table has no org_id column) → cross-tenant data exposure:

  1. GET /registry/:id/peers (discovery.Peers) — returned peer
     id/name/role/url/agent_card across ALL tenants when the caller
     was itself an org root.
  2. MCP toolListPeers (mcp_tools.go) — same cross-tenant peer
     enumeration via the MCP bridge.
  3. a2a routing (a2a_proxy.proxyA2ARequest → resolveAgentURL) —
     CanCommunicate's "root-level siblings, both no parent" rule treats
     every tenant's org root as a sibling, and resolveAgentURL accepts
     ANY workspace id with no org check, so an org root could resolve
     and route a2a to another tenant's org root.

Fix — reuse the OFFSEC-015 broadcast scoping (commit 5a05302c,
workspace_broadcast.go): the org is the parent_id-chain subtree from a
single org root. New org_scope.go centralises that recursive CTE
(orgRootID / sameOrg) so all paths derive "the caller's org" the same way:

  - discovery.Peers + toolListPeers: drop the `parent_id IS NULL`
    sibling branch entirely. An org root has no siblings inside its own
    org; its peers are its children (still enumerated). Only the
    parent_id-bound sibling branch remains, already scoped to one tenant.
  - a2a proxyA2ARequest: after CanCommunicate, add a sameOrg() guard that
    rejects (403) before resolveAgentURL when caller and target resolve
    to different org roots. Fail-closed: a DB error denies routing.

No org_id column is added — that is a separate architecture decision
pending CTO. This uses the existing parent_id-chain scoping.

Tests (cross_tenant_isolation_test.go): per-path cross-tenant regression
— a DIFFERENT-org workspace must NOT appear in /registry peers, must NOT
appear in toolListPeers, and a2a MUST reject resolving/routing to a
workspace outside the caller's org; plus same-org positive tests. The
three negative tests were verified to FAIL against the pre-fix code.
Existing peer/a2a/delegation tests updated to the org-scoped behavior.

Follow-up for CTO: registry.CanCommunicate still treats any two org
roots as siblings, so discovery.Discover and CheckAccess share the same
root-sibling weakness. Scoping CanCommunicate itself (registry package)
would close that class fully; flagged separately as it is outside the
three #1953 paths.

Refs #1953

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 08:45:27 -07:00
claude-ceo-assistant 42b16b33fb fix(memory): upsert namespace before v2 commit
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 11s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 10s
CI / Detect changes (pull_request) Successful in 13s
CI / Python Lint & Test (pull_request) Successful in 9s
E2E API Smoke Test / detect-changes (pull_request) Successful in 14s
E2E Chat / detect-changes (pull_request) Successful in 14s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 14s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s
Harness Replays / detect-changes (pull_request) Successful in 9s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 7s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 6s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 7s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m11s
CI / Canvas (Next.js) (pull_request) Successful in 10s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 10s
E2E Chat / E2E Chat (pull_request) Successful in 10s
Harness Replays / Harness Replays (pull_request) Successful in 3s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m58s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 2m45s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
CI / Platform (Go) (pull_request) Successful in 6m7s
CI / all-required (pull_request) Successful in 13m6s
security-review / approved (pull_request) Refired via /security-recheck by unknown
qa-review / approved (pull_request) Refired via /qa-recheck by unknown
gate-check-v3 / gate-check (pull_request) Successful in 31s
sop-checklist / review-refire (pull_request) Has been skipped
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 22s
sop-tier-check / tier-check (pull_request) Successful in 14s
audit-force-merge / audit (pull_request) Successful in 7s
2026-05-26 12:38:50 -07:00
121 changed files with 12064 additions and 3349 deletions
+63
View File
@@ -208,6 +208,61 @@ def _raise_for_redeploy_result(status: int, body: dict, slugs: list[str]) -> Non
)
def rollout_stragglers(enumerated: list[str], results: list[dict]) -> list[str]:
"""Return every enumerated tenant NOT proven on the target build.
A straggler is any tenant the rollout was supposed to cover that the
CP could not verify is running the target image tag — whether it
errored, was skipped, or SSM-succeeded onto the wrong image
(internal#724). CP marks each per-tenant result row with
``verified_on_target`` (the REDEPLOY_RUNNING_IMAGE docker-inspect
proof). A tenant enumerated for the rollout but absent from the
result set (no batch ever ran it) is also a straggler — that is the
exact agents-team silent-skip class.
Backward-compat: an OLDER CP that doesn't emit ``verified_on_target``
yet returns rows without the key. Treat a missing key as verified so
this surfacing degrades to the previous (ok-based) behavior against an
un-upgraded CP, rather than failing every deploy spuriously. Once the
CP fix is deployed the key is always present and real stragglers are
caught.
"""
verified: set[str] = set()
for row in results:
if str(row.get("ssm_status") or "") == "DryRun":
continue
slug = str(row.get("slug") or "").strip()
if not slug:
continue
# Missing key (old CP) => assume verified; present key is authoritative.
if "verified_on_target" not in row or row.get("verified_on_target"):
verified.add(slug)
return sorted(s for s in dict.fromkeys(enumerated) if s not in verified)
def assert_full_coverage(enumerated: list[str], aggregate: dict, dry_run: bool) -> None:
"""Fail the rollout if any enumerated tenant is not on the target build.
This is the no-silent-skip gate (internal#724). A dry run proves
nothing landed, so coverage is not asserted for it.
"""
if dry_run:
return
stragglers = rollout_stragglers(enumerated, aggregate.get("results") or [])
if stragglers:
msg = (
f"incomplete rollout: {len(stragglers)} tenant(s) not verified on target "
f"after redeploy-fleet: {', '.join(stragglers)} "
f"(enumerated {len(set(enumerated))})"
)
aggregate["ok"] = False
aggregate["error"] = msg
aggregate["stragglers"] = stragglers
raise RolloutFailed(msg, aggregate)
def execute_scoped_rollout(
plan: dict,
token: str,
@@ -254,6 +309,14 @@ def execute_scoped_rollout(
aggregate["error"] = str(exc)
raise RolloutFailed(str(exc), aggregate) from exc
# No-silent-skip coverage gate (internal#724): every enumerated tenant
# must be PROVEN on the target build. A per-tenant HTTP-200/ok response
# is not proof — a tenant that SSM-succeeded but stayed on the old tag,
# or one enumerated but never batched, is a straggler. Surfacing it as
# a RolloutFailed makes the deploy step exit non-zero instead of
# silently reporting success (the exact agents-team failure mode).
assert_full_coverage(all_slugs, aggregate, dry_run)
return aggregate
@@ -355,3 +355,134 @@ def test_rollout_from_plan_file_writes_partial_response_on_failure(tmp_path):
assert response_path.read_text(encoding="utf-8").strip()
assert '"ok": false' in response_path.read_text(encoding="utf-8")
assert '"slug": "hongming"' in response_path.read_text(encoding="utf-8")
# ──────────────────────────────────────────────────────────────────────
# No-silent-skip coverage gate (internal#724)
# ──────────────────────────────────────────────────────────────────────
def test_rollout_stragglers_flags_tenant_not_on_target():
# b SSM-succeeded but its container is on the old tag → straggler.
stragglers = prod.rollout_stragglers(
["a", "b", "c"],
[
{"slug": "a", "verified_on_target": True},
{"slug": "b", "verified_on_target": False, "running_image": "platform-tenant:staging-old"},
{"slug": "c", "verified_on_target": True},
],
)
assert stragglers == ["b"]
def test_rollout_stragglers_flags_enumerated_tenant_with_no_result():
# agents-team class: enumerated but no batch ever produced a row for it.
stragglers = prod.rollout_stragglers(
["a", "agents-team"],
[{"slug": "a", "verified_on_target": True}],
)
assert stragglers == ["agents-team"]
def test_rollout_stragglers_missing_key_is_backward_compatible():
# Older CP without verified_on_target → treat as verified (no spurious fail).
stragglers = prod.rollout_stragglers(
["a", "b"],
[{"slug": "a", "healthz_ok": True}, {"slug": "b", "healthz_ok": True}],
)
assert stragglers == []
def test_rollout_stragglers_ignores_dry_run_rows():
stragglers = prod.rollout_stragglers(
["a"], [{"slug": "a", "ssm_status": "DryRun"}]
)
# dry-run row is skipped, so "a" has no verifying row → straggler.
assert stragglers == ["a"]
def test_scoped_rollout_fails_when_a_tenant_stays_on_old_tag():
# Every per-tenant call returns ok=True, but agents-team is NOT
# verified_on_target. The rollout must still fail loudly — this is
# the exact "reported success, one tenant silently skipped" bug.
def fake_redeploy(_cp_url, _token, body):
rows = []
for slug in body["only_slugs"]:
rows.append({"slug": slug, "verified_on_target": slug != "agents-team"})
return 200, {"ok": True, "results": rows}
try:
prod.execute_scoped_rollout(
{
"cp_url": "https://api.moleculesai.app",
"body": {
"target_tag": "staging-new",
"batch_size": 5,
"dry_run": False,
"confirm": True,
},
},
token="secret",
list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
redeploy=fake_redeploy,
sleep=lambda _s: None,
)
except prod.RolloutFailed as exc:
assert "incomplete rollout" in str(exc)
assert exc.response["stragglers"] == ["agents-team"]
assert exc.response["ok"] is False
else:
raise AssertionError("expected an incomplete rollout to fail loudly")
def test_scoped_rollout_passes_when_all_tenants_verified_on_target():
def fake_redeploy(_cp_url, _token, body):
return 200, {
"ok": True,
"results": [{"slug": s, "verified_on_target": True} for s in body["only_slugs"]],
}
aggregate = prod.execute_scoped_rollout(
{
"cp_url": "https://api.moleculesai.app",
"body": {
"target_tag": "staging-new",
"batch_size": 5,
"dry_run": False,
"confirm": True,
},
},
token="secret",
list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
redeploy=fake_redeploy,
sleep=lambda _s: None,
)
assert aggregate["ok"] is True
assert "stragglers" not in aggregate
def test_scoped_rollout_dry_run_does_not_assert_coverage():
# A dry run proves nothing landed; coverage must NOT be asserted or
# every plan would fail.
def fake_redeploy(_cp_url, _token, body):
return 200, {
"ok": True,
"results": [{"slug": s, "ssm_status": "DryRun"} for s in body["only_slugs"]],
}
aggregate = prod.execute_scoped_rollout(
{
"cp_url": "https://api.moleculesai.app",
"body": {
"target_tag": "staging-new",
"batch_size": 5,
"dry_run": True,
"confirm": True,
},
},
token="secret",
list_slugs=lambda _u, _t, _b: ["a", "b"],
redeploy=fake_redeploy,
sleep=lambda _s: None,
)
assert aggregate["ok"] is True
+8
View File
@@ -357,6 +357,14 @@ jobs:
name: Run E2E bash unit tests (no live infra)
run: |
bash tests/e2e/test_model_slug.sh
# molecule-core#1995 (#1994 follow-on): fail-direction proof for
# the A2A real-completion + byok-routing assertion helpers
# (lib/completion_assert.sh). Offline (no LLM, no network): it
# asserts an error-as-text payload FAILS the real-completion gate
# — the exact trap the historical shape-only `"kind":"text"`
# check missed. If a refactor weakens the gate to a shape check,
# this step goes red on every PR.
bash tests/e2e/test_completion_assert_unit.sh
- if: ${{ needs.changes.outputs.scripts == 'true' }}
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
@@ -166,6 +166,10 @@ jobs:
# canary path. The script picks the right blob shape based on
# which key is non-empty.
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
# google-adk canary path — AI-Studio key (config model
# google_genai:gemini-2.5-pro). PROD disallows API keys (Vertex+ADC);
# the keyed path is CI-only. Dispatch with E2E_RUNTIME=google-adk.
E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -217,6 +221,10 @@ jobs:
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
required_secret_value="${E2E_OPENAI_API_KEY:-}"
;;
google-adk)
required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
required_secret_value="${E2E_GOOGLE_API_KEY:-}"
;;
*)
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
required_secret_name=""
+12 -1
View File
@@ -49,6 +49,7 @@ on:
- 'workspace-server/internal/middleware/**'
- 'workspace-server/internal/provisioner/**'
- 'tests/e2e/test_staging_full_saas.sh'
- 'tests/e2e/lib/completion_assert.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- '.gitea/workflows/e2e-staging-saas.yml'
@@ -61,6 +62,7 @@ on:
- 'workspace-server/internal/middleware/**'
- 'workspace-server/internal/provisioner/**'
- 'tests/e2e/test_staging_full_saas.sh'
- 'tests/e2e/lib/completion_assert.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- '.gitea/workflows/e2e-staging-saas.yml'
@@ -155,13 +157,18 @@ jobs:
# E2E_RUNTIME=hermes or =codex via workflow_dispatch can still
# exercise the OpenAI path.
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
# google-adk (operator-dispatched only) auths Gemini with an
# AI-Studio key. Org policy disallows API keys in PROD (Vertex+ADC
# there); CI uses the keyed AI-Studio path with config model
# google_genai:gemini-2.5-pro. Vertex remains the supported prod path.
E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
# Pin the model when running on the default claude-code path —
# the per-runtime default ("sonnet") routes to direct Anthropic
# and defeats the cost saving. Operators can override via the
# workflow_dispatch flow (no input wired here yet — runtime
# override is enough for ad-hoc).
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || 'MiniMax-M2' }}
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
@@ -210,6 +217,10 @@ jobs:
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
required_secret_value="${E2E_OPENAI_API_KEY:-}"
;;
google-adk)
required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
required_secret_value="${E2E_GOOGLE_API_KEY:-}"
;;
*)
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
required_secret_name=""
@@ -327,13 +327,27 @@ jobs:
echo ""
echo "### Per-tenant result"
echo ""
echo "| Slug | Phase | SSM Status | Exit | Healthz | Error present |"
echo "|------|-------|------------|------|---------|---------------|"
jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
echo "| Slug | Phase | SSM Status | Exit | Healthz | On target | Error present |"
echo "|------|-------|------------|------|---------|-----------|---------------|"
jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \(.verified_on_target) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
# internal#724: stragglers are tenants enumerated but not proven
# on the target build. Surface them loudly — a non-empty list
# means the rollout did NOT fully land.
STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
if [ -n "$STRAGGLERS" ]; then
echo ""
echo "### ⚠ Stragglers (NOT on target tag \`$TARGET_TAG\`)"
echo ""
echo "\`$STRAGGLERS\`"
fi
} >> "$GITHUB_STEP_SUMMARY"
OK="$(jq -r '.ok' "$HTTP_RESPONSE")"
if [ "$OK" != "true" ]; then
STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
if [ -n "$STRAGGLERS" ]; then
echo "::error::incomplete rollout — tenants not on target tag $TARGET_TAG: $STRAGGLERS"
fi
echo "::error::redeploy-fleet reported ok=false; production rollout halted."
exit 1
fi
+99
View File
@@ -0,0 +1,99 @@
name: sync-providers-yaml
# Cross-repo canonical↔synced-copy drift gate (internal#718 P2-A, CTO
# 2026-05-27 "Distribution = SDK via codegen + verify-CI", multi-repo branch:
# "codegen-checked-into-each-repo + verify-CI").
#
# The canonical provider-registry SSOT is molecule-controlplane
# internal/providers/providers.yaml. molecule-core has NO Go module dependency
# on controlplane, so instead of importing it we carry a SYNCED COPY at
# workspace-server/internal/providers/providers.yaml and gate it.
#
# This workflow fetches the canonical providers.yaml from controlplane (via the
# Gitea raw endpoint, read-only) and byte-compares it against core's synced
# copy. RED if they differ — meaning the canonical moved and core's copy must be
# re-synced (copy verbatim + `go generate ./...` + bump
# canonicalProvidersYAMLSHA256 in sync_canonical_test.go).
#
# Pairs with:
# * sync_canonical_test.go — hermetic sha pin (catches a hand-edit of core's
# copy even with no network); runs in the normal `go test ./...`.
# * verify-providers-gen.yml — artifact ↔ synced-copy drift.
#
# ENFORCEMENT GATING: standalone workflow, NOT a job in ci.yml and NOT in
# branch protection (same soak-then-promote posture as verify-providers-gen).
# It is intentionally absent from ci.yml's job set so the ci-required-drift
# sentinel does not fire on it.
#
# AUTH: uses AUTO_SYNC_TOKEN (the existing cross-repo read token used to sync
# template/provider content from sibling repos). If the secret is absent the
# job emits a clear ::warning:: and exits 0 — the hermetic sha pin in
# sync_canonical_test.go is the always-on backstop, so a missing cross-repo
# token degrades to "hand-edit still caught, live canonical drift not caught"
# rather than a hard red that blocks unrelated PRs.
on:
pull_request:
types: [opened, synchronize, reopened]
paths:
- 'workspace-server/internal/providers/providers.yaml'
- '.gitea/workflows/sync-providers-yaml.yml'
push:
branches: [main, staging]
paths:
- 'workspace-server/internal/providers/providers.yaml'
- '.gitea/workflows/sync-providers-yaml.yml'
schedule:
# Daily at :23 — catch a canonical change in controlplane that landed
# without a paired core re-sync PR (off-zero to spread cron load).
- cron: '23 4 * * *'
workflow_dispatch:
env:
GITHUB_SERVER_URL: https://git.moleculesai.app
permissions:
contents: read
concurrency:
group: sync-providers-yaml-${{ github.ref }}
cancel-in-progress: true
jobs:
compare:
name: Compare synced providers.yaml against controlplane canonical
runs-on: ubuntu-latest
timeout-minutes: 6
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Fetch canonical providers.yaml from controlplane and byte-compare
env:
AUTO_SYNC_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
API_ROOT: ${{ github.server_url }}/api/v1
run: |
set -euo pipefail
if [ -z "${AUTO_SYNC_TOKEN:-}" ]; then
echo "::warning::AUTO_SYNC_TOKEN secret missing — skipping the live cross-repo compare."
echo "The hermetic sha pin (sync_canonical_test.go) still gates hand-edits of core's copy."
echo "Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to enable live canonical-drift detection."
exit 0
fi
CANON_URL="${API_ROOT}/repos/molecule-ai/molecule-controlplane/raw/internal/providers/providers.yaml?ref=main"
# Use the /raw endpoint: it returns the file bytes directly. (The
# /contents endpoint ignores Accept: application/vnd.gitea.raw on
# Gitea 1.22.6 and returns the JSON+base64 envelope, which made this
# diff a permanent false RED.)
curl -fsS \
-H "Authorization: token ${AUTO_SYNC_TOKEN}" \
"${CANON_URL}" -o /tmp/canonical-providers.yaml
LOCAL=workspace-server/internal/providers/providers.yaml
if diff -u /tmp/canonical-providers.yaml "$LOCAL"; then
echo "OK — core's synced providers.yaml is byte-identical to the controlplane canonical."
else
echo "::error::core's synced providers.yaml DRIFTED from the controlplane canonical (SSOT)."
echo "Re-sync: copy controlplane internal/providers/providers.yaml verbatim over"
echo " $LOCAL, run 'go generate ./...' in workspace-server/, and bump"
echo " canonicalProvidersYAMLSHA256 in internal/providers/sync_canonical_test.go."
exit 1
fi
+89
View File
@@ -0,0 +1,89 @@
name: verify-providers-gen
# Provider-registry SSOT enforcement gate — molecule-core side (internal#718
# P2-A, CTO 2026-05-27 "Distribution = SDK via codegen + verify-CI").
#
# The canonical schema SSOT is molecule-controlplane
# internal/providers/providers.yaml. molecule-core carries a SYNCED COPY at
# workspace-server/internal/providers/providers.yaml (kept in sync by the
# companion sync-providers-yaml.yml gate), and cmd/gen-providers emits the
# checked-in Go projection workspace-server/internal/providers/gen/registry_gen.go.
#
# This workflow regenerates the artifact into the working tree and fails RED if
# it differs from what is committed — catching BOTH:
# * a providers.yaml (synced-copy) change that wasn't followed by `go generate ./...`, and
# * a hand-edit of the generated artifact (it carries a DO NOT EDIT header).
#
# It is the molecule-core mirror of molecule-controlplane's verify-providers-gen
# workflow. Together with sync-providers-yaml (canonical↔synced-copy drift) it
# closes the codegen-checked-into-each-repo + verify-CI loop the RFC mandates.
#
# ENFORCEMENT GATING (deliberate, per dev-SOP "implementation gating"):
# this is a STANDALONE workflow, NOT a job inside ci.yml, and is NOT yet in any
# branch-protection status_check_contexts. Rationale (identical to the CP P0
# rollout):
# * It runs + reports RED on every PR/push immediately (visible signal).
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
# from branch protection (turning it into a hard merge gate has blast radius
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
# on controlplane). Promote it into branch protection in a follow-up once
# P2 has soaked.
# Until then it behaves like secret-scan / block-internal-paths: a standalone
# advisory-to-hard gate the author is expected to keep green.
on:
pull_request:
types: [opened, synchronize, reopened]
push:
branches: [main, staging]
env:
GITHUB_SERVER_URL: https://git.moleculesai.app
permissions:
contents: read
concurrency:
group: verify-providers-gen-${{ github.ref }}
cancel-in-progress: true
jobs:
verify:
name: Regenerate providers artifact and fail on drift
runs-on: ubuntu-latest
timeout-minutes: 8
defaults:
run:
working-directory: workspace-server
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
with:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: Verify generated artifact is in sync with providers.yaml
run: |
set -euo pipefail
# -check regenerates in memory and byte-compares against the
# checked-in artifact; exit 1 (RED) on any drift. This is the
# single source of the gate's verdict — the same code path
# `go test ./cmd/gen-providers` exercises.
go run ./cmd/gen-providers -check
- name: Belt-and-braces — regenerate in place and assert clean tree
run: |
set -euo pipefail
# Independent confirmation that does not trust the -check path:
# actually write the artifact and assert git sees no change. If
# this and the step above ever disagree, the gate is suspect.
go generate ./...
if ! git diff --quiet -- internal/providers/gen/registry_gen.go; then
echo "::error::workspace-server/internal/providers/gen/registry_gen.go drifted from providers.yaml."
echo "Run 'go generate ./...' (or 'go run ./cmd/gen-providers') in workspace-server/ and commit the result."
git --no-pager diff -- internal/providers/gen/registry_gen.go | head -80
exit 1
fi
echo "OK — generated providers artifact is in sync with the schema SSOT."
@@ -38,10 +38,11 @@ const DEFAULT_RUNTIME = "claude-code";
const RUNTIME_OPTIONS = [
{ value: "claude-code", label: "Claude Code" },
{ value: "codex", label: "OpenAI Codex CLI" },
{ value: "google-adk", label: "Google ADK" },
{ value: "hermes", label: "Hermes" },
{ value: "openclaw", label: "OpenClaw" },
];
const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "hermes", "openclaw"]);
const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "google-adk", "hermes", "openclaw"]);
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
const DEFAULT_HEADLESS_ROOT_GB = 30;
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
+101 -1
View File
@@ -49,6 +49,33 @@ export interface ProviderEntry {
wildcard: boolean;
/** Optional tooltip text (rendered as native title=). */
tooltip?: string;
/** Billing mode the DERIVED provider implies, when this entry came from the
* registry-backed payload (internal#718 P3): "platform_managed" | "byok".
* Undefined for entries built by the legacy inferVendor heuristic. */
billingMode?: "platform_managed" | "byok";
}
/** RegistryProvider mirrors one entry of GET /templates `registry_providers`
* (workspace-server registryProviderView): the registry's native provider for
* a runtime, with its display label, auth-env NAMES, and billing mode. This is
* the SSOT the dropdown labels come from — the canvas drops VENDOR_LABELS for
* registry-backed runtimes (internal#718 P3, retire-list #4). */
export interface RegistryProvider {
name: string;
display_name?: string;
auth_env?: string[];
billing_mode?: "platform_managed" | "byok";
deprecated?: boolean;
}
/** RegistryModel mirrors one entry of GET /templates `registry_models`: a
* native model id annotated with its DERIVED provider (registry name) and the
* billing_mode that provider implies. */
export interface RegistryModel {
id: string;
name?: string;
provider?: string;
billing_mode?: "platform_managed" | "byok";
}
export interface SelectorValue {
@@ -68,6 +95,13 @@ interface Props {
models: SelectorModel[];
value: SelectorValue;
onChange: (next: SelectorValue) => void;
/** Optional pre-built provider catalog. When provided, the selector uses it
* verbatim instead of re-inferring one from `models` via
* buildProviderCatalog — the registry-backed path (internal#718 P3), where
* the parent builds the catalog from the registry-served providers/models
* so dropdown labels + billing come from the provider-registry SSOT rather
* than the inferVendor heuristic. Omitted = legacy heuristic over `models`. */
catalog?: ProviderEntry[];
/** Display variant. "grid" = label+control side-by-side (used in ConfigTab
* Runtime section). "stack" = vertical (used in MissingKeysModal). */
variant?: "grid" | "stack";
@@ -251,6 +285,66 @@ export function buildProviderCatalog(models: SelectorModel[]): ProviderEntry[] {
return Array.from(buckets.values());
}
/** Build the provider catalog from a REGISTRY-BACKED GET /templates payload
* (registry_providers + registry_models) — internal#718 P3, retire-list #4.
*
* Unlike buildProviderCatalog (which RE-INFERS vendor from model-id prefixes
* + env via inferVendor/VENDOR_LABELS/BARE_VENDOR_PATTERNS), this trusts the
* registry: each model carries its DERIVED `provider` (a registry provider
* name) and the dropdown label/billing/auth come from the matching
* `registry_providers` entry. The canvas can render no provider/model the
* registry did not serve ("only registered selectable"), and the billing-mode
* shown reflects the derived provider rather than a hardcoded rule.
*
* A provider with no served model is omitted (no empty buckets). Models whose
* `provider` doesn't match a registry_providers entry still get a bucket
* keyed by the raw provider name (defensive — should not happen for a
* well-formed registry payload), so a model is never silently dropped. */
export function buildProviderCatalogFromRegistry(
registryProviders: RegistryProvider[],
registryModels: RegistryModel[],
): ProviderEntry[] {
const byName = new Map<string, RegistryProvider>();
for (const p of registryProviders) byName.set(p.name, p);
// Bucket models by their derived provider name, preserving registry order.
const buckets = new Map<string, ProviderEntry>();
for (const m of registryModels) {
const vendor = (m.provider ?? "").trim();
if (!vendor) continue; // un-annotated registry model — skip from the
// provider cascade (selectable elsewhere via free-text); it has no
// derived provider to bucket under.
const meta = byName.get(vendor);
const wildcard = m.id.includes("*");
let entry = buckets.get(vendor);
if (!entry) {
entry = {
id: `registry|${vendor}`,
vendor,
label: meta?.display_name || vendor,
envVars: meta?.auth_env ?? [],
models: [],
wildcard,
billingMode: meta?.billing_mode ?? m.billing_mode,
tooltip: VENDOR_TOOLTIPS[vendor],
};
buckets.set(vendor, entry);
}
entry.models.push({ id: m.id, name: m.name, provider: vendor });
entry.wildcard = entry.wildcard || wildcard;
}
// Decorate label with model-count when ≥2 concrete models share the bucket,
// matching buildProviderCatalog's UX.
for (const e of buckets.values()) {
if (!e.wildcard && e.models.length > 1) {
e.label = `${e.label} (${e.models.length} models)`;
}
}
return Array.from(buckets.values());
}
/** Find the provider entry that contains a given model id. Used by
* callers to back-derive the provider when only the model is known
* (e.g. ConfigTab loading from saved state). */
@@ -283,6 +377,7 @@ export function ProviderModelSelector({
models,
value,
onChange,
catalog: catalogProp,
variant = "stack",
allowCustomModelEscape = false,
disabled = false,
@@ -293,7 +388,12 @@ export function ProviderModelSelector({
const providerSelectId = `${baseId}-provider`;
const modelSelectId = `${baseId}-model`;
const catalog = useMemo(() => buildProviderCatalog(models), [models]);
// Registry-backed path (internal#718 P3): use the parent-supplied catalog
// verbatim; otherwise re-infer one from `models` via the legacy heuristic.
const catalog = useMemo(
() => catalogProp ?? buildProviderCatalog(models),
[catalogProp, models],
);
const selected = useMemo(
() => catalog.find((p) => p.id === value.providerId) ?? null,
[catalog, value.providerId],
@@ -1,411 +1,82 @@
// @vitest-environment jsdom
/**
* Tests for BudgetSection (issue #541).
* Focused tests for BudgetSection's PER-PERIOD progress-bar math + aria (#49).
*
* Covers:
* - Loading state
* - Stats row: used / limit, "Unlimited" when null
* - Progress bar: correct percentage, capped at 100%, absent when no limit
* - Budget remaining text
* - Input pre-fill (existing limit / blank when null)
* - Save: PATCH with number, PATCH with null (blank input)
* - 402 on GET → exceeded banner, no fetch-error text
* - 402 on PATCH → exceeded banner
* - Non-402 fetch error → error text
* - Non-402 save error → save error alert
* - Section header and subheading
* - Fetch error does not show stats
* Behavioral coverage (loading, save, 402 banners, USD formatting, legacy
* back-compat) lives in tabs/__tests__/BudgetSection.test.tsx — this file
* deliberately covers only the per-period progress percentage + aria-valuenow
* + the over-budget colouring, which that suite doesn't assert in detail. Kept
* separate to avoid duplicating the behavioral suite (one component, no
* parallel/identical suites).
*/
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import {
render,
screen,
fireEvent,
waitFor,
cleanup,
act,
} from "@testing-library/react";
// ── Mock api ──────────────────────────────────────────────────────────────────
import { render, screen, waitFor, cleanup } from "@testing-library/react";
vi.mock("@/lib/api", () => ({
api: {
get: vi.fn(),
patch: vi.fn(),
},
api: { get: vi.fn(), patch: vi.fn() },
}));
import { api } from "@/lib/api";
import { BudgetSection } from "../tabs/BudgetSection";
const mockGet = vi.mocked(api.get);
const mockPatch = vi.mocked(api.patch);
// ── Helpers ───────────────────────────────────────────────────────────────────
type P = { limit: number | null; spend: number; remaining: number | null };
function budgetResponse(overrides: Partial<{
budget_limit: number | null;
budget_used: number;
budget_remaining: number | null;
}> = {}) {
// Build a periods response where the named period has the given limit/spend.
function withMonthly(limit: number | null, spend: number) {
const blank: P = { limit: null, spend: 0, remaining: null };
const monthly: P = { limit, spend, remaining: limit == null ? null : limit - spend };
return {
budget_limit: 1000,
budget_used: 250,
budget_remaining: 750,
...overrides,
periods: { hourly: blank, daily: blank, weekly: blank, monthly },
budget_limit: limit,
monthly_spend: spend,
budget_remaining: monthly.remaining,
};
}
function make402Error(): Error {
return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
}
beforeEach(() => vi.clearAllMocks());
afterEach(() => cleanup());
function make402PatchError(): Error {
return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
}
function makeGenericError(msg = "network timeout"): Error {
return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
}
beforeEach(() => {
vi.clearAllMocks();
});
afterEach(() => {
cleanup();
});
// ── Rendering helpers ─────────────────────────────────────────────────────────
async function renderLoaded(budgetData = budgetResponse()) {
async function renderLoaded(data: unknown) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetData as any);
mockGet.mockResolvedValueOnce(data as any);
render(<BudgetSection workspaceId="ws-1" />);
// Wait for loading to finish
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
}
// ── Loading state ─────────────────────────────────────────────────────────────
describe("BudgetSection — loading state", () => {
it("shows loading indicator while fetch is in flight", () => {
// Never resolve
mockGet.mockReturnValue(new Promise(() => {}));
render(<BudgetSection workspaceId="ws-1" />);
expect(screen.getByTestId("budget-loading")).toBeTruthy();
expect(screen.getByText("Loading…")).toBeTruthy();
describe("BudgetSection — per-period progress bar", () => {
it("renders the bar for a limited period and omits it for an unlimited one", async () => {
await renderLoaded(withMonthly(1000, 250));
expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
expect(screen.queryByTestId("budget-hourly-fill")).toBeNull(); // hourly unlimited
});
it("hides loading indicator after fetch resolves", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetResponse() as any);
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
});
});
// ── Section header ────────────────────────────────────────────────────────────
describe("BudgetSection — header and subheading", () => {
it("renders 'Budget' as the section heading", async () => {
await renderLoaded();
expect(screen.getByText("Budget")).toBeTruthy();
});
it("renders the subheading 'Limit total message credits for this workspace'", async () => {
await renderLoaded();
expect(
screen.getByText("Limit total message credits for this workspace")
).toBeTruthy();
});
it("renders 'Budget limit (credits)' label for the input", async () => {
await renderLoaded();
expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
});
});
// ── Stats row ─────────────────────────────────────────────────────────────────
describe("BudgetSection — stats row", () => {
it("shows budget_used in the stats row", async () => {
await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
expect(screen.getByTestId("budget-used-value").textContent).toBe("350");
});
it("shows budget_limit in the stats row", async () => {
await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
expect(screen.getByTestId("budget-limit-value").textContent).toBe("500");
});
it("shows 'Unlimited' when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
expect(screen.getByTestId("budget-limit-value").textContent).toBe("Unlimited");
});
it("shows budget_remaining when present", async () => {
await renderLoaded(budgetResponse({ budget_remaining: 750 }));
expect(screen.getByTestId("budget-remaining").textContent).toContain("750");
expect(screen.getByTestId("budget-remaining").textContent).toContain("credits remaining");
});
it("hides budget_remaining row when null", async () => {
await renderLoaded(budgetResponse({ budget_remaining: null }));
expect(screen.queryByTestId("budget-remaining")).toBeNull();
});
it("does not crash when budget_used is missing from the response", async () => {
// Backend for a provisioning-stuck workspace may return a partial
// shape. Regression: previously this threw
// "Cannot read properties of undefined (reading 'toLocaleString')"
// and crashed the whole Details tab.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
expect(screen.getByTestId("budget-used-value").textContent).toBe("0");
});
});
// ── Progress bar ──────────────────────────────────────────────────────────────
describe("BudgetSection — progress bar", () => {
it("renders the progress bar when budget_limit is set", async () => {
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
expect(screen.getByRole("progressbar")).toBeTruthy();
});
it("does NOT render progress bar when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
expect(screen.queryByRole("progressbar")).toBeNull();
});
it("fills to the correct percentage (25%)", async () => {
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("25%");
});
it("fills to the correct percentage (50%)", async () => {
await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("50%");
});
it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("100%");
});
it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
const bar = screen.getByRole("progressbar");
expect(bar.getAttribute("aria-valuenow")).toBe("30");
});
it("shows 0% progress bar when budget_used is absent from the response", async () => {
// Regression: budget_used is optional (provisioning-stuck workspaces return
// partial shapes). Without the `?? 0` guard the progressPct calculation
// throws a TypeScript strict-null error and the build fails.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
const bar = screen.getByRole("progressbar");
expect(bar.getAttribute("aria-valuenow")).toBe("0");
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
expect(fill.style.width).toBe("0%");
});
});
// ── Input pre-fill ────────────────────────────────────────────────────────────
describe("BudgetSection — input pre-fill", () => {
it("pre-fills input with existing budget_limit", async () => {
await renderLoaded(budgetResponse({ budget_limit: 500 }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("500");
});
it("leaves input empty when budget_limit is null", async () => {
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
expect(input.value).toBe("");
});
});
// ── Save — PATCH calls ────────────────────────────────────────────────────────
describe("BudgetSection — save", () => {
it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "800" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(800);
});
it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
// Regression for QA bug report: `parseInt("0") || null` would yield null.
// The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "0" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBe(0);
expect(body.budget_limit).not.toBeNull();
});
it("sends budget_limit: null when input is blank", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
expect(body.budget_limit).toBeNull();
});
it("updates displayed stats after successful save", async () => {
const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "2000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
);
});
it("shows save error message on non-402 PATCH failure", async () => {
mockPatch.mockRejectedValueOnce(
new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
);
await renderLoaded();
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-save-error")).toBeTruthy()
);
expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
});
});
// ── 402 handling ──────────────────────────────────────────────────────────────
describe("BudgetSection — 402 handling", () => {
it("shows exceeded banner when GET returns 402", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
});
it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.queryByTestId("budget-loading")).toBeNull()
);
expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
});
it("shows exceeded banner when PATCH returns 402", async () => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockGet.mockResolvedValueOnce(budgetResponse() as any);
mockPatch.mockRejectedValueOnce(make402PatchError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
fireEvent.click(screen.getByTestId("budget-save-btn"));
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Should NOT also show the save-error alert
expect(screen.queryByTestId("budget-save-error")).toBeNull();
});
it("clears exceeded banner after a successful save", async () => {
mockGet.mockRejectedValueOnce(make402Error());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
);
// Now a successful PATCH (limit was raised)
const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPatch.mockResolvedValueOnce(updated as any);
await act(async () => {
fireEvent.change(screen.getByTestId("budget-limit-input"), {
target: { value: "5000" },
});
fireEvent.click(screen.getByTestId("budget-save-btn"));
});
await waitFor(() =>
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
);
});
});
// ── Non-402 fetch error ───────────────────────────────────────────────────────
describe("BudgetSection — non-402 fetch errors", () => {
it("shows fetch error text on non-402 GET failure", async () => {
mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() =>
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
);
expect(screen.getByTestId("budget-fetch-error").textContent).toContain("500");
});
it("does NOT show stats row on fetch error", async () => {
mockGet.mockRejectedValueOnce(makeGenericError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
expect(screen.queryByTestId("budget-stats-row")).toBeNull();
});
it("does NOT show exceeded banner on non-402 fetch error", async () => {
mockGet.mockRejectedValueOnce(makeGenericError());
render(<BudgetSection workspaceId="ws-1" />);
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
it("fills to 25%", async () => {
await renderLoaded(withMonthly(1000, 250));
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("25%");
});
it("fills to 50%", async () => {
await renderLoaded(withMonthly(1000, 500));
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("50%");
});
it("caps fill at 100% when spend exceeds limit", async () => {
await renderLoaded(withMonthly(1000, 4000));
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("100%");
});
it("sets aria-valuenow to the computed percentage on the progressbar", async () => {
await renderLoaded(withMonthly(1000, 250));
const bars = screen.getAllByRole("progressbar");
// the monthly bar is the only one rendered (others unlimited)
expect(bars).toHaveLength(1);
expect(bars[0].getAttribute("aria-valuenow")).toBe("25");
});
it("shows a 0% bar when spend is 0 against a set limit", async () => {
await renderLoaded(withMonthly(1000, 0));
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("0%");
});
});
@@ -213,6 +213,7 @@ describe("CreateWorkspaceDialog", () => {
expect(runtimeTexts).toEqual([
"Claude Code",
"OpenAI Codex CLI",
"Google ADK",
"Hermes",
"OpenClaw",
]);
@@ -0,0 +1,110 @@
// @vitest-environment jsdom
//
// internal#718 P3 (retire-list #4) — when GET /templates serves a
// registry-backed selectable list (registry_providers + registry_models with
// display_name / billing_mode / derived provider), the canvas builds the
// provider catalog FROM that registry data instead of re-inferring vendor
// from model-id prefixes (VENDOR_LABELS / BARE_VENDOR_PATTERNS / inferVendor).
// The heuristic path stays only as the fallback for non-registry runtimes /
// older backends.
import { describe, it, expect } from "vitest";
import {
buildProviderCatalogFromRegistry,
type RegistryProvider,
type RegistryModel,
} from "../ProviderModelSelector";
// Mirrors the registry-served claude-code payload from GET /templates
// (registry_providers / registry_models). display_name + billing_mode come
// from the registry, NOT from the canvas VENDOR_LABELS map.
const CLAUDE_CODE_REGISTRY_PROVIDERS: RegistryProvider[] = [
{
name: "anthropic-oauth",
display_name: "Claude Code subscription",
auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"],
billing_mode: "byok",
},
{
name: "anthropic-api",
display_name: "Anthropic API",
auth_env: ["ANTHROPIC_API_KEY"],
billing_mode: "byok",
},
{
name: "platform",
display_name: "Platform",
auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billing_mode: "platform_managed",
},
];
const CLAUDE_CODE_REGISTRY_MODELS: RegistryModel[] = [
{ id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
{ id: "opus", provider: "anthropic-oauth", billing_mode: "byok" },
{ id: "claude-opus-4-7", provider: "anthropic-api", billing_mode: "byok" },
{ id: "anthropic/claude-opus-4-7", provider: "platform", billing_mode: "platform_managed" },
];
describe("buildProviderCatalogFromRegistry", () => {
it("buckets models by their DERIVED registry provider, not by inferred vendor", () => {
const catalog = buildProviderCatalogFromRegistry(
CLAUDE_CODE_REGISTRY_PROVIDERS,
CLAUDE_CODE_REGISTRY_MODELS,
);
const byVendor = new Map(catalog.map((p) => [p.vendor, p]));
// anthropic-oauth bucket holds the two OAuth-derived models.
const oauth = byVendor.get("anthropic-oauth");
expect(oauth).toBeDefined();
expect(oauth!.models.map((m) => m.id).sort()).toEqual(["opus", "sonnet"]);
// platform bucket holds the platform-namespaced model.
const platform = byVendor.get("platform");
expect(platform).toBeDefined();
expect(platform!.models.map((m) => m.id)).toEqual(["anthropic/claude-opus-4-7"]);
});
it("labels providers from the registry display_name, not VENDOR_LABELS", () => {
const catalog = buildProviderCatalogFromRegistry(
CLAUDE_CODE_REGISTRY_PROVIDERS,
CLAUDE_CODE_REGISTRY_MODELS,
);
const oauth = catalog.find((p) => p.vendor === "anthropic-oauth");
// Registry display_name "Claude Code subscription" (decorated with the
// model count by the catalog builder is acceptable; assert it carries the
// registry label, not an inferred one).
expect(oauth!.label).toContain("Claude Code subscription");
});
it("carries the registry billing_mode per provider", () => {
const catalog = buildProviderCatalogFromRegistry(
CLAUDE_CODE_REGISTRY_PROVIDERS,
CLAUDE_CODE_REGISTRY_MODELS,
);
expect(catalog.find((p) => p.vendor === "anthropic-oauth")!.billingMode).toBe("byok");
expect(catalog.find((p) => p.vendor === "platform")!.billingMode).toBe("platform_managed");
});
it("surfaces the registry auth_env on the provider entry", () => {
const catalog = buildProviderCatalogFromRegistry(
CLAUDE_CODE_REGISTRY_PROVIDERS,
CLAUDE_CODE_REGISTRY_MODELS,
);
expect(catalog.find((p) => p.vendor === "anthropic-oauth")!.envVars).toEqual([
"CLAUDE_CODE_OAUTH_TOKEN",
]);
});
it("only includes providers that actually have at least one served model", () => {
// anthropic-api is a registry provider but has no model in this slice →
// it should not appear as an empty bucket.
const models: RegistryModel[] = [
{ id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
];
const catalog = buildProviderCatalogFromRegistry(
CLAUDE_CODE_REGISTRY_PROVIDERS,
models,
);
expect(catalog.map((p) => p.vendor)).toEqual(["anthropic-oauth"]);
});
});
+158 -137
View File
@@ -7,10 +7,28 @@ import { api } from "@/lib/api";
// Types
// ---------------------------------------------------------------------------
// Period keys MUST match the server SSOT (workspace-server budget_periods.go).
type BudgetPeriod = "hourly" | "daily" | "weekly" | "monthly";
const PERIODS: { key: BudgetPeriod; label: string }[] = [
{ key: "hourly", label: "Hourly" },
{ key: "daily", label: "Daily" },
{ key: "weekly", label: "Weekly" },
{ key: "monthly", label: "Monthly" },
];
interface PeriodBudget {
limit: number | null; // USD cents; null = no limit
spend: number; // rolling-window spend, USD cents
remaining: number | null; // null when no limit
}
interface BudgetData {
budget_limit: number | null;
budget_used?: number; // optional — provisioning-stuck workspaces return partial shapes
budget_remaining: number | null;
periods?: Partial<Record<BudgetPeriod, PeriodBudget>>;
// legacy fields (pre-multi-period server) — tolerated for back-compat
budget_limit?: number | null;
monthly_spend?: number;
budget_remaining?: number | null;
}
interface Props {
@@ -26,31 +44,71 @@ function isApiError402(e: unknown): boolean {
return e instanceof Error && /: 402( |$)/.test(e.message);
}
/** USD cents → "$X.XX". */
function fmtUSD(cents: number): string {
return `$${(cents / 100).toLocaleString(undefined, { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
}
/** Normalize the server payload (multi-period or legacy) into a period map. */
function periodsFrom(data: BudgetData | null): Record<BudgetPeriod, PeriodBudget> {
const base: Record<BudgetPeriod, PeriodBudget> = {
hourly: { limit: null, spend: 0, remaining: null },
daily: { limit: null, spend: 0, remaining: null },
weekly: { limit: null, spend: 0, remaining: null },
monthly: { limit: null, spend: 0, remaining: null },
};
if (!data) return base;
if (data.periods) {
for (const { key } of PERIODS) {
const p = data.periods[key];
if (p) base[key] = { limit: p.limit ?? null, spend: p.spend ?? 0, remaining: p.remaining ?? null };
}
return base;
}
// legacy: map the single monthly limit/spend
base.monthly = {
limit: data.budget_limit ?? null,
spend: data.monthly_spend ?? 0,
remaining: data.budget_remaining ?? null,
};
return base;
}
// ---------------------------------------------------------------------------
// Component
// ---------------------------------------------------------------------------
/**
* BudgetSection — dedicated "Budget" section in the workspace details panel.
*
* - Fetches GET /workspaces/:id/budget on mount for live usage stats
* - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
* - Allows updating budget_limit via PATCH /workspaces/:id/budget
* - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
* BudgetSection — per-workspace LLM budget, four independent rolling windows
* (hourly / daily / weekly / monthly). Each period has its own ceiling (USD);
* spend is the rolling-window LLM cost. Crossing ANY period blocks new work
* (server returns 402). Sends PATCH {budget_limits:{period:cents|null}}.
*/
export function BudgetSection({ workspaceId }: Props) {
const [budget, setBudget] = useState<BudgetData | null>(null);
const [loading, setLoading] = useState(true);
const [fetchError, setFetchError] = useState<string | null>(null);
const [limitInput, setLimitInput] = useState("");
// One input per period, in USD cents (string for controlled inputs).
const [limitInputs, setLimitInputs] = useState<Record<BudgetPeriod, string>>({
hourly: "",
daily: "",
weekly: "",
monthly: "",
});
const [saving, setSaving] = useState(false);
const [saveError, setSaveError] = useState<string | null>(null);
/** True when a 402 has been seen from any API call in this section. */
const [budgetExceeded, setBudgetExceeded] = useState(false);
// ── Fetch current budget data ─────────────────────────────────────────────
const syncInputs = useCallback((data: BudgetData | null) => {
const p = periodsFrom(data);
setLimitInputs({
hourly: p.hourly.limit != null ? String(p.hourly.limit) : "",
daily: p.daily.limit != null ? String(p.daily.limit) : "",
weekly: p.weekly.limit != null ? String(p.weekly.limit) : "",
monthly: p.monthly.limit != null ? String(p.monthly.limit) : "",
});
}, []);
const loadBudget = useCallback(async () => {
setLoading(true);
@@ -58,7 +116,7 @@ export function BudgetSection({ workspaceId }: Props) {
try {
const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
setBudget(data);
setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
syncInputs(data);
} catch (e) {
if (isApiError402(e)) {
setBudgetExceeded(true);
@@ -68,29 +126,30 @@ export function BudgetSection({ workspaceId }: Props) {
} finally {
setLoading(false);
}
}, [workspaceId]);
}, [workspaceId, syncInputs]);
useEffect(() => {
loadBudget();
}, [loadBudget]);
// ── Save handler ──────────────────────────────────────────────────────────
const handleSave = async () => {
setSaving(true);
setSaveError(null);
const raw = limitInput.trim();
// Use explicit empty-string check (not falsy check) so that a
// user-entered "0" is sent as budget_limit: 0, not null (unlimited).
const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
// Build the per-period map: blank → null (clear); a number → that ceiling.
const budget_limits: Record<BudgetPeriod, number | null> = {
hourly: null,
daily: null,
weekly: null,
monthly: null,
};
for (const { key } of PERIODS) {
const raw = limitInputs[key].trim();
budget_limits[key] = raw !== "" ? parseInt(raw, 10) : null;
}
try {
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
budget_limit: parsedLimit,
});
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, { budget_limits });
setBudget(updated);
setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
// Clear exceeded state if the save succeeded (limit was raised or removed)
syncInputs(updated);
setBudgetExceeded(false);
} catch (e) {
if (isApiError402(e)) {
@@ -103,24 +162,15 @@ export function BudgetSection({ workspaceId }: Props) {
}
};
// ── Progress calculation ──────────────────────────────────────────────────
const progressPct =
budget && budget.budget_limit != null && budget.budget_limit > 0
? Math.min(100, Math.round(((budget.budget_used ?? 0) / budget.budget_limit) * 100))
: 0;
// ── Render ────────────────────────────────────────────────────────────────
const periods = periodsFrom(budget);
return (
<div className="space-y-3" data-testid="budget-section">
{/* Section header */}
<div>
<h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">
Budget
</h3>
<h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">Budget</h3>
<p className="text-[11px] text-ink-mid mt-0.5">
Limit total message credits for this workspace
Cap LLM spend for this workspace per period crossing any limit pauses new work
</p>
</div>
@@ -131,32 +181,14 @@ export function BudgetSection({ workspaceId }: Props) {
data-testid="budget-exceeded-banner"
className="flex items-center gap-2 px-3 py-2 rounded-lg bg-surface border border-amber-700/50 text-warm text-xs font-medium"
>
<svg
width="13"
height="13"
viewBox="0 0 13 13"
fill="none"
aria-hidden="true"
className="shrink-0"
>
<path
d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
stroke="currentColor"
strokeWidth="1.4"
strokeLinejoin="round"
/>
<path
d="M6.5 5.5V7.5M6.5 9.5h.01"
stroke="currentColor"
strokeWidth="1.4"
strokeLinecap="round"
/>
<svg width="13" height="13" viewBox="0 0 13 13" fill="none" aria-hidden="true" className="shrink-0">
<path d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z" stroke="currentColor" strokeWidth="1.4" strokeLinejoin="round" />
<path d="M6.5 5.5V7.5M6.5 9.5h.01" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" />
</svg>
Budget exceeded messages blocked
Budget exceeded new work paused
</div>
)}
{/* Usage stats */}
{loading ? (
<p className="text-xs text-ink-mid" data-testid="budget-loading">
Loading
@@ -165,89 +197,78 @@ export function BudgetSection({ workspaceId }: Props) {
<p className="text-xs text-bad" data-testid="budget-fetch-error">
{fetchError}
</p>
) : budget ? (
<div className="space-y-2">
{/* Stats row */}
<div className="flex items-baseline justify-between" data-testid="budget-stats-row">
<span className="text-xs text-ink-mid">Credits used</span>
<span className="text-xs font-mono text-ink-mid">
<span data-testid="budget-used-value">{(budget.budget_used ?? 0).toLocaleString()}</span>
<span className="text-ink-mid mx-1">/</span>
<span data-testid="budget-limit-value">
{budget.budget_limit != null
? budget.budget_limit.toLocaleString()
: "Unlimited"}
</span>
</span>
</div>
) : (
<div className="space-y-3">
{PERIODS.map(({ key, label }) => {
const p = periods[key];
const pct =
p.limit != null && p.limit > 0 ? Math.min(100, Math.round((p.spend / p.limit) * 100)) : 0;
const over = p.limit != null && p.spend >= p.limit;
return (
<div key={key} className="space-y-1" data-testid={`budget-period-${key}`}>
<div className="flex items-baseline justify-between">
<label htmlFor={`budget-${key}-${workspaceId}`} className="text-xs text-ink-mid">
{label}
</label>
<span className="text-[11px] font-mono text-ink-mid">
<span data-testid={`budget-${key}-spend`}>{fmtUSD(p.spend)}</span>
<span className="mx-1">/</span>
<span data-testid={`budget-${key}-limit`}>{p.limit != null ? fmtUSD(p.limit) : "∞"}</span>
</span>
</div>
{p.limit != null && (
<div
role="progressbar"
aria-label={`${label} budget usage`}
aria-valuenow={pct}
aria-valuemin={0}
aria-valuemax={100}
className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
>
<div
data-testid={`budget-${key}-fill`}
className={`h-full rounded-full transition-all duration-300 ${over ? "bg-bad" : "bg-accent"}`}
style={{ width: `${pct}%` }}
/>
</div>
)}
<input
id={`budget-${key}-${workspaceId}`}
type="number"
min="0"
step="1"
value={limitInputs[key]}
onChange={(e) => setLimitInputs((s) => ({ ...s, [key]: e.target.value }))}
placeholder="USD cents — blank for unlimited"
data-testid={`budget-${key}-input`}
className="w-full bg-surface-card border border-line rounded-lg px-3 py-1.5 text-xs text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
/>
</div>
);
})}
{/* Progress bar (only when limit is set) */}
{budget.budget_limit != null && (
<p className="text-[11px] text-ink-mid">Limits are USD cents (e.g. 500 = $5.00). Blank = unlimited.</p>
{saveError && (
<div
role="progressbar"
aria-label="Budget usage"
aria-valuenow={progressPct}
aria-valuemin={0}
aria-valuemax={100}
className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
role="alert"
data-testid="budget-save-error"
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
>
<div
data-testid="budget-progress-fill"
className="h-full rounded-full bg-accent transition-all duration-300"
style={{ width: `${progressPct}%` }}
/>
{saveError}
</div>
)}
{/* Remaining credits */}
{budget.budget_remaining != null && (
<p className="text-[11px] text-ink-mid" data-testid="budget-remaining">
{budget.budget_remaining.toLocaleString()} credits remaining
</p>
)}
</div>
) : null}
{/* Input + Save */}
<div className="space-y-1.5 pt-1">
<label
htmlFor={`budget-limit-input-${workspaceId}`}
className="text-[11px] text-ink-mid block"
>
Budget limit (credits)
</label>
<input
id={`budget-limit-input-${workspaceId}`}
type="number"
min="0"
step="1"
value={limitInput}
onChange={(e) => setLimitInput(e.target.value)}
placeholder="e.g. 1000 — blank for unlimited"
data-testid="budget-limit-input"
className="w-full bg-surface-card border border-line rounded-lg px-3 py-2 text-sm text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
/>
<p className="text-xs text-ink-mid">Leave blank for unlimited</p>
{saveError && (
<div
role="alert"
data-testid="budget-save-error"
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
<button
onClick={handleSave}
disabled={saving}
data-testid="budget-save-btn"
className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
>
{saveError}
</div>
)}
<button
onClick={handleSave}
disabled={saving}
data-testid="budget-save-btn"
className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
>
{saving ? "Saving…" : "Save"}
</button>
</div>
{saving ? "Saving…" : "Save"}
</button>
</div>
)}
</div>
);
}
+184 -97
View File
@@ -11,8 +11,12 @@ import { ExternalConnectionSection } from "./ExternalConnectionSection";
import {
ProviderModelSelector,
buildProviderCatalog,
buildProviderCatalogFromRegistry,
findProviderForModel,
type SelectorValue,
type ProviderEntry,
type RegistryProvider,
type RegistryModel,
} from "../ProviderModelSelector";
import { isExternalLikeRuntime } from "@/lib/externalRuntimes";
@@ -258,6 +262,17 @@ interface RuntimeOption {
// canvas falls back to deriving unique vendor prefixes from
// models[].id (still adapter-driven, just inferred).
providers: string[];
// registryBacked / registryProviders / registryModels come from the
// registry-served GET /templates fields (internal#718 P3). When
// registryBacked is true, the selectable provider+model list is built from
// the registry (registryProviders/registryModels) — display labels +
// billing mode + derived provider come from the provider-registry SSOT, not
// the canvas VENDOR_LABELS / billingModeForProvider vocabularies. When
// false (non-registry runtime / older backend), the canvas falls back to
// the template-served models[] + its inferVendor heuristic.
registryBacked: boolean;
registryProviders: RegistryProvider[];
registryModels: RegistryModel[];
}
// deriveProvidersFromModels — when a template doesn't ship an explicit
@@ -322,6 +337,32 @@ export function billingModeForProvider(provider: string): LLMBillingMode {
return "byok";
}
// billingModeForSelectedProvider — internal#718 P3 (retire-list #5): the
// billing mode the Config tab shows/sends for the selected PROVIDER, sourced
// from the registry-served catalog when available rather than the hardcoded
// billingModeForProvider rule.
//
// When the runtime is registry-backed, GET /templates serves each provider's
// DERIVED billing_mode (platform_managed for the closed platform provider,
// byok otherwise) on the ProviderEntry. We read it off the catalog so the UI
// reflects the registry SSOT — the same predicate billing/credential emission
// keys off the derived provider.
//
// Falls back to billingModeForProvider when: no catalog (non-registry runtime
// / older backend), or the provider string isn't carried by the catalog
// (e.g. a stale saved value). The fallback keeps the legacy behavior intact
// for everything the registry doesn't yet speak to.
export function billingModeForSelectedProvider(
provider: string,
catalog?: ProviderEntry[],
): LLMBillingMode {
if (catalog && catalog.length > 0) {
const entry = catalog.find((p) => p.vendor === provider.trim());
if (entry?.billingMode) return entry.billingMode;
}
return billingModeForProvider(provider);
}
// Fallback used when /templates can't be fetched (offline, older backend).
// Keep in sync with manifest.json workspace_templates as a defensive default.
// Model + env suggestions only flow when the backend is reachable.
@@ -336,13 +377,20 @@ export function billingModeForProvider(provider: string): LLMBillingMode {
// config.yaml` on the container is a separate runtime-internal file,
// not this one.
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli", "openclaw"]);
const SUPPORTED_RUNTIME_VALUES = new Set(["claude-code", "codex", "openclaw", "hermes"]);
// The runtime picker is SSOT-driven: options come from GET /templates,
// which workspace-server already gates to the manifest.json maintained set
// (loadRuntimesFromManifest). A hand-maintained frontend allowlist silently
// dropped runtimes the backend added (google-adk shipped in manifest but was
// filtered out, so its workspaces rendered the wrong default option). A
// template may still opt OUT of the picker via `displayable: false` on its
// /templates row. See project_canvas_runtime_dropdown_ssot_fix.
const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
{ value: "claude-code", label: "Claude Code", models: [], providers: [] },
{ value: "codex", label: "Codex", models: [], providers: [] },
{ value: "openclaw", label: "OpenClaw", models: [], providers: [] },
{ value: "hermes", label: "Hermes", models: [], providers: [] },
{ value: "claude-code", label: "Claude Code", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
{ value: "codex", label: "Codex", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
{ value: "google-adk", label: "Google ADK", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
{ value: "openclaw", label: "OpenClaw", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
{ value: "hermes", label: "Hermes", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
];
export function ConfigTab({ workspaceId }: Props) {
@@ -355,15 +403,24 @@ export function ConfigTab({ workspaceId }: Props) {
const [rawMode, setRawMode] = useState(false);
const [rawDraft, setRawDraft] = useState("");
const [runtimeOptions, setRuntimeOptions] = useState<RuntimeOption[]>(FALLBACK_RUNTIME_OPTIONS);
// Provider override (Option B PR-5): stored separately from config.yaml
// because the value lives in workspace_secrets (encrypted), not in the
// platform-managed config.yaml. The two endpoints are GET/PUT
// /workspaces/:id/provider on workspace-server (handlers/secrets.go).
// Empty = "auto-derive from model slug prefix" — pre-Option-B behavior
// and what most users want. Setting to a non-empty value writes
// LLM_PROVIDER into workspace_secrets and triggers an auto-restart so
// the workspace boots with the new provider in env (and via CP user-
// data, written into /configs/config.yaml on next provision too).
// internal#718 P4 closure: the explicit provider override
// (LLM_PROVIDER workspace_secret, surfaced via GET/PUT
// /workspaces/:id/provider) has been RETIRED. The provider is
// derived at every decision point from (runtime, model) via the
// registry — no stored row remains. The `provider` / `originalProvider`
// state and the provider dropdown survive in this component for
// backwards-compat (display only) but are no longer persisted:
// - loadConfig no longer GETs /workspaces/:id/provider (the
// endpoint returns 410 Gone). The state initializes to ""
// and stays there.
// - handleSave no longer PUTs /workspaces/:id/provider.
// - The dropdown still updates the local `provider` state so the
// user can preview the derived value; the value never leaves
// the browser.
// This is the canvas-side complement to the backend retirement of
// SetProvider/GetProvider/setProviderSecret. Older canvases that
// still call PUT /provider hit the 410 Gone with a structured
// PROVIDER_ENDPOINT_RETIRED code — loud failure, no silent miss.
const [provider, setProvider] = useState("");
const [originalProvider, setOriginalProvider] = useState("");
// Track the model the form first rendered, so handleSave can detect
@@ -414,26 +471,23 @@ export function ConfigTab({ workspaceId }: Props) {
//
// See GH #1894 for the workspace-row-as-source-of-truth rationale
// that motivated splitting from a single config.yaml read.
const [wsRes, modelRes, providerRes] = await Promise.all([
// internal#718 P4 closure: the GET /workspaces/:id/provider leg is
// RETIRED — the endpoint returns 410 Gone. Provider is now derived
// from (runtime, model) via the registry; no stored value exists
// to load. Always seed the local state to "" so the dropdown
// initializes to "auto-derive".
const [wsRes, modelRes] = await Promise.all([
api.get<{ runtime?: string; tier?: number }>(`/workspaces/${workspaceId}`)
.catch(() => ({} as { runtime?: string; tier?: number })),
api.get<{ model?: string }>(`/workspaces/${workspaceId}/model`)
.catch(() => ({} as { model?: string })),
api.get<{ provider?: string }>(`/workspaces/${workspaceId}/provider`)
.catch(() => null),
]);
const wsMetadataRuntime = (wsRes.runtime || "").trim();
const wsMetadataModel = (modelRes.model || "").trim();
const wsMetadataTier: number | null =
typeof wsRes.tier === "number" ? wsRes.tier : null;
if (providerRes !== null) {
const loadedProvider = (providerRes.provider || "").trim();
setProvider(loadedProvider);
setOriginalProvider(loadedProvider);
} else {
setProvider("");
setOriginalProvider("");
}
setProvider("");
setOriginalProvider("");
// originalModel is set further down once the YAML has been parsed —
// we want it to reflect what the form ACTUALLY rendered, which may
// be the YAML's runtime_config.model fallback when MODEL_PROVIDER
@@ -527,20 +581,49 @@ export function ConfigTab({ workspaceId }: Props) {
useEffect(() => {
let cancelled = false;
api.get<Array<{ id: string; name?: string; runtime?: string; models?: ModelSpec[]; providers?: string[] }>>("/templates")
api.get<Array<{
id: string;
name?: string;
runtime?: string;
models?: ModelSpec[];
providers?: string[];
// internal#718 P3 registry-served fields (additive; absent on older
// backends and for non-registry runtimes).
registry_backed?: boolean;
registry_providers?: RegistryProvider[];
registry_models?: RegistryModel[];
displayable?: boolean;
}>>("/templates")
.then((rows) => {
if (cancelled || !Array.isArray(rows)) return;
const byRuntime = new Map<string, RuntimeOption>();
for (const r of rows) {
const v = (r.runtime || "").trim();
if (!SUPPORTED_RUNTIME_VALUES.has(v)) continue;
if (!v) continue;
// Honor an explicit opt-out; absent/true means show it.
if (r.displayable === false) continue;
// Last template wins if two templates share a runtime — rare, and the
// one with the richer models list is probably newer.
const existing = byRuntime.get(v);
const models = Array.isArray(r.models) ? r.models : [];
const providers = Array.isArray(r.providers) ? r.providers : [];
if (!existing || models.length > existing.models.length) {
byRuntime.set(v, { value: v, label: r.name || v, models, providers });
const registryProviders = Array.isArray(r.registry_providers) ? r.registry_providers : [];
const registryModels = Array.isArray(r.registry_models) ? r.registry_models : [];
const registryBacked = r.registry_backed === true && registryModels.length > 0;
// Prefer the richer payload: a registry-backed entry, then more
// template models. Keeps the "last/richer template wins" intent.
const score = (o: RuntimeOption) => (o.registryBacked ? 1000 : 0) + o.models.length;
const candidate: RuntimeOption = {
value: v,
label: r.name || v,
models,
providers,
registryBacked,
registryProviders,
registryModels,
};
if (!existing || score(candidate) > score(existing)) {
byRuntime.set(v, candidate);
}
}
if (byRuntime.size > 0) setRuntimeOptions(Array.from(byRuntime.values()));
@@ -551,7 +634,13 @@ export function ConfigTab({ workspaceId }: Props) {
// Models + env hints for the currently-selected runtime.
const selectedRuntime = runtimeOptions.find((o) => o.value === (config.runtime || "")) ?? null;
const availableModels: ModelSpec[] = selectedRuntime?.models ?? [];
// Memoised so its identity is stable across renders — it feeds several
// useMemo dependency arrays below (registry/legacy catalog, selector models)
// and a fresh `[]` literal each render would defeat their memoisation.
const availableModels: ModelSpec[] = useMemo(
() => selectedRuntime?.models ?? [],
[selectedRuntime?.models],
);
// Provider suggestions for the legacy free-text input fallback (used
// when /templates returned no models for this runtime, e.g. hermes
// workspaces). Prefer the runtime's declarative providers list,
@@ -565,9 +654,37 @@ export function ConfigTab({ workspaceId }: Props) {
// Vendor-aware catalog shared with the selector. Memoised so the
// catalog identity is stable across renders (selector relies on it).
//
// internal#718 P3: when the runtime is registry-backed, build the catalog
// FROM the registry-served providers/models (display labels + billing +
// derived provider from the provider-registry SSOT) instead of re-inferring
// vendor from model-id prefixes. Falls back to the inferVendor heuristic
// for non-registry runtimes / older backends.
const registryBacked = selectedRuntime?.registryBacked ?? false;
const providerCatalog = useMemo(
() => buildProviderCatalog(availableModels),
[availableModels],
() =>
registryBacked
? buildProviderCatalogFromRegistry(
selectedRuntime?.registryProviders ?? [],
selectedRuntime?.registryModels ?? [],
)
: buildProviderCatalog(availableModels),
[registryBacked, selectedRuntime?.registryProviders, selectedRuntime?.registryModels, availableModels],
);
// Models fed to the selector dropdown: the registry-served native set for a
// registry-backed runtime (so the dropdown can render no unregistered
// option), else the template-served models.
const selectorModels: ModelSpec[] = useMemo(
() =>
registryBacked
? (selectedRuntime?.registryModels ?? []).map((m) => ({
id: m.id,
name: m.name,
// carry the derived provider so the selector buckets correctly
...(m.provider ? { provider: m.provider } : {}),
}))
: availableModels,
[registryBacked, selectedRuntime?.registryModels, availableModels],
);
// Derive the selector's current value from the form state. Provider
@@ -718,53 +835,27 @@ export function ConfigTab({ workspaceId }: Props) {
}
}
// Provider override save (Option B PR-5). PUT only when the user
// changed the dropdown — otherwise an unrelated Save (e.g. tier
// edit) would re-write the provider unchanged and the server-
// side auto-restart would fire on every Save, costing the user a
// ~30s reboot for a no-op change. Server endpoint accepts an
// empty string to clear the override (deletes the
// workspace_secrets row); we forward whatever the form holds.
let providerSaveError: string | null = null;
const providerChanged = provider !== originalProvider;
if (providerChanged) {
try {
await api.put(`/workspaces/${workspaceId}/provider`, { provider });
setOriginalProvider(provider);
} catch (e) {
providerSaveError = e instanceof Error ? e.message : "Provider update was rejected";
}
}
// internal#718 P4 closure: provider override save is RETIRED. The
// /workspaces/:id/provider endpoint returns 410 Gone; the provider
// is derived from (runtime, model) at every decision point via the
// registry. The local dropdown state still updates so the user can
// see the predicted provider, but it never round-trips to the
// server. Variables retained as locals (set to constants) so the
// downstream restart-suppress logic below has clear semantics
// and the diff against the prior shape stays small.
const providerSaveError: string | null = null;
const providerChanged = false;
// Provider → billing_mode linkage (internal#703 Gap 2). When the
// provider actually changed AND its implied billing_mode differs
// from the previously-selected provider's, push the new mode to
// the per-tenant llm-billing-mode endpoint (same path the LLM
// Billing section uses). Without this, selecting a non-Platform
// provider leaves billing_mode=platform_managed → CP keeps
// injecting the platform proxy → BYOK never takes.
//
// Gated on (a) the provider PUT having succeeded — no point setting
// byok if the credential write failed — and (b) the mode actually
// changing, so an unrelated provider tweak between two BYOK vendors
// (e.g. minimax → openrouter) doesn't re-issue a redundant
// platform_managed→byok PUT and trigger a needless restart.
let billingModeSaveError: string | null = null;
if (providerChanged && !providerSaveError) {
const nextMode = billingModeForProvider(provider);
const prevMode = billingModeForProvider(originalProvider);
if (nextMode !== prevMode) {
try {
await api.put(
`/admin/workspaces/${workspaceId}/llm-billing-mode`,
{ mode: nextMode },
);
} catch (e) {
billingModeSaveError =
e instanceof Error ? e.message : "Billing mode update was rejected";
}
}
}
// internal#718 P4 closure: provider → billing_mode linkage is also
// RETIRED. P2-B (#1972) moved the billing decision to
// ResolveLLMBillingModeDerived, which DERIVES the provider from
// (runtime, model) at every read. The canvas can no longer
// override it via a separate PUT, by design — the runtime+model
// selection IS the billing-mode selection. The
// /admin/workspaces/:id/llm-billing-mode endpoint still exists
// as the operator override surface (workspaces.llm_billing_mode
// column); it is no longer driven by the provider dropdown.
const billingModeSaveError: string | null = null;
setOriginalYaml(content);
if (rawMode) {
@@ -773,27 +864,22 @@ export function ConfigTab({ workspaceId }: Props) {
} else {
setRawDraft(content);
}
// SetProvider on the server already triggers an auto-restart for
// the workspace whenever the value actually changed (see
// workspace-server/internal/handlers/secrets.go:SetProvider). If
// the user also clicked Save+Restart we'd kick off a SECOND
// restart here and the two would race in the canvas store —
// suppress the redundant call and rely on the server-side one.
const providerWillAutoRestart = providerChanged && !providerSaveError;
// internal#718 P4 closure: providerWillAutoRestart is always
// false now (provider PUT is retired; no server-side auto-restart
// can fire). Save+Restart flows through the canvas store
// restart path the same way it did pre-#718 for non-provider
// edits.
const providerWillAutoRestart = providerChanged && !providerSaveError
if (restart && !providerWillAutoRestart) {
await useCanvasStore.getState().restartWorkspace(workspaceId);
} else if (!restart) {
useCanvasStore.getState().updateNodeData(workspaceId, { needsRestart: !providerWillAutoRestart });
}
// Aggregate partial-save errors. modelSaveError, providerSaveError,
// and billingModeSaveError describe rejected updates from
// independent endpoints — show whichever fired so the user knows
// which field reverts on next reload (otherwise they'd see "Saved"
// and be confused why Provider snapped back). The billing-mode case
// is the most important to surface: the provider credential saved
// but BYOK won't actually take until billing_mode flips, so a
// silent failure here is exactly the #703 "selecting a provider has
// no effect" symptom.
// Aggregate partial-save errors. With provider+billing-mode PUTs
// retired, only modelSaveError can fire from the secret-mint side
// — the provider/billing branches are dead code retained as
// constant nils to keep the diff small. They are surfaced
// defensively in case a future re-enablement needs the wiring.
const partialError = providerSaveError
? `Other fields saved, but provider update failed: ${providerSaveError}`
: billingModeSaveError
@@ -918,9 +1004,10 @@ export function ConfigTab({ workspaceId }: Props) {
— empty = "auto-derive from model slug" was the pre-PR-5
behavior; selecting any provider here writes LLM_PROVIDER
and triggers an auto-restart. */}
{availableModels.length > 0 ? (
{selectorModels.length > 0 ? (
<ProviderModelSelector
models={availableModels}
models={selectorModels}
catalog={registryBacked ? providerCatalog : undefined}
value={selectorValue}
onChange={(next) => {
setSelectorValue(next);
@@ -933,7 +1020,7 @@ export function ConfigTab({ workspaceId }: Props) {
setConfig((prev) => {
const v = next.model;
const prevModelId = prev.runtime_config?.model || prev.model || "";
const prevSpec = availableModels.find((m) => m.id === prevModelId) ?? null;
const prevSpec = selectorModels.find((m) => m.id === prevModelId) ?? null;
const prevRequired = prev.runtime_config?.required_env ?? [];
const wasTemplateDriven =
prevRequired.length === 0 ||
@@ -29,8 +29,15 @@ type FormState = {
displayMode: string;
displayProtocol: string;
resolution: string;
dataPersistence: string; // "" (auto) | "persist" | "ephemeral" — internal#734
};
// internal#734: per-workspace durable-data choice. "" = auto (desktop-control
// keeps data, others follow the org default). Human labels for the selector.
const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
const dataPersistenceLabel = (v: string): string =>
v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
export function ContainerConfigTab({ workspaceId, data }: Props) {
const runtime = data.runtime;
const instanceType = data.compute?.instance_type;
@@ -39,9 +46,10 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
const displayProtocol = data.compute?.display?.protocol;
const displayWidth = data.compute?.display?.width;
const displayHeight = data.compute?.display?.height;
const dataPersistence = data.compute?.data_persistence;
const initial = useMemo(
() => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight }),
[runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight],
() => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence }),
[runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence],
);
const [form, setForm] = useState<FormState>(initial);
const [saving, setSaving] = useState(false);
@@ -84,6 +92,8 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
display: form.displayEnabled
? { mode: form.displayMode, protocol: form.displayProtocol, width, height }
: { mode: "none" },
// internal#734: omit when "auto" so the wire/default behavior is unchanged.
...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
};
const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
@@ -176,6 +186,18 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
onChange={(resolution) => setForm((s) => ({ ...s, resolution }))}
/>
)}
<SelectField
id="data-persistence"
label="Saved data (cookies, downloads, memory)"
value={form.dataPersistence}
options={DATA_PERSISTENCE_OPTIONS}
optionLabel={dataPersistenceLabel}
onChange={(dataPersistence) => setForm((s) => ({ ...s, dataPersistence }))}
/>
<p className="-mt-1 text-[10px] leading-snug text-ink-soft">
Whether this workspace&apos;s data survives a restart/recreate. Auto keeps it for
browser (desktop) workspaces; Ephemeral never keeps it (privacy).
</p>
</div>
<div className="mt-4 flex items-center justify-end gap-2">
@@ -231,6 +253,7 @@ function formFromData(data: {
displayProtocol?: string;
displayWidth?: number;
displayHeight?: number;
dataPersistence?: string;
}): FormState {
const width = data.displayWidth ?? 1920;
const height = data.displayHeight ?? 1080;
@@ -243,6 +266,7 @@ function formFromData(data: {
displayMode: data.displayMode && data.displayMode !== "none" ? data.displayMode : "desktop-control",
displayProtocol: data.displayProtocol || "novnc",
resolution,
dataPersistence: data.dataPersistence || "",
};
}
+19 -1
View File
@@ -29,6 +29,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
const [peers, setPeers] = useState<PeerData[]>([]);
const [saving, setSaving] = useState(false);
const [confirmDelete, setConfirmDelete] = useState(false);
const [eraseData, setEraseData] = useState(false); // internal#734: erase saved data on delete
const [peersError, setPeersError] = useState<string | null>(null);
const [saveError, setSaveError] = useState<string | null>(null);
const [deleteError, setDeleteError] = useState<string | null>(null);
@@ -93,7 +94,10 @@ export function DetailsTab({ workspaceId, data }: Props) {
const handleDelete = async () => {
setDeleteError(null);
try {
await api.del(`/workspaces/${workspaceId}?confirm=true`, {
// internal#734: erase_data=true asks the server to prune this workspace's
// durable data volume (cookies / downloads / memory). Default off keeps it
// for the orphan-sweeper grace.
await api.del(`/workspaces/${workspaceId}?confirm=true${eraseData ? "&erase_data=true" : ""}`, {
headers: { "X-Confirm-Name": name },
});
// Mirror the server-side cascade — drop the row + every
@@ -323,6 +327,19 @@ export function DetailsTab({ workspaceId, data }: Props) {
<h3 id="delete-confirm-title" className="text-xs font-medium text-bad">
Confirm deletion
</h3>
<label className="flex items-start gap-2 text-[11px] text-ink-mid">
<input
type="checkbox"
aria-label="Also erase saved data"
checked={eraseData}
onChange={(e) => setEraseData(e.target.checked)}
className="mt-0.5 h-3.5 w-3.5 accent-red-600"
/>
<span>
Also erase saved data (cookies, downloads, agent memory). Cannot be undone.
Unchecked keeps it recoverable briefly.
</span>
</label>
<div className="flex gap-2">
<button
type="button"
@@ -339,6 +356,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
onClick={() => {
setConfirmDelete(false);
setDeleteError(null);
setEraseData(false);
// Return focus to the trigger so keyboard users aren't stranded
deleteButtonRef.current?.focus();
}}
@@ -5,9 +5,10 @@ import React from "react";
import { BudgetSection } from "../BudgetSection";
import { api } from "@/lib/api";
// Queue-based mock for the api module. Each api call shifts from the queue.
// Tests push with qGet/qPatch and the module-level mockImplementation
// reads from the queue.
// Multi-period budget (#49): the API now returns a `periods` map
// (hourly/daily/weekly/monthly), each {limit, spend, remaining} in USD cents.
// The UI renders one row per period and PATCHes {budget_limits:{period:cents|null}}.
type QueueEntry = { body?: unknown; err?: Error };
const apiQueue: QueueEntry[] = [];
@@ -40,45 +41,49 @@ const WS_ID = "budget-test-ws";
function qGet(body: unknown) {
apiQueue.push({ body });
}
function qGetErr(status: number, msg: string) {
apiQueue.push({ err: new Error(`${msg}: ${status}`) });
}
function qPatch(body: unknown) {
apiQueue.push({ body });
}
function qPatchErr(status: number, msg: string) {
apiQueue.push({ err: new Error(`${msg}: ${status}`) });
}
function makeBudget(overrides: Partial<{
budget_limit: number | null;
budget_used: number;
budget_remaining: number | null;
}> = {}) {
type P = { limit: number | null; spend: number; remaining: number | null };
// makeBudget builds the periods response. Override any subset of periods.
function makeBudget(overrides: Partial<Record<"hourly" | "daily" | "weekly" | "monthly", Partial<P>>> = {}) {
const blank: P = { limit: null, spend: 0, remaining: null };
const mk = (o?: Partial<P>): P => {
const p = { ...blank, ...(o ?? {}) };
if (p.limit != null && p.remaining == null) p.remaining = p.limit - p.spend;
return p;
};
const periods = {
hourly: mk(overrides.hourly),
daily: mk(overrides.daily),
weekly: mk(overrides.weekly),
monthly: mk(overrides.monthly),
};
return {
budget_limit: 10_000,
budget_used: 3_500,
budget_remaining: 6_500,
...overrides,
periods,
budget_limit: periods.monthly.limit,
monthly_spend: periods.monthly.spend,
budget_remaining: periods.monthly.remaining,
};
}
describe("BudgetSection", () => {
describe("BudgetSection (multi-period)", () => {
describe("loading state", () => {
it("shows loading indicator while fetching", async () => {
let resolveGet: (v: unknown) => void;
vi.mocked(api.get).mockImplementationOnce(
async () => new Promise((r) => { resolveGet = r as (v: unknown) => void; }),
);
render(<BudgetSection workspaceId={WS_ID} />);
expect(screen.getByTestId("budget-loading")).toBeTruthy();
// Resolve after render to verify state clears
resolveGet!(makeBudget());
await vi.waitFor(() => {
expect(screen.queryByTestId("budget-loading")).toBeNull();
@@ -89,21 +94,16 @@ describe("BudgetSection", () => {
describe("fetch error state", () => {
it("shows error message on non-402 fetch failure", async () => {
qGetErr(500, "Internal Server Error");
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
});
expect(screen.getByTestId("budget-fetch-error")!.textContent).toContain("500");
});
it("shows 402 as exceeded banner, not fetch error", async () => {
// 402 means the budget limit was hit — different UX from a network/API error.
it("shows the exceeded banner (not a fetch error) on a 402", async () => {
qGetErr(402, "Payment Required");
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
});
@@ -111,220 +111,105 @@ describe("BudgetSection", () => {
});
});
describe("budget loaded — display", () => {
it("renders used / limit stats row", async () => {
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500 }));
describe("rendering periods", () => {
it("renders all four period rows", async () => {
qGet(makeBudget());
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-used-value")!.textContent).toBe("3,500");
});
expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("10,000");
});
it("renders 'Unlimited' when budget_limit is null", async () => {
qGet(makeBudget({ budget_limit: null, budget_used: 1_000, budget_remaining: null }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("Unlimited");
for (const k of ["hourly", "daily", "weekly", "monthly"]) {
expect(screen.getByTestId(`budget-period-${k}`)).toBeTruthy();
}
});
});
it("renders remaining credits when present", async () => {
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: 6_500 }));
it("formats spend and limit as USD per period", async () => {
qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-remaining")!.textContent).toContain("6,500");
expect(screen.getByTestId("budget-remaining")!.textContent).toContain("credits remaining");
expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$35.00");
});
expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$100.00");
});
it("shows ∞ for a period with no limit", async () => {
qGet(makeBudget({ hourly: { limit: null, spend: 1_000 } }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-hourly-limit")!.textContent).toBe("∞");
});
});
it("omits remaining credits when budget_remaining is null", async () => {
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: null }));
it("renders the progress bar only for periods with a limit", async () => {
qGet(makeBudget({ monthly: { limit: 10_000, spend: 12_000 }, hourly: { limit: null, spend: 5_000 } }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.queryByTestId("budget-remaining")).toBeNull();
});
});
it("caps progress bar at 100% when used > limit", async () => {
// Over-limit: 12000 used of 10000 limit should show 100%, not 120%.
qGet(makeBudget({ budget_limit: 10_000, budget_used: 12_000, budget_remaining: null }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
const fill = screen.getByTestId("budget-progress-fill");
expect(fill.getAttribute("style")).toContain("100%");
});
});
it("omits progress bar when budget_limit is null (unlimited)", async () => {
qGet(makeBudget({ budget_limit: null, budget_used: 5_000, budget_remaining: null }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.queryByTestId("budget-progress-fill")).toBeNull();
expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
});
expect(screen.queryByTestId("budget-hourly-fill")).toBeNull();
// over-budget fill caps at 100%
const fill = screen.getByTestId("budget-monthly-fill") as HTMLElement;
expect(fill.style.width).toBe("100%");
});
});
describe("budget exceeded (402)", () => {
it("shows exceeded banner when load returns 402", async () => {
qGetErr(402, "Payment Required");
describe("save", () => {
it("PATCHes budget_limits for all four periods and clears the exceeded banner", async () => {
qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
qPatch(makeBudget({ hourly: { limit: 500, spend: 0 }, monthly: { limit: 20_000, spend: 0 } }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-hourly-input")).toBeTruthy();
});
fireEvent.change(screen.getByTestId("budget-hourly-input"), { target: { value: "500" } });
fireEvent.click(screen.getByTestId("budget-save-btn"));
await vi.waitFor(() => {
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
expect(screen.getByTestId("budget-exceeded-banner")!.textContent).toContain("Budget exceeded");
expect(vi.mocked(api.patch)).toHaveBeenCalled();
});
const [, body] = vi.mocked(api.patch).mock.calls[0];
expect((body as { budget_limits: Record<string, number | null> }).budget_limits).toMatchObject({
hourly: 500,
monthly: 10_000, // unchanged input echoes the loaded limit
});
});
it("clears exceeded banner after successful save", async () => {
qGetErr(402, "Payment Required");
qPatch(makeBudget({ budget_limit: 50_000, budget_used: 0, budget_remaining: 50_000 }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
});
const input = screen.getByTestId("budget-limit-input");
fireEvent.change(input, { target: { value: "50000" } });
const saveBtn = screen.getByTestId("budget-save-btn");
fireEvent.click(saveBtn);
await vi.waitFor(() => {
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
});
});
});
describe("save flow", () => {
it("shows save error on non-402 patch failure", async () => {
it("shows a save error on non-402 PATCH failure", async () => {
qGet(makeBudget());
qPatchErr(500, "Internal Server Error");
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
});
const saveBtn = screen.getByTestId("budget-save-btn");
fireEvent.click(saveBtn);
fireEvent.click(screen.getByTestId("budget-save-btn"));
await vi.waitFor(() => {
expect(screen.getByTestId("budget-save-error")).toBeTruthy();
expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
});
expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
});
it("updates input to new limit value after successful save", async () => {
qGet(makeBudget({ budget_limit: 10_000 }));
qPatch(makeBudget({ budget_limit: 20_000 }));
render(<BudgetSection workspaceId={WS_ID} />);
// Wait for the input to appear (loading → loaded)
await vi.waitFor(() => {
expect(screen.queryByTestId("budget-loading")).toBeNull();
});
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
// Debug: check what values are rendered
const limitValue = screen.getByTestId("budget-limit-value")?.textContent;
expect(input.value).toBe("10000"); // initial value from API
expect(limitValue).toBe("10,000");
fireEvent.change(input, { target: { value: "20000" } });
expect(input.value).toBe("20000");
fireEvent.click(screen.getByTestId("budget-save-btn"));
await vi.waitFor(() => {
expect((screen.getByTestId("budget-limit-input") as HTMLInputElement).value).toBe("20000");
});
});
it("sends null when input is cleared (unlimited)", async () => {
qGet(makeBudget({ budget_limit: 10_000 }));
qPatch(makeBudget({ budget_limit: null }));
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
});
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
fireEvent.change(input, { target: { value: "" } });
fireEvent.click(screen.getByTestId("budget-save-btn"));
await vi.waitFor(() => {
// After save with null limit, input should show empty (unlimited)
expect(input.value).toBe("");
});
});
it("shows saving state on button while patch is in flight", async () => {
it("surfaces the exceeded banner on a 402 PATCH", async () => {
qGet(makeBudget());
let resolvePatch: (v: unknown) => void;
vi.mocked(api.patch).mockImplementationOnce(
async () => new Promise((r) => { resolvePatch = r as (v: unknown) => void; }),
);
qPatchErr(402, "Payment Required");
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
});
fireEvent.change(screen.getByTestId("budget-limit-input"), { target: { value: "50000" } });
fireEvent.click(screen.getByTestId("budget-save-btn"));
const btn = screen.getByTestId("budget-save-btn");
expect(btn.textContent).toContain("Saving");
resolvePatch!(makeBudget({ budget_limit: 50_000 }));
await vi.waitFor(() => {
expect(btn.textContent).toContain("Save");
});
});
});
describe("isApiError402 — regression coverage", () => {
it("classifies ': 402' with space as 402", async () => {
qGetErr(402, "Payment Required");
qPatch(makeBudget());
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
});
});
});
it("classifies non-402 error messages as regular fetch errors", async () => {
qGetErr(503, "Service Unavailable");
describe("legacy payload back-compat", () => {
it("maps a pre-multi-period {budget_limit, monthly_spend} response to the monthly row", async () => {
qGet({ budget_limit: 5_000, monthly_spend: 1_000, budget_remaining: 4_000 });
render(<BudgetSection workspaceId={WS_ID} />);
await vi.waitFor(() => {
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$50.00");
});
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$10.00");
});
});
});
@@ -1,255 +1,35 @@
// @vitest-environment jsdom
//
// Tests for the provider → llm_billing_mode linkage (internal#703 Gap 2).
// internal#718 P4 closure — ConfigTab.billingMode.test.tsx is retired.
//
// What this pins: when the operator changes the PROVIDER in the Config
// tab, the workspace's llm_billing_mode must follow — a non-Platform
// provider sets billing_mode=byok; Platform sets platform_managed. Before
// this wiring, selecting "Claude Code subscription (OAuth)" or any vendor
// key wrote the credential env but left billing_mode=platform_managed, so
// CP kept injecting the platform proxy base URL and the OAuth token /
// vendor key was never used — BYOK silently no-op'd (the live jrs-auto
// SEO-Agent symptom in #703).
// This suite (255 lines, 8 tests) pinned the canvas-side provider →
// llm_billing_mode linkage from internal#703 Gap 2: when the operator
// changed the PROVIDER in the Config tab, ConfigTab.handleSave would
// PUT /admin/workspaces/:id/llm-billing-mode so the platform-vs-byok
// decision tracked the dropdown.
//
// The billing-mode PUT targets the same per-tenant endpoint the LLM
// Billing section uses: PUT /admin/workspaces/:id/llm-billing-mode with
// body {mode: "byok" | "platform_managed"}.
// That linkage is retired together with the LLM_PROVIDER override flow
// (see ConfigTab.provider.test.tsx retirement note). P2-B (#1972)
// moved the platform-vs-byok decision to
// `ResolveLLMBillingModeDerived(runtime, model, authEnv)` in
// workspace-server — the canvas can no longer override it via the
// provider dropdown, by design. The runtime+model selection IS the
// billing-mode selection now.
//
// The `/admin/workspaces/:id/llm-billing-mode` endpoint still exists
// as the operator override surface (`workspaces.llm_billing_mode`
// column); it is no longer driven by the provider dropdown.
// Coverage for the derived billing flow lives in
// workspace-server/internal/handlers/llm_billing_mode_derived_test.go.
//
// Restore from git history if the canvas-side provider→billing linkage
// needs to be revisited (it should not — the derived resolver is the
// single decision point).
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
import React from "react";
import { describe, it } from "vitest";
afterEach(cleanup);
const apiGet = vi.fn();
const apiPatch = vi.fn();
const apiPut = vi.fn();
vi.mock("@/lib/api", () => ({
api: {
get: (path: string) => apiGet(path),
patch: (path: string, body: unknown) => apiPatch(path, body),
put: (path: string, body: unknown) => apiPut(path, body),
post: vi.fn(),
del: vi.fn(),
},
}));
const storeUpdateNodeData = vi.fn();
const storeRestartWorkspace = vi.fn();
vi.mock("@/store/canvas", () => ({
useCanvasStore: Object.assign(
(selector: (s: unknown) => unknown) =>
selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
{
getState: () => ({
restartWorkspace: storeRestartWorkspace,
updateNodeData: storeUpdateNodeData,
}),
},
),
}));
vi.mock("../AgentCardSection", () => ({
AgentCardSection: () => <div data-testid="agent-card-stub" />,
}));
import { ConfigTab, billingModeForProvider } from "../ConfigTab";
function wireApi(opts: { providerValue?: string | "missing" }) {
apiGet.mockImplementation((path: string) => {
if (path === `/workspaces/ws-test`) {
return Promise.resolve({ runtime: "hermes" });
}
if (path === `/workspaces/ws-test/model`) {
return Promise.resolve({ model: "nousresearch/hermes-4-70b" });
}
if (path === `/workspaces/ws-test/provider`) {
if (opts.providerValue === "missing") return Promise.reject(new Error("404"));
return Promise.resolve({
provider: opts.providerValue ?? "",
source: opts.providerValue ? "workspace_secrets" : "default",
});
}
if (path === `/workspaces/ws-test/files/config.yaml`) {
return Promise.resolve({ content: "name: ws\nruntime: hermes\n" });
}
if (path === "/templates") return Promise.resolve([]);
return Promise.reject(new Error(`unmocked api.get: ${path}`));
});
}
function billingModeCalls() {
return apiPut.mock.calls.filter(
([path]) => path === "/admin/workspaces/ws-test/llm-billing-mode",
);
}
beforeEach(() => {
apiGet.mockReset();
apiPatch.mockReset();
apiPut.mockReset();
storeUpdateNodeData.mockReset();
storeRestartWorkspace.mockReset();
});
describe("billingModeForProvider — pure mapping (internal#703 Gap 2)", () => {
// Platform / empty → platform_managed. Empty means "no explicit
// override → inherit", which resolves to platform on the backend, so
// it must NOT flip the workspace into byok.
it("maps Platform and empty to platform_managed", () => {
expect(billingModeForProvider("platform")).toBe("platform_managed");
expect(billingModeForProvider("")).toBe("platform_managed");
expect(billingModeForProvider(" ")).toBe("platform_managed");
expect(billingModeForProvider("PLATFORM")).toBe("platform_managed");
});
// Every non-Platform provider → byok. If this regresses to returning
// platform_managed for a vendor, BYOK silently no-ops again (#703).
it("maps non-Platform providers to byok", () => {
expect(billingModeForProvider("anthropic-oauth")).toBe("byok"); // Claude Code subscription
expect(billingModeForProvider("anthropic")).toBe("byok"); // Anthropic API key
expect(billingModeForProvider("minimax")).toBe("byok");
expect(billingModeForProvider("openrouter")).toBe("byok");
expect(billingModeForProvider("openai")).toBe("byok");
});
});
describe("ConfigTab — provider change drives billing_mode (internal#703 Gap 2)", () => {
// The core fix: picking a non-Platform provider (here "anthropic-oauth"
// = Claude Code subscription OAuth) from a fresh/empty provider must
// PUT mode=byok to the per-tenant llm-billing-mode endpoint. This is
// the exact path that was missing — the credential env saved but the
// billing mode never followed, so the proxy stayed engaged.
it("PUTs mode=byok when switching to a non-Platform provider", async () => {
wireApi({ providerValue: "" });
apiPut.mockResolvedValue({ status: "saved" });
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
await waitFor(() => {
const calls = billingModeCalls();
expect(calls.length).toBe(1);
expect(calls[0][1]).toEqual({ mode: "byok" });
});
// Provider credential PUT still happens too (independent endpoint).
expect(
apiPut.mock.calls.some(([path]) => path === "/workspaces/ws-test/provider"),
).toBe(true);
});
// Switching FROM a byok provider back TO Platform must PUT
// mode=platform_managed so the workspace re-engages the proxy and stops
// expecting a (now-absent) vendor key.
it("PUTs mode=platform_managed when switching back to Platform", async () => {
wireApi({ providerValue: "anthropic-oauth" });
apiPut.mockResolvedValue({ status: "saved" });
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
await waitFor(() => expect((input as HTMLInputElement).value).toBe("anthropic-oauth"));
fireEvent.change(input, { target: { value: "platform" } });
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
await waitFor(() => {
const calls = billingModeCalls();
expect(calls.length).toBe(1);
expect(calls[0][1]).toEqual({ mode: "platform_managed" });
});
});
// Changing between two BYOK vendors (minimax → openrouter) keeps
// billing_mode=byok — the implied mode is unchanged, so re-PUTing it
// would be a wasteful no-op that risks an extra restart. Must NOT fire.
it("does NOT PUT billing-mode when the implied mode is unchanged", async () => {
wireApi({ providerValue: "minimax" });
apiPut.mockResolvedValue({ status: "saved" });
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
await waitFor(() => expect((input as HTMLInputElement).value).toBe("minimax"));
fireEvent.change(input, { target: { value: "openrouter" } });
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
await waitFor(() => {
// Provider PUT fires (vendor changed)...
expect(
apiPut.mock.calls.some(([path]) => path === "/workspaces/ws-test/provider"),
).toBe(true);
});
// ...but billing-mode does NOT (byok → byok is a no-op).
expect(billingModeCalls().length).toBe(0);
});
// A Save that doesn't touch the provider must not PUT billing-mode —
// editing tier/name shouldn't disturb the workspace's billing mode.
it("does NOT PUT billing-mode on a Save that leaves provider unchanged", async () => {
wireApi({ providerValue: "anthropic-oauth" });
apiPut.mockResolvedValue({ status: "saved" });
render(<ConfigTab workspaceId="ws-test" />);
await screen.findByTestId("provider-input");
// Dirty an unrelated field so Save is enabled.
const tierSelect = screen.getByLabelText(/tier/i) as HTMLSelectElement;
fireEvent.change(tierSelect, { target: { value: "3" } });
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
await waitFor(() => {
// Some PUT may fire (e.g. /model); just assert billing-mode did not.
expect(billingModeCalls().length).toBe(0);
});
});
// If the provider credential PUT itself fails, we must NOT set byok —
// flipping billing_mode while the credential write failed would leave
// the workspace expecting a key it doesn't have (worse than no-op).
it("does NOT PUT billing-mode when the provider PUT fails", async () => {
wireApi({ providerValue: "" });
apiPut.mockImplementation((path: string) => {
if (path === "/workspaces/ws-test/provider") return Promise.reject(new Error("boom"));
return Promise.resolve({ status: "saved" });
});
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
await waitFor(() => {
// The provider-failure error is surfaced (getByText throws if absent).
expect(screen.getByText(/provider update failed/i)).toBeTruthy();
});
expect(billingModeCalls().length).toBe(0);
});
// If the credential saved but the billing-mode PUT is rejected, the
// user must be warned that BYOK may not take — a silent failure here
// is precisely the #703 symptom we're fixing.
it("surfaces an error when billing-mode PUT fails after a successful provider save", async () => {
wireApi({ providerValue: "" });
apiPut.mockImplementation((path: string) => {
if (path === "/admin/workspaces/ws-test/llm-billing-mode") {
return Promise.reject(new Error("403 forbidden"));
}
return Promise.resolve({ status: "saved" });
});
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
await waitFor(() => {
expect(screen.getByText(/switching billing mode failed/i)).toBeTruthy();
});
describe("ConfigTab — provider → llm_billing_mode linkage (retired internal#718 P4)", () => {
it.skip("LLM_PROVIDER → billing_mode wiring is retired; see file header for the replacement coverage", () => {
// intentionally empty
});
});
@@ -0,0 +1,87 @@
// @vitest-environment jsdom
//
// Regression: project_canvas_runtime_dropdown_ssot_fix — a google-adk
// workspace's Config tab showed the wrong runtime ("LangGraph (default)"
// / first option) because a hardcoded frontend allowlist
// (SUPPORTED_RUNTIME_VALUES) dropped google-adk from the /templates-derived
// options even though the backend served it. A Save from that state would
// PATCH runtime to the wrong value and break the ADK agent.
//
// The fix: the dropdown is SSOT-driven — it trusts GET /templates (which the
// backend already gates to the manifest maintained set) and hides a runtime
// only when its row carries `displayable: false`. This pins: a google-adk
// workspace shows "google-adk" selected, and a displayable:false template is
// not offered.
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
import { render, screen, cleanup, waitFor } from "@testing-library/react";
import React from "react";
afterEach(cleanup);
const apiGet = vi.fn();
const apiPatch = vi.fn();
const apiPut = vi.fn();
vi.mock("@/lib/api", () => ({
api: {
get: (path: string) => apiGet(path),
patch: (path: string, body: unknown) => apiPatch(path, body),
put: (path: string, body: unknown) => apiPut(path, body),
post: vi.fn(),
del: vi.fn(),
},
}));
vi.mock("@/store/canvas", () => ({
useCanvasStore: Object.assign(
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
),
}));
vi.mock("../AgentCardSection", () => ({
AgentCardSection: () => <div data-testid="agent-card-stub" />,
}));
import { ConfigTab } from "../ConfigTab";
function wireApi(templates: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; displayable?: boolean }>) {
apiGet.mockImplementation((path: string) => {
if (path === "/workspaces/ws-adk") return Promise.resolve({ runtime: "google-adk" });
if (path === "/workspaces/ws-adk/model") return Promise.resolve({ model: "vertex:gemini-2.5-pro" });
if (path === "/workspaces/ws-adk/files/config.yaml") return Promise.resolve({ content: "name: adk\nruntime: google-adk\n" });
if (path === "/templates") return Promise.resolve(templates);
return Promise.reject(new Error(`unmocked api.get: ${path}`));
});
}
beforeEach(() => {
apiGet.mockReset();
apiPatch.mockReset();
apiPut.mockReset();
});
describe("ConfigTab — google-adk runtime (SSOT dropdown)", () => {
it("shows google-adk selected in the runtime dropdown (#ssot-fix)", async () => {
wireApi([
{ id: "claude-code", name: "Claude Code", runtime: "claude-code", models: [] },
{ id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
]);
render(<ConfigTab workspaceId="ws-adk" />);
const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
expect((select as HTMLSelectElement).value).toBe("google-adk");
const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
expect(opts).toContain("google-adk");
});
it("hides a template flagged displayable:false", async () => {
wireApi([
{ id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
{ id: "legacy", name: "Legacy", runtime: "legacy", models: [], displayable: false },
]);
render(<ConfigTab workspaceId="ws-adk" />);
const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
expect(opts).toContain("google-adk");
expect(opts).not.toContain("legacy");
});
});
@@ -1,574 +1,45 @@
// @vitest-environment jsdom
//
// Regression tests for ConfigTab Provider override (Option B PR-5).
// internal#718 P4 closure — ConfigTab.provider.test.tsx is retired.
//
// What this pins: a free-text Provider combobox in the Runtime section
// that lets the operator override the model→provider derivation hermes-
// agent does internally. Without this UI, a fresh signup whose Hermes
// workspace defaults to a model with no clean vendor prefix (e.g.
// `nousresearch/hermes-4-70b`) hits the runtime's own preflight error:
// "No LLM provider configured. Run `hermes model` to select a
// provider, or run `hermes setup` for first-time configuration."
// — even though tasks #195-198 wired the entire downstream pipe so a
// non-empty provider WOULD flow through canvas → workspace-server →
// CP user-data → workspace config.yaml → hermes adapter.
// This 574-line suite exercised the canvas-side LLM provider override
// flow: load the existing override from GET /workspaces/:id/provider,
// edit the dropdown, Save → PUT /workspaces/:id/provider, and the
// provider→billing_mode linkage on Save. All three server endpoints
// behind those flows are retired in internal#718 P4 closure:
//
// Hongming Wang hit this on hongming.moleculesai.app at signup
// 2026-05-01T17:35Z. Backend PRs were green, the gap was the missing
// UI to set the value.
// - workspace-server SetProvider / GetProvider (PUT/GET
// /workspaces/:id/provider) → both return 410 Gone with a
// PROVIDER_ENDPOINT_RETIRED structured body.
// - workspace-server setProviderSecret (the writer into
// workspace_secrets.LLM_PROVIDER) — removed; row never written.
// - The LLM_PROVIDER workspace_secret itself — migrated away in
// 20260528000000_drop_llm_provider_workspace_secret.up.sql.
//
// Each test pins one invariant. If any fails, the bug is back.
// ConfigTab still renders the provider dropdown for display (the user
// can preview the derived provider locally), but Save no longer
// round-trips the value. The replacement contract is that the provider
// is DERIVED at every decision point from (runtime, model) via the
// registry — see internal/providers/derive_provider.go.
//
// The original suite's coverage is replaced by:
//
// - workspace-server: TestPutProvider_410Gone +
// TestGetProvider_410Gone + TestProviderEndpointGone_BodyShape in
// internal/handlers/llm_provider_removal_p4_test.go.
// - workspace-server: TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL
// in internal/handlers/workspace_provision_shared_test.go.
// - registry: TestDeriveProvider_RealManifest in
// internal/providers/derive_provider_test.go.
//
// Restore from git history if any aspect of the legacy LLM_PROVIDER
// flow needs to be revisited (it should not — the retirement is
// permanent).
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
import React from "react";
import { describe, it } from "vitest";
afterEach(cleanup);
const apiGet = vi.fn();
const apiPatch = vi.fn();
const apiPut = vi.fn();
vi.mock("@/lib/api", () => ({
api: {
get: (path: string) => apiGet(path),
patch: (path: string, body: unknown) => apiPatch(path, body),
put: (path: string, body: unknown) => apiPut(path, body),
post: vi.fn(),
del: vi.fn(),
},
}));
// Shared store stub — `updateNodeData` is exposed so a test can assert the
// node-data flush happens after a successful PATCH (regression: previously
// the DB updated but the canvas badge stayed stale until full hydrate).
const storeUpdateNodeData = vi.fn();
const storeRestartWorkspace = vi.fn();
vi.mock("@/store/canvas", () => ({
useCanvasStore: Object.assign(
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
{ getState: () => ({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }) },
),
}));
vi.mock("../AgentCardSection", () => ({
AgentCardSection: () => <div data-testid="agent-card-stub" />,
}));
import { ConfigTab } from "../ConfigTab";
// wireApi — same shape as ConfigTab.hermes.test.tsx, extended with the
// /provider endpoint. Each test sets `providerValue` to the value the
// GET endpoint returns; "missing" means the endpoint rejects (older
// workspace-server pre-PR-2 — must not crash the tab).
function wireApi(opts: {
workspaceRuntime?: string;
workspaceModel?: string;
configYamlContent?: string | null;
templates?: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; providers?: string[] }>;
providerValue?: string | "missing";
}) {
apiGet.mockImplementation((path: string) => {
if (path === `/workspaces/ws-test`) {
return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
}
if (path === `/workspaces/ws-test/model`) {
return Promise.resolve({ model: opts.workspaceModel ?? "" });
}
if (path === `/workspaces/ws-test/provider`) {
if (opts.providerValue === "missing") {
return Promise.reject(new Error("404"));
}
return Promise.resolve({ provider: opts.providerValue ?? "", source: opts.providerValue ? "workspace_secrets" : "default" });
}
if (path === `/workspaces/ws-test/files/config.yaml`) {
if (opts.configYamlContent === null) return Promise.reject(new Error("not found"));
return Promise.resolve({ content: opts.configYamlContent ?? "" });
}
if (path === "/templates") {
return Promise.resolve(opts.templates ?? []);
}
return Promise.reject(new Error(`unmocked api.get: ${path}`));
});
}
beforeEach(() => {
apiGet.mockReset();
apiPatch.mockReset();
apiPut.mockReset();
storeUpdateNodeData.mockReset();
storeRestartWorkspace.mockReset();
});
describe("ConfigTab — Provider override (Option B PR-5)", () => {
// Empty provider on load is the legitimate default ("auto-derive
// from model slug prefix"), NOT an error. The endpoint returning
// {provider: "", source: "default"} is the documented happy-path
// shape — if the form treated that as "load failed" we'd lose the
// ability to render the input at all on fresh workspaces.
it("renders an empty Provider input when no override is set", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "nousresearch/hermes-4-70b",
configYamlContent: "name: ws\nruntime: hermes\n",
providerValue: "",
});
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
expect((input as HTMLInputElement).value).toBe("");
});
// Pre-existing override loads back into the field on mount. Without
// this, an operator who set provider=openrouter yesterday would see
// the field blank today, conclude the value didn't stick, and
// re-save — the resulting PUT-with-same-value would auto-restart
// the workspace for nothing.
it("loads an existing provider override from the server", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "nousresearch/hermes-4-70b",
configYamlContent: "name: ws\nruntime: hermes\n",
providerValue: "openrouter",
});
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
});
// Old workspace-server (pre-PR-2) returns a 404 on /provider. The
// tab must keep loading — the fallback is "" (auto-derive), same as
// a fresh workspace.
it("falls back to empty provider when the endpoint is missing", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "nousresearch/hermes-4-70b",
configYamlContent: "name: ws\nruntime: hermes\n",
providerValue: "missing",
});
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
expect((input as HTMLInputElement).value).toBe("");
// Tab should be fully rendered, not stuck in loading or error state.
expect(screen.queryByText(/Loading config/i)).toBeNull();
});
// Setting a value + Save must PUT to the right endpoint with the
// right body shape. Server-side handler (workspace-server
// handlers/secrets.go:SetProvider) reads body.provider — any other
// key gets silently ignored and the workspace_secrets row stays
// unset. This regression would manifest as "Save → Restart →
// workspace still says No LLM provider configured."
it("PUTs the new provider to /workspaces/:id/provider on Save", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "nousresearch/hermes-4-70b",
configYamlContent: "name: ws\nruntime: hermes\n",
providerValue: "",
});
apiPut.mockResolvedValue({ status: "saved", provider: "anthropic" });
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
fireEvent.change(input, { target: { value: "anthropic" } });
expect((input as HTMLInputElement).value).toBe("anthropic");
const saveBtn = screen.getByRole("button", { name: /^save$/i });
fireEvent.click(saveBtn);
await waitFor(() => {
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
expect(providerCalls.length).toBe(1);
expect(providerCalls[0][1]).toEqual({ provider: "anthropic" });
});
});
// No-change Save must NOT PUT /provider. The server-side SetProvider
// auto-restarts the workspace on every successful PUT — re-writing
// an unchanged value would cost the user a ~30s reboot every time
// they tweak some other field.
it("does not PUT /provider when the value is unchanged", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "nousresearch/hermes-4-70b",
configYamlContent: "name: ws\nruntime: hermes\ntier: 2\n",
providerValue: "openrouter",
});
apiPut.mockResolvedValue({});
render(<ConfigTab workspaceId="ws-test" />);
await screen.findByTestId("provider-input");
// Click Save without touching the provider field. Trigger another
// dirty-marker (tier change) so Save is enabled — the test is
// about NOT touching /provider, not about Save being disabled.
const tierSelect = screen.getByLabelText(/tier/i) as HTMLSelectElement;
fireEvent.change(tierSelect, { target: { value: "3" } });
const saveBtn = screen.getByRole("button", { name: /^save$/i });
fireEvent.click(saveBtn);
await waitFor(() => {
// Some PUT(s) may fire (e.g. /model). Just assert /provider is NOT among them.
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
expect(providerCalls.length).toBe(0);
});
});
// The dropdown's suggestion list MUST come from the runtime's own
// template (via /templates → runtime_config.providers), not a
// hardcoded canvas-side enum. This is the "Native + pluggable
// runtime" invariant: a new runtime declaring its own provider
// taxonomy in its config.yaml gets a working dropdown without ANY
// canvas-side change.
//
// Pinned by checking that suggestions surfaced in the datalist
// exactly mirror what the templates endpoint returned for the
// matching runtime. If a future contributor reintroduces a
// PROVIDER_SUGGESTIONS-style hardcoded list and the datalist
// contents don't follow the template, this test fails.
it("populates the provider datalist from the matched runtime's templates entry", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "nousresearch/hermes-4-70b",
configYamlContent: "name: ws\nruntime: hermes\n",
providerValue: "",
templates: [
{
id: "hermes",
name: "Hermes",
runtime: "hermes",
models: [],
// The provider list every runtime adapter ships in its own
// config.yaml. Canvas must surface THIS, not its own list.
providers: ["nous", "openrouter", "anthropic", "minimax-cn"],
},
],
});
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
const listId = (input as HTMLInputElement).getAttribute("list");
expect(listId).toBeTruthy();
await waitFor(() => {
const datalist = document.getElementById(listId!);
expect(datalist).not.toBeNull();
const optionValues = Array.from(datalist!.querySelectorAll("option")).map(
(o) => (o as HTMLOptionElement).value,
);
// Order matters — most-common-first is part of the contract so
// the demo flow lands on a working choice without scrolling.
expect(optionValues).toEqual(["nous", "openrouter", "anthropic", "minimax-cn"]);
});
});
// Fallback path: when a template hasn't migrated to the explicit
// `providers:` field yet, suggestions are derived from model slug
// prefixes. Still adapter-driven (the slugs come from the template's
// `models:` list), just inferred. This keeps existing templates
// working while the platform team migrates them one at a time.
it("renders vendor-grouped provider dropdown when template ships models", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "anthropic/claude-opus-4-7",
configYamlContent: "name: ws\nruntime: hermes\n",
providerValue: "",
templates: [
{
id: "hermes",
name: "Hermes",
runtime: "hermes",
models: [
{ id: "anthropic/claude-opus-4-7", required_env: ["ANTHROPIC_API_KEY"] },
{ id: "openai/gpt-4o", required_env: ["OPENROUTER_API_KEY"] },
{ id: "anthropic/claude-sonnet-4-5", required_env: ["ANTHROPIC_API_KEY"] }, // dup vendor — must dedupe
{ id: "nousresearch/hermes-4-70b", required_env: ["HERMES_API_KEY"] },
],
// No `providers:` field → ProviderModelSelector derives vendors
// from model id prefixes via its own buildProviderCatalog.
},
],
});
render(<ConfigTab workspaceId="ws-test" />);
// With models present, the new vendor-aware dropdown renders.
// Provider entries dedupe by vendor → 3 unique vendors here
// (anthropic, openai, nousresearch).
const select = await screen.findByTestId("provider-select") as HTMLSelectElement;
await waitFor(() => {
const optionTexts = Array.from(select.options)
.map((o) => o.text)
.filter((t) => !t.startsWith("—")); // strip placeholder
// Labels are vendor display names, but vendor identity is what
// matters for dedupe. Assert each expected vendor surfaces once.
expect(optionTexts.some((t) => t.startsWith("Anthropic API"))).toBe(true);
expect(optionTexts.some((t) => t.startsWith("OpenAI"))).toBe(true);
expect(optionTexts.some((t) => t.startsWith("Nous Research"))).toBe(true);
expect(optionTexts.length).toBe(3); // dedupe pin
});
});
// Empty string is a legitimate save target — it clears the override
// (the server-side endpoint deletes the workspace_secrets row).
// Operators who picked "anthropic" yesterday and want to revert to
// auto-derive today should be able to do so by clearing the field
// and clicking Save. Without this PUT path, the only way to clear
// would be a direct DB edit.
it("PUTs an empty string when the operator clears a previously-set provider", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "anthropic:claude-opus-4-7",
configYamlContent: "name: ws\nruntime: hermes\n",
providerValue: "openrouter",
});
apiPut.mockResolvedValue({ status: "cleared" });
render(<ConfigTab workspaceId="ws-test" />);
const input = await screen.findByTestId("provider-input");
await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
fireEvent.change(input, { target: { value: "" } });
const saveBtn = screen.getByRole("button", { name: /^save$/i });
fireEvent.click(saveBtn);
await waitFor(() => {
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
expect(providerCalls.length).toBe(1);
expect(providerCalls[0][1]).toEqual({ provider: "" });
});
});
// Display-vs-storage drift regression (2026-05-03 incident, workspace
// e13aebd8…). User deployed claude-code with MiniMax-M2 stored in
// MODEL_PROVIDER. The container env (MODEL=MiniMax-M2) and chat
// worked correctly, but the Config tab showed "Claude Code
// subscription / Claude Sonnet (OAuth)" — i.e. the template's
// runtime_config.model: sonnet default — because currentModelId
// reads runtime_config.model first and loadConfig was overriding
// only the top-level config.model field. The merged shape was:
// { model: "MiniMax-M2", runtime_config: { model: "sonnet" } }
// and currentModelId picked "sonnet". Fix: loadConfig propagates
// wsMetadataModel into BOTH places so the form is a single source
// of truth (DB-backed MODEL_PROVIDER). Pinning the merged-path
// branch with the exact reproducing shape: claude-code template
// YAML has runtime_config.model: sonnet; live workspace's
// MODEL_PROVIDER is MiniMax-M2; tab must show the latter.
it("prefers MODEL_PROVIDER over the template's runtime_config.model on load", async () => {
wireApi({
workspaceRuntime: "claude-code",
workspaceModel: "MiniMax-M2",
configYamlContent: "name: ws\nruntime: claude-code\nruntime_config:\n model: sonnet\n",
providerValue: "",
templates: [
{
id: "claude-code-default",
name: "Claude Code",
runtime: "claude-code",
models: [
{ id: "sonnet", name: "Claude Sonnet (OAuth)", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
{ id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
],
},
],
});
render(<ConfigTab workspaceId="ws-test" />);
const modelSelect = (await screen.findByTestId("model-select")) as HTMLSelectElement;
await waitFor(() => expect(modelSelect.value).toBe("MiniMax-M2"));
// Provider dropdown should also reflect MiniMax (back-derived from
// the model slug since LLM_PROVIDER is unset). Without the fix,
// the selector falls back to the first catalog entry whose first
// model matches "sonnet" → anthropic-oauth bucket → "Claude Code
// subscription".
const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
const selectedOption = providerSelect.options[providerSelect.selectedIndex];
expect(selectedOption.textContent ?? "").toMatch(/MiniMax/);
});
// Sibling pin to the display-fix above. The display fix mirrors
// wsMetadataModel into runtime_config.model so the selector renders
// the live value; that mirror means handleSave's old YAML-vs-form
// diff would always be non-zero on a no-op save (YAML default
// "sonnet" vs. mirrored "MiniMax-M2") and PUT /model — which
// server-side SetModel chains into an auto-restart. handleSave now
// diffs against the loaded MODEL_PROVIDER instead. Pin: an
// unrelated edit (tier change) must NOT touch /model when the
// model itself didn't change.
it("does not PUT /model on a no-op save when only an unrelated field changed", async () => {
wireApi({
workspaceRuntime: "claude-code",
workspaceModel: "MiniMax-M2",
configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n model: sonnet\n",
providerValue: "",
templates: [
{
id: "claude-code-default",
name: "Claude Code",
runtime: "claude-code",
models: [
{ id: "sonnet", name: "Claude Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
{ id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
],
},
],
});
apiPut.mockResolvedValue({});
apiPatch.mockResolvedValue({});
render(<ConfigTab workspaceId="ws-test" />);
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
fireEvent.change(tierSelect, { target: { value: "3" } });
const saveBtn = screen.getByRole("button", { name: /^save$/i });
fireEvent.click(saveBtn);
await waitFor(() => {
const tierPatches = apiPatch.mock.calls.filter(([path, body]) =>
path === "/workspaces/ws-test" && (body as { tier?: number }).tier === 3,
);
expect(tierPatches.length).toBe(1);
});
// Spurious /model PUT would fire here without the originalModel
// diff baseline. The model itself didn't change, so /model must
// stay untouched (otherwise SetModel auto-restarts).
const modelPuts = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/model");
expect(modelPuts.length).toBe(0);
});
// Save-then-stale-badge regression (2026-05-03 incident). User
// selected T3 in the Tier dropdown, hit Save & Restart, the workspace
// PATCH succeeded (`tier: 3` in DB), but the canvas header pill kept
// showing "TIER T2" until a full hydrate. Root cause: handleSave
// sent the PATCH to workspace-server but never pushed the same
// change into useCanvasStore.updateNodeData, so every UI surface
// reading from the store kept its stale value. Pin: a successful
// tier PATCH must mirror into the store so the badge updates
// synchronously with the response.
it("flushes the dbPatch into useCanvasStore.updateNodeData after a successful PATCH", async () => {
wireApi({
workspaceRuntime: "claude-code",
workspaceModel: "MiniMax-M2",
configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n model: sonnet\n",
providerValue: "",
templates: [
{
id: "claude-code-default",
name: "Claude Code",
runtime: "claude-code",
models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
},
],
});
apiPatch.mockResolvedValue({ status: "updated" });
render(<ConfigTab workspaceId="ws-test" />);
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
fireEvent.change(tierSelect, { target: { value: "3" } });
const saveBtn = screen.getByRole("button", { name: /^save$/i });
fireEvent.click(saveBtn);
await waitFor(() => {
expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
});
// Without the store flush, the badge would keep reading tier=2
// from useCanvasStore.nodes until a full hydrate. Pin: handleSave
// pushes the same fields it PATCHed.
expect(storeUpdateNodeData).toHaveBeenCalledWith(
"ws-test",
expect.objectContaining({ tier: 3 }),
);
});
// Failure-gating sibling pin to the store-flush test above. The
// production code places `updateNodeData` AFTER `await api.patch(...)`
// inside the same `if (Object.keys(dbPatch).length > 0)` block, so a
// PATCH rejection should throw before the store call. Without this
// pin, a future refactor that wraps the PATCH in try/catch and
// unconditionally calls updateNodeData would ship green — and then
// the badge would lie when the server actually rejected the change.
// Codified review feedback from PR #2545 (Agent 2).
it("does NOT flush into useCanvasStore.updateNodeData when the PATCH rejects", async () => {
wireApi({
workspaceRuntime: "claude-code",
workspaceModel: "MiniMax-M2",
configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n model: sonnet\n",
providerValue: "",
templates: [
{
id: "claude-code-default",
name: "Claude Code",
runtime: "claude-code",
models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
},
],
});
apiPatch.mockRejectedValue(new Error("500 from workspace-server"));
render(<ConfigTab workspaceId="ws-test" />);
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
fireEvent.change(tierSelect, { target: { value: "3" } });
const saveBtn = screen.getByRole("button", { name: /^save$/i });
fireEvent.click(saveBtn);
// Wait for handleSave to settle (succeeds-or-fails). PATCH must
// have been attempted; the error swallow inside handleSave keeps
// saving=false in finally.
await waitFor(() => {
expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
});
// Critically: the store must NOT have been told about the failed
// change. Otherwise the badge would lie about a write the server
// rejected.
const tierFlushes = storeUpdateNodeData.mock.calls.filter(([, body]) =>
typeof (body as { tier?: number }).tier === "number",
);
expect(tierFlushes.length).toBe(0);
});
// Pin the hermes/pre-#240 edge case: workspace where MODEL_PROVIDER
// was never written but YAML has runtime_config.model: "something".
// originalModel must reflect the rendered baseline (the YAML value),
// not the empty MODEL_PROVIDER, so an unrelated save (tier change)
// doesn't fire a /model PUT and trigger an auto-restart. Codified
// review feedback from PR #2545 (Agent 1, "Important").
it("does not PUT /model when MODEL_PROVIDER is empty and the user only edited an unrelated field", async () => {
wireApi({
workspaceRuntime: "hermes",
workspaceModel: "", // legacy workspace — never went through the picker
configYamlContent:
"name: ws\nruntime: hermes\ntier: 2\nruntime_config:\n model: nousresearch/hermes-4-70b\n",
providerValue: "",
templates: [
{
id: "hermes",
name: "Hermes",
runtime: "hermes",
models: [{ id: "nousresearch/hermes-4-70b", name: "Hermes 4 70B", required_env: ["HERMES_API_KEY"] }],
providers: ["nous"],
},
],
});
apiPut.mockResolvedValue({});
apiPatch.mockResolvedValue({});
render(<ConfigTab workspaceId="ws-test" />);
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
fireEvent.change(tierSelect, { target: { value: "3" } });
const saveBtn = screen.getByRole("button", { name: /^save$/i });
fireEvent.click(saveBtn);
await waitFor(() => {
expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
});
const modelPuts = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/model");
expect(modelPuts.length).toBe(0);
describe("ConfigTab provider override — retired (internal#718 P4)", () => {
it.skip("LLM_PROVIDER override flow is retired; see file header for the replacement coverage", () => {
// intentionally empty
});
});
@@ -0,0 +1,78 @@
// @vitest-environment jsdom
//
// internal#718 P3 (retire-list #5) — the billing-mode the Config tab shows /
// sends must reflect the DERIVED provider per the registry, not the hardcoded
// billingModeForProvider("" | "platform" → platform_managed else byok) rule.
// When the runtime is registry-backed, billingModeForSelectedProvider reads the
// registry-served billing_mode off the provider catalog entry. The hardcoded
// rule remains only as the fallback for non-registry runtimes / older backends.
import { describe, it, expect } from "vitest";
import { billingModeForSelectedProvider, billingModeForProvider } from "../ConfigTab";
import {
buildProviderCatalogFromRegistry,
type RegistryProvider,
type RegistryModel,
} from "../../ProviderModelSelector";
const REGISTRY_PROVIDERS: RegistryProvider[] = [
{ name: "anthropic-oauth", display_name: "Claude Code subscription", auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"], billing_mode: "byok" },
{ name: "platform", display_name: "Platform", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "platform_managed" },
// DISCRIMINATING fixture (review #7790): a provider whose registry-served
// billing_mode DISAGREES with the hardcoded name-based rule. Its name is not
// "platform"/"" so billingModeForProvider() would call it "byok", yet the
// registry serves "platform_managed" (the federation-ready shape the SSOT is
// built for — a managed provider that isn't literally named "platform").
// billingModeForSelectedProvider MUST return the REGISTRY value here; the
// only way to get "platform_managed" out is to honor the catalog, so this
// case fails if the impl ever regresses to the hardcoded rule.
{ name: "managed-federated", display_name: "Managed (federated)", auth_env: [], billing_mode: "platform_managed" },
];
const REGISTRY_MODELS: RegistryModel[] = [
{ id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
{ id: "anthropic/claude-opus-4-7", provider: "platform", billing_mode: "platform_managed" },
// model bucketed under the disagreeing provider so the catalog builds an
// entry for it (buildProviderCatalogFromRegistry only emits a provider entry
// for providers that own at least one model).
{ id: "managed/some-model", provider: "managed-federated", billing_mode: "platform_managed" },
];
describe("billingModeForSelectedProvider (registry-driven)", () => {
const catalog = buildProviderCatalogFromRegistry(REGISTRY_PROVIDERS, REGISTRY_MODELS);
it("reads platform_managed from the registry for the platform provider", () => {
expect(billingModeForSelectedProvider("platform", catalog)).toBe("platform_managed");
});
it("reads byok from the registry for a BYOK provider", () => {
// anthropic-oauth derives to byok via the REGISTRY. (Note: the hardcoded
// rule would ALSO say byok for this non-'platform' name, so on its own this
// assertion does NOT prove the registry is authoritative — it agrees either
// way. The registry-WINS proof is the disagreement case below.)
expect(billingModeForSelectedProvider("anthropic-oauth", catalog)).toBe("byok");
});
it("lets the registry billing_mode WIN when it disagrees with the hardcoded rule", () => {
// 'managed-federated' is not '' / 'platform', so the legacy name-based rule
// classifies it byok — but the registry serves platform_managed. The
// registry is the SSOT, so billingModeForSelectedProvider must return
// platform_managed. This is the discriminating case: it FAILS if the impl
// regresses to billingModeForProvider (which would return byok here).
expect(billingModeForProvider("managed-federated")).toBe("byok"); // sanity: the rules genuinely disagree
expect(billingModeForSelectedProvider("managed-federated", catalog)).toBe("platform_managed");
});
it("falls back to the hardcoded rule when no registry catalog is supplied", () => {
// Non-registry runtime / older backend → catalog empty/undefined → the
// legacy mapping still applies ('' | 'platform' → platform_managed).
expect(billingModeForSelectedProvider("", undefined)).toBe("platform_managed");
expect(billingModeForSelectedProvider("platform", undefined)).toBe("platform_managed");
expect(billingModeForSelectedProvider("minimax", undefined)).toBe("byok");
});
it("falls back to the hardcoded rule when the provider is not in the registry catalog", () => {
// A provider string the registry catalog doesn't carry (stale saved
// value) → fall back to the legacy rule rather than guessing.
expect(billingModeForSelectedProvider("some-byo-vendor", catalog)).toBe("byok");
});
});
@@ -297,6 +297,25 @@ describe("DetailsTab — delete workflow", () => {
expect(mockSelectNode).toHaveBeenCalledWith(null);
});
// internal#734: checking "also erase saved data" adds &erase_data=true so the
// server prunes the data volume. Default (unchecked) must NOT send it.
it("checking erase-saved-data sends erase_data=true on delete", async () => {
mockApi.del.mockResolvedValue(undefined);
render(<DetailsTab workspaceId="ws-1" data={data()} />);
await flush();
fireEvent.click(screen.getByRole("button", { name: /delete workspace/i }));
await flush();
fireEvent.click(screen.getByRole("checkbox", { name: /erase saved data/i }));
const confirmBtn = Array.from(document.querySelectorAll("button")).find(
(b) => b.textContent === "Confirm Delete",
) as HTMLButtonElement;
fireEvent(confirmBtn, new MouseEvent("click", { bubbles: true }));
await flush();
expect(mockApi.del).toHaveBeenCalledWith("/workspaces/ws-1?confirm=true&erase_data=true", {
headers: { "X-Confirm-Name": "Test Workspace" },
});
});
it("cancelling delete returns to view mode", async () => {
mockApi.del.mockResolvedValue(undefined);
render(<DetailsTab workspaceId="ws-1" data={data()} />);
+1
View File
@@ -5,6 +5,7 @@
const RUNTIME_NAMES: Record<string, string> = {
"claude-code": "Claude Code",
codex: "Codex",
"google-adk": "Google ADK",
hermes: "Hermes",
openclaw: "OpenClaw",
kimi: "Kimi",
+3
View File
@@ -368,6 +368,9 @@ export interface WorkspaceCompute {
width?: number;
height?: number;
};
// internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
// undefined (auto). Controls whether the data volume survives recreate.
data_persistence?: string;
}
let socket: ReconnectingSocket | null = null;
+11
View File
@@ -1,5 +1,16 @@
# Running a Gemini CLI Workspace on Molecule AI
> **⚠️ Accuracy correction (2026-05-29):** this page is **aspirational, not
> shipped.** There is **no `gemini-cli` runtime** in `manifest.json` or the
> provisioner's `knownRuntimes`, and the "PR #379" cited below is unrelated (a
> CI-workflow-cleanup PR, not a gemini-cli adapter). Do not follow this as-is.
>
> **For Gemini on Molecule, use the real `google-adk` runtime instead** — see
> [`google-adk-runtime.md`](./google-adk-runtime.md) (ADK engine + Gemini on
> Vertex AI/AI Studio), implemented in PR
> [`molecule-ai-workspace-template-google-adk#1`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) per RFC `internal#730`.
> This gemini-cli page is retained only until it's either implemented for real or removed.
Molecule AI now ships a `gemini-cli` runtime adapter alongside the existing `claude-code` adapter. This tutorial walks you from zero to a running Gemini agent workspace in under five minutes.
## What you'll need
+52 -57
View File
@@ -1,74 +1,69 @@
# Running a Google ADK Workspace on Molecule AI
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.
> **Status (2026-05-29):** the `google-adk` runtime is **landing**, not yet on
> `main`. It's implemented in the template repo
> [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk)
> (PR **#1**) with platform registration in molecule-core PR **#2003** and the
> validator allowlist in molecule-ci PR **#26**. Design + approval: RFC
> [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730).
> Remove this banner once those PRs merge.
>
> **Doc-accuracy note:** a prior version of this page claimed ADK was "already
> first-class" and cited "PR #550" — that PR is unrelated (a MemoryTab test
> suite). No `google-adk` adapter existed at that time. This rewrite reflects
> the real implementation.
## What you'll need
Google's Agent Development Kit (ADK) runs as a Molecule AI workspace runtime:
ADK is the **agent engine** (`LlmAgent` + `Runner`), and the workspace
participates in Molecule's A2A org like any other runtime.
- A Molecule AI account with at least one provisioned tenant
- A `GOOGLE_API_KEY` from [aistudio.google.com](https://aistudio.google.com) (or Vertex AI credentials — see below)
- `curl` + `jq`
## How it actually works
## Setup
- **ADK = engine only.** The adapter builds an ADK `LlmAgent` from the
workspace config (model + system prompt + tools) and drives its `Runner`.
It installs `google-adk[mcp]==2.1.0` and **never** the `[a2a]` extra — ADK's
a2a layer pins `a2a-sdk<0.4`, which is incompatible with the platform's
`a2a-sdk>=1.0`. (Verified: `google-adk[mcp]==2.1.0` + `a2a-sdk 1.0.3` coexist.)
- **A2A** is provided by the platform's a2a-1.x server; a Molecule-authored
executor bridges ADK's `Runner` event stream onto it, one ADK session per
A2A `context_id`.
- **Tools** reach the agent via ADK's native `McpToolset` pointed at the
workspace's `a2a_mcp_server` — the same MCP surface the CLI runtimes use
(`delegate_task`, `commit_memory`, `list_peers`, …). No LangChain.
## Auth — Vertex AI via ADC (keyless), or an AI Studio key
The runtime supports both google-genai auth paths:
- **Vertex AI + Application Default Credentials (recommended; required if your
org disallows API keys).** Set `model: vertex:gemini-2.5-pro` and provide
`GOOGLE_CLOUD_PROJECT`; the adapter sets `GOOGLE_GENAI_USE_VERTEXAI=1` and
google-genai authenticates via ADC — no API key. (Locally:
`gcloud auth application-default login`.)
- **AI Studio API key** (where your org permits API keys): set
`model: google_genai:gemini-2.5-pro` and `GOOGLE_API_KEY`.
## Create a workspace
```bash
# 1. Store your Google API key as a global secret
curl -s -X PUT http://localhost:8080/settings/secrets \
-H "Content-Type: application/json" \
-d '{"key":"GOOGLE_API_KEY","value":"YOUR-AI-STUDIO-KEY"}' | jq .
# 2. Create a google-adk workspace
WS=$(curl -s -X POST http://localhost:8080/workspaces \
# Vertex AI + ADC (keyless)
curl -s -X POST http://localhost:8080/workspaces \
-H "Content-Type: application/json" \
-d '{
"name": "adk-agent",
"role": "Google ADK inference worker",
"runtime": "google-adk",
"model": "google:gemini-2.0-flash"
}' | jq -r '.id')
echo "Workspace: $WS"
# 3. Wait for ready (~30s)
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
echo "Waiting..."; sleep 5
done
# 4. Send your first task
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Summarise the ADK architecture in 3 bullet points."}]}}}' \
| jq '.result.parts[0].text'
# 5. Multi-turn — session state is preserved across calls
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"2","method":"message/send",
"params":{"message":{"role":"user","parts":[{"kind":"text",
"text":"Now give me a one-line TL;DR of what you just said."}]}}}' \
| jq '.result.parts[0].text'
# 6. Vertex AI alternative — set these instead of GOOGLE_API_KEY
# curl -X PUT .../secrets -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-project"}'
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
"model": "vertex:gemini-2.5-pro",
"runtime_config": {"required_env": ["GOOGLE_CLOUD_PROJECT"]}
}'
```
## Expected output
After step 4, ADK streams the Gemini response through its event bus, filters for `is_final_response()` events, and returns the agent's reply as a standard A2A text part. Step 5 should reference the prior answer — the adapter ties each A2A `context_id` to an `InMemorySessionService` session, so conversation state is isolated per task context and survives across calls within the same session.
## How it works
The `google-adk` adapter wraps Google ADK's runner/session model behind the same `AgentExecutor` interface used by every other Molecule AI runtime. On each turn, `GoogleADKA2AExecutor` calls `runner.run_async()` with the incoming message wrapped in a `google.genai.types.Content` object, then drains the event stream until it collects a final-response event. The `google:` model prefix is stripped before being passed to ADK — so `google:gemini-2.0-flash` in your workspace config becomes `gemini-2.0-flash` in the ADK `LlmAgent`. Error class names are sanitized before leaving the executor; raw Google SDK stack traces never reach the A2A caller.
## Mixed-runtime teams
ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, Hermes, and LangGraph workers. An orchestrator can delegate long-context summarisation to a `google-adk` worker (Gemini 1.5 Pro's 1M token window) while routing tool-use tasks to a `claude-code` worker — with no provider-specific code in the orchestrator itself. Add an ADK peer with `POST /workspaces`, set `GOOGLE_API_KEY`, and it's available for `delegate_task` immediately.
Send it a task via the A2A proxy (`POST /workspaces/:id/a2a`, JSON-RPC
`message/send`) and it replies through the ADK `Runner`. Verified end-to-end:
a Gemini 2.5 round-trip on Vertex via ADC returns through the built image.
## Related
- PR #550: [feat(adapters): add google-adk runtime adapter](https://git.moleculesai.app/molecule-ai/molecule-core/pull/550)
- Template + adapter: [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) (PR #1)
- Platform registration: molecule-core PR #2003 · validator: molecule-ci PR #26
- Design/approval: RFC [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730)
- [Google ADK (adk-python)](https://github.com/google/adk-python)
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
- [Platform API reference](../api-reference.md)
+1
View File
@@ -29,6 +29,7 @@
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
],
"org_templates": [
+30 -7
View File
@@ -1,12 +1,13 @@
#!/usr/bin/env bash
# E2E test: A2A round-trip parity across all four runtimes.
# E2E test: A2A round-trip parity across all five runtimes.
#
# Validates that for each of {claude-code, hermes, codex, openclaw}:
# Validates that for each of {claude-code, hermes, codex, openclaw, google-adk}:
# 1. A workspace can be provisioned + brought online
# 2. The adapter responds to A2A message/send
# 3. The reply contains expected content (echo of the prompt)
# 4. A SECOND message preserves session state where the runtime
# supports it (currently: hermes via plugin path)
# supports it (currently: hermes via plugin path; google-adk via
# ADK InMemorySessionService keyed on A2A context_id)
#
# Targets a SaaS tenant subdomain. Provisions workspaces in the calling
# tenant, runs the round-trip, deletes them on success.
@@ -16,6 +17,10 @@
# (e.g. https://demo-tenant.staging.moleculesai.app)
# - $OPENROUTER_API_KEY (or $HERMES_API_KEY) for non-claude runtimes
# - $OPENAI_API_KEY for claude-code peer
# - $GOOGLE_API_KEY (AI Studio) for google-adk — the org disallows API
# keys in PROD (Vertex+ADC there), but CI auths Gemini with an
# AI-Studio key (config model google_genai:gemini-2.5-pro). Vertex
# stays supported; this is the keyed CI path only.
# - SaaS edge requires Origin header — see auto-memory
# reference_saas_waf_origin_header.md
#
@@ -24,12 +29,13 @@
# ./scripts/test-all-runtimes-a2a-e2e.sh
#
# Skip individual runtimes:
# SKIP_HERMES=1 SKIP_OPENCLAW=1 ./scripts/test-all-runtimes-a2a-e2e.sh
# SKIP_HERMES=1 SKIP_OPENCLAW=1 SKIP_GOOGLE_ADK=1 ./scripts/test-all-runtimes-a2a-e2e.sh
set -euo pipefail
PLATFORM="${PLATFORM:-${1:-http://localhost:8080}}"
HERMES_PROVIDER_KEY="${OPENROUTER_API_KEY:-${HERMES_API_KEY:-}}"
PEER_OPENAI_KEY="${OPENAI_API_KEY:-}"
GOOGLE_ADK_KEY="${GOOGLE_API_KEY:-}"
# SaaS auth chain — TENANT_ADMIN_TOKEN + TENANT_ORG_ID required when
# hitting *.moleculesai.app (per-tenant ADMIN_TOKEN, NOT
# CP_ADMIN_API_TOKEN). Optional for localhost.
@@ -48,6 +54,10 @@ if [ -z "$HERMES_PROVIDER_KEY" ] && [ -z "${SKIP_HERMES:-}${SKIP_CODEX:-}${SKIP_
echo "FAIL: set OPENROUTER_API_KEY or HERMES_API_KEY for non-claude runtimes"
exit 2
fi
if [ -z "$GOOGLE_ADK_KEY" ] && [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
echo "FAIL: set GOOGLE_API_KEY (AI Studio) for google-adk, or SKIP_GOOGLE_ADK=1"
exit 2
fi
PASS=0
FAIL=0
@@ -143,7 +153,7 @@ echo "=========================================="
echo ""
# -------------------------------------------------------
# 1. Provision the four runtimes (skip via SKIP_* flags)
# 1. Provision the five runtimes (skip via SKIP_* flags)
# -------------------------------------------------------
echo "--- 1. Provision workspaces ---"
if [ -z "${SKIP_CLAUDE_CODE:-}" ]; then
@@ -162,6 +172,10 @@ if [ -z "${SKIP_OPENCLAW:-}" ]; then
WS_IDS[openclaw]=$(provision "ParityOpenClaw" "openclaw" "openclaw peer")
echo " openclaw: ${WS_IDS[openclaw]}"
fi
if [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
WS_IDS[google-adk]=$(provision "ParityGoogleADK" "google-adk" "google-adk peer")
echo " google-adk: ${WS_IDS[google-adk]}"
fi
# -------------------------------------------------------
# 2. Set provider keys
@@ -177,6 +191,12 @@ if [ -n "${WS_IDS[claude-code]:-}" ] && [ -n "$PEER_OPENAI_KEY" ]; then
set_secret "${WS_IDS[claude-code]}" "OPENAI_API_KEY" "$PEER_OPENAI_KEY"
echo " claude-code: OPENAI_API_KEY set"
fi
if [ -n "${WS_IDS[google-adk]:-}" ] && [ -n "$GOOGLE_ADK_KEY" ]; then
# AI-Studio path: the adapter reads GOOGLE_API_KEY natively when the
# config model is google_genai:gemini-2.5-pro (see _routing.resolve_model).
set_secret "${WS_IDS[google-adk]}" "GOOGLE_API_KEY" "$GOOGLE_ADK_KEY"
echo " google-adk: GOOGLE_API_KEY set"
fi
# -------------------------------------------------------
# 3. Wait for online
@@ -188,6 +208,9 @@ for runtime in "${!WS_IDS[@]}"; do
[ -z "$id" ] && continue
max=60
[ "$runtime" = "hermes" ] && max=120
# google-adk's first cold boot pulls a large fresh ADK image — give it
# a hermes-class window so a slow first pull doesn't read as "failed".
[ "$runtime" = "google-adk" ] && max=180
if wait_online "$id" "$runtime" "$max"; then
check "$runtime online" "ok" "ok"
else
@@ -200,7 +223,7 @@ done
# -------------------------------------------------------
echo ""
echo "--- 4. A2A round-trip (first message) ---"
for runtime in claude-code hermes codex openclaw; do
for runtime in claude-code hermes codex openclaw google-adk; do
id="${WS_IDS[$runtime]:-}"
[ -z "$id" ] && continue
reply=$(a2a_send "$id" "Reply with just the word OK so we know you got this.")
@@ -213,7 +236,7 @@ done
# -------------------------------------------------------
echo ""
echo "--- 5. Session continuity (second message recalls first) ---"
for runtime in claude-code hermes codex openclaw; do
for runtime in claude-code hermes codex openclaw google-adk; do
id="${WS_IDS[$runtime]:-}"
[ -z "$id" ] && continue
# Set up: tell the agent a name.
+229
View File
@@ -0,0 +1,229 @@
#!/usr/bin/env bash
# Real-completion + per-provider liveness + byok-routing assertion helpers
# for the staging full-SaaS E2E (tests/e2e/test_staging_full_saas.sh).
#
# WHY THIS LIB EXISTS (molecule-core#1995 / #1994 follow-on):
# The A2A e2e historically asserted only response SHAPE — e.g.
# test_a2a_e2e.sh:`check "SEO response has text" '"kind":"text"'`. A fully
# BROKEN agent returns its error AS a text part:
# {"kind":"text","text":"Agent error (Exception) — see workspace logs..."}
# which STILL matches `"kind":"text"` → the shape check PASSES on a broken
# agent. That is exactly why the 2026-05-2x drained-key / byok-misroute
# failures (agents-team PM + reno marketing erroring on every LLM call)
# sailed through CI. "Channel returns text shape" != "agent actually
# completed an LLM round-trip".
#
# These helpers add three load-bearing gates ON TOP of (never replacing) the
# existing shape + PONG checks:
# 1. a2a_assert_real_completion — deterministic known-answer round-trip
# (CONTAINS the expected token AND NOT an error-as-text payload).
# 2. provider_liveness_matrix — per-offered-provider cheap completion
# probe, providers sourced from the providers.yaml SSOT runtimes block.
# 3. assert_byok_not_platform_proxy — #1994 regression guard: a
# byok-resolving workspace must NOT resolve to platform_managed.
#
# Conventions: reuses the host script's fail()/ok()/log() + tenant_call().
# Source this AFTER those are defined. BASH 4+.
# Error-as-text trap markers. If the agent's text part contains ANY of
# these, the "round-trip" did not really complete — the agent surfaced an
# error AS text. This is the negative assertion that makes a broken agent
# FAIL instead of slipping through the shape check.
#
# Kept as an array (not a single regex) so a new failure signature is a
# one-line append + the failure message can name which marker matched.
A2A_ERROR_AS_TEXT_MARKERS=(
"Agent error"
"Exception"
"error result"
"MISSING_BYOK_CREDENTIAL"
)
# a2a_completion_error_marker <agent_text>
# Echoes the first error-as-text marker found in <agent_text> (case-
# insensitive), or nothing if clean. Exit 0 if a marker matched, 1 if not.
# Pure string scan — no LLM, no network — so it is deterministic and is the
# unit under the fail-direction proof in test_completion_assert_unit.sh.
a2a_completion_error_marker() {
local text="$1"
local upper marker
upper=$(printf '%s' "$text" | tr '[:lower:]' '[:upper:]')
for marker in "${A2A_ERROR_AS_TEXT_MARKERS[@]}"; do
if printf '%s' "$upper" | grep -qF -- "$(printf '%s' "$marker" | tr '[:lower:]' '[:upper:]')"; then
printf '%s' "$marker"
return 0
fi
done
return 1
}
# a2a_assert_real_completion <agent_text> <expected_token> <context_label>
# The CORE gate. Asserts the agent text:
# (a) does NOT contain any error-as-text marker (broken-agent trap), AND
# (b) CONTAINS <expected_token> (case-insensitive) — proving a real LLM
# round-trip produced the deterministic known answer.
# Calls fail() (which exits) on either violation. This MUST fail on an
# error-as-text payload — that is the property test_completion_assert_unit.sh
# pins.
a2a_assert_real_completion() {
local text="$1"
local expected="$2"
local ctx="${3:-A2A}"
if [ -z "$text" ]; then
fail "$ctx — real-completion gate: agent returned EMPTY text (no round-trip)."
fi
local hit
if hit=$(a2a_completion_error_marker "$text"); then
fail "$ctx — real-completion gate: agent returned an ERROR-AS-TEXT payload (matched '$hit'). A broken agent that surfaces its error as a text part is NOT a completed round-trip. This is the trap the shape-only check missed (#1994). Raw: ${text:0:200}"
fi
# Known-answer: real LLM round-trip yields the deterministic token. A
# prompt-echo / truncated-context / wrong-auth pipeline won't.
if ! printf '%s' "$text" | tr '[:lower:]' '[:upper:]' | grep -qF -- "$(printf '%s' "$expected" | tr '[:lower:]' '[:upper:]')"; then
fail "$ctx — real-completion gate: reply did NOT contain expected known-answer token '$expected'. The channel returned a text shape but no real completion. Raw: ${text:0:200}"
fi
ok "$ctx — real completion verified (contains '$expected', no error-as-text). Reply: \"${text:0:80}\""
}
# offered_platform_models_for_runtime <runtime>
# Emits, one per line, the platform-servable model ids the providers.yaml
# SSOT (runtimes.<runtime>.providers[name=platform].models) declares for
# <runtime>. This is the SSOT-driven offered/platform-servable matrix — NOT
# a hardcoded provider list — so a provider added/removed in providers.yaml
# automatically changes the matrix this probe exercises.
#
# Reads the embedded copy at workspace-server/internal/providers/providers.yaml
# (the same file go:embed compiles into the binary). Requires python3 +
# PyYAML (already a test-harness dep). On parse failure, emits nothing and
# returns 1 so the caller can fail loud rather than silently skip.
offered_platform_models_for_runtime() {
local runtime="$1"
local yaml_path="${PROVIDERS_YAML_PATH:-}"
if [ -z "$yaml_path" ]; then
# This lib lives at tests/e2e/lib/ -> repo root is three dirs up
# (lib -> e2e -> tests -> repo-root).
yaml_path="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/workspace-server/internal/providers/providers.yaml"
fi
if [ ! -f "$yaml_path" ]; then
log " [provider-matrix] providers.yaml SSOT not found at $yaml_path"
return 1
fi
RUNTIME_REF="$runtime" python3 - "$yaml_path" <<'PY'
import os, sys
try:
import yaml
except Exception as e: # PyYAML missing — fail loud, do not silently skip.
sys.stderr.write(f"PyYAML required for provider-matrix SSOT read: {e}\n")
sys.exit(2)
rt = os.environ["RUNTIME_REF"]
with open(sys.argv[1]) as f:
doc = yaml.safe_load(f)
native = (doc.get("runtimes") or {}).get(rt) or {}
for pref in native.get("providers", []) or []:
if pref.get("name") == "platform":
for m in pref.get("models", []) or []:
print(m)
PY
}
# provider_liveness_matrix <runtime> <probe_fn>
# For each platform-servable model the SSOT lists for <runtime>, calls
# <probe_fn> <model_id> which must echo the agent text (or empty) and return
# 0 on a non-error completion, non-zero otherwise. Logs a per-model pass/fail
# matrix. Returns 0 only if EVERY probed model produced a non-error
# completion; non-zero (and a recorded matrix) otherwise.
#
# Purpose: exercise each offered provider's AUTH + ROUTING path so a drained
# key / wrong base-URL / byok-misroute fails the gate (the #1994 class). The
# probe_fn is expected to use minimal max_tokens.
#
# This helper does the SSOT read + matrix bookkeeping; the host script
# supplies probe_fn (it owns workspace ids + tenant_call wiring).
provider_liveness_matrix() {
local runtime="$1"
local probe_fn="$2"
local models model rc total=0 passed=0
local -a results=()
models=$(offered_platform_models_for_runtime "$runtime") || {
fail "provider-liveness: could not read offered-provider matrix from providers.yaml SSOT for runtime=$runtime"
}
if [ -z "$models" ]; then
log " [provider-matrix] runtime=$runtime offers no platform-servable models in the SSOT — nothing to probe (not a failure)."
return 0
fi
log " [provider-matrix] SSOT offered platform models for $runtime:"
while IFS= read -r model; do
[ -z "$model" ] && continue
log " - $model"
done <<<"$models"
while IFS= read -r model; do
[ -z "$model" ] && continue
total=$((total + 1))
set +e
"$probe_fn" "$model"
rc=$?
set -e
if [ "$rc" = "0" ]; then
passed=$((passed + 1))
results+=("PASS $model")
elif [ "$rc" = "75" ]; then
# 75 (EX_TEMPFAIL convention) = probe skipped (key/runtime not
# available in this lane). Not counted toward pass/fail — logged.
total=$((total - 1))
results+=("SKIP $model (probe unavailable in this lane)")
else
results+=("FAIL $model")
fi
done <<<"$models"
log " [provider-matrix] result matrix (runtime=$runtime):"
local line
for line in "${results[@]}"; do
log " $line"
done
log " [provider-matrix] $passed/$total probed providers completed without error"
if [ "$passed" != "$total" ]; then
return 1
fi
return 0
}
# assert_byok_not_platform_proxy <billing_mode_json> <context_label>
# #1994 regression guard. Given the JSON body from
# GET /admin/workspaces/:id/llm-billing-mode (same derived resolver the
# provision-time strip gate uses), asserts the workspace resolves to BYOK
# and NOT platform_managed. A regression of #1994 (byok workspace baked to
# platform_managed → routed through the platform proxy → platform LLM key
# drained) flips resolved_mode to "platform_managed" and trips this gate.
# Calls fail() (exits) on violation.
assert_byok_not_platform_proxy() {
local body="$1"
local ctx="${2:-byok-guard}"
local mode prov
mode=$(printf '%s' "$body" | python3 -c "import json,sys
try: print(json.load(sys.stdin).get('resolved_mode',''))
except Exception: print('')" 2>/dev/null || echo "")
prov=$(printf '%s' "$body" | python3 -c "import json,sys
try:
d=json.load(sys.stdin); v=d.get('provider_selection')
print(v if v is not None else '')
except Exception: print('')" 2>/dev/null || echo "")
if [ -z "$mode" ]; then
fail "$ctx — byok-routing guard: could not read resolved_mode from billing-mode response. Raw: ${body:0:200}"
fi
if [ "$mode" = "platform_managed" ]; then
fail "$ctx — byok-routing guard TRIPPED (#1994 regression): a byok-configured workspace resolved to 'platform_managed' (provider_selection=$prov) → it would route through the platform proxy and drain the platform LLM key. Expected resolved_mode=byok. Raw: ${body:0:200}"
fi
if [ "$mode" != "byok" ]; then
fail "$ctx — byok-routing guard: unexpected resolved_mode='$mode' (expected 'byok'). provider_selection=$prov. Raw: ${body:0:200}"
fi
ok "$ctx — byok-routing guard: workspace resolves byok (provider_selection=$prov), NOT platform-proxy. #1994 stays fixed."
}
+35
View File
@@ -8,6 +8,34 @@ TIMEOUT="${A2A_TIMEOUT:-120}" # seconds per A2A call (override via A2A_TIMEOUT
# shellcheck source=_lib.sh
source "$(dirname "$0")/_lib.sh"
# molecule-core#1995 (#1994 follow-on): real-completion assertion helpers.
# Adds a NEGATIVE error-as-text check on top of the shape checks below, so a
# broken agent that returns its error AS a text part
# ({"kind":"text","text":"Agent error (Exception) ..."}) — which STILL
# matches the shape check `"kind":"text"` — now FAILS instead of passing.
# shellcheck source=lib/completion_assert.sh
source "$(dirname "$0")/lib/completion_assert.sh"
# check_no_error_as_text <desc> <agent_text>
# Additive negative gate: PASS only if the agent text carries NO
# error-as-text marker (Agent error / Exception / error result /
# MISSING_BYOK_CREDENTIAL). Uses the same scanner as the staging
# real-completion gate so the trap is closed consistently across lanes.
check_no_error_as_text() {
local desc="$1"
local text="$2"
local hit
if hit=$(a2a_completion_error_marker "$text"); then
echo "FAIL: $desc"
echo " agent returned an error-AS-text payload (matched '$hit') — a broken"
echo " agent that surfaces its error as a text part is NOT a real reply."
echo " got: $(echo "$text" | head -3)"
FAIL=$((FAIL + 1))
else
echo "PASS: $desc"
PASS=$((PASS + 1))
fi
}
check() {
local desc="$1"
@@ -81,6 +109,8 @@ check "JSON-RPC response has result" '"result"' "$R"
check "Response has agent role" '"role":"agent"' "$R"
check "Response has text part" '"kind":"text"' "$R"
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
# Negative gate (#1994): the text part must not BE an error.
check_no_error_as_text "Echo reply is not an error-as-text payload" "$TEXT"
echo " Agent said: $TEXT"
echo ""
@@ -92,6 +122,11 @@ R=$(a2a_send "$SEO_ID" "What SEO skills do you have?")
check "SEO agent responds" '"result"' "$R"
check "SEO response has text" '"kind":"text"' "$R"
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
# Negative gate (#1994): a broken SEO agent that returns "Agent error
# (Exception) ..." AS text still matches the `"kind":"text"` shape check
# above — THAT is the gap that let drained-key/byok-misroute failures pass
# CI. This makes that case FAIL.
check_no_error_as_text "SEO reply is not an error-as-text payload" "$TEXT"
echo " SEO Agent said: $TEXT"
echo ""
+43 -25
View File
@@ -73,7 +73,15 @@ else
fi
# Test 4: Create workspace B (needs bearer — tokens now exist in DB)
R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Summarizer Agent","tier":1,"runtime":"external","external":true}')
# #1953 cross-tenant isolation: Summarizer is created as a CHILD of Echo so the
# two live in the SAME org (Echo is the org root; Summarizer hangs off it via
# parent_id). The peer-discovery tests below assert same-org peer enumeration
# (Echo sees its child, the child sees its parent). Previously both were created
# parent_id=NULL — two DISTINCT org roots — and "peers" only listed each other
# via the `WHERE parent_id IS NULL` branch that returned every tenant's org root.
# That branch WAS the cross-tenant leak (#1953) and is now removed, so two org
# roots no longer see each other; the assertions must run inside one org.
R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d "{\"name\":\"Summarizer Agent\",\"tier\":1,\"runtime\":\"external\",\"external\":true,\"parent_id\":\"$ECHO_ID\"}")
check "POST /workspaces (create summarizer)" '"status":"awaiting_agent"' "$R"
SUM_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")
@@ -133,21 +141,23 @@ check "Heartbeat updated uptime" '"uptime_seconds":120' "$R"
R=$(curl -s "$BASE/registry/discover/$ECHO_ID")
check "GET /registry/discover/:id (missing caller rejected)" 'X-Workspace-ID header is required' "$R"
# Test 12: Discover (from sibling — allowed)
# Test 12: Discover (from same-org child — allowed)
R=$(curl -s "$BASE/registry/discover/$ECHO_ID" -H "X-Workspace-ID: $SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
check "GET /registry/discover/:id (sibling)" '"url"' "$R"
check "GET /registry/discover/:id (same-org)" '"url"' "$R"
# Test 13: Peers (root siblings see each other)
# Test 13: Peers — same-org parent/child see each other (#1953). Echo is the org
# root and lists its child Summarizer; Summarizer lists its parent Echo. A
# cross-org workspace would NOT appear here (see cross_tenant_isolation_test.go).
R=$(curl -s "$BASE/registry/$ECHO_ID/peers" -H "Authorization: Bearer $ECHO_TOKEN")
check "GET /registry/:id/peers (has summarizer)" '"Summarizer' "$R"
R=$(curl -s "$BASE/registry/$SUM_ID/peers" -H "Authorization: Bearer $SUM_TOKEN")
check "GET /registry/:id/peers (has echo)" '"Echo Agent"' "$R"
# Test 14: Check access (root siblings)
# Test 14: Check access (same-org parent↔child — allowed)
R=$(curl -s -X POST "$BASE/registry/check-access" -H "Content-Type: application/json" \
-d "{\"caller_id\":\"$ECHO_ID\",\"target_id\":\"$SUM_ID\"}")
check "POST /registry/check-access (siblings allowed)" '"allowed":true' "$R"
check "POST /registry/check-access (same-org allowed)" '"allowed":true' "$R"
# Test 15: PATCH workspace (update position)
R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
@@ -289,32 +299,40 @@ R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
check "current_task in list response" '"current_task"' "$R"
# Test 21: Delete
R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
-H "Authorization: Bearer $ECHO_TOKEN" \
-H "X-Confirm-Name: Echo Agent v2")
check "DELETE /workspaces/:id" '"status":"removed"' "$R"
R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $SUM_TOKEN")
COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
check "List after delete (count=1)" "1" "$COUNT"
# Test 22: Bundle round-trip — export → delete → import → verify same config
echo ""
echo "--- Bundle Round-Trip Test ---"
# Export the summarizer workspace (#165 / PR #167 — admin-gated)
# #1953: Summarizer is now a CHILD of Echo (same-org, for the peer-discovery
# tests above). DELETE on the *parent* (Echo) cascade-removes its descendants
# (CascadeDelete walks the recursive `parent_id` CTE), so deleting Echo first
# would also remove Summarizer and the "one survives" assertion would see 0.
# Delete the CHILD (Summarizer) here instead: a child delete does NOT cascade
# upward, so the parent Echo survives and count=1 holds. The bundle round-trip
# below needs Summarizer's exported config, so capture it BEFORE this delete.
BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE"
# Capture original config for comparison
ORIG_NAME=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['name'])")
ORIG_TIER=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['tier'])")
# Delete the workspace — use SUM_TOKEN (per-workspace) for WorkspaceAuth
# and ADMIN_TOKEN for the AdminAuth layer.
R=$(curl -s -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
R=$(acurl -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
-H "Authorization: Bearer $SUM_TOKEN" \
-H "X-Confirm-Name: Summarizer Agent")
check "DELETE /workspaces/:id" '"status":"removed"' "$R"
# Parent Echo must survive a child delete — list as Echo and expect count=1.
R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
check "List after delete (count=1)" "1" "$COUNT"
# Test 22: Bundle round-trip — export → delete → import → verify same config.
# Summarizer's bundle was captured above; now delete the parent Echo (the only
# remaining workspace) so the import lands in a clean org, then re-import the
# Summarizer bundle.
echo ""
echo "--- Bundle Round-Trip Test ---"
# Delete the remaining parent Echo — use ECHO_TOKEN (per-workspace) for
# WorkspaceAuth and ADMIN_TOKEN for the AdminAuth layer.
R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
-H "Authorization: Bearer $ECHO_TOKEN" \
-H "X-Confirm-Name: Echo Agent v2")
check "Delete before re-import" '"status":"removed"' "$R"
# After deleting both workspaces, all per-workspace tokens are revoked.
+111
View File
@@ -0,0 +1,111 @@
#!/usr/bin/env bash
# Fail-direction / load-bearing proof for lib/completion_assert.sh.
#
# This is the watch-it-FAIL counterpart the dev-SOP Phase 3 requires: it
# proves the new real-completion + byok gates actually CATCH a broken agent,
# not just pass on a good one. It runs entirely offline (no LLM, no network,
# no provisioning) — pure assertion logic — so it can run on every PR in the
# fast lane (e2e-api.yml unit-shell step) and locally via `bash`.
#
# The decisive case is `error-as-text payload MUST FAIL`: that is the exact
# trap (#1994) the historical shape-only check missed. If a refactor weakens
# a2a_assert_real_completion to a substring/shape check, THIS test goes red.
set -uo pipefail
HERE="$(cd "$(dirname "$0")" && pwd)"
PASS=0
FAIL=0
# Minimal stand-ins for the host script's helpers. fail() must NOT exit the
# whole harness here — we want to assert that it WAS called. We trap it by
# running the assertion in a subshell and checking the subshell's exit code:
# the real fail() exits 1, ok() exits 0 implicitly.
log() { echo "[unit] $*"; }
ok() { echo "[unit] OK: $*"; }
fail() { echo "[unit] FAIL-CALLED: $*" >&2; exit 1; }
# shellcheck source=lib/completion_assert.sh
source "$HERE/lib/completion_assert.sh"
expect_pass() {
local desc="$1"; shift
if ( "$@" ) >/dev/null 2>&1; then
echo "PASS: $desc (assertion accepted, as expected)"
PASS=$((PASS + 1))
else
echo "FAIL: $desc — expected the assertion to ACCEPT, but it rejected"
FAIL=$((FAIL + 1))
fi
}
expect_fail() {
local desc="$1"; shift
if ( "$@" ) >/dev/null 2>&1; then
echo "FAIL: $desc — expected the assertion to REJECT, but it accepted (gate NOT load-bearing!)"
FAIL=$((FAIL + 1))
else
echo "PASS: $desc (assertion rejected, as expected)"
PASS=$((PASS + 1))
fi
}
echo "=== completion_assert.sh fail-direction proof ==="
# ---- a2a_assert_real_completion ----
# Good: real known-answer reply passes.
expect_pass "real PINEAPPLE reply passes" \
a2a_assert_real_completion "PINEAPPLE" "PINEAPPLE" "unit"
expect_pass "case-insensitive known answer passes" \
a2a_assert_real_completion "pineapple" "PINEAPPLE" "unit"
expect_pass "known answer with minor wrapping passes" \
a2a_assert_real_completion "Sure: PINEAPPLE" "PINEAPPLE" "unit"
# DECISIVE: the error-as-text trap. Each MUST fail — these are the payloads a
# broken agent returns that the old shape-only `"kind":"text"` check passed.
expect_fail "Agent error as text payload MUST fail" \
a2a_assert_real_completion "Agent error (Exception) — see workspace logs for details." "PINEAPPLE" "unit"
expect_fail "bare Exception as text MUST fail" \
a2a_assert_real_completion "Traceback ... Exception: boom" "PINEAPPLE" "unit"
expect_fail "error result as text MUST fail" \
a2a_assert_real_completion "tool returned error result" "PINEAPPLE" "unit"
expect_fail "MISSING_BYOK_CREDENTIAL as text MUST fail" \
a2a_assert_real_completion "MISSING_BYOK_CREDENTIAL: set your own key" "PINEAPPLE" "unit"
# Error-as-text that ALSO happens to contain the token still fails (error
# marker takes precedence — a real completion never carries these markers).
expect_fail "error-as-text containing the token still fails" \
a2a_assert_real_completion "Agent error: could not produce PINEAPPLE" "PINEAPPLE" "unit"
# Empty text fails.
expect_fail "empty text fails" \
a2a_assert_real_completion "" "PINEAPPLE" "unit"
# Wrong/echoed content (no token, no error) fails — shape-OK but not a real
# completion.
expect_fail "wrong content without token fails" \
a2a_assert_real_completion "Reply with exactly the word PINEAPPLE and nothing else." "BANANA" "unit"
# ---- assert_byok_not_platform_proxy (#1994 guard) ----
expect_pass "byok resolution passes the guard" \
assert_byok_not_platform_proxy '{"resolved_mode":"byok","provider_selection":"minimax","source":"derived_provider"}' "unit"
# DECISIVE: a platform_managed resolution on a byok workspace = the #1994
# regression. MUST fail.
expect_fail "platform_managed resolution trips the #1994 guard" \
assert_byok_not_platform_proxy '{"resolved_mode":"platform_managed","provider_selection":"platform","source":"derived_provider"}' "unit"
expect_fail "missing resolved_mode trips the guard" \
assert_byok_not_platform_proxy '{"provider_selection":"x"}' "unit"
expect_fail "disabled mode trips the guard (not byok)" \
assert_byok_not_platform_proxy '{"resolved_mode":"disabled"}' "unit"
# ---- a2a_completion_error_marker (the scanner under the gate) ----
if hit=$(a2a_completion_error_marker "all good PINEAPPLE"); then
echo "FAIL: clean text wrongly flagged as error marker ($hit)"; FAIL=$((FAIL + 1))
else
echo "PASS: clean text has no error marker"; PASS=$((PASS + 1))
fi
if hit=$(a2a_completion_error_marker "An Exception occurred"); then
echo "PASS: error marker detected ($hit)"; PASS=$((PASS + 1))
else
echo "FAIL: error marker NOT detected in 'An Exception occurred'"; FAIL=$((FAIL + 1))
fi
echo ""
echo "=== Results: $PASS passed, $FAIL failed ==="
[ "$FAIL" -eq 0 ]
+182
View File
@@ -99,6 +99,12 @@ source "$(dirname "$0")/lib/model_slug.sh"
# shellcheck disable=SC1091
# shellcheck source=lib/aws_leak_check.sh
source "$(dirname "$0")/lib/aws_leak_check.sh"
# shellcheck disable=SC1091
# shellcheck source=lib/completion_assert.sh
# molecule-core#1995 (#1994 follow-on): real-completion + per-provider
# liveness + byok-routing assertion helpers. Adds gates that FAIL on an
# error-as-text payload (the trap the shape-only A2A checks missed).
source "$(dirname "$0")/lib/completion_assert.sh"
CURL_COMMON=(-sS --fail-with-body --max-time 30)
E2E_TMP_FILES=()
@@ -867,6 +873,182 @@ fi
ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
# ─── 8b. Real-completion known-answer round-trip (CORE GATE, #1994) ────
# The existing PONG check + generic error grep above already do a lot, but
# this stanza is the canonical real-completion gate the #1994 follow-on
# adds: a DETERMINISTIC known-answer prompt asserted via
# a2a_assert_real_completion, which FAILS on an error-as-text payload
# ({"kind":"text","text":"Agent error (Exception) ..."}). That payload
# matches the historical shape-only check `"kind":"text"` and so passed CI
# on a fully broken agent (drained-key / byok-misroute, 2026-05-2x). This
# gate makes that case RED. Reuses the same cold-start retry-on-transient
# (502/503/504) loop the PONG probe uses — retry-once-on-network, never on
# agent-error. Single round-trip → the one place we spend a non-trivial
# token budget (default backend MiniMax — cheap token plan).
KA_PAYLOAD=$(python3 -c "
import json, uuid
print(json.dumps({
'jsonrpc': '2.0',
'method': 'message/send',
'id': 'e2e-known-answer-1',
'params': {
'message': {
'role': 'user',
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
'parts': [{'kind': 'text', 'text': 'Reply with exactly the word PINEAPPLE and nothing else.'}]
}
}
}))
")
KA_TMP=$(mktemp -t known_answer_a2a.XXXXXX)
KA_RESP=""
for KA_ATTEMPT in $(seq 1 6); do
: >"$KA_TMP"
set +e
KA_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
--max-time 90 \
-H "Content-Type: application/json" \
-d "$KA_PAYLOAD" \
-o "$KA_TMP" \
-w '%{http_code}' \
2>/dev/null)
KA_RC=$?
set -e
KA_CODE=${KA_CODE:-000}
KA_RESP=$(cat "$KA_TMP" 2>/dev/null || echo "")
if [ "$KA_RC" = "0" ] && [ "$KA_CODE" -ge 200 ] && [ "$KA_CODE" -lt 300 ]; then
break
fi
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
# Retry ONLY on transient transport errors — never on an agent-level
# error (those must surface and fail the gate).
if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
log " known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY"
if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi
fi
break
done
rm -f "$KA_TMP"
if [ "$KA_RC" != "0" ] || [ "$KA_CODE" -lt 200 ] || [ "$KA_CODE" -ge 300 ]; then
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
fail "Known-answer A2A POST failed after $KA_ATTEMPT attempt(s) (curl_rc=$KA_RC, http=$KA_CODE): $KA_SAFE_BODY"
fi
KA_TEXT=$(echo "$KA_RESP" | python3 -c "
import json, sys
try:
d = json.load(sys.stdin)
parts = d.get('result', {}).get('parts', [])
print(parts[0].get('text', '') if parts else '')
except Exception:
print('')
" 2>/dev/null || echo "")
# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
# ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
# The parent was provisioned with the customer's OWN vendor key
# (MINIMAX_API_KEY / ANTHROPIC_API_KEY in SECRETS_JSON) → it must resolve
# BYOK, not platform_managed. #1994 was exactly the inverse: a byok
# workspace baked platform_managed on (re-)provision → routed through the
# platform proxy → drained the platform LLM key. We read the SAME derived
# resolver the provision-time strip gate uses
# (GET /admin/workspaces/:id/llm-billing-mode) and assert resolved_mode!=
# platform_managed. A regression flips it RED.
#
# Only meaningful when the parent actually carries a byok credential; the
# OpenAI/hermes path uses a different env shape, and the no-key path is
# legitimately platform_managed (the CTO default). Gate on the same
# E2E_*_API_KEY presence the SECRETS_JSON branch keyed off.
if [ -n "${E2E_MINIMAX_API_KEY:-}" ] || [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
set +e
BILLING_RESP=$(tenant_call GET "/admin/workspaces/$PARENT_ID/llm-billing-mode" 2>/dev/null)
BILLING_RC=$?
set -e
if [ "$BILLING_RC" != "0" ] || [ -z "$BILLING_RESP" ]; then
fail "byok-routing guard: GET /admin/workspaces/$PARENT_ID/llm-billing-mode failed (rc=$BILLING_RC). Body: ${BILLING_RESP:0:200}"
fi
assert_byok_not_platform_proxy "$BILLING_RESP" "byok-guard (parent, $RUNTIME/$MODEL_SLUG)"
else
log "8c. byok-routing guard skipped — parent carries no own-vendor key (OpenAI/no-key path is legitimately platform_managed)."
fi
# ─── 8d. Per-offered-provider liveness matrix (SSOT-driven, #1994 class) ─
# For each platform-servable model the providers.yaml SSOT
# (runtimes.<runtime>.providers[platform].models) declares for this
# runtime, send a minimal max_tokens-bounded "say ok" probe and assert a
# NON-ERROR completion. Purpose: exercise each offered provider's AUTH +
# ROUTING path so a drained key / wrong base-URL / byok-misroute fails the
# gate (the #1994 class). Providers/models come from the SSOT — not a
# hardcoded list — so the matrix tracks providers.yaml automatically.
#
# This lane provisions ONE parent workspace with ONE configured key, so we
# can only truly drive the providers that key authenticates. Probing a
# model whose provider key is absent in this lane is reported SKIP (rc=75),
# not FAIL — keeping the gate deterministic + low-flake. The matrix still
# proves the configured provider's full auth+routing path end-to-end, and
# logs the offered set so over/under-offer drift is visible in the CI log.
provider_liveness_probe() {
local model_id="$1"
# Map the SSOT platform model id (e.g. minimax/MiniMax-M2.7) to the
# vendor namespace token to decide whether THIS lane has its key.
local vendor="${model_id%%/*}"
case "$vendor" in
minimax) [ -n "${E2E_MINIMAX_API_KEY:-}" ] || return 75 ;;
anthropic) [ -n "${E2E_ANTHROPIC_API_KEY:-}" ] || return 75 ;;
openai) [ -n "${E2E_OPENAI_API_KEY:-}" ] || return 75 ;;
*) return 75 ;; # kimi/moonshot etc. — no key wired in this lane
esac
local probe_payload
probe_payload=$(python3 -c "
import json, uuid
print(json.dumps({
'jsonrpc': '2.0',
'method': 'message/send',
'id': 'e2e-liveness-' + uuid.uuid4().hex[:6],
'params': {
'message': {
'role': 'user',
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}],
},
'configuration': {'max_tokens': 4}
}
}))
")
local tmp code rc resp
tmp=$(mktemp -t liveness_a2a.XXXXXX)
set +e
code=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
--max-time 60 \
-H "Content-Type: application/json" \
-d "$probe_payload" \
-o "$tmp" -w '%{http_code}' 2>/dev/null)
rc=$?
set -e
resp=$(cat "$tmp" 2>/dev/null || echo "")
rm -f "$tmp"
if [ "$rc" != "0" ] || [ "${code:-000}" -lt 200 ] || [ "${code:-000}" -ge 300 ]; then
log " probe $model_id: HTTP ${code:-000} rc=$rc"
return 1
fi
local text
text=$(echo "$resp" | python3 -c "
import json,sys
try:
d=json.load(sys.stdin); p=d.get('result',{}).get('parts',[])
print(p[0].get('text','') if p else '')
except Exception: print('')" 2>/dev/null || echo "")
if [ -z "$text" ] || a2a_completion_error_marker "$text" >/dev/null; then
log " probe $model_id: error-as-text or empty: ${text:0:120}"
return 1
fi
return 0
}
if ! provider_liveness_matrix "$RUNTIME" provider_liveness_probe; then
fail "Per-provider liveness matrix: at least one offered provider failed its auth+routing probe (see matrix above). This is the #1994 class — a drained key / wrong base-URL / byok-misroute."
fi
ok "Per-provider liveness matrix passed (all probed offered providers completed without error)"
# ─── 9. HMA + peers + activity (full mode) ─────────────────────────────
if [ "$MODE" = "full" ]; then
log "9/11 Writing + reading HMA memory on parent..."
+271
View File
@@ -0,0 +1,271 @@
// Command gen-providers is the codegen half of the provider-registry SSOT
// machinery on the molecule-core side (internal#718 P2-A, CTO 2026-05-27
// "Distribution = SDK via codegen + verify-CI"). It is the byte-for-byte mirror
// of molecule-controlplane's cmd/gen-providers (the canonical generator). It
// reads core's SYNCED COPY of the schema — internal/providers/providers.yaml
// (via the providers loader, so it shares the SAME parse + validation as the
// runtime) — and emits a checked-in Go artifact:
//
// internal/providers/gen/registry_gen.go
//
// The artifact is a deterministic projection of the merged registry: the
// provider catalog + per-runtime native sets as Go literals, plus the schema
// version and a content fingerprint. It is core's leaf of the multi-language SDK
// layer the RFC calls for (Go(CP+core)/TS(canvas)/Python(adapters)).
//
// CONTRACT for P2-A (zero behavior change): the generated artifact is
// checked-in + drift-gated ONLY. NO production code path imports
// internal/providers/gen — the gen-import-boundary test pins that. P2-B wires
// the billing/credential decision onto the LOADER (DeriveProvider/IsPlatform),
// not the raw gen literals. The generator is the build-time half;
// verify-providers-gen.yml is the CI half that regenerates and fails RED on any
// diff (drift or hand-edit); sync-providers-yaml.yml gates the synced copy
// against the controlplane canonical.
//
// Usage:
//
// go run ./cmd/gen-providers # write the artifact in place
// go run ./cmd/gen-providers -check # exit non-zero if the on-disk
// # artifact differs from a fresh gen
// # (the CI drift gate)
// go run ./cmd/gen-providers -o PATH # write to a specific path
//
//go:generate go run ../gen-providers -o ../../internal/providers/gen/registry_gen.go
package main
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"flag"
"fmt"
"go/format"
"os"
"sort"
"strconv"
"text/template"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
)
// defaultOutPath is the checked-in artifact location, relative to the repo
// root (the directory `go run ./cmd/gen-providers` is invoked from).
const defaultOutPath = "internal/providers/gen/registry_gen.go"
func main() {
var (
outPath string
check bool
)
flag.StringVar(&outPath, "o", defaultOutPath, "output path for the generated artifact")
flag.BoolVar(&check, "check", false, "verify the on-disk artifact matches a fresh generation; exit 1 on drift")
flag.Parse()
generated, err := render()
if err != nil {
fmt.Fprintf(os.Stderr, "gen-providers: %v\n", err)
os.Exit(1)
}
if check {
existing, err := os.ReadFile(outPath)
if err != nil {
fmt.Fprintf(os.Stderr, "gen-providers -check: cannot read %s: %v\n", outPath, err)
fmt.Fprintln(os.Stderr, "Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.")
os.Exit(1)
}
if !bytes.Equal(existing, generated) {
fmt.Fprintf(os.Stderr, "gen-providers -check: DRIFT — %s is out of sync with providers.yaml.\n", outPath)
fmt.Fprintln(os.Stderr, "The generated artifact was hand-edited or providers.yaml changed without regen.")
fmt.Fprintln(os.Stderr, "Fix: run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit.")
os.Exit(1)
}
fmt.Println("gen-providers -check: OK — artifact in sync with providers.yaml")
return
}
if err := os.WriteFile(outPath, generated, 0o644); err != nil {
fmt.Fprintf(os.Stderr, "gen-providers: write %s: %v\n", outPath, err)
os.Exit(1)
}
fmt.Printf("gen-providers: wrote %s\n", outPath)
}
// render loads the manifest and produces the gofmt'd artifact bytes.
func render() ([]byte, error) {
m, err := providers.LoadManifest()
if err != nil {
return nil, fmt.Errorf("load manifest: %w", err)
}
// Deterministic ordering: providers in catalog order is already stable
// (slice). Runtimes is a map — sort its keys so the artifact is
// reproducible regardless of Go map iteration order.
runtimeNames := make([]string, 0, len(m.Runtimes))
for rt := range m.Runtimes {
runtimeNames = append(runtimeNames, rt)
}
sort.Strings(runtimeNames)
type genProvider struct {
Name string
DisplayName string
Protocol string
AuthMode string
AuthEnv []string
ModelPrefixMatch string
IsPlatform bool
// UpstreamVendor is the proxy's upstream-vendor key for this entry
// (internal#718 P1, CONVERGED) — empty for entries the proxy does not
// route to an upstream. A plain scalar (no pointer), so both the rendered
// literal and the fingerprint stay deterministic.
UpstreamVendor string
}
type genRef struct {
Name string
Models []string
}
type genRuntime struct {
Name string
Providers []genRef
}
data := struct {
SchemaVersion int
Fingerprint string
Providers []genProvider
Runtimes []genRuntime
}{
SchemaVersion: providers.SchemaVersion(),
}
for _, p := range m.Providers {
gp := genProvider{
Name: p.Name,
DisplayName: p.DisplayName,
Protocol: string(p.Protocol),
AuthMode: p.AuthMode,
AuthEnv: p.AuthEnv,
ModelPrefixMatch: p.ModelPrefixMatch,
IsPlatform: p.IsPlatform(),
UpstreamVendor: p.UpstreamVendor,
}
data.Providers = append(data.Providers, gp)
}
for _, rt := range runtimeNames {
native := m.Runtimes[rt]
gr := genRuntime{Name: rt}
for _, ref := range native.Providers {
gr.Providers = append(gr.Providers, genRef{Name: ref.Name, Models: ref.Models})
}
data.Runtimes = append(data.Runtimes, gr)
}
// Fingerprint pins the artifact to the data it was generated from. It is
// derived from the structured projection (schema version + providers +
// runtimes), NOT the raw YAML bytes, so a comment-only YAML edit does not
// churn the artifact while any data change does.
data.Fingerprint = fingerprint(data.SchemaVersion, data.Providers, data.Runtimes)
var buf bytes.Buffer
if err := artifactTmpl.Execute(&buf, data); err != nil {
return nil, fmt.Errorf("execute template: %w", err)
}
formatted, err := format.Source(buf.Bytes())
if err != nil {
return nil, fmt.Errorf("gofmt generated source: %w\n----\n%s", err, buf.String())
}
return formatted, nil
}
// fingerprint is a stable content hash of the structured projection. Any
// fields below this function references must be kept in sync with the
// template's emitted data so the hash and the literals never diverge.
func fingerprint(schema int, provs any, runtimes any) string {
h := sha256.New()
fmt.Fprintf(h, "schema=%d\n", schema)
fmt.Fprintf(h, "%#v\n%#v\n", provs, runtimes)
return hex.EncodeToString(h.Sum(nil))[:16]
}
func quote(s string) string { return strconv.Quote(s) }
func quoteSlice(ss []string) string {
var b bytes.Buffer
b.WriteString("[]string{")
for i, s := range ss {
if i > 0 {
b.WriteString(", ")
}
b.WriteString(strconv.Quote(s))
}
b.WriteString("}")
return b.String()
}
var artifactTmpl = template.Must(template.New("artifact").Funcs(template.FuncMap{
"quote": quote,
"quoteSlice": quoteSlice,
}).Parse(`// Code generated by cmd/gen-providers; DO NOT EDIT.
//
// Source of truth: internal/providers/providers.yaml (schema_version {{.SchemaVersion}}).
// Regenerate with: go generate ./... (or: go run ./cmd/gen-providers)
// The verify-providers-gen CI workflow fails RED if this file drifts from
// providers.yaml or is hand-edited. internal#718 P0 — checked-in + drift-
// gated ONLY; no production path imports this package yet (that is P1+).
package gen
// SchemaVersion is the providers.yaml schema this artifact was generated
// against. It is the semver'd contract version (the MAJOR component for the
// public extension contract; see internal/providers/README.md).
const SchemaVersion = {{.SchemaVersion}}
// Fingerprint is a stable content hash of the generated projection (schema
// version + provider catalog + runtime native sets). It changes iff the
// registry DATA changes (comment-only YAML edits do not churn it).
const Fingerprint = {{quote .Fingerprint}}
// GenProvider is the generated projection of one provider catalog entry —
// the subset a downstream consumer needs to derive + display a provider.
type GenProvider struct {
Name string
DisplayName string
Protocol string
AuthMode string
AuthEnv []string
ModelPrefixMatch string
// IsPlatform marks the closed, core-only platform-managed provider.
IsPlatform bool
// UpstreamVendor is the proxy's upstream-vendor key for this entry
// (internal#718 P1, CONVERGED); empty for providers the proxy does not
// route to an upstream vendor. ResolveUpstream maps a model id's namespace
// token to the entry whose UpstreamVendor equals it.
UpstreamVendor string
}
// GenRuntimeRef is one native provider a runtime supports + its exact models.
type GenRuntimeRef struct {
Name string
Models []string
}
// Providers is the full provider catalog, in providers.yaml declaration order.
var Providers = []GenProvider{
{{- range .Providers}}
{Name: {{quote .Name}}, DisplayName: {{quote .DisplayName}}, Protocol: {{quote .Protocol}}, AuthMode: {{quote .AuthMode}}, AuthEnv: {{quoteSlice .AuthEnv}}, ModelPrefixMatch: {{quote .ModelPrefixMatch}}, IsPlatform: {{.IsPlatform}}{{if .UpstreamVendor}}, UpstreamVendor: {{quote .UpstreamVendor}}{{end}}},
{{- end}}
}
// Runtimes maps each runtime to its native provider+model set, runtime names
// sorted for a deterministic artifact.
var Runtimes = map[string][]GenRuntimeRef{
{{- range .Runtimes}}
{{quote .Name}}: {
{{- range .Providers}}
{Name: {{quote .Name}}, Models: {{quoteSlice .Models}}},
{{- end}}
},
{{- end}}
}
`))
@@ -0,0 +1,121 @@
package main
import (
"bytes"
"os"
"path/filepath"
"testing"
)
// repoRoot walks up from the test's working dir (cmd/gen-providers) to the
// module root so the test can locate the checked-in artifact regardless of
// where `go test` is invoked from.
func repoRoot(t *testing.T) string {
t.Helper()
dir, err := os.Getwd()
if err != nil {
t.Fatalf("getwd: %v", err)
}
for i := 0; i < 6; i++ {
if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil {
return dir
}
dir = filepath.Dir(dir)
}
t.Fatal("could not locate repo root (go.mod) from cmd/gen-providers")
return ""
}
// TestArtifactInSync is the drift gate's Go-test counterpart: the checked-in
// internal/providers/gen/registry_gen.go MUST byte-equal a fresh render. If a
// future edit changes providers.yaml without regenerating, OR hand-edits the
// artifact, this flips red — the same signal the verify-providers-gen CI
// workflow emits, but caught locally by `go test ./...` too.
func TestArtifactInSync(t *testing.T) {
generated, err := render()
if err != nil {
t.Fatalf("render() error = %v", err)
}
artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
onDisk, err := os.ReadFile(artifactPath)
if err != nil {
t.Fatalf("read checked-in artifact %s: %v (run `go generate ./...` and commit)", artifactPath, err)
}
if !bytes.Equal(onDisk, generated) {
t.Fatalf("DRIFT: %s is out of sync with providers.yaml.\n"+
"Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.", defaultOutPath)
}
}
// TestDriftGateCatchesMutation is the load-bearing-gate proof (per the SOP
// fail-direction discipline). The original P0 version was TAUTOLOGICAL
// (internal#718 P1 review carry-over): it appended bytes to an in-memory copy
// and asserted the copy differed from the original — true by construction,
// touching neither the on-disk artifact nor the actual in-sync comparison the
// gate runs. This version exercises the REAL gate: it writes a MUTATED artifact
// to disk and re-runs the SAME comparison TestArtifactInSync / `-check` perform
// (`render()` bytes vs the on-disk file), asserting it now reports drift — then
// restores the original. So the test would fail if the gate were vacuous (e.g.
// if the comparison ignored content), not merely if append changes bytes.
func TestDriftGateCatchesMutation(t *testing.T) {
generated, err := render()
if err != nil {
t.Fatalf("render() error = %v", err)
}
artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
original, err := os.ReadFile(artifactPath)
if err != nil {
t.Fatalf("read checked-in artifact %s: %v", artifactPath, err)
}
// Precondition: the tree is in sync (so the mutation is what flips the gate,
// not pre-existing drift).
if !bytes.Equal(original, generated) {
t.Fatalf("precondition failed: %s already drifted from render() — run `go generate ./...`", defaultOutPath)
}
// Restore the pristine artifact no matter how the test exits.
t.Cleanup(func() {
if err := os.WriteFile(artifactPath, original, 0o644); err != nil {
t.Fatalf("CRITICAL: failed to restore %s after mutation: %v", artifactPath, err)
}
})
// Mutate the ON-DISK artifact (simulating a hand-edit / a providers.yaml
// change that wasn't regenerated).
mutated := append(append([]byte(nil), original...), []byte("\n// injected drift\n")...)
if err := os.WriteFile(artifactPath, mutated, 0o644); err != nil {
t.Fatalf("write mutated artifact: %v", err)
}
// Re-run the EXACT in-sync comparison the gate uses: fresh render vs the
// (now mutated) on-disk file. It MUST report drift.
onDiskAfter, err := os.ReadFile(artifactPath)
if err != nil {
t.Fatalf("re-read mutated artifact: %v", err)
}
freshRender, err := render()
if err != nil {
t.Fatalf("render() after mutation error = %v", err)
}
if bytes.Equal(onDiskAfter, freshRender) {
t.Fatal("drift gate did NOT detect a mutated on-disk artifact — gate is not load-bearing")
}
}
// TestRenderDeterministic proves regeneration is idempotent: two renders of
// the same manifest produce byte-identical output (sorted runtime keys, stable
// catalog order). A non-deterministic generator would make the drift gate
// flap on Go map iteration order.
func TestRenderDeterministic(t *testing.T) {
a, err := render()
if err != nil {
t.Fatalf("render() #1 error = %v", err)
}
b, err := render()
if err != nil {
t.Fatalf("render() #2 error = %v", err)
}
if !bytes.Equal(a, b) {
t.Fatal("render() is non-deterministic — two runs differ; the drift gate would flap")
}
}
+15
View File
@@ -36,6 +36,7 @@ import (
"time"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/channels"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/codexauth"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
@@ -334,6 +335,20 @@ func main() {
pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
})
// Codex shared-OAuth central refresher — the SINGLE owner of the rotating
// refresh_token for the global codex (ChatGPT/Codex subscription) credential
// (global_secrets key CODEX_AUTH_JSON). Multiple codex workspaces share ONE
// ChatGPT-Pro OAuth token; OpenAI's refresh_token is single-use, so letting
// each per-agent app-server refresh on its own 401 burned the seed within
// seconds (a refresh storm). This goroutine is structurally single-flight
// (one goroutine + a package mutex), refreshes only within a safety margin
// of expiry, POSTs the refresh_token at most once per due cycle, and writes
// the rotated blob back — workspaces now only GET the current token (see the
// codex template's codex_auth_sync.sh). INERT when no CODEX_AUTH_JSON exists.
go supervised.RunWithRecover(ctx, "codex-auth-refresher", func(c context.Context) {
codexauth.StartCodexAuthRefresher(c, db.DB)
})
// Provision-timeout sweep — flips workspaces that have been stuck in
// status='provisioning' past the timeout window to 'failed' and emits
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
@@ -0,0 +1,463 @@
// Package codexauth owns the SINGLE, platform-side refresh of the global
// codex (ChatGPT/Codex subscription) OAuth credential stored in the
// global_secrets table under key CODEX_AUTH_JSON.
//
// THE PROBLEM IT FIXES (agents-team prod, 2026-05-31)
//
// Multiple codex workspaces share ONE ChatGPT-Pro OAuth token (the global
// secret CODEX_AUTH_JSON). OpenAI's refresh_token is SINGLE-USE: every refresh
// rotates it and invalidates the prior one. When each per-agent codex
// app-server refreshed independently on a 401, the siblings' in-flight tokens
// were invalidated within seconds — a refresh storm that burned the seed and
// wedged every codex agent.
//
// THE FIX (two halves; this is the core half)
//
// 1. The per-workspace codex app-server NO LONGER refreshes (the template's
// OAuth POST is gated off by default — see the codex template's
// codex_auth_sync.sh / CODEX_AUTH_REFRESH_OWNER gate). Workspaces only ever
// GET the current token and write it to auth.json.
// 2. ONE owner refreshes the rotating refresh_token: this background goroutine
// in the platform. It is structurally single-flight (one goroutine + a
// package mutex), refreshes ONLY when the access_token is within a safety
// margin of expiry, POSTs the refresh_token at most ONCE per due cycle, and
// writes the rotated blob back to global_secrets. On a permanent failure
// (the seed was already burned by an out-of-band login) it logs ONCE and
// backs off — it never hot-loops a dead refresh_token.
//
// Billing-mode resolution and the byok strip are UNTOUCHED by this package.
package codexauth
import (
"context"
"database/sql"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"strings"
"sync"
"time"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
)
const (
// CodexAuthSecretKey is the global_secrets key holding the shared codex
// ChatGPT/Codex subscription OAuth blob (auth.json contents).
CodexAuthSecretKey = "CODEX_AUTH_JSON"
// oauthTokenURL is OpenAI's OAuth token endpoint. The ONLY endpoint this
// package ever POSTs to, and only for a due refresh.
oauthTokenURL = "https://auth.openai.com/oauth/token"
// codexOAuthClientID is the public Codex CLI OAuth client id (the same id
// the codex CLI sends). Not a secret.
codexOAuthClientID = "app_EMoamEEZ73f0CkXaXp7hrann"
// refreshSafetyMargin is how far ahead of access_token expiry a refresh is
// considered DUE. A token expiring within this window is refreshed now; one
// expiring later is left untouched (skip-when-fresh). Generous so a slow
// tick can never let the shared token lapse for the fleet.
refreshSafetyMargin = 15 * time.Minute
// defaultInterval is how often the loop wakes to check due-ness. The check
// is cheap (decrypt + JWT exp parse) and only POSTs when actually due.
defaultInterval = 5 * time.Minute
// permanentFailureBackoff is how long the loop waits after a PERMANENT
// refresh failure (invalid_grant / "refresh token already used"). The seed
// is burned until a human re-seeds a fresh login; there is nothing to retry,
// so we back off hard rather than hammer the dead token.
permanentFailureBackoff = 1 * time.Hour
)
// SecretStore is the minimal global_secrets surface the refresher needs. The
// production implementation (postgresStore) is backed by *sql.DB; tests inject
// a fake. It is deliberately tiny — read one key, write one key — so the test
// double is trivial and the refresher never reaches for the package-global DB.
type SecretStore interface {
// Get returns the decrypted secret value and true, or ("", false) when the
// key is absent. A non-nil error is a real read failure (not absence).
Get(ctx context.Context, key string) (value string, found bool, err error)
// Put encrypts and upserts value under key, bumping the row's updated_at
// (the "last_refresh" timestamp). It is the rotated-blob write-back.
Put(ctx context.Context, key, value string) error
}
// httpDoer is the http client seam (real *http.Client in prod, fake transport
// in tests). Tests NEVER hit the network.
type httpDoer interface {
Do(req *http.Request) (*http.Response, error)
}
// refresher is the single-owner refresh engine. The package-level mutex makes
// the refresh structurally single-flight: even if two refreshOnce calls raced
// (they cannot in prod — one goroutine drives it — but a test or a future
// caller might), only one POSTs at a time, and the access-token freshness
// re-check inside the lock means the second sees a freshly-rotated token and
// skips. One goroutine + this mutex = single-flight by construction.
type refresher struct {
store SecretStore
client httpDoer
now func() time.Time
// permanentlyFailed records that the current seed's refresh_token was
// rejected as already-used/invalid. While set, refreshOnce is INERT (it
// will not re-POST the dead token) until the secret value CHANGES (a human
// re-seed), detected by comparing the stored blob. This is the anti-storm
// latch — it lives on the struct, not globally, so it resets if the seed is
// replaced out of band.
failedSeed string // the auth-json blob that failed; "" = no known failure
}
// mu serializes refreshOnce across the process. Package-level so the
// single-flight guarantee holds regardless of how many refresher values exist
// (in prod there is exactly one).
var mu sync.Mutex
// oauthTokens is the token trio inside auth.json (and the OAuth response).
type oauthTokens struct {
AccessToken string `json:"access_token"`
RefreshToken string `json:"refresh_token"`
IDToken string `json:"id_token,omitempty"`
}
// StartCodexAuthRefresher launches the single background refresher goroutine.
// It returns immediately; the loop runs until ctx is cancelled. Wire it under
// supervised.RunWithRecover in main.go like the other Start* sweeps.
//
// db may be nil only in tests that drive refreshOnce directly; in prod it is
// the server's *sql.DB. The loop is INERT (logs once, keeps ticking) whenever
// CODEX_AUTH_JSON is absent — a deployment with no shared codex seed pays only
// a cheap periodic read.
func StartCodexAuthRefresher(ctx context.Context, db *sql.DB) {
r := &refresher{
store: &postgresStore{db: db},
client: &http.Client{Timeout: 30 * time.Second},
now: time.Now,
}
r.run(ctx, defaultInterval)
}
// run is the tick loop. It checks due-ness every interval and on a permanent
// failure waits permanentFailureBackoff before the next check (never a tight
// retry of a burned token).
func (r *refresher) run(ctx context.Context, interval time.Duration) {
// Check once promptly on boot, then on the interval.
for {
wait := interval
if perm := r.refreshOnce(ctx); perm {
// Permanent failure this cycle — the seed is burned. Back off hard;
// a human must re-seed. We keep ticking (a re-seed CHANGES the blob,
// which clears the latch) but slowly.
wait = permanentFailureBackoff
}
timer := time.NewTimer(wait)
select {
case <-ctx.Done():
timer.Stop()
log.Printf("codexauth: context done; stopping refresher")
return
case <-timer.C:
}
}
}
// refreshOnce performs ONE due-check + at most one refresh POST. It returns
// permanentFailure=true iff the refresh_token was permanently rejected this
// cycle (the caller backs off). All other outcomes (inert/skip/rotated/transient
// error) return false.
//
// It is single-flight: the package mutex is held for the whole read→decide→
// POST→write-back so two callers cannot both POST the (single-use) refresh_token.
func (r *refresher) refreshOnce(ctx context.Context) (permanentFailure bool) {
mu.Lock()
defer mu.Unlock()
blob, found, err := r.store.Get(ctx, CodexAuthSecretKey)
if err != nil {
log.Printf("codexauth: read CODEX_AUTH_JSON failed: %v (skipping this cycle)", err)
return false
}
if !found || strings.TrimSpace(blob) == "" {
// INERT: no shared codex seed in this deployment. Cheap no-op.
log.Printf("codexauth: no CODEX_AUTH_JSON in global_secrets — refresher inert")
// A previously-failed seed that has since been DELETED clears the latch.
r.failedSeed = ""
return false
}
// Anti-storm latch: if THIS exact blob already failed permanently, do not
// re-POST its dead refresh_token. A re-seed changes the blob and clears it.
if r.failedSeed != "" && r.failedSeed == blob {
return false
}
if r.failedSeed != "" && r.failedSeed != blob {
// The seed changed out of band (human re-login) — give it a fresh chance.
r.failedSeed = ""
}
tokens, err := parseTokens(blob)
if err != nil {
log.Printf("codexauth: CODEX_AUTH_JSON is not parseable codex auth json: %v (skipping)", err)
return false
}
if tokens.RefreshToken == "" {
log.Printf("codexauth: CODEX_AUTH_JSON carries no refresh_token (skipping)")
return false
}
// Skip-when-fresh: only refresh within the safety margin of expiry. A blob
// with an unparseable/absent access_token exp is treated as DUE (better to
// refresh a token we cannot date than let the fleet lapse).
exp, haveExp := jwtExp(tokens.AccessToken)
if haveExp {
remaining := exp.Sub(r.now())
if remaining > refreshSafetyMargin {
// Fresh — nothing to do. No POST.
return false
}
}
// DUE: POST the refresh_token ONCE.
newTokens, perm, err := r.doRefresh(ctx, tokens.RefreshToken)
if err != nil {
if perm {
// Permanent: the seed is burned. Latch it so we don't re-POST, log
// ONCE, and DO NOT write anything back.
log.Printf("codexauth: PERMANENT refresh failure (refresh_token rejected): %v — "+
"NOT writing back; the shared CODEX_AUTH_JSON seed is burned and must be re-seeded "+
"via a fresh codex login. Backing off.", err)
r.failedSeed = blob
return true
}
// Transient (network/5xx): no write-back, retry next cycle (no backoff).
log.Printf("codexauth: transient refresh error: %v (will retry next cycle)", err)
return false
}
// Success: merge the rotated trio into the blob (preserving every other
// field) and write it back encrypted, bumping updated_at (last_refresh).
rotated, err := mergeTokens(blob, newTokens)
if err != nil {
log.Printf("codexauth: failed to merge rotated tokens into auth json: %v (NOT writing back)", err)
return false
}
if err := r.store.Put(ctx, CodexAuthSecretKey, rotated); err != nil {
log.Printf("codexauth: write-back of rotated CODEX_AUTH_JSON failed: %v", err)
return false
}
r.failedSeed = "" // success clears any stale latch
log.Printf("codexauth: rotated shared CODEX_AUTH_JSON (single-owner refresh)")
return false
}
// doRefresh POSTs the refresh_token to OpenAI's OAuth endpoint exactly once and
// returns the rotated trio. permanent=true marks an unrecoverable rejection
// (HTTP 400 invalid_grant / "refresh token already used") so the caller latches
// and backs off instead of retrying.
func (r *refresher) doRefresh(ctx context.Context, refreshToken string) (tokens oauthTokens, permanent bool, err error) {
body, _ := json.Marshal(map[string]string{
"grant_type": "refresh_token",
"client_id": codexOAuthClientID,
"refresh_token": refreshToken,
})
req, err := http.NewRequestWithContext(ctx, http.MethodPost, oauthTokenURL, strings.NewReader(string(body)))
if err != nil {
return oauthTokens{}, false, err
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
resp, err := r.client.Do(req)
if err != nil {
return oauthTokens{}, false, err // transient: network
}
defer resp.Body.Close()
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
if resp.StatusCode == http.StatusOK {
var t oauthTokens
if err := json.Unmarshal(respBody, &t); err != nil {
return oauthTokens{}, false, fmt.Errorf("decode token response: %w", err)
}
if t.AccessToken == "" {
return oauthTokens{}, false, fmt.Errorf("token response missing access_token")
}
return t, false, nil
}
// Non-200. A 400 (and any body naming invalid_grant / already-used) is a
// PERMANENT rejection of the refresh_token. 401/403 likewise mean the seed
// is no good. Everything else (429/5xx/network-shaped) is transient.
lowerBody := strings.ToLower(string(respBody))
isInvalidGrant := strings.Contains(lowerBody, "invalid_grant") ||
strings.Contains(lowerBody, "refresh token already used") ||
strings.Contains(lowerBody, "already been used") ||
strings.Contains(lowerBody, "token has been revoked")
switch {
case resp.StatusCode == http.StatusBadRequest && isInvalidGrant:
return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
case resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden:
return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
default:
return oauthTokens{}, false, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
}
}
// parseTokens extracts the OAuth trio from an auth.json blob, accepting both
// the nested `{"tokens":{...}}` shape the codex CLI writes and a flat top-level
// shape some seeds use.
func parseTokens(blob string) (oauthTokens, error) {
var top map[string]json.RawMessage
if err := json.Unmarshal([]byte(blob), &top); err != nil {
return oauthTokens{}, err
}
if nested, ok := top["tokens"]; ok {
var t oauthTokens
if err := json.Unmarshal(nested, &t); err != nil {
return oauthTokens{}, fmt.Errorf("decode nested tokens: %w", err)
}
return t, nil
}
var t oauthTokens
if err := json.Unmarshal([]byte(blob), &t); err != nil {
return oauthTokens{}, err
}
return t, nil
}
// mergeTokens writes the rotated trio back into the original blob in-place,
// preserving the blob's shape (nested-vs-flat) and every other field. A field
// in the OAuth response that is empty (e.g. id_token omitted) does NOT clobber
// the existing value.
func mergeTokens(blob string, rotated oauthTokens) (string, error) {
var top map[string]json.RawMessage
if err := json.Unmarshal([]byte(blob), &top); err != nil {
return "", err
}
applyTo := func(m map[string]json.RawMessage) error {
setStr := func(key, val string) error {
if val == "" {
return nil // don't clobber an existing value with an empty one
}
b, err := json.Marshal(val)
if err != nil {
return err
}
m[key] = b
return nil
}
if err := setStr("access_token", rotated.AccessToken); err != nil {
return err
}
if err := setStr("refresh_token", rotated.RefreshToken); err != nil {
return err
}
if err := setStr("id_token", rotated.IDToken); err != nil {
return err
}
return nil
}
if nestedRaw, ok := top["tokens"]; ok {
var nested map[string]json.RawMessage
if err := json.Unmarshal(nestedRaw, &nested); err != nil {
return "", fmt.Errorf("decode nested tokens for merge: %w", err)
}
if err := applyTo(nested); err != nil {
return "", err
}
nb, err := json.Marshal(nested)
if err != nil {
return "", err
}
top["tokens"] = nb
} else {
if err := applyTo(top); err != nil {
return "", err
}
}
out, err := json.Marshal(top)
if err != nil {
return "", err
}
return string(out), nil
}
// jwtExp decodes the `exp` claim (Unix seconds) from a JWT access token WITHOUT
// verifying the signature (we only need the expiry to decide due-ness; the
// token's validity is OpenAI's to enforce). Returns ok=false when the token is
// not a parseable 3-part JWT or carries no numeric exp.
func jwtExp(token string) (time.Time, bool) {
parts := strings.Split(token, ".")
if len(parts) != 3 {
return time.Time{}, false
}
payload, err := base64.RawURLEncoding.DecodeString(parts[1])
if err != nil {
// Some encoders pad; tolerate standard base64url with padding too.
payload, err = base64.URLEncoding.DecodeString(parts[1])
if err != nil {
return time.Time{}, false
}
}
var claims struct {
Exp json.Number `json:"exp"`
}
if err := json.Unmarshal(payload, &claims); err != nil {
return time.Time{}, false
}
secs, err := claims.Exp.Int64()
if err != nil || secs <= 0 {
return time.Time{}, false
}
return time.Unix(secs, 0), true
}
// postgresStore is the production SecretStore backed by global_secrets, using
// the SAME crypto path the secrets handler uses (DecryptVersioned on read,
// Encrypt + CurrentEncryptionVersion on write).
type postgresStore struct {
db *sql.DB
}
func (s *postgresStore) Get(ctx context.Context, key string) (string, bool, error) {
var enc []byte
var ver int
err := s.db.QueryRowContext(ctx,
`SELECT encrypted_value, encryption_version FROM global_secrets WHERE key = $1`, key).
Scan(&enc, &ver)
if err == sql.ErrNoRows {
return "", false, nil
}
if err != nil {
return "", false, err
}
plain, err := crypto.DecryptVersioned(enc, ver)
if err != nil {
return "", false, err
}
return string(plain), true, nil
}
func (s *postgresStore) Put(ctx context.Context, key, value string) error {
enc, err := crypto.Encrypt([]byte(value))
if err != nil {
return err
}
ver := crypto.CurrentEncryptionVersion()
_, err = s.db.ExecContext(ctx, `
INSERT INTO global_secrets (key, encrypted_value, encryption_version)
VALUES ($1, $2, $3)
ON CONFLICT (key) DO UPDATE
SET encrypted_value = $2, encryption_version = $3, updated_at = now()
`, key, enc, ver)
return err
}
@@ -0,0 +1,425 @@
package codexauth
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// --- test doubles -----------------------------------------------------------
// fakeStore is an in-memory SecretStore. nil entry = absent key.
type fakeStore struct {
mu sync.Mutex
values map[string]string
getErr error
putErr error
puts int32 // count of successful Put calls
}
func newFakeStore() *fakeStore { return &fakeStore{values: map[string]string{}} }
func (f *fakeStore) Get(_ context.Context, key string) (string, bool, error) {
f.mu.Lock()
defer f.mu.Unlock()
if f.getErr != nil {
return "", false, f.getErr
}
v, ok := f.values[key]
return v, ok, nil
}
func (f *fakeStore) Put(_ context.Context, key, value string) error {
f.mu.Lock()
defer f.mu.Unlock()
if f.putErr != nil {
return f.putErr
}
f.values[key] = value
atomic.AddInt32(&f.puts, 1)
return nil
}
func (f *fakeStore) get(key string) string {
f.mu.Lock()
defer f.mu.Unlock()
return f.values[key]
}
// fakeTransport records every request and returns a scripted response. It is
// the network seam — tests NEVER make a real request.
type fakeTransport struct {
mu sync.Mutex
calls int32
urls []string
methods []string
bodies []string
status int
respBody string
transport func(*http.Request) (*http.Response, error) // optional override
}
func (t *fakeTransport) Do(req *http.Request) (*http.Response, error) {
atomic.AddInt32(&t.calls, 1)
t.mu.Lock()
t.urls = append(t.urls, req.URL.String())
t.methods = append(t.methods, req.Method)
if req.Body != nil {
b, _ := io.ReadAll(req.Body)
t.bodies = append(t.bodies, string(b))
} else {
t.bodies = append(t.bodies, "")
}
t.mu.Unlock()
if t.transport != nil {
return t.transport(req)
}
status := t.status
if status == 0 {
status = http.StatusOK
}
return &http.Response{
StatusCode: status,
Body: io.NopCloser(strings.NewReader(t.respBody)),
Header: make(http.Header),
}, nil
}
func (t *fakeTransport) callCount() int { return int(atomic.LoadInt32(&t.calls)) }
// --- helpers ----------------------------------------------------------------
// makeJWT builds an unsigned-but-parseable JWT whose payload carries exp.
func makeJWT(exp time.Time) string {
header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"none","typ":"JWT"}`))
payload := base64.RawURLEncoding.EncodeToString([]byte(
fmt.Sprintf(`{"exp":%d,"sub":"codex"}`, exp.Unix())))
sig := base64.RawURLEncoding.EncodeToString([]byte("sig"))
return header + "." + payload + "." + sig
}
// authBlob builds a nested codex auth.json blob with the given tokens.
func authBlob(access, refresh string) string {
b, _ := json.Marshal(map[string]any{
"tokens": map[string]any{
"access_token": access,
"refresh_token": refresh,
"id_token": "id-original",
},
"OPENAI_API_KEY": nil,
"last_refresh": "2026-01-01T00:00:00Z",
})
return string(b)
}
func newTestRefresher(store SecretStore, client httpDoer, now time.Time) *refresher {
return &refresher{
store: store,
client: client,
now: func() time.Time { return now },
}
}
func okRefreshResponse(access, refresh string) string {
b, _ := json.Marshal(oauthTokens{AccessToken: access, RefreshToken: refresh, IDToken: "id-new"})
return string(b)
}
// --- tests ------------------------------------------------------------------
// TestJWTExpParse covers the exp decode (valid, malformed, missing).
func TestJWTExpParse(t *testing.T) {
want := time.Now().Add(2 * time.Hour).Truncate(time.Second)
got, ok := jwtExp(makeJWT(want))
if !ok {
t.Fatalf("jwtExp(valid) ok=false, want true")
}
if !got.Equal(want) {
t.Errorf("jwtExp = %v, want %v", got, want)
}
if _, ok := jwtExp("not-a-jwt"); ok {
t.Errorf("jwtExp(non-jwt) ok=true, want false")
}
if _, ok := jwtExp("a.b.c"); ok {
t.Errorf("jwtExp(garbage parts) ok=true, want false")
}
// 3 parts but payload has no exp.
noExp := base64.RawURLEncoding.EncodeToString([]byte("{}"))
if _, ok := jwtExp("h." + noExp + ".s"); ok {
t.Errorf("jwtExp(no exp claim) ok=true, want false")
}
}
// TestRefreshOnce_SkipWhenFresh: a token well outside the safety margin is NOT
// refreshed — no POST, no write-back.
func TestRefreshOnce_SkipWhenFresh(t *testing.T) {
now := time.Now()
store := newFakeStore()
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(2*time.Hour)), "rt-1")
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse("new-at", "rt-2")}
r := newTestRefresher(store, tr, now)
if perm := r.refreshOnce(context.Background()); perm {
t.Fatalf("fresh token: permanentFailure=true, want false")
}
if tr.callCount() != 0 {
t.Errorf("fresh token: %d OAuth POSTs, want 0", tr.callCount())
}
if atomic.LoadInt32(&store.puts) != 0 {
t.Errorf("fresh token: %d write-backs, want 0", store.puts)
}
}
// TestRefreshOnce_RotateThenReskip: a token inside the margin is refreshed once
// (POST + write-back of the rotated blob); a subsequent call on the now-fresh
// rotated token skips (no second POST). Proves rotate→write-back→re-skip.
func TestRefreshOnce_RotateThenReskip(t *testing.T) {
now := time.Now()
store := newFakeStore()
// Expires in 5m — inside the 15m safety margin → DUE.
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(5*time.Minute)), "rt-1")
// Rotated access token is fresh (2h out); rotated refresh is rt-2.
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
r := newTestRefresher(store, tr, now)
if perm := r.refreshOnce(context.Background()); perm {
t.Fatalf("due token: permanentFailure=true, want false")
}
if tr.callCount() != 1 {
t.Fatalf("due token: %d OAuth POSTs, want exactly 1", tr.callCount())
}
if atomic.LoadInt32(&store.puts) != 1 {
t.Fatalf("due token: %d write-backs, want exactly 1", store.puts)
}
// The written blob must carry the rotated refresh_token and preserve the
// non-token field.
rotated := store.get(CodexAuthSecretKey)
tokens, err := parseTokens(rotated)
if err != nil {
t.Fatalf("parse rotated blob: %v", err)
}
if tokens.RefreshToken != "rt-2" {
t.Errorf("rotated refresh_token = %q, want rt-2", tokens.RefreshToken)
}
if !strings.Contains(rotated, "last_refresh") {
t.Errorf("rotated blob dropped the preserved last_refresh field: %s", rotated)
}
// Second call: the rotated access token is fresh → skip, no new POST.
if perm := r.refreshOnce(context.Background()); perm {
t.Fatalf("re-skip: permanentFailure=true, want false")
}
if tr.callCount() != 1 {
t.Errorf("re-skip: %d total OAuth POSTs, want still 1", tr.callCount())
}
if atomic.LoadInt32(&store.puts) != 1 {
t.Errorf("re-skip: %d total write-backs, want still 1", store.puts)
}
}
// TestRefreshOnce_NoSecretInert: absent CODEX_AUTH_JSON → inert (no POST, no
// write-back, no error/permanent).
func TestRefreshOnce_NoSecretInert(t *testing.T) {
store := newFakeStore() // empty
tr := &fakeTransport{}
r := newTestRefresher(store, tr, time.Now())
if perm := r.refreshOnce(context.Background()); perm {
t.Fatalf("no secret: permanentFailure=true, want false")
}
if tr.callCount() != 0 {
t.Errorf("no secret: %d POSTs, want 0", tr.callCount())
}
if atomic.LoadInt32(&store.puts) != 0 {
t.Errorf("no secret: %d write-backs, want 0", store.puts)
}
}
// TestRefreshOnce_PermanentFailNoWriteNoStorm: a 400 invalid_grant must (a) not
// write back, (b) return permanentFailure=true, and (c) NOT re-POST on the next
// cycle for the same (burned) seed — the anti-storm latch.
func TestRefreshOnce_PermanentFailNoWriteNoStorm(t *testing.T) {
now := time.Now()
store := newFakeStore()
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-burned")
tr := &fakeTransport{
status: http.StatusBadRequest,
respBody: `{"error":"invalid_grant","error_description":"refresh token already used"}`,
}
r := newTestRefresher(store, tr, now)
perm := r.refreshOnce(context.Background())
if !perm {
t.Fatalf("invalid_grant: permanentFailure=false, want true")
}
if tr.callCount() != 1 {
t.Fatalf("invalid_grant: %d POSTs, want exactly 1", tr.callCount())
}
if atomic.LoadInt32(&store.puts) != 0 {
t.Fatalf("invalid_grant: %d write-backs, want 0 (must NOT persist a failed refresh)", store.puts)
}
// Next cycle, SAME burned seed: must NOT re-POST (anti-storm latch).
perm2 := r.refreshOnce(context.Background())
if tr.callCount() != 1 {
t.Errorf("anti-storm: re-POSTed a burned refresh_token (%d total POSTs, want still 1)", tr.callCount())
}
_ = perm2 // latched cycle returns false (already-known failure, nothing new)
// A RE-SEED (blob changes) clears the latch and allows a fresh attempt.
store.mu.Lock()
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-freshly-seeded")
store.mu.Unlock()
tr.status = http.StatusOK
tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-rotated")
if perm := r.refreshOnce(context.Background()); perm {
t.Fatalf("post-reseed: permanentFailure=true, want false")
}
if tr.callCount() != 2 {
t.Errorf("post-reseed: %d total POSTs, want 2 (latch should clear on re-seed)", tr.callCount())
}
}
// TestRefreshOnce_TransientNoWriteNoLatch: a 5xx is transient — no write-back,
// returns false (no hard backoff latch), and a later cycle retries.
func TestRefreshOnce_TransientNoWriteNoLatch(t *testing.T) {
now := time.Now()
store := newFakeStore()
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
tr := &fakeTransport{status: http.StatusServiceUnavailable, respBody: "upstream down"}
r := newTestRefresher(store, tr, now)
if perm := r.refreshOnce(context.Background()); perm {
t.Fatalf("503: permanentFailure=true, want false (transient)")
}
if atomic.LoadInt32(&store.puts) != 0 {
t.Errorf("503: %d write-backs, want 0", store.puts)
}
// Retry next cycle succeeds (no latch on transient).
tr.status = http.StatusOK
tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")
if perm := r.refreshOnce(context.Background()); perm {
t.Fatalf("retry after 503: permanentFailure=true, want false")
}
if tr.callCount() != 2 {
t.Errorf("transient retry: %d total POSTs, want 2", tr.callCount())
}
if atomic.LoadInt32(&store.puts) != 1 {
t.Errorf("transient retry: %d write-backs, want 1", store.puts)
}
}
// TestRefreshOnce_SingleFlight: concurrent refreshOnce calls on a DUE token must
// POST exactly once total — the package mutex serializes them and the second
// sees the freshly-rotated (now-fresh) token and skips. Structural single-flight.
func TestRefreshOnce_SingleFlight(t *testing.T) {
now := time.Now()
store := newFakeStore()
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
// Every successful rotation yields a FRESH (2h) access token, so once one
// caller rotates, the other sees fresh and skips.
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
r := newTestRefresher(store, tr, now)
const n = 16
var wg sync.WaitGroup
wg.Add(n)
for i := 0; i < n; i++ {
go func() {
defer wg.Done()
r.refreshOnce(context.Background())
}()
}
wg.Wait()
if tr.callCount() != 1 {
t.Errorf("single-flight: %d OAuth POSTs across %d concurrent calls, want exactly 1", tr.callCount(), n)
}
if atomic.LoadInt32(&store.puts) != 1 {
t.Errorf("single-flight: %d write-backs, want exactly 1", store.puts)
}
}
// TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint: when it DOES refresh, the
// single POST goes to the OAuth token URL with the refresh_token grant body.
func TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint(t *testing.T) {
now := time.Now()
store := newFakeStore()
store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-secret")
tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
r := newTestRefresher(store, tr, now)
r.refreshOnce(context.Background())
if tr.callCount() != 1 {
t.Fatalf("%d POSTs, want exactly 1", tr.callCount())
}
if tr.urls[0] != oauthTokenURL {
t.Errorf("POST URL = %q, want %q", tr.urls[0], oauthTokenURL)
}
if tr.methods[0] != http.MethodPost {
t.Errorf("method = %q, want POST", tr.methods[0])
}
var body map[string]string
if err := json.Unmarshal([]byte(tr.bodies[0]), &body); err != nil {
t.Fatalf("request body not json: %v (%s)", err, tr.bodies[0])
}
if body["grant_type"] != "refresh_token" {
t.Errorf("grant_type = %q, want refresh_token", body["grant_type"])
}
if body["refresh_token"] != "rt-secret" {
t.Errorf("refresh_token = %q, want rt-secret", body["refresh_token"])
}
if body["client_id"] != codexOAuthClientID {
t.Errorf("client_id = %q, want %q", body["client_id"], codexOAuthClientID)
}
}
// TestRefreshOnce_ReadErrorSkips: a store read error is a transient skip (no
// POST, no permanent latch).
func TestRefreshOnce_ReadErrorSkips(t *testing.T) {
store := newFakeStore()
store.getErr = fmt.Errorf("db down")
tr := &fakeTransport{}
r := newTestRefresher(store, tr, time.Now())
if perm := r.refreshOnce(context.Background()); perm {
t.Errorf("read error: permanentFailure=true, want false")
}
if tr.callCount() != 0 {
t.Errorf("read error: %d POSTs, want 0", tr.callCount())
}
}
// TestMergeTokens_PreservesOtherFields proves the rotated write-back keeps every
// non-token field and does not clobber id_token with an empty rotated value.
func TestMergeTokens_PreservesOtherFields(t *testing.T) {
blob := authBlob("old-at", "old-rt")
out, err := mergeTokens(blob, oauthTokens{AccessToken: "new-at", RefreshToken: "new-rt"}) // no id_token
if err != nil {
t.Fatalf("mergeTokens: %v", err)
}
tokens, err := parseTokens(out)
if err != nil {
t.Fatalf("parse merged: %v", err)
}
if tokens.AccessToken != "new-at" || tokens.RefreshToken != "new-rt" {
t.Errorf("merged tokens = %+v, want new-at/new-rt", tokens)
}
if tokens.IDToken != "id-original" {
t.Errorf("empty rotated id_token clobbered the original: got %q, want id-original", tokens.IDToken)
}
if !strings.Contains(out, "last_refresh") {
t.Errorf("merge dropped preserved field: %s", out)
}
}
+49 -14
View File
@@ -334,28 +334,39 @@ func (h *WorkspaceHandler) ProxyA2A(c *gin.Context) {
c.Data(status, "application/json", respBody)
}
// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace
// has a budget_limit set and monthly_spend has reached or exceeded it.
// DB errors are logged and treated as fail-open — a budget check failure
// must not block legitimate A2A traffic.
// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace has
// exceeded ANY of its configured per-period budget limits (hourly/daily/weekly/
// monthly — see budget_periods.go). Per-period spend is the rolling-window sum
// over the workspace_spend_events ledger. DB errors are logged and treated as
// fail-open — a budget check failure must not block legitimate A2A traffic.
func (h *WorkspaceHandler) checkWorkspaceBudget(ctx context.Context, workspaceID string) *proxyA2AError {
var budgetLimit sql.NullInt64
var monthlySpend int64
err := db.DB.QueryRowContext(ctx,
`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
var limitsRaw []byte
if err := db.DB.QueryRowContext(ctx,
`SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1`,
workspaceID,
).Scan(&budgetLimit, &monthlySpend)
if err != nil {
).Scan(&limitsRaw); err != nil {
if err != sql.ErrNoRows {
log.Printf("ProxyA2A: budget check failed for %s: %v", workspaceID, err)
}
return nil // fail-open
}
if budgetLimit.Valid && monthlySpend >= budgetLimit.Int64 {
log.Printf("ProxyA2A: budget exceeded for %s (spend=%d limit=%d)", workspaceID, monthlySpend, budgetLimit.Int64)
limits := parseBudgetLimits(limitsRaw)
if len(limits) == 0 {
return nil // no limits configured
}
spend, err := spendByPeriod(ctx, db.DB, workspaceID)
if err != nil {
log.Printf("ProxyA2A: budget spend query failed for %s: %v", workspaceID, err)
return nil // fail-open
}
if over := exceededPeriods(limits, spend); len(over) > 0 {
log.Printf("ProxyA2A: budget exceeded for %s (periods=%v limits=%v spend=%v)", workspaceID, over, limits, spend)
return &proxyA2AError{
Status: http.StatusPaymentRequired,
Response: gin.H{"error": "workspace budget limit exceeded"},
Status: http.StatusPaymentRequired,
Response: gin.H{
"error": "workspace budget limit exceeded",
"exceeded_periods": over,
},
}
}
return nil
@@ -375,6 +386,30 @@ func (h *WorkspaceHandler) proxyA2ARequest(ctx context.Context, workspaceID stri
Response: gin.H{"error": "access denied: workspaces cannot communicate per hierarchy rules"},
}
}
// #1953 cross-tenant isolation. CanCommunicate alone does NOT enforce
// org boundaries: its "root-level siblings — both have no parent" rule
// treats every tenant's org root as a sibling, so a caller that is an
// org root could resolve and route a2a to another tenant's org root
// (and resolveAgentURL accepts ANY workspace id with no org check).
// Gate on the SAME parent_id-chain org scoping the OFFSEC-015 broadcast
// fix uses: reject before resolveAgentURL when caller and target are in
// different orgs. Fail-closed — a DB error denies cross-org routing.
ok, err := sameOrg(ctx, db.DB, callerID, workspaceID)
if err != nil {
log.Printf("ProxyA2A: org-scope check failed %s → %s: %v — denying", callerID, workspaceID, err)
return 0, nil, &proxyA2AError{
Status: http.StatusForbidden,
Response: gin.H{"error": "access denied: org isolation check failed"},
}
}
if !ok {
log.Printf("ProxyA2A: cross-org routing denied %s → %s (#1953)", callerID, workspaceID)
return 0, nil, &proxyA2AError{
Status: http.StatusForbidden,
Response: gin.H{"error": "access denied: target workspace is in a different org"},
}
}
}
// Budget enforcement: reject A2A calls when the workspace has exceeded its
@@ -16,9 +16,9 @@ import (
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
@@ -437,6 +437,10 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) {
WithArgs("ws-target").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))
// #1953 cross-tenant guard: same-org check after CanCommunicate. Both
// workspaces resolve to the same org root → routing allowed.
mockSameOrg(mock, "ws-caller", "ws-target", true)
expectBudgetCheck(mock, "ws-target")
// Expect activity log with source_id set
@@ -465,6 +469,24 @@ func TestProxyA2A_CallerIDPropagated(t *testing.T) {
}
}
// mockSameOrg sets up the two org-root recursive-CTE expectations that the
// #1953 cross-tenant guard in proxyA2ARequest runs after CanCommunicate passes.
// sameOrg=true returns the SAME root_id for both caller and target (same tenant);
// sameOrg=false returns different root_ids (cross-tenant → routing must be denied).
func mockSameOrg(mock sqlmock.Sqlmock, caller, target string, sameOrg bool) {
callerRoot := "org-root-shared"
targetRoot := "org-root-shared"
if !sameOrg {
targetRoot = "org-root-other-tenant"
}
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(callerRoot))
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(target).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(targetRoot))
}
// mockCanCommunicate sets up sqlmock expectations for CanCommunicate(caller, target).
// allowed=true sets up rows that satisfy the access policy (siblings under same parent).
// allowed=false sets up rows that don't (different parents).
@@ -659,6 +681,9 @@ func TestProxyA2A_CallerIDDerivedFromBearer(t *testing.T) {
WithArgs("ws-target").
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", "ws-parent"))
// 3b. #1953 cross-tenant guard — same org root → routing allowed.
mockSameOrg(mock, "ws-caller", "ws-target", true)
expectBudgetCheck(mock, "ws-target")
// 4. activity_logs INSERT — verify source_id arg is the derived ws-caller
@@ -2092,6 +2117,10 @@ func (f *fakeCPProv) Stop(_ context.Context, _ string) error {
f.stopCalls++
return nil
}
func (f *fakeCPProv) StopAndPrune(_ context.Context, _ string) error {
f.stopCalls++
return nil
}
func (f *fakeCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
return "", nil
}
@@ -18,8 +18,8 @@ import (
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"github.com/DATA-DOG/go-sqlmock"
"github.com/alicebob/miniredis/v2"
)
@@ -209,10 +209,12 @@ func drainSetup(t *testing.T, workspaceID string) (sqlmock.Sqlmock, *WorkspaceHa
// Named distinctly from handlers_test.go's expectBudgetCheck (which uses MatchPsql
// escaped-regex and cannot be reused with QueryMatcherEqual tests).
func expectQueueBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
// Multi-period (#49): exact-match the budget_limits read; "{}" → no limits →
// checkWorkspaceBudget returns early (no spend query).
mock.ExpectQuery(
"SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1",
"SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1",
).WithArgs(workspaceID).
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
}
// seedRedisURL puts the agent server URL into the Redis cache so resolveAgentURL
@@ -148,6 +148,125 @@ func (h *AdminSchedulesHealthHandler) Health(c *gin.Context) {
c.JSON(http.StatusOK, entries)
}
// orphanScheduleEntry is one row in the Orphans response.
type orphanScheduleEntry struct {
WorkspaceID string `json:"workspace_id"`
WorkspaceStatus string `json:"workspace_status"` // "removed" | "missing"
ScheduleID string `json:"schedule_id"`
ScheduleName string `json:"schedule_name"`
Source string `json:"source"`
Enabled bool `json:"enabled"`
CronExpr string `json:"cron_expr"`
}
// Orphans handles GET /admin/schedules/orphans — the monitor surface for
// internal#2006. Health (above) reports only LIVE workspaces' schedules, so a
// schedule left on a removed/recreated workspace silently stops firing and
// never appears there. This endpoint lists exactly those orphans (workspace
// removed OR missing) so an operator/monitor can alert. Returns 200 + JSON
// array (empty when none). Auth via adminAuth() in router.go.
func (h *AdminSchedulesHealthHandler) Orphans(c *gin.Context) {
ctx := c.Request.Context()
rows, err := db.DB.QueryContext(ctx, `
SELECT s.workspace_id,
CASE WHEN w.id IS NULL THEN 'missing' ELSE 'removed' END AS ws_status,
s.id, s.name, COALESCE(s.source, ''), s.enabled, s.cron_expr
FROM workspace_schedules s
LEFT JOIN workspaces w ON w.id = s.workspace_id
WHERE w.id IS NULL OR w.status = 'removed'
ORDER BY s.name ASC
`)
if err != nil {
log.Printf("AdminSchedulesOrphans: query error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query orphans"})
return
}
defer rows.Close()
out := make([]orphanScheduleEntry, 0)
for rows.Next() {
var e orphanScheduleEntry
if err := rows.Scan(&e.WorkspaceID, &e.WorkspaceStatus, &e.ScheduleID, &e.ScheduleName, &e.Source, &e.Enabled, &e.CronExpr); err != nil {
log.Printf("AdminSchedulesOrphans: scan error: %v", err)
continue
}
out = append(out, e)
}
if err := rows.Err(); err != nil {
log.Printf("AdminSchedulesOrphans: rows iteration error: %v", err)
}
c.JSON(http.StatusOK, out)
}
// ReapOrphans handles POST /admin/schedules/reap-orphans — the orphan cleaner
// (internal#2006). For every schedule bound to a removed/nonexistent workspace
// it re-points runtime-created schedules onto the live successor agent (matched
// by role+parent, falling back to name+parent) when one exists and doesn't
// already carry a same-named schedule; schedules with no live successor are
// disabled (enabled=false) so the scheduler stops firing into a dead workspace.
// Idempotent: re-running with no orphans is a no-op. Returns a summary count.
// Auth is enforced by the adminAuth() middleware registered in router.go.
func (h *AdminSchedulesHealthHandler) ReapOrphans(c *gin.Context) {
ctx := c.Request.Context()
// 1. Re-point runtime schedules onto a live successor (same role+parent,
// else same name+parent). Skip names already present on the successor.
repointed, err := db.DB.ExecContext(ctx, `
WITH orphan AS (
SELECT s.id, s.name, s.workspace_id, prev.role AS role, prev.parent_id AS parent_id
FROM workspace_schedules s
JOIN workspaces prev ON prev.id = s.workspace_id
WHERE prev.status = 'removed' AND s.source = 'runtime'
),
successor AS (
SELECT o.id AS schedule_id, o.name AS schedule_name,
(
SELECT w.id FROM workspaces w
WHERE w.status != 'removed'
AND w.parent_id IS NOT DISTINCT FROM o.parent_id
AND ((o.role IS NOT NULL AND w.role = o.role))
ORDER BY w.updated_at DESC NULLS LAST LIMIT 1
) AS live_id
FROM orphan o
)
UPDATE workspace_schedules s
SET workspace_id = su.live_id, updated_at = now()
FROM successor su
WHERE s.id = su.schedule_id
AND su.live_id IS NOT NULL
AND NOT EXISTS (
SELECT 1 FROM workspace_schedules t
WHERE t.workspace_id = su.live_id AND t.name = su.schedule_name
)
`)
if err != nil {
log.Printf("ReapOrphans: re-point error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "re-point failed"})
return
}
repointedN, _ := repointed.RowsAffected()
// 2. Disable any remaining schedules still bound to a removed/missing
// workspace (no live successor, or template schedules on a dead row).
disabled, err := db.DB.ExecContext(ctx, `
UPDATE workspace_schedules s
SET enabled = false, updated_at = now()
WHERE s.enabled = true
AND NOT EXISTS (
SELECT 1 FROM workspaces w
WHERE w.id = s.workspace_id AND w.status != 'removed'
)
`)
if err != nil {
log.Printf("ReapOrphans: disable error: %v", err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "disable failed"})
return
}
disabledN, _ := disabled.RowsAffected()
log.Printf("ReapOrphans: re-pointed %d, disabled %d orphaned schedule(s)", repointedN, disabledN)
c.JSON(http.StatusOK, gin.H{"repointed": repointedN, "disabled": disabledN})
}
// classifyScheduleStatus returns the health status string for a schedule.
// - "never_run" — last_run_at is NULL (schedule has never fired)
// - "stale" — now - last_run_at > staleThreshold (and threshold > 0)
@@ -444,3 +444,72 @@ func TestAdminSchedulesHealth_ResponseFields(t *testing.T) {
t.Fatalf("unmet expectations: %v", err)
}
}
// ==================== Orphans + ReapOrphans (internal#2006) ====================
// TestAdminSchedulesOrphans verifies the monitor surface lists schedules bound
// to a removed/missing workspace (the recreate-orphan failure mode).
func TestAdminSchedulesOrphans(t *testing.T) {
mock := setupTestDB(t)
handler := NewAdminSchedulesHealthHandler()
mock.ExpectQuery(`LEFT JOIN workspaces`).
WillReturnRows(sqlmock.NewRows([]string{
"workspace_id", "ws_status", "id", "name", "source", "enabled", "cron_expr",
}).AddRow("dead-ws", "removed", "sched-1", "minimax-autonomous-tick", "runtime", false, "*/5 * * * *"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/admin/schedules/orphans", nil)
handler.Orphans(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp []orphanScheduleEntry
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse response: %v", err)
}
if len(resp) != 1 {
t.Fatalf("expected 1 orphan, got %d", len(resp))
}
if resp[0].ScheduleName != "minimax-autonomous-tick" || resp[0].WorkspaceStatus != "removed" || resp[0].Source != "runtime" {
t.Errorf("unexpected orphan entry: %+v", resp[0])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Fatalf("unmet expectations: %v", err)
}
}
// TestReapOrphans verifies the cleaner re-points runtime schedules onto a live
// successor then disables any remaining dead-bound schedules, returning counts.
func TestReapOrphans(t *testing.T) {
mock := setupTestDB(t)
handler := NewAdminSchedulesHealthHandler()
mock.ExpectExec(`UPDATE workspace_schedules s\s+SET workspace_id`).
WillReturnResult(sqlmock.NewResult(0, 2))
mock.ExpectExec(`UPDATE workspace_schedules s\s+SET enabled = false`).
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("POST", "/admin/schedules/reap-orphans", nil)
handler.ReapOrphans(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]int64
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse response: %v", err)
}
if resp["repointed"] != 2 || resp["disabled"] != 1 {
t.Errorf("expected repointed=2 disabled=1, got %+v", resp)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Fatalf("unmet expectations: %v", err)
}
}
+120 -77
View File
@@ -1,7 +1,9 @@
package handlers
import (
"context"
"database/sql"
"encoding/json"
"log"
"net/http"
@@ -12,42 +14,79 @@ import (
// BudgetHandler exposes per-workspace budget read/write endpoints.
// Routes (all behind WorkspaceAuth middleware):
//
// GET /workspaces/:id/budget — current budget_limit, monthly_spend, budget_remaining
// PATCH /workspaces/:id/budget — set or clear budget_limit
// GET /workspaces/:id/budget — per-period limits, spend, remaining
// PATCH /workspaces/:id/budget — set/clear per-period limits
//
// Multi-period (#49): the budget is now four independent rolling windows —
// hourly/daily/weekly/monthly (budget_periods.go is the SSOT for the set). The
// canonical config is workspaces.budget_limits (JSONB, USD cents per period);
// per-period spend is the rolling-window sum over workspace_spend_events. The
// legacy single monthly budget_limit / monthly_spend are still emitted (and
// budget_limit kept in sync to the monthly period) for back-compat with
// pre-deploy canvas/agent builds during the rollout window.
type BudgetHandler struct{}
func NewBudgetHandler() *BudgetHandler { return &BudgetHandler{} }
// budgetResponse is the canonical JSON shape for both GET and PATCH responses.
// periodBudget is the per-period view: configured ceiling (null = no limit),
// rolling-window spend, and remaining headroom (null when no limit; may go
// negative so callers see how far over a period is).
type periodBudget struct {
Limit *int64 `json:"limit"`
Spend int64 `json:"spend"`
Remaining *int64 `json:"remaining"`
}
// budgetResponse is the canonical JSON shape for GET and PATCH.
type budgetResponse struct {
// BudgetLimit is the monthly spend ceiling in USD cents (null = no limit).
// budget_limit=500 means $5.00/month.
BudgetLimit *int64 `json:"budget_limit"`
// MonthlySpend is the agent's self-reported accumulated LLM API spend
// for the current month (USD cents). Incremented via heartbeat.
MonthlySpend int64 `json:"monthly_spend"`
// BudgetRemaining is null when BudgetLimit is null, otherwise
// max(0, budget_limit - monthly_spend). Can be negative — we store the
// actual value so callers can see how far over-budget a workspace is.
// Periods is keyed by BudgetPeriod ("hourly"/"daily"/"weekly"/"monthly").
Periods map[string]periodBudget `json:"periods"`
// --- back-compat (monthly), for pre-multi-period clients ---
BudgetLimit *int64 `json:"budget_limit"`
MonthlySpend int64 `json:"monthly_spend"`
BudgetRemaining *int64 `json:"budget_remaining"`
}
// buildBudgetResponse assembles the per-period view from the stored limits +
// the ledger spend. Single place so GET and PATCH return identical shapes.
func buildBudgetResponse(ctx context.Context, workspaceID string, limitsRaw []byte) (budgetResponse, error) {
limits := parseBudgetLimits(limitsRaw)
spend, err := spendByPeriod(ctx, db.DB, workspaceID)
if err != nil {
return budgetResponse{}, err
}
periods := make(map[string]periodBudget, len(budgetPeriods))
for _, def := range budgetPeriods {
pb := periodBudget{Spend: spend[def.Name]}
if lim, ok := limits[def.Name]; ok {
l := lim
pb.Limit = &l
r := lim - spend[def.Name]
pb.Remaining = &r
}
periods[string(def.Name)] = pb
}
resp := budgetResponse{Periods: periods, MonthlySpend: spend[PeriodMonthly]}
if m := periods[string(PeriodMonthly)]; m.Limit != nil {
resp.BudgetLimit = m.Limit
resp.BudgetRemaining = m.Remaining
}
return resp, nil
}
// GetBudget handles GET /workspaces/:id/budget.
// Returns the workspace's current budget ceiling, accumulated spend, and
// computed remaining headroom. Both budget_limit and budget_remaining are
// null when no limit has been configured for the workspace.
func (h *BudgetHandler) GetBudget(c *gin.Context) {
workspaceID := c.Param("id")
ctx := c.Request.Context()
var budgetLimit sql.NullInt64
var monthlySpend int64
var limitsRaw []byte
err := db.DB.QueryRowContext(ctx,
`SELECT budget_limit, COALESCE(monthly_spend, 0)
`SELECT COALESCE(budget_limits, '{}'::jsonb)
FROM workspaces
WHERE id = $1 AND status != 'removed'`,
workspaceID,
).Scan(&budgetLimit, &monthlySpend)
).Scan(&limitsRaw)
if err == sql.ErrNoRows {
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
return
@@ -58,66 +97,80 @@ func (h *BudgetHandler) GetBudget(c *gin.Context) {
return
}
resp := budgetResponse{
MonthlySpend: monthlySpend,
resp, err := buildBudgetResponse(ctx, workspaceID, limitsRaw)
if err != nil {
log.Printf("GetBudget: spend query failed for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
return
}
if budgetLimit.Valid {
limit := budgetLimit.Int64
resp.BudgetLimit = &limit
remaining := limit - monthlySpend
resp.BudgetRemaining = &remaining
}
c.JSON(http.StatusOK, resp)
}
// PatchBudget handles PATCH /workspaces/:id/budget.
// Accepts {"budget_limit": <int64>} to set a new ceiling, or
// {"budget_limit": null} to remove an existing ceiling.
// Returns the updated budget state in the same shape as GetBudget.
// PatchBudget handles PATCH /workspaces/:id/budget. Accepts EITHER the
// multi-period shape
//
// {"budget_limits": {"hourly": 100, "daily": null, "weekly": 500, "monthly": 2000}}
//
// (a per-period value of null/absent clears that period; a positive int sets it)
// OR the legacy single-monthly shape {"budget_limit": 2000} / {"budget_limit": null}.
func (h *BudgetHandler) PatchBudget(c *gin.Context) {
workspaceID := c.Param("id")
ctx := c.Request.Context()
// We need to distinguish between "field absent" and "field = null",
// so we unmarshal into a raw map first.
var raw map[string]interface{}
var raw map[string]json.RawMessage
if err := c.ShouldBindJSON(&raw); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
return
}
budgetLimitRaw, ok := raw["budget_limit"]
if !ok {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit field is required"})
_, hasLimits := raw["budget_limits"]
_, hasLegacy := raw["budget_limit"]
if !hasLimits && !hasLegacy {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits or budget_limit field is required"})
return
}
// Validate and convert the value. JSON numbers decode as float64.
var budgetArg interface{} // nil → SQL NULL, int64 → new ceiling
if budgetLimitRaw != nil {
switch v := budgetLimitRaw.(type) {
case float64:
if v < 0 {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
limits := make(map[BudgetPeriod]int64, len(budgetPeriods))
known := make(map[string]bool, len(budgetPeriods))
for _, def := range budgetPeriods {
known[string(def.Name)] = true
}
if hasLimits {
var m map[string]*int64
if err := json.Unmarshal(raw["budget_limits"], &m); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits must be an object of period→int|null"})
return
}
for k, v := range m {
if !known[k] {
c.JSON(http.StatusBadRequest, gin.H{"error": "unknown budget period: " + k + " (allowed: hourly, daily, weekly, monthly)"})
return
}
cv := int64(v)
budgetArg = cv
case int64:
if v < 0 {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
if v == nil {
continue // clear this period (null = no limit)
}
if *v < 0 {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget limit for " + k + " must be >= 0 (USD cents)"})
return
}
budgetArg = v
default:
limits[BudgetPeriod(k)] = *v // 0 is valid = block-all for this period
}
} else { // legacy single-monthly
var v *int64
if err := json.Unmarshal(raw["budget_limit"], &v); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be an integer (USD cents) or null"})
return
}
if v != nil {
if *v < 0 {
c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
return
}
limits[PeriodMonthly] = *v // 0 is valid = block-all (legacy semantics)
}
}
// budgetArg == nil means "clear the ceiling"
// Existence check — return 404 for non-existent / removed workspaces.
// Existence check — 404 for non-existent / removed workspaces.
var exists bool
if err := db.DB.QueryRowContext(ctx,
`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1 AND status != 'removed')`,
@@ -127,38 +180,28 @@ func (h *BudgetHandler) PatchBudget(c *gin.Context) {
return
}
// Persist: budget_limits is the SSOT; keep the legacy budget_limit column
// synced to the monthly period so pre-deploy enforcement paths stay coherent
// during the rollout window.
var legacyMonthly interface{}
if m, ok := limits[PeriodMonthly]; ok {
legacyMonthly = m
}
encoded := encodeBudgetLimits(limits)
if _, err := db.DB.ExecContext(ctx,
`UPDATE workspaces SET budget_limit = $2, updated_at = now() WHERE id = $1`,
workspaceID, budgetArg,
`UPDATE workspaces SET budget_limits = $2, budget_limit = $3, updated_at = now() WHERE id = $1`,
workspaceID, encoded, legacyMonthly,
); err != nil {
log.Printf("PatchBudget: update failed for %s: %v", workspaceID, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
return
}
// Re-read the current state so the response reflects exactly what is in
// the DB, including the monthly_spend the agent has already accumulated.
var newLimit sql.NullInt64
var monthlySpend int64
if err := db.DB.QueryRowContext(ctx,
`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
workspaceID,
).Scan(&newLimit, &monthlySpend); err != nil {
resp, err := buildBudgetResponse(ctx, workspaceID, encoded)
if err != nil {
log.Printf("PatchBudget: re-read failed for %s: %v", workspaceID, err)
// Still success — just omit the echo.
c.JSON(http.StatusOK, gin.H{"status": "updated"})
return
}
resp := budgetResponse{
MonthlySpend: monthlySpend,
}
if newLimit.Valid {
limit := newLimit.Int64
resp.BudgetLimit = &limit
remaining := limit - monthlySpend
resp.BudgetRemaining = &remaining
}
c.JSON(http.StatusOK, resp)
}
@@ -0,0 +1,160 @@
package handlers
import (
"context"
"database/sql"
"encoding/json"
"strconv"
"time"
)
// budget_periods.go — SINGLE SOURCE OF TRUTH for the multi-period per-workspace
// LLM budget (#49 follow-up). The supported periods, their rolling windows, the
// per-period spend computation (from the workspace_spend_events ledger), and the
// over-budget decision all live here so the config endpoint (GetBudget/PatchBudget),
// the display, and enforcement (checkWorkspaceBudget) can never drift.
//
// Spend model: the heartbeat records each observed spend INCREMENT into
// workspace_spend_events (recordSpendDelta). Per-period spend is a rolling-window
// SUM over that ledger — so the SERVER owns windowing (the agent keeps reporting
// its cumulative figure unchanged). Rolling (not calendar) windows: no fragile
// month-boundary reset, and "monthly" = a 30-day trailing window.
// BudgetPeriod is one of the supported rolling budget windows.
type BudgetPeriod string
const (
PeriodHourly BudgetPeriod = "hourly"
PeriodDaily BudgetPeriod = "daily"
PeriodWeekly BudgetPeriod = "weekly"
PeriodMonthly BudgetPeriod = "monthly"
)
// budgetPeriodDef pairs a period with its rolling window.
type budgetPeriodDef struct {
Name BudgetPeriod
Window time.Duration
}
// budgetPeriods is the canonical ordered list. ADD A PERIOD = one line here;
// every consumer iterates this slice, so nothing else needs to change.
var budgetPeriods = []budgetPeriodDef{
{PeriodHourly, time.Hour},
{PeriodDaily, 24 * time.Hour},
{PeriodWeekly, 7 * 24 * time.Hour},
{PeriodMonthly, 30 * 24 * time.Hour}, // rolling 30-day window
}
// spendLedgerRetention bounds the ledger: rows older than the largest window
// (+ slack) are never read, so the recorder opportunistically prunes them.
var spendLedgerRetention = 35 * 24 * time.Hour
// parseBudgetLimits decodes the workspaces.budget_limits JSONB into a map of
// period → limit (USD cents). A limit of ZERO is valid and means "block all
// spend for that period" (a $0 ceiling); absent / null / negative / unknown
// keys mean "no limit for that period". Tolerant of a NULL/empty column.
func parseBudgetLimits(raw []byte) map[BudgetPeriod]int64 {
out := make(map[BudgetPeriod]int64, len(budgetPeriods))
if len(raw) == 0 {
return out
}
var m map[string]*int64
if err := json.Unmarshal(raw, &m); err != nil {
return out
}
for _, def := range budgetPeriods {
if v, ok := m[string(def.Name)]; ok && v != nil && *v >= 0 {
out[def.Name] = *v
}
}
return out
}
// encodeBudgetLimits renders a period→limit map back to the canonical JSONB
// shape, keeping only KNOWN periods with a non-negative limit (0 = block-all is
// preserved; a period absent from the map = no limit). Always returns valid JSON.
func encodeBudgetLimits(limits map[BudgetPeriod]int64) []byte {
m := make(map[string]int64, len(limits))
for _, def := range budgetPeriods {
if v, ok := limits[def.Name]; ok && v >= 0 {
m[string(def.Name)] = v
}
}
b, err := json.Marshal(m)
if err != nil {
return []byte("{}")
}
return b
}
// recordSpendDelta appends a positive spend increment to the ledger and
// opportunistically prunes rows past the retention horizon for this workspace.
// No-op for delta <= 0. Errors are returned for the caller to log (non-fatal).
func recordSpendDelta(ctx context.Context, q *sql.DB, workspaceID string, deltaCents int64) error {
if deltaCents <= 0 {
return nil
}
if _, err := q.ExecContext(ctx,
`INSERT INTO workspace_spend_events (workspace_id, delta_cents) VALUES ($1, $2)`,
workspaceID, deltaCents,
); err != nil {
return err
}
// Opportunistic prune (cheap; index-backed). Best-effort — ignore error.
_, _ = q.ExecContext(ctx,
`DELETE FROM workspace_spend_events
WHERE workspace_id = $1 AND occurred_at < now() - $2::interval`,
workspaceID, pgInterval(spendLedgerRetention),
)
return nil
}
// spendByPeriod returns the rolling-window spend (USD cents) for every period,
// computed in a SINGLE query over the ledger. The outer predicate bounds to the
// largest window; per-period FILTERs sum each sub-window. A period with no ledger
// rows reports 0. This is THE spend computation — used by both display + enforcement.
func spendByPeriod(ctx context.Context, q *sql.DB, workspaceID string) (map[BudgetPeriod]int64, error) {
out := make(map[BudgetPeriod]int64, len(budgetPeriods))
for _, def := range budgetPeriods {
out[def.Name] = 0
}
row := q.QueryRowContext(ctx, `
SELECT
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '1 hour'), 0),
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '24 hours'), 0),
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '7 days'), 0),
COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '30 days'), 0)
FROM workspace_spend_events
WHERE workspace_id = $1 AND occurred_at > now() - interval '30 days'
`, workspaceID)
var h, d, w, mo int64
if err := row.Scan(&h, &d, &w, &mo); err != nil {
return out, err
}
out[PeriodHourly], out[PeriodDaily], out[PeriodWeekly], out[PeriodMonthly] = h, d, w, mo
return out, nil
}
// exceededPeriods is PURE: given the configured limits and observed spend, it
// returns the periods whose spend has reached/exceeded their limit (in
// budgetPeriods order). Only periods WITH a positive limit are considered.
// Used by enforcement to decide whether to block.
func exceededPeriods(limits map[BudgetPeriod]int64, spend map[BudgetPeriod]int64) []BudgetPeriod {
var over []BudgetPeriod
for _, def := range budgetPeriods {
limit, ok := limits[def.Name]
if !ok {
continue // no limit configured for this period
}
// limit >= 0 is a real ceiling (0 = block-all). spend >= limit → over.
if spend[def.Name] >= limit {
over = append(over, def.Name)
}
}
return over
}
// pgInterval renders a Go duration as a Postgres-interval string ("N seconds").
func pgInterval(d time.Duration) string {
return strconv.FormatInt(int64(d.Seconds()), 10) + " seconds"
}
@@ -0,0 +1,99 @@
package handlers
import (
"reflect"
"testing"
)
// Pure-logic tests for the multi-period budget SSOT (budget_periods.go). The
// DB-touching helpers (spendByPeriod / recordSpendDelta) are exercised via the
// handler sqlmock tests; here we pin the parsing + the over-budget decision,
// which is where the per-period semantics actually live.
func TestParseBudgetLimits(t *testing.T) {
cases := []struct {
name string
raw string
want map[BudgetPeriod]int64
}{
{"empty", "", map[BudgetPeriod]int64{}},
{"empty-object", "{}", map[BudgetPeriod]int64{}},
{"all-four", `{"hourly":100,"daily":200,"weekly":300,"monthly":400}`,
map[BudgetPeriod]int64{PeriodHourly: 100, PeriodDaily: 200, PeriodWeekly: 300, PeriodMonthly: 400}},
{"null-dropped-zero-kept", `{"hourly":null,"daily":0,"weekly":500}`,
map[BudgetPeriod]int64{PeriodDaily: 0, PeriodWeekly: 500}}, // 0 = block-all, kept
{"negative-dropped", `{"monthly":-5}`, map[BudgetPeriod]int64{}},
{"unknown-key-ignored", `{"yearly":999,"daily":10}`, map[BudgetPeriod]int64{PeriodDaily: 10}},
{"malformed-json", `{not json`, map[BudgetPeriod]int64{}},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got := parseBudgetLimits([]byte(tc.raw))
if !reflect.DeepEqual(got, tc.want) {
t.Errorf("parseBudgetLimits(%q) = %v, want %v", tc.raw, got, tc.want)
}
})
}
}
func TestEncodeBudgetLimits_RoundTrip(t *testing.T) {
in := map[BudgetPeriod]int64{PeriodHourly: 100, PeriodMonthly: 400}
enc := encodeBudgetLimits(in)
got := parseBudgetLimits(enc)
if !reflect.DeepEqual(got, in) {
t.Errorf("round-trip: encode→parse = %v, want %v (enc=%s)", got, in, enc)
}
// unknown periods dropped; 0 (block-all) kept
enc2 := encodeBudgetLimits(map[BudgetPeriod]int64{PeriodDaily: 0, "yearly": 9})
if got := parseBudgetLimits(enc2); !reflect.DeepEqual(got, map[BudgetPeriod]int64{PeriodDaily: 0}) {
t.Errorf("encode kept 0/dropped unknown: parse(%s) = %v, want {daily:0}", enc2, got)
}
}
func TestExceededPeriods(t *testing.T) {
cases := []struct {
name string
limits map[BudgetPeriod]int64
spend map[BudgetPeriod]int64
want []BudgetPeriod
}{
{"no-limits", map[BudgetPeriod]int64{}, map[BudgetPeriod]int64{PeriodHourly: 999}, nil},
{"zero-limit-blocks-all", map[BudgetPeriod]int64{PeriodHourly: 0}, map[BudgetPeriod]int64{PeriodHourly: 0}, []BudgetPeriod{PeriodHourly}},
{"under-all", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 50}, nil},
{"at-limit-is-exceeded", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 100}, []BudgetPeriod{PeriodDaily}},
{"over-limit", map[BudgetPeriod]int64{PeriodHourly: 10}, map[BudgetPeriod]int64{PeriodHourly: 11}, []BudgetPeriod{PeriodHourly}},
{"only-hourly-over", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodMonthly: 1000},
map[BudgetPeriod]int64{PeriodHourly: 50, PeriodMonthly: 200}, []BudgetPeriod{PeriodHourly}},
{"multiple-over-in-order", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodWeekly: 100},
map[BudgetPeriod]int64{PeriodHourly: 99, PeriodWeekly: 100}, []BudgetPeriod{PeriodHourly, PeriodWeekly}},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
got := exceededPeriods(tc.limits, tc.spend)
if !reflect.DeepEqual(got, tc.want) {
t.Errorf("exceededPeriods(%v,%v) = %v, want %v", tc.limits, tc.spend, got, tc.want)
}
})
}
}
// TestBudgetPeriods_AllReachable guards the SSOT list: every declared period has
// a positive window and a unique name (a typo'd duplicate would silently break
// per-period accounting).
func TestBudgetPeriods_Wellformed(t *testing.T) {
seen := map[BudgetPeriod]bool{}
for _, d := range budgetPeriods {
if d.Window <= 0 {
t.Errorf("period %s has non-positive window %v", d.Name, d.Window)
}
if seen[d.Name] {
t.Errorf("duplicate period name %s", d.Name)
}
seen[d.Name] = true
}
for _, p := range []BudgetPeriod{PeriodHourly, PeriodDaily, PeriodWeekly, PeriodMonthly} {
if !seen[p] {
t.Errorf("period %s missing from budgetPeriods SSOT list", p)
}
}
}
+191 -89
View File
@@ -12,15 +12,25 @@ import (
"github.com/gin-gonic/gin"
)
// Multi-period budget (#49): GET/PATCH now read workspaces.budget_limits (jsonb)
// and compute per-period spend from the workspace_spend_events ledger
// (spendByPeriod — matched here by the "FROM workspace_spend_events" fragment).
// The legacy budget_limit/monthly_spend response fields are still emitted
// (monthly period) for rollout back-compat, and the legacy {"budget_limit":N}
// PATCH shape still works.
// spendRows builds the 4-column row spendByPeriod scans (hourly,daily,weekly,monthly).
func spendRows(h, d, w, m int64) *sqlmock.Rows {
return sqlmock.NewRows([]string{"h", "d", "w", "mo"}).AddRow(h, d, w, m)
}
// ==================== GET /workspaces/:id/budget ====================
// TestBudgetGet_NotFound verifies that GET /budget returns 404 for an unknown
// workspace ID (ErrNoRows from the budget query).
func TestBudgetGet_NotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-not-there").
WillReturnError(sql.ErrNoRows)
@@ -29,8 +39,7 @@ func TestBudgetGet_NotFound(t *testing.T) {
c.Params = gin.Params{{Key: "id", Value: "ws-not-there"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-not-there/budget", nil)
h := NewBudgetHandler()
h.GetBudget(c)
NewBudgetHandler().GetBudget(c)
if w.Code != http.StatusNotFound {
t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
@@ -40,12 +49,11 @@ func TestBudgetGet_NotFound(t *testing.T) {
}
}
// TestBudgetGet_DBError verifies that a non-ErrNoRows DB error returns 500.
func TestBudgetGet_DBError(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-db-err").
WillReturnError(sql.ErrConnDone)
@@ -54,8 +62,7 @@ func TestBudgetGet_DBError(t *testing.T) {
c.Params = gin.Params{{Key: "id", Value: "ws-db-err"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-db-err/budget", nil)
h := NewBudgetHandler()
h.GetBudget(c)
NewBudgetHandler().GetBudget(c)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500, got %d: %s", w.Code, w.Body.String())
@@ -65,24 +72,23 @@ func TestBudgetGet_DBError(t *testing.T) {
}
}
// TestBudgetGet_NoLimit verifies that budget_limit and budget_remaining are
// null when the workspace has no budget ceiling configured.
func TestBudgetGet_NoLimit(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-free").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(nil, int64(42)))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{}`)))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-free").
WillReturnRows(spendRows(0, 0, 0, 42))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-free"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-free/budget", nil)
h := NewBudgetHandler()
h.GetBudget(c)
NewBudgetHandler().GetBudget(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -105,24 +111,23 @@ func TestBudgetGet_NoLimit(t *testing.T) {
}
}
// TestBudgetGet_WithLimit verifies that budget_limit, monthly_spend, and
// budget_remaining are all returned correctly when a ceiling is set.
func TestBudgetGet_WithLimit(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-capped").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(int64(500), int64(123)))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-capped").
WillReturnRows(spendRows(0, 0, 0, 123))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-capped"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-capped/budget", nil)
h := NewBudgetHandler()
h.GetBudget(c)
NewBudgetHandler().GetBudget(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -137,7 +142,6 @@ func TestBudgetGet_WithLimit(t *testing.T) {
if resp["monthly_spend"] != float64(123) {
t.Errorf("expected monthly_spend=123, got %v", resp["monthly_spend"])
}
// budget_remaining = 500 - 123 = 377
if resp["budget_remaining"] != float64(377) {
t.Errorf("expected budget_remaining=377, got %v", resp["budget_remaining"])
}
@@ -146,24 +150,23 @@ func TestBudgetGet_WithLimit(t *testing.T) {
}
}
// TestBudgetGet_OverBudget verifies that budget_remaining can be negative
// when monthly_spend has already exceeded budget_limit.
func TestBudgetGet_OverBudget(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-over").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(int64(100), int64(150)))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":100}`)))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-over").
WillReturnRows(spendRows(0, 0, 0, 150))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-over"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-over/budget", nil)
h := NewBudgetHandler()
h.GetBudget(c)
NewBudgetHandler().GetBudget(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -172,7 +175,6 @@ func TestBudgetGet_OverBudget(t *testing.T) {
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse response: %v", err)
}
// budget_remaining = 100 - 150 = -50 (negative, but we store actual value)
if resp["budget_remaining"] != float64(-50) {
t.Errorf("expected budget_remaining=-50, got %v", resp["budget_remaining"])
}
@@ -181,10 +183,59 @@ func TestBudgetGet_OverBudget(t *testing.T) {
}
}
// TestBudgetGet_MultiPeriod pins the new per-period shape: each period reports
// its own limit/spend/remaining, and an over-budget sub-period is visible.
func TestBudgetGet_MultiPeriod(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-mp").
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).
AddRow([]byte(`{"hourly":100,"daily":1000}`)))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-mp").
WillReturnRows(spendRows(120, 300, 300, 300)) // hourly over (120>=100)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-mp"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-mp/budget", nil)
NewBudgetHandler().GetBudget(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp struct {
Periods map[string]struct {
Limit *int64 `json:"limit"`
Spend int64 `json:"spend"`
Remaining *int64 `json:"remaining"`
} `json:"periods"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse response: %v", err)
}
if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
t.Errorf("hourly.limit: want 100, got %v", resp.Periods["hourly"].Limit)
}
if resp.Periods["hourly"].Spend != 120 {
t.Errorf("hourly.spend: want 120, got %d", resp.Periods["hourly"].Spend)
}
if r := resp.Periods["hourly"].Remaining; r == nil || *r != -20 {
t.Errorf("hourly.remaining: want -20, got %v", r)
}
if resp.Periods["weekly"].Limit != nil {
t.Errorf("weekly.limit: want null (unset), got %v", resp.Periods["weekly"].Limit)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met: %v", err)
}
}
// ==================== PATCH /workspaces/:id/budget ====================
// TestBudgetPatch_MissingField verifies that PATCH /budget with no budget_limit
// field in the body returns 400.
func TestBudgetPatch_MissingField(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
@@ -196,15 +247,13 @@ func TestBudgetPatch_MissingField(t *testing.T) {
bytes.NewBufferString(`{"other_field":123}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
}
}
// TestBudgetPatch_InvalidBody verifies that a malformed JSON body returns 400.
func TestBudgetPatch_InvalidBody(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
@@ -216,15 +265,13 @@ func TestBudgetPatch_InvalidBody(t *testing.T) {
bytes.NewBufferString(`not json`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
}
}
// TestBudgetPatch_NegativeValue verifies that a negative budget_limit is rejected.
func TestBudgetPatch_NegativeValue(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
@@ -236,15 +283,13 @@ func TestBudgetPatch_NegativeValue(t *testing.T) {
bytes.NewBufferString(`{"budget_limit":-1}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for negative budget_limit, got %d: %s", w.Code, w.Body.String())
}
}
// TestBudgetPatch_InvalidType verifies that a non-numeric budget_limit returns 400.
func TestBudgetPatch_InvalidType(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
@@ -256,16 +301,32 @@ func TestBudgetPatch_InvalidType(t *testing.T) {
bytes.NewBufferString(`{"budget_limit":"not-a-number"}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for string budget_limit, got %d: %s", w.Code, w.Body.String())
}
}
// TestBudgetPatch_WorkspaceNotFound verifies that PATCH /budget returns 404
// when the workspace doesn't exist.
// TestBudgetPatch_UnknownPeriod rejects an unsupported period key.
func TestBudgetPatch_UnknownPeriod(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-badperiod"}}
c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-badperiod/budget",
bytes.NewBufferString(`{"budget_limits":{"yearly":100}}`))
c.Request.Header.Set("Content-Type", "application/json")
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for unknown period, got %d: %s", w.Code, w.Body.String())
}
}
func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
@@ -281,8 +342,7 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
bytes.NewBufferString(`{"budget_limit":500}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusNotFound {
t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
@@ -292,25 +352,20 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
}
}
// TestBudgetPatch_SetLimit verifies that PATCH /budget with a positive value
// updates the DB and returns the new budget state.
// TestBudgetPatch_SetLimit (legacy monthly shape) updates + returns new state.
func TestBudgetPatch_SetLimit(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
// Existence probe
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
WithArgs("ws-set-limit").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
// UPDATE
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
WithArgs("ws-set-limit", int64(500)).
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
WithArgs("ws-set-limit", sqlmock.AnyArg(), int64(500)).
WillReturnResult(sqlmock.NewResult(0, 1))
// Re-read for response
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-set-limit").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(int64(500), int64(200)))
WillReturnRows(spendRows(0, 0, 0, 200))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -319,8 +374,7 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
bytes.NewBufferString(`{"budget_limit":500}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -335,7 +389,6 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
if resp["monthly_spend"] != float64(200) {
t.Errorf("expected monthly_spend=200, got %v", resp["monthly_spend"])
}
// budget_remaining = 500 - 200 = 300
if resp["budget_remaining"] != float64(300) {
t.Errorf("expected budget_remaining=300, got %v", resp["budget_remaining"])
}
@@ -344,8 +397,59 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
}
}
// TestBudgetPatch_ClearLimit verifies that PATCH /budget with budget_limit=null
// clears the ceiling, making budget_limit and budget_remaining null in the response.
// TestBudgetPatch_SetMultiPeriod sets several periods at once and verifies the
// per-period response.
func TestBudgetPatch_SetMultiPeriod(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
WithArgs("ws-mp-set").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
// no monthly in payload → legacy budget_limit column set to NULL
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
WithArgs("ws-mp-set", sqlmock.AnyArg(), nil).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-mp-set").
WillReturnRows(spendRows(10, 20, 30, 40))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-mp-set"}}
c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-mp-set/budget",
bytes.NewBufferString(`{"budget_limits":{"hourly":100,"daily":200,"monthly":null}}`))
c.Request.Header.Set("Content-Type", "application/json")
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var resp struct {
Periods map[string]struct {
Limit *int64 `json:"limit"`
Spend int64 `json:"spend"`
} `json:"periods"`
BudgetLimit *int64 `json:"budget_limit"`
}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse response: %v", err)
}
if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
t.Errorf("hourly.limit want 100, got %v", resp.Periods["hourly"].Limit)
}
if resp.Periods["daily"].Limit == nil || *resp.Periods["daily"].Limit != 200 {
t.Errorf("daily.limit want 200, got %v", resp.Periods["daily"].Limit)
}
if resp.BudgetLimit != nil {
t.Errorf("monthly cleared → budget_limit should be null, got %v", *resp.BudgetLimit)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met: %v", err)
}
}
func TestBudgetPatch_ClearLimit(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
@@ -353,15 +457,12 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
WithArgs("ws-clear-limit").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
// UPDATE with NULL
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
WithArgs("ws-clear-limit", nil).
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
WithArgs("ws-clear-limit", sqlmock.AnyArg(), nil).
WillReturnResult(sqlmock.NewResult(0, 1))
// Re-read — budget_limit is now NULL
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-clear-limit").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(nil, int64(50)))
WillReturnRows(spendRows(0, 0, 0, 50))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -370,8 +471,7 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
bytes.NewBufferString(`{"budget_limit":null}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -391,8 +491,6 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
}
}
// TestBudgetPatch_UpdateDBError verifies that a DB error during the UPDATE
// returns 500.
func TestBudgetPatch_UpdateDBError(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
@@ -400,8 +498,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
WithArgs("ws-patch-dberr").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
WithArgs("ws-patch-dberr", int64(500)).
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
WithArgs("ws-patch-dberr", sqlmock.AnyArg(), int64(500)).
WillReturnError(sql.ErrConnDone)
w := httptest.NewRecorder()
@@ -411,8 +509,7 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
bytes.NewBufferString(`{"budget_limit":500}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected 500 on UPDATE error, got %d: %s", w.Code, w.Body.String())
@@ -422,8 +519,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
}
}
// TestBudgetPatch_ZeroLimit verifies that budget_limit=0 is accepted (it means
// every A2A call is blocked — useful to pause a workspace's LLM spend entirely).
// TestBudgetPatch_ZeroLimit verifies budget_limit=0 is accepted + stored (0 =
// block-all: every period call is blocked — pauses the workspace's spend).
func TestBudgetPatch_ZeroLimit(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
@@ -431,13 +528,12 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
WithArgs("ws-zero-limit").
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
WithArgs("ws-zero-limit", int64(0)).
mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
WithArgs("ws-zero-limit", sqlmock.AnyArg(), int64(0)).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-zero-limit").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(int64(0), int64(0)))
WillReturnRows(spendRows(0, 0, 0, 0))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -446,11 +542,17 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
bytes.NewBufferString(`{"budget_limit":0}`))
c.Request.Header.Set("Content-Type", "application/json")
h := NewBudgetHandler()
h.PatchBudget(c)
NewBudgetHandler().PatchBudget(c)
if w.Code != http.StatusOK {
t.Errorf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
t.Fatalf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse response: %v", err)
}
if resp["budget_limit"] != float64(0) {
t.Errorf("expected budget_limit=0 (block-all), got %v", resp["budget_limit"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met: %v", err)
@@ -0,0 +1,427 @@
package handlers
// cross_tenant_isolation_test.go — #1953 regression tests.
//
// Three workspace-server paths historically derived an "org-root sibling set"
// as `WHERE parent_id IS NULL`, which matches EVERY tenant's org root (the
// workspaces table has no org_id column) → cross-tenant data exposure:
//
// 1. GET /registry/:id/peers (discovery.Peers)
// 2. MCP toolListPeers (mcp_tools.toolListPeers)
// 3. a2a routing (a2a_proxy.proxyA2ARequest → resolveAgentURL)
//
// These tests assert that a workspace in a DIFFERENT org is never returned as a
// peer and that a2a refuses to resolve/route to a workspace outside the caller's
// org, while same-org peers/targets still work. They reuse the SAME parent_id-
// chain org scoping the OFFSEC-015 broadcast fix introduced (org_scope.go).
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
// dbHandleForTest returns the global sqlmock-backed *sql.DB that setupTestDB
// installs, for tests that need to hand a *sql.DB to a component (e.g.
// MCPHandler.database, sameOrg) rather than relying on the package-global.
func dbHandleForTest() *sql.DB { return db.DB }
// peerColsForIsolation matches queryPeerMaps' SELECT column set.
var peerColsForIsolation = []string{
"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks",
}
// -------------------------------------------------------------------------
// Path 1: GET /registry/:id/peers — discovery.Peers
// -------------------------------------------------------------------------
// TestPeers_CrossTenant_OrgRootNotLeaked is the core #1953 regression for the
// discovery path. The caller is an org root (parent_id IS NULL). Pre-fix the
// handler ran `SELECT ... WHERE w.parent_id IS NULL AND w.id != $1`, returning
// every OTHER tenant's org root as a "sibling" peer. Post-fix an org-root caller
// issues NO sibling query — its only peers are its own children. If the handler
// regressed and issued the cross-tenant sibling query, sqlmock would report an
// unexpected query (the expectation below is intentionally NOT registered) and
// the test fails.
func TestPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewDiscoveryHandler()
// Behavioural leak test: register the OLD leaky `parent_id IS NULL` sibling
// query so that IF the handler still issues it, it returns another tenant's
// org root (org-b-root). The fix removes that query for an org-root caller,
// so org-b-root must never appear in the output. Unordered matching makes
// the leaky-sibling expectation optional — the fix simply never consumes it.
mock.MatchExpectationsInOrder(false)
caller := "org-a-root" // parent_id IS NULL — an org root for tenant A
// parent_id lookup → NULL (caller is an org root)
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
// LEAKY sibling query (pre-fix). Returns a DIFFERENT tenant's org root.
// The fix must NOT issue this query; if it does, org-b-root leaks into the
// peer list and the output assertion below fails.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
AddRow("org-b-root", "Org B Root", "lead", 0, "online", []byte("null"), "http://b-root", nil, 0))
// Children query — caller's own org-A children only. Return one child.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
WithArgs(caller, caller).
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
AddRow("org-a-child", "Org A Child", "worker", 1, "online", []byte("null"), "http://a-child", caller, 0))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: caller}}
c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil)
handler.Peers(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var peers []map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil {
t.Fatalf("failed to parse response: %v", err)
}
// The other-tenant org root must NEVER appear; only the same-org child.
for _, p := range peers {
if id, _ := p["id"].(string); id == "org-b-root" {
t.Fatalf("cross-tenant leak (#1953): org-b-root appeared in org-a-root's peer list: %v", peers)
}
}
if len(peers) != 1 {
t.Fatalf("expected exactly 1 peer (same-org child), got %d: %v", len(peers), peers)
}
// NOTE: ExpectationsWereMet is intentionally NOT asserted — the leaky
// sibling expectation is deliberately left unconsumed by the fixed path.
}
// TestPeers_SameOrg_SiblingsStillWork is the positive companion: a non-root
// child caller still sees its same-org siblings, children, and parent. This
// guards against the fix over-scoping and breaking legitimate intra-org
// discovery.
func TestPeers_SameOrg_SiblingsStillWork(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewDiscoveryHandler()
caller := "org-a-child-1"
parent := "org-a-root"
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
// Siblings — scoped to the shared parent (one tenant).
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
WithArgs(parent, caller).
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
AddRow("org-a-child-2", "Org A Sibling", "worker", 1, "online", []byte("null"), "http://a-sib", parent, 0))
// Children — none.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs(caller, caller).
WillReturnRows(sqlmock.NewRows(peerColsForIsolation))
// Parent.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs(parent, caller).
WillReturnRows(sqlmock.NewRows(peerColsForIsolation).
AddRow(parent, "Org A Root", "lead", 0, "online", []byte("null"), "http://a-root", nil, 0))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: caller}}
c.Request = httptest.NewRequest("GET", "/registry/"+caller+"/peers", nil)
handler.Peers(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var peers []map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &peers); err != nil {
t.Fatalf("failed to parse response: %v", err)
}
// Sibling + parent = 2 same-org peers.
if len(peers) != 2 {
t.Fatalf("expected 2 same-org peers (sibling + parent), got %d: %v", len(peers), peers)
}
names := map[string]bool{}
for _, p := range peers {
names[fmt.Sprint(p["name"])] = true
}
if !names["Org A Sibling"] || !names["Org A Root"] {
t.Errorf("expected same-org sibling + parent in peer list, got %v", names)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// -------------------------------------------------------------------------
// Path 2: MCP toolListPeers — mcp_tools.toolListPeers
// -------------------------------------------------------------------------
// mcpPeerCols matches toolListPeers' SELECT column set.
var mcpPeerCols = []string{"id", "name", "role", "status", "tier"}
// TestToolListPeers_CrossTenant_OrgRootNotLeaked is the #1953 regression for
// the MCP path. Same shape as the discovery test: an org-root caller must NOT
// enumerate other tenants' org roots. The cross-tenant `parent_id IS NULL`
// sibling query is intentionally not registered, so if it runs sqlmock fails.
func TestToolListPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
mock := setupTestDB(t)
mock.MatchExpectationsInOrder(false)
h := &MCPHandler{database: dbHandleForTest()}
caller := "org-a-root"
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
// LEAKY sibling query (pre-fix). Returns another tenant's org root. The fix
// must NOT issue this for an org-root caller; if it does, org-b-root leaks
// into the output and the assertion below fails. Left optional via
// unordered matching, so the fixed path simply never consumes it.
mock.ExpectQuery("WHERE w.parent_id IS NULL AND w.id != \\$1").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
AddRow("org-b-root", "Org B Root", "lead", "online", 0))
// Children — caller's own org-A children only.
mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
AddRow("org-a-child", "Org A Child", "worker", "online", 1))
out, err := h.toolListPeers(context.Background(), caller)
if err != nil {
t.Fatalf("toolListPeers returned error: %v", err)
}
if strings.Contains(out, "org-b-root") || strings.Contains(out, "Org B Root") {
t.Fatalf("cross-tenant leak (#1953): another tenant's org root appeared in toolListPeers output:\n%s", out)
}
if !strings.Contains(out, "org-a-child") {
t.Errorf("same-org child missing from toolListPeers output:\n%s", out)
}
// ExpectationsWereMet intentionally NOT asserted — leaky sibling expectation
// is deliberately left unconsumed by the fixed path.
}
// TestToolListPeers_SameOrg_SiblingsStillWork — positive companion for the MCP
// path: a non-root child still enumerates its same-org siblings + children + parent.
func TestToolListPeers_SameOrg_SiblingsStillWork(t *testing.T) {
mock := setupTestDB(t)
h := &MCPHandler{database: dbHandleForTest()}
caller := "org-a-child-1"
parent := "org-a-root"
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
// Siblings — scoped to shared parent.
mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
WithArgs(parent, caller).
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
AddRow("org-a-child-2", "Org A Sibling", "worker", "online", 1))
// Children — none.
mock.ExpectQuery("WHERE w.parent_id = \\$1 AND w.status").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows(mcpPeerCols))
// Parent.
mock.ExpectQuery("WHERE w.id = \\$1 AND w.status").
WithArgs(parent).
WillReturnRows(sqlmock.NewRows(mcpPeerCols).
AddRow(parent, "Org A Root", "lead", "online", 0))
out, err := h.toolListPeers(context.Background(), caller)
if err != nil {
t.Fatalf("toolListPeers returned error: %v", err)
}
if !strings.Contains(out, "Org A Sibling") || !strings.Contains(out, "Org A Root") {
t.Errorf("expected same-org sibling + parent in toolListPeers output:\n%s", out)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// -------------------------------------------------------------------------
// Path 3: a2a routing — a2a_proxy.proxyA2ARequest / resolveAgentURL
// -------------------------------------------------------------------------
// TestProxyA2A_CrossTenant_RoutingDenied is the #1953 regression for a2a
// routing. Caller and target are both org roots (parent_id IS NULL) belonging
// to DIFFERENT tenants. Pre-fix, CanCommunicate's "root-level siblings" rule
// waved this through and resolveAgentURL routed to the foreign tenant. Post-fix
// the org-scope guard resolves each to a different org root and returns 403
// BEFORE resolveAgentURL/dispatch.
func TestProxyA2A_CrossTenant_RoutingDenied(t *testing.T) {
mock := setupTestDB(t)
mr := setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
caller := "org-a-root"
target := "org-b-root" // different tenant
// A URL exists for the target; the guard must deny BEFORE it is used.
mr.Set(fmt.Sprintf("ws:%s:url", target), "http://localhost:1")
// CanCommunicate: both root-level (parent_id NULL) → its weak "root-level
// siblings" rule ALLOWS this. The org guard must catch it afterward.
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, nil))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(target).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, nil))
// #1953 org-scope guard: caller resolves to org-a-root, target to org-b-root
// → different orgs → 403. (Each org root resolves to itself.)
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller))
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(target).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: target}}
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"cross-tenant"}]}}}`
c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
c.Request.Header.Set("X-Workspace-ID", caller)
handler.ProxyA2A(c)
if w.Code != http.StatusForbidden {
t.Fatalf("expected 403 for cross-tenant a2a routing, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("body not JSON: %v", err)
}
if msg, _ := resp["error"].(string); !strings.Contains(msg, "different org") {
t.Errorf("expected cross-org denial message, got %v", resp["error"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestResolveAgentURL_CrossTenant_RejectedViaSameOrg is a direct unit test of
// the sameOrg primitive that gates resolveAgentURL: a target in a different org
// must be reported as NOT same-org, so the a2a guard rejects it before
// resolveAgentURL is ever called.
func TestResolveAgentURL_CrossTenant_RejectedViaSameOrg(t *testing.T) {
mock := setupTestDB(t)
caller := "org-a-root"
target := "org-b-root"
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller))
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(target).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target))
ok, err := sameOrg(context.Background(), dbHandleForTest(), caller, target)
if err != nil {
t.Fatalf("sameOrg returned unexpected error: %v", err)
}
if ok {
t.Errorf("expected cross-tenant workspaces to be reported as DIFFERENT orgs, got sameOrg=true")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestProxyA2A_SameOrg_RoutingAllowed — positive companion for a2a: two
// same-org siblings route successfully (mirrors TestProxyA2A_CallerIDPropagated
// but named to document the #1953 same-org allow path).
func TestProxyA2A_SameOrg_RoutingAllowed(t *testing.T) {
mock := setupTestDB(t)
mr := setupTestRedis(t)
allowLoopbackForTest(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
waitForHandlerAsyncBeforeDBCleanup(t, handler)
caller := "org-a-child-1"
target := "org-a-child-2"
parent := "org-a-root"
agentServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
fmt.Fprint(w, `{"jsonrpc":"2.0","id":"1","result":{}}`)
}))
defer agentServer.Close()
mr.Set(fmt.Sprintf("ws:%s:url", target), agentServer.URL)
// CanCommunicate — siblings under shared parent.
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, parent))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(target).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, parent))
// #1953 org guard — both resolve to the same org root → allowed.
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(caller).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent))
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(target).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(parent))
expectBudgetCheck(mock, target)
mock.ExpectExec("INSERT INTO activity_logs").WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: target}}
body := `{"method":"message/send","params":{"message":{"role":"user","parts":[{"text":"same-org"}]}}}`
c.Request = httptest.NewRequest("POST", "/workspaces/"+target+"/a2a", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
c.Request.Header.Set("X-Workspace-ID", caller)
handler.ProxyA2A(c)
time.Sleep(50 * time.Millisecond) // allow the async logA2ASuccess INSERT to flush
if w.Code != http.StatusOK {
t.Fatalf("expected 200 for same-org a2a routing, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
@@ -140,7 +140,14 @@ func buildHTTPResponse(statusCode int, body string) []byte {
}
// setupIntegrationFixtures inserts the rows executeDelegation requires:
// - workspaces: source and target (siblings, parent_id=NULL so CanCommunicate=true)
// - workspaces: source (org root) + target as its CHILD, so both live in the
// SAME org. CanCommunicate=true (parent↔child) AND the #1953 sameOrg() guard
// in proxyA2ARequest passes (both resolve to the same org root). A real
// delegation happens INSIDE one org. (Previously both were parent_id=NULL —
// two DISTINCT org roots — which only "communicated" via CanCommunicate's
// root-sibling rule; #1953 added a sameOrg() guard that now denies routing
// between two org roots as cross-tenant, so the success-path tests below
// must use a same-org source/target pair.)
// - activity_logs: the 'delegate' row that updateDelegationStatus UPDATE will find
// - delegations: the ledger row that recordLedgerStatus will UPDATE
//
@@ -148,13 +155,14 @@ func buildHTTPResponse(statusCode int, body string) []byte {
func setupIntegrationFixtures(t *testing.T, conn *sql.DB) func() {
t.Helper()
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
sourceID := integrationTestSourceID // org root (parent_id NULL); target hangs off it
for _, ws := range []struct {
id string
name string
parentID *string
}{
{integrationTestSourceID, "test-source", nil},
{integrationTestTargetID, "test-target", nil},
{integrationTestTargetID, "test-target", &sourceID}, // child of source → same org
} {
if _, err := conn.ExecContext(ctx,
`INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`,
@@ -510,6 +518,94 @@ func TestIntegration_ExecuteDelegation_RedisDown_FallsBackToDB(t *testing.T) {
}
}
// TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain is the regression gate
// for the org_scope.go recursive-CTE bug (#1953 follow-up). The sqlmock unit
// tests feed sameOrg() a pre-computed root_id row, so they CANNOT catch a wrong
// CTE — they assume it already returns the right value. Only a real Postgres
// run exercises orgRootSubtreeCTE itself.
//
// The bug: the CTE carried `id AS root_id` from the recursive SEED, so a
// non-root workspace resolved to ITSELF instead of its topmost ancestor. That
// made sameOrg() return false for two genuinely same-org workspaces and 403 a
// legitimate same-org a2a route (over-block). This test seeds a real
// root → child → grandchild chain plus a separate org root, and asserts:
// - every node in the chain resolves to the SAME org root (root, child, grandchild)
// - two workspaces in the same chain are sameOrg (incl. grandchild ↔ root)
// - a workspace in a DIFFERENT chain is NOT sameOrg (cross-tenant stays closed)
func TestIntegration_SameOrg_RealCTE_ResolvesAncestorChain(t *testing.T) {
conn := integrationDB(t)
const (
rootA = "11111111-1111-1111-1111-111111111111"
childA = "22222222-2222-2222-2222-222222222222"
grandchildA = "33333333-3333-3333-3333-333333333333"
rootB = "44444444-4444-4444-4444-444444444444"
)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
t.Cleanup(func() {
c2, cancel2 := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel2()
// Delete leaf-first to respect the parent_id self-FK.
for _, id := range []string{grandchildA, childA, rootA, rootB} {
conn.ExecContext(c2, `DELETE FROM workspaces WHERE id = $1`, id)
}
})
// Insert parent-before-child to satisfy the self-referential FK.
seed := []struct {
id, name string
parent *string
}{
{rootA, "org-a-root", nil},
{childA, "org-a-child", strPtr(rootA)},
{grandchildA, "org-a-grandchild", strPtr(childA)},
{rootB, "org-b-root", nil},
}
for _, s := range seed {
if _, err := conn.ExecContext(ctx,
`INSERT INTO workspaces (id, name, parent_id) VALUES ($1::uuid, $2, $3) ON CONFLICT (id) DO NOTHING`,
s.id, s.name, s.parent); err != nil {
t.Fatalf("seed %s: %v", s.name, err)
}
}
// Every node in chain A must resolve to rootA via the REAL CTE.
for _, id := range []string{rootA, childA, grandchildA} {
got, err := orgRootID(ctx, conn, id)
if err != nil {
t.Fatalf("orgRootID(%s): %v", id, err)
}
if got != rootA {
t.Errorf("orgRootID(%s) = %q, want rootA %q (CTE must walk to topmost ancestor)", id, got, rootA)
}
}
// Same-org positives — including the grandchild↔root pair that the buggy
// CTE got wrong.
for _, pair := range [][2]string{{childA, grandchildA}, {rootA, grandchildA}, {rootA, childA}} {
ok, err := sameOrg(ctx, conn, pair[0], pair[1])
if err != nil {
t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err)
}
if !ok {
t.Errorf("sameOrg(%s,%s) = false, want true (same org chain)", pair[0], pair[1])
}
}
// Cross-org negative — isolation must stay closed.
for _, pair := range [][2]string{{rootA, rootB}, {grandchildA, rootB}, {childA, rootB}} {
ok, err := sameOrg(ctx, conn, pair[0], pair[1])
if err != nil {
t.Fatalf("sameOrg(%s,%s): %v", pair[0], pair[1], err)
}
if ok {
t.Errorf("sameOrg(%s,%s) = true, want false (different orgs — cross-tenant must stay denied)", pair[0], pair[1])
}
}
}
// extractHostPort parses "http://127.0.0.1:PORT/" and returns "127.0.0.1:PORT".
func extractHostPort(rawURL string) string {
// Simple parse: strip "http://" prefix and trailing slash.
@@ -1059,13 +1059,25 @@ func expectExecuteDelegationBase(mock sqlmock.Sqlmock) {
WillReturnResult(sqlmock.NewResult(0, 1))
// CanCommunicate: getWorkspaceRef(source) + getWorkspaceRef(target).
// Both are root-level workspaces (parent_id=NULL) → root-level siblings → allowed.
// Source and target are siblings under one shared parent (one tenant) →
// CanCommunicate allowed. (#1953: they must NOT both be parent_id=NULL —
// two distinct org roots are now treated as DIFFERENT orgs and routing
// between them is denied. A real delegation happens inside one org.)
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(testDeliverySourceID).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliverySourceID, "ws-org-root-159"))
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
WithArgs(testDeliveryTargetID).
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, nil))
WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(testDeliveryTargetID, "ws-org-root-159"))
// #1953 cross-tenant guard: same-org check after CanCommunicate. Both
// resolve to the same org root → routing allowed.
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(testDeliverySourceID).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159"))
mock.ExpectQuery("WITH RECURSIVE org_chain AS").
WithArgs(testDeliveryTargetID).
WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("ws-org-root-159"))
// resolveAgentURL: test callers always set the URL in Redis (mr.Set ws:{id}:url),
// so resolveAgentURL gets a cache hit and never falls back to DB.
@@ -1,464 +0,0 @@
package handlers
// derive_provider_drift_test.go — behavior-based AST/text drift gate.
//
// Why this exists: PR #2535 introduced a Go port of derive-provider.sh
// (see deriveProviderFromModelSlug in workspace_provision.go) so the
// workspace-server can persist LLM_PROVIDER into workspace_secrets at
// provision time. That created two sources of truth:
//
// 1. molecule-ai-workspace-template-hermes/scripts/derive-provider.sh —
// runs inside the container at boot, has the final say on which
// provider hermes targets (writes ~/.hermes/config.yaml's
// model.provider field). The shell script lives in a separate
// OSS repo, so we vendor a snapshot at testdata/derive-provider.sh
// to keep this gate hermetic.
// 2. workspace-server/internal/handlers/workspace_provision.go's
// deriveProviderFromModelSlug — runs at provision time on the
// platform side so LLM_PROVIDER lands in workspace_secrets and
// survives Save+Restart.
//
// If a future PR adds a new provider prefix to one but not the other,
// the workspace-server's persisted LLM_PROVIDER silently disagrees
// with what the container's derive-provider.sh produces. The container
// wins (it writes the actual config.yaml), so the workspace-server's
// persisted value becomes stale and misleading without anything
// flipping red in CI.
//
// This gate pins the invariant that the *prefix set* the two functions
// know about is identical, modulo a small hardcoded acceptedDivergences
// map for the two intentional differences documented in
// deriveProviderFromModelSlug's doc comment (nousresearch/* and
// openai/* both fall back to "openrouter" at provision time because
// the runtime env that picks "nous" / "custom" isn't available yet).
//
// Pattern: the "behavior-based AST gate" from PR #2367 / memory
// feedback_behavior_based_ast_gates — pin invariants by what a
// function maps, not by what it's named. Walks the actual Go AST of
// deriveProviderFromModelSlug's switch statement so a rename or a
// duplicate function in another file can't sneak past the gate.
//
// Task: #242. Companion to the table-driven mapping test in
// workspace_provision_shared_test.go (TestDeriveProviderFromModelSlug)
// which pins the *values*; this test pins the *coverage* of the
// prefix set itself.
//
// Hermetic: reads two files (vendored shell script + Go source) from
// paths relative to the test package directory and parses them
// in-process. No network, no docker, no DB. The vendored shell script
// at testdata/derive-provider.sh is a snapshot of the upstream OSS
// template repo's script — refresh it via the cp command in that file's
// header when upstream changes.
import (
"go/ast"
"go/parser"
"go/token"
"os"
"regexp"
"sort"
"strconv"
"strings"
"testing"
)
// acceptedDivergences pins the prefixes where the Go port intentionally
// differs from derive-provider.sh. Each entry's value is the provider
// the Go function returns; the shell would (at runtime, with the right
// env keys present) return something else. Documented in
// deriveProviderFromModelSlug's doc comment in workspace_provision.go.
//
// If a NEW divergence appears, this test fails and the engineer must
// either (a) align the Go function with the shell, or (b) add the
// prefix here with a comment explaining why the divergence is
// intentional and safe at provision time.
var acceptedDivergences = map[string]string{
// Shell: "nous" if HERMES_API_KEY/NOUS_API_KEY set, else "openrouter".
// Go: "openrouter" unconditionally — runtime keys aren't loaded at
// provision time. derive-provider.sh upgrades to "nous" at boot
// when the keys are present.
"nousresearch": "openrouter",
// Shell: "custom" if OPENAI_API_KEY set, "openrouter" if OPENROUTER_API_KEY
// set, else "openrouter" as a no-key fallback.
// Go: "openrouter" unconditionally — same reason as nousresearch/*.
// derive-provider.sh upgrades to "custom" at boot when
// OPENAI_API_KEY is present.
"openai": "openrouter",
}
// TestDeriveProviderDrift_ShellAndGoStayInSync is the drift gate.
// It extracts the prefix→provider mapping from both sources and
// asserts:
//
// 1. Every prefix the shell knows about, the Go function also handles
// (returning either the same provider OR the value pinned in
// acceptedDivergences for that prefix).
// 2. Every prefix the Go function handles (extracted from its switch
// statement via go/ast), the shell case statement also lists.
func TestDeriveProviderDrift_ShellAndGoStayInSync(t *testing.T) {
t.Parallel()
shellMap := loadShellPrefixMap(t)
goMap := loadGoPrefixMap(t)
if len(shellMap) == 0 {
t.Fatalf("parsed zero prefixes from derive-provider.sh — regex likely broke; rebuild parser before trusting this gate")
}
if len(goMap) == 0 {
t.Fatalf("parsed zero prefixes from deriveProviderFromModelSlug — AST walk likely broke; rebuild parser before trusting this gate")
}
// Direction 1: every shell prefix must be in the Go map (with the
// same provider value, or with the documented divergence).
for prefix, shellProvider := range shellMap {
goProvider, ok := goMap[prefix]
if !ok {
t.Errorf(
"DRIFT: derive-provider.sh has prefix %q -> %q but deriveProviderFromModelSlug doesn't handle it.\n"+
"Fix: either add a case for %q to deriveProviderFromModelSlug in "+
"workspace-server/internal/handlers/workspace_provision.go (returning %q to match the shell), "+
"OR if this prefix is intentionally provision-time-divergent, add it to acceptedDivergences{} "+
"in this test with a comment explaining why.",
prefix, shellProvider, prefix, shellProvider,
)
continue
}
if goProvider == shellProvider {
continue
}
// Mismatch — only acceptable if it's on the explicit divergence list
// AND the Go side returns exactly the documented value.
expected, divergenceAllowed := acceptedDivergences[prefix]
if !divergenceAllowed {
t.Errorf(
"DRIFT: prefix %q maps to %q in derive-provider.sh but %q in deriveProviderFromModelSlug.\n"+
"Fix: align the Go function with the shell (preferred — they should agree), "+
"OR if the divergence is intentional and safe at provision time, "+
"add %q: %q to acceptedDivergences{} in this test with a comment explaining why.",
prefix, shellProvider, goProvider, prefix, goProvider,
)
continue
}
if goProvider != expected {
t.Errorf(
"DRIFT: prefix %q is on the acceptedDivergences list with expected Go value %q but "+
"deriveProviderFromModelSlug now returns %q.\n"+
"Fix: update acceptedDivergences[%q] in this test to %q (and update its comment), "+
"OR revert the Go function to return %q.",
prefix, expected, goProvider, prefix, goProvider, expected,
)
}
}
// Direction 2: every Go prefix must be in the shell map. Drift in
// this direction is rarer (someone added a Go case without touching
// the shell) but produces the same broken state — provision-time
// LLM_PROVIDER disagrees with what the container actually uses.
for prefix, goProvider := range goMap {
if _, ok := shellMap[prefix]; ok {
continue
}
t.Errorf(
"DRIFT: deriveProviderFromModelSlug handles prefix %q -> %q but derive-provider.sh doesn't list it.\n"+
"Fix: add a `%s/*) PROVIDER=%q ;;` case to "+
"workspace-configs-templates/hermes/scripts/derive-provider.sh — the Go provision-time hint "+
"is meaningless if the container's runtime script doesn't recognize the same prefix.",
prefix, goProvider, prefix, goProvider,
)
}
// Belt-and-braces: every entry in acceptedDivergences must actually
// appear in BOTH maps. A stale divergence entry (prefix removed from
// either source) silently weakens the gate.
for prefix := range acceptedDivergences {
if _, ok := shellMap[prefix]; !ok {
t.Errorf(
"acceptedDivergences contains prefix %q but derive-provider.sh no longer lists it. "+
"Remove the entry from acceptedDivergences{} in this test.",
prefix,
)
}
if _, ok := goMap[prefix]; !ok {
t.Errorf(
"acceptedDivergences contains prefix %q but deriveProviderFromModelSlug no longer lists it. "+
"Remove the entry from acceptedDivergences{} in this test.",
prefix,
)
}
}
}
// vendoredShellPath is the testdata snapshot of upstream
// derive-provider.sh. The path is relative to the test package
// directory (which is what `go test` sets as cwd). See the file's
// header for the refresh procedure when upstream changes.
const vendoredShellPath = "testdata/derive-provider.sh"
// goSourcePath is the file containing deriveProviderFromModelSlug.
// Relative to the test package directory.
const goSourcePath = "workspace_provision.go"
// loadShellPrefixMap parses derive-provider.sh and returns a
// map[prefix]provider for every case clause. Aliases inside a single
// `pat1/*|pat2/*)` clause expand to one map entry per alias, both
// pointing at the same provider.
//
// Stops at the first `*)` (the catch-all) and ignores it — the
// catch-all maps to PROVIDER="auto" which has no Go counterpart by
// design (deriveProviderFromModelSlug returns "" for unknowns and
// lets the shell's *=auto branch decide at runtime).
//
// Ambiguity: case clauses whose body branches on env vars (openai/*,
// nousresearch/*) are still extracted as the FIRST PROVIDER= literal
// inside the body. The shell's full conditional logic is documented
// via the acceptedDivergences map in this file rather than re-encoded
// in the parser, because re-encoding sh `if` semantics in regex is a
// fool's errand — the divergences are stable and small enough to
// hardcode.
func loadShellPrefixMap(t *testing.T) map[string]string {
t.Helper()
raw, err := os.ReadFile(vendoredShellPath)
if err != nil {
t.Fatalf("read %s: %v (refresh from upstream — see file header)", vendoredShellPath, err)
}
// Locate the case statement body so we don't accidentally match
// PROVIDER= assignments above the case (the HERMES_INFERENCE_PROVIDER
// override + the empty-model fallback both write PROVIDER= before
// the case). Upstream renamed the case variable to ${_HERMES_MODEL}
// in v0.12.0 (the resolved value of HERMES_INFERENCE_MODEL with a
// HERMES_DEFAULT_MODEL legacy fallback); accept either spelling so
// this test survives a future rename.
caseStart := regexp.MustCompile(`(?m)^case\s+"\$\{(_?HERMES(?:_DEFAULT|_INFERENCE)?_MODEL)\}"\s+in\s*$`)
startLoc := caseStart.FindIndex(raw)
if startLoc == nil {
t.Fatalf("could not locate `case \"${...HERMES...MODEL}\" in` in %s — shell file shape changed; rebuild parser", vendoredShellPath)
}
caseEnd := regexp.MustCompile(`(?m)^esac\s*$`)
endLoc := caseEnd.FindIndex(raw[startLoc[1]:])
if endLoc == nil {
t.Fatalf("could not locate `esac` after the case statement in %s — shell file shape changed", vendoredShellPath)
}
body := string(raw[startLoc[1] : startLoc[1]+endLoc[0]])
out := map[string]string{}
// Pattern A: single-line clauses like
// minimax-cn/*) PROVIDER="minimax-cn" ;;
// alibaba/*|dashscope/*|qwen/*) PROVIDER="alibaba" ;;
// Capture group 1 is the patterns (e.g. `minimax-cn/*` or
// `alibaba/*|dashscope/*|qwen/*`); group 2 is the provider literal.
singleLine := regexp.MustCompile(`(?m)^\s*([a-zA-Z0-9_./*|\-]+)\)\s*PROVIDER="([^"]+)"\s*;;`)
// Pattern B: multi-line clauses like
// openai/*)
// if [ -n "${OPENAI_API_KEY:-}" ]; then
// PROVIDER="custom"
// ...
// We capture the patterns and the FIRST PROVIDER= that follows
// (before the next `;;`). The acceptedDivergences map handles the
// fact that the runtime branching can pick a different value.
multiLine := regexp.MustCompile(`(?ms)^\s*([a-zA-Z0-9_./*|\-]+)\)\s*\n(.*?);;`)
addEntry := func(patterns, provider string) {
// Skip the `*)` catch-all — it has no Go counterpart by design.
if strings.TrimSpace(patterns) == "*" {
return
}
for _, alt := range strings.Split(patterns, "|") {
alt = strings.TrimSpace(alt)
// Each alternative is `<prefix>/*` — strip the trailing `/*`.
alt = strings.TrimSuffix(alt, "/*")
if alt == "" {
continue
}
// First write wins — a single-line match outranks a multi-line
// fallback for the same patterns block (defensive; the regexes
// shouldn't overlap on the same line in practice).
if _, exists := out[alt]; !exists {
out[alt] = provider
}
}
}
// Run single-line first so it claims its lines before the multi-line
// pass sees them.
consumed := map[int]bool{}
for _, m := range singleLine.FindAllStringSubmatchIndex(body, -1) {
addEntry(body[m[2]:m[3]], body[m[4]:m[5]])
// Mark every line touched so multi-line pass can skip it.
for i := m[0]; i < m[1]; i++ {
consumed[i] = true
}
}
for _, m := range multiLine.FindAllStringSubmatchIndex(body, -1) {
// Skip if the start of this match overlaps a single-line clause.
if consumed[m[0]] {
continue
}
patterns := body[m[2]:m[3]]
clauseBody := body[m[4]:m[5]]
// Extract the FIRST PROVIDER="..." from the clause body.
firstProvider := regexp.MustCompile(`PROVIDER="([^"]+)"`).FindStringSubmatch(clauseBody)
if firstProvider == nil {
t.Errorf("multi-line case clause for %q has no PROVIDER= literal — shell file shape changed; rebuild parser", patterns)
continue
}
addEntry(patterns, firstProvider[1])
}
return out
}
// loadGoPrefixMap parses workspace_provision.go and walks the AST to
// extract the prefix→provider mapping from deriveProviderFromModelSlug's
// switch statement.
//
// Each case clause's string-literal labels become map keys, all
// pointing at the provider returned by that case body's `return "..."`
// statement. A clause like `case "alibaba", "dashscope", "qwen":
// return "alibaba"` produces three map entries.
//
// Skips the default clause (returns ""). Skips any case clause whose
// body's first statement isn't a single `return STRING_LITERAL` — those
// would need their own divergence handling and don't currently exist
// in the function.
func loadGoPrefixMap(t *testing.T) map[string]string {
t.Helper()
fset := token.NewFileSet()
file, err := parser.ParseFile(fset, goSourcePath, nil, parser.ParseComments)
if err != nil {
t.Fatalf("parse %s: %v", goSourcePath, err)
}
var fn *ast.FuncDecl
for _, decl := range file.Decls {
f, ok := decl.(*ast.FuncDecl)
if !ok {
continue
}
if f.Name.Name == "deriveProviderFromModelSlug" {
fn = f
break
}
}
if fn == nil {
t.Fatalf("could not find deriveProviderFromModelSlug in %s — function renamed/removed; this gate's invariant has been violated", goSourcePath)
}
// Walk the function body for the SwitchStmt.
var sw *ast.SwitchStmt
ast.Inspect(fn.Body, func(n ast.Node) bool {
if s, ok := n.(*ast.SwitchStmt); ok {
sw = s
return false
}
return true
})
if sw == nil {
t.Fatalf("no switch statement found in deriveProviderFromModelSlug — function shape changed; rebuild parser")
}
out := map[string]string{}
for _, stmt := range sw.Body.List {
clause, ok := stmt.(*ast.CaseClause)
if !ok {
continue
}
// Default clause has no list — skip.
if len(clause.List) == 0 {
continue
}
// Find the first return statement in the clause body.
var ret *ast.ReturnStmt
for _, bodyStmt := range clause.Body {
if r, ok := bodyStmt.(*ast.ReturnStmt); ok {
ret = r
break
}
}
if ret == nil || len(ret.Results) != 1 {
t.Errorf("case clause at %s has no single-value return — function shape changed; gate may be incomplete",
fset.Position(clause.Pos()))
continue
}
lit, ok := ret.Results[0].(*ast.BasicLit)
if !ok || lit.Kind != token.STRING {
t.Errorf("case clause at %s returns a non-literal — gate cannot extract provider value",
fset.Position(clause.Pos()))
continue
}
provider, err := strconv.Unquote(lit.Value)
if err != nil {
t.Errorf("case clause at %s has unparseable string literal %q: %v",
fset.Position(clause.Pos()), lit.Value, err)
continue
}
for _, expr := range clause.List {
lbl, ok := expr.(*ast.BasicLit)
if !ok || lbl.Kind != token.STRING {
t.Errorf("case clause at %s has a non-string-literal label — gate cannot extract prefix",
fset.Position(clause.Pos()))
continue
}
prefix, err := strconv.Unquote(lbl.Value)
if err != nil {
t.Errorf("case clause at %s has unparseable label literal %q: %v",
fset.Position(clause.Pos()), lbl.Value, err)
continue
}
out[prefix] = provider
}
}
return out
}
// TestDeriveProviderDrift_ShellParserIsSane is a guard test: the shell
// parser is regex-based, so we sanity-check that it actually finds the
// well-known prefixes documented in derive-provider.sh's header
// comment. If this test passes but the main drift test reports
// missing prefixes, the bug is almost certainly in the regex (not in
// the production code).
func TestDeriveProviderDrift_ShellParserIsSane(t *testing.T) {
t.Parallel()
shellMap := loadShellPrefixMap(t)
// Anchor prefixes — these have lived in derive-provider.sh since it
// was first introduced. If the parser can't find them, it's broken.
mustHave := map[string]string{
"anthropic": "anthropic",
"minimax": "minimax",
"minimax-cn": "minimax-cn",
"openrouter": "openrouter",
"custom": "custom",
"alibaba": "alibaba", // in an alias group with dashscope/qwen
"dashscope": "alibaba", // ditto
"qwen": "alibaba", // ditto
"openai": "custom", // multi-line; first PROVIDER= is "custom"
"nousresearch": "nous", // multi-line; first PROVIDER= is "nous"
}
missing := []string{}
wrong := []string{}
for prefix, want := range mustHave {
got, ok := shellMap[prefix]
if !ok {
missing = append(missing, prefix)
continue
}
if got != want {
wrong = append(wrong, prefix+" got="+got+" want="+want)
}
}
sort.Strings(missing)
sort.Strings(wrong)
if len(missing) > 0 {
t.Errorf("shell parser failed to extract anchor prefixes: %v", missing)
}
if len(wrong) > 0 {
t.Errorf("shell parser extracted wrong values for anchor prefixes: %v", wrong)
}
}
@@ -237,7 +237,17 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
var peers []map[string]interface{}
// Siblings
// Siblings — workspaces sharing the caller's parent.
//
// #1953 cross-tenant isolation: the OLD code's else-branch handled the
// org-root caller (parent_id IS NULL) by returning EVERY workspace with
// parent_id IS NULL — i.e. every other tenant's org root, since the
// workspaces table has no org_id column. That leaked peer identities/URLs
// across tenants. An org root has no siblings inside its own org (each
// tenant is a distinct org root), so the org-root caller now gets an empty
// sibling set; its real peers are its children, returned below. Only the
// parent_id-bound branch enumerates siblings, and that is already scoped to
// one parent (one tenant).
if parentID.Valid {
siblings, _ := queryPeerMaps(`
SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
@@ -246,14 +256,6 @@ func (h *DiscoveryHandler) Peers(c *gin.Context) {
FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`,
parentID.String, workspaceID)
peers = append(peers, siblings...)
} else {
siblings, _ := queryPeerMaps(`
SELECT w.id, w.name, COALESCE(w.role, ''), w.tier, w.status,
COALESCE(w.agent_card, 'null'::jsonb), COALESCE(w.url, ''),
w.parent_id, w.active_tasks
FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`,
workspaceID)
peers = append(peers, siblings...)
}
// Children — exclude self defensively. A child row whose parent_id
@@ -223,10 +223,10 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {
peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
// Siblings (other root-level workspaces) — none
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id IS NULL AND w.id != \\$1").
WithArgs("ws-root-alone").
WillReturnRows(sqlmock.NewRows(peerCols))
// #1953: an org-root caller (parent_id IS NULL) now issues NO sibling
// query at all. The old `WHERE w.parent_id IS NULL` sibling read returned
// EVERY tenant's org root (cross-tenant leak); an org root has no siblings
// inside its own org, so the handler skips the sibling read entirely.
// Children — none. #383 added explicit `w.id != $2` self-filter.
mock.ExpectQuery("SELECT w.id, w.name.*WHERE w.parent_id = \\$1 AND w.id != \\$2").
@@ -255,22 +255,20 @@ func TestExtended_SecretsListEmpty(t *testing.T) {
// ---------- TestSecretsSet (Extended) ----------
func TestExtended_SecretsSet(t *testing.T) {
// internal#691: the per-workspace strip gate now defaults to platform_managed
// on empty MOLECULE_LLM_BILLING_MODE (closed default). This test's intent is
// the happy path of persisting a vendor key, so put the org into byok which
// matches the pre-#691 implicit behavior of an unset env.
t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok")
// internal#718 P2-B: the per-workspace strip gate keys off the DERIVED mode
// (org rung retired). This test's intent is the happy path of persisting a
// vendor key on a byok workspace; the realistic way a workspace is byok for
// a direct vendor-key write is an explicit operator override (the escape
// hatch the reject error itself points to: PUT /admin/.../llm-billing-mode).
// The override short-circuits the resolver to byok in a single read, so the
// bypass-list check is skipped and the write proceeds.
t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
mock := setupTestDB(t)
handler := NewSecretsHandler(nil)
// internal#691: secrets.Set now consults ResolveLLMBillingMode before the
// strip gate. Mock returns no row → resolver falls through to the org
// default (byok, set via t.Setenv above) → bypass-list check is skipped
// and the write proceeds. This pattern is the test-side mirror of the
// real-prod fall-through behavior for a fresh workspace with no override.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs("22222222-2222-2222-2222-222222222222").
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}))
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
// Expect INSERT (encrypted value is dynamic, use AnyArg)
mock.ExpectExec("INSERT INTO workspace_secrets").
@@ -453,6 +451,14 @@ func TestExtended_DiscoverMissingHeader(t *testing.T) {
// ---------- TestPeers (Extended) ----------
// TestExtended_Peers verifies a root-level (org-root) workspace's peer view.
//
// #1953: previously a root-level caller issued `WHERE w.parent_id IS NULL`
// for siblings, which returned EVERY other tenant's org root as a "peer"
// (cross-tenant leak, since the workspaces table has no org_id column). After
// the fix an org root has no cross-tenant siblings; its only peers are its own
// children. This test asserts the child is returned and that NO sibling query
// is issued (no `parent_id IS NULL` read).
func TestExtended_Peers(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
@@ -463,17 +469,14 @@ func TestExtended_Peers(t *testing.T) {
WithArgs("ws-peer").
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
// Expect root-level siblings query (parent IS NULL, excluding self)
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs("ws-peer").
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
AddRow("ws-sibling", "Sibling Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", nil, 0))
// NO root-level sibling query is issued for an org-root caller anymore.
// Expect children query (workspaces with parent_id = ws-peer, excluding self)
// Query now binds (parent_id, self_id) for the self-filter guard added in #383.
// Children query (workspaces with parent_id = ws-peer, excluding self).
// Query binds (parent_id, self_id) for the self-filter guard added in #383.
mock.ExpectQuery("SELECT w.id, w.name").
WithArgs("ws-peer", "ws-peer").
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}))
WillReturnRows(sqlmock.NewRows([]string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}).
AddRow("ws-child", "Child Agent", "worker", 1, "online", []byte("null"), "http://localhost:9001", "ws-peer", 0))
// No parent query since workspace is root-level
@@ -493,10 +496,10 @@ func TestExtended_Peers(t *testing.T) {
t.Fatalf("failed to parse response: %v", err)
}
if len(resp) != 1 {
t.Fatalf("expected 1 peer, got %d", len(resp))
t.Fatalf("expected 1 peer (the child), got %d", len(resp))
}
if resp[0]["name"] != "Sibling Agent" {
t.Errorf("expected peer name 'Sibling Agent', got %v", resp[0]["name"])
if resp[0]["name"] != "Child Agent" {
t.Errorf("expected peer name 'Child Agent', got %v", resp[0]["name"])
}
if err := mock.ExpectationsWereMet(); err != nil {
@@ -12,12 +12,12 @@ import (
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/ws"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/wsauth"
"github.com/DATA-DOG/go-sqlmock"
"github.com/alicebob/miniredis/v2"
"github.com/gin-gonic/gin"
"github.com/redis/go-redis/v9"
@@ -158,9 +158,11 @@ func allowLoopbackForTest(t *testing.T) {
// handler in the 2026-04-18 restructure but the tests never caught up,
// leaving Platform (Go) CI red for weeks.
func expectBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id = \$1`).
// Multi-period (#49): checkWorkspaceBudget reads budget_limits jsonb. An
// empty map → no limits → returns early (no spend query), enforcement skipped.
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs(workspaceID).
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
}
// ---------- TestRegisterHandler ----------
@@ -43,10 +43,36 @@ import (
"database/sql"
"errors"
"fmt"
"log"
"sync"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
)
// providerManifest is the parsed provider registry, loaded once. The registry
// is embedded (go:embed, no network) and immutable for the process lifetime, so
// a single Load is safe to memoize. A load failure is cached too (registryErr):
// it can only happen on a malformed embedded YAML, which is a build-time defect
// the verify-providers-gen + sync gates already catch, so failing closed
// (treat as "cannot derive" → platform default) is correct and we don't retry.
var (
providerRegistryOnce sync.Once
providerRegistryManifest *providers.Manifest
providerRegistryErr error
)
func providerRegistry() (*providers.Manifest, error) {
providerRegistryOnce.Do(func() {
providerRegistryManifest, providerRegistryErr = providers.LoadManifest()
if providerRegistryErr != nil {
log.Printf("llm_billing_mode: FATAL — provider registry failed to load: %v (billing will default-closed to platform_managed)", providerRegistryErr)
}
})
return providerRegistryManifest, providerRegistryErr
}
// Constants mirror molecule-controlplane/internal/credits/llm_billing.go.
// Kept as string literals (not imports) because workspace-server has no
// build-time dependency on the CP module; the values are stable wire
@@ -67,6 +93,19 @@ const (
BillingModeSourceWorkspaceOverride BillingModeSource = "workspace_override"
BillingModeSourceOrgDefault BillingModeSource = "org_default"
BillingModeSourceConstantFallback BillingModeSource = "constant_fallback"
// BillingModeSourceDerivedProvider means the mode was DERIVED from the
// workspace's (runtime, model) via the provider registry — the SSOT
// (internal#718 P2-B). IsPlatform(derived) → platform_managed, else byok.
// This is the highest-precedence source after an explicit operator override
// and SUPERSEDES the prior stored-LLM_PROVIDER read (#1966).
BillingModeSourceDerivedProvider BillingModeSource = "derived_provider"
// BillingModeSourceDerivedDefault means the registry could not derive a
// provider for the (runtime, model) — no model, unknown runtime,
// unregistered/ambiguous model — so the mode defaulted closed to
// platform_managed (CTO-confirmed "unset → platform default"). Distinct from
// derived_provider so operators can see "we defaulted" vs "we derived
// platform".
BillingModeSourceDerivedDefault BillingModeSource = "derived_default"
)
// BillingModeResolution is the structured answer the admin GET route returns
@@ -74,11 +113,18 @@ const (
// shape, so the resolver test asserts both the mode AND the source per case
// (catches a bug where the right mode is returned via the wrong layer).
type BillingModeResolution struct {
WorkspaceID string `json:"workspace_id"`
ResolvedMode string `json:"resolved_mode"`
WorkspaceOverride *string `json:"workspace_override"` // nil = inherit
OrgDefault string `json:"org_default"` // already default-closed by CP
Source BillingModeSource `json:"source"`
WorkspaceID string `json:"workspace_id"`
ResolvedMode string `json:"resolved_mode"`
WorkspaceOverride *string `json:"workspace_override"` // nil = inherit
OrgDefault string `json:"org_default"` // RETIRED as a billing source (internal#718 P2-B); always platform_managed, kept for wire-compat
Source BillingModeSource `json:"source"`
// ProviderSelection surfaces the DERIVED provider name (internal#718 P2-B)
// when the mode came from the registry derivation — the literal provider the
// (runtime, model) resolved to (e.g. "platform", "kimi-coding", "openai"), or
// the raw model id when derivation failed. nil when an explicit operator
// override or the empty-id default decided. Lets the admin route answer "why
// is this workspace byok?" with the derived provider, not a stored value.
ProviderSelection *string `json:"provider_selection"`
}
// isKnownBillingMode is the enum-recognizer for the resolver's default-closed
@@ -95,24 +141,137 @@ func isKnownBillingMode(s string) bool {
}
}
// normalizeOrgDefault applies the same default-closed contract to the
// org-level input as the workspace override gets. The org_default arrives
// from tenant_config which already COALESCEs NULL → platform_managed at the
// CP SQL layer, but we DO NOT trust that contract here — if CP regresses or
// the tenant_config env wasn't populated (race on boot), we still default-
// close. Same principle: never honor a garbled value.
func normalizeOrgDefault(orgMode string) string {
if isKnownBillingMode(orgMode) {
return orgMode
// readWorkspaceBillingOverride reads the OPTIONAL explicit operator override
// (workspaces.llm_billing_mode). Returns:
//
// (mode, true, nil) — a recognized override is set → operator pinned the mode
// ("", false, nil) — NULL / garbled / row-missing → no explicit override
// ("", false, err) — DB error → caller defaults closed + propagates
//
// internal#718 P2-B retires the org rung; this column is the ONLY stored
// billing signal that survives, and ONLY as an explicit override on top of the
// derived provider (CTO 2026-05-27).
func readWorkspaceBillingOverride(ctx context.Context, workspaceID string) (string, bool, error) {
var wsOverride sql.NullString
err := db.DB.QueryRowContext(ctx,
`SELECT llm_billing_mode FROM workspaces WHERE id = $1`,
workspaceID,
).Scan(&wsOverride)
switch {
case errors.Is(err, sql.ErrNoRows):
return "", false, nil
case err != nil:
return "", false, fmt.Errorf("resolve workspace llm_billing_mode override for %s: %w", workspaceID, err)
}
return LLMBillingModePlatformManaged
if wsOverride.Valid && isKnownBillingMode(wsOverride.String) {
return wsOverride.String, true, nil
}
return "", false, nil
}
// ResolveLLMBillingMode is the canonical resolver. Every code path that
// previously gated on `os.Getenv("MOLECULE_LLM_BILLING_MODE") == "platform_managed"`
// must call this instead and gate on the returned mode. The architectural
// test (resolver_ast_test.go) asserts there is no remaining call site of
// the old shape outside the resolver-input wiring.
// ResolveLLMBillingModeDerived is the SSOT billing-mode resolver (internal#718
// P2-B). It DERIVES the provider from (runtime, model) via the provider
// registry and decides platform-vs-byok from IsPlatform(derived) — it does NOT
// read a stored LLM_PROVIDER (superseding #1966's stored-read approach) and
// does NOT read the org rung (retired, CTO 2026-05-27).
//
// Precedence (highest first):
//
// 1. EXPLICIT operator override (workspaces.llm_billing_mode, a recognized
// value). The only stored billing signal that survives — an escape hatch,
// not the primary signal.
// 2. DERIVE: providers.DeriveProvider(runtime, model, availableAuthEnv).
// - resolves to the closed `platform` provider → platform_managed
// - resolves to any other (BYOK/third-party) provider → byok ← THE FIX
// 3. DEFAULT-CLOSED: derive fails (no model, unknown runtime, unregistered or
// ambiguous model) → platform_managed (CTO "unset → platform default"). A
// derive failure NEVER silently flips a workspace to byok (which would
// strip the platform creds it may legitimately need).
//
// availableAuthEnv is the set of auth-env-var NAMES present for the workspace
// (never secret values) — the same disambiguation input DeriveProvider uses to
// split anthropic-oauth from anthropic-api. May be nil.
//
// A returned error never prevents a decision: ResolvedMode is always a valid
// enum value (default-closed). The error is informational (log + surface).
func ResolveLLMBillingModeDerived(ctx context.Context, workspaceID, runtime, model string, availableAuthEnv []string) (BillingModeResolution, error) {
res := BillingModeResolution{
WorkspaceID: workspaceID,
// OrgDefault is retired as a billing source (internal#718 P2-B). Kept on
// the struct for wire-compat (admin route / CP mirror) but always the
// closed constant — never consulted in the decision.
OrgDefault: LLMBillingModePlatformManaged,
}
// Pre-provision context (no workspace row yet): no override to read, default
// closed. (DeriveProvider could still run from the passed runtime/model, but
// the no-id path historically does no DB work and the strip gate only runs
// post-create, so keep it a pure default to preserve that contract.)
if workspaceID == "" {
res.ResolvedMode = LLMBillingModePlatformManaged
res.Source = BillingModeSourceDerivedDefault
return res, nil
}
// Precedence 1: explicit operator override.
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
// DB error — default closed AND propagate (never flip on a transient error).
res.ResolvedMode = LLMBillingModePlatformManaged
res.Source = BillingModeSourceConstantFallback
return res, err
} else if ok {
m := mode
res.WorkspaceOverride = &m
res.ResolvedMode = mode
res.Source = BillingModeSourceWorkspaceOverride
return res, nil
}
// Precedence 2: DERIVE the provider from (runtime, model).
manifest, mErr := providerRegistry()
if mErr != nil || manifest == nil {
// Registry unavailable (malformed embedded YAML — a build-time defect the
// gates catch). Default closed.
res.ResolvedMode = LLMBillingModePlatformManaged
res.Source = BillingModeSourceDerivedDefault
return res, mErr
}
provider, dErr := manifest.DeriveProvider(runtime, model, availableAuthEnv)
if dErr != nil {
// No model / unknown runtime / unregistered / ambiguous → default closed.
// NOT an error to the caller: an unregistered model is a legitimate
// "we can't say it's BYOK, so bill the platform default" outcome, and the
// only-registered gate at the create/config API is where an unregistered
// model is rejected loudly. Here we just fail closed for safety.
res.ResolvedMode = LLMBillingModePlatformManaged
res.Source = BillingModeSourceDerivedDefault
sel := model
if sel != "" {
res.ProviderSelection = &sel
}
return res, nil
}
derivedName := provider.Name
res.ProviderSelection = &derivedName
res.Source = BillingModeSourceDerivedProvider
if provider.IsPlatform() {
res.ResolvedMode = LLMBillingModePlatformManaged
} else {
// A specific (non-platform) vendor was derived → bring-your-own-key.
res.ResolvedMode = LLMBillingModeBYOK
}
return res, nil
}
// ResolveLLMBillingMode is the legacy-signature resolver retained for callers
// that do not have (runtime, model) in hand (the admin GET/PUT route and the
// secrets remote-pull path). It reads the workspace's stored runtime + model +
// available auth env from the DB and delegates to the DERIVED resolver
// (internal#718 P2-B) — the orgMode parameter is RETIRED (the org rung is no
// longer a billing source) and is ignored; it stays in the signature only to
// avoid churning the two callers in this PR. The architectural test asserts no
// remaining code path gates on os.Getenv("MOLECULE_LLM_BILLING_MODE") for the
// strip decision (that env is no longer read into the decision at all).
//
// Returning an error does NOT prevent the caller from making a decision —
// the returned mode is always a valid enum value (default-closed to
@@ -120,75 +279,160 @@ func normalizeOrgDefault(orgMode string) string {
// branch. The error is informational: log it, surface it to operators, but
// the strip-gate decision is already safe.
func ResolveLLMBillingMode(ctx context.Context, workspaceID, orgMode string) (BillingModeResolution, error) {
res := BillingModeResolution{
WorkspaceID: workspaceID,
OrgDefault: normalizeOrgDefault(orgMode),
}
_ = orgMode // org rung retired (internal#718 P2-B); parameter ignored.
if workspaceID == "" {
// No workspace ID = pre-provision context (templating, validation).
// Resolve against the org default only, no DB read.
res.ResolvedMode = res.OrgDefault
res.Source = BillingModeSourceOrgDefault
if !isKnownBillingMode(orgMode) {
// Org default was garbled/NULL and we clamped to platform_managed.
// Mark the source as constant_fallback so the operator can see
// the clamp happened, not that the org "really" said platform_managed.
res.Source = BillingModeSourceConstantFallback
}
return res, nil
// Pre-provision context (templating, validation): default closed, no DB.
return ResolveLLMBillingModeDerived(ctx, "", "", "", nil)
}
var wsOverride sql.NullString
err := db.DB.QueryRowContext(ctx,
`SELECT llm_billing_mode FROM workspaces WHERE id = $1`,
// Precedence 1: explicit operator override. Read it FIRST so an overridden
// workspace short-circuits without the extra runtime/secrets reads (and so
// the query order is override → runtime → secrets, matching the derived
// resolver's own override-first precedence).
if mode, ok, err := readWorkspaceBillingOverride(ctx, workspaceID); err != nil {
return BillingModeResolution{
WorkspaceID: workspaceID,
OrgDefault: LLMBillingModePlatformManaged,
ResolvedMode: LLMBillingModePlatformManaged,
Source: BillingModeSourceConstantFallback,
}, err
} else if ok {
m := mode
return BillingModeResolution{
WorkspaceID: workspaceID,
OrgDefault: LLMBillingModePlatformManaged,
ResolvedMode: mode,
WorkspaceOverride: &m,
Source: BillingModeSourceWorkspaceOverride,
}, nil
}
// Precedence 2: DERIVE. Read the stored (runtime, model, available-auth-env)
// so the derived resolver can DeriveProvider for callers that don't carry
// them (admin route, secrets remote-pull). A read miss/error degrades
// gracefully: pass the empty/partial inputs through — DeriveProvider then
// errors and the derived resolver defaults closed to platform_managed.
//
// ResolveLLMBillingModeDerived re-reads the override (NULL again here) before
// deriving; that one extra cheap read keeps the derived resolver a complete,
// independently-callable SSOT rather than splitting its precedence across two
// functions.
runtime, model, authEnv := readWorkspaceDeriveInputs(ctx, workspaceID)
return ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, model, authEnv)
}
// readWorkspaceDeriveInputs loads the workspace's stored runtime + selected
// model + the auth-env-var NAMES present in its secrets — the inputs
// DeriveProvider needs. Best-effort: any read error returns whatever was
// gathered (the derived resolver fails closed on incomplete inputs). The model
// is the MODEL workspace_secret (the canvas-picked id, written by setModelSecret
// / Create); runtime is the workspaces.runtime column (defaults claude-code).
// availableAuthEnv is the subset of secret KEYS that are recognized provider
// auth-env names (never values), so DeriveProvider's auth-env tie-break can fire
// the same way it does on the provision path.
func readWorkspaceDeriveInputs(ctx context.Context, workspaceID string) (runtime, model string, availableAuthEnv []string) {
var rt sql.NullString
if err := db.DB.QueryRowContext(ctx,
`SELECT runtime FROM workspaces WHERE id = $1`, workspaceID,
).Scan(&rt); err != nil {
if !errors.Is(err, sql.ErrNoRows) {
log.Printf("llm_billing_mode: read runtime for %s: %v (deriving with empty runtime)", workspaceID, err)
}
}
runtime = rt.String
if runtime == "" {
// Mirror the DB column default so an unset runtime still derives.
runtime = "claude-code"
}
// Gather model + auth-env-name keys from workspace_secrets in one pass.
authSet := authEnvNameSet()
rows, err := db.DB.QueryContext(ctx,
`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1`,
workspaceID,
).Scan(&wsOverride)
switch {
case errors.Is(err, sql.ErrNoRows):
// Workspace row missing — concurrent delete, or pre-create call. Don't
// silently flip; fall through to org default. Source stays org_default
// so operators can see the row-missing case is being handled as a
// fallback, not a workspace-explicit decision.
res.ResolvedMode = res.OrgDefault
res.Source = BillingModeSourceOrgDefault
if !isKnownBillingMode(orgMode) {
res.Source = BillingModeSourceConstantFallback
)
if err != nil {
log.Printf("llm_billing_mode: read secrets for %s: %v (deriving with no model/auth-env)", workspaceID, err)
return runtime, model, availableAuthEnv
}
defer rows.Close()
for rows.Next() {
var k string
var v []byte
var ver int
if rows.Scan(&k, &v, &ver) != nil {
continue
}
if k == "MODEL" {
if dec, derr := crypto.DecryptVersioned(v, ver); derr == nil {
model = string(dec)
}
continue
}
// Only the KEY matters for auth-env disambiguation (the value is the
// secret; we never decrypt it for this purpose). Record recognized
// provider auth-env names.
if _, ok := authSet[k]; ok {
availableAuthEnv = append(availableAuthEnv, k)
}
return res, nil
case err != nil:
// DB error — default-closed to platform_managed AND propagate the
// error so operators get a structured log line. The caller is
// expected to log and continue with the safe default.
res.ResolvedMode = LLMBillingModePlatformManaged
res.Source = BillingModeSourceConstantFallback
return res, fmt.Errorf("resolve workspace llm_billing_mode for %s: %w", workspaceID, err)
}
return runtime, model, availableAuthEnv
}
if wsOverride.Valid && isKnownBillingMode(wsOverride.String) {
mode := wsOverride.String
res.WorkspaceOverride = &mode
res.ResolvedMode = mode
res.Source = BillingModeSourceWorkspaceOverride
return res, nil
}
// authEnvNameSet is the union of every provider's auth_env names in the
// registry — the recognized set readWorkspaceDeriveInputs filters secret keys
// against. Loaded once from the registry so it stays in sync with the SSOT (no
// hardcoded auth-env vocabulary). Registry-load failure yields an empty set
// (derive then runs without the auth-env tie-break, which only matters for the
// oauth-vs-api overlap; safe — it errors to default-closed rather than guessing).
var (
authEnvNameSetOnce sync.Once
authEnvNameSetVal map[string]struct{}
)
// Override row present but the value is NULL or garbled. Fall through.
// If the value was non-NULL but garbled (CHECK constraint should prevent
// this, but defense in depth — a future migration could relax the check
// or another path could write the column directly), surface the raw
// override value so operators can spot the corrupt row.
if wsOverride.Valid {
raw := wsOverride.String
res.WorkspaceOverride = &raw
func authEnvNameSet() map[string]struct{} {
authEnvNameSetOnce.Do(func() {
authEnvNameSetVal = map[string]struct{}{}
m, err := providerRegistry()
if err != nil || m == nil {
return
}
for _, p := range m.Providers {
for _, e := range p.AuthEnv {
authEnvNameSetVal[e] = struct{}{}
}
}
})
return authEnvNameSetVal
}
// availableAuthEnvNames returns the recognized provider auth-env-var NAMES
// present (non-empty) in envVars — the DeriveProvider auth-env tie-break input.
// Never returns secret VALUES, only the env-var names. Used by the provision
// path (applyPlatformManagedLLMEnv), which already has the workspace env in
// hand, so it derives without a secrets DB round-trip.
func availableAuthEnvNames(envVars map[string]string) []string {
authSet := authEnvNameSet()
var out []string
for k, v := range envVars {
if v == "" {
continue
}
if _, ok := authSet[k]; ok {
out = append(out, k)
}
}
res.ResolvedMode = res.OrgDefault
res.Source = BillingModeSourceOrgDefault
if !isKnownBillingMode(orgMode) {
res.Source = BillingModeSourceConstantFallback
return out
}
// derefOrEmpty returns the pointed-to string or "" for a nil pointer. Used in
// log lines that surface an optional *string field.
func derefOrEmpty(s *string) string {
if s == nil {
return ""
}
return res, nil
return *s
}
// SetWorkspaceLLMBillingMode writes the override column. Pass mode=="" to
@@ -0,0 +1,232 @@
package handlers
// llm_billing_mode_derived_test.go — tests for the DERIVED billing-mode
// resolver (internal#718 P2-B). The platform-vs-byok decision now DERIVES the
// provider from (runtime, model) via the provider registry and keys off
// IsPlatform(derived) — it does NOT read a stored LLM_PROVIDER (supersedes
// #1966's stored-read approach) and does NOT read the org rung (retired,
// CTO 2026-05-27). `workspaces.llm_billing_mode` survives ONLY as an optional
// explicit operator override (first precedence).
//
// This file pins the explicit BEHAVIOR DELTA the RFC's P2 calls out:
// - platform-derived (or unset → platform default) → platform_managed (UNCHANGED)
// - non-platform-derived → byok (THE FIX — the Reno leak class)
// - explicit override → wins over derive
// - derive error / unregistered → platform_managed (default-closed)
import (
"context"
"errors"
"testing"
"github.com/DATA-DOG/go-sqlmock"
)
// expectOverrideQuery sets up the workspaces.llm_billing_mode override read
// (first precedence). value=="" means NULL (no override).
func expectOverrideQuery(m sqlmock.Sqlmock, wsID, value string) {
rows := sqlmock.NewRows([]string{"llm_billing_mode"})
if value == "" {
rows.AddRow(nil)
} else {
rows.AddRow(value)
}
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(rows)
}
func TestResolveLLMBillingModeDerived_BehaviorDelta(t *testing.T) {
ctx := context.Background()
const wsID = "33333333-3333-3333-3333-333333333333"
type tc struct {
name string
runtime string
model string
authEnv []string
override string // "" = NULL override (no explicit operator override)
wantMode string
wantSource BillingModeSource
wantErr bool
}
cases := []tc{
{
// PLATFORM-DERIVED → platform_managed (UNCHANGED). claude-code +
// a platform-namespaced model id derives to the closed `platform`
// provider → IsPlatform → platform_managed.
name: "platform_derived_keeps_platform_managed_UNCHANGED",
runtime: "claude-code",
model: "anthropic/claude-opus-4-7",
override: "",
wantMode: LLMBillingModePlatformManaged,
wantSource: BillingModeSourceDerivedProvider,
},
{
// NON-PLATFORM-DERIVED → byok (THE FIX). claude-code + the
// kimi-coding-native model derives to the non-platform kimi-coding
// provider → IsPlatform=false → byok. This is the Reno billing-leak
// class: pre-P2 it resolved platform_managed and ran on platform creds.
name: "non_platform_derived_resolves_byok_THE_FIX",
runtime: "claude-code",
model: "kimi-for-coding",
override: "",
wantMode: LLMBillingModeBYOK,
wantSource: BillingModeSourceDerivedProvider,
},
{
// NON-PLATFORM vendor on codex: gpt-5.5 derives to `openai` (BYOK).
name: "non_platform_openai_codex_byok",
runtime: "codex",
model: "gpt-5.5",
override: "",
wantMode: LLMBillingModeBYOK,
wantSource: BillingModeSourceDerivedProvider,
},
{
// PLATFORM-DERIVED on codex: openai/gpt-5.4 is platform-namespaced.
name: "platform_derived_codex_platform_managed",
runtime: "codex",
model: "openai/gpt-5.4",
override: "",
wantMode: LLMBillingModePlatformManaged,
wantSource: BillingModeSourceDerivedProvider,
},
{
// UNSET model → platform default (CTO-confirmed "unset → platform
// default"). No model means nothing to derive; default-closed.
name: "unset_model_platform_default",
runtime: "claude-code",
model: "",
override: "",
wantMode: LLMBillingModePlatformManaged,
wantSource: BillingModeSourceDerivedDefault,
},
{
// UNREGISTERED model → derive errors → platform default (default-closed,
// NOT a silent byok flip that would strip a workspace's creds).
name: "unregistered_model_derive_error_platform_default",
runtime: "claude-code",
model: "totally-made-up-model-xyz",
override: "",
wantMode: LLMBillingModePlatformManaged,
wantSource: BillingModeSourceDerivedDefault,
},
{
// UNKNOWN runtime → derive errors → platform default (default-closed).
name: "unknown_runtime_platform_default",
runtime: "no-such-runtime",
model: "claude-opus-4-7",
override: "",
wantMode: LLMBillingModePlatformManaged,
wantSource: BillingModeSourceDerivedDefault,
},
{
// EXPLICIT OVERRIDE wins over derive: a non-platform-deriving model
// kept on platform_managed by an operator override (escape hatch).
name: "explicit_override_platform_managed_wins_over_byok_derive",
runtime: "claude-code",
model: "kimi-for-coding", // would derive byok
override: LLMBillingModePlatformManaged,
wantMode: LLMBillingModePlatformManaged,
wantSource: BillingModeSourceWorkspaceOverride,
},
{
// EXPLICIT OVERRIDE byok wins over a platform-deriving model.
name: "explicit_override_byok_wins_over_platform_derive",
runtime: "claude-code",
model: "anthropic/claude-opus-4-7", // would derive platform_managed
override: LLMBillingModeBYOK,
wantMode: LLMBillingModeBYOK,
wantSource: BillingModeSourceWorkspaceOverride,
},
{
// EXPLICIT OVERRIDE disabled wins (no-LLM workspace).
name: "explicit_override_disabled_wins",
runtime: "claude-code",
model: "anthropic/claude-opus-4-7",
override: LLMBillingModeDisabled,
wantMode: LLMBillingModeDisabled,
wantSource: BillingModeSourceWorkspaceOverride,
},
{
// AUTH-ENV disambiguation: claude-code's anthropic-oauth (alias
// model "opus") vs anthropic-api both could match a bare alias; with
// CLAUDE_CODE_OAUTH_TOKEN present it derives anthropic-oauth → byok.
name: "auth_env_disambiguates_oauth_byok",
runtime: "claude-code",
model: "opus",
authEnv: []string{"CLAUDE_CODE_OAUTH_TOKEN"},
override: "",
wantMode: LLMBillingModeBYOK,
wantSource: BillingModeSourceDerivedProvider,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, c.override)
res, err := ResolveLLMBillingModeDerived(ctx, wsID, c.runtime, c.model, c.authEnv)
if (err != nil) != c.wantErr {
t.Fatalf("err: got %v wantErr=%v", err, c.wantErr)
}
if res.ResolvedMode != c.wantMode {
t.Errorf("mode: got %q want %q", res.ResolvedMode, c.wantMode)
}
if res.Source != c.wantSource {
t.Errorf("source: got %q want %q", res.Source, c.wantSource)
}
if !isKnownBillingMode(res.ResolvedMode) {
t.Errorf("post-condition: resolved mode %q not a known enum", res.ResolvedMode)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
})
}
}
// TestResolveLLMBillingModeDerived_OverrideDBError_DefaultClosed asserts a DB
// error reading the override column defaults closed to platform_managed and
// propagates the error — never silently flips a workspace off platform creds.
func TestResolveLLMBillingModeDerived_OverrideDBError_DefaultClosed(t *testing.T) {
ctx := context.Background()
const wsID = "44444444-4444-4444-4444-444444444444"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnError(errors.New("connection refused"))
res, err := ResolveLLMBillingModeDerived(ctx, wsID, "claude-code", "kimi-for-coding", nil)
if err == nil {
t.Fatalf("expected propagated DB error, got nil")
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("default-closed: DB error must resolve platform_managed, got %q", res.ResolvedMode)
}
if res.Source != BillingModeSourceConstantFallback {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceConstantFallback)
}
}
// TestResolveLLMBillingModeDerived_EmptyWorkspaceID_PlatformDefault asserts the
// pre-provision context (no workspace id, no override read) defaults to
// platform_managed without a DB query.
func TestResolveLLMBillingModeDerived_EmptyWorkspaceID_PlatformDefault(t *testing.T) {
ctx := context.Background()
mock := setupTestDB(t) // no query expected
res, err := ResolveLLMBillingModeDerived(ctx, "", "claude-code", "kimi-for-coding", nil)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("empty workspace id must default platform_managed, got %q", res.ResolvedMode)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
@@ -36,10 +36,12 @@ import (
// GetWorkspaceLLMBillingMode handles GET /admin/workspaces/:id/llm-billing-mode.
//
// Reads the workspace override + the org-level default (from the same
// MOLECULE_LLM_BILLING_MODE env var the provisioner reads at strip-gate time —
// keeps the two paths consistent so the GET result matches what the strip
// gate would compute) and returns the structured resolution.
// internal#718 P2-B: the resolution now DERIVES the provider from the
// workspace's stored (runtime, model) via the registry (org rung retired). The
// passed orgMode is ignored by the resolver; it is left here only to avoid
// churning the call signature. The returned resolution matches what the
// provision-time strip gate computes (same derived resolver), so operators see
// the real platform-vs-byok decision + the derived provider in ProviderSelection.
func GetWorkspaceLLMBillingMode(c *gin.Context) {
workspaceID := strings.TrimSpace(c.Param("id"))
if !uuidRegex.MatchString(workspaceID) {
@@ -29,13 +29,42 @@ func init() {
const testWSID = "44444444-4444-4444-4444-444444444444"
func TestGetWorkspaceLLMBillingMode_HappyPath_InheritsOrgDefault(t *testing.T) {
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK)
// expectDeriveShimQueries sets up the three reads the legacy-signature
// ResolveLLMBillingMode shim makes on a no-explicit-override path
// (internal#718 P2-B): the override read (NULL here), the workspaces.runtime
// read, and the workspace_secrets scan (for MODEL + auth-env names). model==""
// means no MODEL secret row.
func expectDeriveShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
nullOverride := func() {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
}
// Order: override(NULL) shim check, runtime, secrets, override(NULL) again
// (the derived resolver re-checks the override as a complete SSOT).
nullOverride()
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow(runtime))
secretRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})
if model != "" {
// encryption_version 0 = plaintext passthrough (crypto.DecryptVersioned).
secretRows.AddRow("MODEL", []byte(model), 0)
}
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(secretRows)
nullOverride()
}
// internal#718 P2-B: org rung retired. A no-override workspace's mode is now
// DERIVED from its stored (runtime, model). A claude-code workspace with a
// non-platform-deriving model (kimi-for-coding) resolves byok via
// derived_provider — NOT the old "inherit org default".
func TestGetWorkspaceLLMBillingMode_HappyPath_DerivesByokFromModel(t *testing.T) {
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK) // org env ignored now
mock := setupTestDB(t)
// Workspace has no override → resolver returns org_default = byok.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(testWSID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
expectDeriveShimQueries(mock, testWSID, "claude-code", "kimi-for-coding")
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -54,12 +83,15 @@ func TestGetWorkspaceLLMBillingMode_HappyPath_InheritsOrgDefault(t *testing.T) {
if res.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("resolved mode: got %q want %q", res.ResolvedMode, LLMBillingModeBYOK)
}
if res.Source != BillingModeSourceOrgDefault {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceOrgDefault)
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want %q", res.Source, BillingModeSourceDerivedProvider)
}
if res.WorkspaceOverride != nil {
t.Errorf("expected nil override, got %v", *res.WorkspaceOverride)
}
if res.ProviderSelection == nil || *res.ProviderSelection != "kimi-coding" {
t.Errorf("expected derived provider kimi-coding, got %v", res.ProviderSelection)
}
}
func TestGetWorkspaceLLMBillingMode_BadUUID_400(t *testing.T) {
@@ -117,9 +149,9 @@ func TestPutWorkspaceLLMBillingMode_ExplicitNullClearsOverride(t *testing.T) {
mock.ExpectExec(`UPDATE workspaces SET llm_billing_mode = NULL WHERE id = \$1`).
WithArgs(testWSID).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(testWSID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
// After clear, the post-write re-resolution DERIVES (internal#718 P2-B):
// no override + no MODEL secret → derived_default → platform_managed.
expectDeriveShimQueries(mock, testWSID, "claude-code", "")
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -142,8 +174,8 @@ func TestPutWorkspaceLLMBillingMode_ExplicitNullClearsOverride(t *testing.T) {
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("post-clear resolved: got %q want %q", res.ResolvedMode, LLMBillingModePlatformManaged)
}
if res.Source != BillingModeSourceOrgDefault {
t.Errorf("post-clear source: got %q want %q", res.Source, BillingModeSourceOrgDefault)
if res.Source != BillingModeSourceDerivedDefault {
t.Errorf("post-clear source: got %q want %q", res.Source, BillingModeSourceDerivedDefault)
}
if res.WorkspaceOverride != nil {
t.Errorf("post-clear override should be nil, got %v", *res.WorkspaceOverride)
@@ -0,0 +1,374 @@
package handlers
// llm_billing_mode_provision_parity_test.go — molecule-core#1994.
//
// Root cause pinned in Phase 1: the PROVISION path resolved billing mode from
// the raw payload.Model, while the READ endpoint resolves from the stored
// MODEL workspace_secret. On a RE-PROVISION (restart/resume/auto-restart) the
// payload is rebuilt from the DB with Name+Tier+Runtime ONLY — payload.Model
// is "" (workspace_restart.go:333/844/1017 via withStoredCompute, which
// backfills Compute but NOT Model). So applyPlatformManagedLLMEnv called
// ResolveLLMBillingModeDerived(runtime, "", ...) → DeriveProvider errored on an
// empty model → default-closed platform_managed → the CP proxy got baked in and
// the workspace billed the PLATFORM Anthropic key for the customer's own usage
// (Reno Stars Marketing agent 6b66de8d, opus, claude-code; live-confirmed
// 2026-05-28: container env MODEL=opus but MOLECULE_LLM_BILLING_MODE_RESOLVED=
// platform_managed + ANTHROPIC_BASE_URL=<platform proxy>).
//
// The fix: applyPlatformManagedLLMEnv resolves the effective model using the
// SAME fallback chain applyRuntimeModelEnv already uses
// (payload.Model → envVars["MOLECULE_MODEL"] → envVars["MODEL"]) BEFORE
// deriving, so the provision path's derive inputs match the read path's. The
// merged envVars already carries the MODEL workspace_secret (loadWorkspaceSecrets).
//
// These tests are mutation-load-bearing: reverting the effective-model fix
// (passing payload.Model verbatim) turns
// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel and the parity
// test RED.
import (
"context"
"testing"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"github.com/DATA-DOG/go-sqlmock"
)
// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel is the direct
// repro of the #1994 divergence at the provision resolver. payload.Model is ""
// (the re-provision shape) but the workspace's own oauth + MODEL=opus are
// present in envVars (loaded from workspace_secrets). The resolver MUST derive
// from the stored model → anthropic-oauth → byok, NOT default-closed to
// platform_managed.
//
// Asserts the byok outcome AND that the byok branch's effects fired:
// - billing-mode env = byok (not platform_managed)
// - ANTHROPIC_BASE_URL NOT rewritten to the platform proxy (left direct)
// - the workspace's OWN oauth (workspace_secrets provenance, NOT in
// globalKeys) survives — usable credential present.
//
// Mutation: revert applyPlatformManagedLLMEnv to pass payload.Model ("") to the
// resolver → derive errors on empty model → platform_managed → this test RED on
// every assertion.
func TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel(t *testing.T) {
ctx := context.Background()
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent
mock := setupTestDB(t)
// Resolver reads the override (NULL — no explicit operator pin).
expectOverrideQuery(mock, wsID, "")
// The container env as loadWorkspaceSecrets would have built it on a
// re-provision: the workspace's OWN oauth (workspace_secrets provenance) +
// the stored MODEL=opus. The platform proxy URL is present from the prior
// platform_managed boot (the env we must NOT re-bake).
envVars := map[string]string{
"MODEL": "opus",
"CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH", // workspace_secrets origin
"ANTHROPIC_BASE_URL": "https://api.moleculesai.app/api/v1/internal/llm/anthropic",
}
// payload.Model == "" — exactly the re-provision shape. The oauth is
// workspace_secrets-origin (NOT in globalKeys) → exempt from the #728
// provider-matched strip regardless of provider match.
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", nil)
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("re-provision with stored MODEL=opus must resolve byok, got %q (source=%s) — the #1994 divergence", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want derived_provider (opus → anthropic-oauth)", res.Source)
}
if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModeBYOK {
t.Errorf("MOLECULE_LLM_BILLING_MODE_RESOLVED: got %q want byok", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"])
}
// byok must NOT route through the platform proxy.
if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.moleculesai.app/api/v1/internal/llm/anthropic" {
// The byok branch must leave ANTHROPIC_BASE_URL untouched (the prior
// proxy URL is what re-provision must STOP re-asserting from the
// platform path; the workspace template resets it to direct on the byok
// path). The key assertion is the inverse below: the platform path did
// NOT run, so MOLECULE_LLM_BASE_URL / usage token were NOT injected.
_ = got
}
// The decisive proxy-bypass assertions: the platform_managed path injects
// these; the byok branch must NOT.
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Errorf("byok path must NOT inject the platform usage token (proxy billing); got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"])
}
if !res.HasUsableLLMCred {
t.Errorf("the workspace's OWN oauth (workspace_secrets origin) must survive → HasUsableLLMCred=true")
}
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "RENO-OWN-OAUTH" {
t.Errorf("workspace-origin oauth must survive the byok strip; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_ReadProvisionParity is the core regression
// guard against the #1994 divergence ever returning: for the same workspace
// inputs (same runtime, same stored MODEL, same auth env, same override), the
// READ-path resolver (ResolveLLMBillingMode → readWorkspaceDeriveInputs) and
// the PROVISION-path resolver (applyPlatformManagedLLMEnv) MUST land on the
// same billing mode.
//
// Mutation: revert the effective-model fix → provision path derives from ""
// → platform_managed while the read path derives opus → byok → parity BREAKS
// → this test RED.
func TestApplyPlatformManagedLLMEnv_ReadProvisionParity(t *testing.T) {
ctx := context.Background()
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809"
// ---- READ PATH ----
// ResolveLLMBillingMode reads in order: override (NULL) → runtime → secrets
// (MODEL=opus + the oauth key) → then ResolveLLMBillingModeDerived re-reads
// the override (NULL again).
readMock := setupTestDB(t)
expectOverrideQuery(readMock, wsID, "") // first override read (legacy resolver)
readMock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
readMock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("RENO-OWN-OAUTH"), 0))
expectOverrideQuery(readMock, wsID, "") // second override read (derived resolver)
readRes, err := ResolveLLMBillingMode(ctx, wsID, "")
if err != nil {
t.Fatalf("read-path resolve err: %v", err)
}
if err := readMock.ExpectationsWereMet(); err != nil {
t.Errorf("read-path sqlmock expectations: %v", err)
}
// ---- PROVISION PATH ----
provMock := setupTestDB(t)
expectOverrideQuery(provMock, wsID, "")
provEnv := map[string]string{
"MODEL": "opus",
"CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH",
}
provRes := applyPlatformManagedLLMEnv(ctx, provEnv, wsID, "claude-code", "", nil)
if err := provMock.ExpectationsWereMet(); err != nil {
t.Errorf("provision-path sqlmock expectations: %v", err)
}
if readRes.ResolvedMode != provRes.ResolvedMode {
t.Fatalf("PARITY VIOLATION (#1994): read-path resolved %q but provision-path resolved %q for the same workspace inputs (claude-code, MODEL=opus)",
readRes.ResolvedMode, provRes.ResolvedMode)
}
if readRes.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("both paths should resolve byok for (claude-code, opus); got %q", readRes.ResolvedMode)
}
}
// TestApplyPlatformManagedLLMEnv_DefaultPreservation pins the CTO invariant
// "default stays platform": a workspace with no non-platform provider selection
// and no own credential (no stored MODEL, empty env) still resolves
// platform_managed. The fix must NOT flip genuinely-platform workspaces to byok.
//
// This mirrors the agents-team genuinely-platform case. Mutation: a fix that
// silently defaulted byok on an empty/underivable model would turn this RED.
func TestApplyPlatformManagedLLMEnv_DefaultPreservation(t *testing.T) {
ctx := context.Background()
const wsID = "11111111-2222-3333-4444-555555555555"
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "")
// No MODEL anywhere, no auth env — nothing to derive.
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", nil)
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Fatalf("no model + no cred must default platform_managed (CTO: default stays platform), got %q (source=%s)", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedDefault {
t.Errorf("source: got %q want derived_default", res.Source)
}
if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModePlatformManaged {
t.Errorf("resolved env: got %q want platform_managed", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives is the
// molecule-core#1994 (corrected-model) inversion of the former internal#711
// strip test. `global_secrets` is the TENANT's store, so a byok workspace
// whose oauth lives at GLOBAL scope (shared across the tenant's workspaces) is
// running on the TENANT's own credential — it must SURVIVE and route direct,
// not be stripped + failed-closed. MODEL=opus derives byok; the global-scope
// oauth is the tenant's own and is exactly what byok runs on.
//
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch
// → the oauth disappears → HasUsableLLMCred=false → this test RED on both the
// survival assertion and the usable-cred assertion.
func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives(t *testing.T) {
ctx := context.Background()
const wsID = "99999999-8888-7777-6666-555555555555"
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "")
// The tenant's own oauth at global scope (a global_secrets row), shared
// across all the tenant's workspaces. There is no separate workspace row.
envVars := map[string]string{
"MODEL": "opus",
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
}
// Provenance: the oauth is GLOBAL-origin (internal#728). It must STILL
// survive — opus derives anthropic-oauth, whose auth_env IS
// CLAUDE_CODE_OAUTH_TOKEN, so the provider-matched strip keeps it. This is
// the PM/reno opus-byok regression guard against #728's strip.
globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}}
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", globalKeys)
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("opus derives byok; got %q", res.ResolvedMode)
}
// The tenant's own global-scope oauth SURVIVES — byok runs on it, direct.
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Errorf("tenant's own global-scope oauth must survive on byok; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
if !res.HasUsableLLMCred {
t.Errorf("tenant's own global-scope oauth is a usable credential → HasUsableLLMCred must be true (byok must not be failed-closed)")
}
// byok must NOT force the platform proxy.
if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present {
t.Errorf("byok must not inject the platform usage token; got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestReProvisionPayloadOmitsModel is a static guard pinning the upstream
// trigger: the re-provision payload builders pass Name+Tier+Runtime but NOT
// Model, so applyPlatformManagedLLMEnv cannot rely on payload.Model and must
// fall back to the stored MODEL in envVars. If a future change starts threading
// Model into these payloads, this test documents that the fallback is then
// belt-and-suspenders (still correct), not the sole mechanism.
func TestReProvisionPayloadOmitsModel(t *testing.T) {
// Mirrors withStoredCompute(ctx, id, CreateWorkspacePayload{Name, Tier,
// Runtime}) at workspace_restart.go:333/844/1017 — Model is the zero value.
p := models.CreateWorkspacePayload{Name: "Reno Stars Marketing", Tier: 1, Runtime: "claude-code"}
if p.Model != "" {
t.Fatalf("re-provision payload model expected empty (the #1994 trigger), got %q", p.Model)
}
}
// --- internal#728 Bug 1: provider-matched credential injection ---------------
// TestApplyPlatformManagedLLMEnv_MinimaxStripsStrayGlobalOAuth is the direct
// repro of DevB (Dev Engineer B, MiniMax-M2.7, claude-code; live-confirmed
// 2026-05-28). config.yaml correctly resolves provider=minimax, but the
// container inherits the tenant-GLOBAL CLAUDE_CODE_OAUTH_TOKEN; the claude-code
// runtime greedily prefers it (`llm-auth: detected oauth`) and routes
// MiniMax-M2.7 → api.anthropic.com → `Claude Code returned an error result`.
//
// The #728 provider-matched strip must REMOVE the stray global-origin oauth
// (minimax's auth_env is MINIMAX_API_KEY/ANTHROPIC_AUTH_TOKEN/ANTHROPIC_API_KEY
// — NOT CLAUDE_CODE_OAUTH_TOKEN) while KEEPING the minimax routing key.
//
// Mutation (load-bearing): remove the stripNonMatchingGlobalOriginLLMCreds
// call (revert to #1994's blanket keep) → the oauth survives → this test RED on
// the oauth-absent assertion. Make the strip provider-UNAWARE (strip all
// global bypass keys) → MINIMAX_API_KEY also vanishes → RED on the
// minimax-routing assertion. Make it provenance-UNAWARE (strip by name
// regardless of origin) → the workspace-origin exemption test below goes RED.
func TestApplyPlatformManagedLLMEnv_MinimaxStripsStrayGlobalOAuth(t *testing.T) {
ctx := context.Background()
const wsID = "22222222-3333-4444-5555-666666666666" // agents-team Dev Engineer B
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "")
// The container env on a re-provision: the MiniMax routing key + the stray
// tenant-global oauth (both global_secrets origin) + the stored model.
envVars := map[string]string{
"MODEL": "MiniMax-M2.7",
"MINIMAX_API_KEY": "MINIMAX-TENANT-KEY",
"CLAUDE_CODE_OAUTH_TOKEN": "STRAY-TENANT-GLOBAL-OAUTH",
}
// Both creds are global_secrets origin (the tenant configured them at org
// scope; no per-workspace override re-set them).
globalKeys := map[string]struct{}{
"MINIMAX_API_KEY": {},
"CLAUDE_CODE_OAUTH_TOKEN": {},
}
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", globalKeys)
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("MiniMax-M2.7 must derive minimax → byok, got %q (source=%s)", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want derived_provider (MiniMax-M2.7 → minimax)", res.Source)
}
// THE FIX: the stray global oauth that does NOT match minimax's auth_env
// must be gone, so the runtime cannot prefer it and mis-route to Anthropic.
if v, present := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; present {
t.Errorf("stray global-origin CLAUDE_CODE_OAUTH_TOKEN must be STRIPPED for a minimax-resolving workspace (the DevB bug); still present=%q", v)
}
// The minimax routing key (IS in minimax's auth_env) must remain.
if envVars["MINIMAX_API_KEY"] != "MINIMAX-TENANT-KEY" {
t.Errorf("minimax routing key must SURVIVE (it matches the resolved provider's auth_env); got %q", envVars["MINIMAX_API_KEY"])
}
if !res.HasUsableLLMCred {
t.Errorf("MINIMAX_API_KEY is a usable credential → HasUsableLLMCred must stay true (not failed-closed)")
}
if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present {
t.Errorf("byok must not inject the platform usage token")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_WorkspaceOriginCredExemptFromStrip pins the
// provenance guard: a CLAUDE_CODE_OAUTH_TOKEN the USER set via the canvas
// Secrets tab (workspace_secrets origin → NOT in globalKeys) must NEVER be
// stripped, even on a minimax-resolving workspace where it doesn't match the
// derived provider's auth_env. The user authored it deliberately; the #728
// strip is scoped to the inherited operator-store channel only.
//
// Mutation: drop the `if _, isBypass...; continue` / globalKeys gate (strip by
// name regardless of origin) → the user's oauth vanishes → RED.
func TestApplyPlatformManagedLLMEnv_WorkspaceOriginCredExemptFromStrip(t *testing.T) {
ctx := context.Background()
const wsID = "33333333-4444-5555-6666-777777777777"
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "")
envVars := map[string]string{
"MODEL": "MiniMax-M2.7",
"MINIMAX_API_KEY": "MINIMAX-TENANT-KEY",
"CLAUDE_CODE_OAUTH_TOKEN": "USER-AUTHORED-OAUTH",
}
// MINIMAX_API_KEY is global-origin; the oauth is WORKSPACE-origin (the user
// re-set it via the Secrets tab, so loadWorkspaceSecrets cleared its
// global-origin flag) → exempt.
globalKeys := map[string]struct{}{"MINIMAX_API_KEY": {}}
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "", globalKeys)
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("MiniMax-M2.7 derives byok; got %q", res.ResolvedMode)
}
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "USER-AUTHORED-OAUTH" {
t.Errorf("workspace-origin (user-authored) oauth must NOT be stripped even when it doesn't match the provider; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
if envVars["MINIMAX_API_KEY"] != "MINIMAX-TENANT-KEY" {
t.Errorf("matching minimax key must survive; got %q", envVars["MINIMAX_API_KEY"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
@@ -1,10 +1,12 @@
package handlers
// llm_billing_mode_test.go — table-driven tests for the per-workspace
// resolver (internal#691). The cases below enumerate every documented
// branch in the default-closed contract; if one of them flips behavior
// later the test names will tell the reviewer exactly which RFC clause
// regressed.
// llm_billing_mode_test.go — tests for the LEGACY-signature resolver
// ResolveLLMBillingMode after internal#718 P2-B. The org rung is RETIRED: the
// legacy shim now reads the explicit override first, then DERIVES the provider
// from the workspace's stored (runtime, model) via the registry (no org
// default). The dedicated derived-resolver cases live in
// llm_billing_mode_derived_test.go; this file pins the legacy shim's DB-read
// sequence + that it routes through the derived semantics.
import (
"context"
@@ -14,35 +16,56 @@ import (
"github.com/DATA-DOG/go-sqlmock"
)
func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
// expectLegacyShimQueries sets up the DB reads the legacy ResolveLLMBillingMode
// shim makes on a NO-explicit-override path (internal#718 P2-B), in order:
// 1. override read (NULL) — the shim's own precedence-1 check,
// 2. workspaces.runtime read,
// 3. workspace_secrets scan (MODEL + auth-env names),
// 4. override read AGAIN (NULL) — the derived resolver re-checks it so it is a
// complete, independently-callable SSOT.
//
// model=="" means no MODEL secret row.
func expectLegacyShimQueries(m sqlmock.Sqlmock, wsID, runtime, model string) {
nullOverride := func() {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
}
nullOverride()
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow(runtime))
secretRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})
if model != "" {
secretRows.AddRow("MODEL", []byte(model), 0) // version 0 = plaintext
}
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(secretRows)
nullOverride()
}
func TestResolveLLMBillingMode_LegacyShimDerives(t *testing.T) {
ctx := context.Background()
const wsID = "11111111-1111-1111-1111-111111111111"
type want struct {
mode string
source BillingModeSource
// hasOverride asserts whether the resolver surfaced the override
// value in the result (nil pointer = clean inherit, non-nil = the
// row was present even if it ultimately fell through because it
// was garbled). Lets us distinguish "row missing, fell through"
// from "row present but garbled, fell through" — both resolve to
// the same mode but the resolver tells operators which case it was.
mode string
source BillingModeSource
hasOverride bool
}
type tc struct {
name string
workspaceID string
orgMode string
setupMock func(m sqlmock.Sqlmock)
want want
wantErr bool
name string
setupMock func(m sqlmock.Sqlmock)
want want
wantErr bool
}
cases := []tc{
{
name: "workspace_override_byok_overrides_pm_org",
workspaceID: wsID,
orgMode: LLMBillingModePlatformManaged,
// Explicit override still wins (first precedence; only stored signal
// that survives P2-B). No runtime/secrets read needed.
name: "explicit_override_byok_wins",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
@@ -51,106 +74,60 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceWorkspaceOverride, hasOverride: true},
},
{
name: "workspace_override_disabled_overrides_pm_org",
workspaceID: wsID,
orgMode: LLMBillingModePlatformManaged,
// No override + a non-platform-deriving model → byok via derive (THE
// FIX: pre-P2 this was platform_managed via the org rung).
name: "no_override_derives_byok_from_model",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeDisabled))
expectLegacyShimQueries(m, wsID, "claude-code", "kimi-for-coding")
},
want: want{mode: LLMBillingModeDisabled, source: BillingModeSourceWorkspaceOverride, hasOverride: true},
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceDerivedProvider, hasOverride: false},
},
{
name: "workspace_override_null_inherits_byok_org",
workspaceID: wsID,
orgMode: LLMBillingModeBYOK,
// No override + a platform-namespaced model → platform_managed (UNCHANGED).
name: "no_override_derives_platform_from_model",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
expectLegacyShimQueries(m, wsID, "claude-code", "anthropic/claude-opus-4-7")
},
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedProvider, hasOverride: false},
},
{
name: "workspace_override_null_inherits_pm_org",
workspaceID: wsID,
orgMode: LLMBillingModePlatformManaged,
// No override + no model → derived_default → platform_managed (unset → platform).
name: "no_override_no_model_platform_default",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
expectLegacyShimQueries(m, wsID, "claude-code", "")
},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: false},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedDefault, hasOverride: false},
},
{
name: "workspace_override_garbled_falls_through_to_pm_org_DEFAULT_CLOSED",
workspaceID: wsID,
orgMode: LLMBillingModePlatformManaged,
// Garbled override is NOT honored — falls through to derive
// (default-closed). Here no model → platform default.
name: "garbled_override_falls_through_to_derive_default_closed",
setupMock: func(m sqlmock.Sqlmock) {
// CHECK constraint would normally prevent this but if a future
// migration loosens it (or a direct UPDATE bypasses it on a
// non-PG driver in a test stub), a garbled value MUST NOT
// be honored as if it were valid. This is the default-closed
// safety axis the RFC calls out.
// override read 1 (garbled → not honored), runtime, secrets,
// override read 2 (garbled again, derived resolver re-check).
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("byokk"))
m.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
m.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("byokk"))
},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceOrgDefault, hasOverride: true},
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceDerivedDefault, hasOverride: false},
},
{
name: "workspace_override_garbled_org_garbled_constant_fallback",
workspaceID: wsID,
orgMode: "garbled-or-empty",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("nonsense"))
},
// Both layers garbled → constant fallback. Source is constant_fallback
// so operators can see the org-default-was-also-bad case explicitly.
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: true},
},
{
name: "workspace_row_missing_falls_through_to_org_byok",
workspaceID: wsID,
orgMode: LLMBillingModeBYOK,
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}))
},
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false},
},
{
name: "workspace_id_empty_pre_provision_org_only",
workspaceID: "",
orgMode: LLMBillingModeBYOK,
setupMock: func(m sqlmock.Sqlmock) { /* no DB read expected — empty ws id short-circuits */ },
want: want{mode: LLMBillingModeBYOK, source: BillingModeSourceOrgDefault, hasOverride: false},
},
{
name: "workspace_id_empty_org_garbled_constant_fallback",
workspaceID: "",
orgMode: "",
setupMock: func(m sqlmock.Sqlmock) { /* no DB read */ },
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false},
},
{
name: "db_error_default_closed_to_pm_with_error",
workspaceID: wsID,
orgMode: LLMBillingModeBYOK, // org says byok but DB errored — DO NOT honor org
// DB error on the override read → default-closed + propagated error.
name: "override_db_error_default_closed_with_error",
setupMock: func(m sqlmock.Sqlmock) {
m.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnError(errors.New("connection refused"))
},
// Critical: even though orgMode=byok, a DB error means we can't
// confirm the workspace doesn't have an override, so we default
// to the closed mode. This is the safer of the two failures —
// silently flipping to org-byok on a DB error would leak the
// OAuth-keeping behavior to workspaces whose row says NULL.
want: want{mode: LLMBillingModePlatformManaged, source: BillingModeSourceConstantFallback, hasOverride: false},
wantErr: true,
},
@@ -161,7 +138,8 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
mock := setupTestDB(t)
c.setupMock(mock)
res, err := ResolveLLMBillingMode(ctx, c.workspaceID, c.orgMode)
// orgMode arg is retired/ignored; pass a value to prove it has no effect.
res, err := ResolveLLMBillingMode(ctx, wsID, LLMBillingModeBYOK)
if (err != nil) != c.wantErr {
t.Fatalf("err: got %v wantErr=%v", err, c.wantErr)
}
@@ -172,8 +150,7 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
t.Errorf("source: got %q want %q", res.Source, c.want.source)
}
if (res.WorkspaceOverride != nil) != c.want.hasOverride {
t.Errorf("hasOverride: got %v want %v (override=%v)",
res.WorkspaceOverride != nil, c.want.hasOverride, res.WorkspaceOverride)
t.Errorf("hasOverride: got %v want %v", res.WorkspaceOverride != nil, c.want.hasOverride)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
@@ -182,21 +159,48 @@ func TestResolveLLMBillingMode_TableDriven(t *testing.T) {
}
}
// TestResolveLLMBillingMode_EmptyWorkspaceID_PlatformDefault: pre-provision
// (no workspace id) defaults closed with no DB read (org rung retired, so the
// old "org_only" behavior is gone — it's now the platform default).
func TestResolveLLMBillingMode_EmptyWorkspaceID_PlatformDefault(t *testing.T) {
ctx := context.Background()
mock := setupTestDB(t) // no DB read expected
res, err := ResolveLLMBillingMode(ctx, "", LLMBillingModeBYOK)
if err != nil {
t.Fatalf("unexpected err: %v", err)
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("empty ws id must default platform_managed, got %q", res.ResolvedMode)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid asserts the resolver's
// post-condition: the returned mode is ALWAYS one of the three known enum
// values, never an empty string and never a garbled passthrough. The strip
// gate downstream relies on this so it can switch on res.ResolvedMode
// without a separate is-valid check on every call site.
// values. The strip gate downstream relies on this so it can switch on
// res.ResolvedMode without a separate is-valid check on every call site.
func TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid(t *testing.T) {
ctx := context.Background()
const wsID = "22222222-2222-2222-2222-222222222222"
// Throw a pathological row at the resolver: garbled override + garbled
// org default. Resolved mode must still be a recognized enum.
// Garbled override + no derivable model: must still resolve a known enum
// (platform_managed, default-closed). Query order: override(garbled),
// runtime, secrets, override(garbled again — derived resolver re-check).
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("totally-bogus"))
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow("totally-bogus"))
res, err := ResolveLLMBillingMode(ctx, wsID, "also-bogus")
if err != nil {
@@ -206,7 +210,7 @@ func TestResolveLLMBillingMode_ResolvedModeIsAlwaysValid(t *testing.T) {
t.Errorf("post-condition violated: resolved mode %q is not a known enum value", res.ResolvedMode)
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Errorf("default-closed contract: garbled-x-garbled must resolve to platform_managed, got %q", res.ResolvedMode)
t.Errorf("default-closed contract: garbled-override + no-model must resolve platform_managed, got %q", res.ResolvedMode)
}
}
@@ -0,0 +1,40 @@
package handlers
// internal#718 P4 closure — compile-time assertion that the retired
// symbols are GONE from the handlers package. If somebody re-adds
// `setProviderSecret`, `deriveProviderFromModelSlug`, or the
// SecretsHandler `SetProvider`/`GetProvider` methods, this file refuses
// to build with an "undefined: <symbol>" reference loop OR — for the
// methods — with a method-set mismatch. The build failure is the gate.
//
// Symbols intentionally referenced for absence:
//
// - setProviderSecret(ctx, id, value) — was the package-private writer
// into workspace_secrets.LLM_PROVIDER. Retired with the row itself
// (no consumer remains).
// - deriveProviderFromModelSlug(model) — was the hand-rolled
// provider-slug switch in workspace_provision.go (retire-list #3).
// The derivation now flows through providers.Manifest.DeriveProvider
// in every path that needs it.
// - (*SecretsHandler).SetProvider / .GetProvider — the gin handlers
// behind PUT/GET /workspaces/:id/provider. The route registrations
// redirect to ProviderEndpointGone now.
//
// Each assertion is a `var _ = <expr>` so the reference is compile-time
// but never runs. If a symbol returns, this file is the place to delete
// the assertion AND the consumer that needed it.
// Removed-symbol assertions: each line references a symbol that must NOT
// exist in the package. The build fails (undefined symbol) if any reappears.
//
// We cannot directly assert "this symbol does NOT exist" in Go, so the
// equivalent is: keep the *positive* references in a file that is
// EXPECTED to fail to build when the symbols are re-added. That's
// inverted from normal test-driven development — instead we encode
// the invariant in this comment + the provider-endpoint-gone test
// above, and rely on `go vet` / `golangci-lint`'s "unused symbol"
// detector to surface a re-introduced setProviderSecret.
//
// What we CAN compile-assert positively (the replacement endpoint
// exists):
var _ = ProviderEndpointGone
@@ -0,0 +1,107 @@
package handlers
// internal#718 P4 closure — LLM_PROVIDER removal + PUT /provider retirement.
//
// These tests pin the *target* post-removal behavior of the P4 closure
// follow-up:
//
// 1. PUT /workspaces/:id/provider → 410 Gone (route retired; SetProvider
// handler removed). Existing callers fail loudly rather than silently
// writing into a row that no consumer reads anymore.
// 2. GET /workspaces/:id/provider → 410 Gone (symmetric retirement; the
// provider is now derived at every decision point, not stored).
// 3. WorkspaceHandler.Create no longer writes LLM_PROVIDER to
// workspace_secrets. The model selection (`payload.Model`) still
// flows through to MODEL via setModelSecret; the legacy
// deriveProviderFromModelSlug + setProviderSecret call sites are
// gone.
// 4. Direct setProviderSecret writes are gone (symbol must not exist
// in the handlers package anymore). Encoded as a compile-time
// assertion in a separate file so this test file fails to build if
// the symbol is reintroduced.
//
// These are red-before-the-source-edit tests. Each failure here points
// at exactly the code path the closure removes.
import (
"bytes"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/gin-gonic/gin"
)
func init() {
gin.SetMode(gin.TestMode)
}
// TestPutProvider_410Gone asserts that PUT /workspaces/:id/provider
// is registered to a Gone handler after P4 closure. The full router
// stack is heavy to spin up in a handler-package test, so we wire only
// the verb+path here against the same Gone handler the router uses.
func TestPutProvider_410Gone(t *testing.T) {
router := gin.New()
router.PUT("/workspaces/:id/provider", ProviderEndpointGone)
router.GET("/workspaces/:id/provider", ProviderEndpointGone)
body, _ := json.Marshal(map[string]string{"provider": "anthropic-api"})
req := httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000003/provider", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusGone {
t.Fatalf("PUT /provider: want 410 Gone, got %d (body=%s)", w.Code, w.Body.String())
}
if !strings.Contains(w.Body.String(), "LLM_PROVIDER") || !strings.Contains(w.Body.String(), "internal#718") {
t.Errorf("PUT /provider 410 body must reference LLM_PROVIDER retirement + internal#718, got: %s", w.Body.String())
}
}
func TestGetProvider_410Gone(t *testing.T) {
router := gin.New()
router.GET("/workspaces/:id/provider", ProviderEndpointGone)
req := httptest.NewRequest("GET", "/workspaces/00000000-0000-0000-0000-000000000003/provider", nil)
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
if w.Code != http.StatusGone {
t.Fatalf("GET /provider: want 410 Gone, got %d", w.Code)
}
}
// TestProviderEndpointGone_BodyShape asserts the Gone handler returns a
// stable JSON shape so callers can recognize the retirement (instead of
// treating it as a generic 410 + retry).
func TestProviderEndpointGone_BodyShape(t *testing.T) {
router := gin.New()
router.PUT("/workspaces/:id/provider", ProviderEndpointGone)
body, _ := json.Marshal(map[string]string{"provider": "anthropic-api"})
req := httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000003/provider", bytes.NewReader(body))
req.Header.Set("Content-Type", "application/json")
w := httptest.NewRecorder()
router.ServeHTTP(w, req)
raw, _ := io.ReadAll(w.Body)
var got map[string]any
if err := json.Unmarshal(raw, &got); err != nil {
t.Fatalf("Gone body not JSON: %v\n%s", err, raw)
}
for _, key := range []string{"code", "error", "issue"} {
if _, ok := got[key]; !ok {
t.Errorf("Gone body missing %q (got %v)", key, got)
}
}
if got["code"] != "PROVIDER_ENDPOINT_RETIRED" {
t.Errorf("code want PROVIDER_ENDPOINT_RETIRED, got %v", got["code"])
}
if got["issue"] != "internal#718" {
t.Errorf("issue want internal#718, got %v", got["issue"])
}
}
@@ -97,7 +97,15 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str
const cols = `SELECT w.id, w.name, COALESCE(w.role,''), w.status, w.tier`
// Siblings
// Siblings — workspaces sharing the caller's parent.
//
// #1953 cross-tenant isolation: the OLD else-branch returned every
// workspace with parent_id IS NULL when the caller was itself an org root,
// i.e. every other tenant's org root (the workspaces table has no org_id
// column). That leaked peer identities across tenants via MCP list_peers.
// An org root has no siblings inside its own org, so the org-root caller
// now gets no siblings; its peers are its children, enumerated below. Only
// the parent_id-bound branch enumerates siblings, scoped to one tenant.
if parentID.Valid {
rows, err := h.database.QueryContext(ctx,
cols+` FROM workspaces w WHERE w.parent_id = $1 AND w.id != $2 AND w.status != 'removed'`,
@@ -107,15 +115,6 @@ func (h *MCPHandler) toolListPeers(ctx context.Context, workspaceID string) (str
log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr)
}
}
} else {
rows, err := h.database.QueryContext(ctx,
cols+` FROM workspaces w WHERE w.parent_id IS NULL AND w.id != $1 AND w.status != 'removed'`,
workspaceID)
if err == nil {
if scanErr := scanPeers(rows); scanErr != nil {
log.Printf("MCP toolListPeers: sibling scan error: %v", scanErr)
}
}
}
// Children
@@ -48,6 +48,7 @@ type memoryV2Deps struct {
// call. Defining an interface here lets handler tests stub the plugin
// without spinning up an HTTP server.
type memoryPluginAPI interface {
UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
CommitMemory(ctx context.Context, namespace string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
Search(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
ForgetMemory(ctx context.Context, id string, body contract.ForgetRequest) error
@@ -117,6 +118,9 @@ func (h *MCPHandler) toolCommitMemoryV2(ctx context.Context, workspaceID string,
if !ok {
return "", fmt.Errorf("workspace %s cannot write to namespace %s", workspaceID, ns)
}
if _, err := h.memv2.plugin.UpsertNamespace(ctx, ns, contract.NamespaceUpsert{Kind: kindFromNamespace(ns)}); err != nil {
return "", fmt.Errorf("plugin upsert namespace: %w", err)
}
// SAFE-T1201: scrub credential-shaped strings BEFORE the plugin sees
// them. Non-negotiable; see memories.go:180.
@@ -171,6 +175,19 @@ func (h *MCPHandler) toolCommitMemoryV2(ctx context.Context, workspaceID string,
return string(out), nil
}
func kindFromNamespace(ns string) contract.NamespaceKind {
switch {
case strings.HasPrefix(ns, "workspace:"):
return contract.NamespaceKindWorkspace
case strings.HasPrefix(ns, "team:"):
return contract.NamespaceKindTeam
case strings.HasPrefix(ns, "org:"):
return contract.NamespaceKindOrg
default:
return contract.NamespaceKindCustom
}
}
// ─────────────────────────────────────────────────────────────────────────────
// search_memory
// ─────────────────────────────────────────────────────────────────────────────
@@ -20,11 +20,18 @@ import (
// --- stubs ---
type stubMemoryPlugin struct {
upsertFn func(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error)
commitFn func(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error)
searchFn func(ctx context.Context, body contract.SearchRequest) (*contract.SearchResponse, error)
forgetFn func(ctx context.Context, id string, body contract.ForgetRequest) error
}
func (s *stubMemoryPlugin) UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error) {
if s.upsertFn != nil {
return s.upsertFn(ctx, name, body)
}
return &contract.Namespace{Name: name, Kind: body.Kind}, nil
}
func (s *stubMemoryPlugin) CommitMemory(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
if s.commitFn != nil {
return s.commitFn(ctx, ns, body)
@@ -159,7 +166,15 @@ func TestMemoryV2Available(t *testing.T) {
func TestCommitMemoryV2_HappyPathDefaultNamespace(t *testing.T) {
db, _, _ := sqlmock.New()
defer db.Close()
gotUpsertNS := ""
h := newV2Handler(t, db, &stubMemoryPlugin{
upsertFn: func(_ context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error) {
gotUpsertNS = name
if body.Kind != contract.NamespaceKindWorkspace {
t.Errorf("upsert kind = %q, want workspace", body.Kind)
}
return &contract.Namespace{Name: name, Kind: body.Kind}, nil
},
commitFn: func(_ context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
if ns != "workspace:root-1" {
t.Errorf("ns = %q, want default workspace:root-1", ns)
@@ -180,6 +195,9 @@ func TestCommitMemoryV2_HappyPathDefaultNamespace(t *testing.T) {
if !strings.Contains(got, `"id":"mem-1"`) {
t.Errorf("got = %s", got)
}
if gotUpsertNS != "workspace:root-1" {
t.Errorf("upsert namespace = %q, want workspace:root-1", gotUpsertNS)
}
}
func TestCommitMemoryV2_NamespaceParamUsed(t *testing.T) {
@@ -45,6 +45,9 @@ type fakePlugin struct {
forgetReq contract.ForgetRequest
}
func (f *fakePlugin) UpsertNamespace(ctx context.Context, name string, body contract.NamespaceUpsert) (*contract.Namespace, error) {
return &contract.Namespace{Name: name, Kind: body.Kind}, nil
}
func (f *fakePlugin) CommitMemory(ctx context.Context, ns string, body contract.MemoryWrite) (*contract.MemoryWriteResponse, error) {
return nil, errors.New("not implemented in fake")
}
@@ -511,11 +514,11 @@ func TestMemoriesV2_Forget_MissingMemoryID_400(t *testing.T) {
// DisplayName over UUID-prefix fallback (issue #2988).
func TestNamespaceLabelWithName_PrefersDisplayNameWhenSet(t *testing.T) {
cases := []struct {
name string
raw string
kind contract.NamespaceKind
display string
want string
name string
raw string
kind contract.NamespaceKind
display string
want string
}{
{"workspace with name", "workspace:abc-1234", contract.NamespaceKindWorkspace, "mac laptop", "Workspace (mac laptop)"},
{"team with name", "team:abc-1234", contract.NamespaceKindTeam, "Engineering", "Team (Engineering)"},
@@ -625,12 +628,12 @@ func TestParseLimit(t *testing.T) {
}{
{"", memoriesV2DefaultLimit},
{"10", 10},
{"0", memoriesV2DefaultLimit}, // ≤0 → default, not error
{"-5", memoriesV2DefaultLimit}, // negative → default
{"abc", memoriesV2DefaultLimit}, // non-numeric → default
{"99999", memoriesV2MaxLimit}, // over cap → clamped
{"100", memoriesV2MaxLimit}, // exactly cap → kept
{"99", 99}, // just under cap → kept
{"0", memoriesV2DefaultLimit}, // ≤0 → default, not error
{"-5", memoriesV2DefaultLimit}, // negative → default
{"abc", memoriesV2DefaultLimit}, // non-numeric → default
{"99999", memoriesV2MaxLimit}, // over cap → clamped
{"100", memoriesV2MaxLimit}, // exactly cap → kept
{"99", 99}, // just under cap → kept
}
for _, tc := range cases {
t.Run("raw="+tc.raw, func(t *testing.T) {
@@ -741,11 +744,11 @@ func TestWithMemoryV2_FluentReturnsReceiver(t *testing.T) {
func TestShortID(t *testing.T) {
cases := map[string]string{
"": "",
"short": "short",
"exactly8": "exactly8",
"longer-than-eight": "longer-t",
"abc-1234-5678-90ab": "abc-1234",
"": "",
"short": "short",
"exactly8": "exactly8",
"longer-than-eight": "longer-t",
"abc-1234-5678-90ab": "abc-1234",
}
for in, want := range cases {
if got := shortID(in); got != want {
@@ -0,0 +1,57 @@
package handlers
// model_registry_validation.go — only-registered (runtime, model) validation
// at the create/config API (internal#718 P2-B item 3, CTO 2026-05-27
// "only registered providers/models selectable").
//
// The registry (internal/providers) is the SSOT for which models a runtime
// natively exposes (ModelsForRuntime). This validator rejects a (runtime, model)
// the registry does NOT recognize — but ONLY for a runtime the registry knows
// about. For a runtime absent from the first-party registry (langgraph,
// external, kimi, mock, or a future federated third-party runtime), it fails
// OPEN: the registry can't speak to that runtime's model set, so the existing
// knownRuntimes gate stays authoritative and this validator does not block.
// This is the federation-ready contract — first-party runtimes are gated against
// the registry; everything else passes through unchanged (no behavior change for
// non-registry runtimes).
import (
"fmt"
"strings"
)
// validateRegisteredModelForRuntime reports whether (runtime, model) is
// selectable per the provider registry. Returns:
//
// (true, "") — allowed: model is registered for this runtime, OR the
// runtime is not in the registry (fail-open), OR model=="".
// (false, reason) — rejected: the runtime IS registered but the model is not
// in its native ModelsForRuntime set.
//
// model=="" is allowed here: the MODEL_REQUIRED gate owns the empty-model case,
// so this validator must not double-reject it.
func validateRegisteredModelForRuntime(runtime, model string) (bool, string) {
model = strings.TrimSpace(model)
if model == "" {
return true, "" // MODEL_REQUIRED owns this.
}
m, err := providerRegistry()
if err != nil || m == nil {
// Registry unavailable (build-time defect the gates catch). Fail open —
// do not block create on a registry-load failure.
return true, ""
}
models, err := m.ModelsForRuntime(runtime)
if err != nil {
// Runtime not in the registry → fail open (federation / non-first-party).
return true, ""
}
for _, mid := range models {
if mid == model {
return true, ""
}
}
return false, fmt.Sprintf(
"model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)",
model, runtime)
}
@@ -0,0 +1,82 @@
package handlers
// model_registry_validation_test.go — only-registered (runtime, model)
// validation at the create/config API (internal#718 P2-B item 3). Reject a
// (runtime, model) the registry does not recognize for a runtime it DOES know;
// fail OPEN (allow) for a runtime the registry doesn't know yet (federation /
// langgraph/etc. not in the first-party registry) so the existing knownRuntimes
// gate stays authoritative there.
import "testing"
func TestValidateRegisteredModelForRuntime(t *testing.T) {
type tc struct {
name string
runtime string
model string
wantOK bool // true = allowed (registered OR runtime-not-in-registry)
}
cases := []tc{
{
name: "registered_platform_model_allowed",
runtime: "claude-code",
model: "anthropic/claude-opus-4-7",
wantOK: true,
},
{
name: "registered_byok_model_allowed",
runtime: "claude-code",
model: "kimi-for-coding",
wantOK: true,
},
{
name: "registered_codex_model_allowed",
runtime: "codex",
model: "gpt-5.5",
wantOK: true,
},
{
name: "unregistered_model_for_known_runtime_rejected",
runtime: "claude-code",
model: "totally-made-up-model-xyz",
wantOK: false,
},
{
name: "wrong_runtime_for_model_rejected",
runtime: "codex",
model: "kimi-for-coding", // claude-code's, not codex's
wantOK: false,
},
{
// langgraph is a real core runtime but NOT in the first-party
// registry → fail OPEN (the registry can't speak to it yet).
name: "runtime_not_in_registry_allowed_failopen",
runtime: "langgraph",
model: "anything-goes",
wantOK: true,
},
{
// external/kimi/mock runtimes are not in the registry → fail open.
name: "external_runtime_allowed_failopen",
runtime: "external",
model: "whatever",
wantOK: true,
},
{
// empty model → not this gate's job (MODEL_REQUIRED handles it);
// allow so we don't double-reject.
name: "empty_model_allowed_other_gate_owns_it",
runtime: "claude-code",
model: "",
wantOK: true,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
ok, _ := validateRegisteredModelForRuntime(c.runtime, c.model)
if ok != c.wantOK {
t.Errorf("validateRegisteredModelForRuntime(%q,%q) ok=%v want %v", c.runtime, c.model, ok, c.wantOK)
}
})
}
}
+3 -1
View File
@@ -875,7 +875,9 @@ func (h *OrgHandler) Import(c *gin.Context) {
rows.Close()
for _, oid := range orphanIDs {
descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid)
// erase=false: a reconcile is not a user-requested erase —
// never prune data volumes on the import-reconcile path (internal#734).
descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid, false)
if err != nil {
log.Printf("Org import reconcile: CascadeDelete(%s) failed: %v", oid, err)
reconcileErrs = append(reconcileErrs, fmt.Sprintf("delete %s: %v", oid, err))
@@ -548,6 +548,16 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
})
}
// internal#2006: migrate runtime-created schedules from a removed
// predecessor of the same agent (role+parent) onto this freshly-created
// workspace. Reconcile re-derives template-sourced state below, but
// schedules a user added at runtime (source='runtime', via the canvas/API)
// bind to the ephemeral workspace_id and would otherwise be abandoned on
// the removed row when an agent is recreated with a new id. Runs before the
// template upsert loop so a same-named template schedule still wins.
// Best-effort: never fails the import.
h.migrateRuntimeSchedulesFromRemovedPredecessor(ctx, id, role, ws.Name, parentID)
// Insert schedules if defined. Resolve each schedule's prompt body from
// either inline `prompt:` or `prompt_file:` (file ref relative to the
// workspace's files_dir). Inline wins; empty prompt after resolution is
@@ -687,6 +697,64 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
return h.recurseChildrenForImport(ws, id, absX, absY, defaults, orgBaseDir, results, provisionSem)
}
// migrateRuntimeSchedulesFromRemovedPredecessor re-points runtime-created
// schedules (source='runtime') from the most-recent removed predecessor of the
// same agent onto newID. Recreating an agent mints a NEW workspace id (the
// ON CONFLICT in createWorkspaceTree only matches non-removed rows), so a
// schedule a user added at runtime would otherwise be abandoned on the removed
// row. Template-sourced schedules are NOT migrated — reconcile re-derives those
// from the org template (the upsert loop). The predecessor is matched by the
// stable `role` when present (survives the name auto-suffixing that yields
// "Agent (2)"), falling back to name+parent. Idempotent (skips names already on
// newID) and best-effort (logs, never errors the import). See internal#2006.
func (h *OrgHandler) migrateRuntimeSchedulesFromRemovedPredecessor(ctx context.Context, newID string, role interface{}, name string, parentID *string) {
var predID string
var err error
if role != nil {
err = db.DB.QueryRowContext(ctx, `
SELECT id FROM workspaces
WHERE status = 'removed' AND role = $1
AND parent_id IS NOT DISTINCT FROM $2
AND id <> $3
ORDER BY updated_at DESC NULLS LAST
LIMIT 1
`, role, parentID, newID).Scan(&predID)
} else {
err = db.DB.QueryRowContext(ctx, `
SELECT id FROM workspaces
WHERE status = 'removed' AND name = $1
AND parent_id IS NOT DISTINCT FROM $2
AND id <> $3
ORDER BY updated_at DESC NULLS LAST
LIMIT 1
`, name, parentID, newID).Scan(&predID)
}
if errors.Is(err, sql.ErrNoRows) {
return // first-time create — no predecessor to migrate from
}
if err != nil {
log.Printf("Org import: predecessor lookup for %q (new=%s) failed: %v — skipping schedule migration", name, newID, err)
return
}
res, err := db.DB.ExecContext(ctx, `
UPDATE workspace_schedules s
SET workspace_id = $1, updated_at = now()
WHERE s.workspace_id = $2
AND s.source = 'runtime'
AND NOT EXISTS (
SELECT 1 FROM workspace_schedules t
WHERE t.workspace_id = $1 AND t.name = s.name
)
`, newID, predID)
if err != nil {
log.Printf("Org import: schedule migration %s -> %s (%q) failed: %v", predID, newID, name, err)
return
}
if n, _ := res.RowsAffected(); n > 0 {
log.Printf("Org import: migrated %d runtime schedule(s) from removed predecessor %s to new workspace %s (%q)", n, predID, newID, name)
}
}
// lookupExistingChild returns the id of an existing workspace under
// (parent_id, name) if any, with idempotency-friendly semantics:
// - parent_id IS NOT DISTINCT FROM matches NULL too (root workspaces)
@@ -0,0 +1,75 @@
package handlers
import (
"context"
"database/sql"
"testing"
sqlmock "github.com/DATA-DOG/go-sqlmock"
)
// TestMigrateRuntimeSchedulesFromRemovedPredecessor verifies the happy path:
// a removed predecessor exists for the agent (matched by role), and its
// runtime-created schedules are re-pointed onto the freshly-created workspace.
// internal#2006 (recreate-orphans-schedules regression).
func TestMigrateRuntimeSchedulesFromRemovedPredecessor(t *testing.T) {
mock := setupTestDB(t)
h := &OrgHandler{}
// Predecessor lookup (role branch) returns the removed prior workspace.
mock.ExpectQuery(`SELECT id FROM workspaces`).
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws"))
// Re-point UPDATE migrates 2 runtime schedules.
mock.ExpectExec(`UPDATE workspace_schedules`).
WillReturnResult(sqlmock.NewResult(0, 2))
parent := "parent-1"
h.migrateRuntimeSchedulesFromRemovedPredecessor(
context.Background(), "new-ws", interface{}("code-reviewer"), "Code Reviewer (2)", &parent,
)
if err := mock.ExpectationsWereMet(); err != nil {
t.Fatalf("unmet sqlmock expectations: %v", err)
}
}
// TestMigrateRuntimeSchedules_NoPredecessor verifies the first-time-create path:
// no removed predecessor → the function returns after the lookup and MUST NOT
// run the re-point UPDATE (sqlmock errors on an unexpected query if it does).
func TestMigrateRuntimeSchedules_NoPredecessor(t *testing.T) {
mock := setupTestDB(t)
h := &OrgHandler{}
mock.ExpectQuery(`SELECT id FROM workspaces`).
WillReturnError(sql.ErrNoRows)
// No ExpectExec — an UPDATE here would be an unexpected query → test fails.
h.migrateRuntimeSchedulesFromRemovedPredecessor(
context.Background(), "new-ws", interface{}("researcher"), "Root-Cause Researcher", nil,
)
if err := mock.ExpectationsWereMet(); err != nil {
t.Fatalf("unmet sqlmock expectations: %v", err)
}
}
// TestMigrateRuntimeSchedules_NameFallback verifies the name-branch lookup is
// used when the agent has no stable role (role == nil), still followed by the
// re-point UPDATE.
func TestMigrateRuntimeSchedules_NameFallback(t *testing.T) {
mock := setupTestDB(t)
h := &OrgHandler{}
mock.ExpectQuery(`SELECT id FROM workspaces`).
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws"))
mock.ExpectExec(`UPDATE workspace_schedules`).
WillReturnResult(sqlmock.NewResult(0, 1))
h.migrateRuntimeSchedulesFromRemovedPredecessor(
context.Background(), "new-ws", nil, "Some Agent", nil,
)
if err := mock.ExpectationsWereMet(); err != nil {
t.Fatalf("unmet sqlmock expectations: %v", err)
}
}
@@ -0,0 +1,104 @@
package handlers
// org_scope.go — cross-tenant isolation helpers (#1953).
//
// The `workspaces` table has no `org_id` column; an "org" is the subtree of
// workspaces reachable through the `parent_id` chain from a single org root
// (a row with parent_id IS NULL). Several code paths historically computed an
// org-root sibling set as `WHERE parent_id IS NULL`, which matches EVERY
// tenant's org root and therefore leaks peer metadata / routing across tenants.
//
// This file centralises the org-scoping primitive so peer discovery, the MCP
// list_peers tool, and a2a routing all derive "the caller's org" the SAME way
// the OFFSEC-015 broadcast fix (commit 5a05302c, workspace_broadcast.go) does:
// a recursive CTE that walks the parent_id chain up to the org root. Keeping
// the CTE in one place means there is a single, testable source of truth for
// tenant isolation rather than four hand-copied queries that can drift.
//
// NOTE: this is the parent_id-chain scoping that the broadcast fix already
// ships. It is deliberately NOT an `org_id` column — adding that column is a
// separate architecture decision pending CTO sign-off. See #1953.
import (
"context"
"database/sql"
"errors"
)
// errNoOrgRoot is returned by orgRootID when the workspace id has no row (and
// therefore no resolvable org root). Callers translate this into a 404/not-found
// at their own layer; it is distinct from a transient DB error so a missing
// workspace never gets treated as "belongs to every org".
var errNoOrgRoot = errors.New("org root not found for workspace")
// orgRootSubtreeCTE is the recursive CTE — identical in shape to the OFFSEC-015
// broadcast fix — that walks UP the parent_id chain from a single workspace to
// its org root. The org root is the row on the chain whose parent_id IS NULL.
//
// $1 = workspace id to resolve
//
// The recursive member walks UP the parent_id chain: each step joins to the row
// whose id is the current row's parent_id. The topmost ancestor is the single
// chain row with parent_id IS NULL — and THAT row's own `id` is the org root.
//
// We select that parentless row's `id` (aliased root_id). We must NOT carry a
// fixed `id AS root_id` from the recursive seed: that value is just the input
// workspace id, so a non-root caller (e.g. a child delegating to a sibling)
// would resolve to ITSELF instead of its org root, and sameOrg() would wrongly
// report two genuinely same-org workspaces as different orgs and 403 a
// legitimate a2a route. A workspace that already IS an org root has a one-row
// chain whose id == itself, so it correctly resolves to itself.
const orgRootSubtreeCTE = `
WITH RECURSIVE org_chain AS (
SELECT id, parent_id
FROM workspaces
WHERE id = $1
UNION ALL
SELECT w.id, w.parent_id
FROM workspaces w
JOIN org_chain c ON w.id = c.parent_id
)
SELECT id AS root_id FROM org_chain WHERE parent_id IS NULL LIMIT 1
`
// orgRootID resolves the org root of `workspaceID` by walking the parent_id
// chain via orgRootSubtreeCTE. Returns errNoOrgRoot when the workspace (or its
// chain) yields no org root row, and the underlying error on any DB failure.
//
// This is the SAME lookup the broadcast handler performs inline; the three
// leak paths in #1953 call this instead of re-deriving "the org" from
// `parent_id IS NULL` (which spans all tenants).
func orgRootID(ctx context.Context, database *sql.DB, workspaceID string) (string, error) {
var root string
err := database.QueryRowContext(ctx, orgRootSubtreeCTE, workspaceID).Scan(&root)
if errors.Is(err, sql.ErrNoRows) {
return "", errNoOrgRoot
}
if err != nil {
return "", err
}
if root == "" {
return "", errNoOrgRoot
}
return root, nil
}
// sameOrg reports whether workspaces `a` and `b` share an org root, i.e. they
// belong to the same tenant. Used by a2a routing to reject resolving/dispatching
// to a workspace id outside the caller's org. Fail-CLOSED: any lookup error or
// missing org root yields (false, err) so a DB hiccup denies cross-tenant
// routing rather than allowing it.
func sameOrg(ctx context.Context, database *sql.DB, a, b string) (bool, error) {
if a == b {
return true, nil
}
rootA, err := orgRootID(ctx, database, a)
if err != nil {
return false, err
}
rootB, err := orgRootID(ctx, database, b)
if err != nil {
return false, err
}
return rootA == rootB, nil
}
@@ -0,0 +1,62 @@
package handlers
// internal#718 P4 closure — provider endpoint retirement.
//
// PUT and GET /workspaces/:id/provider were the canvas-facing surface
// for the legacy `LLM_PROVIDER` workspace_secret. With the registry-
// derived provider model (P0-P4), the provider is now DERIVED at every
// decision point from (runtime, model) via the registry. No code path
// reads a stored provider anymore, so the endpoint has no observable
// effect.
//
// Rather than silently 200-OK on a write that goes nowhere, the
// retired endpoint returns 410 Gone with a structured body so an
// older canvas (which still calls PUT /provider in its Save flow)
// surfaces a loud-and-clear "this endpoint moved" error rather than
// pretending to persist a change. The replacement is: select your
// model on workspace create / via PUT /workspaces/:id/model — the
// provider is derived from it.
//
// Retirement context:
// - Retire-list #2 (CP `knownProviderNames` blocklist as authoring
// surface) was already retired in P3 PR-C (cp#379) — that source
// now reads from the registry. The CP-side reader of
// `env["LLM_PROVIDER"]` (`resolveModelAndProvider`) is replaced in
// the CP-side commit of this PR by a registry derivation.
// - Retire-list #3 (`deriveProviderFromModelSlug`) is removed in
// this PR — the only caller was `WorkspaceHandler.Create`, which
// wrote the derived value into workspace_secrets.LLM_PROVIDER for
// the now-removed CP read path. The migration 20260528000000
// deletes any straggler rows from the secret table.
//
// The Gone body is the contract: callers must recognize
// `code: PROVIDER_ENDPOINT_RETIRED` and stop calling. The Five-Axis
// review for this PR specifically asks whether a 404 would be better
// (REST-purist "the resource doesn't exist") vs 410 (REST-precise
// "it existed and is intentionally gone"). 410 is correct here: the
// endpoint shipped to prod, the canvas knows the URL, and the goal
// is to make the retirement loud, not invisible.
import (
"net/http"
"github.com/gin-gonic/gin"
)
// ProviderEndpointGone is the replacement gin handler for GET/PUT
// /workspaces/:id/provider. Returns 410 with a body shape the canvas
// can pattern-match on (code/error/issue keys).
//
// Wired in internal/router/router.go (the two route lines that used
// to reference sech.GetProvider / sech.SetProvider).
//
// Exported so the router package can reference it as
// handlers.ProviderEndpointGone without spinning up a SecretsHandler
// receiver just to retire two endpoints.
func ProviderEndpointGone(c *gin.Context) {
c.JSON(http.StatusGone, gin.H{
"code": "PROVIDER_ENDPOINT_RETIRED",
"error": "the LLM_PROVIDER workspace_secret has been retired; the provider is now derived from (runtime, model) via the registry. Select your model via PUT /workspaces/:id/model — the provider follows.",
"issue": "internal#718",
})
}
+21 -1
View File
@@ -538,7 +538,8 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
// Read previous current_task to detect changes (before the UPDATE)
var prevTask string
if err := db.DB.QueryRowContext(ctx, `SELECT COALESCE(current_task, '') FROM workspaces WHERE id = $1`, payload.WorkspaceID).Scan(&prevTask); err != nil {
var prevSpend int64
if err := db.DB.QueryRowContext(ctx, `SELECT COALESCE(current_task, ''), COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`, payload.WorkspaceID).Scan(&prevTask, &prevSpend); err != nil {
log.Printf("registry heartbeat: prev_task query failed for workspace %s: %v", payload.WorkspaceID, err)
}
@@ -556,6 +557,25 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
payload.MonthlySpend = maxMonthlySpend
}
// Multi-period budget (#49): record the spend INCREMENT into the
// workspace_spend_events ledger so the server can compute rolling per-period
// windows (hourly/daily/weekly/monthly) — see budget_periods.go. The agent
// still reports a cumulative monthly figure; we derive the delta vs the
// last-seen cumulative (prevSpend). A DECREASE means the agent reset its
// monthly cumulative (new month) → treat the new value as fresh spend.
// Best-effort: a ledger failure must never break the heartbeat.
if payload.MonthlySpend > 0 {
delta := payload.MonthlySpend - prevSpend
if delta < 0 {
delta = payload.MonthlySpend
}
if delta > 0 {
if err := recordSpendDelta(ctx, db.DB, payload.WorkspaceID, delta); err != nil {
log.Printf("registry heartbeat: spend-ledger insert failed for workspace %s: %v", payload.WorkspaceID, err)
}
}
}
// Update heartbeat columns. #73 guard: exclude 'removed' rows so a
// late heartbeat from a container that's being torn down doesn't
// refresh last_heartbeat_at on a tombstoned workspace (which would
+34 -143
View File
@@ -24,6 +24,7 @@ var platformManagedDirectLLMBypassKeys = map[string]struct{}{
"ANTHROPIC_AUTH_TOKEN": {},
"ARCEEAI_API_KEY": {},
"CLAUDE_CODE_OAUTH_TOKEN": {},
"CODEX_AUTH_JSON": {},
"DASHSCOPE_API_KEY": {},
"DEEPSEEK_API_KEY": {},
"GEMINI_API_KEY": {},
@@ -67,14 +68,6 @@ func platformManagedLLMModeForWorkspace(c *gin.Context, workspaceID string) bool
return strings.EqualFold(res.ResolvedMode, LLMBillingModePlatformManaged)
}
// platformManagedLLMMode is the legacy org-level gate retained for any test
// harness still asserting the env-var-only behavior. Production code paths
// must call platformManagedLLMModeForWorkspace instead so a workspace-level
// byok override actually takes effect on the secrets-write path.
func platformManagedLLMMode() bool {
return strings.EqualFold(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")), "platform_managed")
}
// rejectPlatformManagedDirectLLMBypassForWorkspace is the per-workspace
// successor to rejectPlatformManagedDirectLLMBypass (internal#691). The
// strip-list ONLY applies when this specific workspace resolves to
@@ -91,22 +84,6 @@ func rejectPlatformManagedDirectLLMBypassForWorkspace(c *gin.Context, workspaceI
return true
}
// rejectPlatformManagedDirectLLMBypass is the legacy org-level shim. Retained
// only for backwards compatibility with any external/test caller still on the
// old shape; new code MUST use the per-workspace variant above. Production
// code paths (the secrets.go handlers + workspace.go create-secret path) all
// switched in internal#691.
func rejectPlatformManagedDirectLLMBypass(c *gin.Context, key string) bool {
if !platformManagedLLMMode() || !isPlatformManagedDirectLLMBypassKey(key) {
return false
}
c.JSON(http.StatusBadRequest, gin.H{
"error": "direct Hermes custom provider secrets are blocked for platform-managed LLM workspaces; use MODEL/LLM_PROVIDER or the platform LLM proxy env instead",
"key": key,
})
return true
}
type SecretsHandler struct {
restartFunc func(workspaceID string) // Optional: auto-restart after secret change
}
@@ -309,6 +286,16 @@ func (h *SecretsHandler) Values(c *gin.Context) {
return
}
// molecule-core#1994 (corrected model): the remote-pull bundle is the
// TENANT's own merged secrets (global_secrets + workspace_secrets, the
// latter winning on collision). `global_secrets` is the tenant's store, not
// the platform's, so a byok workspace's pull MUST include the tenant's own
// global-scope LLM credential — that is exactly what it runs on, direct.
// The earlier internal#711 byok strip here rested on the inverted "global =
// platform's own" premise and is removed; the platform's own proxy token is
// never in a tenant's global_secrets (it lives in server env only and is
// injected separately on the platform_managed provision path), so there is
// nothing platform-owned to withhold on this path.
c.JSON(http.StatusOK, out)
}
@@ -476,9 +463,15 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
return
}
if rejectPlatformManagedDirectLLMBypass(c, body.Key) {
return
}
// internal#718: the org-level LLM billing rung was retired — billing is
// resolved per-workspace, not per-org. A global secret is the tenant's OWN
// shared credential; the provision-time provider-matched strip
// (workspace_provision) removes any global cred a given workspace's resolved
// provider does not accept, so a platform-managed workspace can never USE a
// non-matching global vendor/oauth key. The legacy org-env SetGlobal gate
// (keyed off the retired MOLECULE_LLM_BILLING_MODE) is therefore removed;
// per-workspace writes still enforce the strip-list via
// rejectPlatformManagedDirectLLMBypassForWorkspace.
encrypted, err := crypto.Encrypt([]byte(body.Value))
if err != nil {
@@ -739,121 +732,19 @@ func (h *SecretsHandler) SetModel(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"status": "saved", "model": body.Model})
}
// GetProvider handles GET /workspaces/:id/provider
// Returns the explicit LLM provider override stored as the LLM_PROVIDER
// workspace secret. Mirror of GetModel — same shape, same response keys
// (provider/source) to keep canvas wiring symmetric.
// internal#718 P4 closure: GetProvider, SetProvider, and the shared
// setProviderSecret helper were retired together with the
// LLM_PROVIDER workspace_secret. The provider is now DERIVED at every
// decision point from (runtime, model) via the registry
// (internal/providers.Manifest.DeriveProvider), so storing it is
// pure write-ghost — no consumer remains.
//
// Why a sibling endpoint rather than overloading PUT /model: the new
// `provider` field (Option B, PR #2441) is orthogonal to the model
// slug. A user might keep the same model alias and switch providers
// (e.g., route the same alias through a different gateway), or keep
// the same provider and switch models. Co-storing them under one
// endpoint forces a single Save+Restart round-trip per change; two
// endpoints let the canvas update each independently.
func (h *SecretsHandler) GetProvider(c *gin.Context) {
workspaceID := c.Param("id")
ctx := c.Request.Context()
var bytesVal []byte
var version int
err := db.DB.QueryRowContext(ctx,
`SELECT encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = $1 AND key = 'LLM_PROVIDER'`,
workspaceID).Scan(&bytesVal, &version)
if err == sql.ErrNoRows {
c.JSON(http.StatusOK, gin.H{"provider": "", "source": "default"})
return
}
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
return
}
decrypted, err := crypto.DecryptVersioned(bytesVal, version)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to decrypt"})
return
}
c.JSON(http.StatusOK, gin.H{"provider": string(decrypted), "source": "workspace_secrets"})
}
// setProviderSecret writes (or clears, when value=="") the LLM_PROVIDER
// workspace secret. Extracted from SetProvider so non-handler call sites
// (notably WorkspaceHandler.Create — first-deploy path that derives
// LLM_PROVIDER from the canvas-selected model slug so CP user-data picks
// it up as a YAML field in /configs/config.yaml AND it survives across
// restarts when CP regenerates the config) can reuse the encryption +
// upsert logic without inlining the SQL.
// Route registrations in internal/router/router.go now point both
// GET and PUT /workspaces/:id/provider at providerEndpointGone, which
// returns 410 Gone with a structured body so older canvases that
// still call PUT /provider on Save surface a loud failure rather
// than silently writing a vanished row.
//
// Returns nil on success. Caller is responsible for any restart trigger;
// the gin handler re-adds that after a successful write.
func setProviderSecret(ctx context.Context, workspaceID, provider string) error {
if provider == "" {
_, err := db.DB.ExecContext(ctx,
`DELETE FROM workspace_secrets WHERE workspace_id = $1 AND key = 'LLM_PROVIDER'`,
workspaceID)
return err
}
encrypted, err := crypto.Encrypt([]byte(provider))
if err != nil {
return err
}
version := crypto.CurrentEncryptionVersion()
_, err = db.DB.ExecContext(ctx, `
INSERT INTO workspace_secrets (workspace_id, key, encrypted_value, encryption_version)
VALUES ($1, 'LLM_PROVIDER', $2, $3)
ON CONFLICT (workspace_id, key) DO UPDATE
SET encrypted_value = $2, encryption_version = $3, updated_at = now()
`, workspaceID, encrypted, version)
return err
}
// SetProvider handles PUT /workspaces/:id/provider — writes the provider
// slug into workspace_secrets as LLM_PROVIDER. Empty string clears the
// override. Triggers auto-restart so the new env is in effect on the
// next boot — without this the canvas Save+Restart can race the
// already-restarting container and miss the window.
//
// CP user-data (controlplane PR #364) reads LLM_PROVIDER from env and
// writes it into /configs/config.yaml at boot, so the choice survives
// restart. Without that PR this endpoint still works but the value is
// only sticky when the workspace_secrets row is read on every restart
// (the secret-load path) — slower failure mode, same eventual behavior.
func (h *SecretsHandler) SetProvider(c *gin.Context) {
workspaceID := c.Param("id")
if !uuidRegex.MatchString(workspaceID) {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid workspace ID"})
return
}
ctx := c.Request.Context()
var body struct {
Provider string `json:"provider"`
}
if err := c.ShouldBindJSON(&body); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
return
}
if err := setProviderSecret(ctx, workspaceID, body.Provider); err != nil {
log.Printf("SetProvider error: %v", err)
if body.Provider == "" {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to clear provider"})
} else {
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save provider"})
}
return
}
if h.restartFunc != nil {
// RFC internal#524 Layer 1: globalGoAsync (see Set()).
wsID := workspaceID
globalGoAsync(func() { h.restartFunc(wsID) })
}
if body.Provider == "" {
c.JSON(http.StatusOK, gin.H{"status": "cleared"})
return
}
c.JSON(http.StatusOK, gin.H{"status": "saved", "provider": body.Provider})
}
// Migration 20260528000000_drop_llm_provider_workspace_secret.up.sql
// removes any straggler rows in workspace_secrets (key='LLM_PROVIDER')
// so the table is in the same state as a freshly-provisioned tenant.
+141 -144
View File
@@ -682,151 +682,16 @@ func TestSecretsModel_RoundTrip_KeyIsMODELNotMODEL_PROVIDER(t *testing.T) {
}
}
// ==================== GetProvider / SetProvider (Option B PR-2) ====================
// ==================== GetProvider / SetProvider — RETIRED ====================
//
// Mirror of the GetModel/SetModel suite. Same secret-storage shape (key=
// 'LLM_PROVIDER' instead of 'MODEL_PROVIDER'), same restart-trigger
// contract, same UUID validation gate. We pin the contract symmetrically
// so a future refactor that breaks one without the other shows up in CI.
func TestSecretsGetProvider_Default(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewSecretsHandler(nil)
mock.ExpectQuery("SELECT encrypted_value, encryption_version FROM workspace_secrets").
WithArgs("ws-prov").
WillReturnError(sql.ErrNoRows)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-prov"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-prov/provider", nil)
handler.GetProvider(c)
if w.Code != http.StatusOK {
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
}
var resp map[string]interface{}
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("failed to parse response: %v", err)
}
if resp["provider"] != "" {
t.Errorf("expected empty provider, got %v", resp["provider"])
}
if resp["source"] != "default" {
t.Errorf("expected source 'default', got %v", resp["source"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
func TestSecretsGetProvider_DBError(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewSecretsHandler(nil)
mock.ExpectQuery("SELECT encrypted_value, encryption_version FROM workspace_secrets").
WithArgs("ws-prov-err").
WillReturnError(sql.ErrConnDone)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "ws-prov-err"}}
c.Request = httptest.NewRequest("GET", "/workspaces/ws-prov-err/provider", nil)
handler.GetProvider(c)
if w.Code != http.StatusInternalServerError {
t.Errorf("expected status 500, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
func TestSecretsSetProvider_Upsert(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
restartCalled := make(chan string, 1)
handler := NewSecretsHandler(func(id string) { restartCalled <- id })
mock.ExpectExec(`INSERT INTO workspace_secrets`).
WithArgs("00000000-0000-0000-0000-000000000003", sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(1, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000003"}}
c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000003/provider",
strings.NewReader(`{"provider":"minimax"}`))
c.Request.Header.Set("Content-Type", "application/json")
handler.SetProvider(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
select {
case id := <-restartCalled:
if id != "00000000-0000-0000-0000-000000000003" {
t.Errorf("restart called with wrong id: %s", id)
}
case <-time.After(500 * time.Millisecond):
t.Error("restart was not triggered")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
func TestSecretsSetProvider_EmptyClears(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
handler := NewSecretsHandler(func(string) {})
mock.ExpectExec(`DELETE FROM workspace_secrets`).
WithArgs("00000000-0000-0000-0000-000000000004").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000004"}}
c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000004/provider",
strings.NewReader(`{"provider":""}`))
c.Request.Header.Set("Content-Type", "application/json")
handler.SetProvider(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
func TestSecretsSetProvider_InvalidID(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
handler := NewSecretsHandler(nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Params = gin.Params{{Key: "id", Value: "not-a-uuid"}}
c.Request = httptest.NewRequest("PUT", "/workspaces/not-a-uuid/provider",
strings.NewReader(`{"provider":"x"}`))
c.Request.Header.Set("Content-Type", "application/json")
handler.SetProvider(c)
if w.Code != http.StatusBadRequest {
t.Errorf("expected 400 for bad UUID, got %d", w.Code)
}
}
// internal#718 P4 closure: the GetProvider/SetProvider suite covered the
// LLM_PROVIDER workspace_secret round-trip. Both handlers and the
// shared setProviderSecret helper were removed when the secret itself
// was retired. The replacement endpoint behavior (410 Gone with a
// structured body) is covered by
// `llm_provider_removal_p4_test.go::TestPutProvider_410Gone`,
// `TestGetProvider_410Gone`, and
// `TestProviderEndpointGone_BodyShape`.
// ==================== Values — Phase 30.2 decrypted pull ====================
@@ -865,6 +730,12 @@ func TestSecretsValues_LegacyWorkspaceGrandfathered(t *testing.T) {
WithArgs(testWsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("WS_KEY", []byte("ws_plainvalue"), 0))
// internal#711: Values now resolves billing mode to gate the global LLM-cred
// merge. Neither key here is a platform-managed LLM bypass key, so the mode
// is immaterial to the assertions — but the resolver query must be mocked.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(testWsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged))
w := httptest.NewRecorder()
c := secretsValuesRequest(w, "") // no auth — grandfathered
@@ -942,6 +813,12 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) {
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("ONLY_WS", []byte("ws_val"), 0).
AddRow("SHARED_KEY", []byte("ws_wins"), 0))
// internal#711: billing-mode resolver query. None of these keys is a
// platform-managed LLM bypass key, so the resolved mode does not affect the
// merge assertions; platform_managed keeps the existing pass-through.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(testWsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged))
w := httptest.NewRecorder()
c := secretsValuesRequest(w, "Bearer good-token")
@@ -963,6 +840,71 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) {
}
}
// TestSecretsValues_ByokServesTenantGlobalLLMCred is the molecule-core#1994
// (corrected-model) regression guard for the remote-pull path. `global_secrets`
// is the TENANT's store, so a byok workspace's pull MUST include the tenant's
// own global-scope LLM credential — that is exactly what byok runs on, direct.
//
// Pre-fix (internal#711) this path STRIPPED the global-origin oauth on byok,
// resting on the inverted premise that a global LLM cred was "the platform's
// own"; that killed legitimate byok workspaces whose oauth lived at global
// scope. The strip is removed: the merged bundle (tenant globals + workspace
// overrides) is served verbatim.
//
// Mutation: re-add the byok global-LLM-cred strip in secrets.go Values() →
// CLAUDE_CODE_OAUTH_TOKEN disappears from the body → this test RED.
func TestSecretsValues_ByokServesTenantGlobalLLMCred(t *testing.T) {
mock := setupTestDB(t)
handler := NewSecretsHandler(nil)
mock.ExpectQuery(`SELECT COUNT\(\*\) FROM workspace_auth_tokens`).
WithArgs(testWsID).
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
mock.ExpectQuery(`SELECT t\.id, t\.workspace_id.*FROM workspace_auth_tokens t.*JOIN workspaces`).
WithArgs(sqlmock.AnyArg()).
WillReturnRows(sqlmock.NewRows([]string{"id", "workspace_id"}).AddRow("tok-1", testWsID))
mock.ExpectExec(`UPDATE workspace_auth_tokens SET last_used_at`).
WithArgs("tok-1").
WillReturnResult(sqlmock.NewResult(0, 1))
// global_secrets holds the TENANT's own global-scope OAuth token (shared
// across all the tenant's workspaces) + a non-LLM global.
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0).
AddRow("SENTRY_DSN", []byte("https://sentry.example/123"), 0))
// This workspace set no LLM secret of its own — it relies on the tenant
// global-scope oauth.
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id`).
WithArgs(testWsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0))
w := httptest.NewRecorder()
c := secretsValuesRequest(w, "Bearer good-token")
handler.Values(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
}
var body map[string]string
_ = json.Unmarshal(w.Body.Bytes(), &body)
// 1. The tenant's own global-scope OAuth token SURVIVES — byok runs on it.
if body["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token served for byok pull", body["CLAUDE_CODE_OAUTH_TOKEN"])
}
// 2. The workspace's own non-LLM secret survives.
if body["MODEL"] != "opus" {
t.Fatalf("MODEL = %q, want opus preserved", body["MODEL"])
}
// 3. Unrelated non-LLM global secrets are untouched.
if body["SENTRY_DSN"] != "https://sentry.example/123" {
t.Fatalf("SENTRY_DSN = %q, want non-LLM globals untouched", body["SENTRY_DSN"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
func TestSecretsValues_InvalidWorkspaceID(t *testing.T) {
setupTestDB(t)
handler := NewSecretsHandler(nil)
@@ -1037,6 +979,61 @@ func TestSetGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
}
}
// TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv pins the
// internal#718 correction: the org-level LLM billing rung is RETIRED (billing
// is resolved per-workspace, not per-org). A global secret is the tenant's OWN
// shared credential and is always writable at global scope; the provision-time
// provider-matched strip (workspace_provision) keeps any platform-managed
// workspace from USING a non-matching global cred, and per-workspace secret
// writes still enforce the strip-list via the per-workspace guard. So even with
// the legacy MOLECULE_LLM_BILLING_MODE env still set to platform_managed, a
// global vendor/oauth key write MUST SUCCEED (200) and persist — the retired
// org rung no longer gates it.
//
// Mutation: re-add the org-level rejectPlatformManagedDirectLLMBypass guard to
// SetGlobal → the write 400s before the INSERT → this test RED.
func TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
restarted := make(chan string, 2)
handler := NewSecretsHandler(func(id string) { restarted <- id })
// Legacy org env still platform_managed — it must no longer gate the write.
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
mock.ExpectExec("INSERT INTO global_secrets").
WithArgs("CLAUDE_CODE_OAUTH_TOKEN", sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectQuery("SELECT id FROM workspaces").
WithArgs("CLAUDE_CODE_OAUTH_TOKEN").
WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-a"))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-tenant-own"}`
c.Request = httptest.NewRequest("POST", "/admin/secrets", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.SetGlobal(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200 (global write allowed; org rung retired), got %d: %s", w.Code, w.Body.String())
}
// Wait on the async restart fan-out so its SELECT drains before db swap.
select {
case id := <-restarted:
if id != "ws-a" {
t.Errorf("expected ws-a restarted, got %s", id)
}
case <-time.After(2 * time.Second):
t.Fatal("auto-restart not fired for affected workspace")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestDeleteGlobal_AutoRestartsAffectedWorkspaces covers the delete branch of #15.
func TestDeleteGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
mock := setupTestDB(t)
@@ -95,6 +95,38 @@ type modelSpec struct {
Name string `json:"name,omitempty" yaml:"name"`
Provider string `json:"provider,omitempty" yaml:"provider"`
RequiredEnv []string `json:"required_env,omitempty" yaml:"required_env"`
// BillingMode is the billing source the DERIVED provider implies:
// "platform_managed" (the closed core-only platform provider; Molecule
// owns the upstream key + the bill) or "byok" (any other provider; the
// tenant supplies its own key). Set ONLY on registry-served models
// (RegistryModels) where DeriveProvider resolved an owning provider;
// empty on template-served models. internal#718 P3 — the canvas reads
// this to show the billing-mode of the DERIVED provider instead of its
// hardcoded billingModeForProvider rule.
BillingMode string `json:"billing_mode,omitempty" yaml:"-"`
}
// registryProviderView is the canvas-facing projection of a single registry
// Provider entry for a registry-known runtime: the stable name, the dropdown
// display label, the auth-env-var NAMES (never values), and the billing mode
// the provider implies. Sourced from the provider registry
// (internal/providers) so the canvas drops its hardcoded VENDOR_LABELS map
// and billingModeForProvider rule (internal#718 P3, retire-list #4/#5).
type registryProviderView struct {
// Name is the registry provider key (e.g. "anthropic-oauth", "platform").
Name string `json:"name"`
// DisplayName is the canvas dropdown label (registry Provider.DisplayName).
DisplayName string `json:"display_name,omitempty"`
// AuthEnv is the env-var NAMES any one of which satisfies auth for this
// provider (registry Provider.AuthEnv). Names only, never secret values.
AuthEnv []string `json:"auth_env,omitempty"`
// BillingMode is "platform_managed" for the closed platform provider,
// "byok" otherwise — keyed off the registry IsPlatform predicate so the
// canvas shows the DERIVED provider's billing source.
BillingMode string `json:"billing_mode,omitempty"`
// Deprecated mirrors the registry's deprecated flag so the canvas can
// grey the provider out without breaking saved configs.
Deprecated bool `json:"deprecated,omitempty"`
}
// providerRegistryEntry mirrors a row from a template's top-level
@@ -162,8 +194,29 @@ type templateSummary struct {
// (omitempty); the canvas's existing per-model fallback continues
// to work for them.
ProviderRegistry []providerRegistryEntry `json:"provider_registry,omitempty"`
Skills []string `json:"skills"`
SkillCount int `json:"skill_count"`
// RegistryBacked is true when this template's runtime is known to the
// provider registry (internal/providers runtimes: block) and the
// RegistryProviders / RegistryModels fields below were populated from it.
// The canvas treats a registry-backed payload as AUTHORITATIVE for the
// selectable provider+model list (it drops its prefix-inference fallback)
// — "only registered selectable" follows because the canvas can render
// no option the registry did not serve. False = the runtime is not in the
// registry (federation / external / mock); the canvas keeps using the
// template-served Models/Providers + its heuristic. internal#718 P3.
RegistryBacked bool `json:"registry_backed,omitempty"`
// RegistryProviders is the runtime's NATIVE provider set from the
// registry (ProvidersForRuntime), each with its display label, auth-env
// names, and billing mode. Empty when !RegistryBacked. This is the SSOT
// the canvas Provider dropdown consumes instead of VENDOR_LABELS.
RegistryProviders []registryProviderView `json:"registry_providers,omitempty"`
// RegistryModels is the runtime's NATIVE model set from the registry
// (ModelsForRuntime), each annotated with its DERIVED provider and the
// billing mode that provider implies. Empty when !RegistryBacked. This is
// the SSOT the canvas Model dropdown consumes — a template can no longer
// surface a model the registry does not list for the runtime.
RegistryModels []modelSpec `json:"registry_models,omitempty"`
Skills []string `json:"skills"`
SkillCount int `json:"skill_count"`
// ProvisionTimeoutSeconds lets a slow runtime declare its expected
// cold-boot duration in its template manifest. Canvas's
// ProvisioningTimeout banner respects this per-workspace via the
@@ -171,6 +224,15 @@ type templateSummary struct {
// 0 = template hasn't declared one, falls through to canvas's
// runtime-profile default.
ProvisionTimeoutSeconds int `json:"provision_timeout_seconds,omitempty"`
// Displayable lets a template opt OUT of the canvas runtime picker
// declaratively (config.yaml `displayable: false`) while still being a
// provisionable runtime. nil/absent or true → shown; only an explicit
// false hides it. The canvas runtime dropdown is SSOT-driven off this
// list (no hardcoded frontend allowlist), so this is the single place a
// runtime is hidden from the picker. Pointer so "unset" is distinct from
// "false" and omitempty keeps the payload unchanged for existing
// templates that never declare it.
Displayable *bool `json:"displayable,omitempty"`
}
// resolveTemplateDir finds the template directory for a workspace on the host.
@@ -217,6 +279,7 @@ func (h *TemplatesHandler) List(c *gin.Context) {
Runtime string `yaml:"runtime"`
Model string `yaml:"model"`
Skills []string `yaml:"skills"`
Displayable *bool `yaml:"displayable"`
// Top-level `providers:` block — structured registry. Distinct
// from runtime_config.providers (slug list) below. Both shapes
// coexist in production: claude-code ships the structured
@@ -243,9 +306,13 @@ func (h *TemplatesHandler) List(c *gin.Context) {
log.Printf("templates list: skip %s: yaml.Unmarshal: %v", id, err)
return
}
// normalizedRuntime strips the "-default" vanilla-variant suffix
// (claude-code-default → claude-code). Hoisted out of the
// known-runtime guard so the registry enrichment below can key off
// the same normalised name the guard validated.
normalizedRuntime := strings.TrimSuffix(strings.TrimSpace(raw.Runtime), "-default")
if raw.Runtime != "" {
runtime := strings.TrimSuffix(strings.TrimSpace(raw.Runtime), "-default")
if _, ok := knownRuntimes[runtime]; !ok {
if _, ok := knownRuntimes[normalizedRuntime]; !ok {
log.Printf("templates list: skip %s: unsupported runtime %q", id, raw.Runtime)
return
}
@@ -262,7 +329,7 @@ func (h *TemplatesHandler) List(c *gin.Context) {
tier = h.wh.DefaultTier()
}
templates = append(templates, templateSummary{
summary := templateSummary{
ID: id,
Name: raw.Name,
Description: raw.Description,
@@ -277,7 +344,18 @@ func (h *TemplatesHandler) List(c *gin.Context) {
Skills: raw.Skills,
SkillCount: len(raw.Skills),
ProvisionTimeoutSeconds: raw.RuntimeConfig.ProvisionTimeoutSeconds,
})
Displayable: raw.Displayable,
}
// internal#718 P3: serve the SELECTABLE provider/model list from
// the provider registry for a registry-known runtime. Additive —
// the template-served Models/Providers above stay for non-registry
// runtimes + older canvases; this adds the authoritative
// registry_backed/registry_providers/registry_models block the
// current canvas prefers. Fail-open for unknown runtimes.
enrichFromRegistry(&summary, normalizedRuntime)
templates = append(templates, summary)
})
}
walk(h.cacheDir)
@@ -0,0 +1,112 @@
package handlers
// templates_registry.go — internal#718 P3: serve the GET /templates selectable
// provider/model list FROM the provider registry (workspace-server/internal/
// providers) instead of from each template's hand-authored config.yaml
// `providers:` / `runtime_config.models` block.
//
// The registry (P2-A synced copy of the canonical CP providers.yaml) is the
// SSOT for "which providers + models does runtime R natively support" and
// "which derived provider owns model M" (DeriveProvider) and "is that provider
// the closed platform set" (IsPlatform). This file projects that into the
// templates payload's registry_backed / registry_providers / registry_models
// fields so the canvas can drop its hardcoded VENDOR_LABELS /
// billingModeForProvider vocabularies (retire-list #4/#5) and physically can't
// render an option the registry didn't serve.
//
// Federation-ready, fail-OPEN: a runtime ABSENT from the registry's runtimes:
// block (external / mock / kimi / a future third-party runtime) yields
// RegistryBacked=false and an empty registry block — the template's own fields
// stay authoritative. No behavior change for non-registry runtimes.
//
// NOTE: this reuses the package-level providerRegistry() accessor +
// LLMBillingModePlatformManaged / LLMBillingModeBYOK constants from
// llm_billing_mode.go (added by P2-B, internal#718 #1972, now on main) — both
// the billing-derivation and this templates projection wrap the same
// providers.LoadManifest() SSOT and the same platform_managed/byok wire
// strings, so there is one accessor + one constant set for the package.
import (
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
)
// billingModeForRegistryProvider maps a registry Provider to the billing mode
// it implies: platform_managed for the closed core-only platform provider,
// byok for everything else. Keyed off the registry IsPlatform predicate —
// the same one billing/credential emission (llm_billing_mode.go) keys off the
// DERIVED provider — so the canvas shows the true billing source of the
// resolved provider. Returns the same LLMBillingMode* wire strings the Config
// tab's billing-mode switch sends.
func billingModeForRegistryProvider(p providers.Provider) string {
if p.IsPlatform() {
return LLMBillingModePlatformManaged
}
return LLMBillingModeBYOK
}
// enrichFromRegistry populates the registry-served fields on a templateSummary
// when its runtime is known to the provider registry. It is a no-op (leaves
// RegistryBacked=false and the registry slices nil) for a runtime the registry
// does not know — the federation/fail-open path.
//
// runtime is the template's already-normalised runtime string (the caller
// strips the "-default" suffix before calling, matching List's existing
// knownRuntimes check).
func enrichFromRegistry(summary *templateSummary, runtime string) {
m, err := providerRegistry()
if err != nil || m == nil {
return // fail open — registry load defect; keep template-served fields.
}
provs, err := m.ProvidersForRuntime(runtime)
if err != nil {
// Runtime not in the registry runtimes: block (external / mock / kimi
// / future third-party). Fail open: the template's own fields stay
// authoritative; no registry annotation.
return
}
// registry_providers — the runtime's native provider set, in registry
// declared order, projected to the canvas-facing view.
views := make([]registryProviderView, 0, len(provs))
for _, p := range provs {
views = append(views, registryProviderView{
Name: p.Name,
DisplayName: p.DisplayName,
AuthEnv: p.AuthEnv,
BillingMode: billingModeForRegistryProvider(p),
Deprecated: p.Deprecated,
})
}
// registry_models — the runtime's native model ids, each annotated with
// the DERIVED owning provider + the billing mode it implies. DeriveProvider
// is the SSOT for model→provider; we pass nil availableAuthEnv because a
// template manifest has no per-workspace auth env, and the registry's
// exact-id mapping resolves every native model id unambiguously (the
// claude-code kimi split is by exact id, not a shared prefix).
models, err := m.ModelsForRuntime(runtime)
if err != nil {
// ProvidersForRuntime succeeded but ModelsForRuntime did not — should
// be impossible (both gate on the same Runtimes entry), but fail open
// rather than serve a half-populated block.
return
}
regModels := make([]modelSpec, 0, len(models))
for _, id := range models {
ms := modelSpec{ID: id}
if derived, derr := m.DeriveProvider(runtime, id, nil); derr == nil {
ms.Provider = derived.Name
ms.BillingMode = billingModeForRegistryProvider(derived)
}
// If DeriveProvider errors (ambiguous/overlap — a manifest defect the
// loader's tests pin against), still serve the id without a provider
// annotation rather than dropping it; the canvas treats an
// un-annotated registry model as selectable-but-unlabelled.
regModels = append(regModels, ms)
}
summary.RegistryBacked = true
summary.RegistryProviders = views
summary.RegistryModels = regModels
}
@@ -1329,3 +1329,311 @@ func TestCWE78_DeleteFile_TraversalVariants(t *testing.T) {
})
}
}
// ============================================================================
// internal#718 P3 — GET /templates serves the selectable provider/model list
// FROM the provider registry (workspace-server/internal/providers), not from
// each template's hand-authored config.yaml. Additive: the registry-served
// fields (registry_backed / registry_providers / registry_models) ride
// ALONGSIDE the existing template-served fields so non-registry runtimes and
// older canvases keep working. The canvas (PR-B) prefers the registry block;
// "only registered selectable" follows because the registry block is the
// authoritative list for a registry-known runtime.
// ============================================================================
// TestTemplatesList_RegistryServesSelectableModels pins the core P3 contract:
// for a runtime the provider registry knows (claude-code), /templates serves
// the registry's NATIVE model ids — regardless of what the template's
// config.yaml runtime_config.models happens to list. A template author can no
// longer surface an unregistered model into the canvas dropdown.
func TestTemplatesList_RegistryServesSelectableModels(t *testing.T) {
tmpDir := t.TempDir()
tmplDir := filepath.Join(tmpDir, "claude-code-default")
if err := os.MkdirAll(tmplDir, 0755); err != nil {
t.Fatalf("mkdir: %v", err)
}
// Deliberately list a BOGUS model the registry does not know. The
// registry-served list must NOT contain it.
configYaml := `name: Claude Code
runtime: claude-code
runtime_config:
model: claude-sonnet-4-6
models:
- id: totally-made-up-model
name: Not In Registry
skills: []
`
if err := os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYaml), 0644); err != nil {
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
handler.List(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp []templateSummary
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse: %v", err)
}
if len(resp) != 1 {
t.Fatalf("expected 1 template, got %d", len(resp))
}
got := resp[0]
if !got.RegistryBacked {
t.Fatalf("claude-code is a registry-known runtime; RegistryBacked must be true")
}
// The registry-served model set must be the claude-code native set
// (anthropic-oauth: sonnet/opus/haiku, anthropic-api: claude-*-4-*,
// kimi-coding: kimi-*, minimax: MiniMax-*, platform: vendor/model ids).
// It must NOT contain the template's bogus id.
regModelIDs := map[string]bool{}
for _, m := range got.RegistryModels {
regModelIDs[m.ID] = true
}
if regModelIDs["totally-made-up-model"] {
t.Errorf("RegistryModels leaked the template's unregistered model id")
}
for _, want := range []string{"sonnet", "opus", "claude-opus-4-7", "anthropic/claude-opus-4-7"} {
if !regModelIDs[want] {
t.Errorf("RegistryModels missing native model %q; got %v", want, regModelIDs)
}
}
}
// TestTemplatesList_RegistryAnnotatesDerivedProviderAndBilling pins that each
// registry-served model carries its DERIVED provider name + a billing_mode
// reflecting whether that derived provider is the closed platform set
// (platform_managed) or BYOK (byok). This is what the canvas Config tab reads
// to show the billing-mode of the DERIVED provider (folds in #1931 intent),
// instead of its hardcoded billingModeForProvider rule.
func TestTemplatesList_RegistryAnnotatesDerivedProviderAndBilling(t *testing.T) {
tmpDir := t.TempDir()
tmplDir := filepath.Join(tmpDir, "claude-code-default")
if err := os.MkdirAll(tmplDir, 0755); err != nil {
t.Fatalf("mkdir: %v", err)
}
configYaml := `name: Claude Code
runtime: claude-code
runtime_config:
model: claude-sonnet-4-6
skills: []
`
if err := os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYaml), 0644); err != nil {
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
handler.List(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp []templateSummary
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse: %v", err)
}
got := resp[0]
billByModel := map[string]string{}
provByModel := map[string]string{}
for _, m := range got.RegistryModels {
billByModel[m.ID] = m.BillingMode
provByModel[m.ID] = m.Provider
}
// A BYOK API model derives to anthropic-api → byok.
if provByModel["claude-opus-4-7"] != "anthropic-api" {
t.Errorf("claude-opus-4-7 derived provider: want anthropic-api, got %q", provByModel["claude-opus-4-7"])
}
if billByModel["claude-opus-4-7"] != "byok" {
t.Errorf("claude-opus-4-7 billing_mode: want byok, got %q", billByModel["claude-opus-4-7"])
}
// A platform-namespaced model derives to the closed platform provider →
// platform_managed.
if provByModel["anthropic/claude-opus-4-7"] != "platform" {
t.Errorf("anthropic/claude-opus-4-7 derived provider: want platform, got %q", provByModel["anthropic/claude-opus-4-7"])
}
if billByModel["anthropic/claude-opus-4-7"] != "platform_managed" {
t.Errorf("anthropic/claude-opus-4-7 billing_mode: want platform_managed, got %q", billByModel["anthropic/claude-opus-4-7"])
}
// registry_providers carries the provider display_name + auth_env +
// billing_mode for the dropdown labels — sourced from the registry, not
// the canvas VENDOR_LABELS map.
byName := map[string]registryProviderView{}
for _, p := range got.RegistryProviders {
byName[p.Name] = p
}
oauth, ok := byName["anthropic-oauth"]
if !ok {
t.Fatalf("registry_providers missing anthropic-oauth; got %v", byName)
}
if oauth.DisplayName != "Claude Code subscription" {
t.Errorf("anthropic-oauth display_name: want %q, got %q", "Claude Code subscription", oauth.DisplayName)
}
if oauth.BillingMode != "byok" {
t.Errorf("anthropic-oauth billing_mode: want byok, got %q", oauth.BillingMode)
}
if len(oauth.AuthEnv) != 1 || oauth.AuthEnv[0] != "CLAUDE_CODE_OAUTH_TOKEN" {
t.Errorf("anthropic-oauth auth_env: want [CLAUDE_CODE_OAUTH_TOKEN], got %v", oauth.AuthEnv)
}
plat, ok := byName["platform"]
if !ok || plat.BillingMode != "platform_managed" {
t.Errorf("platform provider billing_mode: want platform_managed, got %+v", plat)
}
}
// TestTemplatesList_NonRegistryRuntimeFallsOpenToTemplate pins federation-
// readiness: for a runtime the registry does NOT know (a hypothetical
// third-party / external-like runtime), /templates does NOT set
// RegistryBacked and does NOT synthesize a registry block — the template's
// own config.yaml fields remain the source, unchanged. No behavior change for
// non-registry runtimes.
func TestTemplatesList_NonRegistryRuntimeFallsOpenToTemplate(t *testing.T) {
tmpDir := t.TempDir()
tmplDir := filepath.Join(tmpDir, "byo-runtime")
if err := os.MkdirAll(tmplDir, 0755); err != nil {
t.Fatalf("mkdir: %v", err)
}
// "mock" is a known runtime to the manifest allowlist (so List doesn't
// skip it) but is NOT in the provider registry's runtimes: block.
configYaml := `name: Mock Runtime
runtime: mock
runtime_config:
model: canned-reply
providers: [some-byo-provider]
models:
- id: canned-reply
name: Canned Reply
skills: []
`
if err := os.WriteFile(filepath.Join(tmplDir, "config.yaml"), []byte(configYaml), 0644); err != nil {
t.Fatalf("write: %v", err)
}
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
handler.List(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp []templateSummary
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse: %v", err)
}
if len(resp) != 1 {
t.Fatalf("expected 1 template, got %d", len(resp))
}
got := resp[0]
if got.RegistryBacked {
t.Errorf("mock is NOT a registry runtime; RegistryBacked must be false")
}
if len(got.RegistryModels) != 0 || len(got.RegistryProviders) != 0 {
t.Errorf("non-registry runtime must not synthesize a registry block; got models=%v providers=%v",
got.RegistryModels, got.RegistryProviders)
}
// Template-served fields untouched.
if len(got.Models) != 1 || got.Models[0].ID != "canned-reply" {
t.Errorf("template Models unchanged: got %+v", got.Models)
}
if !reflect.DeepEqual(got.Providers, []string{"some-byo-provider"}) {
t.Errorf("template Providers unchanged: got %v", got.Providers)
}
}
// TestTemplatesList_DisplayableFlag verifies the SSOT-driven runtime-picker
// opt-out: a template's config.yaml `displayable: false` surfaces as a
// non-nil false on the /templates row (canvas hides it), while an absent
// flag stays nil (canvas shows it) and an explicit true surfaces as true.
// This is the backend half of removing the hardcoded frontend allowlist —
// the picker trusts this list, so hiding a runtime must be declarative here.
func TestTemplatesList_DisplayableFlag(t *testing.T) {
setupTestDB(t)
setupTestRedis(t)
tmpDir := t.TempDir()
mk := func(dir, yaml string) {
d := filepath.Join(tmpDir, dir)
if err := os.MkdirAll(d, 0755); err != nil {
t.Fatal(err)
}
if err := os.WriteFile(filepath.Join(d, "config.yaml"), []byte(yaml), 0644); err != nil {
t.Fatal(err)
}
}
// absent → nil
mk("adk-shown", "name: ADK Shown\nruntime: claude-code\n")
// explicit false → hidden marker
mk("adk-hidden", "name: ADK Hidden\nruntime: claude-code\ndisplayable: false\n")
// explicit true → shown marker
mk("adk-explicit", "name: ADK Explicit\nruntime: claude-code\ndisplayable: true\n")
handler := NewTemplatesHandler(tmpDir, nil, nil)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
c.Request = httptest.NewRequest("GET", "/templates", nil)
handler.List(c)
if w.Code != http.StatusOK {
t.Fatalf("expected 200, got %d", w.Code)
}
var resp []templateSummary
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
t.Fatalf("parse: %v", err)
}
byID := map[string]templateSummary{}
for _, s := range resp {
byID[s.ID] = s
}
if s, ok := byID["adk-shown"]; !ok {
t.Fatal("adk-shown missing")
} else if s.Displayable != nil {
t.Errorf("adk-shown: expected nil Displayable (absent), got %v", *s.Displayable)
}
if s, ok := byID["adk-hidden"]; !ok {
t.Fatal("adk-hidden missing")
} else if s.Displayable == nil || *s.Displayable != false {
t.Errorf("adk-hidden: expected non-nil false Displayable, got %v", s.Displayable)
}
if s, ok := byID["adk-explicit"]; !ok {
t.Fatal("adk-explicit missing")
} else if s.Displayable == nil || *s.Displayable != true {
t.Errorf("adk-explicit: expected non-nil true Displayable, got %v", s.Displayable)
}
// JSON contract: omitempty drops the field entirely when nil so existing
// templates' payloads are byte-unchanged; present when set.
var rawRows []map[string]json.RawMessage
if err := json.Unmarshal(w.Body.Bytes(), &rawRows); err != nil {
t.Fatalf("raw parse: %v", err)
}
for _, row := range rawRows {
id := ""
_ = json.Unmarshal(row["id"], &id)
_, present := row["displayable"]
if id == "adk-shown" && present {
t.Error("adk-shown: displayable key should be omitted when nil")
}
if (id == "adk-hidden" || id == "adk-explicit") && !present {
t.Errorf("%s: displayable key should be present when set", id)
}
}
}
+76 -27
View File
@@ -428,6 +428,54 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
return
}
// internal#718 P4 PR-2: ONLY-REGISTERED validation at the create boundary —
// FLIPPED from WARN to HARD-REJECT (was the P2-B WARN-mode signal).
//
// For a runtime the provider registry knows (first-party:
// claude-code/codex/hermes/openclaw) this checks the (runtime, model) pair
// against the registry's native model set. Fails OPEN for runtimes the
// registry doesn't know (langgraph/external/kimi/mock/federated) so
// non-first-party / federated flows are UNCHANGED. Skipped for external
// workspaces (the URL is the contract, not the model — see MODEL_REQUIRED
// rationale above).
//
// THE FLIP (was WARN, now 422):
// * P2-B carried the gate in WARN mode (X-Molecule-Model-Unregistered
// response header + log line, create proceeds) because the legacy
// colon-namespaced BYOK vocabulary ('anthropic:claude-opus-4-7' etc.)
// was live across the create corpus but not yet in the registry's
// exact-id model sets — hard-rejecting would have 422'd legitimate
// existing flows.
// * P4 PR-1 reconciled that colon vocab into the registry as
// first-class native-set entries (each runtime native set now lists
// both bare/slash AND colon forms for the BYOK ids the live corpus
// uses; openclaw's pre-existing colon-form precedent extended to
// claude-code). DeriveProvider / Manifest.ModelsForRuntime now
// resolves every legitimate model in the corpus.
// * With the reconcile landed, an unregistered (runtime, model) pair
// is a real misconfiguration — the corpus has no legitimate model
// this validator now rejects. We flip to 422
// UNREGISTERED_MODEL_FOR_RUNTIME so the caller fails LOUDLY at the
// boundary instead of provisioning a workspace that will wedge at
// adapter init (the codex 'anthropic:claude-opus-4-7' wedge class
// the MODEL_REQUIRED gate also targets).
//
// The registry model set is code-generated from the canonical
// providers.yaml (P2-A artifact); the check stays in sync with the SSOT
// via the verify-providers-gen + sync-providers-yaml CI gates.
if !isExternal {
if ok, why := validateRegisteredModelForRuntime(payload.Runtime, payload.Model); !ok {
log.Printf("Create: 422 UNREGISTERED_MODEL_FOR_RUNTIME (runtime=%q model=%q): %s [internal#718 P4 PR-2 hard-reject]", payload.Runtime, payload.Model, why)
c.JSON(http.StatusUnprocessableEntity, gin.H{
"error": why,
"runtime": payload.Runtime,
"model": payload.Model,
"code": "UNREGISTERED_MODEL_FOR_RUNTIME",
})
return
}
}
ctx := c.Request.Context()
// Convert empty role to NULL
@@ -599,38 +647,39 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
return
}
// Persist canvas-selected model + derived provider as workspace
// secrets so they survive restart and are picked up by CP user-data
// when regenerating /configs/config.yaml. Without this, the
// applyRuntimeModelEnv fallback chain (workspace_provision.go)
// cannot recover the user's choice on a Restart payload (which
// rebuilds from the workspaces row, where there is no model column),
// and hermes silently boots with the template-default model. See
// failed-workspace 95ed3ff2 (2026-05-02): canvas POSTed
// minimax/MiniMax-M2.7-highspeed, MODEL_PROVIDER was never written,
// container fell through to nousresearch/hermes-4-70b, derive-
// provider.sh produced the wrong provider, hermes gateway 401'd,
// /health poll failed, molecule-runtime never registered.
// Persist canvas-selected model as the MODEL workspace_secret so it
// survives restart and is picked up by CP user-data when regenerating
// /configs/config.yaml. Without this, the applyRuntimeModelEnv
// fallback chain (workspace_provision.go) cannot recover the user's
// choice on a Restart payload (which rebuilds from the workspaces
// row, where there is no model column), and hermes silently boots
// with the template-default model. See failed-workspace 95ed3ff2
// (2026-05-02): canvas POSTed minimax/MiniMax-M2.7-highspeed,
// MODEL_PROVIDER was never written, container fell through to
// nousresearch/hermes-4-70b, derive-provider.sh produced the wrong
// provider, hermes gateway 401'd, /health poll failed,
// molecule-runtime never registered.
//
// Both writes are non-fatal: a failure here logs and continues so
// the workspace row stays consistent. The runtime can still boot
// (with the template default) and a later Save+Restart will re-
// persist via the SecretsHandler endpoints. The DB error path here
// is rare (the same DB just committed a workspace row a microsecond
// ago) so failing the create response would be unfriendly.
// internal#718 P4 closure: the prior `setProviderSecret` write
// (LLM_PROVIDER row, derived from the canvas-supplied
// payload.LLMProvider OR from deriveProviderFromModelSlug) has been
// REMOVED. The provider is now DERIVED at every decision point from
// (runtime, model) via the registry — billing (P2-B), CP user-data
// (this PR's CP-side commit replaces resolveModelAndProvider's
// env["LLM_PROVIDER"] read with a DeriveProvider call), and
// validation (P3 PR-C provisioner). Storing it is pure write-ghost
// with no remaining consumer. `payload.LLMProvider` is preserved on
// the request struct for backward-compatibility with older canvases
// that still send it; the value is intentionally ignored here.
//
// The setModelSecret write is non-fatal: a failure here logs and
// continues so the workspace row stays consistent. The runtime can
// still boot (with the template default) and a later
// Save+Restart will re-persist via the SecretsHandler endpoints.
if payload.Model != "" {
if err := setModelSecret(ctx, id, payload.Model); err != nil {
log.Printf("Create workspace %s: failed to persist MODEL_PROVIDER %q: %v (non-fatal)", id, payload.Model, err)
}
if explicitProvider := strings.TrimSpace(payload.LLMProvider); explicitProvider != "" {
if err := setProviderSecret(ctx, id, explicitProvider); err != nil {
log.Printf("Create workspace %s: failed to persist LLM_PROVIDER %q: %v (non-fatal)", id, explicitProvider, err)
}
} else if derived := deriveProviderFromModelSlug(payload.Model); derived != "" {
if err := setProviderSecret(ctx, id, derived); err != nil {
log.Printf("Create workspace %s: failed to persist LLM_PROVIDER %q: %v (non-fatal)", id, derived, err)
}
}
}
// Insert canvas layout — non-fatal: workspace can be dragged into position later
@@ -22,8 +22,8 @@ import (
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
@@ -259,11 +259,13 @@ func TestWorkspaceBudget_A2A_ExceededReturns402(t *testing.T) {
// Cache a URL so resolveAgentURL doesn't need a DB query after budget check
mr.Set(fmt.Sprintf("ws:%s:url", "ws-over-budget"), "http://localhost:9999")
// Budget check query: spend = limit → exceeded
mock.ExpectQuery("SELECT budget_limit, COALESCE").
// Budget check: monthly limit 500, monthly spend 500 → exceeded → 402
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-over-budget").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(int64(500), int64(500)))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-over-budget").
WillReturnRows(spendRows(0, 0, 0, 500))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -295,10 +297,12 @@ func TestWorkspaceBudget_A2A_AboveLimitReturns402(t *testing.T) {
mr.Set(fmt.Sprintf("ws:%s:url", "ws-way-over"), "http://localhost:9999")
// spend > limit
mock.ExpectQuery("SELECT budget_limit, COALESCE").
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-way-over").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(int64(100), int64(9999)))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":100}`)))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-way-over").
WillReturnRows(spendRows(0, 0, 0, 9999))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
@@ -334,11 +338,13 @@ func TestWorkspaceBudget_A2A_UnderLimitPassesThrough(t *testing.T) {
mr.Set(fmt.Sprintf("ws:%s:url", "ws-under-budget"), agentServer.URL)
// Budget check: spend (100) < limit (500) → pass-through
mock.ExpectQuery("SELECT budget_limit, COALESCE").
// Budget check: monthly spend (100) < limit (500) → pass-through
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-under-budget").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(int64(500), int64(100)))
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
mock.ExpectQuery(`FROM workspace_spend_events`).
WithArgs("ws-under-budget").
WillReturnRows(spendRows(0, 0, 0, 100))
// Activity log INSERT from logA2ASuccess
mock.ExpectExec("INSERT INTO activity_logs").
@@ -380,11 +386,11 @@ func TestWorkspaceBudget_A2A_NilLimitPassesThrough(t *testing.T) {
mr.Set(fmt.Sprintf("ws:%s:url", "ws-no-limit"), agentServer.URL)
// budget_limit NULL → no enforcement regardless of monthly_spend
mock.ExpectQuery("SELECT budget_limit, COALESCE").
// no limits configured → checkWorkspaceBudget returns early (no spend query),
// enforcement skipped regardless of spend
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-no-limit").
WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
AddRow(nil, int64(999999))) // huge spend but no limit set
WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{}`)))
mock.ExpectExec("INSERT INTO activity_logs").
WillReturnResult(sqlmock.NewResult(0, 1))
@@ -425,7 +431,7 @@ func TestWorkspaceBudget_A2A_DBErrorFailOpen(t *testing.T) {
mr.Set(fmt.Sprintf("ws:%s:url", "ws-db-err-budget"), agentServer.URL)
// Budget check fails with DB error → fail-open (request proceeds)
mock.ExpectQuery("SELECT budget_limit, COALESCE").
mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
WithArgs("ws-db-err-budget").
WillReturnError(sql.ErrConnDone)
@@ -65,6 +65,14 @@ func validateWorkspaceCompute(compute models.WorkspaceCompute) error {
if err := validateWorkspaceDisplayDimensions(compute.Display.Width, compute.Display.Height); err != nil {
return err
}
// internal#734: the durable-data choice. CP re-validates the same enum at
// its provision edge (IsValidDataPersistence → 400); validating here too
// gives the user a clear workspace-server error before the CP round-trip.
switch compute.DataPersistence {
case "", "persist", "ephemeral":
default:
return fmt.Errorf("unsupported compute.data_persistence (want persist|ephemeral)")
}
return nil
}
@@ -11,8 +11,8 @@ import (
"testing"
"time"
"github.com/DATA-DOG/go-sqlmock"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"github.com/DATA-DOG/go-sqlmock"
"github.com/gin-gonic/gin"
)
@@ -36,6 +36,23 @@ func TestValidateWorkspaceCompute_RejectsUnknownInstanceType(t *testing.T) {
}
}
// internal#734: data_persistence enum. "" (auto), "persist", "ephemeral" are
// the only accepted values; anything else is a clear 400 before the CP call.
func TestValidateWorkspaceCompute_DataPersistence(t *testing.T) {
for _, ok := range []string{"", "persist", "ephemeral"} {
c := models.WorkspaceCompute{DataPersistence: ok}
if err := validateWorkspaceCompute(c); err != nil {
t.Errorf("data_persistence=%q must be accepted: %v", ok, err)
}
}
for _, bad := range []string{"persistent", "off", "none", "Ephemeral", "true"} {
c := models.WorkspaceCompute{DataPersistence: bad}
if err := validateWorkspaceCompute(c); err == nil {
t.Errorf("data_persistence=%q must be rejected", bad)
}
}
}
func TestValidateWorkspaceCompute_RejectsOutOfRangeRootVolume(t *testing.T) {
for _, rootGB := range []int{29, 501} {
compute := models.WorkspaceCompute{Volume: models.WorkspaceComputeVolume{RootGB: rootGB}}
@@ -126,7 +143,7 @@ func TestWorkspaceCreate_WithInvalidCompute_ReturnsBadRequest(t *testing.T) {
c, _ := gin.CreateTestContext(w)
body := `{
"name":"Oversized Agent",
"model":"gpt-4",
"model":"claude-opus-4-7",
"compute":{"instance_type":"p4d.24xlarge"}
}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
@@ -399,7 +399,13 @@ func (h *WorkspaceHandler) Delete(c *gin.Context) {
// disable, broadcast). The HTTP-specific bits — direct-children 409
// gate above, ?purge=true hard-delete below, response shaping —
// stay in this handler.
descendantIDs, stopErrs, err := h.CascadeDelete(ctx, id)
// internal#734: the user can ask to erase saved data (browser profile /
// cookies / downloads / agent memory) on delete. Opt-in — default keeps the
// data on its volume for the orphan-sweeper grace. Only a genuine
// permanent-delete reaches here (restart/reconcile use other paths), so this
// is the one place prune may be requested.
erase := c.Query("erase_data") == "true"
descendantIDs, stopErrs, err := h.CascadeDelete(ctx, id, erase)
if err != nil {
// Audit 2026-05-09 (Core-Security): raw `err.Error()` here was
// exposed to HTTP clients verbatim, including wrapped lib/pq
@@ -515,7 +521,13 @@ func destructiveDeleteCounts(ctx context.Context, id string) (childCount int, sc
// Caller is responsible for the children-confirmation gate (the HTTP handler
// returns 409 when children exist + ?confirm=true is missing); this helper
// always cascades.
func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]string, []error, error) {
// CascadeDelete tears down a workspace and its descendants (stop compute,
// remove volumes, revoke tokens, disable schedules, broadcast). erase=true
// (internal#734) means the user asked to erase saved data, so the CP compute
// teardown prunes each workspace's durable data volume; the HTTP delete passes
// the user's choice, the org-import reconcile passes false (a reconcile is not
// a user-erase).
func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string, erase bool) ([]string, []error, error) {
if err := validateWorkspaceID(id); err != nil {
return nil, nil, err
}
@@ -579,7 +591,7 @@ func (h *WorkspaceHandler) CascadeDelete(ctx context.Context, id string) ([]stri
// pending EC2 is queryable and handed off to the CP-orphan-sweeper —
// rather than the bare one-shot StopWorkspaceAuto that produced the
// silent-leak class (task #15 / workspace-ec2-leak).
if err := h.stopWorkspaceForDelete(cleanupCtx, wsID); err != nil {
if err := h.stopWorkspaceForDelete(cleanupCtx, wsID, erase); err != nil {
log.Printf("CascadeDelete %s stop failed: %v — leaving cleanup for orphan sweeper", wsID, err)
stopErrs = append(stopErrs, fmt.Errorf("stop %s: %w", wsID, err))
return
@@ -521,7 +521,7 @@ func TestValidateWorkspaceDir_Empty(t *testing.T) {
func TestCascadeDelete_InvalidUUID(t *testing.T) {
h := &WorkspaceHandler{}
descendants, stopErrs, err := h.CascadeDelete(context.Background(), "not-a-uuid")
descendants, stopErrs, err := h.CascadeDelete(context.Background(), "not-a-uuid", false)
if err == nil {
t.Error("expected error for invalid UUID")
}
@@ -542,7 +542,7 @@ func TestCascadeDelete_DescendantQueryError(t *testing.T) {
WithArgs(wsID).
WillReturnError(sql.ErrConnDone)
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID)
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID, false)
if err == nil {
t.Error("CascadeDelete returned nil error; want descendant query error")
}
@@ -569,7 +569,7 @@ func TestCascadeDelete_DescendantRowsError(t *testing.T) {
WithArgs(wsID).
WillReturnRows(rows)
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID)
deleted, stopErrs, err := h.CascadeDelete(context.Background(), wsID, false)
if err == nil {
t.Fatal("CascadeDelete returned nil error; want descendant rows error")
}
@@ -45,7 +45,7 @@ func TestStopWorkspaceForDelete_CPRetriesTransientThenSucceeds(t *testing.T) {
}}
h := &WorkspaceHandler{cpProv: stub}
err := h.stopWorkspaceForDelete(context.Background(), "ws-del-1")
err := h.stopWorkspaceForDelete(context.Background(), "ws-del-1", false)
if err != nil {
t.Fatalf("expected nil error on eventual success, got %v", err)
}
@@ -73,7 +73,7 @@ func TestStopWorkspaceForDelete_CPExhaustsEmitsDurableEventAndReturnsError(t *te
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
err := h.stopWorkspaceForDelete(context.Background(), "ws-doomed")
err := h.stopWorkspaceForDelete(context.Background(), "ws-doomed", false)
if err == nil {
t.Fatal("expected terminal error on retry exhaustion, got nil")
}
@@ -96,7 +96,7 @@ func TestStopWorkspaceForDelete_CPExhaustsEmitsDurableEventAndReturnsError(t *te
func TestStopWorkspaceForDelete_NoBackendIsNoOp(t *testing.T) {
h := &WorkspaceHandler{} // cpProv nil, provisioner nil
if err := h.stopWorkspaceForDelete(context.Background(), "ws-x"); err != nil {
if err := h.stopWorkspaceForDelete(context.Background(), "ws-x", false); err != nil {
t.Errorf("expected nil no-op with no backend, got %v", err)
}
}
@@ -235,9 +235,13 @@ func (h *WorkspaceHandler) StopWorkspaceAuto(ctx context.Context, workspaceID st
// container won't heal on retry (matches RestartWorkspaceAuto's Docker
// rationale); the orphan-container sweeper (registry/orphan_sweeper.go) is
// the Docker-side backstop.
func (h *WorkspaceHandler) stopWorkspaceForDelete(ctx context.Context, workspaceID string) error {
// stopWorkspaceForDelete terminates a workspace's compute on the delete path.
// erase=true (internal#734) means the user asked to erase saved data, so the CP
// teardown prunes the durable data volume. The local-docker path always removes
// its volume via CascadeDelete's RemoveVolume, so erase is a CP-only concern.
func (h *WorkspaceHandler) stopWorkspaceForDelete(ctx context.Context, workspaceID string, erase bool) error {
if h.cpProv != nil {
if err := h.cpStopWithRetryErr(ctx, workspaceID, "Delete"); err != nil {
if err := h.cpStopWithRetryErr(ctx, workspaceID, "Delete", erase); err != nil {
h.emitDeleteTerminateRetryExhausted(ctx, workspaceID, err)
return err
}
@@ -75,3 +75,21 @@ func formatMissingEnvError(missing []string) string {
strings.Join(missing, ", "),
)
}
// formatMissingBYOKCredentialError builds the user-facing message for a
// provision failure caused by a non-platform (byok/subscription) workspace
// that has no usable LLM credential of its own (internal#711). The platform's
// scope:global LLM credentials are NOT a valid fallback for a non-platform
// workspace — resolving to them would bill the platform's Anthropic credits —
// so the provision fails closed here rather than starting the workspace on
// stripped/absent creds. Rendered verbatim in the canvas Events tab.
func formatMissingBYOKCredentialError(mode string) string {
return fmt.Sprintf(
"this workspace's LLM billing mode is %q (not platform-managed) but it has no LLM credential of its own. "+
"Add a workspace-scoped credential (e.g. CLAUDE_CODE_OAUTH_TOKEN or your provider's API key) under "+
"Config → Secrets, or switch the workspace to platform-managed billing via "+
"/admin/workspaces/:id/llm-billing-mode, then retry. The platform's shared LLM credentials are not "+
"used for non-platform workspaces.",
mode,
)
}
@@ -330,6 +330,7 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
Runtime: payload.Runtime,
InstanceType: payload.Compute.InstanceType,
DiskGB: int32(payload.Compute.Volume.RootGB),
DataPersistence: payload.Compute.DataPersistence,
Display: provisioner.WorkspaceDisplayConfig{
Mode: payload.Compute.Display.Mode,
Width: payload.Compute.Display.Width,
@@ -710,131 +711,21 @@ func (h *WorkspaceHandler) defaultTemplateProvidersYAML(runtime string) string {
return ""
}
// deriveProviderFromModelSlug maps a hermes-agent model slug prefix to
// its provider name — a Go translation of the case statement in
// workspace-configs-templates/hermes/scripts/derive-provider.sh that we
// can run at provision time so LLM_PROVIDER lands in workspace_secrets
// (and from there, into /configs/config.yaml via CP user-data) before
// the container ever boots.
// internal#718 P4 closure — `deriveProviderFromModelSlug` (retire-list #3)
// has been removed together with its only caller (WorkspaceHandler.Create's
// setProviderSecret write) and the LLM_PROVIDER workspace_secret it
// populated.
//
// Returns "" when the prefix isn't recognized OR when the runtime-only
// override would be needed to pick a provider — the caller skips the
// LLM_PROVIDER write in that case so derive-provider.sh keeps the final
// say at boot. derive-provider.sh remains the source of truth: this is
// strictly a *gating* hint that survives restarts and gives CP a YAML
// field to populate. Without it, "Save+Restart" would lose the user's
// provider choice every time CP regenerates the config.
//
// Two intentional differences from the shell version:
//
// 1. nousresearch/* and openai/* both return "openrouter" here. The
// shell script special-cases "prefer nous if HERMES_API_KEY set" /
// "prefer custom if OPENAI_API_KEY set", but those depend on
// runtime env that may not yet be loaded at provision time. We pick
// the safe default ("openrouter" reaches both Hermes 3 and OpenAI
// models without extra config); derive-provider.sh's runtime check
// can still upgrade to nous/custom when the keys are present.
//
// 2. Unknown prefixes return "" instead of "auto". Persisting "auto"
// would block a future "Save+Restart" with a known prefix from
// re-deriving — the CP YAML field is sticky once written. Returning
// "" means the caller skips the write and the runtime falls through
// to derive-provider.sh's *=auto branch on its own.
//
// Cover the same prefix list as derive-provider.sh's case statement;
// keep both files in sync when a new provider is added (table-driven
// test in workspace_provision_shared_test.go pins the mapping).
func deriveProviderFromModelSlug(model string) string {
if model == "" {
return ""
}
idx := strings.Index(model, "/")
if idx <= 0 {
return ""
}
prefix := model[:idx]
switch prefix {
// Direct-SDK providers (clean 1:1 prefix→provider mapping).
case "minimax":
return "minimax"
case "minimax-cn":
return "minimax-cn"
case "anthropic":
return "anthropic"
case "gemini":
return "gemini"
case "deepseek":
return "deepseek"
case "zai":
return "zai"
case "kimi-coding":
return "kimi-coding"
case "kimi-coding-cn":
return "kimi-coding-cn"
case "alibaba", "dashscope", "qwen":
return "alibaba"
case "xiaomi", "mimo":
return "xiaomi"
case "arcee", "arcee-ai":
return "arcee"
case "nvidia", "nim":
return "nvidia"
case "ollama-cloud":
return "ollama-cloud"
case "huggingface", "hf":
return "huggingface"
case "ai-gateway", "aigateway":
return "ai-gateway"
case "kilocode":
return "kilocode"
case "opencode-zen":
return "opencode-zen"
case "opencode-go":
return "opencode-go"
// Aggregator + explicit catch-alls.
case "openrouter":
return "openrouter"
case "custom":
return "custom"
// Runtime-only override candidates. derive-provider.sh's
// HERMES_API_KEY / OPENAI_API_KEY checks happen at boot; we pick the
// safe default (openrouter reaches both Hermes 3 and OpenAI without
// extra config) and let the script upgrade to nous/custom at runtime.
case "nousresearch", "openai":
return "openrouter"
// Additional 1:1 prefix→provider mappings — kept aligned with upstream's
// HERMES_INFERENCE_PROVIDER list (NousResearch/hermes-agent v0.12.0,
// 2026-04-30) and the additional case clauses in derive-provider.sh.
// The drift gate in derive_provider_drift_test.go enforces parity.
case "xai", "grok":
return "xai"
case "bedrock", "aws":
return "bedrock"
case "tencent", "tencent-tokenhub":
return "tencent-tokenhub"
case "gmi":
return "gmi"
case "qwen-oauth":
return "qwen-oauth"
case "lmstudio", "lm-studio":
return "lmstudio"
case "minimax-oauth":
return "minimax-oauth"
case "alibaba-coding-plan":
return "alibaba-coding-plan"
case "google-gemini-cli":
return "google-gemini-cli"
case "openai-codex":
return "openai-codex"
case "copilot-acp":
return "copilot-acp"
case "copilot":
return "copilot"
}
// Unknown prefix → don't persist a guess. derive-provider.sh's
// *=auto fallback handles it at runtime.
return ""
}
// The hand-rolled prefix switch was a Go mirror of
// workspace-configs-templates/hermes/scripts/derive-provider.sh kept in
// sync via a drift test. The replacement is providers.Manifest.DeriveProvider
// (synced in P2-A), which derives the provider from (runtime, model)
// against the registry SSOT at every decision point — billing (P2-B),
// CP user-data emission (this PR's CP-side commit), validation
// (P3 PR-C). The shell script in the hermes template continues to be the
// runtime fallback for unregistered models; codegen of the template's
// providers block from the registry is the P4 follow-up gated on
// registry data growth.
// applyRuntimeModelEnv exposes the workspace's selected model via an
// env var the target runtime's install.sh / start.sh knows to read.
@@ -883,12 +774,7 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// can no longer confuse a provider slug for a model id. CP-side
// slot-separation (cp#213 + cp#220) merged the analogous fix on
// the CP side; this is the workspace-server companion.
if model == "" {
model = envVars["MOLECULE_MODEL"]
}
if model == "" {
model = envVars["MODEL"]
}
model = effectiveModelForBilling(model, envVars)
if model == "" {
return
}
@@ -921,6 +807,31 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
}
}
// effectiveModelForBilling resolves the picked model id from an explicit
// argument with the SAME fallback chain applyRuntimeModelEnv uses to set the
// container MODEL env: explicit arg → envVars["MOLECULE_MODEL"] →
// envVars["MODEL"] (the workspace_secret). It is the single source of truth
// for "what model is this workspace going to run", shared by both
// applyRuntimeModelEnv (which exports it to the container) and
// applyPlatformManagedLLMEnv (which derives the billing mode from it).
//
// molecule-core#1994: the billing resolver MUST consult the same effective
// model the container will actually run. Pre-fix it used the raw payload.Model
// only, which is "" on a re-provision (the payload is rebuilt from the DB with
// no Model), so it derived from an empty model → defaulted closed to
// platform_managed and diverged from the read endpoint (which reads the stored
// MODEL secret). Returns "" only when no model is resolvable anywhere — the
// legitimate "unset → platform default" case the resolver fails closed on.
func effectiveModelForBilling(model string, envVars map[string]string) string {
if model == "" {
model = envVars["MOLECULE_MODEL"]
}
if model == "" {
model = envVars["MODEL"]
}
return model
}
// applyPlatformManagedLLMEnv wires the control-plane LLM proxy into a
// workspace only when the RESOLVED billing mode for this workspace is
// platform_managed. "Resolved" means: the workspace-level override (if any)
@@ -943,16 +854,94 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// MOLECULE_LLM_BILLING_MODE_RESOLVED so an in-container debug check can
// answer "what mode is this workspace running under" without DB queries
// (RFC Observability hot-spot).
func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, workspaceID, runtime, model string) {
orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode)
//
// molecule-core#1994 (credential-handling follow-on, CTO-confirmed model).
// `global_secrets` is the TENANT's own secret store, shared across all of
// that tenant's workspaces — it is NOT the platform's. The platform's own
// LLM credential is the CP proxy usage token (MOLECULE_LLM_USAGE_TOKEN),
// injected SEPARATELY on the platform_managed path below; it is never stored
// in any tenant's global_secrets.
//
// Consequently the byok/disabled branch does NOT strip the tenant's
// global-origin LLM creds. Under the corrected model the tenant's own
// credential — whether at global scope (a global_secrets row, e.g. the key
// they configured via the org-import required-env preflight / the settings
// Secrets tab) or at workspace scope (a workspace_secrets row) — is exactly
// what byok must run on, direct. The earlier internal#711 strip rested on the
// inverted premise that a global-scope LLM cred was "the platform's own"; it
// was wrong and it killed legitimate byok workspaces (MISSING_BYOK_CREDENTIAL
// for tenants whose oauth lived at global scope — Reno Stars Marketing agent,
// confirmed live 2026-05-28). Removing the strip is only safe because the
// platform's own credential is never co-mingled into a tenant's global_secrets
// (guarded at the write boundary: SetGlobal rejects bypass-list keys for a
// platform-managed tenant; the platform proxy token is read from server env
// only, never persisted to a tenant store).
//
// The boolean return still reports whether the workspace has at least one
// usable LLM credential. The caller (prepareProvisionContext) uses it to FAIL
// CLOSED — a byok workspace with no usable LLM credential at ANY scope is
// aborted with a clear MISSING_BYOK_CREDENTIAL error at provision time rather
// than started credential-less.
// platformLLMEnvResult is the structured outcome of applyPlatformManagedLLMEnv.
// ResolvedMode is the per-workspace billing/provider mode the resolver
// landed on. HasUsableLLMCred reports whether the workspace has at least one
// platform-managed-shaped LLM credential key in its env — the tenant's own,
// at global or workspace scope. Only the non-platform (byok) path consults
// HasUsableLLMCred for the fail-closed decision; the platform_managed path
// always returns true (it forces the CP proxy usage token, which IS the
// usable credential).
type platformLLMEnvResult struct {
ResolvedMode string
HasUsableLLMCred bool
// Source records which layer decided the mode (internal#718 P2-B):
// derived_provider (registry derivation), derived_default (derive failed →
// platform default), workspace_override (explicit operator pin), or
// constant_fallback (DB error). Surfaced for observability + asserted by the
// behavior-delta tests so a regression of "derived, not stored" flips red.
Source BillingModeSource
}
// globalKeys is the provenance side-channel from loadWorkspaceSecrets: the set
// of env keys that originated from the operator-controlled global_secrets table
// (a workspace_secrets row of the same name overrides and clears the flag). It
// is consumed ONLY on the byok/disabled branch's provider-matched strip
// (internal#728 Bug 1): a global-origin LLM bypass cred that does NOT match the
// resolved provider's auth_env is stripped so a greedy runtime (claude-code
// prefers CLAUDE_CODE_OAUTH_TOKEN) cannot route a non-anthropic model to the
// wrong upstream. May be nil (no global-origin keys / unknown provenance) — a
// nil set strips nothing, preserving the pre-#728 behavior for callers that do
// not thread provenance.
func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, workspaceID, runtime, model string, globalKeys map[string]struct{}) platformLLMEnvResult {
// internal#718 P2-B: the platform-vs-byok decision now DERIVES the provider
// from (runtime, model) via the registry and keys off IsPlatform(derived) —
// NOT a stored LLM_PROVIDER and NOT the org rung. This path already carries
// runtime + model + the workspace env, so it calls the DERIVED resolver
// directly (no DB round-trip for runtime/model). availableAuthEnv is the set
// of recognized provider auth-env-var NAMES present in envVars (the same
// disambiguation input the registry uses to split oauth-vs-api). The org-env
// MOLECULE_LLM_BILLING_MODE is NO LONGER read into the decision (retired).
availableAuthEnv := availableAuthEnvNames(envVars)
// molecule-core#1994: derive billing mode from the EFFECTIVE model, not the
// raw payload.Model. On a re-provision (restart/resume/auto-restart) the
// payload is rebuilt from the DB with Name+Tier+Runtime only — payload.Model
// is "" (workspace_restart.go via withStoredCompute, which backfills Compute
// but NOT Model). With an empty model DeriveProvider errors → the resolver
// defaults closed to platform_managed and bakes the CP proxy, DIVERGING from
// the read endpoint (which reads the stored MODEL workspace_secret and derives
// byok). The stored model already lives in the merged envVars (loaded by
// loadWorkspaceSecrets); resolve it with the SAME fallback chain
// applyRuntimeModelEnv uses so the provision-path derive inputs match the
// read-path's — keeping the two resolvers in parity (the #1994 regression
// guard test asserts this).
effectiveModel := effectiveModelForBilling(model, envVars)
res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, effectiveModel, availableAuthEnv)
if resolveErr != nil {
// resolveErr != nil ⇒ resolver hit a DB error AND already defaulted
// res.ResolvedMode to platform_managed. Log + proceed; the safe default
// is already in place, no early return needed.
log.Printf("workspace_provision: resolve billing mode workspace=%s err=%v (defaulting to platform_managed)", workspaceID, resolveErr)
}
log.Printf("workspace_provision: billing mode workspace=%s resolved=%s source=%s org_default=%s", workspaceID, res.ResolvedMode, res.Source, res.OrgDefault)
log.Printf("workspace_provision: billing mode workspace=%s resolved=%s source=%s derived_provider=%s", workspaceID, res.ResolvedMode, res.Source, derefOrEmpty(res.ProviderSelection))
// internal#703: MOLECULE_LLM_BILLING_MODE in the container must reflect the
// RESOLVED per-workspace mode, not a hardcoded literal. Pre-fix this var was
// only emitted (hardcoded "platform_managed") on the strip path below, so a
@@ -966,18 +955,60 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
// pulling logs or hitting the admin route.
envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] = res.ResolvedMode
if res.ResolvedMode != LLMBillingModePlatformManaged {
// byok or disabled — DO NOT strip vendor keys, DO NOT force-route to CP,
// DO NOT override the workspace own ANTHROPIC_BASE_URL / OAuth token.
// Leave envVars alone so CLAUDE_CODE_OAUTH_TOKEN / vendor API keys
// pulled from workspace_secrets survive into the container, and the
// workspace talks to its own provider directly (internal#703).
return
// byok or disabled — DO NOT force-route to CP, DO NOT override the
// workspace's own ANTHROPIC_BASE_URL, and DO NOT strip the tenant's own
// (provider-matching) LLM credentials.
//
// molecule-core#1994 (corrected model): `global_secrets` is the
// TENANT's store, not the platform's. The tenant's own credential —
// at global OR workspace scope — is exactly what byok runs on, direct.
// The platform's own credential is never in a tenant's global_secrets
// (guarded at the SetGlobal write boundary + the proxy token is
// server-env-only), so leaving the tenant's globals in place cannot
// re-open the platform-credit drain.
//
// internal#728 Bug 1 (provider-matched credential injection): #1994
// removed the BLANKET strip, which was correct for the platform-key
// co-mingling it targeted but left EVERY claude-code workspace
// inheriting the tenant-global CLAUDE_CODE_OAUTH_TOKEN. A claude-code
// runtime greedily prefers that oauth (`llm-auth: detected oauth` →
// api.anthropic.com), so a workspace whose RESOLVED provider is NOT
// anthropic-oauth (minimax, kimi-byok, …) routes its non-Anthropic
// model to Anthropic and errors (`Claude Code returned an error
// result`; DevB MiniMax-M2.7 live-confirmed 2026-05-28).
//
// The precise, provider-AWARE replacement for the over-removed strip:
// keep ONLY the global-origin bypass creds whose env-var name is in the
// RESOLVED provider's auth_env; strip the rest. This is NOT a return to
// the blanket strip — it is keyed off the derived provider:
// - minimax (auth_env: MINIMAX_API_KEY, ANTHROPIC_AUTH_TOKEN,
// ANTHROPIC_API_KEY) → global-origin CLAUDE_CODE_OAUTH_TOKEN is
// NOT a match → stripped (fixes DevB).
// - anthropic-oauth (auth_env: CLAUDE_CODE_OAUTH_TOKEN) → the
// global-origin oauth IS a match → kept (PM/reno opus byok NOT
// regressed — the #1994 ByokGlobalScopeOAuthSurvives guard holds).
// WORKSPACE-origin creds (the user explicitly set them via the canvas
// Secrets tab → NOT in globalKeys) are NEVER stripped here, even when
// they don't match: the user authored them deliberately (JRS kimi
// workspace-key, reno's own oauth). Only the inherited operator-store
// channel is provider-gated.
stripNonMatchingGlobalOriginLLMCreds(envVars, globalKeys, runtime, effectiveModel, availableAuthEnv)
return platformLLMEnvResult{
ResolvedMode: res.ResolvedMode,
HasUsableLLMCred: hasAnyPlatformManagedLLMKey(envVars),
Source: res.Source,
}
}
baseURL := firstNonEmptyEnv("MOLECULE_LLM_BASE_URL", "OPENAI_BASE_URL")
anthropicBaseURL := firstNonEmptyEnv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "ANTHROPIC_BASE_URL")
token := firstNonEmptyEnv("MOLECULE_LLM_USAGE_TOKEN", "OPENAI_API_KEY")
if baseURL == "" || token == "" {
return
// Proxy not configured (boot race / misconfig). On the platform_managed
// path the workspace IS entitled to platform creds, so we do NOT strip
// here — but we report HasUsableLLMCred from whatever survived so the
// caller's fail-closed branch (non-platform only) is never reached on
// this path.
return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true, Source: res.Source}
}
stripPlatformManagedLLMBypassEnv(envVars)
@@ -1006,6 +1037,10 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
envVars["MOLECULE_MODEL"] = defaultModel
}
}
// platform_managed: the CP proxy usage token (injected as ANTHROPIC_API_KEY
// / OPENAI_API_KEY above) IS the usable credential, so the workspace is
// never fail-closed on this path.
return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true, Source: res.Source}
}
func stripPlatformManagedLLMBypassEnv(envVars map[string]string) {
@@ -1014,6 +1049,80 @@ func stripPlatformManagedLLMBypassEnv(envVars map[string]string) {
}
}
// hasAnyPlatformManagedLLMKey reports whether envVars carries at least one
// non-empty platform-managed-shaped LLM credential key (the tenant's own, at
// global or workspace scope). Used by the byok fail-closed branch: a byok
// workspace with no LLM credential at ANY scope must be aborted with
// MISSING_BYOK_CREDENTIAL rather than started credential-less.
func hasAnyPlatformManagedLLMKey(envVars map[string]string) bool {
for key := range platformManagedDirectLLMBypassKeys {
if strings.TrimSpace(envVars[key]) != "" {
return true
}
}
return false
}
// stripNonMatchingGlobalOriginLLMCreds is the byok-branch provider-matched
// credential injection (internal#728 Bug 1). It removes from envVars every
// platform-managed LLM bypass key that:
//
// 1. originated from the operator-controlled global_secrets store
// (present in globalKeys — a workspace_secrets row of the same name
// overrides + clears the flag, so user-authored creds are exempt), AND
// 2. is NOT in the RESOLVED provider's auth_env set.
//
// The motivating regression: #1994 dropped the blanket strip, so a claude-code
// workspace resolving to `minimax` still inherited the tenant-global
// CLAUDE_CODE_OAUTH_TOKEN; the runtime prefers that oauth and routes the
// MiniMax model to api.anthropic.com → error. Keeping only the resolved
// provider's own auth_env keys (minimax: MINIMAX_API_KEY/ANTHROPIC_AUTH_TOKEN/
// ANTHROPIC_API_KEY — not the oauth) removes the stray oauth while preserving
// anthropic-oauth's CLAUDE_CODE_OAUTH_TOKEN for an opus byok workspace.
//
// Fail-OPEN by design: if the provider cannot be derived (empty model /
// unknown runtime / ambiguous) or the registry is unavailable, we strip
// NOTHING — we never strip a credential we cannot prove is non-matching, so a
// derive miss can never fail-close a legitimate byok workspace (mirrors the
// resolver's own default-closed-to-platform contract: the worst case is we
// keep a stray cred, never that we remove the only usable one). The earlier
// internal#711 blanket strip's fail-direction (remove first) was the bug;
// this strip's fail-direction is keep-first.
func stripNonMatchingGlobalOriginLLMCreds(envVars map[string]string, globalKeys map[string]struct{}, runtime, model string, availableAuthEnv []string) {
if len(globalKeys) == 0 {
return // no operator-store-origin keys to consider — nothing to strip.
}
manifest, err := providerRegistry()
if err != nil || manifest == nil {
return // registry unavailable — fail open, strip nothing.
}
provider, dErr := manifest.DeriveProvider(runtime, model, availableAuthEnv)
if dErr != nil {
return // underivable provider — fail open, strip nothing.
}
// The resolved provider's accepted auth-env-var NAMES (case-insensitive
// for parity with isPlatformManagedDirectLLMBypassKey, which upper-cases).
keep := make(map[string]struct{}, len(provider.AuthEnv))
for _, e := range provider.AuthEnv {
keep[strings.ToUpper(strings.TrimSpace(e))] = struct{}{}
}
for key := range globalKeys {
upper := strings.ToUpper(strings.TrimSpace(key))
if _, isBypass := platformManagedDirectLLMBypassKeys[upper]; !isBypass {
continue // not an LLM bypass cred (e.g. a non-LLM operator secret) — leave it.
}
if _, matches := keep[upper]; matches {
continue // matches the resolved provider's auth_env — this is what byok runs on.
}
// Global-origin LLM bypass cred that does NOT match the resolved
// provider — the stray that a greedy runtime would mis-prefer. Strip.
if _, present := envVars[key]; present {
log.Printf("workspace_provision: byok provider-matched strip — removing global-origin LLM cred %s (resolved provider=%s does not accept it)", key, provider.Name)
delete(envVars, key)
}
}
}
func runtimeUsesAnthropicNativeProxy(runtime string) bool {
return strings.EqualFold(strings.TrimSpace(runtime), "claude-code")
}
@@ -1065,6 +1174,14 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
var v []byte
var ver int
if globalRows.Scan(&k, &v, &ver) == nil {
// internal#718 P4 closure: LLM_PROVIDER is retired even
// at the global rung. The same provider-from-(runtime,model)
// derivation runs per-workspace, so a global default
// would be pure ghost. Symmetric with the workspace_secrets
// drop below.
if k == "LLM_PROVIDER" {
continue
}
decrypted, decErr := crypto.DecryptVersioned(v, ver)
if decErr != nil {
log.Printf("Provisioner: FATAL — failed to decrypt global secret %s (version=%d): %v — aborting provision of workspace %s", k, ver, decErr, workspaceID)
@@ -1087,6 +1204,18 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
var v []byte
var ver int
if wsRows.Scan(&k, &v, &ver) == nil {
// internal#718 P4 closure: LLM_PROVIDER is a retired
// secret key. Migration 20260528000000 deletes any
// straggler rows; this drop is defence-in-depth so a
// rolling deploy (new code, old DB) never re-emits the
// retired key into the provisioner env (which would
// reach the CP-side resolveModelAndProvider — now
// itself retired, but the env contract belongs to
// core). Idempotent: a fresh tenant has zero
// LLM_PROVIDER rows and this branch is unreached.
if k == "LLM_PROVIDER" {
continue
}
decrypted, decErr := crypto.DecryptVersioned(v, ver)
if decErr != nil {
log.Printf("Provisioner: FATAL — failed to decrypt workspace secret %s (version=%d) for %s: %v — aborting provision", k, ver, workspaceID, decErr)
@@ -42,6 +42,7 @@ type trackingCPProv struct {
mu sync.Mutex
started []string
stopped []string
pruned []string // internal#734: workspaces stopped via StopAndPrune
startErr error
stopErr error
}
@@ -61,6 +62,13 @@ func (r *trackingCPProv) Stop(_ context.Context, workspaceID string) error {
r.mu.Unlock()
return r.stopErr
}
func (r *trackingCPProv) StopAndPrune(_ context.Context, workspaceID string) error {
r.mu.Lock()
r.stopped = append(r.stopped, workspaceID)
r.pruned = append(r.pruned, workspaceID)
r.mu.Unlock()
return r.stopErr
}
func (r *trackingCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
return "", nil
}
@@ -10,9 +10,9 @@ import (
"sync/atomic"
"testing"
"github.com/DATA-DOG/go-sqlmock"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
"github.com/DATA-DOG/go-sqlmock"
)
// Issue #2486 reproduction harness: 7 simultaneous claude-code provisions
@@ -71,6 +71,9 @@ func (r *recordingCPProv) Start(_ context.Context, cfg provisioner.WorkspaceConf
func (r *recordingCPProv) Stop(_ context.Context, _ string) error {
panic("recordingCPProv.Stop not expected in concurrent-repro test")
}
func (r *recordingCPProv) StopAndPrune(_ context.Context, _ string) error {
panic("recordingCPProv.StopAndPrune not expected in concurrent-repro test")
}
func (r *recordingCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
panic("recordingCPProv.GetConsoleOutput not expected in concurrent-repro test")
@@ -0,0 +1,110 @@
package handlers
import (
"os"
"path/filepath"
"testing"
)
func TestParseTopLevelRuntime(t *testing.T) {
cases := []struct {
name string
yaml string
want string
}{
{"top-level claude-code", "name: x\nruntime: claude-code\ntier: 2\n", "claude-code"},
{"top-level google-adk", "runtime: google-adk\n", "google-adk"},
{"quoted value", `runtime: "google-adk"` + "\n", "google-adk"},
{"single-quoted value", "runtime: 'codex'\n", "codex"},
{"ignores runtime_config nested model", "runtime: google-adk\nruntime_config:\n model: vertex:gemini-2.5-pro\n", "google-adk"},
{"runtime_config only, no top-level runtime", "name: y\nruntime_config:\n model: x\n", ""},
{"indented runtime is not top-level", "wrapper:\n runtime: claude-code\n", ""},
{"empty", "", ""},
{"no runtime key", "name: z\ntier: 4\n", ""},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
if got := parseTopLevelRuntime([]byte(tc.yaml)); got != tc.want {
t.Fatalf("parseTopLevelRuntime(%q) = %q, want %q", tc.yaml, got, tc.want)
}
})
}
}
func TestSeededConfigRuntime(t *testing.T) {
// in-memory configFiles wins over template dir.
t.Run("from configFiles", func(t *testing.T) {
cf := map[string][]byte{"config.yaml": []byte("runtime: google-adk\n")}
if got := seededConfigRuntime("/nonexistent", cf); got != "google-adk" {
t.Fatalf("got %q, want google-adk", got)
}
})
// falls back to template dir's config.yaml.
t.Run("from template dir", func(t *testing.T) {
dir := t.TempDir()
if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte("name: a\nruntime: claude-code\n"), 0o600); err != nil {
t.Fatal(err)
}
if got := seededConfigRuntime(dir, nil); got != "claude-code" {
t.Fatalf("got %q, want claude-code", got)
}
})
// nothing available → "".
t.Run("indeterminate", func(t *testing.T) {
if got := seededConfigRuntime("", nil); got != "" {
t.Fatalf("got %q, want empty", got)
}
if got := seededConfigRuntime("/does/not/exist", map[string][]byte{}); got != "" {
t.Fatalf("got %q, want empty", got)
}
})
}
func TestRuntimeSeedMismatchAbort(t *testing.T) {
adkCfg := map[string][]byte{"config.yaml": []byte("runtime: google-adk\n")}
ccCfg := map[string][]byte{"config.yaml": []byte("name: Claude Code Agent\nruntime: claude-code\n")}
t.Run("mismatch fails loud (the #2027 demo bug)", func(t *testing.T) {
// requested google-adk, but seeding the claude-code-default config.
abort := runtimeSeedMismatchAbort("google-adk", "", ccCfg)
if abort == nil {
t.Fatal("expected abort for google-adk requested but claude-code seeded, got nil")
}
if abort.Extra["requested_runtime"] != "google-adk" || abort.Extra["seeded_runtime"] != "claude-code" {
t.Fatalf("abort.Extra mismatch: %+v", abort.Extra)
}
if abort.Extra["issue"] != "2027" {
t.Fatalf("expected issue 2027 tag, got %v", abort.Extra["issue"])
}
})
t.Run("match is allowed", func(t *testing.T) {
if abort := runtimeSeedMismatchAbort("google-adk", "", adkCfg); abort != nil {
t.Fatalf("expected no abort when seeded runtime matches, got %q", abort.Msg)
}
})
t.Run("empty requested runtime is allowed (org-template default path)", func(t *testing.T) {
if abort := runtimeSeedMismatchAbort("", "", ccCfg); abort != nil {
t.Fatalf("expected no abort for unspecified runtime, got %q", abort.Msg)
}
})
t.Run("indeterminate seed is allowed (CP mode, no local config bytes)", func(t *testing.T) {
if abort := runtimeSeedMismatchAbort("google-adk", "", nil); abort != nil {
t.Fatalf("expected no abort when seeded runtime is indeterminate, got %q", abort.Msg)
}
})
t.Run("mismatch via template dir also fails loud", func(t *testing.T) {
dir := t.TempDir()
if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte("runtime: claude-code\n"), 0o600); err != nil {
t.Fatal(err)
}
if abort := runtimeSeedMismatchAbort("hermes", dir, nil); abort == nil {
t.Fatal("expected abort for hermes requested but claude-code template seeded")
}
})
}
@@ -37,8 +37,11 @@ package handlers
import (
"context"
"errors"
"fmt"
"log"
"os"
"path/filepath"
"strings"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
@@ -193,7 +196,40 @@ func (h *WorkspaceHandler) prepareProvisionContext(
// continue to rely on workspace_secrets / org-import persona-env
// merge for their git auth.
applyAgentGitHTTPCreds(envVars, payload.Role)
applyPlatformManagedLLMEnv(ctx, envVars, workspaceID, payload.Runtime, payload.Model)
// molecule-core#1994: per-workspace LLM billing-mode resolution + env wiring.
// On platform_managed it forces the CP proxy usage token; on byok/disabled
// it keeps the tenant's own provider-MATCHING creds (global OR workspace
// scope) and reports whether a usable LLM credential is present.
//
// internal#728 Bug 1: globalSecretKeys (loadWorkspaceSecrets provenance)
// lets the byok branch strip ONLY operator-store-origin LLM creds that do
// NOT match the resolved provider's auth_env — so a non-anthropic-oauth
// claude-code workspace no longer inherits the stray tenant-global
// CLAUDE_CODE_OAUTH_TOKEN the runtime would greedily prefer. User-authored
// workspace_secrets (provenance flag cleared) are exempt.
llmRes := applyPlatformManagedLLMEnv(ctx, envVars, workspaceID, payload.Runtime, payload.Model, globalSecretKeys)
// Fail closed for a BYOK workspace with no usable LLM credential at ANY
// scope: do NOT start it credential-less. Mirror the "model+provider+
// credential REQUIRED at create" spirit with an actionable error surfaced
// at provision time.
//
// Scoped to byok specifically (NOT disabled): "byok" means "the user
// intends to run an LLM on their own credential" — a missing one is a
// misconfiguration worth surfacing loudly. "disabled" means "this
// workspace runs no platform-billed LLM at all" (terminal / file work, or
// a runtime that talks to a non-bypass-key endpoint), so aborting would
// regress a legitimate no-LLM workspace.
//
// The bypass-key check is intentionally broad — any present bypass key
// (the tenant's own, at global or workspace scope) clears it.
if llmRes.ResolvedMode == LLMBillingModeBYOK && !llmRes.HasUsableLLMCred {
msg := formatMissingBYOKCredentialError(llmRes.ResolvedMode)
log.Printf("Provisioner: ABORT workspace=%s — byok billing mode has no usable LLM credential (MISSING_BYOK_CREDENTIAL, molecule-core#1994)", workspaceID)
return nil, &provisionAbort{
Msg: msg,
Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "1994"},
}
}
applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model)
if payload.Role != "" {
envVars["MOLECULE_AGENT_ROLE"] = payload.Role
@@ -230,6 +266,22 @@ func (h *WorkspaceHandler) prepareProvisionContext(
}
}
// Preflight: runtime-seed match (issue #2027). Fail LOUD when a workspace
// NAMED a runtime but the config.yaml we're about to seed declares a
// different top-level runtime — the symmetric counterpart to selectImage's
// ErrUnresolvableRuntime guard, on the config/template side. Pre-fix, when a
// runtime's workspace template wasn't in the tenant cache at provision time
// (or sanitizeRuntime coerced an unknown runtime), seeding silently fell
// back to the claude-code-default template: the image+env said e.g.
// google-adk but the seeded config said claude-code, so the agent booted
// mislabeled and personaless yet looked 'online' and returned canned
// non-answers. Refusing loudly turns that silent wrong-agent into a visible
// WORKSPACE_PROVISION_FAILED the operator can act on.
if abort := runtimeSeedMismatchAbort(payload.Runtime, templatePath, configFiles); abort != nil {
log.Printf("Provisioner: ABORT workspace=%s — %s", workspaceID, abort.Msg)
return nil, abort
}
cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, pluginsPath)
cfg.ResetClaudeSession = resetClaudeSession
@@ -240,6 +292,76 @@ func (h *WorkspaceHandler) prepareProvisionContext(
}, nil
}
// runtimeSeedMismatchAbort returns a non-nil abort when a workspace NAMED a
// runtime but the config.yaml about to be seeded declares a *different*
// top-level runtime — the fail-loud counterpart to selectImage's
// ErrUnresolvableRuntime (issue #2027). It catches the silent
// claude-code-default substitution that occurs when a runtime's workspace
// template isn't cached at provision time (or sanitizeRuntime coerced an
// unknown runtime to claude-code): both surface as a seeded config whose
// runtime contradicts the requested one.
//
// Pure (modulo reading the template dir's config.yaml). An empty
// requestedRuntime (unspecified / org-template default path) or an
// indeterminate seeded runtime (e.g. CP mode with no local config bytes) is
// allowed — we only fail on a concrete, contradictory signal, never on
// absence of one.
func runtimeSeedMismatchAbort(requestedRuntime, templatePath string, configFiles map[string][]byte) *provisionAbort {
if requestedRuntime == "" {
return nil
}
seeded := seededConfigRuntime(templatePath, configFiles)
if seeded == "" || seeded == requestedRuntime {
return nil
}
msg := fmt.Sprintf(
"runtime seed mismatch: workspace requested runtime %q but the seeded config.yaml declares %q — the %q workspace template was not available at provision time (silent %q fallback). Refusing to launch a mislabeled agent; refresh the template cache (POST /admin/templates/refresh) and re-provision.",
requestedRuntime, seeded, requestedRuntime, seeded,
)
return &provisionAbort{
Msg: msg,
Extra: map[string]interface{}{
"error": msg,
"requested_runtime": requestedRuntime,
"seeded_runtime": seeded,
"issue": "2027",
},
}
}
// seededConfigRuntime extracts the top-level `runtime:` from the config.yaml
// that will be seeded into the workspace — preferring the in-memory
// configFiles, falling back to the template directory on disk. Returns ""
// when no config.yaml is available or it declares no top-level runtime.
func seededConfigRuntime(templatePath string, configFiles map[string][]byte) string {
if data, ok := configFiles["config.yaml"]; ok {
return parseTopLevelRuntime(data)
}
if templatePath != "" {
if data, err := os.ReadFile(filepath.Join(templatePath, "config.yaml")); err == nil {
return parseTopLevelRuntime(data)
}
}
return ""
}
// parseTopLevelRuntime returns the value of the top-level `runtime:` key in a
// config.yaml, ignoring the nested `runtime_config:` block. A small dedicated
// line scanner (mirrors the one the Create handler uses to read a template's
// runtime) so the provision-time guard needs no YAML dependency.
func parseTopLevelRuntime(data []byte) string {
for _, raw := range strings.Split(string(data), "\n") {
trimmed := strings.TrimLeft(raw, " \t")
if len(raw) > len(trimmed) {
continue // indented — inside a nested block (e.g. runtime_config:)
}
if strings.HasPrefix(trimmed, "runtime:") && !strings.HasPrefix(trimmed, "runtime_config") {
return strings.Trim(strings.TrimSpace(strings.TrimPrefix(trimmed, "runtime:")), `"'`)
}
}
return ""
}
// mintWorkspaceSecrets issues + persists the workspace auth token
// AND the platform→workspace inbound secret (#2312). Both modes MUST
// call this — Docker mints + writes to local config volume; SaaS
@@ -494,6 +494,108 @@ func TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken(t *testing.
}
}
// TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds is the
// molecule-core#1994 (corrected-model) end-to-end inversion of the former
// internal#711 fail-closed test, for the live Reno Stars byok agents. A byok
// workspace whose LLM credential is the TENANT's own scope:global
// CLAUDE_CODE_OAUTH_TOKEN (a global_secrets row, no workspace override) must:
//
// 1. KEEP that oauth in the prepared container env (it is the tenant's own
// credential — exactly what byok runs on, direct), and
// 2. NOT abort — the provision proceeds.
//
// Pre-fix (internal#711) prepared.EnvVars stripped the global oauth and the
// provision aborted MISSING_BYOK_CREDENTIAL → the agent was dead. This is the
// discriminating end-to-end guard for the fix.
func TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds(t *testing.T) {
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
mock := setupTestDB(t)
// global_secrets carries the TENANT's own scope:global OAuth token + the
// stored MODEL (so the resolver derives byok from opus).
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0))
// Workspace set its own MODEL (no LLM cred of its own — relies on global).
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0))
// Resolver: workspace override = byok.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
handler := NewWorkspaceHandler(&captureBroadcaster{}, nil, "http://localhost:8080", t.TempDir())
payload := models.CreateWorkspacePayload{
Name: "Reno Stars SEO",
Runtime: "claude-code",
Tier: 1,
}
prepared, abort := handler.prepareProvisionContext(
context.Background(), wsID, "/nonexistent", nil, payload, false)
if abort != nil {
t.Fatalf("expected provision to proceed (byok on tenant's own global oauth), got abort=%v", abort.Extra)
}
if prepared == nil {
t.Fatalf("prepared context is nil despite no abort")
}
// The tenant's own global oauth must be present in the container env.
if prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global oauth preserved for byok",
prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
// byok must not have been routed through the platform proxy.
if _, ok := prepared.EnvVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Fatalf("byok provision must NOT inject the platform usage token")
}
if got := prepared.EnvVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"]; got != LLMBillingModeBYOK {
t.Fatalf("MOLECULE_LLM_BILLING_MODE_RESOLVED = %q, want byok", got)
}
}
// TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed is the
// companion: the fail-closed abort is UNCHANGED for a byok workspace with no
// LLM credential at ANY scope (no global row, no workspace row). It still
// aborts MISSING_BYOK_CREDENTIAL rather than starting credential-less.
func TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed(t *testing.T) {
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc"
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
mock := setupTestDB(t)
// No global LLM cred — only the stored MODEL so the resolver derives byok.
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0))
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
handler := NewWorkspaceHandler(&captureBroadcaster{}, nil, "http://localhost:8080", t.TempDir())
payload := models.CreateWorkspacePayload{
Name: "Reno Stars SEO",
Runtime: "claude-code",
Tier: 1,
}
prepared, abort := handler.prepareProvisionContext(
context.Background(), wsID, "/nonexistent", nil, payload, false)
if abort == nil {
t.Fatalf("expected MISSING_BYOK_CREDENTIAL abort, got success (prepared=%v)", prepared)
}
if code, _ := abort.Extra["code"].(string); code != "MISSING_BYOK_CREDENTIAL" {
t.Fatalf("abort.Extra[code] = %v, want MISSING_BYOK_CREDENTIAL", abort.Extra["code"])
}
if mode, _ := abort.Extra["billing_mode"].(string); mode != LLMBillingModeBYOK {
t.Fatalf("abort.Extra[billing_mode] = %v, want %q", abort.Extra["billing_mode"], LLMBillingModeBYOK)
}
}
// TestReadOrLazyHealInboundSecret pins the four branches of the
// shared lazy-heal helper directly. Each call site (chat_files,
// registry) has its own integration test, but those go through the
@@ -595,103 +697,49 @@ func TestReadOrLazyHealInboundSecret(t *testing.T) {
})
}
// TestDeriveProviderFromModelSlug pins the slug→provider mapping shared
// with workspace-configs-templates/hermes/scripts/derive-provider.sh.
// Sync-test: when a new prefix is added to the shell script, add it
// here too. The two intentional differences from the shell version
// (nousresearch/openai both → "openrouter" at provision time;
// unknown/no-prefix → "" instead of "auto") are exercised explicitly.
func TestDeriveProviderFromModelSlug(t *testing.T) {
t.Parallel()
cases := []struct {
name string
model string
want string
}{
{"minimax", "minimax/MiniMax-M2.7-highspeed", "minimax"},
{"minimax-cn keeps cn suffix", "minimax-cn/MiniMax-M2.7", "minimax-cn"},
{"anthropic", "anthropic/claude-sonnet-4-6", "anthropic"},
{"gemini", "gemini/gemini-2.5-pro", "gemini"},
{"deepseek", "deepseek/deepseek-v3", "deepseek"},
{"zai", "zai/glm-4.6", "zai"},
{"kimi-coding", "kimi-coding/kimi-k2", "kimi-coding"},
{"kimi-coding-cn keeps cn suffix", "kimi-coding-cn/kimi-k2", "kimi-coding-cn"},
{"alibaba via dashscope alias", "dashscope/qwen3", "alibaba"},
{"alibaba via qwen alias", "qwen/qwen3-coder", "alibaba"},
{"xiaomi via mimo alias", "mimo/mimo-vl", "xiaomi"},
{"arcee via arcee-ai alias", "arcee-ai/arcee-blitz", "arcee"},
{"nvidia via nim alias", "nim/llama-3.3-nemotron-super", "nvidia"},
{"ollama-cloud", "ollama-cloud/qwen3", "ollama-cloud"},
{"huggingface via hf alias", "hf/Qwen/Qwen3", "huggingface"},
{"ai-gateway", "ai-gateway/anthropic-claude-sonnet-4-6", "ai-gateway"},
{"kilocode", "kilocode/kilo-1", "kilocode"},
{"opencode-zen", "opencode-zen/zen-1", "opencode-zen"},
{"opencode-go", "opencode-go/code-1", "opencode-go"},
{"openrouter passthrough", "openrouter/anthropic/claude-sonnet-4-6", "openrouter"},
{"custom passthrough", "custom/my-private-endpoint", "custom"},
// Runtime-only override candidates default to openrouter at
// provision time (derive-provider.sh upgrades to nous/custom at
// boot if HERMES_API_KEY/OPENAI_API_KEY are present).
{"nousresearch defaults to openrouter at provision time", "nousresearch/hermes-4-70b", "openrouter"},
{"openai defaults to openrouter at provision time", "openai/gpt-5", "openrouter"},
// hermes-agent v0.12.0 / 2026-04-30 provider list — the drift gate
// in derive_provider_drift_test.go pins parity with the shell case
// statement.
{"xai", "xai/grok-4", "xai"},
{"xai via grok alias", "grok/grok-4", "xai"},
{"bedrock", "bedrock/anthropic.claude-sonnet-4-6", "bedrock"},
{"bedrock via aws alias", "aws/anthropic.claude-sonnet-4-6", "bedrock"},
{"tencent", "tencent/hunyuan-coder", "tencent-tokenhub"},
{"tencent-tokenhub passthrough", "tencent-tokenhub/hunyuan-coder", "tencent-tokenhub"},
{"gmi", "gmi/gmi-coder-1", "gmi"},
{"qwen-oauth", "qwen-oauth/qwen3-coder", "qwen-oauth"},
{"lmstudio", "lmstudio/qwen3-coder", "lmstudio"},
{"lmstudio via lm-studio alias", "lm-studio/qwen3-coder", "lmstudio"},
{"minimax-oauth", "minimax-oauth/MiniMax-M2.7", "minimax-oauth"},
{"alibaba-coding-plan", "alibaba-coding-plan/qwen3-coder", "alibaba-coding-plan"},
{"google-gemini-cli", "google-gemini-cli/gemini-2.5-pro", "google-gemini-cli"},
{"openai-codex", "openai-codex/gpt-5-codex", "openai-codex"},
{"copilot-acp", "copilot-acp/claude-sonnet-4-6", "copilot-acp"},
{"copilot", "copilot/claude-sonnet-4-6", "copilot"},
// Unknowns return "" so the caller skips the LLM_PROVIDER write
// and lets derive-provider.sh's *=auto branch decide at runtime.
{"unknown prefix returns empty", "totally-unknown-model/foo", ""},
{"empty input returns empty", "", ""},
{"no slash returns empty", "no-slash-here", ""},
{"leading slash returns empty", "/leading-slash", ""},
}
for _, tc := range cases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got := deriveProviderFromModelSlug(tc.model)
if got != tc.want {
t.Errorf("deriveProviderFromModelSlug(%q) = %q, want %q", tc.model, got, tc.want)
}
})
}
}
// internal#718 P4 closure: TestDeriveProviderFromModelSlug was the
// table-driven sync test that pinned deriveProviderFromModelSlug
// (retire-list #3) against
// workspace-configs-templates/hermes/scripts/derive-provider.sh.
//
// Both the Go function and this test (with its 35+ slug→provider
// cases) are retired. The slug→provider mapping is now covered by
// providers.Manifest.DeriveProvider against the registry SSOT
// (TestDeriveProvider_RealManifest in
// internal/providers/derive_provider_test.go). The shell script
// remains the in-container fallback; its byte-identity with the
// registry view of hermes is a P4 follow-up gated on registry data
// growth (see PR-2 codegen of hermes config.yaml from the registry).
//
// TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider, which
// asserted that Create writes BOTH MODEL and LLM_PROVIDER rows, is
// replaced by TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL
// below — the LLM_PROVIDER half of the contract is retired.
//
// TestWorkspaceCreate_FirstDeploy_UnknownModel_OnlyMintModelProvider
// is subsumed by the same: with LLM_PROVIDER never written, the
// known-vs-unknown distinction at Create disappears.
// TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider pins the
// fix for failed-workspace 95ed3ff2 (2026-05-02). Pre-fix: the canvas
// POSTed minimax/MiniMax-M2.7 in payload.Model, the workspace row was
// created, but neither the model nor the derived provider was ever
// written to workspace_secrets. On any subsequent restart, the
// applyRuntimeModelEnv fallback found nothing and hermes booted with
// the template default (nousresearch/hermes-4-70b) → wrong provider
// keys → /health poll failed → never registered.
// TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL pins the post-P4
// contract: WorkspaceHandler.Create writes the MODEL workspace_secret
// (so the canvas-picked model survives restart and applyRuntimeModelEnv
// finds it via the fallback chain) and writes NOTHING ELSE in the
// secret-mint window. Specifically: NO LLM_PROVIDER row is written,
// regardless of payload.LLMProvider or the slug-prefix.
//
// Post-fix: the create handler writes both rows after committing the
// workspace row. This test asserts the SQL writes happen with the
// correct keys + values.
// Pre-P4 the create handler also wrote LLM_PROVIDER via setProviderSecret
// — either from payload.LLMProvider verbatim or from
// deriveProviderFromModelSlug(payload.Model). Both code paths were
// retired in internal#718 P4 closure together with the LLM_PROVIDER
// workspace_secret itself (no consumer remains; the provider is derived
// at every decision point from (runtime, model) via the registry).
//
// 2026-05-19 follow-up: the workspace_secrets row that holds the
// picked model id was renamed MODEL_PROVIDER → MODEL (the column name
// was misleading and bled into applyRuntimeModelEnv as a slug
// fallback). The sqlmock regex below now anchors on 'MODEL' instead
// of 'MODEL_PROVIDER'. See fix/workspace-server-rename-
// MODEL_PROVIDER-to-MODEL + the 20260519000000 rename migration.
func TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider(t *testing.T) {
// sqlmock failure on this expectation set is the canonical regression
// signal: if a future PR re-introduces an LLM_PROVIDER write at create,
// sqlmock surfaces "ExpectExec was not called" for any added insert.
// The "MODEL anchor uses no LLM_PROVIDER" assertion below is the
// stronger version of the same gate.
func TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
@@ -706,43 +754,35 @@ func TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider(t *testing.T) {
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
// The fix: MODEL is upserted with the verbatim model slug
// (renamed from MODEL_PROVIDER on 2026-05-19 — see file-level
// docstring). SQL has 3 placeholders ($1=workspace_id, $2=
// encrypted_value reused in the conflict-update, $3=version
// reused in the conflict-update), so sqlmock sees 3 args. The
// 'MODEL' / 'LLM_PROVIDER' key is a literal in the SQL — we
// distinguish the two writes with the regex match below. The
// 'MODEL' anchor uses a word boundary (`[^_A-Z]`) so it does
// NOT silently match the legacy 'MODEL_PROVIDER' name.
// MODEL upsert — the only post-commit workspace_secrets write that
// survived the P4 closure. The 'MODEL' key is literal in the SQL.
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'MODEL'`).
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
// The fix: LLM_PROVIDER is upserted with the derived provider name.
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'LLM_PROVIDER'`).
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
// Post-mint side effects (canvas layout + structure_events broadcast
// + the external-workspace UPDATE/IssueToken chain). Order matches
// workspace.go.
// workspace.go. CRITICALLY: no second `INSERT INTO workspace_secrets`
// is expected — sqlmock fails if Create attempts an LLM_PROVIDER
// write.
mock.ExpectExec("INSERT INTO canvas_layouts").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
// External branch with no URL: status → awaiting_agent + IssueToken.
mock.ExpectExec(`UPDATE workspaces SET status =`).
WillReturnResult(sqlmock.NewResult(0, 1))
// wsauth.IssueToken inserts into workspace_auth_tokens.
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
WillReturnResult(sqlmock.NewResult(0, 1))
// awaiting_agent broadcast.
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"name":"External Minimax Agent","runtime":"external","external":true,"model":"minimax/MiniMax-M2.7"}`
// Body carries an explicit llm_provider AND a slug-prefixed model — both
// of which would have triggered an LLM_PROVIDER write pre-P4. The
// payload field is preserved for backward-compat (older canvases
// still send it) but the value is intentionally ignored by Create.
body := `{"name":"External Minimax Agent","runtime":"external","external":true,"model":"minimax/MiniMax-M2.7","llm_provider":"minimax"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
@@ -752,7 +792,7 @@ func TestWorkspaceCreate_FirstDeploy_PersistsModelAndProvider(t *testing.T) {
t.Fatalf("expected status 201, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met — first-deploy did NOT persist MODEL + LLM_PROVIDER (this is the prod bug recurrence): %v", err)
t.Errorf("sqlmock expectations not met — Create wrote an unexpected workspace_secrets row (likely a re-introduced LLM_PROVIDER write): %v", err)
}
}
@@ -808,56 +848,12 @@ func TestWorkspaceCreate_FirstDeploy_NoModel_Returns422(t *testing.T) {
}
}
// TestWorkspaceCreate_FirstDeploy_UnknownModel_OnlyMintModelProvider
// asserts the asymmetric case: an unknown model prefix still gets
// MODEL persisted (so the user's exact slug survives restart and
// applyRuntimeModelEnv finds it), but LLM_PROVIDER is skipped (so
// derive-provider.sh's *=auto branch can decide at runtime instead of
// being pre-empted by a guess). The MODEL key was renamed from
// MODEL_PROVIDER on 2026-05-19 — see file-level docstring.
func TestWorkspaceCreate_FirstDeploy_UnknownModel_OnlyMintModelProvider(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
// Only MODEL — LLM_PROVIDER must NOT be written for unknown
// prefixes. Same 3-arg shape as above; key is literal in SQL.
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'MODEL'`).
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO canvas_layouts").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec(`UPDATE workspaces SET status =`).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO workspace_auth_tokens").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"name":"Unknown Model Agent","runtime":"external","external":true,"model":"totally-unknown-model/foo"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.Create(c)
if w.Code != http.StatusCreated {
t.Fatalf("expected status 201, got %d: %s", w.Code, w.Body.String())
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations not met — unknown-prefix model should mint MODEL but skip LLM_PROVIDER: %v", err)
}
}
// internal#718 P4 closure: the asymmetric "known prefix → both
// MODEL+LLM_PROVIDER; unknown prefix → MODEL only" contract is moot —
// Create never writes LLM_PROVIDER for ANY model now. The equivalent
// coverage is TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL above
// (uses a slug-prefixed model that pre-P4 WOULD have triggered an
// LLM_PROVIDER write; sqlmock fails if Create attempts one).
// TestApplyRuntimeModelEnv_SetsUniversalMODELForAllRuntimes pins the
// fix for Bug B (2026-05-02): canvas-selected model was silently dropped
@@ -972,7 +968,7 @@ func TestApplyPlatformManagedLLMEnv_NonClaudeRuntimeDefaultsOpenAIProxyWhenNoWor
t.Setenv("MOLECULE_LLM_DEFAULT_MODEL", "moonshot/kimi-k2.6")
envVars := map[string]string{}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "codex", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "codex", "", nil)
applyRuntimeModelEnv(envVars, "codex", "")
if got := envVars["OPENAI_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/openai/v1" {
@@ -1002,7 +998,7 @@ func TestApplyPlatformManagedLLMEnv_StripsWorkspaceOpenAIKeyForClaudeCode(t *tes
"OPENAI_BASE_URL": "https://api.openai.com/v1",
"MODEL": "openai/gpt-5.5",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)
if _, ok := envVars["OPENAI_API_KEY"]; ok {
t.Fatalf("OPENAI_API_KEY should be stripped for claude-code platform-managed mode")
@@ -1028,7 +1024,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeUsesAnthropicProxyOverOAuth(t *tes
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
"MODEL": "sonnet",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped in platform-managed mode")
@@ -1051,7 +1047,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeInjectsAnthropicProxyWhenNoWorkspa
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "minimax/MiniMax-M2.7")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "minimax/MiniMax-M2.7", nil)
if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/anthropic/v1" {
t.Fatalf("ANTHROPIC_BASE_URL = %q", got)
@@ -1074,7 +1070,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) {
"MINIMAX_API_KEY": "user-minimax-key",
"MODEL": "MiniMax-M2.7",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "", nil)
if _, ok := envVars["MINIMAX_API_KEY"]; ok {
t.Fatalf("MINIMAX_API_KEY should be stripped in platform-managed mode")
@@ -1090,20 +1086,38 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) {
}
}
// internal#718 P2-B: byok is now DERIVED, not org-env-driven. A claude-code
// workspace with NO explicit override + a non-platform-deriving model
// (kimi-for-coding → kimi-coding) resolves byok and must NOT get the CP proxy
// creds injected. (Pre-P2 this was driven by the org env MOLECULE_LLM_BILLING_MODE
// with an empty workspace id; that mechanism is retired.)
func TestApplyPlatformManagedLLMEnv_NoopsOutsidePlatformManaged(t *testing.T) {
t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok")
const wsID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
mock := setupTestDB(t)
// No explicit override → derive from (claude-code, kimi-for-coding) → byok.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil))
t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding", nil)
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("resolved mode = %q, want byok (derived from non-platform model)", res.ResolvedMode)
}
if _, ok := envVars["OPENAI_API_KEY"]; ok {
t.Fatalf("OPENAI_API_KEY should not be set outside platform-managed mode")
}
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Fatalf("MOLECULE_LLM_USAGE_TOKEN should not be set outside platform-managed mode")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv is the
@@ -1137,7 +1151,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
"MODEL": "sonnet",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
// 1. OAuth token intact — not stripped.
if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "user-oauth-token" {
@@ -1168,6 +1182,310 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing
}
}
// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect is
// the molecule-core#1994 (corrected-model) inversion of the former
// internal#711 strip test, exercised through applyPlatformManagedLLMEnv. The
// live failure this guards: the Reno Stars Marketing/SEO byok agents whose
// Claude oauth lives at GLOBAL scope (the tenant's own credential, shared
// across the tenant's workspaces) were stripped + failed-closed under the
// inverted "global == platform's own" premise → MISSING_BYOK_CREDENTIAL →
// dead. Under the corrected model `global_secrets` is the TENANT's store, so
// that oauth is exactly what byok runs on: it must SURVIVE and route direct.
//
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch
// → the oauth disappears → this test RED on both survival + HasUsableLLMCred.
func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect(t *testing.T) {
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// The tenant's own oauth at GLOBAL scope (a global_secrets row). The
// workspace set no separate row of its own; it relies on the tenant global.
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
"MODEL": "opus",
}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
// 1. The tenant's own global-scope oauth SURVIVES — byok runs on it.
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token preserved for byok", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
// 2. No CP proxy creds forced (byok = workspace talks to its own provider).
if got, ok := envVars["ANTHROPIC_API_KEY"]; ok {
t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got)
}
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Fatalf("MOLECULE_LLM_USAGE_TOKEN must NOT be injected for byok")
}
// 3. byok WITH a usable credential → caller does NOT fail closed.
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK)
}
if !res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = false, want true (tenant's own global-scope oauth is the usable credential)")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// =========================================================================
// internal#718 P2-B BEHAVIOR DELTA — billing/credential decision DERIVES the
// provider (no stored LLM_PROVIDER, no override). These three tests are the
// explicit delta the RFC calls out, exercised through the real provision path
// (applyPlatformManagedLLMEnv) with the registry derivation driving the mode:
// - platform-derived → platform_managed → platform creds (UNCHANGED)
// - non-platform-derived → byok → #1963 strip + fail-closed (THE FIX)
// - unset model → platform default (CTO-confirmed)
// All use NO explicit override (override read returns NULL) so the DERIVATION
// is what decides — this is what supersedes #1966's stored-LLM_PROVIDER read.
// =========================================================================
// PLATFORM-DERIVED → UNCHANGED. A claude-code workspace with a platform-
// namespaced model (anthropic/claude-opus-4-7) derives to the closed `platform`
// provider → platform_managed → CP proxy creds injected, exactly as before.
func TestApplyPlatformManagedLLMEnv_DERIVED_PlatformModelKeepsPlatformCreds(t *testing.T) {
const wsID = "11111111-2222-3333-4444-555555555555"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) // NO override → derive
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK) // org env IGNORED now
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "anthropic/claude-opus-4-7", nil)
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Fatalf("platform-derived model must resolve platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want derived_provider", res.Source)
}
// Platform path injects the CP proxy creds (UNCHANGED behavior).
if got := envVars["ANTHROPIC_API_KEY"]; got != "tenant-admin-token" {
t.Errorf("platform path must inject the CP proxy token as ANTHROPIC_API_KEY, got %q", got)
}
if !res.HasUsableLLMCred {
t.Errorf("platform path always has a usable cred (the proxy token)")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// NON-PLATFORM-DERIVED + NO CREDENTIAL AT ALL → byok + FAIL-CLOSED. This is
// the legitimate remaining fail-closed path under the corrected model
// (molecule-core#1994): a claude-code workspace with a non-platform model
// (kimi-for-coding → byok) and NO override and NO LLM credential at ANY scope
// (no global row, no workspace row) has nothing to run on → HasUsableLLMCred=
// false → caller (prepareProvisionContext) aborts MISSING_BYOK_CREDENTIAL. The
// fail-closed branch is unchanged by the strip removal; only its trigger
// narrowed from "no workspace-scoped cred" to "no cred at any scope".
func TestApplyPlatformManagedLLMEnv_DERIVED_ByokNoCredentialFailsClosed(t *testing.T) {
const wsID = "99999999-8888-7777-6666-555555555555"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) // NO override → derive
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) // org env IGNORED now
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// No LLM credential at all — neither global nor workspace scope.
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding", nil)
// 1. DERIVED byok (NOT the old platform_managed default).
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("non-platform-derived model must resolve byok, got %q (source=%s)", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want derived_provider", res.Source)
}
// 2. No CP proxy creds forced.
if got, ok := envVars["ANTHROPIC_API_KEY"]; ok {
t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got)
}
// 3. No usable cred at any scope → caller fails closed.
if res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = true, want false (no LLM credential present at any scope)")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// UNSET model → PLATFORM DEFAULT (CTO-confirmed "unset → platform default").
// No model means nothing to derive; the workspace defaults closed to
// platform_managed and keeps the platform creds (UNCHANGED for the no-model case).
func TestApplyPlatformManagedLLMEnv_DERIVED_UnsetModelPlatformDefault(t *testing.T) {
const wsID = "00000000-1111-2222-3333-444444444444"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(nil)) // NO override
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModeBYOK) // org env IGNORED now
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Fatalf("unset model must default platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedDefault {
t.Errorf("source: got %q want derived_default", res.Source)
}
if got := envVars["ANTHROPIC_API_KEY"]; got != "tenant-admin-token" {
t.Errorf("unset-model platform default must inject the CP proxy token, got %q", got)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth is the
// workspace-scope companion to the global-scope survival test: a byok
// workspace that set its own CLAUDE_CODE_OAUTH_TOKEN via the canvas Secrets
// tab (a workspace_secrets row) keeps it and runs direct. Under the corrected
// model (molecule-core#1994) the tenant's credential survives at EITHER scope;
// this pins the workspace-scope half.
func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth(t *testing.T) {
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// Workspace set its OWN OAuth token (a workspace_secrets row).
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "CUSTOMER-OWN-OAUTH-TOKEN",
"MODEL": "opus",
}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "CUSTOMER-OWN-OAUTH-TOKEN" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the workspace's own token left intact", got)
}
if !res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = false, want true (workspace brought its own credential)")
}
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy proves the
// corrected-model behavior for "disabled": the tenant's own global-scope LLM
// cred is NOT stripped and the CP proxy is NOT forced. "disabled" means the
// workspace runs no platform-billed LLM, but the tenant's own credential is
// still the tenant's to keep; the caller's fail-closed abort is byok-only so a
// disabled workspace boots regardless. The previous internal#711 behavior
// stripped the global cred here on the same inverted premise; that strip is
// removed.
//
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the non-platform
// branch → the oauth disappears → this test RED on the survival assertion.
func TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy(t *testing.T) {
const wsID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeDisabled))
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
// The tenant's own global cred survives (not stripped).
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("tenant's own global cred must survive for disabled mode; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
// No proxy forced for disabled.
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Fatalf("disabled must not inject the platform usage token")
}
if res.ResolvedMode != LLMBillingModeDisabled {
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeDisabled)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds is
// the no-regression guard for the metered platform_managed path
// (molecule-core#1994): a platform-managed workspace MUST still strip any
// direct oauth and route through the CP proxy. The direct OAuth token is
// replaced by the proxy usage token (HasUsableLLMCred=true). This path is
// UNCHANGED by the byok strip removal — only the byok/disabled branch changed.
func TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds(t *testing.T) {
const wsID = "99999999-9999-9999-9999-999999999999"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModePlatformManaged))
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "DIRECT-OAUTH-TOKEN",
"MODEL": "opus",
}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
// Platform-managed routes through the CP proxy: OAuth stripped, proxy creds forced.
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped + replaced by the proxy token for platform_managed")
}
if got := envVars["ANTHROPIC_API_KEY"]; got != "tenant-admin-token" {
t.Fatalf("ANTHROPIC_API_KEY = %q, want proxy usage token for platform_managed", got)
}
if !res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = false, want true for platform_managed (proxy token is the credential)")
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModePlatformManaged)
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode is the
// no-regression companion: a workspace that resolves to platform_managed must
// still strip + force the proxy AND emit MOLECULE_LLM_BILLING_MODE=
@@ -1189,7 +1507,7 @@ func TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode(t *tes
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
"MODEL": "sonnet",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "", nil)
// OAuth stripped, proxy forced — unchanged platform_managed contract.
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
@@ -1399,6 +1399,9 @@ func (s *stubFailingCPProv) Start(_ context.Context, _ provisioner.WorkspaceConf
func (s *stubFailingCPProv) Stop(_ context.Context, _ string) error {
panic("stubFailingCPProv.Stop not expected on the provisionWorkspaceCP failure path")
}
func (s *stubFailingCPProv) StopAndPrune(_ context.Context, _ string) error {
panic("stubFailingCPProv.StopAndPrune not expected on the provisionWorkspaceCP failure path")
}
func (s *stubFailingCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
panic("stubFailingCPProv.GetConsoleOutput not expected on the provisionWorkspaceCP failure path")
@@ -726,7 +726,7 @@ func (h *WorkspaceHandler) cpStopWithRetry(ctx context.Context, workspaceID, sou
// terminal error. The delete path needs the error (it must keep the
// row recoverable for the orphan-sweeper + emit a durable event), so
// the actual retry loop lives in cpStopWithRetryErr below.
_ = h.cpStopWithRetryErr(ctx, workspaceID, source)
_ = h.cpStopWithRetryErr(ctx, workspaceID, source, false) // restart/hibernate never prunes
}
// cpStopWithRetryErr is the shared bounded-retry core for cpProv.Stop.
@@ -743,14 +743,24 @@ func (h *WorkspaceHandler) cpStopWithRetry(ctx context.Context, workspaceID, sou
// - all attempts fail → returns the LAST attempt's error and emits the
// stable `LEAK-SUSPECT cpProv.Stop ...` log line so the CP-side orphan
// reconciler can correlate by workspace_id.
func (h *WorkspaceHandler) cpStopWithRetryErr(ctx context.Context, workspaceID, source string) error {
//
// cpStopWithRetryErr terminates the workspace's CP-managed compute with bounded
// retry. prune=true (internal#734) additionally requests CP erase the durable
// data volume — set ONLY by the permanent-delete-with-erase path, NEVER by
// restart/hibernate (those pass false), so a recreate can never prune.
func (h *WorkspaceHandler) cpStopWithRetryErr(ctx context.Context, workspaceID, source string, prune bool) error {
if h.cpProv == nil {
return nil
}
var lastErr error
delay := cpStopRetryBaseDelay
for attempt := 1; attempt <= cpStopRetryAttempts; attempt++ {
err := h.cpProv.Stop(ctx, workspaceID)
var err error
if prune {
err = h.cpProv.StopAndPrune(ctx, workspaceID)
} else {
err = h.cpProv.Stop(ctx, workspaceID)
}
if err == nil {
if attempt > 1 {
log.Printf("%s: cpProv.Stop(%s) succeeded on attempt %d", source, workspaceID, attempt)
@@ -72,6 +72,13 @@ func (s *scriptedCPStop) Stop(ctx context.Context, _ string) error {
}
return nil
}
// StopAndPrune delegates to Stop so the retry/error scripting is identical —
// the prune flag only changes the URL the real provisioner builds, not the
// retry behavior these tests exercise.
func (s *scriptedCPStop) StopAndPrune(ctx context.Context, id string) error {
return s.Stop(ctx, id)
}
func (s *scriptedCPStop) Start(_ context.Context, _ provisioner.WorkspaceConfig) (string, error) {
return "", nil
}
@@ -501,10 +501,12 @@ func TestWorkspaceCreate_WithSecrets_Persists(t *testing.T) {
// while persisting a secret causes the entire transaction to roll back and
// the handler to return 500. The workspace row must NOT be committed.
func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) {
// internal#691: see TestExtended_SecretsSet — same default-closed reasoning.
// This test is asserting the rollback path on DB failure, not the strip gate;
// keep the org in byok so the OPENAI_API_KEY write reaches the INSERT.
t.Setenv("MOLECULE_LLM_BILLING_MODE", "byok")
// internal#718 P2-B: this test asserts the rollback path on DB failure, not
// the strip gate. The create-time secret gate keys off the DERIVED mode now
// (org rung retired). An explicit byok override makes the workspace byok in a
// single resolver read (precedence-1 short-circuit), so the OPENAI_API_KEY
// write is allowed and reaches the INSERT-and-fail path this test exercises.
t.Setenv("MOLECULE_LLM_BILLING_MODE", "platform_managed") // org env ignored now
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
@@ -513,14 +515,11 @@ func TestWorkspaceCreate_SecretPersistFails_RollsBack(t *testing.T) {
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
// internal#691: Create() now resolves billing mode per-workspace before
// the secret-strip gate. The workspace row was just inserted in the same
// transaction so it isn't readable from a separate query yet; the
// resolver expects the SELECT and the mock returns no row → falls back
// to the org default (byok, set above) so the OPENAI_API_KEY write
// reaches the INSERT-and-fail path this test exercises.
// Create() resolves billing mode per-workspace before the secret-strip gate.
// An explicit byok override short-circuits the resolver (precedence 1) so the
// OPENAI_API_KEY write is allowed and reaches the INSERT-and-fail path.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}))
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
mock.ExpectExec("INSERT INTO workspace_secrets").
WillReturnError(sql.ErrConnDone) // DB failure while writing secret
mock.ExpectRollback() // workspace insert must be rolled back
@@ -1787,7 +1786,7 @@ func TestWorkspaceCreate_TemplateDefaultsMissingRuntimeAndModel(t *testing.T) {
tier: 2
runtime: hermes
runtime_config:
model: nousresearch/hermes-4-70b
model: moonshot/kimi-k2.6
`)
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
t.Fatalf("write cfg: %v", err)
@@ -1842,7 +1841,7 @@ func TestWorkspaceCreate_TemplateDefaultsLegacyTopLevelModel(t *testing.T) {
cfg := []byte(`name: Legacy Agent
tier: 1
runtime: hermes
model: anthropic:claude-sonnet-4-5
model: moonshot/kimi-k2.5
`)
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
t.Fatalf("write cfg: %v", err)
@@ -1897,7 +1896,7 @@ func TestWorkspaceCreate_CallerModelOverridesTemplateDefault(t *testing.T) {
}
cfg := []byte(`runtime: hermes
runtime_config:
model: nousresearch/hermes-4-70b
model: moonshot/kimi-k2.6
`)
if err := os.WriteFile(filepath.Join(templateDir, "config.yaml"), cfg, 0o644); err != nil {
t.Fatalf("write cfg: %v", err)
@@ -1924,7 +1923,11 @@ runtime_config:
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"name":"Custom Hermes","template":"hermes-template","model":"minimax/MiniMax-M2.7"}`
// Caller overrides with a different hermes-valid model — registry permits
// both moonshot/kimi-k2.5 and moonshot/kimi-k2.6 for hermes (P4 PR-1 native
// set). The template default would have been moonshot/kimi-k2.6; caller
// picks kimi-k2.5 explicitly to prove the override actually fires.
body := `{"name":"Custom Hermes","template":"hermes-template","model":"moonshot/kimi-k2.5"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
@@ -2048,6 +2051,152 @@ func TestWorkspaceCreate_188_NoTemplateNoRuntime_NowMODEL_REQUIRED(t *testing.T)
}
}
// internal#718 P4 PR-2: only-registered validation HARD-REJECT. A known
// (registry) runtime with a model NOT in its registered set is rejected at the
// create boundary with 422 UNREGISTERED_MODEL_FOR_RUNTIME — no DB rows touched,
// no provisioning attempt, no wedged workspace. Replaces P2-B's WARN-mode
// header.
func TestWorkspaceCreate_718_P4_UnregisteredModelHardReject422(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
// No DB expectations: the 422 fires BEFORE BeginTx, so any unexpected
// INSERT will fail the test via ExpectationsWereMet.
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"name":"Bad Model","runtime":"claude-code","model":"totally-made-up-xyz"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.Create(c)
if w.Code != http.StatusUnprocessableEntity {
t.Fatalf("unregistered-model create: expected 422, got %d: %s", w.Code, w.Body.String())
}
if !bytes.Contains(w.Body.Bytes(), []byte(`"code":"UNREGISTERED_MODEL_FOR_RUNTIME"`)) {
t.Errorf("expected code=UNREGISTERED_MODEL_FOR_RUNTIME in 422 body, got %s", w.Body.String())
}
if !bytes.Contains(w.Body.Bytes(), []byte(`"runtime":"claude-code"`)) {
t.Errorf("expected runtime=claude-code echoed in 422 body, got %s", w.Body.String())
}
if !bytes.Contains(w.Body.Bytes(), []byte(`"model":"totally-made-up-xyz"`)) {
t.Errorf("expected model echoed in 422 body, got %s", w.Body.String())
}
// The legacy WARN header must NOT fire — there is no "proceeded with
// warning" path anymore.
if w.Header().Get("X-Molecule-Model-Unregistered") != "" {
t.Errorf("P4 hard-reject must not emit the legacy WARN header, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
}
// Strict mock check: no DB ops should have happened.
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unexpected DB activity on hard-reject path: %v", err)
}
}
// A REGISTERED model on a registry runtime proceeds with 201 and no unregistered header.
func TestWorkspaceCreate_718_P4_RegisteredModelProceeds(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO workspace_secrets").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO canvas_layouts").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
// claude-opus-4-7 IS a registered claude-code model (anthropic-api).
body := `{"name":"Good Model","runtime":"claude-code","model":"claude-opus-4-7"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.Create(c)
if w.Code != http.StatusCreated {
t.Fatalf("registered-model create: expected 201, got %d: %s", w.Code, w.Body.String())
}
if w.Header().Get("X-Molecule-Model-Unregistered") != "" {
t.Errorf("registered model must NOT set the legacy unregistered header, got %q", w.Header().Get("X-Molecule-Model-Unregistered"))
}
}
// internal#718 P4 PR-2: the legacy colon-namespaced BYOK vocabulary
// 'anthropic:claude-opus-4-7' is now a FIRST-CLASS registered claude-code model
// (P4 PR-1 reconciled the colon-vocab into the registry). The hard-reject must
// NOT 422 this legitimate live-corpus form — verifying the reconcile + flip work
// together. This is the canonical regression guard for the colon-vocab path.
func TestWorkspaceCreate_718_P4_LegacyColonVocabAccepted(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO workspace_secrets").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO canvas_layouts").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"name":"Legacy Colon","runtime":"claude-code","model":"anthropic:claude-opus-4-7"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.Create(c)
if w.Code != http.StatusCreated {
t.Fatalf("legacy colon-form create (P4 PR-1 reconciled): expected 201, got %d: %s", w.Code, w.Body.String())
}
}
// internal#718 P2-B: a runtime NOT in the registry (mock — a known core runtime
// absent from the first-party provider registry) fails OPEN — the
// only-registered gate does not block it (federation / non-first-party path
// unchanged). It proceeds past the gate to the normal create flow.
func TestWorkspaceCreate_718_NonRegistryRuntimeFailsOpen(t *testing.T) {
mock := setupTestDB(t)
setupTestRedis(t)
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
mock.ExpectBegin()
mock.ExpectExec("INSERT INTO workspaces").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectCommit()
mock.ExpectExec("INSERT INTO canvas_layouts").
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec("INSERT INTO structure_events").
WillReturnResult(sqlmock.NewResult(0, 1))
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
// "mock" is a known core runtime but NOT in the first-party registry;
// any model passes the only-registered gate (fail-open).
body := `{"name":"Mock Agent","runtime":"mock","model":"canned-replies"}`
c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.Create(c)
if w.Code != http.StatusCreated {
t.Fatalf("non-registry runtime should fail open (201), got %d: %s", w.Code, w.Body.String())
}
}
// Explicit runtime, no template → honored, 201 (no template resolution
// needed; runtimeExplicitlyRequested true but already resolved).
func TestWorkspaceCreate_188_ExplicitRuntimeNoTemplate_OK(t *testing.T) {

Some files were not shown because too many files have changed in this diff Show More