fix(plugins): log silently ignored execAsRoot errors during uninstall

Plugin uninstall had two sites where execAsRoot errors were discarded: - Skill directory removal (plugins_install.go:125) — orphaned skill dirs if rm -rf failed silently - CLAUDE.md marker stripping (plugins_install_pipeline.go:326) — stale plugin content left in CLAUDE.md if awk script failed Both now log the error without failing the overall uninstall (best-effort cleanup), giving operators visibility into incomplete uninstalls. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Merge pull request 'test(handlers): org_scope + workspace_abilities coverage (#1312 , clean extract)' (#2114 ) from test/org-scope-abilities-coverage-clean into main
2026-06-02 03:54:39 +00:00 · 2026-06-02 00:47:50 +00:00 · 2026-06-02 00:42:54 +00:00 · 2026-06-02 00:34:20 +00:00 · 2026-06-02 00:33:48 +00:00 · 2026-06-02 00:32:32 +00:00
142 changed files with 6914 additions and 1618 deletions
@@ -51,7 +51,7 @@ MOLECULE_ENV=development                       # Environment label (development/
 # MOLECULE_IN_DOCKER=                    # Set when running the platform inside Docker (accepts 1/0, true/false). Triggers A2A proxy to rewrite 127.0.0.1:<port> agent URLs to Docker bridge hostnames. Auto-detected via /.dockerenv; only set if detection fails or to force off.

 # GitHub
-# GITHUB_REPO=owner/repo                 # Target repo for agent initial_prompt clone (e.g. Molecule-AI/molecule-monorepo). Read inside workspace containers.
+# GITHUB_REPO=owner/repo                 # Target repo for agent initial_prompt clone (e.g. Molecule-AI/molecule-core). Read inside workspace containers.
 # GITHUB_TOKEN=                          # Personal access token / installation token used by agents that clone private repos. Register as a global secret via POST /admin/secrets for propagation to workspace env. Token is used in-URL during clone and then scrubbed from .git/config via `git remote set-url`.

 # Webhooks
@@ -18,15 +18,24 @@
 # per §SOP-6 security model). No-op when merged=false.
 #
 # Required env (set by the workflow):
-#   GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER, REQUIRED_CHECKS
+#   GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER
+#   plus one of REQUIRED_CHECKS_JSON (preferred) or REQUIRED_CHECKS (legacy)
 #
-# REQUIRED_CHECKS is a newline-separated list of status-check context
-# names that branch protection requires. Declared in the workflow YAML
-# rather than fetched from /branch_protections (which needs admin
-# scope — sop-tier-bot has read-only). Trade dynamism for simplicity:
-# when the required-check set changes, update both branch protection
-# AND this env. Keeping them in sync is less complexity than granting
-# the audit bot admin perms on every repo.
+# REQUIRED_CHECKS_JSON is a JSON object keyed by branch name. Each value
+# is an array of status-check context names that branch protection
+# requires for that branch. The script looks up the PR's base branch and
+# evaluates only the checks declared for that branch.
+#
+#   {"main": ["CI / all-required (pull_request)", ...],
+#    "staging": ["CI / all-required (pull_request)", ...]}
+#
+# REQUIRED_CHECKS (legacy) is a newline-separated list used when the
+# JSON variable is not set. Declared in the workflow YAML rather than
+# fetched from /branch_protections (which needs admin scope — sop-tier-bot
+# has read-only). Trade dynamism for simplicity: when the required-check
+# set changes, update both branch protection AND this env. Keeping them
+# in sync is less complexity than granting the audit bot admin perms on
+# every repo.

 set -euo pipefail

@@ -34,7 +43,10 @@ set -euo pipefail
 : "${GITEA_HOST:?required}"
 : "${REPO:?required}"
 : "${PR_NUMBER:?required}"
-: "${REQUIRED_CHECKS:?required (newline-separated context names)}"
+if [ -z "${REQUIRED_CHECKS_JSON:-}" ] && [ -z "${REQUIRED_CHECKS:-}" ]; then
+  echo "::error::Either REQUIRED_CHECKS_JSON or REQUIRED_CHECKS must be set"
+  exit 1
+fi

 OWNER="${REPO%%/*}"
 NAME="${REPO##*/}"
@@ -65,10 +77,14 @@ if [ -z "$MERGE_SHA" ]; then
  exit 0
 fi

-# 2. Required status checks declared in the workflow env.
-REQUIRED="$REQUIRED_CHECKS"
+# 2. Required status checks — branch-aware JSON dict takes precedence.
+if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then
+  REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] // [] | .[]')
+else
+  REQUIRED="$REQUIRED_CHECKS"
+fi
 if [ -z "${REQUIRED//[[:space:]]/}" ]; then
-  echo "::notice::REQUIRED_CHECKS empty — force-merge not applicable."
+  echo "::notice::REQUIRED_CHECKS empty for branch '$BASE_BRANCH' — force-merge not applicable."
  exit 0
 fi

@@ -296,7 +296,15 @@ fi
 #   403     → token owner is not in this team (Gitea 1.22.6 'Must be a team
 #             member' constraint — see follow-up issue for token-provisioning)
 #   404     → not a member
+# Track whether every candidate returned 403 (token owner not in team).
+# When this happens the root cause is a token-provisioning issue, not a
+# reviewer-eligibility issue — surface it clearly so ops don't waste time
+# verifying team roster (Bug C / RFC#324 follow-up).
+_ALL_CANDIDATES_403="yes"
+_CANDIDATE_COUNT=0
+
 for U in $CANDIDATES; do
+  _CANDIDATE_COUNT=$((_CANDIDATE_COUNT + 1))
  CODE=$(curl -sS -o "$TEAM_PROBE_TMP" -w '%{http_code}' \
    -K "$CURL_AUTH_FILE" "${API}/teams/${TEAM_ID}/members/${U}")
  debug "probe ${U} in team ${TEAM} (id=${TEAM_ID}) → HTTP ${CODE}"
@@ -317,14 +325,20 @@ for U in $CANDIDATES; do
      continue
      ;;
    404)
+      _ALL_CANDIDATES_403="no"
      debug "${U} not a member of ${TEAM}"
      ;;
    *)
+      _ALL_CANDIDATES_403="no"
      echo "::warning::team-probe for ${U} in ${TEAM} returned unexpected HTTP ${CODE}"
      cat "$TEAM_PROBE_TMP" >&2
      ;;
  esac
 done

-echo "::error::${TEAM}-review awaiting non-author APPROVE from ${TEAM} team (candidates: $(echo "$CANDIDATES" | tr '\n' ',' | sed 's/,$//') — none are in team)"
+if [ "$_ALL_CANDIDATES_403" = "yes" ] && [ "$_CANDIDATE_COUNT" -gt 0 ]; then
+  echo "::error::${TEAM}-review FAILED — every candidate returned 403 (token owner is not a member of the ${TEAM} team). This is a TOKEN PROVISIONING issue, not a reviewer-eligibility issue. Add the token owner to the '${TEAM}' Gitea team (id=${TEAM_ID}) or use a token whose owner is already in that team."
+else
+  echo "::error::${TEAM}-review awaiting non-author APPROVE from ${TEAM} team (candidates: $(echo "$CANDIDATES" | tr '\n' ',' | sed 's/,$//') — none are in team)"
+fi
 exit 1
@@ -6,8 +6,8 @@
 # RFC#351 Step 2 of 6 (implementation MVP).
 #
 # Invoked by .gitea/workflows/sop-checklist.yml on:
-#   - pull_request_target: [opened, edited, synchronize, reopened]
-#   - issue_comment:       [created, edited, deleted]
+#   - pull_request_target: [opened, edited, synchronize, reopened, labeled, unlabeled]
+#   - issue_comment:       [created]  # edited/deleted omitted (Gitea 1.22.6 job-parsing quirk)
 #
 # Flow:
 #   1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
@@ -639,9 +639,7 @@ def load_config(path: str) -> dict[str, Any]:
        # yaml is an optional dep; the canonical loader is used when available,
        # but the SOP runs on runners that may not have PyYAML installed. The
        # fallback _load_config_minimal covers the same config shape without
-        # requiring the dep, so the ignore is safe: if yaml loads, we use it;
-        # otherwise we fall back silently.
-        import yaml  # type: ignore[import-not-found]
+        import yaml  # type: ignore[import-not-found]  # optional dep; fall back silently if absent
        with open(path, encoding="utf-8") as f:
            return yaml.safe_load(f)
    except ImportError:
@@ -1033,7 +1031,7 @@ def main(argv: list[str] | None = None) -> int:
                    for t in data:
                        if t.get("name") == tn:
                            tid = t.get("id")
-                            client._team_id_cache[(args.owner, tn)] = tid  # noqa: SLF001  # internal write-through cache
+                            client._team_id_cache[(args.owner, tn)] = tid  # noqa: SLF001  # write-through cache; intentional side-effect for reuse across calls
                            break
            if tid is not None:
                team_ids.append(tid)
@@ -11,21 +11,100 @@ def load_workflow(name: str) -> dict:
        return yaml.safe_load(f)


+def _all_required(workflow: dict) -> dict:
+    return workflow["jobs"]["all-required"]
+
+
 def test_all_required_uses_dedicated_meta_runner_lane():
    workflow = load_workflow("ci.yml")
-    all_required = workflow["jobs"]["all-required"]
+    all_required = _all_required(workflow)

+    # Stays on the dedicated `ci-meta` lane (the sentinel does no docker
+    # work, so it must NOT occupy the general docker-host pool).
    assert all_required["runs-on"] == "ci-meta"
-    assert "needs" not in all_required


-def test_all_required_reuses_path_filter_before_polling():
+def test_all_required_is_needs_aggregator_not_a_polling_gate():
+    """fix/ci-scheduler-fanout (2026-06-01): the sentinel was converted
+    from a status-polling loop (which squatted a ci-meta executor slot for
+    up to 40 min per PR) into a plain `needs:` aggregator that frees the
+    slot immediately. Pin the new shape so a regression to the poller is
+    caught.
+    """
    workflow = load_workflow("ci.yml")
-    all_required = workflow["jobs"]["all-required"]
+    all_required = _all_required(workflow)
    rendered = str(all_required)

-    assert "--profile ci" in rendered
-    assert ".gitea/scripts/detect-changes.py" in rendered
-    assert "REQUIRE_PLATFORM" in rendered
-    assert "REQUIRE_CANVAS" in rendered
-    assert "REQUIRE_SCRIPTS" in rendered
+    # The job MUST aggregate via `needs:` (the slot-freeing design).
+    assert "needs" in all_required, "all-required must be a needs: aggregator"
+
+    # It MUST NOT reintroduce the polling loop / per-SHA status fetch that
+    # was the throughput sink.
+    assert "detect-changes.py" not in rendered, (
+        "all-required must not run the detect-changes poller path"
+    )
+    assert "commits/" not in rendered and "statuses" not in rendered, (
+        "all-required must not poll commit statuses (the slot-squat path)"
+    )
+
+
+def test_all_required_does_not_use_if_always():
+    """Plain `needs:` works on Gitea 1.22.6 / act_runner v0.6.1; `needs:` +
+    `if: always()` is BROKEN (feedback_gitea_needs_works_only_ifalways_broken)
+    and would let a non-success need pass the gate. The sentinel must use
+    plain `needs:` WITHOUT a job-level `if: always()`.
+    """
+    workflow = load_workflow("ci.yml")
+    all_required = _all_required(workflow)
+
+    job_if = all_required.get("if")
+    assert not (isinstance(job_if, str) and "always()" in job_if), (
+        "all-required must not combine needs: with if: always()"
+    )
+
+
+def test_all_required_needs_matches_ci_required_drift_f1_set():
+    """The sentinel `needs:` list MUST equal ci-required-drift.py's
+    `ci_job_names()` set: every job MINUS the sentinel itself MINUS jobs
+    whose `if:` gates on github.event_name/github.ref (event-gated jobs
+    skip on PRs and a `needs:` on a skipped job would never let the
+    sentinel run). If they diverge, ci-required-drift F1 fires.
+    """
+    workflow = load_workflow("ci.yml")
+    jobs = workflow["jobs"]
+    sentinel = "all-required"
+
+    expected = set()
+    for key, body in jobs.items():
+        if key == sentinel:
+            continue
+        gate = body.get("if") if isinstance(body, dict) else None
+        if isinstance(gate, str) and (
+            "github.event_name" in gate or "github.ref" in gate
+        ):
+            # event-gated → legitimately skips on some triggers; excluded
+            # from both `needs:` and the F1 set.
+            continue
+        expected.add(key)
+
+    needs = jobs[sentinel].get("needs", [])
+    if isinstance(needs, str):
+        needs = [needs]
+    actual = set(needs)
+
+    assert actual == expected, (
+        f"all-required needs: {sorted(actual)} != ci_job_names() "
+        f"{sorted(expected)} — ci-required-drift F1 would fire"
+    )
+
+
+def test_all_required_needs_reference_real_jobs():
+    """F1b guard: every entry in `needs:` must name an existing job."""
+    workflow = load_workflow("ci.yml")
+    jobs = workflow["jobs"]
+    needs = jobs["all-required"].get("needs", [])
+    if isinstance(needs, str):
+        needs = [needs]
+    job_keys = set(jobs)
+    for dep in needs:
+        assert dep in job_keys, f"all-required needs unknown job {dep!r}"
@@ -47,13 +47,25 @@ jobs:
          REPO: ${{ github.repository }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          # Required-status-check contexts to evaluate at merge time.
-          # Newline-separated. Mirror this against branch protection
-          # (settings → branches → protected branch → required checks).
+          # Branch-aware JSON dict: keys are protected branch names,
+          # values are arrays of context names that branch protection
+          # requires for that branch. Mirror this against branch
+          # protection (settings → branches → protected branch →
+          # required checks) for each branch listed here.
+          #
          # Declared here rather than fetched from /branch_protections
          # because that endpoint requires admin write — sop-tier-bot is
          # read-only by design (least-privilege).
-          REQUIRED_CHECKS: |
-            CI / all-required (pull_request)
-            E2E API Smoke Test / E2E API Smoke Test (pull_request)
-            Handlers Postgres Integration / Handlers Postgres Integration (pull_request)
+          REQUIRED_CHECKS_JSON: |
+            {
+              "main": [
+                "CI / all-required (pull_request)",
+                "E2E API Smoke Test / E2E API Smoke Test (pull_request)",
+                "Handlers Postgres Integration / Handlers Postgres Integration (pull_request)"
+              ],
+              "staging": [
+                "CI / all-required (pull_request)",
+                "sop-checklist / all-items-acked (pull_request)"
+              ]
+            }
        run: bash .gitea/scripts/audit-force-merge.sh
@@ -37,7 +37,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -45,7 +45,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 5
    steps:
@@ -101,7 +101,7 @@ jobs:
    # AND-set: only the Mac arm64 runner advertises macos-self-hosted.
    # See "RUNNER TARGETING" header note for why bare self-hosted is unsafe.
    runs-on: [self-hosted, macos-self-hosted]
-    # ADVISORY: never blocks. See safety contract point 3. mc#774
+    # ADVISORY: never blocks. See safety contract point 3. mc#1982
    # internal#418 — tracked: arm64 advisory pilot, non-gating by design.
    continue-on-error: true
    # event_name gate: functional (only meaningful on push/PR) AND keeps
@@ -106,7 +106,7 @@ jobs:
    name: Platform (Go)
    needs: changes
    runs-on: ubuntu-latest
-    # mc#774 (closed 2026-05-14): Phase 4 flip of the platform-build job.
+    # mc#1982 (closed 2026-05-14): Phase 4 flip of the platform-build job.
    # Phase 4 (#656) originally flipped this to continue-on-error: false based on
    # Phase-3-masked "green on main 2026-05-12". Two failure classes then surfaced:
    #   (1) 4x delegation_test.go sqlmock gaps (PR #669 / #634 fix-forward, closed).
@@ -161,7 +161,7 @@ jobs:
          echo "::group::pendinguploads exit=$pu_exit (last 100 lines)"
          tail -100 /tmp/test-pu.log
          echo "::endgroup::"
-        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
      - if: ${{ needs.changes.outputs.platform == 'true' }}
        name: Run tests with coverage (blocking gate)
@@ -392,7 +392,7 @@ jobs:
  canvas-deploy-reminder:
    name: Canvas Deploy Reminder
    runs-on: docker-host
-    # mc#774 root-fix: added job-level `if:` so ci-required-drift.py's
+    # mc#1982 root-fix: added job-level `if:` so ci-required-drift.py's
    # ci_job_names() detects this as github.ref-gated and skips it from F1.
    # The step-level exit 0 handles the "not main push" case; the job-level
    # `if:` makes the gating explicit so the drift script sees it.
@@ -475,10 +475,10 @@ jobs:
    #
    # Emits `CI / all-required (<event>)` where <event> is the workflow trigger
    # (e.g. `CI / all-required (pull_request)`, `CI / all-required (push)`).
-    # Branch protection MUST be updated to require the event-suffixed name —
+    # Branch protection requires the event-suffixed name —
    # requiring `CI / all-required` (bare, no suffix) silently blocks all merges
    # because Gitea treats absent status contexts as pending (not skipped), and
-    # no workflow emits the bare name. Fixed: BP now requires
+    # no workflow emits the bare name. BP requires
    # `CI / all-required (pull_request)` per issue #1473.
    #
    # Closes the failure mode where status_check_contexts on molecule-core/main
@@ -487,129 +487,91 @@ jobs:
    # red silently merged through. See internal#286 for the three concrete
    # tonight-of-2026-05-11 incidents that prompted the emergency bump.
    #
-    # This job deliberately has no `needs:`. Gitea 1.22/act_runner can mark a
-    # job-level `if: always()` + `needs:` sentinel as skipped before upstream
-    # jobs settle, leaving branch protection with a permanent pending
-    # `CI / all-required` context. Instead, this independent sentinel polls the
-    # required commit-status contexts for this SHA and fails if any fail, skip,
-    # or never emit. It runs the same path detector as `changes` and only waits
-    # for path-relevant jobs; Gitea can otherwise leave needs/output-skipped
-    # jobs permanently pending with "Blocked by required conditions". It runs on
-    # the dedicated `ci-meta` lane so the poller does not occupy the same
-    # general runner pool as the jobs it is waiting for.
+    # ── 2026-06-01 CI-scheduler-overload fix (fix/ci-scheduler-fanout) ──
+    # PREVIOUS shape: a poll-gate that ran detect-changes then LOOPED on
+    # `GET /commits/{sha}/statuses` every 15s for up to 40 min, occupying a
+    # `ci-meta` executor slot the entire time it waited for upstream jobs.
+    # With only 2 ci-meta runners, that poll-loop squatted half the lane on
+    # every PR — a confirmed throughput sink in the live RCA (two concurrent
+    # `JOB-all-required` containers observed pinning the lane). The polling
+    # design existed only to dodge the Gitea `needs:` + `if: always()` bug,
+    # where an always()-guarded sentinel could be marked skipped before
+    # upstream jobs settled (leaving BP pending forever).
    #
-    # canvas-deploy-reminder is intentionally NOT included in all-required.needs.
-    # It is an informational main-push reminder, not a PR quality gate. Keeping
-    # it in this dependency list lets a skipped reminder skip the required
-    # sentinel before the `always()` guard can emit a branch-protection status.
+    # NEW shape: a plain `needs:` aggregator with NO polling loop. This is
+    # safe here — and was NOT safe at the time the poller was written —
+    # because every aggregated CI job now gates its real work PER-STEP
+    # (`if: needs.changes.outputs.* != 'true'`) rather than at the JOB level.
+    # A per-step-gated job always reaches a terminal SUCCESS (it no-ops its
+    # expensive steps but the job itself still completes), so it is never
+    # `skipped`. Plain `needs:` (WITHOUT `if: always()`) works correctly on
+    # Gitea 1.22.6 / act_runner v0.6.1 — only `needs:` + `if: always()` is
+    # broken (feedback_gitea_needs_works_only_ifalways_broken). We therefore
+    # use plain `needs:` + an explicit per-need result check (NOT
+    # `if: always()`); if any need fails/errors, Gitea never starts this job
+    # and BP sees `CI / all-required` go red via the failed dependency
+    # propagation — exactly the gate we want, with zero runner-squat.
    #
+    # The `needs:` list MUST stay in lockstep with ci-required-drift.py's
+    # F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
+    # whose `if:` gates on github.event_name/github.ref). canvas-deploy-
+    # reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
+    # so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
+    # skipped job would never let the sentinel run. If a new always-running
+    # CI job is added, add it here too or ci-required-drift F1 will flag it.
+    #
+    # Stays on the dedicated `ci-meta` lane (no docker work, so the
+    # docker-host-pin lint does not apply), but now the job is sub-second:
+    # it only inspects already-settled `needs.*.result` values, so it frees
+    # the slot immediately instead of holding it for the whole CI duration.
+    #
+    needs:
+      - changes
+      - platform-build
+      - canvas-build
+      - shellcheck
+      - python-lint
    continue-on-error: false
    runs-on: ci-meta
-    timeout-minutes: 45
+    timeout-minutes: 5
    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 0
-      - id: check
+      - name: Verify all aggregated CI jobs succeeded
+        # NO polling, NO API call, NO checkout. Because this job lists the
+        # aggregated jobs under `needs:` (without `if: always()`), Gitea only
+        # starts it once every need has reached SUCCESS — a failed/errored
+        # need short-circuits the job and propagates red to the
+        # `CI / all-required` context. This explicit check is a
+        # belt-and-suspenders assertion + a readable run summary; the real
+        # gating is the `needs:` edge itself.
        env:
-          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
-          PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
-          PUSH_BEFORE: ${{ github.event.before }}
-        run: |
-          python3 .gitea/scripts/detect-changes.py \
-            --profile ci \
-            --event-name "${{ github.event_name }}" \
-            --pr-base-sha "$PR_BASE_SHA" \
-            --base-ref "$PR_BASE_REF" \
-            --push-before "${GITHUB_EVENT_BEFORE:-$PUSH_BEFORE}"
-      - name: Wait for required CI contexts
-        env:
-          GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          API_ROOT: ${{ github.server_url }}/api/v1
-          REPOSITORY: ${{ github.repository }}
-          COMMIT_SHA: ${{ github.sha }}
-          EVENT_NAME: ${{ github.event_name }}
-          REQUIRE_PLATFORM: ${{ steps.check.outputs.platform }}
-          REQUIRE_CANVAS: ${{ steps.check.outputs.canvas }}
-          REQUIRE_SCRIPTS: ${{ steps.check.outputs.scripts }}
+          CHANGES_RESULT: ${{ needs.changes.result }}
+          PLATFORM_RESULT: ${{ needs.platform-build.result }}
+          CANVAS_RESULT: ${{ needs.canvas-build.result }}
+          SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
+          PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
        run: |
          set -euo pipefail
-          python3 - <<'PY'
-          import json
-          import os
-          import sys
-          import time
-          import urllib.error
-          import urllib.request
-
-          token = os.environ["GITEA_TOKEN"]
-          api_root = os.environ["API_ROOT"].rstrip("/")
-          repo = os.environ["REPOSITORY"]
-          sha = os.environ["COMMIT_SHA"]
-          event = os.environ["EVENT_NAME"]
-          required = [
-              f"CI / Detect changes ({event})",
-              f"CI / Python Lint & Test ({event})",
-          ]
-          if os.environ.get("REQUIRE_PLATFORM") == "true":
-              required.append(f"CI / Platform (Go) ({event})")
-          if os.environ.get("REQUIRE_CANVAS") == "true":
-              required.append(f"CI / Canvas (Next.js) ({event})")
-          if os.environ.get("REQUIRE_SCRIPTS") == "true":
-              required.append(f"CI / Shellcheck (E2E scripts) ({event})")
-          terminal_bad = {"failure", "error"}
-          deadline = time.time() + 40 * 60
-          last_summary = None
-
-          def fetch_statuses():
-              statuses = []
-              for page in range(1, 6):
-                  url = f"{api_root}/repos/{repo}/commits/{sha}/statuses?page={page}&limit=100"
-                  req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
-                  with urllib.request.urlopen(req, timeout=10) as resp:
-                      chunk = json.load(resp)
-                  if not chunk:
-                      break
-                  statuses.extend(chunk)
-              latest = {}
-              for item in statuses:
-                  ctx = item.get("context")
-                  if not ctx:
-                      continue
-                  prev = latest.get(ctx)
-                  if prev is None or (item.get("updated_at") or item.get("created_at") or "") >= (prev.get("updated_at") or prev.get("created_at") or ""):
-                      latest[ctx] = item
-              return latest
-
-          while True:
-              try:
-                  latest = fetch_statuses()
-              except (TimeoutError, OSError, urllib.error.URLError) as exc:
-                  if time.time() >= deadline:
-                      print(f"FAIL: status polling did not recover before deadline: {exc}", file=sys.stderr)
-                      sys.exit(1)
-                  print(f"WARN: status poll failed, retrying: {exc}", flush=True)
-                  time.sleep(15)
-                  continue
-              states = {ctx: (latest.get(ctx) or {}).get("status") or (latest.get(ctx) or {}).get("state") or "missing" for ctx in required}
-              summary = ", ".join(f"{ctx}={state}" for ctx, state in states.items())
-              if summary != last_summary:
-                  print(summary, flush=True)
-                  last_summary = summary
-              bad = {ctx: state for ctx, state in states.items() if state in terminal_bad}
-              if bad:
-                  print("FAIL: required CI context failed:", file=sys.stderr)
-                  for ctx, state in bad.items():
-                      desc = (latest.get(ctx) or {}).get("description") or ""
-                      print(f"  - {ctx}: {state} {desc}", file=sys.stderr)
-                  sys.exit(1)
-              if all(state == "success" for state in states.values()):
-                  print(f"OK: all {len(required)} required CI contexts succeeded")
-                  sys.exit(0)
-              if time.time() >= deadline:
-                  print("FAIL: timed out waiting for required CI contexts:", file=sys.stderr)
-                  for ctx, state in states.items():
-                      print(f"  - {ctx}: {state}", file=sys.stderr)
-                  sys.exit(1)
-              time.sleep(15)
-          PY
+          fail=0
+          check() {
+            name="$1"; result="$2"
+            printf 'CI / %s = %s\n' "$name" "$result"
+            # `success` is the only green terminal state we accept. A plain
+            # `needs:` job is only started when all needs succeed, so reaching
+            # this step already implies success — but assert explicitly so a
+            # future `if: always()` reintroduction (which WOULD let non-success
+            # through) fails loudly instead of silently passing the gate.
+            if [ "$result" != "success" ]; then
+              echo "::error::aggregated CI job '${name}' did not succeed (result=${result})"
+              fail=1
+            fi
+          }
+          check "Detect changes"        "$CHANGES_RESULT"
+          check "Platform (Go)"         "$PLATFORM_RESULT"
+          check "Canvas (Next.js)"      "$CANVAS_RESULT"
+          check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
+          check "Python Lint & Test"    "$PYTHON_LINT_RESULT"
+          if [ "$fail" -ne 0 ]; then
+            echo "::error::all-required: one or more aggregated CI jobs did not succeed"
+            exit 1
+          fi
+          echo "OK: all aggregated CI jobs succeeded — CI / all-required green."
@@ -102,7 +102,7 @@ jobs:
    name: Synthetic E2E against staging
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    # Bumped from 12 → 20 (2026-05-04). Tenant user-data install phase
    # (apt-get update + install docker.io/jq/awscli/caddy + snap install
@@ -166,6 +166,10 @@ jobs:
      # canary path. The script picks the right blob shape based on
      # which key is non-empty.
      E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
+      # google-adk canary path — AI-Studio key (config model
+      # google_genai:gemini-2.5-pro). PROD disallows API keys (Vertex+ADC);
+      # the keyed path is CI-only. Dispatch with E2E_RUNTIME=google-adk.
+      E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

@@ -217,6 +221,10 @@ jobs:
              required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
              required_secret_value="${E2E_OPENAI_API_KEY:-}"
              ;;
+            google-adk)
+              required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
+              required_secret_value="${E2E_GOOGLE_API_KEY:-}"
+              ;;
            *)
              echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
              required_secret_name=""
@@ -123,7 +123,7 @@ jobs:
    # integration). See internal#512 for the class defect.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      api: ${{ steps.decide.outputs.api }}
@@ -160,7 +160,7 @@ jobs:
    # detect-changes for the full rationale.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 15
    env:
@@ -48,7 +48,7 @@ jobs:
    # defect.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      chat: ${{ steps.decide.outputs.chat }}
@@ -112,7 +112,7 @@ jobs:
    # Must land on operator-host Linux (docker-host).
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 15
    env:
@@ -71,7 +71,7 @@ jobs:
  detect-changes:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      canvas: ${{ steps.decide.outputs.canvas }}
@@ -140,7 +140,7 @@ jobs:
    name: Canvas tabs E2E
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 40

@@ -84,7 +84,7 @@ jobs:
    name: E2E Staging External Runtime
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 25

@@ -94,20 +94,20 @@ jobs:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 1
-        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true

      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
          python-version: "3.11"
-        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true

      - name: YAML validation (best-effort)
        run: |
          echo "e2e-staging-saas.yml — PR validation: workflow YAML is valid."
          echo "E2E step runs only when provisioning-critical files change."
-        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true

  # Actual E2E: runs on trunk pushes and PRs that touch provisioning-critical
@@ -118,7 +118,7 @@ jobs:
    name: E2E Staging SaaS
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 45
    permissions:
@@ -157,13 +157,18 @@ jobs:
      # E2E_RUNTIME=hermes or =codex via workflow_dispatch can still
      # exercise the OpenAI path.
      E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
+      # google-adk (operator-dispatched only) auths Gemini with an
+      # AI-Studio key. Org policy disallows API keys in PROD (Vertex+ADC
+      # there); CI uses the keyed AI-Studio path with config model
+      # google_genai:gemini-2.5-pro. Vertex remains the supported prod path.
+      E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
      E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
      # Pin the model when running on the default claude-code path —
      # the per-runtime default ("sonnet") routes to direct Anthropic
      # and defeats the cost saving. Operators can override via the
      # workflow_dispatch flow (no input wired here yet — runtime
      # override is enough for ad-hoc).
-      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || 'MiniMax-M2' }}
+      E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
      E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
      E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}

@@ -212,6 +217,10 @@ jobs:
              required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
              required_secret_value="${E2E_OPENAI_API_KEY:-}"
              ;;
+            google-adk)
+              required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
+              required_secret_value="${E2E_GOOGLE_API_KEY:-}"
+              ;;
            *)
              echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
              required_secret_name=""
@@ -37,7 +37,7 @@ jobs:
    name: Intentional-failure teardown sanity
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 20

@@ -66,7 +66,7 @@ jobs:
  # bp-exempt: PR advisory bot; merge blocking is enforced by CI status and branch protection.
  gate-check:
    runs-on: ubuntu-latest
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true  # Never block on our own detector failing
    steps:
      - name: Check out BASE ref (never PR-head under pull_request_target)
@@ -87,8 +87,8 @@ jobs:
    # both jobs on the same label avoids workspace-volume cross-host
    # surprises and keeps the routing rule discoverable in one place.
    runs-on: docker-host
-    # mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      handlers: ${{ steps.filter.outputs.handlers }}
@@ -118,8 +118,8 @@ jobs:
    # mc#1529 §1: must run on operator-host (where `molecule-core-net`
    # exists). See detect-changes for the full routing rationale.
    runs-on: docker-host
-    # mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    env:
      # Unique name per run so concurrent jobs don't collide on the
@@ -70,7 +70,7 @@ jobs:
    # of mc#1543; see internal#512 for class defect.
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      run: ${{ steps.decide.outputs.run }}
@@ -172,7 +172,7 @@ jobs:
    # beta containers. Must run on operator-host Linux (docker-host).
    runs-on: docker-host
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 30
    steps:
@@ -1,6 +1,6 @@
 name: lint-bp-context-emit-match

-# Tier 2f scheduled lint (per mc#774) — detects drift between
+# Tier 2f scheduled lint (per mc#1982) — detects drift between
 # `branch_protections/<branch>.status_check_contexts` and the set of
 # contexts emitted by `.gitea/workflows/*.yml`.
 #
@@ -60,7 +60,7 @@ name: lint-bp-context-emit-match
 #
 # Cross-links
 # -----------
-# - mc#774 (the RFC that specs this lint)
+# - mc#1982 (the RFC that specs this lint)
 # - internal#349 (cross-repo BP sweep)
 # - feedback_phantom_required_check_after_gitea_migration
 # - feedback_tier_label_ids_are_per_repo
@@ -94,7 +94,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface drift without blocking. After 7
    # clean scheduled runs on main, flip to false so a scheduled
    # failure is a hard CI signal.
-    continue-on-error: true  # mc#774 Phase 3 — flip to false after 7 clean main runs
+    continue-on-error: true  # mc#1982 Phase 3 — flip to false after 7 clean main runs
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
@@ -1,6 +1,6 @@
 name: lint-continue-on-error-tracking

-# Tier 2e hard-gate lint (per mc#774) — every
+# Tier 2e hard-gate lint (per mc#1982) — every
 # `continue-on-error: true` in `.gitea/workflows/*.yml` must carry a
 # `# mc#NNNN` or `# internal#NNNN` tracker comment within 2 lines,
 # the referenced issue must be OPEN, and ≤14 days old.
@@ -8,7 +8,7 @@ name: lint-continue-on-error-tracking
 # Why this exists
 # ---------------
 # `continue-on-error: true` on `platform-build` had been hiding
-# mc#774-class regressions for ~3 weeks before #656 surfaced them on
+# mc#1982-class regressions for ~3 weeks before #656 surfaced them on
 # 2026-05-12. A 14-day cap on tracker age forces a review cycle and
 # surfaces mask-drift within at most 14 days of the original defect.
 # Each `continue-on-error: true` gets a paper trail — close or renew.
@@ -45,12 +45,12 @@ name: lint-continue-on-error-tracking
 # close-and-flip, or document the deliberate keep-mask in a fresh
 # 14-day-renewable tracker. After main is clean for 3 days,
 # follow-up PR flips this workflow's continue-on-error to false.
-# Tracking: mc#774.
+# Tracking: mc#1982.
 #
 # Cross-links
 # -----------
-# - mc#774 (the RFC that specs this lint)
-# - mc#774 (the empirical masked-3-weeks case)
+# - mc#1982 (the RFC that specs this lint)
+# - mc#1982 (the empirical masked-3-weeks case)
 # - feedback_chained_defects_in_never_tested_workflows
 # - feedback_behavior_based_ast_gates
 # - feedback_strict_root_only_after_class_a
@@ -97,9 +97,9 @@ jobs:
    # Phase 3 (RFC #219 §1): surface masked defects without blocking
    # PRs. Pre-existing continue-on-error: true directives on main
    # all violate this lint at first — intentional. Flip to false
-    # follow-up after main is clean for 3 days. mc#774.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
-    continue-on-error: true  # mc#774 Phase 3 mask — 14d forced-renewal cadence
+    # follow-up after main is clean for 3 days. mc#1982.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    continue-on-error: true  # mc#1982 Phase 3 mask — 14d forced-renewal cadence
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
@@ -51,7 +51,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking
    # the PR. Follow-up PR flips this off after surfaced defects are
    # triaged.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -25,6 +25,21 @@ name: Lint forbidden tenant-env keys
 #   feedback_path_filtered_workflow_cant_be_required). The scan itself
 #   targets workspace_secrets-writer paths via grep -r; it's fast
 #   (sub-second) so unconditional run is fine.
+#
+# ── 2026-06-01 CI-scheduler-fanout consolidation (fix/ci-scheduler-fanout) ──
+# The RFC#523 sibling lint formerly in its own file
+# `lint-no-tenant-gitea-token.yml` (the broader "no repo-host token into
+# any tenant-writer surface" scan) is now a SECOND job in THIS workflow
+# (`scan-tenant-token-write`). Both are sub-second Go-source greps that
+# fired as two separate workflow runs on every PR — pure scheduler
+# fan-out. Folding the sibling in here drops one workflow run + one
+# checkout per PR while keeping BOTH scans firing unconditionally on
+# every PR (the no-paths discipline above is preserved — neither job is
+# paths-filtered). The moved job keeps its exact `name:` so its emitted
+# status context is unchanged in substance; its `# bp-exempt:` directive
+# moves with it (Tier 2g). The old `Lint no tenant GITEA or GITHUB token
+# write / …` context is retired (a disappearing context needs no
+# directive; only NEW emitters do).

 on:
  pull_request:
@@ -166,3 +181,126 @@ jobs:
          fi

          echo "OK No forbidden operator-scope env key names hardcoded in writer paths."
+
+  # bp-exempt: advisory RFC#523 lint; PR review gate is review-driven, not BP-driven.
+  # (Carried with the workflow-name rename in PR mc#1593 so the renamed
+  # context emission satisfies lint_required_context_exists_in_bp Tier 2g.)
+  scan-tenant-token-write:
+    name: Scan for repo-host token write into tenant workspace surface
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 1
+
+      - name: Find Go files referencing a tenant-writer surface AND a repo-host token
+        run: |
+          set -euo pipefail
+
+          # Repo-host token NAMES — the threat-model subset. Operator-fleet
+          # tokens (CP_ADMIN_API_TOKEN, RAILWAY_TOKEN, INFISICAL_*) are
+          # caught by lint-forbidden-env-keys.yml's broader deny set; this
+          # lint focuses on the git-host class so a single co-occurrence
+          # match has a low false-positive rate.
+          FORBIDDEN_KEYS=(
+            "GITEA_TOKEN"
+            "GITEA_PAT"
+            "GITHUB_TOKEN"
+            "GITHUB_PAT"
+            "GH_TOKEN"
+          )
+
+          # Tenant-writer surface markers. A file matches the surface set
+          # if it references ANY of these strings. This is the "is this
+          # code path writing into a tenant workspace?" heuristic.
+          # Curated to catch the actual code shapes used in this repo
+          # (verified by grep against current main 2026-05-19):
+          #   - "workspace_secrets" / "global_secrets"  → DB table writes
+          #   - "seedAllowList"                          → CP-side seed table
+          #   - "/settings/secrets"                      → tenant HTTP API write
+          #   - "envVars["                               → in-memory env map write
+          #   - "containerEnv"                           → docker-run env-set
+          #   - "userData"                               → EC2 user-data script
+          #   - "provisionPayload" / "provisionContext"  → provision-request shape
+          SURFACE_PATTERN='workspace_secrets|global_secrets|seedAllowList|/settings/secrets|envVars\[|containerEnv|userData|provisionPayload|provisionContext'
+
+          # Files that legitimately reference these names AND a surface
+          # marker, but do so for guard / strip / test / doc-comment
+          # reasons. New entries require reviewer signoff and a one-line
+          # justification in the diff.
+          EXEMPT_FILES=(
+            # RFC#523 L1 deny-set source-of-truth + tests
+            "workspace-server/internal/handlers/workspace_provision_forbidden_env.go"
+            "workspace-server/internal/handlers/workspace_provision_forbidden_env_test.go"
+            # Forensic-#145 silent-strip denylist (defense-in-depth, by design lists the names)
+            "workspace-server/internal/provisioner/provisioner.go"
+            "workspace-server/internal/provisioner/provisioner_test.go"
+            # Pre-RFC#523 persona-fallback / org-helper paths. The L1
+            # fail-closed runs BEFORE these writers; downstream silent-strip
+            # also covers them. See applyAgentGitHTTPCreds doc-comment.
+            "workspace-server/internal/handlers/agent_git_identity.go"
+            "workspace-server/internal/handlers/org_helpers.go"
+            "workspace-server/internal/handlers/org.go"
+            # CP→platform admin auth (NOT a tenant env write).
+            "workspace-server/internal/provisioner/cp_provisioner.go"
+          )
+
+          # Build an extended-regex alternation of forbidden keys.
+          KEY_ALT="$(IFS='|'; echo "${FORBIDDEN_KEYS[*]}")"
+
+          # Find candidate files: Go non-test sources that contain a
+          # tenant-writer surface marker.
+          mapfile -t CANDIDATES < <(
+            grep -rlE --include='*.go' --exclude='*_test.go' \
+              "${SURFACE_PATTERN}" . 2>/dev/null \
+            | sed 's|^\./||' \
+            | sort -u
+          )
+
+          if [ "${#CANDIDATES[@]}" -eq 0 ]; then
+            echo "OK No tenant-writer-surface files found in tree (unexpected, but not a lint failure)."
+            exit 0
+          fi
+
+          HITS=""
+          for f in "${CANDIDATES[@]}"; do
+            # Skip exempt files.
+            skip=0
+            for ex in "${EXEMPT_FILES[@]}"; do
+              if [ "$f" = "$ex" ]; then skip=1; break; fi
+            done
+            [ "$skip" = "1" ] && continue
+
+            # File contains a surface marker; now grep for a forbidden
+            # key NAME. We require a QUOTED-literal match to avoid
+            # firing on a comment like "// also handle GITEA_TOKEN".
+            #
+            # The literal form catches:
+            #   - os.Getenv("GITEA_TOKEN")
+            #   - envVars["GITEA_TOKEN"] = ...
+            #   - {envKey: "GITEA_TOKEN", tenantKey: "GITEA_TOKEN"}
+            # but not:
+            #   - // see GITEA_TOKEN below   (no quotes)
+            found=$(grep -nE "\"(${KEY_ALT})\"" "$f" 2>/dev/null || true)
+            if [ -n "$found" ]; then
+              HITS="${HITS}--- ${f} ---\n${found}\n"
+            fi
+          done
+
+          if [ -n "$HITS" ]; then
+            echo "::error::Task #146 lint: repo-host token name(s) quoted in a tenant-writer-surface file:"
+            printf "$HITS"
+            echo ""
+            echo "These files reference a tenant-writer surface (workspace_secrets,"
+            echo "seedAllowList, /settings/secrets, containerEnv, userData, etc.)"
+            echo "AND quote a repo-host token name (GITEA_TOKEN/GITHUB_TOKEN/…)."
+            echo "Per RFC#523 threat model, tenant workspaces MUST NOT receive"
+            echo "operator-scope repo-host tokens. If your code legitimately needs"
+            echo "to reference one of these names in a tenant-writer file (e.g."
+            echo "a deny-set definition or silent-strip list), add the file to"
+            echo "EXEMPT_FILES with a one-line justification — reviewer signoff"
+            echo "required."
+            exit 1
+          fi
+
+          echo "OK No tenant-writer-surface file co-mentions a repo-host token literal."
@@ -1,6 +1,6 @@
 name: lint-mask-pr-atomicity

-# Tier 2d hard-gate lint (per mc#774) — blocks PRs that touch
+# Tier 2d hard-gate lint (per mc#1982) — blocks PRs that touch
 # `.gitea/workflows/ci.yml` and modify ONLY ONE of {continue-on-error,
 # all-required.sentinel.needs} without a `Paired: #NNN` reference in
 # the PR body or in a commit message.
@@ -37,13 +37,13 @@ name: lint-mask-pr-atomicity
 # This workflow lands at `continue-on-error: true` (Phase 3 — surface
 # regressions without blocking PRs while the rule beds in).
 # Follow-up PR flips to `false` once we have ≥3 days of clean runs on
-# `main` and no false-positives. Tracking issue: mc#774.
+# `main` and no false-positives. Tracking issue: mc#1982.
 #
 # Cross-links
 # -----------
-# - mc#774 (the RFC that specs this lint)
+# - mc#1982 (the RFC that specs this lint)
 # - PR#665 / PR#668 (the empirical split-pair)
-# - mc#774 (the main-red incident the split caused)
+# - mc#1982 (the main-red incident the split caused)
 # - feedback_strict_root_only_after_class_a
 # - feedback_behavior_based_ast_gates
 #
@@ -92,8 +92,8 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken shapes without blocking
    # PRs. Follow-up PR flips this to `false` once recent runs on main
    # are confirmed clean (eat-our-own-dogfood discipline mirrors
-    # PR#673's same-shape comment). Tracking: mc#774.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # PR#673's same-shape comment). Tracking: mc#1982.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - name: Check out PR head with full history (need base SHA blobs)
@@ -1,182 +0,0 @@
-name: Lint no tenant GITEA or GITHUB token write
-
-# Task #146 — CI guardrail companion to RFC#523's `lint-forbidden-env-keys.yml`.
-#
-# `lint-forbidden-env-keys.yml` (Layer 3) catches code that hardcodes a
-# forbidden env-var key NAME as a quoted literal in workspace_secrets
-# writer paths under workspace-server/internal/.
-#
-# This workflow catches a BROADER class: any code path that reads a
-# repo-host token (GITEA_TOKEN / GITHUB_TOKEN / GH_TOKEN) and then writes
-# it into a TENANT WORKSPACE's env, secret store, user-data, or
-# provision payload. This is the actual RFC#523 threat-model statement —
-# the goal is "no tenant workspace ever receives an operator-scope repo
-# token," not just "no _quoted_ literal `GITEA_TOKEN`." A future writer
-# could route the value via a variable, a struct field, or a config key
-# and slip past the existing literal scan; this lint catches those
-# routing patterns at PR review time.
-#
-# Scope
-#   Scans the WHOLE repo's Go sources (not just workspace-server/) for
-#   co-occurrences of:
-#     - a repo-host token NAME (GITEA_TOKEN / GITHUB_TOKEN / GH_TOKEN /
-#       GITEA_PAT / GITHUB_PAT) used as os.Getenv argument or string
-#       literal
-#     - within a file that ALSO references a tenant-writer surface
-#       (`tenant`, `workspace_secrets`, `global_secrets`, `seedAllowList`,
-#       `/settings/secrets`, `userData`, `provisionPayload`,
-#       `envVars[`, `containerEnv`).
-#
-#   Co-occurrence (not single-line) is the false-positive control: a
-#   file that just LOGS the variable name (e.g. "missing GITEA_TOKEN")
-#   without touching any tenant surface won't fire.
-#
-# Drift contract with lint-forbidden-env-keys.yml
-#   Both lints share the same FORBIDDEN_KEYS list (a subset — only the
-#   repo-host tokens, since this lint's threat model is "tenant gets
-#   write access to operator's git host"). If RFC#523's deny set grows,
-#   update BOTH this file AND lint-forbidden-env-keys.yml AND the Go
-#   source-of-truth in
-#   workspace-server/internal/handlers/workspace_provision_forbidden_env.go.
-#
-# Open-source-template-friendly
-#   The patterns scanned are generic (no MOLECULE_-prefix literals).
-#   A fork can copy this workflow as-is and adjust FORBIDDEN_KEYS.
-#
-# Path-filter discipline
-#   No `paths:` filter — required-status workflows must run on every PR
-#   per `feedback_path_filtered_workflow_cant_be_required`. Scan is
-#   sub-second.
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened]
-  push:
-    branches: [main, staging]
-
-env:
-  GITHUB_SERVER_URL: https://git.moleculesai.app
-
-jobs:
-  # bp-exempt: advisory RFC#523 lint; PR review gate is review-driven, not BP-driven.
-  # (Carried with the workflow-name rename in PR mc#1593 so the renamed
-  # context emission satisfies lint_required_context_exists_in_bp Tier 2g.)
-  scan:
-    name: Scan for repo-host token write into tenant workspace surface
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
-        with:
-          fetch-depth: 1
-
-      - name: Find Go files referencing a tenant-writer surface AND a repo-host token
-        run: |
-          set -euo pipefail
-
-          # Repo-host token NAMES — the threat-model subset. Operator-fleet
-          # tokens (CP_ADMIN_API_TOKEN, RAILWAY_TOKEN, INFISICAL_*) are
-          # caught by lint-forbidden-env-keys.yml's broader deny set; this
-          # lint focuses on the git-host class so a single co-occurrence
-          # match has a low false-positive rate.
-          FORBIDDEN_KEYS=(
-            "GITEA_TOKEN"
-            "GITEA_PAT"
-            "GITHUB_TOKEN"
-            "GITHUB_PAT"
-            "GH_TOKEN"
-          )
-
-          # Tenant-writer surface markers. A file matches the surface set
-          # if it references ANY of these strings. This is the "is this
-          # code path writing into a tenant workspace?" heuristic.
-          # Curated to catch the actual code shapes used in this repo
-          # (verified by grep against current main 2026-05-19):
-          #   - "workspace_secrets" / "global_secrets"  → DB table writes
-          #   - "seedAllowList"                          → CP-side seed table
-          #   - "/settings/secrets"                      → tenant HTTP API write
-          #   - "envVars["                               → in-memory env map write
-          #   - "containerEnv"                           → docker-run env-set
-          #   - "userData"                               → EC2 user-data script
-          #   - "provisionPayload" / "provisionContext"  → provision-request shape
-          SURFACE_PATTERN='workspace_secrets|global_secrets|seedAllowList|/settings/secrets|envVars\[|containerEnv|userData|provisionPayload|provisionContext'
-
-          # Files that legitimately reference these names AND a surface
-          # marker, but do so for guard / strip / test / doc-comment
-          # reasons. New entries require reviewer signoff and a one-line
-          # justification in the diff.
-          EXEMPT_FILES=(
-            # RFC#523 L1 deny-set source-of-truth + tests
-            "workspace-server/internal/handlers/workspace_provision_forbidden_env.go"
-            "workspace-server/internal/handlers/workspace_provision_forbidden_env_test.go"
-            # Forensic-#145 silent-strip denylist (defense-in-depth, by design lists the names)
-            "workspace-server/internal/provisioner/provisioner.go"
-            "workspace-server/internal/provisioner/provisioner_test.go"
-            # Pre-RFC#523 persona-fallback / org-helper paths. The L1
-            # fail-closed runs BEFORE these writers; downstream silent-strip
-            # also covers them. See applyAgentGitHTTPCreds doc-comment.
-            "workspace-server/internal/handlers/agent_git_identity.go"
-            "workspace-server/internal/handlers/org_helpers.go"
-            "workspace-server/internal/handlers/org.go"
-            # CP→platform admin auth (NOT a tenant env write).
-            "workspace-server/internal/provisioner/cp_provisioner.go"
-          )
-
-          # Build an extended-regex alternation of forbidden keys.
-          KEY_ALT="$(IFS='|'; echo "${FORBIDDEN_KEYS[*]}")"
-
-          # Find candidate files: Go non-test sources that contain a
-          # tenant-writer surface marker.
-          mapfile -t CANDIDATES < <(
-            grep -rlE --include='*.go' --exclude='*_test.go' \
-              "${SURFACE_PATTERN}" . 2>/dev/null \
-            | sed 's|^\./||' \
-            | sort -u
-          )
-
-          if [ "${#CANDIDATES[@]}" -eq 0 ]; then
-            echo "OK No tenant-writer-surface files found in tree (unexpected, but not a lint failure)."
-            exit 0
-          fi
-
-          HITS=""
-          for f in "${CANDIDATES[@]}"; do
-            # Skip exempt files.
-            skip=0
-            for ex in "${EXEMPT_FILES[@]}"; do
-              if [ "$f" = "$ex" ]; then skip=1; break; fi
-            done
-            [ "$skip" = "1" ] && continue
-
-            # File contains a surface marker; now grep for a forbidden
-            # key NAME. We require a QUOTED-literal match to avoid
-            # firing on a comment like "// also handle GITEA_TOKEN".
-            #
-            # The literal form catches:
-            #   - os.Getenv("GITEA_TOKEN")
-            #   - envVars["GITEA_TOKEN"] = ...
-            #   - {envKey: "GITEA_TOKEN", tenantKey: "GITEA_TOKEN"}
-            # but not:
-            #   - // see GITEA_TOKEN below   (no quotes)
-            found=$(grep -nE "\"(${KEY_ALT})\"" "$f" 2>/dev/null || true)
-            if [ -n "$found" ]; then
-              HITS="${HITS}--- ${f} ---\n${found}\n"
-            fi
-          done
-
-          if [ -n "$HITS" ]; then
-            echo "::error::Task #146 lint: repo-host token name(s) quoted in a tenant-writer-surface file:"
-            printf "$HITS"
-            echo ""
-            echo "These files reference a tenant-writer surface (workspace_secrets,"
-            echo "seedAllowList, /settings/secrets, containerEnv, userData, etc.)"
-            echo "AND quote a repo-host token name (GITEA_TOKEN/GITHUB_TOKEN/…)."
-            echo "Per RFC#523 threat model, tenant workspaces MUST NOT receive"
-            echo "operator-scope repo-host tokens. If your code legitimately needs"
-            echo "to reference one of these names in a tenant-writer file (e.g."
-            echo "a deny-set definition or silent-strip list), add the file to"
-            echo "EXEMPT_FILES with a one-line justification — reviewer signoff"
-            echo "required."
-            exit 1
-          fi
-
-          echo "OK No tenant-writer-surface file co-mentions a repo-host token literal."
@@ -4,7 +4,7 @@ name: Lint pre-flip continue-on-error
 # on any job in `.gitea/workflows/*.yml` WITHOUT proof that the affected
 # job's recent runs on the target branch (PR base) are actually green.
 #
-# Empirical class: PR #656 / mc#774. PR #656 (RFC internal#219 Phase 4)
+# Empirical class: PR #656 / mc#1982. PR #656 (RFC internal#219 Phase 4)
 # flipped 5 platform-build-class jobs `continue-on-error: true → false`
 # on the basis of a "verified green on main via combined-status check".
 # But that "green" was the LIE the prior `continue-on-error: true`
@@ -13,7 +13,7 @@ name: Lint pre-flip continue-on-error
 # job-level status. The precondition the PR claimed to verify was
 # structurally fooled by the bug being flipped.
 #
-# mc#774 captured the surfaced defects (2 mutually-masked regressions):
+# mc#1982 captured the surfaced defects (2 mutually-masked regressions):
 #   - Class 1: sqlmock helper drift since 2f36bb9a (24 days old)
 #   - Class 2: OFFSEC-001 contract collision since 7d1a189f (1 day old)
 #
@@ -55,7 +55,7 @@ name: Lint pre-flip continue-on-error
 #   - YAML parse error in one of the workflow files: warn-only,
 #     don't block — the YAML lint workflows catch this separately.
 #
-# Cross-links: PR#656, mc#774, PR#665 (interim re-mask),
+# Cross-links: PR#656, mc#1982, PR#665 (interim re-mask),
 # Quirk #10 (internal#342 + dup #287), hongming-pc2 charter
 # §SOP-N rule (e), feedback_strict_root_only_after_class_a,
 # feedback_no_shared_persona_token_use.
@@ -99,8 +99,8 @@ jobs:
    timeout-minutes: 8
    # Phase 3 (RFC internal#219 §1): surface broken flips without blocking
    # the PR yet. Follow-up flips this to `false` once the workflow itself
-    # has clean recent runs on main. mc#774 interim — remove when CoE→false.
-    continue-on-error: true  # mc#774
+    # has clean recent runs on main. mc#1982 interim — remove when CoE→false.
+    continue-on-error: true  # mc#1982
    steps:
      - name: Check out PR head (full history for base-SHA access)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -1,6 +1,6 @@
 name: lint-required-context-exists-in-bp

-# Tier 2g hard-gate lint (per mc#774) — diff-based PR-time
+# Tier 2g hard-gate lint (per mc#1982) — diff-based PR-time
 # check. When a PR adds a NEW commit-status emission (workflow YAML
 # `name:` + job `name:`-or-key + on:-event), the workflow file must
 # carry one of three directives adjacent to the new job:
@@ -16,7 +16,7 @@ name: lint-required-context-exists-in-bp
 # PR#656 added `CI / all-required (pull_request)` as a sentinel
 # context that workflows emit, but BP did NOT list it. When
 # platform-build failed, all-required failed, but BP let the PR
-# merge anyway → cascade to mc#774. With this lint, PR#656 would
+# merge anyway → cascade to mc#1982. With this lint, PR#656 would
 # have been blocked until either the BP PATCH ran alongside OR
 # the author added a `bp-required: pending` directive.
 #
@@ -27,7 +27,7 @@ name: lint-required-context-exists-in-bp
 # share the workflow-context enumeration helpers
 # (`_event_map`, `workflow_contexts`, `_job_display`) but the
 # semantics are intentionally distinct so they're separate scripts.
-# Co-design is documented in mc#774.
+# Co-design is documented in mc#1982.
 #
 # Directive comment lives in the workflow file (NOT PR body)
 # ----------------------------------------------------------
@@ -42,13 +42,13 @@ name: lint-required-context-exists-in-bp
 # Lands at `continue-on-error: true` (Phase 3 — surface the
 # pattern without blocking PRs while the directive convention
 # beds in). After 7 days of clean runs on `main` with no false
-# positives, follow-up flips to `false`. Tracking: mc#774.
+# positives, follow-up flips to `false`. Tracking: mc#1982.
 #
 # Cross-links
 # -----------
-# - mc#774 (the RFC that specs this lint)
+# - mc#1982 (the RFC that specs this lint)
 # - PR#656 (the empirical case)
-# - mc#774 (the surfaced cascade)
+# - mc#1982 (the surfaced cascade)
 # - feedback_phantom_required_check_after_gitea_migration (Tier 2f cousin)
 # - feedback_behavior_based_ast_gates
 #
@@ -83,8 +83,8 @@ jobs:
    timeout-minutes: 5
    # Phase 3 (RFC #219 §1): surface the pattern without blocking PRs
    # while the directive convention beds in. Follow-up flip to false
-    # after 7 clean days on main. mc#774.
-    continue-on-error: true  # mc#774 Phase 3 — flip to false after 7 clean main runs
+    # after 7 clean days on main. mc#1982.
+    continue-on-error: true  # mc#1982 Phase 3 — flip to false after 7 clean main runs
    steps:
      - name: Check out PR head with full history (need base SHA blobs)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -55,7 +55,7 @@ jobs:
    # Phase 3 (RFC #219 §1): surface broken shapes without blocking PRs.
    # Follow-up PR flips this off after the 4 existing-on-main rule-2
    # (workflow_run) violations are migrated to a supported trigger.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
@@ -67,7 +67,7 @@ jobs:
    # in this rollout (internal#462) so the precondition holds.
    runs-on: publish
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - name: Checkout
@@ -234,7 +234,7 @@ jobs:
    name: Production auto-deploy
    needs: build-and-push
    if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
-    # Side-effect deploy only; image publish success is the durable artifact. mc#774
+    # Side-effect deploy only; image publish success is the durable artifact. mc#1982
    continue-on-error: true
    # Publish/release lane (internal#462) — production deploy of a merged
    # fix; reserved capacity, never queued behind PR-CI.
@@ -51,7 +51,7 @@ jobs:
    name: Audit Railway env vars for drift-prone pins
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 10

@@ -73,7 +73,7 @@ jobs:
    # it never queues behind PR-CI. `publish` -> molecule-runner-publish-*.
    runs-on: publish
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 25
    env:
@@ -80,7 +80,7 @@ jobs:
    # `publish` -> molecule-runner-publish-* sub-pool.
    runs-on: publish
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 25
    steps:
@@ -54,7 +54,7 @@ jobs:
        # runners with internet access to package mirrors). Falls back to GitHub
        # binary download. GitHub releases may be blocked on some runner networks
        # (infra#241 follow-up).
-        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
        run: |
          if apt-get update -qq && apt-get install -y -qq jq; then
@@ -57,7 +57,7 @@ jobs:
    name: Detect SECRET_PATTERNS drift
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    timeout-minutes: 5
    steps:
@@ -36,7 +36,7 @@
 # window closed. continue-on-error: true has been removed from the
 # tier-check job; AND-composition is now fully enforced. If you need
 # to temporarily re-introduce a mask, file a tracker and follow the
-# mc#774 protocol (Tier 2e lint requires a current tracker within
+# mc#1982 protocol (Tier 2e lint requires a current tracker within
 # 2 lines of any continue-on-error: true).

 name: sop-tier-check
@@ -92,7 +92,7 @@ jobs:
        # runners). The sop-tier-check script has its own fallback as a
        # third line of defense. continue-on-error: true ensures this step
        # failing does not block the job.
-        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
        run: |
          # apt-get is the primary method — Ubuntu package mirrors are reliably
@@ -113,7 +113,7 @@ jobs:
        # continue-on-error: true at step level — job-level is ignored by Gitea
        # Actions (quirk #10, internal runbooks). Belt-and-suspenders with
        # SOP_FAIL_OPEN=1 + || true below.
-        # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+        # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
        continue-on-error: true
        env:
          GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
@@ -90,7 +90,7 @@ jobs:
  staging-smoke:
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    outputs:
      sha: ${{ steps.compute.outputs.sha }}
@@ -212,7 +212,7 @@ jobs:
    if: ${{ needs.staging-smoke.result == 'success' && needs.staging-smoke.outputs.smoke_ran == 'true' }}
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    env:
      SHA: ${{ needs.staging-smoke.outputs.sha }}
@@ -71,7 +71,7 @@ jobs:
    name: Sweep CF orphans
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    # 3 min surfaces hangs (CF API stall, AWS describe-instances stuck)
    # within one cron interval instead of burning a full tick. Realistic
@@ -55,7 +55,7 @@ jobs:
    name: Sweep CF tunnels
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    # 30 min cap. Was 5 min on the theory that the only thing that
    # could take >5min is a CF-API hang — but on 2026-05-02 a backlog
@@ -49,7 +49,7 @@ jobs:
    name: Ops scripts (unittest)
    runs-on: ubuntu-latest
    # Phase 3 (RFC #219 §1): surface broken workflows without blocking.
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -35,8 +35,26 @@ name: verify-providers-gen
 on:
  pull_request:
    types: [opened, synchronize, reopened]
+    # CI-scheduler-overload fix (fix/ci-scheduler-fanout, 2026-06-01):
+    # this gate only verifies that the generated providers artifact is in
+    # sync with the schema SSOT. Its verdict can ONLY change when one of
+    # the codegen inputs/outputs changes, so firing the Go toolchain on
+    # every unrelated PR (docs, canvas, scripts) is pure fan-out cost.
+    # Scoped to the codegen surface. SAFE because this workflow is NOT a
+    # branch-protection status_check_context (see header §ENFORCEMENT
+    # GATING) — lint-required-no-paths only forbids paths filters on
+    # REQUIRED workflows; this is advisory, so a paths filter is allowed.
+    # Mirrors the sibling sync-providers-yaml.yml scoping convention.
+    paths:
+      - 'workspace-server/internal/providers/**'
+      - 'workspace-server/cmd/gen-providers/**'
+      - '.gitea/workflows/verify-providers-gen.yml'
  push:
    branches: [main, staging]
+    paths:
+      - 'workspace-server/internal/providers/**'
+      - 'workspace-server/cmd/gen-providers/**'
+      - '.gitea/workflows/verify-providers-gen.yml'

 env:
  GITHUB_SERVER_URL: https://git.moleculesai.app
@@ -31,7 +31,7 @@ jobs:
    name: Weekly Platform-Go Surface
    runs-on: ubuntu-latest
    # continue-on-error: surface only, never block
-    # mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
+    # mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
    continue-on-error: true
    defaults:
      run:
@@ -49,8 +49,8 @@
 ## Quick Start

 ```bash
-git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
-cd molecule-monorepo
+git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
+cd molecule-core
 ./scripts/dev-start.sh
 ```

@@ -41,7 +41,7 @@ export default function PricingPage() {
        <p className="mt-2 text-ink-mid">
          We publish the{" "}
          <a
-            href="https://git.moleculesai.app/molecule-ai/molecule-monorepo"
+            href="https://git.moleculesai.app/molecule-ai/molecule-core"
            className="text-accent underline hover:text-accent"
          >
            full source on GitHub
@@ -38,10 +38,11 @@ const DEFAULT_RUNTIME = "claude-code";
 const RUNTIME_OPTIONS = [
  { value: "claude-code", label: "Claude Code" },
  { value: "codex", label: "OpenAI Codex CLI" },
+  { value: "google-adk", label: "Google ADK" },
  { value: "hermes", label: "Hermes" },
  { value: "openclaw", label: "OpenClaw" },
 ];
-const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "hermes", "openclaw"]);
+const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "google-adk", "hermes", "openclaw"]);
 const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
 const DEFAULT_HEADLESS_ROOT_GB = 30;
 const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
@@ -1,411 +1,82 @@
 // @vitest-environment jsdom
 /**
- * Tests for BudgetSection (issue #541).
+ * Focused tests for BudgetSection's PER-PERIOD progress-bar math + aria (#49).
 *
- * Covers:
- *  - Loading state
- *  - Stats row: used / limit, "Unlimited" when null
- *  - Progress bar: correct percentage, capped at 100%, absent when no limit
- *  - Budget remaining text
- *  - Input pre-fill (existing limit / blank when null)
- *  - Save: PATCH with number, PATCH with null (blank input)
- *  - 402 on GET → exceeded banner, no fetch-error text
- *  - 402 on PATCH → exceeded banner
- *  - Non-402 fetch error → error text
- *  - Non-402 save error → save error alert
- *  - Section header and subheading
- *  - Fetch error does not show stats
+ * Behavioral coverage (loading, save, 402 banners, USD formatting, legacy
+ * back-compat) lives in tabs/__tests__/BudgetSection.test.tsx — this file
+ * deliberately covers only the per-period progress percentage + aria-valuenow
+ * + the over-budget colouring, which that suite doesn't assert in detail. Kept
+ * separate to avoid duplicating the behavioral suite (one component, no
+ * parallel/identical suites).
 */
 import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
-import {
-  render,
-  screen,
-  fireEvent,
-  waitFor,
-  cleanup,
-  act,
-} from "@testing-library/react";
-
-// ── Mock api ──────────────────────────────────────────────────────────────────
+import { render, screen, waitFor, cleanup } from "@testing-library/react";

 vi.mock("@/lib/api", () => ({
-  api: {
-    get: vi.fn(),
-    patch: vi.fn(),
-  },
+  api: { get: vi.fn(), patch: vi.fn() },
 }));

 import { api } from "@/lib/api";
 import { BudgetSection } from "../tabs/BudgetSection";

 const mockGet = vi.mocked(api.get);
-const mockPatch = vi.mocked(api.patch);

-// ── Helpers ───────────────────────────────────────────────────────────────────
+type P = { limit: number | null; spend: number; remaining: number | null };

-function budgetResponse(overrides: Partial<{
-  budget_limit: number | null;
-  budget_used: number;
-  budget_remaining: number | null;
-}> = {}) {
+// Build a periods response where the named period has the given limit/spend.
+function withMonthly(limit: number | null, spend: number) {
+  const blank: P = { limit: null, spend: 0, remaining: null };
+  const monthly: P = { limit, spend, remaining: limit == null ? null : limit - spend };
  return {
-    budget_limit: 1000,
-    budget_used: 250,
-    budget_remaining: 750,
-    ...overrides,
+    periods: { hourly: blank, daily: blank, weekly: blank, monthly },
+    budget_limit: limit,
+    monthly_spend: spend,
+    budget_remaining: monthly.remaining,
  };
 }

-function make402Error(): Error {
-  return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
-}
+beforeEach(() => vi.clearAllMocks());
+afterEach(() => cleanup());

-function make402PatchError(): Error {
-  return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
-}
-
-function makeGenericError(msg = "network timeout"): Error {
-  return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
-}
-
-beforeEach(() => {
-  vi.clearAllMocks();
-});
-
-afterEach(() => {
-  cleanup();
-});
-
-// ── Rendering helpers ─────────────────────────────────────────────────────────
-
-async function renderLoaded(budgetData = budgetResponse()) {
+async function renderLoaded(data: unknown) {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  mockGet.mockResolvedValueOnce(budgetData as any);
+  mockGet.mockResolvedValueOnce(data as any);
  render(<BudgetSection workspaceId="ws-1" />);
-  // Wait for loading to finish
  await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
 }

-// ── Loading state ─────────────────────────────────────────────────────────────
-
-describe("BudgetSection — loading state", () => {
-  it("shows loading indicator while fetch is in flight", () => {
-    // Never resolve
-    mockGet.mockReturnValue(new Promise(() => {}));
-    render(<BudgetSection workspaceId="ws-1" />);
-    expect(screen.getByTestId("budget-loading")).toBeTruthy();
-    expect(screen.getByText("Loading…")).toBeTruthy();
+describe("BudgetSection — per-period progress bar", () => {
+  it("renders the bar for a limited period and omits it for an unlimited one", async () => {
+    await renderLoaded(withMonthly(1000, 250));
+    expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
+    expect(screen.queryByTestId("budget-hourly-fill")).toBeNull(); // hourly unlimited
  });

-  it("hides loading indicator after fetch resolves", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockGet.mockResolvedValueOnce(budgetResponse() as any);
-    render(<BudgetSection workspaceId="ws-1" />);
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-  });
-});
-
-// ── Section header ────────────────────────────────────────────────────────────
-
-describe("BudgetSection — header and subheading", () => {
-  it("renders 'Budget' as the section heading", async () => {
-    await renderLoaded();
-    expect(screen.getByText("Budget")).toBeTruthy();
-  });
-
-  it("renders the subheading 'Limit total message credits for this workspace'", async () => {
-    await renderLoaded();
-    expect(
-      screen.getByText("Limit total message credits for this workspace")
-    ).toBeTruthy();
-  });
-
-  it("renders 'Budget limit (credits)' label for the input", async () => {
-    await renderLoaded();
-    expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
-  });
-});
-
-// ── Stats row ─────────────────────────────────────────────────────────────────
-
-describe("BudgetSection — stats row", () => {
-  it("shows budget_used in the stats row", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
-    expect(screen.getByTestId("budget-used-value").textContent).toBe("350");
-  });
-
-  it("shows budget_limit in the stats row", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
-    expect(screen.getByTestId("budget-limit-value").textContent).toBe("500");
-  });
-
-  it("shows 'Unlimited' when budget_limit is null", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
-    expect(screen.getByTestId("budget-limit-value").textContent).toBe("Unlimited");
-  });
-
-  it("shows budget_remaining when present", async () => {
-    await renderLoaded(budgetResponse({ budget_remaining: 750 }));
-    expect(screen.getByTestId("budget-remaining").textContent).toContain("750");
-    expect(screen.getByTestId("budget-remaining").textContent).toContain("credits remaining");
-  });
-
-  it("hides budget_remaining row when null", async () => {
-    await renderLoaded(budgetResponse({ budget_remaining: null }));
-    expect(screen.queryByTestId("budget-remaining")).toBeNull();
-  });
-
-  it("does not crash when budget_used is missing from the response", async () => {
-    // Backend for a provisioning-stuck workspace may return a partial
-    // shape. Regression: previously this threw
-    // "Cannot read properties of undefined (reading 'toLocaleString')"
-    // and crashed the whole Details tab.
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
-    expect(screen.getByTestId("budget-used-value").textContent).toBe("0");
-  });
-});
-
-// ── Progress bar ──────────────────────────────────────────────────────────────
-
-describe("BudgetSection — progress bar", () => {
-  it("renders the progress bar when budget_limit is set", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
-    expect(screen.getByRole("progressbar")).toBeTruthy();
-  });
-
-  it("does NOT render progress bar when budget_limit is null", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
-    expect(screen.queryByRole("progressbar")).toBeNull();
-  });
-
-  it("fills to the correct percentage (25%)", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("25%");
-  });
-
-  it("fills to the correct percentage (50%)", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("50%");
-  });
-
-  it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("100%");
-  });
-
-  it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
-    await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
-    const bar = screen.getByRole("progressbar");
-    expect(bar.getAttribute("aria-valuenow")).toBe("30");
-  });
-
-  it("shows 0% progress bar when budget_used is absent from the response", async () => {
-    // Regression: budget_used is optional (provisioning-stuck workspaces return
-    // partial shapes). Without the `?? 0` guard the progressPct calculation
-    // throws a TypeScript strict-null error and the build fails.
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
-    const bar = screen.getByRole("progressbar");
-    expect(bar.getAttribute("aria-valuenow")).toBe("0");
-    const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
-    expect(fill.style.width).toBe("0%");
-  });
-});
-
-// ── Input pre-fill ────────────────────────────────────────────────────────────
-
-describe("BudgetSection — input pre-fill", () => {
-  it("pre-fills input with existing budget_limit", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: 500 }));
-    const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-    expect(input.value).toBe("500");
-  });
-
-  it("leaves input empty when budget_limit is null", async () => {
-    await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
-    const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-    expect(input.value).toBe("");
-  });
-});
-
-// ── Save — PATCH calls ────────────────────────────────────────────────────────
-
-describe("BudgetSection — save", () => {
-  it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "800" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
-    expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
-    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
-    expect(body.budget_limit).toBe(800);
-  });
-
-  it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
-    // Regression for QA bug report: `parseInt("0") || null` would yield null.
-    // The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "0" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
-    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
-    expect(body.budget_limit).toBe(0);
-    expect(body.budget_limit).not.toBeNull();
-  });
-
-  it("sends budget_limit: null when input is blank", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() => expect(mockPatch).toHaveBeenCalled());
-    const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
-    expect(body.budget_limit).toBeNull();
-  });
-
-  it("updates displayed stats after successful save", async () => {
-    const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(updated as any);
-    await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
-
-    fireEvent.change(screen.getByTestId("budget-limit-input"), {
-      target: { value: "2000" },
-    });
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
-    );
-  });
-
-  it("shows save error message on non-402 PATCH failure", async () => {
-    mockPatch.mockRejectedValueOnce(
-      new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
-    );
-    await renderLoaded();
-
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-save-error")).toBeTruthy()
-    );
-    expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
-  });
-});
-
-// ── 402 handling ──────────────────────────────────────────────────────────────
-
-describe("BudgetSection — 402 handling", () => {
-  it("shows exceeded banner when GET returns 402", async () => {
-    mockGet.mockRejectedValueOnce(make402Error());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
-    );
-    expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
-  });
-
-  it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
-    mockGet.mockRejectedValueOnce(make402Error());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() =>
-      expect(screen.queryByTestId("budget-loading")).toBeNull()
-    );
-    expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
-    expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
-  });
-
-  it("shows exceeded banner when PATCH returns 402", async () => {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockGet.mockResolvedValueOnce(budgetResponse() as any);
-    mockPatch.mockRejectedValueOnce(make402PatchError());
-    render(<BudgetSection workspaceId="ws-1" />);
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-
-    fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
-    );
-    // Should NOT also show the save-error alert
-    expect(screen.queryByTestId("budget-save-error")).toBeNull();
-  });
-
-  it("clears exceeded banner after a successful save", async () => {
-    mockGet.mockRejectedValueOnce(make402Error());
-    render(<BudgetSection workspaceId="ws-1" />);
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
-    );
-
-    // Now a successful PATCH (limit was raised)
-    const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    mockPatch.mockResolvedValueOnce(updated as any);
-
-    await act(async () => {
-      fireEvent.change(screen.getByTestId("budget-limit-input"), {
-        target: { value: "5000" },
-      });
-      fireEvent.click(screen.getByTestId("budget-save-btn"));
-    });
-
-    await waitFor(() =>
-      expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
-    );
-  });
-});
-
-// ── Non-402 fetch error ───────────────────────────────────────────────────────
-
-describe("BudgetSection — non-402 fetch errors", () => {
-  it("shows fetch error text on non-402 GET failure", async () => {
-    mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() =>
-      expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
-    );
-    expect(screen.getByTestId("budget-fetch-error").textContent).toContain("500");
-  });
-
-  it("does NOT show stats row on fetch error", async () => {
-    mockGet.mockRejectedValueOnce(makeGenericError());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-    expect(screen.queryByTestId("budget-stats-row")).toBeNull();
-  });
-
-  it("does NOT show exceeded banner on non-402 fetch error", async () => {
-    mockGet.mockRejectedValueOnce(makeGenericError());
-    render(<BudgetSection workspaceId="ws-1" />);
-
-    await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
-    expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
+  it("fills to 25%", async () => {
+    await renderLoaded(withMonthly(1000, 250));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("25%");
+  });
+
+  it("fills to 50%", async () => {
+    await renderLoaded(withMonthly(1000, 500));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("50%");
+  });
+
+  it("caps fill at 100% when spend exceeds limit", async () => {
+    await renderLoaded(withMonthly(1000, 4000));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("100%");
+  });
+
+  it("sets aria-valuenow to the computed percentage on the progressbar", async () => {
+    await renderLoaded(withMonthly(1000, 250));
+    const bars = screen.getAllByRole("progressbar");
+    // the monthly bar is the only one rendered (others unlimited)
+    expect(bars).toHaveLength(1);
+    expect(bars[0].getAttribute("aria-valuenow")).toBe("25");
+  });
+
+  it("shows a 0% bar when spend is 0 against a set limit", async () => {
+    await renderLoaded(withMonthly(1000, 0));
+    expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("0%");
  });
 });
@@ -213,6 +213,7 @@ describe("CreateWorkspaceDialog", () => {
    expect(runtimeTexts).toEqual([
      "Claude Code",
      "OpenAI Codex CLI",
+      "Google ADK",
      "Hermes",
      "OpenClaw",
    ]);
@@ -7,10 +7,28 @@ import { api } from "@/lib/api";
 // Types
 // ---------------------------------------------------------------------------

+// Period keys MUST match the server SSOT (workspace-server budget_periods.go).
+type BudgetPeriod = "hourly" | "daily" | "weekly" | "monthly";
+
+const PERIODS: { key: BudgetPeriod; label: string }[] = [
+  { key: "hourly", label: "Hourly" },
+  { key: "daily", label: "Daily" },
+  { key: "weekly", label: "Weekly" },
+  { key: "monthly", label: "Monthly" },
+];
+
+interface PeriodBudget {
+  limit: number | null; // USD cents; null = no limit
+  spend: number; // rolling-window spend, USD cents
+  remaining: number | null; // null when no limit
+}
+
 interface BudgetData {
-  budget_limit: number | null;
-  budget_used?: number; // optional — provisioning-stuck workspaces return partial shapes
-  budget_remaining: number | null;
+  periods?: Partial<Record<BudgetPeriod, PeriodBudget>>;
+  // legacy fields (pre-multi-period server) — tolerated for back-compat
+  budget_limit?: number | null;
+  monthly_spend?: number;
+  budget_remaining?: number | null;
 }

 interface Props {
@@ -26,31 +44,71 @@ function isApiError402(e: unknown): boolean {
  return e instanceof Error && /: 402( |$)/.test(e.message);
 }

+/** USD cents → "$X.XX". */
+function fmtUSD(cents: number): string {
+  return `$${(cents / 100).toLocaleString(undefined, { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
+}
+
+/** Normalize the server payload (multi-period or legacy) into a period map. */
+function periodsFrom(data: BudgetData | null): Record<BudgetPeriod, PeriodBudget> {
+  const base: Record<BudgetPeriod, PeriodBudget> = {
+    hourly: { limit: null, spend: 0, remaining: null },
+    daily: { limit: null, spend: 0, remaining: null },
+    weekly: { limit: null, spend: 0, remaining: null },
+    monthly: { limit: null, spend: 0, remaining: null },
+  };
+  if (!data) return base;
+  if (data.periods) {
+    for (const { key } of PERIODS) {
+      const p = data.periods[key];
+      if (p) base[key] = { limit: p.limit ?? null, spend: p.spend ?? 0, remaining: p.remaining ?? null };
+    }
+    return base;
+  }
+  // legacy: map the single monthly limit/spend
+  base.monthly = {
+    limit: data.budget_limit ?? null,
+    spend: data.monthly_spend ?? 0,
+    remaining: data.budget_remaining ?? null,
+  };
+  return base;
+}
+
 // ---------------------------------------------------------------------------
 // Component
 // ---------------------------------------------------------------------------

 /**
- * BudgetSection — dedicated "Budget" section in the workspace details panel.
- *
- * - Fetches GET /workspaces/:id/budget on mount for live usage stats
- * - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
- * - Allows updating budget_limit via PATCH /workspaces/:id/budget
- * - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
+ * BudgetSection — per-workspace LLM budget, four independent rolling windows
+ * (hourly / daily / weekly / monthly). Each period has its own ceiling (USD);
+ * spend is the rolling-window LLM cost. Crossing ANY period blocks new work
+ * (server returns 402). Sends PATCH {budget_limits:{period:cents|null}}.
 */
 export function BudgetSection({ workspaceId }: Props) {
  const [budget, setBudget] = useState<BudgetData | null>(null);
  const [loading, setLoading] = useState(true);
  const [fetchError, setFetchError] = useState<string | null>(null);

-  const [limitInput, setLimitInput] = useState("");
+  // One input per period, in USD cents (string for controlled inputs).
+  const [limitInputs, setLimitInputs] = useState<Record<BudgetPeriod, string>>({
+    hourly: "",
+    daily: "",
+    weekly: "",
+    monthly: "",
+  });
  const [saving, setSaving] = useState(false);
  const [saveError, setSaveError] = useState<string | null>(null);
-
-  /** True when a 402 has been seen from any API call in this section. */
  const [budgetExceeded, setBudgetExceeded] = useState(false);

-  // ── Fetch current budget data ─────────────────────────────────────────────
+  const syncInputs = useCallback((data: BudgetData | null) => {
+    const p = periodsFrom(data);
+    setLimitInputs({
+      hourly: p.hourly.limit != null ? String(p.hourly.limit) : "",
+      daily: p.daily.limit != null ? String(p.daily.limit) : "",
+      weekly: p.weekly.limit != null ? String(p.weekly.limit) : "",
+      monthly: p.monthly.limit != null ? String(p.monthly.limit) : "",
+    });
+  }, []);

  const loadBudget = useCallback(async () => {
    setLoading(true);
@@ -58,7 +116,7 @@ export function BudgetSection({ workspaceId }: Props) {
    try {
      const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
      setBudget(data);
-      setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
+      syncInputs(data);
    } catch (e) {
      if (isApiError402(e)) {
        setBudgetExceeded(true);
@@ -68,29 +126,30 @@ export function BudgetSection({ workspaceId }: Props) {
    } finally {
      setLoading(false);
    }
-  }, [workspaceId]);
+  }, [workspaceId, syncInputs]);

  useEffect(() => {
    loadBudget();
  }, [loadBudget]);

-  // ── Save handler ──────────────────────────────────────────────────────────
-
  const handleSave = async () => {
    setSaving(true);
    setSaveError(null);
-    const raw = limitInput.trim();
-    // Use explicit empty-string check (not falsy check) so that a
-    // user-entered "0" is sent as budget_limit: 0, not null (unlimited).
-    const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
-
+    // Build the per-period map: blank → null (clear); a number → that ceiling.
+    const budget_limits: Record<BudgetPeriod, number | null> = {
+      hourly: null,
+      daily: null,
+      weekly: null,
+      monthly: null,
+    };
+    for (const { key } of PERIODS) {
+      const raw = limitInputs[key].trim();
+      budget_limits[key] = raw !== "" ? parseInt(raw, 10) : null;
+    }
    try {
-      const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
-        budget_limit: parsedLimit,
-      });
+      const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, { budget_limits });
      setBudget(updated);
-      setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
-      // Clear exceeded state if the save succeeded (limit was raised or removed)
+      syncInputs(updated);
      setBudgetExceeded(false);
    } catch (e) {
      if (isApiError402(e)) {
@@ -103,24 +162,15 @@ export function BudgetSection({ workspaceId }: Props) {
    }
  };

-  // ── Progress calculation ──────────────────────────────────────────────────
-
-  const progressPct =
-    budget && budget.budget_limit != null && budget.budget_limit > 0
-      ? Math.min(100, Math.round(((budget.budget_used ?? 0) / budget.budget_limit) * 100))
-      : 0;
-
-  // ── Render ────────────────────────────────────────────────────────────────
+  const periods = periodsFrom(budget);

  return (
    <div className="space-y-3" data-testid="budget-section">
      {/* Section header */}
      <div>
-        <h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">
-          Budget
-        </h3>
+        <h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">Budget</h3>
        <p className="text-[11px] text-ink-mid mt-0.5">
-          Limit total message credits for this workspace
+          Cap LLM spend for this workspace per period — crossing any limit pauses new work
        </p>
      </div>

@@ -131,32 +181,14 @@ export function BudgetSection({ workspaceId }: Props) {
          data-testid="budget-exceeded-banner"
          className="flex items-center gap-2 px-3 py-2 rounded-lg bg-surface border border-amber-700/50 text-warm text-xs font-medium"
        >
-          <svg
-            width="13"
-            height="13"
-            viewBox="0 0 13 13"
-            fill="none"
-            aria-hidden="true"
-            className="shrink-0"
-          >
-            <path
-              d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
-              stroke="currentColor"
-              strokeWidth="1.4"
-              strokeLinejoin="round"
-            />
-            <path
-              d="M6.5 5.5V7.5M6.5 9.5h.01"
-              stroke="currentColor"
-              strokeWidth="1.4"
-              strokeLinecap="round"
-            />
+          <svg width="13" height="13" viewBox="0 0 13 13" fill="none" aria-hidden="true" className="shrink-0">
+            <path d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z" stroke="currentColor" strokeWidth="1.4" strokeLinejoin="round" />
+            <path d="M6.5 5.5V7.5M6.5 9.5h.01" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" />
          </svg>
-          Budget exceeded — messages blocked
+          Budget exceeded — new work paused
        </div>
      )}

-      {/* Usage stats */}
      {loading ? (
        <p className="text-xs text-ink-mid" data-testid="budget-loading">
          Loading…
@@ -165,89 +197,78 @@ export function BudgetSection({ workspaceId }: Props) {
        <p className="text-xs text-bad" data-testid="budget-fetch-error">
          {fetchError}
        </p>
-      ) : budget ? (
-        <div className="space-y-2">
-          {/* Stats row */}
-          <div className="flex items-baseline justify-between" data-testid="budget-stats-row">
-            <span className="text-xs text-ink-mid">Credits used</span>
-            <span className="text-xs font-mono text-ink-mid">
-              <span data-testid="budget-used-value">{(budget.budget_used ?? 0).toLocaleString()}</span>
-              <span className="text-ink-mid mx-1">/</span>
-              <span data-testid="budget-limit-value">
-                {budget.budget_limit != null
-                  ? budget.budget_limit.toLocaleString()
-                  : "Unlimited"}
-              </span>
-            </span>
-          </div>
+      ) : (
+        <div className="space-y-3">
+          {PERIODS.map(({ key, label }) => {
+            const p = periods[key];
+            const pct =
+              p.limit != null && p.limit > 0 ? Math.min(100, Math.round((p.spend / p.limit) * 100)) : 0;
+            const over = p.limit != null && p.spend >= p.limit;
+            return (
+              <div key={key} className="space-y-1" data-testid={`budget-period-${key}`}>
+                <div className="flex items-baseline justify-between">
+                  <label htmlFor={`budget-${key}-${workspaceId}`} className="text-xs text-ink-mid">
+                    {label}
+                  </label>
+                  <span className="text-[11px] font-mono text-ink-mid">
+                    <span data-testid={`budget-${key}-spend`}>{fmtUSD(p.spend)}</span>
+                    <span className="mx-1">/</span>
+                    <span data-testid={`budget-${key}-limit`}>{p.limit != null ? fmtUSD(p.limit) : "∞"}</span>
+                  </span>
+                </div>
+                {p.limit != null && (
+                  <div
+                    role="progressbar"
+                    aria-label={`${label} budget usage`}
+                    aria-valuenow={pct}
+                    aria-valuemin={0}
+                    aria-valuemax={100}
+                    className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
+                  >
+                    <div
+                      data-testid={`budget-${key}-fill`}
+                      className={`h-full rounded-full transition-all duration-300 ${over ? "bg-bad" : "bg-accent"}`}
+                      style={{ width: `${pct}%` }}
+                    />
+                  </div>
+                )}
+                <input
+                  id={`budget-${key}-${workspaceId}`}
+                  type="number"
+                  min="0"
+                  step="1"
+                  value={limitInputs[key]}
+                  onChange={(e) => setLimitInputs((s) => ({ ...s, [key]: e.target.value }))}
+                  placeholder="USD cents — blank for unlimited"
+                  data-testid={`budget-${key}-input`}
+                  className="w-full bg-surface-card border border-line rounded-lg px-3 py-1.5 text-xs text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
+                />
+              </div>
+            );
+          })}

-          {/* Progress bar (only when limit is set) */}
-          {budget.budget_limit != null && (
+          <p className="text-[11px] text-ink-mid">Limits are USD cents (e.g. 500 = $5.00). Blank = unlimited.</p>
+
+          {saveError && (
            <div
-              role="progressbar"
-              aria-label="Budget usage"
-              aria-valuenow={progressPct}
-              aria-valuemin={0}
-              aria-valuemax={100}
-              className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
+              role="alert"
+              data-testid="budget-save-error"
+              className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
            >
-              <div
-                data-testid="budget-progress-fill"
-                className="h-full rounded-full bg-accent transition-all duration-300"
-                style={{ width: `${progressPct}%` }}
-              />
+              {saveError}
            </div>
          )}

-          {/* Remaining credits */}
-          {budget.budget_remaining != null && (
-            <p className="text-[11px] text-ink-mid" data-testid="budget-remaining">
-              {budget.budget_remaining.toLocaleString()} credits remaining
-            </p>
-          )}
-        </div>
-      ) : null}
-
-      {/* Input + Save */}
-      <div className="space-y-1.5 pt-1">
-        <label
-          htmlFor={`budget-limit-input-${workspaceId}`}
-          className="text-[11px] text-ink-mid block"
-        >
-          Budget limit (credits)
-        </label>
-        <input
-          id={`budget-limit-input-${workspaceId}`}
-          type="number"
-          min="0"
-          step="1"
-          value={limitInput}
-          onChange={(e) => setLimitInput(e.target.value)}
-          placeholder="e.g. 1000 — blank for unlimited"
-          data-testid="budget-limit-input"
-          className="w-full bg-surface-card border border-line rounded-lg px-3 py-2 text-sm text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
-        />
-        <p className="text-xs text-ink-mid">Leave blank for unlimited</p>
-
-        {saveError && (
-          <div
-            role="alert"
-            data-testid="budget-save-error"
-            className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
+          <button
+            onClick={handleSave}
+            disabled={saving}
+            data-testid="budget-save-btn"
+            className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
          >
-            {saveError}
-          </div>
-        )}
-
-        <button
-          onClick={handleSave}
-          disabled={saving}
-          data-testid="budget-save-btn"
-          className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
-        >
-          {saving ? "Saving…" : "Save"}
-        </button>
-      </div>
+            {saving ? "Saving…" : "Save"}
+          </button>
+        </div>
+      )}
    </div>
  );
 }
@@ -377,11 +377,18 @@ export function billingModeForSelectedProvider(
 // config.yaml` on the container is a separate runtime-internal file,
 // not this one.
 const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli", "openclaw"]);
-const SUPPORTED_RUNTIME_VALUES = new Set(["claude-code", "codex", "openclaw", "hermes"]);
+// The runtime picker is SSOT-driven: options come from GET /templates,
+// which workspace-server already gates to the manifest.json maintained set
+// (loadRuntimesFromManifest). A hand-maintained frontend allowlist silently
+// dropped runtimes the backend added (google-adk shipped in manifest but was
+// filtered out, so its workspaces rendered the wrong default option). A
+// template may still opt OUT of the picker via `displayable: false` on its
+// /templates row. See project_canvas_runtime_dropdown_ssot_fix.

 const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
  { value: "claude-code", label: "Claude Code", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
  { value: "codex", label: "Codex", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
+  { value: "google-adk", label: "Google ADK", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
  { value: "openclaw", label: "OpenClaw", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
  { value: "hermes", label: "Hermes", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
 ];
@@ -585,13 +592,16 @@ export function ConfigTab({ workspaceId }: Props) {
      registry_backed?: boolean;
      registry_providers?: RegistryProvider[];
      registry_models?: RegistryModel[];
+      displayable?: boolean;
    }>>("/templates")
      .then((rows) => {
        if (cancelled || !Array.isArray(rows)) return;
        const byRuntime = new Map<string, RuntimeOption>();
        for (const r of rows) {
          const v = (r.runtime || "").trim();
-          if (!SUPPORTED_RUNTIME_VALUES.has(v)) continue;
+          if (!v) continue;
+          // Honor an explicit opt-out; absent/true means show it.
+          if (r.displayable === false) continue;
          // Last template wins if two templates share a runtime — rare, and the
          // one with the richer models list is probably newer.
          const existing = byRuntime.get(v);
@@ -29,8 +29,15 @@ type FormState = {
  displayMode: string;
  displayProtocol: string;
  resolution: string;
+  dataPersistence: string; // "" (auto) | "persist" | "ephemeral" — internal#734
 };

+// internal#734: per-workspace durable-data choice. "" = auto (desktop-control
+// keeps data, others follow the org default). Human labels for the selector.
+const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
+const dataPersistenceLabel = (v: string): string =>
+  v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
+
 export function ContainerConfigTab({ workspaceId, data }: Props) {
  const runtime = data.runtime;
  const instanceType = data.compute?.instance_type;
@@ -39,9 +46,10 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
  const displayProtocol = data.compute?.display?.protocol;
  const displayWidth = data.compute?.display?.width;
  const displayHeight = data.compute?.display?.height;
+  const dataPersistence = data.compute?.data_persistence;
  const initial = useMemo(
-    () => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight }),
-    [runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight],
+    () => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence }),
+    [runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence],
  );
  const [form, setForm] = useState<FormState>(initial);
  const [saving, setSaving] = useState(false);
@@ -84,6 +92,8 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
          display: form.displayEnabled
            ? { mode: form.displayMode, protocol: form.displayProtocol, width, height }
            : { mode: "none" },
+          // internal#734: omit when "auto" so the wire/default behavior is unchanged.
+          ...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
        };

        const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
@@ -176,6 +186,18 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
              onChange={(resolution) => setForm((s) => ({ ...s, resolution }))}
            />
          )}
+          <SelectField
+            id="data-persistence"
+            label="Saved data (cookies, downloads, memory)"
+            value={form.dataPersistence}
+            options={DATA_PERSISTENCE_OPTIONS}
+            optionLabel={dataPersistenceLabel}
+            onChange={(dataPersistence) => setForm((s) => ({ ...s, dataPersistence }))}
+          />
+          <p className="-mt-1 text-[10px] leading-snug text-ink-soft">
+            Whether this workspace&apos;s data survives a restart/recreate. Auto keeps it for
+            browser (desktop) workspaces; Ephemeral never keeps it (privacy).
+          </p>
        </div>

        <div className="mt-4 flex items-center justify-end gap-2">
@@ -231,6 +253,7 @@ function formFromData(data: {
  displayProtocol?: string;
  displayWidth?: number;
  displayHeight?: number;
+  dataPersistence?: string;
 }): FormState {
  const width = data.displayWidth ?? 1920;
  const height = data.displayHeight ?? 1080;
@@ -243,6 +266,7 @@ function formFromData(data: {
    displayMode: data.displayMode && data.displayMode !== "none" ? data.displayMode : "desktop-control",
    displayProtocol: data.displayProtocol || "novnc",
    resolution,
+    dataPersistence: data.dataPersistence || "",
  };
 }

@@ -29,6 +29,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
  const [peers, setPeers] = useState<PeerData[]>([]);
  const [saving, setSaving] = useState(false);
  const [confirmDelete, setConfirmDelete] = useState(false);
+  const [eraseData, setEraseData] = useState(false); // internal#734: erase saved data on delete
  const [peersError, setPeersError] = useState<string | null>(null);
  const [saveError, setSaveError] = useState<string | null>(null);
  const [deleteError, setDeleteError] = useState<string | null>(null);
@@ -93,7 +94,10 @@ export function DetailsTab({ workspaceId, data }: Props) {
  const handleDelete = async () => {
    setDeleteError(null);
    try {
-      await api.del(`/workspaces/${workspaceId}?confirm=true`, {
+      // internal#734: erase_data=true asks the server to prune this workspace's
+      // durable data volume (cookies / downloads / memory). Default off keeps it
+      // for the orphan-sweeper grace.
+      await api.del(`/workspaces/${workspaceId}?confirm=true${eraseData ? "&erase_data=true" : ""}`, {
        headers: { "X-Confirm-Name": name },
      });
      // Mirror the server-side cascade — drop the row + every
@@ -323,6 +327,19 @@ export function DetailsTab({ workspaceId, data }: Props) {
            <h3 id="delete-confirm-title" className="text-xs font-medium text-bad">
              Confirm deletion
            </h3>
+            <label className="flex items-start gap-2 text-[11px] text-ink-mid">
+              <input
+                type="checkbox"
+                aria-label="Also erase saved data"
+                checked={eraseData}
+                onChange={(e) => setEraseData(e.target.checked)}
+                className="mt-0.5 h-3.5 w-3.5 accent-red-600"
+              />
+              <span>
+                Also erase saved data (cookies, downloads, agent memory). Cannot be undone.
+                Unchecked keeps it recoverable briefly.
+              </span>
+            </label>
            <div className="flex gap-2">
              <button
                type="button"
@@ -339,6 +356,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
                onClick={() => {
                  setConfirmDelete(false);
                  setDeleteError(null);
+                  setEraseData(false);
                  // Return focus to the trigger so keyboard users aren't stranded
                  deleteButtonRef.current?.focus();
                }}
@@ -5,9 +5,10 @@ import React from "react";
 import { BudgetSection } from "../BudgetSection";
 import { api } from "@/lib/api";

-// Queue-based mock for the api module. Each api call shifts from the queue.
-// Tests push with qGet/qPatch and the module-level mockImplementation
-// reads from the queue.
+// Multi-period budget (#49): the API now returns a `periods` map
+// (hourly/daily/weekly/monthly), each {limit, spend, remaining} in USD cents.
+// The UI renders one row per period and PATCHes {budget_limits:{period:cents|null}}.
+
 type QueueEntry = { body?: unknown; err?: Error };
 const apiQueue: QueueEntry[] = [];

@@ -40,45 +41,49 @@ const WS_ID = "budget-test-ws";
 function qGet(body: unknown) {
  apiQueue.push({ body });
 }
-
 function qGetErr(status: number, msg: string) {
  apiQueue.push({ err: new Error(`${msg}: ${status}`) });
 }
-
 function qPatch(body: unknown) {
  apiQueue.push({ body });
 }
-
 function qPatchErr(status: number, msg: string) {
  apiQueue.push({ err: new Error(`${msg}: ${status}`) });
 }

-function makeBudget(overrides: Partial<{
-  budget_limit: number | null;
-  budget_used: number;
-  budget_remaining: number | null;
-}> = {}) {
+type P = { limit: number | null; spend: number; remaining: number | null };
+
+// makeBudget builds the periods response. Override any subset of periods.
+function makeBudget(overrides: Partial<Record<"hourly" | "daily" | "weekly" | "monthly", Partial<P>>> = {}) {
+  const blank: P = { limit: null, spend: 0, remaining: null };
+  const mk = (o?: Partial<P>): P => {
+    const p = { ...blank, ...(o ?? {}) };
+    if (p.limit != null && p.remaining == null) p.remaining = p.limit - p.spend;
+    return p;
+  };
+  const periods = {
+    hourly: mk(overrides.hourly),
+    daily: mk(overrides.daily),
+    weekly: mk(overrides.weekly),
+    monthly: mk(overrides.monthly),
+  };
  return {
-    budget_limit: 10_000,
-    budget_used: 3_500,
-    budget_remaining: 6_500,
-    ...overrides,
+    periods,
+    budget_limit: periods.monthly.limit,
+    monthly_spend: periods.monthly.spend,
+    budget_remaining: periods.monthly.remaining,
  };
 }

-describe("BudgetSection", () => {
+describe("BudgetSection (multi-period)", () => {
  describe("loading state", () => {
    it("shows loading indicator while fetching", async () => {
      let resolveGet: (v: unknown) => void;
      vi.mocked(api.get).mockImplementationOnce(
        async () => new Promise((r) => { resolveGet = r as (v: unknown) => void; }),
      );
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      expect(screen.getByTestId("budget-loading")).toBeTruthy();
-
-      // Resolve after render to verify state clears
      resolveGet!(makeBudget());
      await vi.waitFor(() => {
        expect(screen.queryByTestId("budget-loading")).toBeNull();
@@ -89,21 +94,16 @@ describe("BudgetSection", () => {
  describe("fetch error state", () => {
    it("shows error message on non-402 fetch failure", async () => {
      qGetErr(500, "Internal Server Error");
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
      });
      expect(screen.getByTestId("budget-fetch-error")!.textContent).toContain("500");
    });

-    it("shows 402 as exceeded banner, not fetch error", async () => {
-      // 402 means the budget limit was hit — different UX from a network/API error.
+    it("shows the exceeded banner (not a fetch error) on a 402", async () => {
      qGetErr(402, "Payment Required");
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
      });
@@ -111,220 +111,105 @@ describe("BudgetSection", () => {
    });
  });

-  describe("budget loaded — display", () => {
-    it("renders used / limit stats row", async () => {
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500 }));
-
+  describe("rendering periods", () => {
+    it("renders all four period rows", async () => {
+      qGet(makeBudget());
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-used-value")!.textContent).toBe("3,500");
-      });
-      expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("10,000");
-    });
-
-    it("renders 'Unlimited' when budget_limit is null", async () => {
-      qGet(makeBudget({ budget_limit: null, budget_used: 1_000, budget_remaining: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("Unlimited");
+        for (const k of ["hourly", "daily", "weekly", "monthly"]) {
+          expect(screen.getByTestId(`budget-period-${k}`)).toBeTruthy();
+        }
      });
    });

-    it("renders remaining credits when present", async () => {
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: 6_500 }));
-
+    it("formats spend and limit as USD per period", async () => {
+      qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-remaining")!.textContent).toContain("6,500");
-        expect(screen.getByTestId("budget-remaining")!.textContent).toContain("credits remaining");
+        expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$35.00");
+      });
+      expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$100.00");
+    });
+
+    it("shows ∞ for a period with no limit", async () => {
+      qGet(makeBudget({ hourly: { limit: null, spend: 1_000 } }));
+      render(<BudgetSection workspaceId={WS_ID} />);
+      await vi.waitFor(() => {
+        expect(screen.getByTestId("budget-hourly-limit")!.textContent).toBe("∞");
      });
    });

-    it("omits remaining credits when budget_remaining is null", async () => {
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: null }));
-
+    it("renders the progress bar only for periods with a limit", async () => {
+      qGet(makeBudget({ monthly: { limit: 10_000, spend: 12_000 }, hourly: { limit: null, spend: 5_000 } }));
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-remaining")).toBeNull();
-      });
-    });
-
-    it("caps progress bar at 100% when used > limit", async () => {
-      // Over-limit: 12000 used of 10000 limit should show 100%, not 120%.
-      qGet(makeBudget({ budget_limit: 10_000, budget_used: 12_000, budget_remaining: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        const fill = screen.getByTestId("budget-progress-fill");
-        expect(fill.getAttribute("style")).toContain("100%");
-      });
-    });
-
-    it("omits progress bar when budget_limit is null (unlimited)", async () => {
-      qGet(makeBudget({ budget_limit: null, budget_used: 5_000, budget_remaining: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-progress-fill")).toBeNull();
+        expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
      });
+      expect(screen.queryByTestId("budget-hourly-fill")).toBeNull();
+      // over-budget fill caps at 100%
+      const fill = screen.getByTestId("budget-monthly-fill") as HTMLElement;
+      expect(fill.style.width).toBe("100%");
    });
  });

-  describe("budget exceeded (402)", () => {
-    it("shows exceeded banner when load returns 402", async () => {
-      qGetErr(402, "Payment Required");
-
+  describe("save", () => {
+    it("PATCHes budget_limits for all four periods and clears the exceeded banner", async () => {
+      qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
+      qPatch(makeBudget({ hourly: { limit: 500, spend: 0 }, monthly: { limit: 20_000, spend: 0 } }));
      render(<BudgetSection workspaceId={WS_ID} />);
+      await vi.waitFor(() => {
+        expect(screen.getByTestId("budget-hourly-input")).toBeTruthy();
+      });
+
+      fireEvent.change(screen.getByTestId("budget-hourly-input"), { target: { value: "500" } });
+      fireEvent.click(screen.getByTestId("budget-save-btn"));

      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
-        expect(screen.getByTestId("budget-exceeded-banner")!.textContent).toContain("Budget exceeded");
+        expect(vi.mocked(api.patch)).toHaveBeenCalled();
+      });
+      const [, body] = vi.mocked(api.patch).mock.calls[0];
+      expect((body as { budget_limits: Record<string, number | null> }).budget_limits).toMatchObject({
+        hourly: 500,
+        monthly: 10_000, // unchanged input echoes the loaded limit
      });
    });

-    it("clears exceeded banner after successful save", async () => {
-      qGetErr(402, "Payment Required");
-      qPatch(makeBudget({ budget_limit: 50_000, budget_used: 0, budget_remaining: 50_000 }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
-      });
-
-      const input = screen.getByTestId("budget-limit-input");
-      fireEvent.change(input, { target: { value: "50000" } });
-
-      const saveBtn = screen.getByTestId("budget-save-btn");
-      fireEvent.click(saveBtn);
-
-      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
-      });
-    });
-  });
-
-  describe("save flow", () => {
-    it("shows save error on non-402 patch failure", async () => {
+    it("shows a save error on non-402 PATCH failure", async () => {
      qGet(makeBudget());
      qPatchErr(500, "Internal Server Error");
-
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
+        expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
      });
-
-      const saveBtn = screen.getByTestId("budget-save-btn");
-      fireEvent.click(saveBtn);
-
+      fireEvent.click(screen.getByTestId("budget-save-btn"));
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-save-error")).toBeTruthy();
-        expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
      });
+      expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
    });

-    it("updates input to new limit value after successful save", async () => {
-      qGet(makeBudget({ budget_limit: 10_000 }));
-      qPatch(makeBudget({ budget_limit: 20_000 }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      // Wait for the input to appear (loading → loaded)
-      await vi.waitFor(() => {
-        expect(screen.queryByTestId("budget-loading")).toBeNull();
-      });
-
-      const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-      // Debug: check what values are rendered
-      const limitValue = screen.getByTestId("budget-limit-value")?.textContent;
-      expect(input.value).toBe("10000"); // initial value from API
-      expect(limitValue).toBe("10,000");
-
-      fireEvent.change(input, { target: { value: "20000" } });
-      expect(input.value).toBe("20000");
-
-      fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-      await vi.waitFor(() => {
-        expect((screen.getByTestId("budget-limit-input") as HTMLInputElement).value).toBe("20000");
-      });
-    });
-
-    it("sends null when input is cleared (unlimited)", async () => {
-      qGet(makeBudget({ budget_limit: 10_000 }));
-      qPatch(makeBudget({ budget_limit: null }));
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
-      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
-      });
-
-      const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
-      fireEvent.change(input, { target: { value: "" } });
-      fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-      await vi.waitFor(() => {
-        // After save with null limit, input should show empty (unlimited)
-        expect(input.value).toBe("");
-      });
-    });
-
-    it("shows saving state on button while patch is in flight", async () => {
+    it("surfaces the exceeded banner on a 402 PATCH", async () => {
      qGet(makeBudget());
-      let resolvePatch: (v: unknown) => void;
-      vi.mocked(api.patch).mockImplementationOnce(
-        async () => new Promise((r) => { resolvePatch = r as (v: unknown) => void; }),
-      );
-
+      qPatchErr(402, "Payment Required");
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
+        expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
      });
-
-      fireEvent.change(screen.getByTestId("budget-limit-input"), { target: { value: "50000" } });
      fireEvent.click(screen.getByTestId("budget-save-btn"));
-
-      const btn = screen.getByTestId("budget-save-btn");
-      expect(btn.textContent).toContain("Saving");
-
-      resolvePatch!(makeBudget({ budget_limit: 50_000 }));
-      await vi.waitFor(() => {
-        expect(btn.textContent).toContain("Save");
-      });
-    });
-  });
-
-  describe("isApiError402 — regression coverage", () => {
-    it("classifies ': 402' with space as 402", async () => {
-      qGetErr(402, "Payment Required");
-      qPatch(makeBudget());
-
-      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
        expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
      });
    });
+  });

-    it("classifies non-402 error messages as regular fetch errors", async () => {
-      qGetErr(503, "Service Unavailable");
-
+  describe("legacy payload back-compat", () => {
+    it("maps a pre-multi-period {budget_limit, monthly_spend} response to the monthly row", async () => {
+      qGet({ budget_limit: 5_000, monthly_spend: 1_000, budget_remaining: 4_000 });
      render(<BudgetSection workspaceId={WS_ID} />);
-
      await vi.waitFor(() => {
-        expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
+        expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$50.00");
      });
-      expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
+      expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$10.00");
    });
  });
 });
@@ -0,0 +1,87 @@
+// @vitest-environment jsdom
+//
+// Regression: project_canvas_runtime_dropdown_ssot_fix — a google-adk
+// workspace's Config tab showed the wrong runtime ("LangGraph (default)"
+// / first option) because a hardcoded frontend allowlist
+// (SUPPORTED_RUNTIME_VALUES) dropped google-adk from the /templates-derived
+// options even though the backend served it. A Save from that state would
+// PATCH runtime to the wrong value and break the ADK agent.
+//
+// The fix: the dropdown is SSOT-driven — it trusts GET /templates (which the
+// backend already gates to the manifest maintained set) and hides a runtime
+// only when its row carries `displayable: false`. This pins: a google-adk
+// workspace shows "google-adk" selected, and a displayable:false template is
+// not offered.
+import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
+import { render, screen, cleanup, waitFor } from "@testing-library/react";
+import React from "react";
+
+afterEach(cleanup);
+
+const apiGet = vi.fn();
+const apiPatch = vi.fn();
+const apiPut = vi.fn();
+vi.mock("@/lib/api", () => ({
+  api: {
+    get: (path: string) => apiGet(path),
+    patch: (path: string, body: unknown) => apiPatch(path, body),
+    put: (path: string, body: unknown) => apiPut(path, body),
+    post: vi.fn(),
+    del: vi.fn(),
+  },
+}));
+
+vi.mock("@/store/canvas", () => ({
+  useCanvasStore: Object.assign(
+    (selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
+    { getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
+  ),
+}));
+
+vi.mock("../AgentCardSection", () => ({
+  AgentCardSection: () => <div data-testid="agent-card-stub" />,
+}));
+
+import { ConfigTab } from "../ConfigTab";
+
+function wireApi(templates: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; displayable?: boolean }>) {
+  apiGet.mockImplementation((path: string) => {
+    if (path === "/workspaces/ws-adk") return Promise.resolve({ runtime: "google-adk" });
+    if (path === "/workspaces/ws-adk/model") return Promise.resolve({ model: "vertex:gemini-2.5-pro" });
+    if (path === "/workspaces/ws-adk/files/config.yaml") return Promise.resolve({ content: "name: adk\nruntime: google-adk\n" });
+    if (path === "/templates") return Promise.resolve(templates);
+    return Promise.reject(new Error(`unmocked api.get: ${path}`));
+  });
+}
+
+beforeEach(() => {
+  apiGet.mockReset();
+  apiPatch.mockReset();
+  apiPut.mockReset();
+});
+
+describe("ConfigTab — google-adk runtime (SSOT dropdown)", () => {
+  it("shows google-adk selected in the runtime dropdown (#ssot-fix)", async () => {
+    wireApi([
+      { id: "claude-code", name: "Claude Code", runtime: "claude-code", models: [] },
+      { id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
+    ]);
+    render(<ConfigTab workspaceId="ws-adk" />);
+    const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
+    expect((select as HTMLSelectElement).value).toBe("google-adk");
+    const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
+    expect(opts).toContain("google-adk");
+  });
+
+  it("hides a template flagged displayable:false", async () => {
+    wireApi([
+      { id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
+      { id: "legacy", name: "Legacy", runtime: "legacy", models: [], displayable: false },
+    ]);
+    render(<ConfigTab workspaceId="ws-adk" />);
+    const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
+    const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
+    expect(opts).toContain("google-adk");
+    expect(opts).not.toContain("legacy");
+  });
+});
@@ -297,6 +297,25 @@ describe("DetailsTab — delete workflow", () => {
    expect(mockSelectNode).toHaveBeenCalledWith(null);
  });

+  // internal#734: checking "also erase saved data" adds &erase_data=true so the
+  // server prunes the data volume. Default (unchecked) must NOT send it.
+  it("checking erase-saved-data sends erase_data=true on delete", async () => {
+    mockApi.del.mockResolvedValue(undefined);
+    render(<DetailsTab workspaceId="ws-1" data={data()} />);
+    await flush();
+    fireEvent.click(screen.getByRole("button", { name: /delete workspace/i }));
+    await flush();
+    fireEvent.click(screen.getByRole("checkbox", { name: /erase saved data/i }));
+    const confirmBtn = Array.from(document.querySelectorAll("button")).find(
+      (b) => b.textContent === "Confirm Delete",
+    ) as HTMLButtonElement;
+    fireEvent(confirmBtn, new MouseEvent("click", { bubbles: true }));
+    await flush();
+    expect(mockApi.del).toHaveBeenCalledWith("/workspaces/ws-1?confirm=true&erase_data=true", {
+      headers: { "X-Confirm-Name": "Test Workspace" },
+    });
+  });
+
  it("cancelling delete returns to view mode", async () => {
    mockApi.del.mockResolvedValue(undefined);
    render(<DetailsTab workspaceId="ws-1" data={data()} />);
@@ -5,6 +5,7 @@
 const RUNTIME_NAMES: Record<string, string> = {
  "claude-code": "Claude Code",
  codex: "Codex",
+  "google-adk": "Google ADK",
  hermes: "Hermes",
  openclaw: "OpenClaw",
  kimi: "Kimi",
@@ -368,6 +368,9 @@ export interface WorkspaceCompute {
    width?: number;
    height?: number;
  };
+  // internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
+  // undefined (auto). Controls whether the data volume survives recreate.
+  data_persistence?: string;
 }

 let socket: ReconnectingSocket | null = null;
@@ -1,7 +1,7 @@
 # Molecule AI — Comprehensive Technical Documentation

 > Definitive technical reference for the Molecule AI Agent Team platform.
-> Based on a full non-invasive scan of the [molecule-monorepo](https://git.moleculesai.app/molecule-ai/molecule-monorepo) repository.
+> Based on a full non-invasive scan of the [molecule-core](https://git.moleculesai.app/molecule-ai/molecule-core) repository.

 ---

@@ -1131,11 +1131,11 @@ Molecule AI's workspace abstraction is **runtime-agnostic by design**. A workspa

 ## Links

- **GitHub**: https://git.moleculesai.app/molecule-ai/molecule-monorepo
- **Architecture Docs**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/architecture
- **API Protocol**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/api-protocol
- **Agent Runtime**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/agent-runtime
- **Product Docs**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/product
+- **GitHub**: https://git.moleculesai.app/molecule-ai/molecule-core
+- **Architecture Docs**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/architecture
+- **API Protocol**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/api-protocol
+- **Agent Runtime**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/agent-runtime
+- **Product Docs**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/product

 ---

@@ -82,7 +82,7 @@ DATABASE_URL=postgres://dev:dev@postgres:5432/molecule?sslmode=prefer
 REDIS_URL=redis://redis:6379
 PORT=8080
 SECRETS_ENCRYPTION_KEY=dev-key-change-in-production
-WORKSPACE_DIR=/path/to/molecule-monorepo   # Optional global fallback; prefer per-workspace workspace_dir in org.yaml or API
+WORKSPACE_DIR=/path/to/molecule-core   # Optional global fallback; prefer per-workspace workspace_dir in org.yaml or API
 ```

 ### Canvas (Next.js)
@@ -16,11 +16,9 @@ workspace container running on it) over an [EC2 Instance Connect
 Endpoint](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-connect-setup-ec2-instance-connect-endpoint.html).
 End users see a terminal; no direct public SSH ingress is required.

-Tracking: originally `molecule-core#1528` (resolved 2026-04-22). The
-`molecule-core` repo has since been renamed to `molecule-monorepo` and no
-longer accepts new issues under the old name; future terminal work is
-tracked in `molecule-monorepo` issues (workspace-server scope) and in
-`molecule-controlplane` issues for the EIC / per-tenant SG path.
+Tracking: originally `molecule-core#1528` (resolved 2026-04-22). Future
+terminal work is tracked in `molecule-core` issues (workspace-server scope)
+and in `molecule-controlplane` issues for the EIC / per-tenant SG path.

 ## Where things are

@@ -64,7 +64,7 @@ When opencode connects to the Molecule MCP endpoint, the agent gains access to:
  "tool": "delegate_task",
  "arguments": {
    "target": "research-lead",
-    "task": "Summarise the last 7 days of commits in Molecule-AI/molecule-monorepo"
+    "task": "Summarise the last 7 days of commits in Molecule-AI/molecule-core"
  }
 }
 ```
@@ -1,6 +1,6 @@
 # Internal content policy

-The `Molecule-AI/molecule-monorepo` repo is **public**. Anything internal
+The `Molecule-AI/molecule-core` repo is **public**. Anything internal
 (positioning, competitive briefs, sales playbooks, PMM/press drip, draft
 campaigns, raw research notes, ops runbooks, retrospectives) lives in
 **`Molecule-AI/internal`**.
@@ -18,14 +18,14 @@ This page is the canonical decision tree.
 | Draft campaign asset (still iterating, not yet customer-visible) | `Molecule-AI/internal/marketing/campaigns/` |
 | Roadmap discussion, planning doc, retrospective | `Molecule-AI/internal/PLAN.md` or `Molecule-AI/internal/retrospectives/` |
 | Runbook, ops procedure, incident postmortem | `Molecule-AI/internal/runbooks/` |
-| **Public-ready** blog post (final draft, ready to ship to docs site) | `Molecule-AI/molecule-monorepo/docs/blog/` |
-| **Public-ready** tutorial / quickstart | `Molecule-AI/molecule-monorepo/docs/tutorials/` |
-| Public DevRel content (code samples, demos for users) | `Molecule-AI/molecule-monorepo/docs/devrel/` |
-| API reference, architecture docs for external developers | `Molecule-AI/molecule-monorepo/docs/api/` |
+| **Public-ready** blog post (final draft, ready to ship to docs site) | `Molecule-AI/molecule-core/docs/blog/` |
+| **Public-ready** tutorial / quickstart | `Molecule-AI/molecule-core/docs/tutorials/` |
+| Public DevRel content (code samples, demos for users) | `Molecule-AI/molecule-core/docs/devrel/` |
+| API reference, architecture docs for external developers | `Molecule-AI/molecule-core/docs/api/` |
 | Code, tests, infrastructure | wherever is appropriate inside this repo |

 **Rule of thumb:** *"Would I be comfortable if a competitor / journalist / customer
-read this verbatim today?"* — yes → `monorepo/docs/`. No / not yet → `internal/`.
+read this verbatim today?"* — yes → `molecule-core/docs/`. No / not yet → `internal/`.

 ## Why

@@ -82,7 +82,7 @@ git push -u origin HEAD
 gh pr create --base main --fill
 ```

-Yes, this is more steps than `cd molecule-monorepo && git add research/foo.md`.
+Yes, this is more steps than `cd molecule-core && git add research/foo.md`.
 That cost is intentional: the friction is the point. Public space and
 internal space are different products with different audiences and
 different durability guarantees.
@@ -17,8 +17,8 @@ This path is aligned to the current repository and current UI. It gets you from
 ## The one-command path

 ```bash
-git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
-cd molecule-monorepo
+git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
+cd molecule-core
 ./scripts/dev-start.sh
 ```

@@ -42,8 +42,8 @@ If you'd rather run each component yourself — useful when you're iterating on
 ### Step 1: Clone the repository

 ```bash
-git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
-cd molecule-monorepo
+git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
+cd molecule-core
 ```

 ### Step 2: Start the shared infrastructure
@@ -0,0 +1,124 @@
+# Engineer-Agent Gitea Token Scope Runbook
+
+## Symptom
+
+Engineer-class agents (e.g. `agent-dev-a`, `agent-dev-b`) fail swarm-pull issue discovery or receive HTTP 403 when calling Gitea issue-list APIs, while PR review and repository API operations continue to work.
+
+Typical failing call:
+```bash
+GET /api/v1/repos/molecule-ai/molecule-core/issues?state=open&labels=approved&limit=50
+# => 403 Forbidden
+```
+
+Typical working calls (same token):
+```bash
+GET /api/v1/repos/molecule-ai/molecule-core/pulls?state=open&limit=50
+POST /api/v1/repos/molecule-ai/molecule-core/pulls/1666/comments
+# => 200 OK
+```
+
+## Root Cause
+
+Gitea v1.22.6 routes issue-list under the `Issue` scope category (`routers/api/v1/api.go:1379-1491`), while PR routes live under repository/pull routing (`api.go:1278-1305`). The scope gate derives required read/write level from HTTP method (`api.go:309-313`), so `GET /issues?...` requires `read:issue`.
+
+Engineer-class agent PATs were provisioned with repository and PR scopes but without `read:issue`, causing the asymmetric 403.
+
+## Detection
+
+1. **Agent-side**: swarm-pull workflow logs show `403 Forbidden` on issue enumeration but not on PR list/review.
+2. **Platform-side**: Gitea access logs show `GET /repos/{owner}/{repo}/issues` returning 403 for the affected token.
+3. **Reproduction** (from any workspace with a suspected token):
+   ```bash
+   TOKEN=$(cat /configs/secrets.d/GITEA_TOKEN)
+   PLATFORM="https://git.moleculesai.app"
+
+   # Should succeed — confirms token is live
+   curl -s -o /dev/null -w "%{http_code}" \
+     -H "Authorization: token $TOKEN" \
+     "$PLATFORM/api/v1/user"
+
+   # Will 403 if the token lacks read:issue
+   curl -s -o /dev/null -w "%{http_code}" \
+     -H "Authorization: token $TOKEN" \
+     "$PLATFORM/api/v1/repos/molecule-ai/molecule-core/issues?state=open&limit=1"
+   ```
+
+## Immediate Fix
+
+### Step 1: Issue fresh PATs with correct scopes
+
+From a Gitea site-admin account (or via the Gitea web UI → Settings → Applications):
+
+1. Navigate to the affected user's profile (e.g. `agent-dev-a`).
+2. Go to **Settings → Applications → Generate New Token**.
+3. Select scopes:
+   - `read:repository` (existing)
+   - `write:repository` (existing, if push is required)
+   - `read:issue` (**add this**)
+   - `write:issue` (add only if agents must comment/edit issues)
+   - `read:pull-request` / `write:pull-request` (existing)
+   - `read:comment` / `write:comment` (existing, if PR review is required)
+4. Copy the plaintext token immediately — it is shown only once.
+
+### Step 2: Update workspace secrets
+
+For each affected engineer workspace, update the Gitea token secret:
+
+```bash
+# Via the platform API (admin auth required)
+PLATFORM="https://agents-team.moleculesai.app"
+ADMIN_TOKEN="<your-admin-token>"
+WORKSPACE_ID="<affected-workspace-id>"
+NEW_GITEA_TOKEN="<fresh-token-from-step-1>"
+
+curl -X POST "$PLATFORM/workspaces/$WORKSPACE_ID/secrets" \
+  -H "Authorization: Bearer $ADMIN_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d "{
+    \"GITEA_TOKEN\": \"$NEW_GITEA_TOKEN\"
+  }"
+```
+
+Restart the workspace so the runtime re-reads secrets:
+```bash
+curl -X POST "$PLATFORM/workspaces/$WORKSPACE_ID/restart" \
+  -H "Authorization: Bearer $ADMIN_TOKEN"
+```
+
+### Step 3: Smoke-test
+
+From the restarted workspace, verify all three paths:
+
+```bash
+# 1. Issue list (the previously failing path)
+curl -s -H "Authorization: token $GITEA_TOKEN" \
+  "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/issues?state=open&labels=approved&limit=1" | jq '.[0].number'
+
+# 2. PR list (should still work)
+curl -s -H "Authorization: token $GITEA_TOKEN" \
+  "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/pulls?state=open&limit=1" | jq '.[0].number'
+
+# 3. Swarm-pull discovery (end-to-end)
+# Trigger the agent's autonomous tick or delegate a task that enumerates open issues.
+```
+
+## Long-Term Fix
+
+Update the **workspace secret injection path** that writes `/configs/secrets.d/GITEA_TOKEN` for engineer-class agents. The provisioning template or secret-distribution job should request `read:issue` (and optionally `write:issue`) at token-creation time.
+
+File locations to audit:
+- `.gitea/scripts/` — any token-provisioning automation
+- `infra/terraform/` or equivalent — IAM/secret-manager templates
+- `workspace-configs-templates/` — engineer-class workspace templates that declare required secrets
+
+## Prevention
+
+1. **Token scope checklist**: when provisioning new engineer-class agent tokens, verify the scope set includes `read:issue` before distributing the secret.
+2. **Monitoring**: add an agent health-check that probes `GET /repos/molecule-ai/molecule-core/issues?limit=1` and surfaces a non-fatal warning if it returns 403.
+3. **Documentation**: update the onboarding runbook for new engineer agents to include the full required scope list.
+
+## References
+
+- Gitea issue #1750: [RCA: engineer-token read:issue scope gap blocks swarm-pull workflow](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1750)
+- Gitea source: `routers/api/v1/api.go:309-313` (scope gate), `api.go:1278-1305` (PR routing), `api.go:1379-1491` (issue routing)
+- Related: PR #1542 (provisioner git-creds injection), PR #1669 (auth_token inline mint)
@@ -29,6 +29,7 @@
    {"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
    {"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
    {"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
+    {"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
    {"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
  ],
  "org_templates": [
@@ -93,9 +93,7 @@ def _gitea_get(path: str, params: dict[str, str] | None = None) -> bytes | None:
    try:
        # S310 (信任boundary): this function IS the outbound HTTP client for
        # Gitea API calls. The call is intentional and controlled — we build
-        # the request ourselves and handle errors explicitly. Timeout=20s
-        # prevents indefinite hangs.
-        with urllib.request.urlopen(req, timeout=20) as resp:  # noqa: S310
+        with urllib.request.urlopen(req, timeout=20) as resp:  # noqa: S310  # explicit timeout + error handling; bandit false positive
            return resp.read()
    except urllib.error.HTTPError as e:
        sys.stderr.write(f"Gitea API HTTP {e.code} on {path}: {e.reason}\n")
@@ -1,12 +1,13 @@
 #!/usr/bin/env bash
-# E2E test: A2A round-trip parity across all four runtimes.
+# E2E test: A2A round-trip parity across all five runtimes.
 #
-# Validates that for each of {claude-code, hermes, codex, openclaw}:
+# Validates that for each of {claude-code, hermes, codex, openclaw, google-adk}:
 #   1. A workspace can be provisioned + brought online
 #   2. The adapter responds to A2A message/send
 #   3. The reply contains expected content (echo of the prompt)
 #   4. A SECOND message preserves session state where the runtime
-#      supports it (currently: hermes via plugin path)
+#      supports it (currently: hermes via plugin path; google-adk via
+#      ADK InMemorySessionService keyed on A2A context_id)
 #
 # Targets a SaaS tenant subdomain. Provisions workspaces in the calling
 # tenant, runs the round-trip, deletes them on success.
@@ -16,6 +17,10 @@
 #       (e.g. https://demo-tenant.staging.moleculesai.app)
 #   - $OPENROUTER_API_KEY (or $HERMES_API_KEY) for non-claude runtimes
 #   - $OPENAI_API_KEY for claude-code peer
+#   - $GOOGLE_API_KEY (AI Studio) for google-adk — the org disallows API
+#       keys in PROD (Vertex+ADC there), but CI auths Gemini with an
+#       AI-Studio key (config model google_genai:gemini-2.5-pro). Vertex
+#       stays supported; this is the keyed CI path only.
 #   - SaaS edge requires Origin header — see auto-memory
 #       reference_saas_waf_origin_header.md
 #
@@ -24,12 +29,13 @@
 #       ./scripts/test-all-runtimes-a2a-e2e.sh
 #
 # Skip individual runtimes:
-#   SKIP_HERMES=1 SKIP_OPENCLAW=1 ./scripts/test-all-runtimes-a2a-e2e.sh
+#   SKIP_HERMES=1 SKIP_OPENCLAW=1 SKIP_GOOGLE_ADK=1 ./scripts/test-all-runtimes-a2a-e2e.sh
 set -euo pipefail

 PLATFORM="${PLATFORM:-${1:-http://localhost:8080}}"
 HERMES_PROVIDER_KEY="${OPENROUTER_API_KEY:-${HERMES_API_KEY:-}}"
 PEER_OPENAI_KEY="${OPENAI_API_KEY:-}"
+GOOGLE_ADK_KEY="${GOOGLE_API_KEY:-}"
 # SaaS auth chain — TENANT_ADMIN_TOKEN + TENANT_ORG_ID required when
 # hitting *.moleculesai.app (per-tenant ADMIN_TOKEN, NOT
 # CP_ADMIN_API_TOKEN). Optional for localhost.
@@ -48,6 +54,10 @@ if [ -z "$HERMES_PROVIDER_KEY" ] && [ -z "${SKIP_HERMES:-}${SKIP_CODEX:-}${SKIP_
  echo "FAIL: set OPENROUTER_API_KEY or HERMES_API_KEY for non-claude runtimes"
  exit 2
 fi
+if [ -z "$GOOGLE_ADK_KEY" ] && [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
+  echo "FAIL: set GOOGLE_API_KEY (AI Studio) for google-adk, or SKIP_GOOGLE_ADK=1"
+  exit 2
+fi

 PASS=0
 FAIL=0
@@ -143,7 +153,7 @@ echo "=========================================="
 echo ""

 # -------------------------------------------------------
-# 1. Provision the four runtimes (skip via SKIP_* flags)
+# 1. Provision the five runtimes (skip via SKIP_* flags)
 # -------------------------------------------------------
 echo "--- 1. Provision workspaces ---"
 if [ -z "${SKIP_CLAUDE_CODE:-}" ]; then
@@ -162,6 +172,10 @@ if [ -z "${SKIP_OPENCLAW:-}" ]; then
  WS_IDS[openclaw]=$(provision "ParityOpenClaw" "openclaw" "openclaw peer")
  echo "  openclaw:    ${WS_IDS[openclaw]}"
 fi
+if [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
+  WS_IDS[google-adk]=$(provision "ParityGoogleADK" "google-adk" "google-adk peer")
+  echo "  google-adk:  ${WS_IDS[google-adk]}"
+fi

 # -------------------------------------------------------
 # 2. Set provider keys
@@ -177,6 +191,12 @@ if [ -n "${WS_IDS[claude-code]:-}" ] && [ -n "$PEER_OPENAI_KEY" ]; then
  set_secret "${WS_IDS[claude-code]}" "OPENAI_API_KEY" "$PEER_OPENAI_KEY"
  echo "  claude-code: OPENAI_API_KEY set"
 fi
+if [ -n "${WS_IDS[google-adk]:-}" ] && [ -n "$GOOGLE_ADK_KEY" ]; then
+  # AI-Studio path: the adapter reads GOOGLE_API_KEY natively when the
+  # config model is google_genai:gemini-2.5-pro (see _routing.resolve_model).
+  set_secret "${WS_IDS[google-adk]}" "GOOGLE_API_KEY" "$GOOGLE_ADK_KEY"
+  echo "  google-adk:  GOOGLE_API_KEY set"
+fi

 # -------------------------------------------------------
 # 3. Wait for online
@@ -188,6 +208,9 @@ for runtime in "${!WS_IDS[@]}"; do
  [ -z "$id" ] && continue
  max=60
  [ "$runtime" = "hermes" ] && max=120
+  # google-adk's first cold boot pulls a large fresh ADK image — give it
+  # a hermes-class window so a slow first pull doesn't read as "failed".
+  [ "$runtime" = "google-adk" ] && max=180
  if wait_online "$id" "$runtime" "$max"; then
    check "$runtime online" "ok" "ok"
  else
@@ -200,7 +223,7 @@ done
 # -------------------------------------------------------
 echo ""
 echo "--- 4. A2A round-trip (first message) ---"
-for runtime in claude-code hermes codex openclaw; do
+for runtime in claude-code hermes codex openclaw google-adk; do
  id="${WS_IDS[$runtime]:-}"
  [ -z "$id" ] && continue
  reply=$(a2a_send "$id" "Reply with just the word OK so we know you got this.")
@@ -213,7 +236,7 @@ done
 # -------------------------------------------------------
 echo ""
 echo "--- 5. Session continuity (second message recalls first) ---"
-for runtime in claude-code hermes codex openclaw; do
+for runtime in claude-code hermes codex openclaw google-adk; do
  id="${WS_IDS[$runtime]:-}"
  [ -z "$id" ] && continue
  # Set up: tell the agent a name.
@@ -27,9 +27,9 @@ def smoke_imports_and_invariants() -> None:
    import-rewrite mistakes (the 0.1.16 incident, where main.py loaded but
    main_sync was missing because the build script dropped a re-export).
    """
-    from molecule_runtime.main import main_sync  # noqa: F401
-    from molecule_runtime import a2a_client, a2a_tools  # noqa: F401
-    from molecule_runtime.builtin_tools import memory  # noqa: F401
+    from molecule_runtime.main import main_sync  # noqa: F401  # smoke-test re-export regression (mc#1769)
+    from molecule_runtime import a2a_client, a2a_tools  # noqa: F401  # smoke-test re-export regression (mc#1769)
+    from molecule_runtime.builtin_tools import memory  # noqa: F401  # smoke-test re-export regression (mc#1769)
    from molecule_runtime.adapters import get_adapter, BaseAdapter, AdapterConfig

    # cli_main + mcp_cli.main are the molecule-mcp console-script entry
@@ -38,8 +38,8 @@ def smoke_imports_and_invariants() -> None:
    # rewrite here would break every external operator's MCP install on
    # the next wheel publish. Pin both names because pyproject points
    # at mcp_cli.main, which then imports a2a_mcp_server.cli_main.
-    from molecule_runtime.a2a_mcp_server import cli_main  # noqa: F401
-    from molecule_runtime.mcp_cli import main as mcp_cli_main  # noqa: F401
+    from molecule_runtime.a2a_mcp_server import cli_main  # noqa: F401  # smoke-test re-export regression (mc#1769)
+    from molecule_runtime.mcp_cli import main as mcp_cli_main  # noqa: F401  # smoke-test re-export regression (mc#1769)
    assert callable(cli_main), "a2a_mcp_server.cli_main must be callable"
    assert callable(mcp_cli_main), "mcp_cli.main must be callable"

@@ -48,7 +48,7 @@ def smoke_imports_and_invariants() -> None:
    # imports + activates these at startup; if a wheel ships without
    # them, the standalone agent silently loses the wait_for_message /
    # inbox_peek / inbox_pop tools and reverts to outbound-only.
-    from molecule_runtime.inbox import (  # noqa: F401
+    from molecule_runtime.inbox import (  # noqa: F401  # smoke-test re-export regression (mc#1769)
        InboxState,
        activate as inbox_activate,
        get_state as inbox_get_state,
@@ -13,7 +13,7 @@
 #
 # Invocation (from template-hermes repo's CI):
 #
-#     bash /path/to/molecule-monorepo/tools/check-template-parity.sh \
+#     bash /path/to/molecule-core/tools/check-template-parity.sh \
 #          install.sh start.sh
 #
 # Or inline via curl:
@@ -36,6 +36,7 @@ import (
 	"time"

 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/channels"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/codexauth"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
@@ -334,6 +335,20 @@ func main() {
 		pendinguploads.StartSweeper(c, pendinguploads.NewPostgres(db.DB), 0)
 	})

+	// Codex shared-OAuth central refresher — the SINGLE owner of the rotating
+	// refresh_token for the global codex (ChatGPT/Codex subscription) credential
+	// (global_secrets key CODEX_AUTH_JSON). Multiple codex workspaces share ONE
+	// ChatGPT-Pro OAuth token; OpenAI's refresh_token is single-use, so letting
+	// each per-agent app-server refresh on its own 401 burned the seed within
+	// seconds (a refresh storm). This goroutine is structurally single-flight
+	// (one goroutine + a package mutex), refreshes only within a safety margin
+	// of expiry, POSTs the refresh_token at most once per due cycle, and writes
+	// the rotated blob back — workspaces now only GET the current token (see the
+	// codex template's codex_auth_sync.sh). INERT when no CODEX_AUTH_JSON exists.
+	go supervised.RunWithRecover(ctx, "codex-auth-refresher", func(c context.Context) {
+		codexauth.StartCodexAuthRefresher(c, db.DB)
+	})
+
 	// Provision-timeout sweep — flips workspaces that have been stuck in
 	// status='provisioning' past the timeout window to 'failed' and emits
 	// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
@@ -0,0 +1,114 @@
+# Molecule Platform OpenAPI specs
+
+This directory holds the machine-readable API contracts for the Molecule
+platform.
+
+| File | Spec | Scope | Status |
+|------|------|-------|--------|
+| `management.yaml` | OpenAPI **3.1** | The **management surface** across both services (orgs, billing, admin, provisioning, workspaces, secrets, templates, org-tokens, bundles). | **SSOT** — hand-authored. |
+| `swagger.yaml` / `swagger.json` | OpenAPI 2.0 | swaggo-generated stub, `/schedules` only (the per-workspace **runtime** surface). | Legacy stub; superseded for management by `management.yaml`. |
+
+`management.yaml` is the **single source of truth** the management tooling
+derives from — the management MCP server, the management CLI (`molecule-cli`),
+and the human-facing API docs (RFC #1706, the gap closed by
+`PLATFORM-MANAGEMENT-API.md` §5c). Do not hand-edit those clients' route maps;
+change them here and regenerate/derive.
+
+## The two-service split
+
+One structural fact drives the whole spec: there are **two services with two
+auth stacks**, and the management surface spans both.
+
+```
+                         ┌─────────────────────────────────────────┐
+   browser / CLI / MCP   │  Control plane (CP)                      │
+        │                │  molecule-controlplane @ api.moleculesai │
+        │  session       │  /api/v1/* (stable) [+ /cp/* sunset]      │
+        ├───────────────▶│  orgs · members · billing · provisioning │
+        │  admin bearer  │  · fleet/admin ops · pins                 │
+        │  provision sec │                                          │
+        └────────────────┴──────────────┬───────────────────────────┘
+                                         │ edge reverse-proxy
+                                         │ (subdomain / X-Molecule-Org-Slug)
+                                         ▼
+                         ┌─────────────────────────────────────────┐
+   Org API Key / ws tok  │  Tenant workspace-server                 │
+        │                │  molecule-core/workspace-server          │
+        └───────────────▶│  ONE EC2 per org @ <slug>.moleculesai.app│
+                         │  workspaces · secrets · templates ·      │
+                         │  org-tokens · bundles                    │
+                         └─────────────────────────────────────────┘
+```
+
+- **Control plane (CP)** — `api.moleculesai.app`, routes modelled under
+  `/api/v1/*` (the `/cp/*` mirror is identical but sunset-headed per RFC #61 and
+  is not duplicated in the spec). Owns **orgs, members, billing, provisioning,
+  fleet/admin ops**.
+- **Tenant workspace-server** — one EC2 per org at `<slug>.moleculesai.app`.
+  Owns **workspaces, agents, secrets, templates, org-tokens, bundles**. Requests
+  may also be sent to the CP host with an `X-Molecule-Org-Slug` header; the CP
+  edge reverse-proxies them to the tenant host (the `Authorization`,
+  `X-Molecule-Org-*`, and cookie headers pass through unchanged and the tenant's
+  own middleware validates them).
+
+The key consequence, called out in `PLATFORM-MANAGEMENT-API.md`: **the Org API
+Key is a TENANT credential, not a CP one.** It is full tenant-admin over its own
+org's workspace-server surface and reaches **nothing** on the CP (org
+create/delete, billing, members, provisioning all 401/403 it). That is why
+member/billing tools belong in a separate CP-admin MCP, not the org-key-authed
+management MCP.
+
+## Security scheme → surface map (the tier matrix)
+
+`management.yaml` defines these `securitySchemes`; each operation declares the
+one(s) it accepts. Mirror of `PLATFORM-MANAGEMENT-API.md` §1:
+
+| Scheme | What it is | Where it applies |
+|--------|-----------|------------------|
+| `workosSession` | WorkOS AuthKit session cookie `mcp_session` (+ org membership/ownership checks) | CP `/api/v1/orgs/*`, `/api/v1/billing/*`. Also accepted on the tenant surface via the CP-session path. |
+| `cpAdminBearer` | CP `CP_ADMIN_API_TOKEN` operator bearer (AdminGate, constant-time) | CP `/api/v1/admin/*` — admin-create-org, tenant teardown, workspace env, ListOrgWorkspaces, redeploy, pins. |
+| `provisionSecret` | CP `PROVISION_SHARED_SECRET` bearer | CP `/api/v1/workspaces/provision`, `…/status`. Routes unmounted when the secret is unset. |
+| `tenantAdminToken` | Per-tenant admin_token (+ `X-Molecule-Org-Id`) | CP `DELETE /api/v1/workspaces/:id` (deprovision) — **in addition to** `provisionSecret` (issue #118). |
+| `orgApiKey` | Tenant Org API Key — `Authorization: Bearer <key>` + routing header; full tenant-admin, self-minting | **All** tenant routes: `/workspaces[/:id]`, `/workspaces/:id/secrets`, budget, billing-mode, `/settings/secrets`, `/org/import`, `/org/templates`, `/org/tokens`, `/templates`, `/bundles`. |
+| `workspaceToken` | Per-workspace bearer, bound to one workspace id (+ routing header) | Read/lifecycle/secrets on a single `/workspaces/:id/*`. **Rejected** on admin list/create/delete when ADMIN_TOKEN is set — use `orgApiKey`. |
+| `orgRoutingHeaderId` / `orgRoutingHeaderSlug` | `X-Molecule-Org-Id` / `X-Molecule-Org-Slug` | Required on every tenant-host request so the edge / TenantGuard route + authorize against the correct org. Send one of them alongside the bearer. |
+
+### Guards worth knowing (modelled per-operation)
+
+- **Dry-run:** `POST /api/v1/admin/orgs?dry_run=true` — validate + echo, no org
+  created. (The only dry-run on the whole management API.)
+- **Confirm token:** `DELETE /api/v1/admin/tenants/:slug` and
+  `…/scrub-artifacts` — body `confirm` MUST equal the URL slug, else `400`
+  before any teardown.
+- **Force flag:** `POST /api/v1/admin/workspaces/:id/env` — keys matching the
+  secret-keyword guard (`TOKEN`/`SECRET`/`KEY`/`PASSWORD`) require `force=true`.
+- **Runtime-pin gate:** `POST /api/v1/workspaces/provision` returns `422
+  RUNTIME_PIN_MISSING` when no runtime image pin exists.
+- **Auto-restart side-effects:** writing a workspace or global secret
+  auto-restarts the affected workspace(s).
+
+## Security note (carried from the synthesis spec)
+
+The Org API Key is **full tenant-admin and self-minting** — a management MCP
+holding one holds tenant root. There is no scope-down today (TODO in
+`orgtoken`). Per-role / per-workspace scoping should ship alongside the
+management MCP.
+
+## Validate
+
+```bash
+cd workspace-server/docs/openapi
+npx @redocly/cli lint management.yaml   # must be clean (0 errors, 0 warnings)
+```
+
+## Scope notes / best-effort flags
+
+- The per-workspace **runtime** surface (schedules, agent, registry, a2a,
+  memory, approvals, channels, terminal, files) is intentionally **out of
+  scope** here — that's the runtime contract, not management.
+- A handful of bodies are **best-effort** from the handlers (org-import inline
+  template, bundle import, list responses with open shapes) and are marked with
+  `additionalProperties: true` in the schema. Tighten as the handler structs
+  stabilise.
+- `/cp/*` deprecated mirrors are omitted (identical shapes; RFC #61
+  Deprecation/Sunset). Build against `/api/v1/*`.
@@ -0,0 +1,463 @@
+// Package codexauth owns the SINGLE, platform-side refresh of the global
+// codex (ChatGPT/Codex subscription) OAuth credential stored in the
+// global_secrets table under key CODEX_AUTH_JSON.
+//
+// THE PROBLEM IT FIXES (agents-team prod, 2026-05-31)
+//
+// Multiple codex workspaces share ONE ChatGPT-Pro OAuth token (the global
+// secret CODEX_AUTH_JSON). OpenAI's refresh_token is SINGLE-USE: every refresh
+// rotates it and invalidates the prior one. When each per-agent codex
+// app-server refreshed independently on a 401, the siblings' in-flight tokens
+// were invalidated within seconds — a refresh storm that burned the seed and
+// wedged every codex agent.
+//
+// THE FIX (two halves; this is the core half)
+//
+//  1. The per-workspace codex app-server NO LONGER refreshes (the template's
+//     OAuth POST is gated off by default — see the codex template's
+//     codex_auth_sync.sh / CODEX_AUTH_REFRESH_OWNER gate). Workspaces only ever
+//     GET the current token and write it to auth.json.
+//  2. ONE owner refreshes the rotating refresh_token: this background goroutine
+//     in the platform. It is structurally single-flight (one goroutine + a
+//     package mutex), refreshes ONLY when the access_token is within a safety
+//     margin of expiry, POSTs the refresh_token at most ONCE per due cycle, and
+//     writes the rotated blob back to global_secrets. On a permanent failure
+//     (the seed was already burned by an out-of-band login) it logs ONCE and
+//     backs off — it never hot-loops a dead refresh_token.
+//
+// Billing-mode resolution and the byok strip are UNTOUCHED by this package.
+package codexauth
+
+import (
+	"context"
+	"database/sql"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"sync"
+	"time"
+
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
+)
+
+const (
+	// CodexAuthSecretKey is the global_secrets key holding the shared codex
+	// ChatGPT/Codex subscription OAuth blob (auth.json contents).
+	CodexAuthSecretKey = "CODEX_AUTH_JSON"
+
+	// oauthTokenURL is OpenAI's OAuth token endpoint. The ONLY endpoint this
+	// package ever POSTs to, and only for a due refresh.
+	oauthTokenURL = "https://auth.openai.com/oauth/token"
+
+	// codexOAuthClientID is the public Codex CLI OAuth client id (the same id
+	// the codex CLI sends). Not a secret.
+	codexOAuthClientID = "app_EMoamEEZ73f0CkXaXp7hrann"
+
+	// refreshSafetyMargin is how far ahead of access_token expiry a refresh is
+	// considered DUE. A token expiring within this window is refreshed now; one
+	// expiring later is left untouched (skip-when-fresh). Generous so a slow
+	// tick can never let the shared token lapse for the fleet.
+	refreshSafetyMargin = 15 * time.Minute
+
+	// defaultInterval is how often the loop wakes to check due-ness. The check
+	// is cheap (decrypt + JWT exp parse) and only POSTs when actually due.
+	defaultInterval = 5 * time.Minute
+
+	// permanentFailureBackoff is how long the loop waits after a PERMANENT
+	// refresh failure (invalid_grant / "refresh token already used"). The seed
+	// is burned until a human re-seeds a fresh login; there is nothing to retry,
+	// so we back off hard rather than hammer the dead token.
+	permanentFailureBackoff = 1 * time.Hour
+)
+
+// SecretStore is the minimal global_secrets surface the refresher needs. The
+// production implementation (postgresStore) is backed by *sql.DB; tests inject
+// a fake. It is deliberately tiny — read one key, write one key — so the test
+// double is trivial and the refresher never reaches for the package-global DB.
+type SecretStore interface {
+	// Get returns the decrypted secret value and true, or ("", false) when the
+	// key is absent. A non-nil error is a real read failure (not absence).
+	Get(ctx context.Context, key string) (value string, found bool, err error)
+	// Put encrypts and upserts value under key, bumping the row's updated_at
+	// (the "last_refresh" timestamp). It is the rotated-blob write-back.
+	Put(ctx context.Context, key, value string) error
+}
+
+// httpDoer is the http client seam (real *http.Client in prod, fake transport
+// in tests). Tests NEVER hit the network.
+type httpDoer interface {
+	Do(req *http.Request) (*http.Response, error)
+}
+
+// refresher is the single-owner refresh engine. The package-level mutex makes
+// the refresh structurally single-flight: even if two refreshOnce calls raced
+// (they cannot in prod — one goroutine drives it — but a test or a future
+// caller might), only one POSTs at a time, and the access-token freshness
+// re-check inside the lock means the second sees a freshly-rotated token and
+// skips. One goroutine + this mutex = single-flight by construction.
+type refresher struct {
+	store  SecretStore
+	client httpDoer
+	now    func() time.Time
+
+	// permanentlyFailed records that the current seed's refresh_token was
+	// rejected as already-used/invalid. While set, refreshOnce is INERT (it
+	// will not re-POST the dead token) until the secret value CHANGES (a human
+	// re-seed), detected by comparing the stored blob. This is the anti-storm
+	// latch — it lives on the struct, not globally, so it resets if the seed is
+	// replaced out of band.
+	failedSeed string // the auth-json blob that failed; "" = no known failure
+}
+
+// mu serializes refreshOnce across the process. Package-level so the
+// single-flight guarantee holds regardless of how many refresher values exist
+// (in prod there is exactly one).
+var mu sync.Mutex
+
+// oauthTokens is the token trio inside auth.json (and the OAuth response).
+type oauthTokens struct {
+	AccessToken  string `json:"access_token"`
+	RefreshToken string `json:"refresh_token"`
+	IDToken      string `json:"id_token,omitempty"`
+}
+
+// StartCodexAuthRefresher launches the single background refresher goroutine.
+// It returns immediately; the loop runs until ctx is cancelled. Wire it under
+// supervised.RunWithRecover in main.go like the other Start* sweeps.
+//
+// db may be nil only in tests that drive refreshOnce directly; in prod it is
+// the server's *sql.DB. The loop is INERT (logs once, keeps ticking) whenever
+// CODEX_AUTH_JSON is absent — a deployment with no shared codex seed pays only
+// a cheap periodic read.
+func StartCodexAuthRefresher(ctx context.Context, db *sql.DB) {
+	r := &refresher{
+		store:  &postgresStore{db: db},
+		client: &http.Client{Timeout: 30 * time.Second},
+		now:    time.Now,
+	}
+	r.run(ctx, defaultInterval)
+}
+
+// run is the tick loop. It checks due-ness every interval and on a permanent
+// failure waits permanentFailureBackoff before the next check (never a tight
+// retry of a burned token).
+func (r *refresher) run(ctx context.Context, interval time.Duration) {
+	// Check once promptly on boot, then on the interval.
+	for {
+		wait := interval
+		if perm := r.refreshOnce(ctx); perm {
+			// Permanent failure this cycle — the seed is burned. Back off hard;
+			// a human must re-seed. We keep ticking (a re-seed CHANGES the blob,
+			// which clears the latch) but slowly.
+			wait = permanentFailureBackoff
+		}
+
+		timer := time.NewTimer(wait)
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			log.Printf("codexauth: context done; stopping refresher")
+			return
+		case <-timer.C:
+		}
+	}
+}
+
+// refreshOnce performs ONE due-check + at most one refresh POST. It returns
+// permanentFailure=true iff the refresh_token was permanently rejected this
+// cycle (the caller backs off). All other outcomes (inert/skip/rotated/transient
+// error) return false.
+//
+// It is single-flight: the package mutex is held for the whole read→decide→
+// POST→write-back so two callers cannot both POST the (single-use) refresh_token.
+func (r *refresher) refreshOnce(ctx context.Context) (permanentFailure bool) {
+	mu.Lock()
+	defer mu.Unlock()
+
+	blob, found, err := r.store.Get(ctx, CodexAuthSecretKey)
+	if err != nil {
+		log.Printf("codexauth: read CODEX_AUTH_JSON failed: %v (skipping this cycle)", err)
+		return false
+	}
+	if !found || strings.TrimSpace(blob) == "" {
+		// INERT: no shared codex seed in this deployment. Cheap no-op.
+		log.Printf("codexauth: no CODEX_AUTH_JSON in global_secrets — refresher inert")
+		// A previously-failed seed that has since been DELETED clears the latch.
+		r.failedSeed = ""
+		return false
+	}
+
+	// Anti-storm latch: if THIS exact blob already failed permanently, do not
+	// re-POST its dead refresh_token. A re-seed changes the blob and clears it.
+	if r.failedSeed != "" && r.failedSeed == blob {
+		return false
+	}
+	if r.failedSeed != "" && r.failedSeed != blob {
+		// The seed changed out of band (human re-login) — give it a fresh chance.
+		r.failedSeed = ""
+	}
+
+	tokens, err := parseTokens(blob)
+	if err != nil {
+		log.Printf("codexauth: CODEX_AUTH_JSON is not parseable codex auth json: %v (skipping)", err)
+		return false
+	}
+	if tokens.RefreshToken == "" {
+		log.Printf("codexauth: CODEX_AUTH_JSON carries no refresh_token (skipping)")
+		return false
+	}
+
+	// Skip-when-fresh: only refresh within the safety margin of expiry. A blob
+	// with an unparseable/absent access_token exp is treated as DUE (better to
+	// refresh a token we cannot date than let the fleet lapse).
+	exp, haveExp := jwtExp(tokens.AccessToken)
+	if haveExp {
+		remaining := exp.Sub(r.now())
+		if remaining > refreshSafetyMargin {
+			// Fresh — nothing to do. No POST.
+			return false
+		}
+	}
+
+	// DUE: POST the refresh_token ONCE.
+	newTokens, perm, err := r.doRefresh(ctx, tokens.RefreshToken)
+	if err != nil {
+		if perm {
+			// Permanent: the seed is burned. Latch it so we don't re-POST, log
+			// ONCE, and DO NOT write anything back.
+			log.Printf("codexauth: PERMANENT refresh failure (refresh_token rejected): %v — "+
+				"NOT writing back; the shared CODEX_AUTH_JSON seed is burned and must be re-seeded "+
+				"via a fresh codex login. Backing off.", err)
+			r.failedSeed = blob
+			return true
+		}
+		// Transient (network/5xx): no write-back, retry next cycle (no backoff).
+		log.Printf("codexauth: transient refresh error: %v (will retry next cycle)", err)
+		return false
+	}
+
+	// Success: merge the rotated trio into the blob (preserving every other
+	// field) and write it back encrypted, bumping updated_at (last_refresh).
+	rotated, err := mergeTokens(blob, newTokens)
+	if err != nil {
+		log.Printf("codexauth: failed to merge rotated tokens into auth json: %v (NOT writing back)", err)
+		return false
+	}
+	if err := r.store.Put(ctx, CodexAuthSecretKey, rotated); err != nil {
+		log.Printf("codexauth: write-back of rotated CODEX_AUTH_JSON failed: %v", err)
+		return false
+	}
+	r.failedSeed = "" // success clears any stale latch
+	log.Printf("codexauth: rotated shared CODEX_AUTH_JSON (single-owner refresh)")
+	return false
+}
+
+// doRefresh POSTs the refresh_token to OpenAI's OAuth endpoint exactly once and
+// returns the rotated trio. permanent=true marks an unrecoverable rejection
+// (HTTP 400 invalid_grant / "refresh token already used") so the caller latches
+// and backs off instead of retrying.
+func (r *refresher) doRefresh(ctx context.Context, refreshToken string) (tokens oauthTokens, permanent bool, err error) {
+	body, _ := json.Marshal(map[string]string{
+		"grant_type":    "refresh_token",
+		"client_id":     codexOAuthClientID,
+		"refresh_token": refreshToken,
+	})
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, oauthTokenURL, strings.NewReader(string(body)))
+	if err != nil {
+		return oauthTokens{}, false, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := r.client.Do(req)
+	if err != nil {
+		return oauthTokens{}, false, err // transient: network
+	}
+	defer resp.Body.Close()
+	respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
+
+	if resp.StatusCode == http.StatusOK {
+		var t oauthTokens
+		if err := json.Unmarshal(respBody, &t); err != nil {
+			return oauthTokens{}, false, fmt.Errorf("decode token response: %w", err)
+		}
+		if t.AccessToken == "" {
+			return oauthTokens{}, false, fmt.Errorf("token response missing access_token")
+		}
+		return t, false, nil
+	}
+
+	// Non-200. A 400 (and any body naming invalid_grant / already-used) is a
+	// PERMANENT rejection of the refresh_token. 401/403 likewise mean the seed
+	// is no good. Everything else (429/5xx/network-shaped) is transient.
+	lowerBody := strings.ToLower(string(respBody))
+	isInvalidGrant := strings.Contains(lowerBody, "invalid_grant") ||
+		strings.Contains(lowerBody, "refresh token already used") ||
+		strings.Contains(lowerBody, "already been used") ||
+		strings.Contains(lowerBody, "token has been revoked")
+	switch {
+	case resp.StatusCode == http.StatusBadRequest && isInvalidGrant:
+		return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
+	case resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden:
+		return oauthTokens{}, true, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
+	default:
+		return oauthTokens{}, false, fmt.Errorf("oauth %d: %s", resp.StatusCode, strings.TrimSpace(string(respBody)))
+	}
+}
+
+// parseTokens extracts the OAuth trio from an auth.json blob, accepting both
+// the nested `{"tokens":{...}}` shape the codex CLI writes and a flat top-level
+// shape some seeds use.
+func parseTokens(blob string) (oauthTokens, error) {
+	var top map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(blob), &top); err != nil {
+		return oauthTokens{}, err
+	}
+	if nested, ok := top["tokens"]; ok {
+		var t oauthTokens
+		if err := json.Unmarshal(nested, &t); err != nil {
+			return oauthTokens{}, fmt.Errorf("decode nested tokens: %w", err)
+		}
+		return t, nil
+	}
+	var t oauthTokens
+	if err := json.Unmarshal([]byte(blob), &t); err != nil {
+		return oauthTokens{}, err
+	}
+	return t, nil
+}
+
+// mergeTokens writes the rotated trio back into the original blob in-place,
+// preserving the blob's shape (nested-vs-flat) and every other field. A field
+// in the OAuth response that is empty (e.g. id_token omitted) does NOT clobber
+// the existing value.
+func mergeTokens(blob string, rotated oauthTokens) (string, error) {
+	var top map[string]json.RawMessage
+	if err := json.Unmarshal([]byte(blob), &top); err != nil {
+		return "", err
+	}
+
+	applyTo := func(m map[string]json.RawMessage) error {
+		setStr := func(key, val string) error {
+			if val == "" {
+				return nil // don't clobber an existing value with an empty one
+			}
+			b, err := json.Marshal(val)
+			if err != nil {
+				return err
+			}
+			m[key] = b
+			return nil
+		}
+		if err := setStr("access_token", rotated.AccessToken); err != nil {
+			return err
+		}
+		if err := setStr("refresh_token", rotated.RefreshToken); err != nil {
+			return err
+		}
+		if err := setStr("id_token", rotated.IDToken); err != nil {
+			return err
+		}
+		return nil
+	}
+
+	if nestedRaw, ok := top["tokens"]; ok {
+		var nested map[string]json.RawMessage
+		if err := json.Unmarshal(nestedRaw, &nested); err != nil {
+			return "", fmt.Errorf("decode nested tokens for merge: %w", err)
+		}
+		if err := applyTo(nested); err != nil {
+			return "", err
+		}
+		nb, err := json.Marshal(nested)
+		if err != nil {
+			return "", err
+		}
+		top["tokens"] = nb
+	} else {
+		if err := applyTo(top); err != nil {
+			return "", err
+		}
+	}
+
+	out, err := json.Marshal(top)
+	if err != nil {
+		return "", err
+	}
+	return string(out), nil
+}
+
+// jwtExp decodes the `exp` claim (Unix seconds) from a JWT access token WITHOUT
+// verifying the signature (we only need the expiry to decide due-ness; the
+// token's validity is OpenAI's to enforce). Returns ok=false when the token is
+// not a parseable 3-part JWT or carries no numeric exp.
+func jwtExp(token string) (time.Time, bool) {
+	parts := strings.Split(token, ".")
+	if len(parts) != 3 {
+		return time.Time{}, false
+	}
+	payload, err := base64.RawURLEncoding.DecodeString(parts[1])
+	if err != nil {
+		// Some encoders pad; tolerate standard base64url with padding too.
+		payload, err = base64.URLEncoding.DecodeString(parts[1])
+		if err != nil {
+			return time.Time{}, false
+		}
+	}
+	var claims struct {
+		Exp json.Number `json:"exp"`
+	}
+	if err := json.Unmarshal(payload, &claims); err != nil {
+		return time.Time{}, false
+	}
+	secs, err := claims.Exp.Int64()
+	if err != nil || secs <= 0 {
+		return time.Time{}, false
+	}
+	return time.Unix(secs, 0), true
+}
+
+// postgresStore is the production SecretStore backed by global_secrets, using
+// the SAME crypto path the secrets handler uses (DecryptVersioned on read,
+// Encrypt + CurrentEncryptionVersion on write).
+type postgresStore struct {
+	db *sql.DB
+}
+
+func (s *postgresStore) Get(ctx context.Context, key string) (string, bool, error) {
+	var enc []byte
+	var ver int
+	err := s.db.QueryRowContext(ctx,
+		`SELECT encrypted_value, encryption_version FROM global_secrets WHERE key = $1`, key).
+		Scan(&enc, &ver)
+	if err == sql.ErrNoRows {
+		return "", false, nil
+	}
+	if err != nil {
+		return "", false, err
+	}
+	plain, err := crypto.DecryptVersioned(enc, ver)
+	if err != nil {
+		return "", false, err
+	}
+	return string(plain), true, nil
+}
+
+func (s *postgresStore) Put(ctx context.Context, key, value string) error {
+	enc, err := crypto.Encrypt([]byte(value))
+	if err != nil {
+		return err
+	}
+	ver := crypto.CurrentEncryptionVersion()
+	_, err = s.db.ExecContext(ctx, `
+		INSERT INTO global_secrets (key, encrypted_value, encryption_version)
+		VALUES ($1, $2, $3)
+		ON CONFLICT (key) DO UPDATE
+			SET encrypted_value = $2, encryption_version = $3, updated_at = now()
+	`, key, enc, ver)
+	return err
+}
@@ -0,0 +1,425 @@
+package codexauth
+
+import (
+	"context"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// --- test doubles -----------------------------------------------------------
+
+// fakeStore is an in-memory SecretStore. nil entry = absent key.
+type fakeStore struct {
+	mu     sync.Mutex
+	values map[string]string
+	getErr error
+	putErr error
+	puts   int32 // count of successful Put calls
+}
+
+func newFakeStore() *fakeStore { return &fakeStore{values: map[string]string{}} }
+
+func (f *fakeStore) Get(_ context.Context, key string) (string, bool, error) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	if f.getErr != nil {
+		return "", false, f.getErr
+	}
+	v, ok := f.values[key]
+	return v, ok, nil
+}
+
+func (f *fakeStore) Put(_ context.Context, key, value string) error {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	if f.putErr != nil {
+		return f.putErr
+	}
+	f.values[key] = value
+	atomic.AddInt32(&f.puts, 1)
+	return nil
+}
+
+func (f *fakeStore) get(key string) string {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	return f.values[key]
+}
+
+// fakeTransport records every request and returns a scripted response. It is
+// the network seam — tests NEVER make a real request.
+type fakeTransport struct {
+	mu        sync.Mutex
+	calls     int32
+	urls      []string
+	methods   []string
+	bodies    []string
+	status    int
+	respBody  string
+	transport func(*http.Request) (*http.Response, error) // optional override
+}
+
+func (t *fakeTransport) Do(req *http.Request) (*http.Response, error) {
+	atomic.AddInt32(&t.calls, 1)
+	t.mu.Lock()
+	t.urls = append(t.urls, req.URL.String())
+	t.methods = append(t.methods, req.Method)
+	if req.Body != nil {
+		b, _ := io.ReadAll(req.Body)
+		t.bodies = append(t.bodies, string(b))
+	} else {
+		t.bodies = append(t.bodies, "")
+	}
+	t.mu.Unlock()
+
+	if t.transport != nil {
+		return t.transport(req)
+	}
+	status := t.status
+	if status == 0 {
+		status = http.StatusOK
+	}
+	return &http.Response{
+		StatusCode: status,
+		Body:       io.NopCloser(strings.NewReader(t.respBody)),
+		Header:     make(http.Header),
+	}, nil
+}
+
+func (t *fakeTransport) callCount() int { return int(atomic.LoadInt32(&t.calls)) }
+
+// --- helpers ----------------------------------------------------------------
+
+// makeJWT builds an unsigned-but-parseable JWT whose payload carries exp.
+func makeJWT(exp time.Time) string {
+	header := base64.RawURLEncoding.EncodeToString([]byte(`{"alg":"none","typ":"JWT"}`))
+	payload := base64.RawURLEncoding.EncodeToString([]byte(
+		fmt.Sprintf(`{"exp":%d,"sub":"codex"}`, exp.Unix())))
+	sig := base64.RawURLEncoding.EncodeToString([]byte("sig"))
+	return header + "." + payload + "." + sig
+}
+
+// authBlob builds a nested codex auth.json blob with the given tokens.
+func authBlob(access, refresh string) string {
+	b, _ := json.Marshal(map[string]any{
+		"tokens": map[string]any{
+			"access_token":  access,
+			"refresh_token": refresh,
+			"id_token":      "id-original",
+		},
+		"OPENAI_API_KEY": nil,
+		"last_refresh":   "2026-01-01T00:00:00Z",
+	})
+	return string(b)
+}
+
+func newTestRefresher(store SecretStore, client httpDoer, now time.Time) *refresher {
+	return &refresher{
+		store:  store,
+		client: client,
+		now:    func() time.Time { return now },
+	}
+}
+
+func okRefreshResponse(access, refresh string) string {
+	b, _ := json.Marshal(oauthTokens{AccessToken: access, RefreshToken: refresh, IDToken: "id-new"})
+	return string(b)
+}
+
+// --- tests ------------------------------------------------------------------
+
+// TestJWTExpParse covers the exp decode (valid, malformed, missing).
+func TestJWTExpParse(t *testing.T) {
+	want := time.Now().Add(2 * time.Hour).Truncate(time.Second)
+	got, ok := jwtExp(makeJWT(want))
+	if !ok {
+		t.Fatalf("jwtExp(valid) ok=false, want true")
+	}
+	if !got.Equal(want) {
+		t.Errorf("jwtExp = %v, want %v", got, want)
+	}
+
+	if _, ok := jwtExp("not-a-jwt"); ok {
+		t.Errorf("jwtExp(non-jwt) ok=true, want false")
+	}
+	if _, ok := jwtExp("a.b.c"); ok {
+		t.Errorf("jwtExp(garbage parts) ok=true, want false")
+	}
+	// 3 parts but payload has no exp.
+	noExp := base64.RawURLEncoding.EncodeToString([]byte("{}"))
+	if _, ok := jwtExp("h." + noExp + ".s"); ok {
+		t.Errorf("jwtExp(no exp claim) ok=true, want false")
+	}
+}
+
+// TestRefreshOnce_SkipWhenFresh: a token well outside the safety margin is NOT
+// refreshed — no POST, no write-back.
+func TestRefreshOnce_SkipWhenFresh(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(2*time.Hour)), "rt-1")
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse("new-at", "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("fresh token: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 0 {
+		t.Errorf("fresh token: %d OAuth POSTs, want 0", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Errorf("fresh token: %d write-backs, want 0", store.puts)
+	}
+}
+
+// TestRefreshOnce_RotateThenReskip: a token inside the margin is refreshed once
+// (POST + write-back of the rotated blob); a subsequent call on the now-fresh
+// rotated token skips (no second POST). Proves rotate→write-back→re-skip.
+func TestRefreshOnce_RotateThenReskip(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	// Expires in 5m — inside the 15m safety margin → DUE.
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(5*time.Minute)), "rt-1")
+	// Rotated access token is fresh (2h out); rotated refresh is rt-2.
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("due token: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 1 {
+		t.Fatalf("due token: %d OAuth POSTs, want exactly 1", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Fatalf("due token: %d write-backs, want exactly 1", store.puts)
+	}
+
+	// The written blob must carry the rotated refresh_token and preserve the
+	// non-token field.
+	rotated := store.get(CodexAuthSecretKey)
+	tokens, err := parseTokens(rotated)
+	if err != nil {
+		t.Fatalf("parse rotated blob: %v", err)
+	}
+	if tokens.RefreshToken != "rt-2" {
+		t.Errorf("rotated refresh_token = %q, want rt-2", tokens.RefreshToken)
+	}
+	if !strings.Contains(rotated, "last_refresh") {
+		t.Errorf("rotated blob dropped the preserved last_refresh field: %s", rotated)
+	}
+
+	// Second call: the rotated access token is fresh → skip, no new POST.
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("re-skip: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 1 {
+		t.Errorf("re-skip: %d total OAuth POSTs, want still 1", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Errorf("re-skip: %d total write-backs, want still 1", store.puts)
+	}
+}
+
+// TestRefreshOnce_NoSecretInert: absent CODEX_AUTH_JSON → inert (no POST, no
+// write-back, no error/permanent).
+func TestRefreshOnce_NoSecretInert(t *testing.T) {
+	store := newFakeStore() // empty
+	tr := &fakeTransport{}
+	r := newTestRefresher(store, tr, time.Now())
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("no secret: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 0 {
+		t.Errorf("no secret: %d POSTs, want 0", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Errorf("no secret: %d write-backs, want 0", store.puts)
+	}
+}
+
+// TestRefreshOnce_PermanentFailNoWriteNoStorm: a 400 invalid_grant must (a) not
+// write back, (b) return permanentFailure=true, and (c) NOT re-POST on the next
+// cycle for the same (burned) seed — the anti-storm latch.
+func TestRefreshOnce_PermanentFailNoWriteNoStorm(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-burned")
+	tr := &fakeTransport{
+		status:   http.StatusBadRequest,
+		respBody: `{"error":"invalid_grant","error_description":"refresh token already used"}`,
+	}
+	r := newTestRefresher(store, tr, now)
+
+	perm := r.refreshOnce(context.Background())
+	if !perm {
+		t.Fatalf("invalid_grant: permanentFailure=false, want true")
+	}
+	if tr.callCount() != 1 {
+		t.Fatalf("invalid_grant: %d POSTs, want exactly 1", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Fatalf("invalid_grant: %d write-backs, want 0 (must NOT persist a failed refresh)", store.puts)
+	}
+
+	// Next cycle, SAME burned seed: must NOT re-POST (anti-storm latch).
+	perm2 := r.refreshOnce(context.Background())
+	if tr.callCount() != 1 {
+		t.Errorf("anti-storm: re-POSTed a burned refresh_token (%d total POSTs, want still 1)", tr.callCount())
+	}
+	_ = perm2 // latched cycle returns false (already-known failure, nothing new)
+
+	// A RE-SEED (blob changes) clears the latch and allows a fresh attempt.
+	store.mu.Lock()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-freshly-seeded")
+	store.mu.Unlock()
+	tr.status = http.StatusOK
+	tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-rotated")
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("post-reseed: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 2 {
+		t.Errorf("post-reseed: %d total POSTs, want 2 (latch should clear on re-seed)", tr.callCount())
+	}
+}
+
+// TestRefreshOnce_TransientNoWriteNoLatch: a 5xx is transient — no write-back,
+// returns false (no hard backoff latch), and a later cycle retries.
+func TestRefreshOnce_TransientNoWriteNoLatch(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
+	tr := &fakeTransport{status: http.StatusServiceUnavailable, respBody: "upstream down"}
+	r := newTestRefresher(store, tr, now)
+
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("503: permanentFailure=true, want false (transient)")
+	}
+	if atomic.LoadInt32(&store.puts) != 0 {
+		t.Errorf("503: %d write-backs, want 0", store.puts)
+	}
+	// Retry next cycle succeeds (no latch on transient).
+	tr.status = http.StatusOK
+	tr.respBody = okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Fatalf("retry after 503: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 2 {
+		t.Errorf("transient retry: %d total POSTs, want 2", tr.callCount())
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Errorf("transient retry: %d write-backs, want 1", store.puts)
+	}
+}
+
+// TestRefreshOnce_SingleFlight: concurrent refreshOnce calls on a DUE token must
+// POST exactly once total — the package mutex serializes them and the second
+// sees the freshly-rotated (now-fresh) token and skips. Structural single-flight.
+func TestRefreshOnce_SingleFlight(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-1")
+	// Every successful rotation yields a FRESH (2h) access token, so once one
+	// caller rotates, the other sees fresh and skips.
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	const n = 16
+	var wg sync.WaitGroup
+	wg.Add(n)
+	for i := 0; i < n; i++ {
+		go func() {
+			defer wg.Done()
+			r.refreshOnce(context.Background())
+		}()
+	}
+	wg.Wait()
+
+	if tr.callCount() != 1 {
+		t.Errorf("single-flight: %d OAuth POSTs across %d concurrent calls, want exactly 1", tr.callCount(), n)
+	}
+	if atomic.LoadInt32(&store.puts) != 1 {
+		t.Errorf("single-flight: %d write-backs, want exactly 1", store.puts)
+	}
+}
+
+// TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint: when it DOES refresh, the
+// single POST goes to the OAuth token URL with the refresh_token grant body.
+func TestRefreshOnce_PostsExactlyOnceToOAuthEndpoint(t *testing.T) {
+	now := time.Now()
+	store := newFakeStore()
+	store.values[CodexAuthSecretKey] = authBlob(makeJWT(now.Add(1*time.Minute)), "rt-secret")
+	tr := &fakeTransport{status: http.StatusOK, respBody: okRefreshResponse(makeJWT(now.Add(2*time.Hour)), "rt-2")}
+	r := newTestRefresher(store, tr, now)
+
+	r.refreshOnce(context.Background())
+
+	if tr.callCount() != 1 {
+		t.Fatalf("%d POSTs, want exactly 1", tr.callCount())
+	}
+	if tr.urls[0] != oauthTokenURL {
+		t.Errorf("POST URL = %q, want %q", tr.urls[0], oauthTokenURL)
+	}
+	if tr.methods[0] != http.MethodPost {
+		t.Errorf("method = %q, want POST", tr.methods[0])
+	}
+	var body map[string]string
+	if err := json.Unmarshal([]byte(tr.bodies[0]), &body); err != nil {
+		t.Fatalf("request body not json: %v (%s)", err, tr.bodies[0])
+	}
+	if body["grant_type"] != "refresh_token" {
+		t.Errorf("grant_type = %q, want refresh_token", body["grant_type"])
+	}
+	if body["refresh_token"] != "rt-secret" {
+		t.Errorf("refresh_token = %q, want rt-secret", body["refresh_token"])
+	}
+	if body["client_id"] != codexOAuthClientID {
+		t.Errorf("client_id = %q, want %q", body["client_id"], codexOAuthClientID)
+	}
+}
+
+// TestRefreshOnce_ReadErrorSkips: a store read error is a transient skip (no
+// POST, no permanent latch).
+func TestRefreshOnce_ReadErrorSkips(t *testing.T) {
+	store := newFakeStore()
+	store.getErr = fmt.Errorf("db down")
+	tr := &fakeTransport{}
+	r := newTestRefresher(store, tr, time.Now())
+	if perm := r.refreshOnce(context.Background()); perm {
+		t.Errorf("read error: permanentFailure=true, want false")
+	}
+	if tr.callCount() != 0 {
+		t.Errorf("read error: %d POSTs, want 0", tr.callCount())
+	}
+}
+
+// TestMergeTokens_PreservesOtherFields proves the rotated write-back keeps every
+// non-token field and does not clobber id_token with an empty rotated value.
+func TestMergeTokens_PreservesOtherFields(t *testing.T) {
+	blob := authBlob("old-at", "old-rt")
+	out, err := mergeTokens(blob, oauthTokens{AccessToken: "new-at", RefreshToken: "new-rt"}) // no id_token
+	if err != nil {
+		t.Fatalf("mergeTokens: %v", err)
+	}
+	tokens, err := parseTokens(out)
+	if err != nil {
+		t.Fatalf("parse merged: %v", err)
+	}
+	if tokens.AccessToken != "new-at" || tokens.RefreshToken != "new-rt" {
+		t.Errorf("merged tokens = %+v, want new-at/new-rt", tokens)
+	}
+	if tokens.IDToken != "id-original" {
+		t.Errorf("empty rotated id_token clobbered the original: got %q, want id-original", tokens.IDToken)
+	}
+	if !strings.Contains(out, "last_refresh") {
+		t.Errorf("merge dropped preserved field: %s", out)
+	}
+}
@@ -334,28 +334,39 @@ func (h *WorkspaceHandler) ProxyA2A(c *gin.Context) {
 	c.Data(status, "application/json", respBody)
 }

-// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace
-// has a budget_limit set and monthly_spend has reached or exceeded it.
-// DB errors are logged and treated as fail-open — a budget check failure
-// must not block legitimate A2A traffic.
+// checkWorkspaceBudget returns a proxyA2AError with 402 when the workspace has
+// exceeded ANY of its configured per-period budget limits (hourly/daily/weekly/
+// monthly — see budget_periods.go). Per-period spend is the rolling-window sum
+// over the workspace_spend_events ledger. DB errors are logged and treated as
+// fail-open — a budget check failure must not block legitimate A2A traffic.
 func (h *WorkspaceHandler) checkWorkspaceBudget(ctx context.Context, workspaceID string) *proxyA2AError {
-	var budgetLimit sql.NullInt64
-	var monthlySpend int64
-	err := db.DB.QueryRowContext(ctx,
-		`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
+	var limitsRaw []byte
+	if err := db.DB.QueryRowContext(ctx,
+		`SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1`,
 		workspaceID,
-	).Scan(&budgetLimit, &monthlySpend)
-	if err != nil {
+	).Scan(&limitsRaw); err != nil {
 		if err != sql.ErrNoRows {
 			log.Printf("ProxyA2A: budget check failed for %s: %v", workspaceID, err)
 		}
 		return nil // fail-open
 	}
-	if budgetLimit.Valid && monthlySpend >= budgetLimit.Int64 {
-		log.Printf("ProxyA2A: budget exceeded for %s (spend=%d limit=%d)", workspaceID, monthlySpend, budgetLimit.Int64)
+	limits := parseBudgetLimits(limitsRaw)
+	if len(limits) == 0 {
+		return nil // no limits configured
+	}
+	spend, err := spendByPeriod(ctx, db.DB, workspaceID)
+	if err != nil {
+		log.Printf("ProxyA2A: budget spend query failed for %s: %v", workspaceID, err)
+		return nil // fail-open
+	}
+	if over := exceededPeriods(limits, spend); len(over) > 0 {
+		log.Printf("ProxyA2A: budget exceeded for %s (periods=%v limits=%v spend=%v)", workspaceID, over, limits, spend)
 		return &proxyA2AError{
-			Status:   http.StatusPaymentRequired,
-			Response: gin.H{"error": "workspace budget limit exceeded"},
+			Status: http.StatusPaymentRequired,
+			Response: gin.H{
+				"error":            "workspace budget limit exceeded",
+				"exceeded_periods": over,
+			},
 		}
 	}
 	return nil
@@ -16,9 +16,9 @@ import (
 	"testing"
 	"time"

-	"github.com/DATA-DOG/go-sqlmock"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/gin-gonic/gin"
 )

@@ -2117,6 +2117,10 @@ func (f *fakeCPProv) Stop(_ context.Context, _ string) error {
 	f.stopCalls++
 	return nil
 }
+func (f *fakeCPProv) StopAndPrune(_ context.Context, _ string) error {
+	f.stopCalls++
+	return nil
+}
 func (f *fakeCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
 	return "", nil
 }
@@ -18,8 +18,8 @@ import (
 	"testing"
 	"time"

-	"github.com/DATA-DOG/go-sqlmock"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/alicebob/miniredis/v2"
 )

@@ -209,10 +209,12 @@ func drainSetup(t *testing.T, workspaceID string) (sqlmock.Sqlmock, *WorkspaceHa
 // Named distinctly from handlers_test.go's expectBudgetCheck (which uses MatchPsql
 // escaped-regex and cannot be reused with QueryMatcherEqual tests).
 func expectQueueBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
+	// Multi-period (#49): exact-match the budget_limits read; "{}" → no limits →
+	// checkWorkspaceBudget returns early (no spend query).
 	mock.ExpectQuery(
-		"SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1",
+		"SELECT COALESCE(budget_limits, '{}'::jsonb) FROM workspaces WHERE id = $1",
 	).WithArgs(workspaceID).
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
 }

 // seedRedisURL puts the agent server URL into the Redis cache so resolveAgentURL
@@ -148,6 +148,135 @@ func (h *AdminSchedulesHealthHandler) Health(c *gin.Context) {
 	c.JSON(http.StatusOK, entries)
 }

+// orphanScheduleEntry is one row in the Orphans response.
+type orphanScheduleEntry struct {
+	WorkspaceID     string `json:"workspace_id"`
+	WorkspaceStatus string `json:"workspace_status"` // "removed" | "missing"
+	ScheduleID      string `json:"schedule_id"`
+	ScheduleName    string `json:"schedule_name"`
+	Source          string `json:"source"`
+	Enabled         bool   `json:"enabled"`
+	CronExpr        string `json:"cron_expr"`
+}
+
+// Orphans handles GET /admin/schedules/orphans — the monitor surface for
+// internal#2006. Health (above) reports only LIVE workspaces' schedules, so a
+// schedule left on a removed/recreated workspace silently stops firing and
+// never appears there. This endpoint lists exactly those orphans (workspace
+// removed OR missing) so an operator/monitor can alert. Returns 200 + JSON
+// array (empty when none). Auth via adminAuth() in router.go.
+func (h *AdminSchedulesHealthHandler) Orphans(c *gin.Context) {
+	ctx := c.Request.Context()
+	rows, err := db.DB.QueryContext(ctx, `
+		SELECT s.workspace_id,
+		       CASE WHEN w.id IS NULL THEN 'missing' ELSE 'removed' END AS ws_status,
+		       s.id, s.name, COALESCE(s.source, ''), s.enabled, s.cron_expr
+		FROM workspace_schedules s
+		LEFT JOIN workspaces w ON w.id = s.workspace_id
+		WHERE w.id IS NULL OR w.status = 'removed'
+		ORDER BY s.name ASC
+	`)
+	if err != nil {
+		log.Printf("AdminSchedulesOrphans: query error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to query orphans"})
+		return
+	}
+	defer rows.Close()
+	out := make([]orphanScheduleEntry, 0)
+	for rows.Next() {
+		var e orphanScheduleEntry
+		if err := rows.Scan(&e.WorkspaceID, &e.WorkspaceStatus, &e.ScheduleID, &e.ScheduleName, &e.Source, &e.Enabled, &e.CronExpr); err != nil {
+			log.Printf("AdminSchedulesOrphans: scan error: %v", err)
+			continue
+		}
+		out = append(out, e)
+	}
+	if err := rows.Err(); err != nil {
+		log.Printf("AdminSchedulesOrphans: rows iteration error: %v", err)
+	}
+	c.JSON(http.StatusOK, out)
+}
+
+// ReapOrphans handles POST /admin/schedules/reap-orphans — the orphan cleaner
+// (internal#2006). For every schedule bound to a removed/nonexistent workspace
+// it re-points runtime-created schedules onto the live successor agent (matched
+// by role+parent, falling back to name+parent) when one exists and doesn't
+// already carry a same-named schedule; schedules with no live successor are
+// disabled (enabled=false) so the scheduler stops firing into a dead workspace.
+// Idempotent: re-running with no orphans is a no-op. Returns a summary count.
+// Auth is enforced by the adminAuth() middleware registered in router.go.
+func (h *AdminSchedulesHealthHandler) ReapOrphans(c *gin.Context) {
+	ctx := c.Request.Context()
+
+	// 1. Re-point runtime schedules onto a live successor (same role+parent,
+	//    else same name+parent). Skip names already present on the successor.
+	repointed, err := db.DB.ExecContext(ctx, `
+		WITH orphan AS (
+			SELECT s.id, s.name, s.workspace_id, prev.role AS role, prev.parent_id AS parent_id
+			FROM workspace_schedules s
+			JOIN workspaces prev ON prev.id = s.workspace_id
+			WHERE prev.status = 'removed' AND s.source = 'runtime'
+		),
+		successor AS (
+			SELECT o.id AS schedule_id, o.name AS schedule_name,
+			       (
+			         SELECT w.id FROM workspaces w
+			         WHERE w.status != 'removed'
+			           AND w.parent_id IS NOT DISTINCT FROM o.parent_id
+			           AND ((o.role IS NOT NULL AND w.role = o.role))
+			         ORDER BY w.updated_at DESC NULLS LAST LIMIT 1
+			       ) AS live_id
+			FROM orphan o
+		)
+		UPDATE workspace_schedules s
+		SET workspace_id = su.live_id, updated_at = now()
+		FROM successor su
+		WHERE s.id = su.schedule_id
+		  AND su.live_id IS NOT NULL
+		  AND NOT EXISTS (
+		      SELECT 1 FROM workspace_schedules t
+		      WHERE t.workspace_id = su.live_id AND t.name = su.schedule_name
+		  )
+	`)
+	if err != nil {
+		log.Printf("ReapOrphans: re-point error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "re-point failed"})
+		return
+	}
+	repointedN, err := repointed.RowsAffected()
+	if err != nil {
+		log.Printf("ReapOrphans: repointed rows affected: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "re-point failed"})
+		return
+	}
+
+	// 2. Disable any remaining schedules still bound to a removed/missing
+	//    workspace (no live successor, or template schedules on a dead row).
+	disabled, err := db.DB.ExecContext(ctx, `
+		UPDATE workspace_schedules s
+		SET enabled = false, updated_at = now()
+		WHERE s.enabled = true
+		  AND NOT EXISTS (
+		      SELECT 1 FROM workspaces w
+		      WHERE w.id = s.workspace_id AND w.status != 'removed'
+		  )
+	`)
+	if err != nil {
+		log.Printf("ReapOrphans: disable error: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "disable failed"})
+		return
+	}
+	disabledN, err := disabled.RowsAffected()
+	if err != nil {
+		log.Printf("ReapOrphans: disabled rows affected: %v", err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "disable failed"})
+		return
+	}
+
+	log.Printf("ReapOrphans: re-pointed %d, disabled %d orphaned schedule(s)", repointedN, disabledN)
+	c.JSON(http.StatusOK, gin.H{"repointed": repointedN, "disabled": disabledN})
+}
+
 // classifyScheduleStatus returns the health status string for a schedule.
 //   - "never_run"  — last_run_at is NULL (schedule has never fired)
 //   - "stale"      — now - last_run_at > staleThreshold (and threshold > 0)
@@ -444,3 +444,72 @@ func TestAdminSchedulesHealth_ResponseFields(t *testing.T) {
 		t.Fatalf("unmet expectations: %v", err)
 	}
 }
+
+// ==================== Orphans + ReapOrphans (internal#2006) ====================
+
+// TestAdminSchedulesOrphans verifies the monitor surface lists schedules bound
+// to a removed/missing workspace (the recreate-orphan failure mode).
+func TestAdminSchedulesOrphans(t *testing.T) {
+	mock := setupTestDB(t)
+	handler := NewAdminSchedulesHealthHandler()
+
+	mock.ExpectQuery(`LEFT JOIN workspaces`).
+		WillReturnRows(sqlmock.NewRows([]string{
+			"workspace_id", "ws_status", "id", "name", "source", "enabled", "cron_expr",
+		}).AddRow("dead-ws", "removed", "sched-1", "minimax-autonomous-tick", "runtime", false, "*/5 * * * *"))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Request = httptest.NewRequest("GET", "/admin/schedules/orphans", nil)
+
+	handler.Orphans(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp []orphanScheduleEntry
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if len(resp) != 1 {
+		t.Fatalf("expected 1 orphan, got %d", len(resp))
+	}
+	if resp[0].ScheduleName != "minimax-autonomous-tick" || resp[0].WorkspaceStatus != "removed" || resp[0].Source != "runtime" {
+		t.Errorf("unexpected orphan entry: %+v", resp[0])
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet expectations: %v", err)
+	}
+}
+
+// TestReapOrphans verifies the cleaner re-points runtime schedules onto a live
+// successor then disables any remaining dead-bound schedules, returning counts.
+func TestReapOrphans(t *testing.T) {
+	mock := setupTestDB(t)
+	handler := NewAdminSchedulesHealthHandler()
+
+	mock.ExpectExec(`UPDATE workspace_schedules s\s+SET workspace_id`).
+		WillReturnResult(sqlmock.NewResult(0, 2))
+	mock.ExpectExec(`UPDATE workspace_schedules s\s+SET enabled = false`).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Request = httptest.NewRequest("POST", "/admin/schedules/reap-orphans", nil)
+
+	handler.ReapOrphans(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp map[string]int64
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp["repointed"] != 2 || resp["disabled"] != 1 {
+		t.Errorf("expected repointed=2 disabled=1, got %+v", resp)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet expectations: %v", err)
+	}
+}
@@ -252,6 +252,9 @@ func scanAuditRows(rows *sql.Rows) ([]auditEventRow, error) {
 		}
 		result = append(result, ev)
 	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
 	return result, nil
 }

@@ -1,7 +1,9 @@
 package handlers

 import (
+	"context"
 	"database/sql"
+	"encoding/json"
 	"log"
 	"net/http"

@@ -12,42 +14,79 @@ import (
 // BudgetHandler exposes per-workspace budget read/write endpoints.
 // Routes (all behind WorkspaceAuth middleware):
 //
-//	GET  /workspaces/:id/budget  — current budget_limit, monthly_spend, budget_remaining
-//	PATCH /workspaces/:id/budget — set or clear budget_limit
+//	GET   /workspaces/:id/budget  — per-period limits, spend, remaining
+//	PATCH /workspaces/:id/budget  — set/clear per-period limits
+//
+// Multi-period (#49): the budget is now four independent rolling windows —
+// hourly/daily/weekly/monthly (budget_periods.go is the SSOT for the set). The
+// canonical config is workspaces.budget_limits (JSONB, USD cents per period);
+// per-period spend is the rolling-window sum over workspace_spend_events. The
+// legacy single monthly budget_limit / monthly_spend are still emitted (and
+// budget_limit kept in sync to the monthly period) for back-compat with
+// pre-deploy canvas/agent builds during the rollout window.
 type BudgetHandler struct{}

 func NewBudgetHandler() *BudgetHandler { return &BudgetHandler{} }

-// budgetResponse is the canonical JSON shape for both GET and PATCH responses.
+// periodBudget is the per-period view: configured ceiling (null = no limit),
+// rolling-window spend, and remaining headroom (null when no limit; may go
+// negative so callers see how far over a period is).
+type periodBudget struct {
+	Limit     *int64 `json:"limit"`
+	Spend     int64  `json:"spend"`
+	Remaining *int64 `json:"remaining"`
+}
+
+// budgetResponse is the canonical JSON shape for GET and PATCH.
 type budgetResponse struct {
-	// BudgetLimit is the monthly spend ceiling in USD cents (null = no limit).
-	// budget_limit=500 means $5.00/month.
-	BudgetLimit *int64 `json:"budget_limit"`
-	// MonthlySpend is the agent's self-reported accumulated LLM API spend
-	// for the current month (USD cents). Incremented via heartbeat.
-	MonthlySpend int64 `json:"monthly_spend"`
-	// BudgetRemaining is null when BudgetLimit is null, otherwise
-	// max(0, budget_limit - monthly_spend). Can be negative — we store the
-	// actual value so callers can see how far over-budget a workspace is.
+	// Periods is keyed by BudgetPeriod ("hourly"/"daily"/"weekly"/"monthly").
+	Periods map[string]periodBudget `json:"periods"`
+
+	// --- back-compat (monthly), for pre-multi-period clients ---
+	BudgetLimit     *int64 `json:"budget_limit"`
+	MonthlySpend    int64  `json:"monthly_spend"`
 	BudgetRemaining *int64 `json:"budget_remaining"`
 }

+// buildBudgetResponse assembles the per-period view from the stored limits +
+// the ledger spend. Single place so GET and PATCH return identical shapes.
+func buildBudgetResponse(ctx context.Context, workspaceID string, limitsRaw []byte) (budgetResponse, error) {
+	limits := parseBudgetLimits(limitsRaw)
+	spend, err := spendByPeriod(ctx, db.DB, workspaceID)
+	if err != nil {
+		return budgetResponse{}, err
+	}
+	periods := make(map[string]periodBudget, len(budgetPeriods))
+	for _, def := range budgetPeriods {
+		pb := periodBudget{Spend: spend[def.Name]}
+		if lim, ok := limits[def.Name]; ok {
+			l := lim
+			pb.Limit = &l
+			r := lim - spend[def.Name]
+			pb.Remaining = &r
+		}
+		periods[string(def.Name)] = pb
+	}
+	resp := budgetResponse{Periods: periods, MonthlySpend: spend[PeriodMonthly]}
+	if m := periods[string(PeriodMonthly)]; m.Limit != nil {
+		resp.BudgetLimit = m.Limit
+		resp.BudgetRemaining = m.Remaining
+	}
+	return resp, nil
+}
+
 // GetBudget handles GET /workspaces/:id/budget.
-// Returns the workspace's current budget ceiling, accumulated spend, and
-// computed remaining headroom. Both budget_limit and budget_remaining are
-// null when no limit has been configured for the workspace.
 func (h *BudgetHandler) GetBudget(c *gin.Context) {
 	workspaceID := c.Param("id")
 	ctx := c.Request.Context()

-	var budgetLimit sql.NullInt64
-	var monthlySpend int64
+	var limitsRaw []byte
 	err := db.DB.QueryRowContext(ctx,
-		`SELECT budget_limit, COALESCE(monthly_spend, 0)
+		`SELECT COALESCE(budget_limits, '{}'::jsonb)
 		 FROM workspaces
 		 WHERE id = $1 AND status != 'removed'`,
 		workspaceID,
-	).Scan(&budgetLimit, &monthlySpend)
+	).Scan(&limitsRaw)
 	if err == sql.ErrNoRows {
 		c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
 		return
@@ -58,66 +97,80 @@ func (h *BudgetHandler) GetBudget(c *gin.Context) {
 		return
 	}

-	resp := budgetResponse{
-		MonthlySpend: monthlySpend,
+	resp, err := buildBudgetResponse(ctx, workspaceID, limitsRaw)
+	if err != nil {
+		log.Printf("GetBudget: spend query failed for %s: %v", workspaceID, err)
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "query failed"})
+		return
 	}
-	if budgetLimit.Valid {
-		limit := budgetLimit.Int64
-		resp.BudgetLimit = &limit
-		remaining := limit - monthlySpend
-		resp.BudgetRemaining = &remaining
-	}
-
 	c.JSON(http.StatusOK, resp)
 }

-// PatchBudget handles PATCH /workspaces/:id/budget.
-// Accepts {"budget_limit": <int64>} to set a new ceiling, or
-// {"budget_limit": null} to remove an existing ceiling.
-// Returns the updated budget state in the same shape as GetBudget.
+// PatchBudget handles PATCH /workspaces/:id/budget. Accepts EITHER the
+// multi-period shape
+//
+//	{"budget_limits": {"hourly": 100, "daily": null, "weekly": 500, "monthly": 2000}}
+//
+// (a per-period value of null/absent clears that period; a positive int sets it)
+// OR the legacy single-monthly shape {"budget_limit": 2000} / {"budget_limit": null}.
 func (h *BudgetHandler) PatchBudget(c *gin.Context) {
 	workspaceID := c.Param("id")
 	ctx := c.Request.Context()

-	// We need to distinguish between "field absent" and "field = null",
-	// so we unmarshal into a raw map first.
-	var raw map[string]interface{}
+	var raw map[string]json.RawMessage
 	if err := c.ShouldBindJSON(&raw); err != nil {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
 		return
 	}
-
-	budgetLimitRaw, ok := raw["budget_limit"]
-	if !ok {
-		c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit field is required"})
+	_, hasLimits := raw["budget_limits"]
+	_, hasLegacy := raw["budget_limit"]
+	if !hasLimits && !hasLegacy {
+		c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits or budget_limit field is required"})
 		return
 	}

-	// Validate and convert the value. JSON numbers decode as float64.
-	var budgetArg interface{} // nil → SQL NULL, int64 → new ceiling
-	if budgetLimitRaw != nil {
-		switch v := budgetLimitRaw.(type) {
-		case float64:
-			if v < 0 {
-				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
+	limits := make(map[BudgetPeriod]int64, len(budgetPeriods))
+	known := make(map[string]bool, len(budgetPeriods))
+	for _, def := range budgetPeriods {
+		known[string(def.Name)] = true
+	}
+
+	if hasLimits {
+		var m map[string]*int64
+		if err := json.Unmarshal(raw["budget_limits"], &m); err != nil {
+			c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limits must be an object of period→int|null"})
+			return
+		}
+		for k, v := range m {
+			if !known[k] {
+				c.JSON(http.StatusBadRequest, gin.H{"error": "unknown budget period: " + k + " (allowed: hourly, daily, weekly, monthly)"})
 				return
 			}
-			cv := int64(v)
-			budgetArg = cv
-		case int64:
-			if v < 0 {
-				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
+			if v == nil {
+				continue // clear this period (null = no limit)
+			}
+			if *v < 0 {
+				c.JSON(http.StatusBadRequest, gin.H{"error": "budget limit for " + k + " must be >= 0 (USD cents)"})
 				return
 			}
-			budgetArg = v
-		default:
+			limits[BudgetPeriod(k)] = *v // 0 is valid = block-all for this period
+		}
+	} else { // legacy single-monthly
+		var v *int64
+		if err := json.Unmarshal(raw["budget_limit"], &v); err != nil {
 			c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be an integer (USD cents) or null"})
 			return
 		}
+		if v != nil {
+			if *v < 0 {
+				c.JSON(http.StatusBadRequest, gin.H{"error": "budget_limit must be >= 0 (USD cents)"})
+				return
+			}
+			limits[PeriodMonthly] = *v // 0 is valid = block-all (legacy semantics)
+		}
 	}
-	// budgetArg == nil means "clear the ceiling"

-	// Existence check — return 404 for non-existent / removed workspaces.
+	// Existence check — 404 for non-existent / removed workspaces.
 	var exists bool
 	if err := db.DB.QueryRowContext(ctx,
 		`SELECT EXISTS(SELECT 1 FROM workspaces WHERE id = $1 AND status != 'removed')`,
@@ -127,38 +180,28 @@ func (h *BudgetHandler) PatchBudget(c *gin.Context) {
 		return
 	}

+	// Persist: budget_limits is the SSOT; keep the legacy budget_limit column
+	// synced to the monthly period so pre-deploy enforcement paths stay coherent
+	// during the rollout window.
+	var legacyMonthly interface{}
+	if m, ok := limits[PeriodMonthly]; ok {
+		legacyMonthly = m
+	}
+	encoded := encodeBudgetLimits(limits)
 	if _, err := db.DB.ExecContext(ctx,
-		`UPDATE workspaces SET budget_limit = $2, updated_at = now() WHERE id = $1`,
-		workspaceID, budgetArg,
+		`UPDATE workspaces SET budget_limits = $2, budget_limit = $3, updated_at = now() WHERE id = $1`,
+		workspaceID, encoded, legacyMonthly,
 	); err != nil {
 		log.Printf("PatchBudget: update failed for %s: %v", workspaceID, err)
 		c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
 		return
 	}

-	// Re-read the current state so the response reflects exactly what is in
-	// the DB, including the monthly_spend the agent has already accumulated.
-	var newLimit sql.NullInt64
-	var monthlySpend int64
-	if err := db.DB.QueryRowContext(ctx,
-		`SELECT budget_limit, COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`,
-		workspaceID,
-	).Scan(&newLimit, &monthlySpend); err != nil {
+	resp, err := buildBudgetResponse(ctx, workspaceID, encoded)
+	if err != nil {
 		log.Printf("PatchBudget: re-read failed for %s: %v", workspaceID, err)
-		// Still success — just omit the echo.
 		c.JSON(http.StatusOK, gin.H{"status": "updated"})
 		return
 	}
-
-	resp := budgetResponse{
-		MonthlySpend: monthlySpend,
-	}
-	if newLimit.Valid {
-		limit := newLimit.Int64
-		resp.BudgetLimit = &limit
-		remaining := limit - monthlySpend
-		resp.BudgetRemaining = &remaining
-	}
-
 	c.JSON(http.StatusOK, resp)
 }
@@ -0,0 +1,160 @@
+package handlers
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"strconv"
+	"time"
+)
+
+// budget_periods.go — SINGLE SOURCE OF TRUTH for the multi-period per-workspace
+// LLM budget (#49 follow-up). The supported periods, their rolling windows, the
+// per-period spend computation (from the workspace_spend_events ledger), and the
+// over-budget decision all live here so the config endpoint (GetBudget/PatchBudget),
+// the display, and enforcement (checkWorkspaceBudget) can never drift.
+//
+// Spend model: the heartbeat records each observed spend INCREMENT into
+// workspace_spend_events (recordSpendDelta). Per-period spend is a rolling-window
+// SUM over that ledger — so the SERVER owns windowing (the agent keeps reporting
+// its cumulative figure unchanged). Rolling (not calendar) windows: no fragile
+// month-boundary reset, and "monthly" = a 30-day trailing window.
+
+// BudgetPeriod is one of the supported rolling budget windows.
+type BudgetPeriod string
+
+const (
+	PeriodHourly  BudgetPeriod = "hourly"
+	PeriodDaily   BudgetPeriod = "daily"
+	PeriodWeekly  BudgetPeriod = "weekly"
+	PeriodMonthly BudgetPeriod = "monthly"
+)
+
+// budgetPeriodDef pairs a period with its rolling window.
+type budgetPeriodDef struct {
+	Name   BudgetPeriod
+	Window time.Duration
+}
+
+// budgetPeriods is the canonical ordered list. ADD A PERIOD = one line here;
+// every consumer iterates this slice, so nothing else needs to change.
+var budgetPeriods = []budgetPeriodDef{
+	{PeriodHourly, time.Hour},
+	{PeriodDaily, 24 * time.Hour},
+	{PeriodWeekly, 7 * 24 * time.Hour},
+	{PeriodMonthly, 30 * 24 * time.Hour}, // rolling 30-day window
+}
+
+// spendLedgerRetention bounds the ledger: rows older than the largest window
+// (+ slack) are never read, so the recorder opportunistically prunes them.
+var spendLedgerRetention = 35 * 24 * time.Hour
+
+// parseBudgetLimits decodes the workspaces.budget_limits JSONB into a map of
+// period → limit (USD cents). A limit of ZERO is valid and means "block all
+// spend for that period" (a $0 ceiling); absent / null / negative / unknown
+// keys mean "no limit for that period". Tolerant of a NULL/empty column.
+func parseBudgetLimits(raw []byte) map[BudgetPeriod]int64 {
+	out := make(map[BudgetPeriod]int64, len(budgetPeriods))
+	if len(raw) == 0 {
+		return out
+	}
+	var m map[string]*int64
+	if err := json.Unmarshal(raw, &m); err != nil {
+		return out
+	}
+	for _, def := range budgetPeriods {
+		if v, ok := m[string(def.Name)]; ok && v != nil && *v >= 0 {
+			out[def.Name] = *v
+		}
+	}
+	return out
+}
+
+// encodeBudgetLimits renders a period→limit map back to the canonical JSONB
+// shape, keeping only KNOWN periods with a non-negative limit (0 = block-all is
+// preserved; a period absent from the map = no limit). Always returns valid JSON.
+func encodeBudgetLimits(limits map[BudgetPeriod]int64) []byte {
+	m := make(map[string]int64, len(limits))
+	for _, def := range budgetPeriods {
+		if v, ok := limits[def.Name]; ok && v >= 0 {
+			m[string(def.Name)] = v
+		}
+	}
+	b, err := json.Marshal(m)
+	if err != nil {
+		return []byte("{}")
+	}
+	return b
+}
+
+// recordSpendDelta appends a positive spend increment to the ledger and
+// opportunistically prunes rows past the retention horizon for this workspace.
+// No-op for delta <= 0. Errors are returned for the caller to log (non-fatal).
+func recordSpendDelta(ctx context.Context, q *sql.DB, workspaceID string, deltaCents int64) error {
+	if deltaCents <= 0 {
+		return nil
+	}
+	if _, err := q.ExecContext(ctx,
+		`INSERT INTO workspace_spend_events (workspace_id, delta_cents) VALUES ($1, $2)`,
+		workspaceID, deltaCents,
+	); err != nil {
+		return err
+	}
+	// Opportunistic prune (cheap; index-backed). Best-effort — ignore error.
+	_, _ = q.ExecContext(ctx,
+		`DELETE FROM workspace_spend_events
+		  WHERE workspace_id = $1 AND occurred_at < now() - $2::interval`,
+		workspaceID, pgInterval(spendLedgerRetention),
+	)
+	return nil
+}
+
+// spendByPeriod returns the rolling-window spend (USD cents) for every period,
+// computed in a SINGLE query over the ledger. The outer predicate bounds to the
+// largest window; per-period FILTERs sum each sub-window. A period with no ledger
+// rows reports 0. This is THE spend computation — used by both display + enforcement.
+func spendByPeriod(ctx context.Context, q *sql.DB, workspaceID string) (map[BudgetPeriod]int64, error) {
+	out := make(map[BudgetPeriod]int64, len(budgetPeriods))
+	for _, def := range budgetPeriods {
+		out[def.Name] = 0
+	}
+	row := q.QueryRowContext(ctx, `
+		SELECT
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '1 hour'), 0),
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '24 hours'), 0),
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '7 days'), 0),
+			COALESCE(SUM(delta_cents) FILTER (WHERE occurred_at > now() - interval '30 days'), 0)
+		FROM workspace_spend_events
+		WHERE workspace_id = $1 AND occurred_at > now() - interval '30 days'
+	`, workspaceID)
+	var h, d, w, mo int64
+	if err := row.Scan(&h, &d, &w, &mo); err != nil {
+		return out, err
+	}
+	out[PeriodHourly], out[PeriodDaily], out[PeriodWeekly], out[PeriodMonthly] = h, d, w, mo
+	return out, nil
+}
+
+// exceededPeriods is PURE: given the configured limits and observed spend, it
+// returns the periods whose spend has reached/exceeded their limit (in
+// budgetPeriods order). Only periods WITH a positive limit are considered.
+// Used by enforcement to decide whether to block.
+func exceededPeriods(limits map[BudgetPeriod]int64, spend map[BudgetPeriod]int64) []BudgetPeriod {
+	var over []BudgetPeriod
+	for _, def := range budgetPeriods {
+		limit, ok := limits[def.Name]
+		if !ok {
+			continue // no limit configured for this period
+		}
+		// limit >= 0 is a real ceiling (0 = block-all). spend >= limit → over.
+		if spend[def.Name] >= limit {
+			over = append(over, def.Name)
+		}
+	}
+	return over
+}
+
+// pgInterval renders a Go duration as a Postgres-interval string ("N seconds").
+func pgInterval(d time.Duration) string {
+	return strconv.FormatInt(int64(d.Seconds()), 10) + " seconds"
+}
@@ -0,0 +1,99 @@
+package handlers
+
+import (
+	"reflect"
+	"testing"
+)
+
+// Pure-logic tests for the multi-period budget SSOT (budget_periods.go). The
+// DB-touching helpers (spendByPeriod / recordSpendDelta) are exercised via the
+// handler sqlmock tests; here we pin the parsing + the over-budget decision,
+// which is where the per-period semantics actually live.
+
+func TestParseBudgetLimits(t *testing.T) {
+	cases := []struct {
+		name string
+		raw  string
+		want map[BudgetPeriod]int64
+	}{
+		{"empty", "", map[BudgetPeriod]int64{}},
+		{"empty-object", "{}", map[BudgetPeriod]int64{}},
+		{"all-four", `{"hourly":100,"daily":200,"weekly":300,"monthly":400}`,
+			map[BudgetPeriod]int64{PeriodHourly: 100, PeriodDaily: 200, PeriodWeekly: 300, PeriodMonthly: 400}},
+		{"null-dropped-zero-kept", `{"hourly":null,"daily":0,"weekly":500}`,
+			map[BudgetPeriod]int64{PeriodDaily: 0, PeriodWeekly: 500}}, // 0 = block-all, kept
+		{"negative-dropped", `{"monthly":-5}`, map[BudgetPeriod]int64{}},
+		{"unknown-key-ignored", `{"yearly":999,"daily":10}`, map[BudgetPeriod]int64{PeriodDaily: 10}},
+		{"malformed-json", `{not json`, map[BudgetPeriod]int64{}},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := parseBudgetLimits([]byte(tc.raw))
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("parseBudgetLimits(%q) = %v, want %v", tc.raw, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestEncodeBudgetLimits_RoundTrip(t *testing.T) {
+	in := map[BudgetPeriod]int64{PeriodHourly: 100, PeriodMonthly: 400}
+	enc := encodeBudgetLimits(in)
+	got := parseBudgetLimits(enc)
+	if !reflect.DeepEqual(got, in) {
+		t.Errorf("round-trip: encode→parse = %v, want %v (enc=%s)", got, in, enc)
+	}
+	// unknown periods dropped; 0 (block-all) kept
+	enc2 := encodeBudgetLimits(map[BudgetPeriod]int64{PeriodDaily: 0, "yearly": 9})
+	if got := parseBudgetLimits(enc2); !reflect.DeepEqual(got, map[BudgetPeriod]int64{PeriodDaily: 0}) {
+		t.Errorf("encode kept 0/dropped unknown: parse(%s) = %v, want {daily:0}", enc2, got)
+	}
+}
+
+func TestExceededPeriods(t *testing.T) {
+	cases := []struct {
+		name   string
+		limits map[BudgetPeriod]int64
+		spend  map[BudgetPeriod]int64
+		want   []BudgetPeriod
+	}{
+		{"no-limits", map[BudgetPeriod]int64{}, map[BudgetPeriod]int64{PeriodHourly: 999}, nil},
+		{"zero-limit-blocks-all", map[BudgetPeriod]int64{PeriodHourly: 0}, map[BudgetPeriod]int64{PeriodHourly: 0}, []BudgetPeriod{PeriodHourly}},
+		{"under-all", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 50}, nil},
+		{"at-limit-is-exceeded", map[BudgetPeriod]int64{PeriodDaily: 100}, map[BudgetPeriod]int64{PeriodDaily: 100}, []BudgetPeriod{PeriodDaily}},
+		{"over-limit", map[BudgetPeriod]int64{PeriodHourly: 10}, map[BudgetPeriod]int64{PeriodHourly: 11}, []BudgetPeriod{PeriodHourly}},
+		{"only-hourly-over", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodMonthly: 1000},
+			map[BudgetPeriod]int64{PeriodHourly: 50, PeriodMonthly: 200}, []BudgetPeriod{PeriodHourly}},
+		{"multiple-over-in-order", map[BudgetPeriod]int64{PeriodHourly: 10, PeriodWeekly: 100},
+			map[BudgetPeriod]int64{PeriodHourly: 99, PeriodWeekly: 100}, []BudgetPeriod{PeriodHourly, PeriodWeekly}},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := exceededPeriods(tc.limits, tc.spend)
+			if !reflect.DeepEqual(got, tc.want) {
+				t.Errorf("exceededPeriods(%v,%v) = %v, want %v", tc.limits, tc.spend, got, tc.want)
+			}
+		})
+	}
+}
+
+// TestBudgetPeriods_AllReachable guards the SSOT list: every declared period has
+// a positive window and a unique name (a typo'd duplicate would silently break
+// per-period accounting).
+func TestBudgetPeriods_Wellformed(t *testing.T) {
+	seen := map[BudgetPeriod]bool{}
+	for _, d := range budgetPeriods {
+		if d.Window <= 0 {
+			t.Errorf("period %s has non-positive window %v", d.Name, d.Window)
+		}
+		if seen[d.Name] {
+			t.Errorf("duplicate period name %s", d.Name)
+		}
+		seen[d.Name] = true
+	}
+	for _, p := range []BudgetPeriod{PeriodHourly, PeriodDaily, PeriodWeekly, PeriodMonthly} {
+		if !seen[p] {
+			t.Errorf("period %s missing from budgetPeriods SSOT list", p)
+		}
+	}
+}
@@ -12,15 +12,25 @@ import (
 	"github.com/gin-gonic/gin"
 )

+// Multi-period budget (#49): GET/PATCH now read workspaces.budget_limits (jsonb)
+// and compute per-period spend from the workspace_spend_events ledger
+// (spendByPeriod — matched here by the "FROM workspace_spend_events" fragment).
+// The legacy budget_limit/monthly_spend response fields are still emitted
+// (monthly period) for rollout back-compat, and the legacy {"budget_limit":N}
+// PATCH shape still works.
+
+// spendRows builds the 4-column row spendByPeriod scans (hourly,daily,weekly,monthly).
+func spendRows(h, d, w, m int64) *sqlmock.Rows {
+	return sqlmock.NewRows([]string{"h", "d", "w", "mo"}).AddRow(h, d, w, m)
+}
+
 // ==================== GET /workspaces/:id/budget ====================

-// TestBudgetGet_NotFound verifies that GET /budget returns 404 for an unknown
-// workspace ID (ErrNoRows from the budget query).
 func TestBudgetGet_NotFound(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-not-there").
 		WillReturnError(sql.ErrNoRows)

@@ -29,8 +39,7 @@ func TestBudgetGet_NotFound(t *testing.T) {
 	c.Params = gin.Params{{Key: "id", Value: "ws-not-there"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-not-there/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusNotFound {
 		t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
@@ -40,12 +49,11 @@ func TestBudgetGet_NotFound(t *testing.T) {
 	}
 }

-// TestBudgetGet_DBError verifies that a non-ErrNoRows DB error returns 500.
 func TestBudgetGet_DBError(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-db-err").
 		WillReturnError(sql.ErrConnDone)

@@ -54,8 +62,7 @@ func TestBudgetGet_DBError(t *testing.T) {
 	c.Params = gin.Params{{Key: "id", Value: "ws-db-err"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-db-err/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusInternalServerError {
 		t.Errorf("expected 500, got %d: %s", w.Code, w.Body.String())
@@ -65,24 +72,23 @@ func TestBudgetGet_DBError(t *testing.T) {
 	}
 }

-// TestBudgetGet_NoLimit verifies that budget_limit and budget_remaining are
-// null when the workspace has no budget ceiling configured.
 func TestBudgetGet_NoLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-free").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(nil, int64(42)))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-free").
+		WillReturnRows(spendRows(0, 0, 0, 42))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: "ws-free"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-free/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -105,24 +111,23 @@ func TestBudgetGet_NoLimit(t *testing.T) {
 	}
 }

-// TestBudgetGet_WithLimit verifies that budget_limit, monthly_spend, and
-// budget_remaining are all returned correctly when a ceiling is set.
 func TestBudgetGet_WithLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-capped").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(500), int64(123)))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":500}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-capped").
+		WillReturnRows(spendRows(0, 0, 0, 123))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: "ws-capped"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-capped/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -137,7 +142,6 @@ func TestBudgetGet_WithLimit(t *testing.T) {
 	if resp["monthly_spend"] != float64(123) {
 		t.Errorf("expected monthly_spend=123, got %v", resp["monthly_spend"])
 	}
-	// budget_remaining = 500 - 123 = 377
 	if resp["budget_remaining"] != float64(377) {
 		t.Errorf("expected budget_remaining=377, got %v", resp["budget_remaining"])
 	}
@@ -146,24 +150,23 @@ func TestBudgetGet_WithLimit(t *testing.T) {
 	}
 }

-// TestBudgetGet_OverBudget verifies that budget_remaining can be negative
-// when monthly_spend has already exceeded budget_limit.
 func TestBudgetGet_OverBudget(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\)`).
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs("ws-over").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(100), int64(150)))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte(`{"monthly":100}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-over").
+		WillReturnRows(spendRows(0, 0, 0, 150))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: "ws-over"}}
 	c.Request = httptest.NewRequest("GET", "/workspaces/ws-over/budget", nil)

-	h := NewBudgetHandler()
-	h.GetBudget(c)
+	NewBudgetHandler().GetBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -172,7 +175,6 @@ func TestBudgetGet_OverBudget(t *testing.T) {
 	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
 		t.Fatalf("parse response: %v", err)
 	}
-	// budget_remaining = 100 - 150 = -50 (negative, but we store actual value)
 	if resp["budget_remaining"] != float64(-50) {
 		t.Errorf("expected budget_remaining=-50, got %v", resp["budget_remaining"])
 	}
@@ -181,10 +183,59 @@ func TestBudgetGet_OverBudget(t *testing.T) {
 	}
 }

+// TestBudgetGet_MultiPeriod pins the new per-period shape: each period reports
+// its own limit/spend/remaining, and an over-budget sub-period is visible.
+func TestBudgetGet_MultiPeriod(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
+		WithArgs("ws-mp").
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).
+			AddRow([]byte(`{"hourly":100,"daily":1000}`)))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-mp").
+		WillReturnRows(spendRows(120, 300, 300, 300)) // hourly over (120>=100)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-mp"}}
+	c.Request = httptest.NewRequest("GET", "/workspaces/ws-mp/budget", nil)
+
+	NewBudgetHandler().GetBudget(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp struct {
+		Periods map[string]struct {
+			Limit     *int64 `json:"limit"`
+			Spend     int64  `json:"spend"`
+			Remaining *int64 `json:"remaining"`
+		} `json:"periods"`
+	}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
+		t.Errorf("hourly.limit: want 100, got %v", resp.Periods["hourly"].Limit)
+	}
+	if resp.Periods["hourly"].Spend != 120 {
+		t.Errorf("hourly.spend: want 120, got %d", resp.Periods["hourly"].Spend)
+	}
+	if r := resp.Periods["hourly"].Remaining; r == nil || *r != -20 {
+		t.Errorf("hourly.remaining: want -20, got %v", r)
+	}
+	if resp.Periods["weekly"].Limit != nil {
+		t.Errorf("weekly.limit: want null (unset), got %v", resp.Periods["weekly"].Limit)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("sqlmock expectations not met: %v", err)
+	}
+}
+
 // ==================== PATCH /workspaces/:id/budget ====================

-// TestBudgetPatch_MissingField verifies that PATCH /budget with no budget_limit
-// field in the body returns 400.
 func TestBudgetPatch_MissingField(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -196,15 +247,13 @@ func TestBudgetPatch_MissingField(t *testing.T) {
 		bytes.NewBufferString(`{"other_field":123}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_InvalidBody verifies that a malformed JSON body returns 400.
 func TestBudgetPatch_InvalidBody(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -216,15 +265,13 @@ func TestBudgetPatch_InvalidBody(t *testing.T) {
 		bytes.NewBufferString(`not json`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_NegativeValue verifies that a negative budget_limit is rejected.
 func TestBudgetPatch_NegativeValue(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -236,15 +283,13 @@ func TestBudgetPatch_NegativeValue(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":-1}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400 for negative budget_limit, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_InvalidType verifies that a non-numeric budget_limit returns 400.
 func TestBudgetPatch_InvalidType(t *testing.T) {
 	setupTestDB(t)
 	setupTestRedis(t)
@@ -256,16 +301,32 @@ func TestBudgetPatch_InvalidType(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":"not-a-number"}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusBadRequest {
 		t.Errorf("expected 400 for string budget_limit, got %d: %s", w.Code, w.Body.String())
 	}
 }

-// TestBudgetPatch_WorkspaceNotFound verifies that PATCH /budget returns 404
-// when the workspace doesn't exist.
+// TestBudgetPatch_UnknownPeriod rejects an unsupported period key.
+func TestBudgetPatch_UnknownPeriod(t *testing.T) {
+	setupTestDB(t)
+	setupTestRedis(t)
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-badperiod"}}
+	c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-badperiod/budget",
+		bytes.NewBufferString(`{"budget_limits":{"yearly":100}}`))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	NewBudgetHandler().PatchBudget(c)
+
+	if w.Code != http.StatusBadRequest {
+		t.Errorf("expected 400 for unknown period, got %d: %s", w.Code, w.Body.String())
+	}
+}
+
 func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -281,8 +342,7 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":500}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusNotFound {
 		t.Errorf("expected 404, got %d: %s", w.Code, w.Body.String())
@@ -292,25 +352,20 @@ func TestBudgetPatch_WorkspaceNotFound(t *testing.T) {
 	}
 }

-// TestBudgetPatch_SetLimit verifies that PATCH /budget with a positive value
-// updates the DB and returns the new budget state.
+// TestBudgetPatch_SetLimit (legacy monthly shape) updates + returns new state.
 func TestBudgetPatch_SetLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)

-	// Existence probe
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-set-limit").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	// UPDATE
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-set-limit", int64(500)).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-set-limit", sqlmock.AnyArg(), int64(500)).
 		WillReturnResult(sqlmock.NewResult(0, 1))
-	// Re-read for response
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
+	mock.ExpectQuery(`FROM workspace_spend_events`).
 		WithArgs("ws-set-limit").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(500), int64(200)))
+		WillReturnRows(spendRows(0, 0, 0, 200))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -319,8 +374,7 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":500}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -335,7 +389,6 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
 	if resp["monthly_spend"] != float64(200) {
 		t.Errorf("expected monthly_spend=200, got %v", resp["monthly_spend"])
 	}
-	// budget_remaining = 500 - 200 = 300
 	if resp["budget_remaining"] != float64(300) {
 		t.Errorf("expected budget_remaining=300, got %v", resp["budget_remaining"])
 	}
@@ -344,8 +397,59 @@ func TestBudgetPatch_SetLimit(t *testing.T) {
 	}
 }

-// TestBudgetPatch_ClearLimit verifies that PATCH /budget with budget_limit=null
-// clears the ceiling, making budget_limit and budget_remaining null in the response.
+// TestBudgetPatch_SetMultiPeriod sets several periods at once and verifies the
+// per-period response.
+func TestBudgetPatch_SetMultiPeriod(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+
+	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
+		WithArgs("ws-mp-set").
+		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
+	// no monthly in payload → legacy budget_limit column set to NULL
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-mp-set", sqlmock.AnyArg(), nil).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectQuery(`FROM workspace_spend_events`).
+		WithArgs("ws-mp-set").
+		WillReturnRows(spendRows(10, 20, 30, 40))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	c.Params = gin.Params{{Key: "id", Value: "ws-mp-set"}}
+	c.Request = httptest.NewRequest("PATCH", "/workspaces/ws-mp-set/budget",
+		bytes.NewBufferString(`{"budget_limits":{"hourly":100,"daily":200,"monthly":null}}`))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	NewBudgetHandler().PatchBudget(c)
+
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp struct {
+		Periods map[string]struct {
+			Limit *int64 `json:"limit"`
+			Spend int64  `json:"spend"`
+		} `json:"periods"`
+		BudgetLimit *int64 `json:"budget_limit"`
+	}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp.Periods["hourly"].Limit == nil || *resp.Periods["hourly"].Limit != 100 {
+		t.Errorf("hourly.limit want 100, got %v", resp.Periods["hourly"].Limit)
+	}
+	if resp.Periods["daily"].Limit == nil || *resp.Periods["daily"].Limit != 200 {
+		t.Errorf("daily.limit want 200, got %v", resp.Periods["daily"].Limit)
+	}
+	if resp.BudgetLimit != nil {
+		t.Errorf("monthly cleared → budget_limit should be null, got %v", *resp.BudgetLimit)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("sqlmock expectations not met: %v", err)
+	}
+}
+
 func TestBudgetPatch_ClearLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -353,15 +457,12 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-clear-limit").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	// UPDATE with NULL
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-clear-limit", nil).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-clear-limit", sqlmock.AnyArg(), nil).
 		WillReturnResult(sqlmock.NewResult(0, 1))
-	// Re-read — budget_limit is now NULL
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
+	mock.ExpectQuery(`FROM workspace_spend_events`).
 		WithArgs("ws-clear-limit").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(nil, int64(50)))
+		WillReturnRows(spendRows(0, 0, 0, 50))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -370,8 +471,7 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":null}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusOK {
 		t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
@@ -391,8 +491,6 @@ func TestBudgetPatch_ClearLimit(t *testing.T) {
 	}
 }

-// TestBudgetPatch_UpdateDBError verifies that a DB error during the UPDATE
-// returns 500.
 func TestBudgetPatch_UpdateDBError(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -400,8 +498,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-patch-dberr").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-patch-dberr", int64(500)).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-patch-dberr", sqlmock.AnyArg(), int64(500)).
 		WillReturnError(sql.ErrConnDone)

 	w := httptest.NewRecorder()
@@ -411,8 +509,7 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":500}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusInternalServerError {
 		t.Errorf("expected 500 on UPDATE error, got %d: %s", w.Code, w.Body.String())
@@ -422,8 +519,8 @@ func TestBudgetPatch_UpdateDBError(t *testing.T) {
 	}
 }

-// TestBudgetPatch_ZeroLimit verifies that budget_limit=0 is accepted (it means
-// every A2A call is blocked — useful to pause a workspace's LLM spend entirely).
+// TestBudgetPatch_ZeroLimit verifies budget_limit=0 is accepted + stored (0 =
+// block-all: every period call is blocked — pauses the workspace's spend).
 func TestBudgetPatch_ZeroLimit(t *testing.T) {
 	mock := setupTestDB(t)
 	setupTestRedis(t)
@@ -431,13 +528,12 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
 	mock.ExpectQuery(`SELECT EXISTS.*status != 'removed'`).
 		WithArgs("ws-zero-limit").
 		WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
-	mock.ExpectExec(`UPDATE workspaces SET budget_limit`).
-		WithArgs("ws-zero-limit", int64(0)).
+	mock.ExpectExec(`UPDATE workspaces SET budget_limits`).
+		WithArgs("ws-zero-limit", sqlmock.AnyArg(), int64(0)).
 		WillReturnResult(sqlmock.NewResult(0, 1))
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id`).
+	mock.ExpectQuery(`FROM workspace_spend_events`).
 		WithArgs("ws-zero-limit").
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}).
-			AddRow(int64(0), int64(0)))
+		WillReturnRows(spendRows(0, 0, 0, 0))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -446,11 +542,17 @@ func TestBudgetPatch_ZeroLimit(t *testing.T) {
 		bytes.NewBufferString(`{"budget_limit":0}`))
 	c.Request.Header.Set("Content-Type", "application/json")

-	h := NewBudgetHandler()
-	h.PatchBudget(c)
+	NewBudgetHandler().PatchBudget(c)

 	if w.Code != http.StatusOK {
-		t.Errorf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
+		t.Fatalf("expected 200 for zero budget_limit, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	if resp["budget_limit"] != float64(0) {
+		t.Errorf("expected budget_limit=0 (block-all), got %v", resp["budget_limit"])
 	}
 	if err := mock.ExpectationsWereMet(); err != nil {
 		t.Errorf("sqlmock expectations not met: %v", err)
@@ -294,8 +294,9 @@ func TestProxyA2A_CrossTenant_RoutingDenied(t *testing.T) {
 	// A URL exists for the target; the guard must deny BEFORE it is used.
 	mr.Set(fmt.Sprintf("ws:%s:url", target), "http://localhost:1")

-	// CanCommunicate: both root-level (parent_id NULL) → its weak "root-level
-	// siblings" rule ALLOWS this. The org guard must catch it afterward.
+	// Post-#1955: CanCommunicate no longer has the root-sibling bypass.
+	// Both root-level (parent_id NULL) but unrelated org roots → hierarchy
+	// check DENIES with 403 BEFORE the org-scope guard or resolveAgentURL.
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id = ").
 		WithArgs(caller).
 		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(caller, nil))
@@ -303,15 +304,6 @@ func TestProxyA2A_CrossTenant_RoutingDenied(t *testing.T) {
 		WithArgs(target).
 		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow(target, nil))

-	// #1953 org-scope guard: caller resolves to org-a-root, target to org-b-root
-	// → different orgs → 403. (Each org root resolves to itself.)
-	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
-		WithArgs(caller).
-		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(caller))
-	mock.ExpectQuery("WITH RECURSIVE org_chain AS").
-		WithArgs(target).
-		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(target))
-
 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
 	c.Params = gin.Params{{Key: "id", Value: target}}
@@ -329,8 +321,8 @@ func TestProxyA2A_CrossTenant_RoutingDenied(t *testing.T) {
 	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
 		t.Fatalf("body not JSON: %v", err)
 	}
-	if msg, _ := resp["error"].(string); !strings.Contains(msg, "different org") {
-		t.Errorf("expected cross-org denial message, got %v", resp["error"])
+	if msg, _ := resp["error"].(string); !strings.Contains(msg, "cannot communicate") {
+		t.Errorf("expected hierarchy denial message, got %v", resp["error"])
 	}
 	if err := mock.ExpectationsWereMet(); err != nil {
 		t.Errorf("unmet sqlmock expectations: %v", err)
@@ -55,6 +55,7 @@ import (
 const integrationTestDelegationID = "del-159-test-integration"
 const integrationTestSourceID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
 const integrationTestTargetID = "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
+const integrationTestParentID = "cccccccc-cccc-cccc-cccc-cccccccccccc"

 // rawHTTPServer starts a TCP listener, serves one HTTP response, and closes.
 // It runs in a background goroutine so the test can proceed immediately after
@@ -43,6 +43,8 @@ func TestWorkspaceCreate_WithParentID(t *testing.T) {
 		WillReturnResult(sqlmock.NewResult(0, 1))
 	mock.ExpectExec("INSERT INTO structure_events").
 		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
+		WillReturnResult(sqlmock.NewResult(0, 1))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -79,6 +81,8 @@ func TestWorkspaceCreate_ExplicitClaudeCodeRuntime(t *testing.T) {
 		WillReturnResult(sqlmock.NewResult(0, 1))
 	mock.ExpectExec("INSERT INTO structure_events").
 		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
+		WillReturnResult(sqlmock.NewResult(0, 1))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -300,6 +304,8 @@ func TestWorkspaceCreate_MaxConcurrentTasksOverride(t *testing.T) {
 		WillReturnResult(sqlmock.NewResult(0, 1))
 	mock.ExpectExec("INSERT INTO structure_events").
 		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
+		WillReturnResult(sqlmock.NewResult(0, 1))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -576,13 +582,14 @@ func TestDiscover_TargetOffline(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

-	// Both root-level, access allowed
+	// Share a parent so communication is allowed under post-#1955 rules
+	sharedParent := "ws-parent"
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-caller").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", sharedParent))
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-off").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-off", nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-off", sharedParent))

 	// Name + runtime lookup (discovery now queries both)
 	mock.ExpectQuery("SELECT COALESCE").
@@ -622,13 +629,14 @@ func TestCheckAccess_SiblingsAllowed(t *testing.T) {
 	setupTestRedis(t)
 	handler := NewDiscoveryHandler()

-	// Both root-level siblings → allowed
+	// Share a parent so communication is allowed under post-#1955 rules
+	sharedParent := "ws-parent"
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-a").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", sharedParent))
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-b").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", sharedParent))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -374,14 +374,14 @@ func TestExtended_DiscoverWithCallerID(t *testing.T) {
 	handler := NewDiscoveryHandler()

 	// CanCommunicate needs to look up both workspaces
-	// Caller: root-level (no parent)
+	// Share a parent so communication is allowed under post-#1955 rules
+	sharedParent := "ws-parent"
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-caller").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", nil))
-	// Target: also root-level (no parent) — root-level siblings are allowed
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-caller", sharedParent))
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-target").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-target", sharedParent))

 	// Discover handler looks up workspace name + runtime
 	mock.ExpectQuery("SELECT COALESCE").
@@ -515,13 +515,14 @@ func TestExtended_CheckAccess(t *testing.T) {
 	handler := NewDiscoveryHandler()

 	// CanCommunicate will look up both workspaces
-	// Both root-level — should be allowed
+	// Share a parent so communication is allowed under post-#1955 rules
+	sharedParent := "ws-parent"
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-a").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-a", sharedParent))
 	mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
 		WithArgs("ws-b").
-		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", nil))
+		WillReturnRows(sqlmock.NewRows([]string{"id", "parent_id"}).AddRow("ws-b", sharedParent))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -12,12 +12,12 @@ import (
 	"testing"
 	"time"

-	"github.com/DATA-DOG/go-sqlmock"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/ws"
 	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/wsauth"
+	"github.com/DATA-DOG/go-sqlmock"
 	"github.com/alicebob/miniredis/v2"
 	"github.com/gin-gonic/gin"
 	"github.com/redis/go-redis/v9"
@@ -158,9 +158,11 @@ func allowLoopbackForTest(t *testing.T) {
 // handler in the 2026-04-18 restructure but the tests never caught up,
 // leaving Platform (Go) CI red for weeks.
 func expectBudgetCheck(mock sqlmock.Sqlmock, workspaceID string) {
-	mock.ExpectQuery(`SELECT budget_limit, COALESCE\(monthly_spend, 0\) FROM workspaces WHERE id = \$1`).
+	// Multi-period (#49): checkWorkspaceBudget reads budget_limits jsonb. An
+	// empty map → no limits → returns early (no spend query), enforcement skipped.
+	mock.ExpectQuery(`SELECT COALESCE\(budget_limits`).
 		WithArgs(workspaceID).
-		WillReturnRows(sqlmock.NewRows([]string{"budget_limit", "monthly_spend"}))
+		WillReturnRows(sqlmock.NewRows([]string{"budget_limits"}).AddRow([]byte("{}")))
 }

 // ---------- TestRegisterHandler ----------
@@ -384,6 +386,8 @@ func TestWorkspaceCreate(t *testing.T) {
 	// Expect RecordAndBroadcast INSERT for WORKSPACE_PROVISIONING
 	mock.ExpectExec("INSERT INTO structure_events").
 		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
+		WillReturnResult(sqlmock.NewResult(0, 1))

 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
@@ -420,6 +424,76 @@ func TestWorkspaceCreate(t *testing.T) {
 	}
 }

+// TestWorkspaceCreate_ReturnsAuthToken_201 pins the inline-auth_token
+// behaviour added for #1644. Pre-fix, the 201 response was
+// {id, status, awareness_namespace, workspace_access} — callers had to
+// make a separate POST to /admin/workspaces/:id/tokens (AdminAuth-gated,
+// path-prefix differs in CP-admin deploys) OR fall back to the dev-only
+// GET /admin/workspaces/:id/test-token (deliberately 404s on
+// MOLECULE_ENV=production per feedback_no_dev_only_routes_in_e2e).
+//
+// Post-fix: every Create response includes an `auth_token` field with
+// the freshly-minted plaintext bearer (returned once, never recoverable).
+// This is the SSOT path — production E2E + canvas + org_import all
+// get the bearer they need in the same round trip.
+//
+// Failure path is non-fatal: if the IssueToken DB call fails, the 201
+// still goes out without auth_token + a fallback log line. That branch
+// is exercised by sqlmock returning a non-INSERT-INTO-workspace_auth_tokens
+// path here — the test asserts presence on the happy path.
+func TestWorkspaceCreate_ReturnsAuthToken_201(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)
+	broadcaster := newTestBroadcaster()
+	handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", "/tmp/configs")
+
+	mock.ExpectBegin()
+	mock.ExpectExec("INSERT INTO workspaces").
+		WithArgs(sqlmock.AnyArg(), "Token Holder", nil, 3, "claude-code", (*string)(nil), nil, "none", (*int64)(nil), models.DefaultMaxConcurrentTasks, "push").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectCommit()
+	mock.ExpectExec("INSERT INTO canvas_layouts").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectExec("INSERT INTO structure_events").
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	// The inline mint added in #1644 Part B — wsauth.IssueToken issues
+	// a new bearer via INSERT INTO workspace_auth_tokens (workspace_id,
+	// token_hash, prefix). This is the assertion that the new code path
+	// reaches the DB.
+	mock.ExpectExec("INSERT INTO workspace_auth_tokens").
+		WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	w := httptest.NewRecorder()
+	c, _ := gin.CreateTestContext(w)
+	body := `{"name":"Token Holder","model":"anthropic:claude-opus-4-7"}`
+	c.Request = httptest.NewRequest("POST", "/workspaces", bytes.NewBufferString(body))
+	c.Request.Header.Set("Content-Type", "application/json")
+
+	handler.Create(c)
+
+	if w.Code != http.StatusCreated {
+		t.Fatalf("expected 201, got %d: %s", w.Code, w.Body.String())
+	}
+	var resp map[string]interface{}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("parse response: %v", err)
+	}
+	tok, ok := resp["auth_token"].(string)
+	if !ok || tok == "" {
+		t.Fatalf("expected non-empty auth_token in 201 response (the #1644 SSOT inline mint), got: %s", w.Body.String())
+	}
+	// Sanity: tokens are base64-RawURL encoded 32-byte payloads (per
+	// wsauth/tokens.go::tokenPayloadBytes), so a meaningful lower bound
+	// is ~40 chars. If this fails, IssueToken's contract drifted.
+	if len(tok) < 40 {
+		t.Errorf("auth_token suspiciously short (%d chars) — wsauth.IssueToken contract drift?", len(tok))
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations — inline mint path may have skipped IssueToken: %v", err)
+	}
+}
+
 func TestBuildProvisionerConfig_WorkspacePathFromPayload(t *testing.T) {
 	setupTestDB(t)
 	broadcaster := newTestBroadcaster()
@@ -377,6 +377,9 @@ func readWorkspaceDeriveInputs(ctx context.Context, workspaceID string) (runtime
 			availableAuthEnv = append(availableAuthEnv, k)
 		}
 	}
+	if err := rows.Err(); err != nil {
+		log.Printf("llm_billing_mode: read secrets rows error for %s: %v (deriving with partial model/auth-env)", workspaceID, err)
+	}
 	return runtime, model, availableAuthEnv
 }

@@ -453,7 +456,10 @@ func SetWorkspaceLLMBillingMode(ctx context.Context, workspaceID, mode string) e
 		if err != nil {
 			return fmt.Errorf("clear workspace llm_billing_mode for %s: %w", workspaceID, err)
 		}
-		n, _ := res.RowsAffected()
+		n, err := res.RowsAffected()
+		if err != nil {
+			return fmt.Errorf("clear workspace llm_billing_mode rows affected %s: %w", workspaceID, err)
+		}
 		if n == 0 {
 			return sql.ErrNoRows
 		}
@@ -470,7 +476,10 @@ func SetWorkspaceLLMBillingMode(ctx context.Context, workspaceID, mode string) e
 	if err != nil {
 		return fmt.Errorf("set workspace llm_billing_mode for %s: %w", workspaceID, err)
 	}
-	n, _ := res.RowsAffected()
+	n, err := res.RowsAffected()
+	if err != nil {
+		return fmt.Errorf("set workspace llm_billing_mode rows affected %s: %w", workspaceID, err)
+	}
 	if n == 0 {
 		return sql.ErrNoRows
 	}
@@ -875,7 +875,9 @@ func (h *OrgHandler) Import(c *gin.Context) {
 				rows.Close()

 				for _, oid := range orphanIDs {
-					descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid)
+					// erase=false: a reconcile is not a user-requested erase —
+					// never prune data volumes on the import-reconcile path (internal#734).
+					descendantIDs, stopErrs, err := h.workspace.CascadeDelete(ctx, oid, false)
 					if err != nil {
 						log.Printf("Org import reconcile: CascadeDelete(%s) failed: %v", oid, err)
 						reconcileErrs = append(reconcileErrs, fmt.Sprintf("delete %s: %v", oid, err))
@@ -548,6 +548,16 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 		})
 	}

+	// internal#2006: migrate runtime-created schedules from a removed
+	// predecessor of the same agent (role+parent) onto this freshly-created
+	// workspace. Reconcile re-derives template-sourced state below, but
+	// schedules a user added at runtime (source='runtime', via the canvas/API)
+	// bind to the ephemeral workspace_id and would otherwise be abandoned on
+	// the removed row when an agent is recreated with a new id. Runs before the
+	// template upsert loop so a same-named template schedule still wins.
+	// Best-effort: never fails the import.
+	h.migrateRuntimeSchedulesFromRemovedPredecessor(ctx, id, role, ws.Name, parentID)
+
 	// Insert schedules if defined. Resolve each schedule's prompt body from
 	// either inline `prompt:` or `prompt_file:` (file ref relative to the
 	// workspace's files_dir). Inline wins; empty prompt after resolution is
@@ -687,6 +697,69 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
 	return h.recurseChildrenForImport(ws, id, absX, absY, defaults, orgBaseDir, results, provisionSem)
 }

+// migrateRuntimeSchedulesFromRemovedPredecessor re-points runtime-created
+// schedules (source='runtime') from the most-recent removed predecessor of the
+// same agent onto newID. Recreating an agent mints a NEW workspace id (the
+// ON CONFLICT in createWorkspaceTree only matches non-removed rows), so a
+// schedule a user added at runtime would otherwise be abandoned on the removed
+// row. Template-sourced schedules are NOT migrated — reconcile re-derives those
+// from the org template (the upsert loop). The predecessor is matched by the
+// stable `role` when present (survives the name auto-suffixing that yields
+// "Agent (2)"), falling back to name+parent. Idempotent (skips names already on
+// newID) and best-effort (logs, never errors the import). See internal#2006.
+func (h *OrgHandler) migrateRuntimeSchedulesFromRemovedPredecessor(ctx context.Context, newID string, role interface{}, name string, parentID *string) {
+	var predID string
+	var err error
+	if role != nil {
+		err = db.DB.QueryRowContext(ctx, `
+			SELECT id FROM workspaces
+			WHERE status = 'removed' AND role = $1
+			  AND parent_id IS NOT DISTINCT FROM $2
+			  AND id <> $3
+			ORDER BY updated_at DESC NULLS LAST
+			LIMIT 1
+		`, role, parentID, newID).Scan(&predID)
+	} else {
+		err = db.DB.QueryRowContext(ctx, `
+			SELECT id FROM workspaces
+			WHERE status = 'removed' AND name = $1
+			  AND parent_id IS NOT DISTINCT FROM $2
+			  AND id <> $3
+			ORDER BY updated_at DESC NULLS LAST
+			LIMIT 1
+		`, name, parentID, newID).Scan(&predID)
+	}
+	if errors.Is(err, sql.ErrNoRows) {
+		return // first-time create — no predecessor to migrate from
+	}
+	if err != nil {
+		log.Printf("Org import: predecessor lookup for %q (new=%s) failed: %v — skipping schedule migration", name, newID, err)
+		return
+	}
+	res, err := db.DB.ExecContext(ctx, `
+		UPDATE workspace_schedules s
+		SET workspace_id = $1, updated_at = now()
+		WHERE s.workspace_id = $2
+		  AND s.source = 'runtime'
+		  AND NOT EXISTS (
+		      SELECT 1 FROM workspace_schedules t
+		      WHERE t.workspace_id = $1 AND t.name = s.name
+		  )
+	`, newID, predID)
+	if err != nil {
+		log.Printf("Org import: schedule migration %s -> %s (%q) failed: %v", predID, newID, name, err)
+		return
+	}
+	n, err := res.RowsAffected()
+	if err != nil {
+		log.Printf("Org import: schedule migration rows affected %s -> %s: %v", predID, newID, err)
+		return
+	}
+	if n > 0 {
+		log.Printf("Org import: migrated %d runtime schedule(s) from removed predecessor %s to new workspace %s (%q)", n, predID, newID, name)
+	}
+}
+
 // lookupExistingChild returns the id of an existing workspace under
 // (parent_id, name) if any, with idempotency-friendly semantics:
 //   - parent_id IS NOT DISTINCT FROM matches NULL too (root workspaces)
@@ -0,0 +1,75 @@
+package handlers
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+
+	sqlmock "github.com/DATA-DOG/go-sqlmock"
+)
+
+// TestMigrateRuntimeSchedulesFromRemovedPredecessor verifies the happy path:
+// a removed predecessor exists for the agent (matched by role), and its
+// runtime-created schedules are re-pointed onto the freshly-created workspace.
+// internal#2006 (recreate-orphans-schedules regression).
+func TestMigrateRuntimeSchedulesFromRemovedPredecessor(t *testing.T) {
+	mock := setupTestDB(t)
+	h := &OrgHandler{}
+
+	// Predecessor lookup (role branch) returns the removed prior workspace.
+	mock.ExpectQuery(`SELECT id FROM workspaces`).
+		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws"))
+	// Re-point UPDATE migrates 2 runtime schedules.
+	mock.ExpectExec(`UPDATE workspace_schedules`).
+		WillReturnResult(sqlmock.NewResult(0, 2))
+
+	parent := "parent-1"
+	h.migrateRuntimeSchedulesFromRemovedPredecessor(
+		context.Background(), "new-ws", interface{}("code-reviewer"), "Code Reviewer (2)", &parent,
+	)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestMigrateRuntimeSchedules_NoPredecessor verifies the first-time-create path:
+// no removed predecessor → the function returns after the lookup and MUST NOT
+// run the re-point UPDATE (sqlmock errors on an unexpected query if it does).
+func TestMigrateRuntimeSchedules_NoPredecessor(t *testing.T) {
+	mock := setupTestDB(t)
+	h := &OrgHandler{}
+
+	mock.ExpectQuery(`SELECT id FROM workspaces`).
+		WillReturnError(sql.ErrNoRows)
+	// No ExpectExec — an UPDATE here would be an unexpected query → test fails.
+
+	h.migrateRuntimeSchedulesFromRemovedPredecessor(
+		context.Background(), "new-ws", interface{}("researcher"), "Root-Cause Researcher", nil,
+	)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet sqlmock expectations: %v", err)
+	}
+}
+
+// TestMigrateRuntimeSchedules_NameFallback verifies the name-branch lookup is
+// used when the agent has no stable role (role == nil), still followed by the
+// re-point UPDATE.
+func TestMigrateRuntimeSchedules_NameFallback(t *testing.T) {
+	mock := setupTestDB(t)
+	h := &OrgHandler{}
+
+	mock.ExpectQuery(`SELECT id FROM workspaces`).
+		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("old-removed-ws"))
+	mock.ExpectExec(`UPDATE workspace_schedules`).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+
+	h.migrateRuntimeSchedulesFromRemovedPredecessor(
+		context.Background(), "new-ws", nil, "Some Agent", nil,
+	)
+
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Fatalf("unmet sqlmock expectations: %v", err)
+	}
+}
@@ -141,7 +141,7 @@ func requireCallerOwnsOrg(c *gin.Context) (string, error) {
 	orgID, err := orgtoken.OrgIDByTokenID(c.Request.Context(), db.DB, tokID)
 	if err != nil {
 		// DB error — deny by default rather than risk cross-org access.
-		return "", fmt.Errorf("allowlist: requireCallerOwnsOrg: %v", err)
+		return "", fmt.Errorf("allowlist: requireCallerOwnsOrg: %w", err)
 	}
 	return orgID, nil
 }
@@ -0,0 +1,191 @@
+package handlers
+
+// Sqlmock-backed coverage for org_scope.go (orgRootID + sameOrg).
+// Security-critical path — cross-tenant isolation (#1953).
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/DATA-DOG/go-sqlmock"
+	"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
+)
+
+// ---------- orgRootID ----------
+
+func TestOrgRootID_HappyPath_NonRoot(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	// CTE walks: ws-child → ws-parent → org-root (parent_id IS NULL)
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(wsUUID3))
+
+	root, err := orgRootID(context.Background(), db.DB, wsUUID1)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if root != wsUUID3 {
+		t.Errorf("root=%q, want %q", root, wsUUID3)
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet: %v", err)
+	}
+}
+
+func TestOrgRootID_WorkspaceIsRoot(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	// One-row chain: the workspace itself is the org root.
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(wsUUID1))
+
+	root, err := orgRootID(context.Background(), db.DB, wsUUID1)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if root != wsUUID1 {
+		t.Errorf("root=%q, want %q", root, wsUUID1)
+	}
+}
+
+func TestOrgRootID_NoRows(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}))
+
+	_, err := orgRootID(context.Background(), db.DB, wsUUID1)
+	if !errors.Is(err, errNoOrgRoot) {
+		t.Fatalf("expected errNoOrgRoot, got %v", err)
+	}
+}
+
+func TestOrgRootID_DBError(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnError(errors.New("conn lost"))
+
+	_, err := orgRootID(context.Background(), db.DB, wsUUID1)
+	if err == nil || errors.Is(err, errNoOrgRoot) {
+		t.Fatalf("expected DB error, got %v", err)
+	}
+}
+
+func TestOrgRootID_EmptyRoot(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	// Row present but root is empty string → treated as not-found.
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(""))
+
+	_, err := orgRootID(context.Background(), db.DB, wsUUID1)
+	if !errors.Is(err, errNoOrgRoot) {
+		t.Fatalf("expected errNoOrgRoot for empty root, got %v", err)
+	}
+}
+
+// ---------- sameOrg ----------
+
+func TestSameOrg_SameWorkspace(t *testing.T) {
+	// Fast path: identical IDs are same-org without touching DB.
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	ok, err := sameOrg(context.Background(), db.DB, wsUUID1, wsUUID1)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !ok {
+		t.Error("same workspace must be same-org")
+	}
+	// No DB expectations → proves short-circuit.
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("DB was touched despite short-circuit: %v", err)
+	}
+}
+
+func TestSameOrg_SameOrg(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(wsUUID3))
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID2).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(wsUUID3))
+
+	ok, err := sameOrg(context.Background(), db.DB, wsUUID1, wsUUID2)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if !ok {
+		t.Error("expected same-org")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet: %v", err)
+	}
+}
+
+func TestSameOrg_DifferentOrg(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow(wsUUID3))
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID2).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}).AddRow("org-b"))
+
+	ok, err := sameOrg(context.Background(), db.DB, wsUUID1, wsUUID2)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if ok {
+		t.Error("expected different-org")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet: %v", err)
+	}
+}
+
+func TestSameOrg_OrgRootFails(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnError(errors.New("conn lost"))
+
+	_, err := sameOrg(context.Background(), db.DB, wsUUID1, wsUUID2)
+	if err == nil {
+		t.Fatal("expected error when orgRootID fails")
+	}
+}
+
+func TestSameOrg_OrgRootNotFound(t *testing.T) {
+	mock, cleanup := withMockDB(t)
+	defer cleanup()
+
+	mock.ExpectQuery(`WITH RECURSIVE org_chain`).
+		WithArgs(wsUUID1).
+		WillReturnRows(sqlmock.NewRows([]string{"root_id"}))
+
+	_, err := sameOrg(context.Background(), db.DB, wsUUID1, wsUUID2)
+	if !errors.Is(err, errNoOrgRoot) {
+		t.Fatalf("expected errNoOrgRoot, got %v", err)
+	}
+}
@@ -171,9 +171,11 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
 			log.Printf("Plugin uninstall: skipping invalid skill name %q in %s: %v", skill, pluginName, err)
 			continue
 		}
-		_, _ = h.execAsRoot(ctx, containerName, []string{
+		if _, rmErr := h.execAsRoot(ctx, containerName, []string{
 			"rm", "-rf", "/configs/skills/" + skill,
-		})
+		}); rmErr != nil {
+			log.Printf("Plugin uninstall: failed to remove skill %s from %s: %v", skill, workspaceID, rmErr)
+		}
 	}

 	// 3. Delete the plugin directory itself (as root to handle file ownership).
@@ -417,7 +417,9 @@ func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, conta
 		`awk 'BEGIN{skip=0; blanks=0} /^%s/{skip=1; blanks=0; next} skip==1 && /^[[:space:]]*$/{blanks++; if(blanks>=2){skip=0; print; next} next} /^# Plugin: /{if(skip==1)skip=0} skip==1{next} {print}' /configs/CLAUDE.md > /tmp/claude.new && mv /tmp/claude.new /configs/CLAUDE.md`,
 		regexpEscapeForAwk(marker),
 	)
-	_, _ = h.execAsRoot(ctx, containerName, []string{"bash", "-c", script})
+	if _, awkErr := h.execAsRoot(ctx, containerName, []string{"bash", "-c", script}); awkErr != nil {
+		log.Printf("Plugin uninstall: failed to strip markers from CLAUDE.md for %s in %s: %v", pluginName, workspaceID, awkErr)
+	}
 }

 // regexpEscapeForAwk escapes characters that have special meaning inside an
@@ -538,7 +538,8 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {

 	// Read previous current_task to detect changes (before the UPDATE)
 	var prevTask string
-	if err := db.DB.QueryRowContext(ctx, `SELECT COALESCE(current_task, '') FROM workspaces WHERE id = $1`, payload.WorkspaceID).Scan(&prevTask); err != nil {
+	var prevSpend int64
+	if err := db.DB.QueryRowContext(ctx, `SELECT COALESCE(current_task, ''), COALESCE(monthly_spend, 0) FROM workspaces WHERE id = $1`, payload.WorkspaceID).Scan(&prevTask, &prevSpend); err != nil {
 		log.Printf("registry heartbeat: prev_task query failed for workspace %s: %v", payload.WorkspaceID, err)
 	}

@@ -556,6 +557,25 @@ func (h *RegistryHandler) Heartbeat(c *gin.Context) {
 		payload.MonthlySpend = maxMonthlySpend
 	}

+	// Multi-period budget (#49): record the spend INCREMENT into the
+	// workspace_spend_events ledger so the server can compute rolling per-period
+	// windows (hourly/daily/weekly/monthly) — see budget_periods.go. The agent
+	// still reports a cumulative monthly figure; we derive the delta vs the
+	// last-seen cumulative (prevSpend). A DECREASE means the agent reset its
+	// monthly cumulative (new month) → treat the new value as fresh spend.
+	// Best-effort: a ledger failure must never break the heartbeat.
+	if payload.MonthlySpend > 0 {
+		delta := payload.MonthlySpend - prevSpend
+		if delta < 0 {
+			delta = payload.MonthlySpend
+		}
+		if delta > 0 {
+			if err := recordSpendDelta(ctx, db.DB, payload.WorkspaceID, delta); err != nil {
+				log.Printf("registry heartbeat: spend-ledger insert failed for workspace %s: %v", payload.WorkspaceID, err)
+			}
+		}
+	}
+
 	// Update heartbeat columns. #73 guard: exclude 'removed' rows so a
 	// late heartbeat from a container that's being torn down doesn't
 	// refresh last_heartbeat_at on a tombstoned workspace (which would
@@ -24,6 +24,7 @@ var platformManagedDirectLLMBypassKeys = map[string]struct{}{
 	"ANTHROPIC_AUTH_TOKEN":    {},
 	"ARCEEAI_API_KEY":         {},
 	"CLAUDE_CODE_OAUTH_TOKEN": {},
+	"CODEX_AUTH_JSON":         {},
 	"DASHSCOPE_API_KEY":       {},
 	"DEEPSEEK_API_KEY":        {},
 	"GEMINI_API_KEY":          {},
@@ -67,14 +68,6 @@ func platformManagedLLMModeForWorkspace(c *gin.Context, workspaceID string) bool
 	return strings.EqualFold(res.ResolvedMode, LLMBillingModePlatformManaged)
 }

-// platformManagedLLMMode is the legacy org-level gate retained for any test
-// harness still asserting the env-var-only behavior. Production code paths
-// must call platformManagedLLMModeForWorkspace instead so a workspace-level
-// byok override actually takes effect on the secrets-write path.
-func platformManagedLLMMode() bool {
-	return strings.EqualFold(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")), "platform_managed")
-}
-
 // rejectPlatformManagedDirectLLMBypassForWorkspace is the per-workspace
 // successor to rejectPlatformManagedDirectLLMBypass (internal#691). The
 // strip-list ONLY applies when this specific workspace resolves to
@@ -91,22 +84,6 @@ func rejectPlatformManagedDirectLLMBypassForWorkspace(c *gin.Context, workspaceI
 	return true
 }

-// rejectPlatformManagedDirectLLMBypass is the legacy org-level shim. Retained
-// only for backwards compatibility with any external/test caller still on the
-// old shape; new code MUST use the per-workspace variant above. Production
-// code paths (the secrets.go handlers + workspace.go create-secret path) all
-// switched in internal#691.
-func rejectPlatformManagedDirectLLMBypass(c *gin.Context, key string) bool {
-	if !platformManagedLLMMode() || !isPlatformManagedDirectLLMBypassKey(key) {
-		return false
-	}
-	c.JSON(http.StatusBadRequest, gin.H{
-		"error": "direct Hermes custom provider secrets are blocked for platform-managed LLM workspaces; use MODEL/LLM_PROVIDER or the platform LLM proxy env instead",
-		"key":   key,
-	})
-	return true
-}
-
 type SecretsHandler struct {
 	restartFunc func(workspaceID string) // Optional: auto-restart after secret change
 }
@@ -486,9 +463,15 @@ func (h *SecretsHandler) SetGlobal(c *gin.Context) {
 		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
 		return
 	}
-	if rejectPlatformManagedDirectLLMBypass(c, body.Key) {
-		return
-	}
+	// internal#718: the org-level LLM billing rung was retired — billing is
+	// resolved per-workspace, not per-org. A global secret is the tenant's OWN
+	// shared credential; the provision-time provider-matched strip
+	// (workspace_provision) removes any global cred a given workspace's resolved
+	// provider does not accept, so a platform-managed workspace can never USE a
+	// non-matching global vendor/oauth key. The legacy org-env SetGlobal gate
+	// (keyed off the retired MOLECULE_LLM_BILLING_MODE) is therefore removed;
+	// per-workspace writes still enforce the strip-list via
+	// rejectPlatformManagedDirectLLMBypassForWorkspace.

 	encrypted, err := crypto.Encrypt([]byte(body.Value))
 	if err != nil {
@@ -979,47 +979,59 @@ func TestSetGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
 	}
 }

-// TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant is the
-// molecule-core#1994 co-mingling GUARD regression. Removing the byok strip is
-// only safe if the platform's own credential is never written into a tenant's
-// global_secrets. SetGlobal is the in-code write boundary: on a tenant whose
-// resolved LLM mode is platform_managed (the metered default), a direct
-// vendor / oauth bypass-list key MUST be rejected (400) and NOT persisted —
-// the tenant is supposed to route through the CP proxy, not carry a direct
-// platform-shaped credential at global scope. This is what keeps a
-// platform-origin token out of global_secrets going forward.
+// TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv pins the
+// internal#718 correction: the org-level LLM billing rung is RETIRED (billing
+// is resolved per-workspace, not per-org). A global secret is the tenant's OWN
+// shared credential and is always writable at global scope; the provision-time
+// provider-matched strip (workspace_provision) keeps any platform-managed
+// workspace from USING a non-matching global cred, and per-workspace secret
+// writes still enforce the strip-list via the per-workspace guard. So even with
+// the legacy MOLECULE_LLM_BILLING_MODE env still set to platform_managed, a
+// global vendor/oauth key write MUST SUCCEED (200) and persist — the retired
+// org rung no longer gates it.
 //
-// (On a byok/disabled tenant the same write is ALLOWED — that key is the
-// tenant's OWN credential, which the corrected model expects at global scope.
-// TestSetGlobal_AutoRestartsAffectedWorkspaces covers that allowed path.)
-//
-// Mutation: drop the rejectPlatformManagedDirectLLMBypass guard from SetGlobal
-// → the write reaches the INSERT (no 400) → this test RED.
-func TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant(t *testing.T) {
-	setupTestDB(t)
-	handler := NewSecretsHandler(nil)
+// Mutation: re-add the org-level rejectPlatformManagedDirectLLMBypass guard to
+// SetGlobal → the write 400s before the INSERT → this test RED.
+func TestSetGlobal_AllowsTenantOwnedVendorKeyDespiteLegacyOrgEnv(t *testing.T) {
+	mock := setupTestDB(t)
+	setupTestRedis(t)

-	// Org/tenant default is platform_managed — the metered path. A direct
-	// vendor key write into global_secrets must be refused here.
+	restarted := make(chan string, 2)
+	handler := NewSecretsHandler(func(id string) { restarted <- id })
+
+	// Legacy org env still platform_managed — it must no longer gate the write.
 	t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)

+	mock.ExpectExec("INSERT INTO global_secrets").
+		WithArgs("CLAUDE_CODE_OAUTH_TOKEN", sqlmock.AnyArg(), sqlmock.AnyArg()).
+		WillReturnResult(sqlmock.NewResult(0, 1))
+	mock.ExpectQuery("SELECT id FROM workspaces").
+		WithArgs("CLAUDE_CODE_OAUTH_TOKEN").
+		WillReturnRows(sqlmock.NewRows([]string{"id"}).AddRow("ws-a"))
+
 	w := httptest.NewRecorder()
 	c, _ := gin.CreateTestContext(w)
-	body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-platform-shaped"}`
+	body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-tenant-own"}`
 	c.Request = httptest.NewRequest("POST", "/admin/secrets", bytes.NewBufferString(body))
 	c.Request.Header.Set("Content-Type", "application/json")

 	handler.SetGlobal(c)

-	if w.Code != http.StatusBadRequest {
-		t.Fatalf("expected 400 (bypass-list key rejected for platform_managed tenant), got %d: %s", w.Code, w.Body.String())
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected 200 (global write allowed; org rung retired), got %d: %s", w.Code, w.Body.String())
 	}
-	if !strings.Contains(w.Body.String(), "blocked") {
-		t.Errorf("response should explain the block; got %s", w.Body.String())
+	// Wait on the async restart fan-out so its SELECT drains before db swap.
+	select {
+	case id := <-restarted:
+		if id != "ws-a" {
+			t.Errorf("expected ws-a restarted, got %s", id)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("auto-restart not fired for affected workspace")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("unmet sqlmock expectations: %v", err)
 	}
-	// No INSERT was expected on the mock — sqlmock would error on an
-	// unexpected ExecContext, so reaching here with a 400 proves the write
-	// was refused before the DB.
 }

 // TestDeleteGlobal_AutoRestartsAffectedWorkspaces covers the delete branch of #15.
--- a/Show More
+++ b/Show More