Compare commits
13 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ef650644cd | |||
| df6014a34b | |||
| ee39ccbf2f | |||
| a4def269e0 | |||
| 39e79c64c8 | |||
| 47520eeb73 | |||
| ee4d0d4ccb | |||
| 467e3ae9ce | |||
| 1eb1327ad5 | |||
| a407c8d079 | |||
| 6a1189ee9d | |||
| 53efcb5c46 | |||
| ea3bae5068 |
@@ -11,21 +11,100 @@ def load_workflow(name: str) -> dict:
|
||||
return yaml.safe_load(f)
|
||||
|
||||
|
||||
def _all_required(workflow: dict) -> dict:
|
||||
return workflow["jobs"]["all-required"]
|
||||
|
||||
|
||||
def test_all_required_uses_dedicated_meta_runner_lane():
|
||||
workflow = load_workflow("ci.yml")
|
||||
all_required = workflow["jobs"]["all-required"]
|
||||
all_required = _all_required(workflow)
|
||||
|
||||
# Stays on the dedicated `ci-meta` lane (the sentinel does no docker
|
||||
# work, so it must NOT occupy the general docker-host pool).
|
||||
assert all_required["runs-on"] == "ci-meta"
|
||||
assert "needs" not in all_required
|
||||
|
||||
|
||||
def test_all_required_reuses_path_filter_before_polling():
|
||||
def test_all_required_is_needs_aggregator_not_a_polling_gate():
|
||||
"""fix/ci-scheduler-fanout (2026-06-01): the sentinel was converted
|
||||
from a status-polling loop (which squatted a ci-meta executor slot for
|
||||
up to 40 min per PR) into a plain `needs:` aggregator that frees the
|
||||
slot immediately. Pin the new shape so a regression to the poller is
|
||||
caught.
|
||||
"""
|
||||
workflow = load_workflow("ci.yml")
|
||||
all_required = workflow["jobs"]["all-required"]
|
||||
all_required = _all_required(workflow)
|
||||
rendered = str(all_required)
|
||||
|
||||
assert "--profile ci" in rendered
|
||||
assert ".gitea/scripts/detect-changes.py" in rendered
|
||||
assert "REQUIRE_PLATFORM" in rendered
|
||||
assert "REQUIRE_CANVAS" in rendered
|
||||
assert "REQUIRE_SCRIPTS" in rendered
|
||||
# The job MUST aggregate via `needs:` (the slot-freeing design).
|
||||
assert "needs" in all_required, "all-required must be a needs: aggregator"
|
||||
|
||||
# It MUST NOT reintroduce the polling loop / per-SHA status fetch that
|
||||
# was the throughput sink.
|
||||
assert "detect-changes.py" not in rendered, (
|
||||
"all-required must not run the detect-changes poller path"
|
||||
)
|
||||
assert "commits/" not in rendered and "statuses" not in rendered, (
|
||||
"all-required must not poll commit statuses (the slot-squat path)"
|
||||
)
|
||||
|
||||
|
||||
def test_all_required_does_not_use_if_always():
|
||||
"""Plain `needs:` works on Gitea 1.22.6 / act_runner v0.6.1; `needs:` +
|
||||
`if: always()` is BROKEN (feedback_gitea_needs_works_only_ifalways_broken)
|
||||
and would let a non-success need pass the gate. The sentinel must use
|
||||
plain `needs:` WITHOUT a job-level `if: always()`.
|
||||
"""
|
||||
workflow = load_workflow("ci.yml")
|
||||
all_required = _all_required(workflow)
|
||||
|
||||
job_if = all_required.get("if")
|
||||
assert not (isinstance(job_if, str) and "always()" in job_if), (
|
||||
"all-required must not combine needs: with if: always()"
|
||||
)
|
||||
|
||||
|
||||
def test_all_required_needs_matches_ci_required_drift_f1_set():
|
||||
"""The sentinel `needs:` list MUST equal ci-required-drift.py's
|
||||
`ci_job_names()` set: every job MINUS the sentinel itself MINUS jobs
|
||||
whose `if:` gates on github.event_name/github.ref (event-gated jobs
|
||||
skip on PRs and a `needs:` on a skipped job would never let the
|
||||
sentinel run). If they diverge, ci-required-drift F1 fires.
|
||||
"""
|
||||
workflow = load_workflow("ci.yml")
|
||||
jobs = workflow["jobs"]
|
||||
sentinel = "all-required"
|
||||
|
||||
expected = set()
|
||||
for key, body in jobs.items():
|
||||
if key == sentinel:
|
||||
continue
|
||||
gate = body.get("if") if isinstance(body, dict) else None
|
||||
if isinstance(gate, str) and (
|
||||
"github.event_name" in gate or "github.ref" in gate
|
||||
):
|
||||
# event-gated → legitimately skips on some triggers; excluded
|
||||
# from both `needs:` and the F1 set.
|
||||
continue
|
||||
expected.add(key)
|
||||
|
||||
needs = jobs[sentinel].get("needs", [])
|
||||
if isinstance(needs, str):
|
||||
needs = [needs]
|
||||
actual = set(needs)
|
||||
|
||||
assert actual == expected, (
|
||||
f"all-required needs: {sorted(actual)} != ci_job_names() "
|
||||
f"{sorted(expected)} — ci-required-drift F1 would fire"
|
||||
)
|
||||
|
||||
|
||||
def test_all_required_needs_reference_real_jobs():
|
||||
"""F1b guard: every entry in `needs:` must name an existing job."""
|
||||
workflow = load_workflow("ci.yml")
|
||||
jobs = workflow["jobs"]
|
||||
needs = jobs["all-required"].get("needs", [])
|
||||
if isinstance(needs, str):
|
||||
needs = [needs]
|
||||
job_keys = set(jobs)
|
||||
for dep in needs:
|
||||
assert dep in job_keys, f"all-required needs unknown job {dep!r}"
|
||||
|
||||
+82
-120
@@ -475,10 +475,10 @@ jobs:
|
||||
#
|
||||
# Emits `CI / all-required (<event>)` where <event> is the workflow trigger
|
||||
# (e.g. `CI / all-required (pull_request)`, `CI / all-required (push)`).
|
||||
# Branch protection MUST be updated to require the event-suffixed name —
|
||||
# Branch protection requires the event-suffixed name —
|
||||
# requiring `CI / all-required` (bare, no suffix) silently blocks all merges
|
||||
# because Gitea treats absent status contexts as pending (not skipped), and
|
||||
# no workflow emits the bare name. Fixed: BP now requires
|
||||
# no workflow emits the bare name. BP requires
|
||||
# `CI / all-required (pull_request)` per issue #1473.
|
||||
#
|
||||
# Closes the failure mode where status_check_contexts on molecule-core/main
|
||||
@@ -487,129 +487,91 @@ jobs:
|
||||
# red silently merged through. See internal#286 for the three concrete
|
||||
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
|
||||
#
|
||||
# This job deliberately has no `needs:`. Gitea 1.22/act_runner can mark a
|
||||
# job-level `if: always()` + `needs:` sentinel as skipped before upstream
|
||||
# jobs settle, leaving branch protection with a permanent pending
|
||||
# `CI / all-required` context. Instead, this independent sentinel polls the
|
||||
# required commit-status contexts for this SHA and fails if any fail, skip,
|
||||
# or never emit. It runs the same path detector as `changes` and only waits
|
||||
# for path-relevant jobs; Gitea can otherwise leave needs/output-skipped
|
||||
# jobs permanently pending with "Blocked by required conditions". It runs on
|
||||
# the dedicated `ci-meta` lane so the poller does not occupy the same
|
||||
# general runner pool as the jobs it is waiting for.
|
||||
# ── 2026-06-01 CI-scheduler-overload fix (fix/ci-scheduler-fanout) ──
|
||||
# PREVIOUS shape: a poll-gate that ran detect-changes then LOOPED on
|
||||
# `GET /commits/{sha}/statuses` every 15s for up to 40 min, occupying a
|
||||
# `ci-meta` executor slot the entire time it waited for upstream jobs.
|
||||
# With only 2 ci-meta runners, that poll-loop squatted half the lane on
|
||||
# every PR — a confirmed throughput sink in the live RCA (two concurrent
|
||||
# `JOB-all-required` containers observed pinning the lane). The polling
|
||||
# design existed only to dodge the Gitea `needs:` + `if: always()` bug,
|
||||
# where an always()-guarded sentinel could be marked skipped before
|
||||
# upstream jobs settled (leaving BP pending forever).
|
||||
#
|
||||
# canvas-deploy-reminder is intentionally NOT included in all-required.needs.
|
||||
# It is an informational main-push reminder, not a PR quality gate. Keeping
|
||||
# it in this dependency list lets a skipped reminder skip the required
|
||||
# sentinel before the `always()` guard can emit a branch-protection status.
|
||||
# NEW shape: a plain `needs:` aggregator with NO polling loop. This is
|
||||
# safe here — and was NOT safe at the time the poller was written —
|
||||
# because every aggregated CI job now gates its real work PER-STEP
|
||||
# (`if: needs.changes.outputs.* != 'true'`) rather than at the JOB level.
|
||||
# A per-step-gated job always reaches a terminal SUCCESS (it no-ops its
|
||||
# expensive steps but the job itself still completes), so it is never
|
||||
# `skipped`. Plain `needs:` (WITHOUT `if: always()`) works correctly on
|
||||
# Gitea 1.22.6 / act_runner v0.6.1 — only `needs:` + `if: always()` is
|
||||
# broken (feedback_gitea_needs_works_only_ifalways_broken). We therefore
|
||||
# use plain `needs:` + an explicit per-need result check (NOT
|
||||
# `if: always()`); if any need fails/errors, Gitea never starts this job
|
||||
# and BP sees `CI / all-required` go red via the failed dependency
|
||||
# propagation — exactly the gate we want, with zero runner-squat.
|
||||
#
|
||||
# The `needs:` list MUST stay in lockstep with ci-required-drift.py's
|
||||
# F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
|
||||
# whose `if:` gates on github.event_name/github.ref). canvas-deploy-
|
||||
# reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
|
||||
# so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
|
||||
# skipped job would never let the sentinel run. If a new always-running
|
||||
# CI job is added, add it here too or ci-required-drift F1 will flag it.
|
||||
#
|
||||
# Stays on the dedicated `ci-meta` lane (no docker work, so the
|
||||
# docker-host-pin lint does not apply), but now the job is sub-second:
|
||||
# it only inspects already-settled `needs.*.result` values, so it frees
|
||||
# the slot immediately instead of holding it for the whole CI duration.
|
||||
#
|
||||
needs:
|
||||
- changes
|
||||
- platform-build
|
||||
- canvas-build
|
||||
- shellcheck
|
||||
- python-lint
|
||||
continue-on-error: false
|
||||
runs-on: ci-meta
|
||||
timeout-minutes: 45
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- id: check
|
||||
- name: Verify all aggregated CI jobs succeeded
|
||||
# NO polling, NO API call, NO checkout. Because this job lists the
|
||||
# aggregated jobs under `needs:` (without `if: always()`), Gitea only
|
||||
# starts it once every need has reached SUCCESS — a failed/errored
|
||||
# need short-circuits the job and propagates red to the
|
||||
# `CI / all-required` context. This explicit check is a
|
||||
# belt-and-suspenders assertion + a readable run summary; the real
|
||||
# gating is the `needs:` edge itself.
|
||||
env:
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
|
||||
PUSH_BEFORE: ${{ github.event.before }}
|
||||
run: |
|
||||
python3 .gitea/scripts/detect-changes.py \
|
||||
--profile ci \
|
||||
--event-name "${{ github.event_name }}" \
|
||||
--pr-base-sha "$PR_BASE_SHA" \
|
||||
--base-ref "$PR_BASE_REF" \
|
||||
--push-before "${GITHUB_EVENT_BEFORE:-$PUSH_BEFORE}"
|
||||
- name: Wait for required CI contexts
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
API_ROOT: ${{ github.server_url }}/api/v1
|
||||
REPOSITORY: ${{ github.repository }}
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REQUIRE_PLATFORM: ${{ steps.check.outputs.platform }}
|
||||
REQUIRE_CANVAS: ${{ steps.check.outputs.canvas }}
|
||||
REQUIRE_SCRIPTS: ${{ steps.check.outputs.scripts }}
|
||||
CHANGES_RESULT: ${{ needs.changes.result }}
|
||||
PLATFORM_RESULT: ${{ needs.platform-build.result }}
|
||||
CANVAS_RESULT: ${{ needs.canvas-build.result }}
|
||||
SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
|
||||
PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
token = os.environ["GITEA_TOKEN"]
|
||||
api_root = os.environ["API_ROOT"].rstrip("/")
|
||||
repo = os.environ["REPOSITORY"]
|
||||
sha = os.environ["COMMIT_SHA"]
|
||||
event = os.environ["EVENT_NAME"]
|
||||
required = [
|
||||
f"CI / Detect changes ({event})",
|
||||
f"CI / Python Lint & Test ({event})",
|
||||
]
|
||||
if os.environ.get("REQUIRE_PLATFORM") == "true":
|
||||
required.append(f"CI / Platform (Go) ({event})")
|
||||
if os.environ.get("REQUIRE_CANVAS") == "true":
|
||||
required.append(f"CI / Canvas (Next.js) ({event})")
|
||||
if os.environ.get("REQUIRE_SCRIPTS") == "true":
|
||||
required.append(f"CI / Shellcheck (E2E scripts) ({event})")
|
||||
terminal_bad = {"failure", "error"}
|
||||
deadline = time.time() + 40 * 60
|
||||
last_summary = None
|
||||
|
||||
def fetch_statuses():
|
||||
statuses = []
|
||||
for page in range(1, 6):
|
||||
url = f"{api_root}/repos/{repo}/commits/{sha}/statuses?page={page}&limit=100"
|
||||
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
chunk = json.load(resp)
|
||||
if not chunk:
|
||||
break
|
||||
statuses.extend(chunk)
|
||||
latest = {}
|
||||
for item in statuses:
|
||||
ctx = item.get("context")
|
||||
if not ctx:
|
||||
continue
|
||||
prev = latest.get(ctx)
|
||||
if prev is None or (item.get("updated_at") or item.get("created_at") or "") >= (prev.get("updated_at") or prev.get("created_at") or ""):
|
||||
latest[ctx] = item
|
||||
return latest
|
||||
|
||||
while True:
|
||||
try:
|
||||
latest = fetch_statuses()
|
||||
except (TimeoutError, OSError, urllib.error.URLError) as exc:
|
||||
if time.time() >= deadline:
|
||||
print(f"FAIL: status polling did not recover before deadline: {exc}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
print(f"WARN: status poll failed, retrying: {exc}", flush=True)
|
||||
time.sleep(15)
|
||||
continue
|
||||
states = {ctx: (latest.get(ctx) or {}).get("status") or (latest.get(ctx) or {}).get("state") or "missing" for ctx in required}
|
||||
summary = ", ".join(f"{ctx}={state}" for ctx, state in states.items())
|
||||
if summary != last_summary:
|
||||
print(summary, flush=True)
|
||||
last_summary = summary
|
||||
bad = {ctx: state for ctx, state in states.items() if state in terminal_bad}
|
||||
if bad:
|
||||
print("FAIL: required CI context failed:", file=sys.stderr)
|
||||
for ctx, state in bad.items():
|
||||
desc = (latest.get(ctx) or {}).get("description") or ""
|
||||
print(f" - {ctx}: {state} {desc}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
if all(state == "success" for state in states.values()):
|
||||
print(f"OK: all {len(required)} required CI contexts succeeded")
|
||||
sys.exit(0)
|
||||
if time.time() >= deadline:
|
||||
print("FAIL: timed out waiting for required CI contexts:", file=sys.stderr)
|
||||
for ctx, state in states.items():
|
||||
print(f" - {ctx}: {state}", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
time.sleep(15)
|
||||
PY
|
||||
fail=0
|
||||
check() {
|
||||
name="$1"; result="$2"
|
||||
printf 'CI / %s = %s\n' "$name" "$result"
|
||||
# `success` is the only green terminal state we accept. A plain
|
||||
# `needs:` job is only started when all needs succeed, so reaching
|
||||
# this step already implies success — but assert explicitly so a
|
||||
# future `if: always()` reintroduction (which WOULD let non-success
|
||||
# through) fails loudly instead of silently passing the gate.
|
||||
if [ "$result" != "success" ]; then
|
||||
echo "::error::aggregated CI job '${name}' did not succeed (result=${result})"
|
||||
fail=1
|
||||
fi
|
||||
}
|
||||
check "Detect changes" "$CHANGES_RESULT"
|
||||
check "Platform (Go)" "$PLATFORM_RESULT"
|
||||
check "Canvas (Next.js)" "$CANVAS_RESULT"
|
||||
check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
|
||||
check "Python Lint & Test" "$PYTHON_LINT_RESULT"
|
||||
if [ "$fail" -ne 0 ]; then
|
||||
echo "::error::all-required: one or more aggregated CI jobs did not succeed"
|
||||
exit 1
|
||||
fi
|
||||
echo "OK: all aggregated CI jobs succeeded — CI / all-required green."
|
||||
|
||||
@@ -25,6 +25,21 @@ name: Lint forbidden tenant-env keys
|
||||
# feedback_path_filtered_workflow_cant_be_required). The scan itself
|
||||
# targets workspace_secrets-writer paths via grep -r; it's fast
|
||||
# (sub-second) so unconditional run is fine.
|
||||
#
|
||||
# ── 2026-06-01 CI-scheduler-fanout consolidation (fix/ci-scheduler-fanout) ──
|
||||
# The RFC#523 sibling lint formerly in its own file
|
||||
# `lint-no-tenant-gitea-token.yml` (the broader "no repo-host token into
|
||||
# any tenant-writer surface" scan) is now a SECOND job in THIS workflow
|
||||
# (`scan-tenant-token-write`). Both are sub-second Go-source greps that
|
||||
# fired as two separate workflow runs on every PR — pure scheduler
|
||||
# fan-out. Folding the sibling in here drops one workflow run + one
|
||||
# checkout per PR while keeping BOTH scans firing unconditionally on
|
||||
# every PR (the no-paths discipline above is preserved — neither job is
|
||||
# paths-filtered). The moved job keeps its exact `name:` so its emitted
|
||||
# status context is unchanged in substance; its `# bp-exempt:` directive
|
||||
# moves with it (Tier 2g). The old `Lint no tenant GITEA or GITHUB token
|
||||
# write / …` context is retired (a disappearing context needs no
|
||||
# directive; only NEW emitters do).
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -166,3 +181,126 @@ jobs:
|
||||
fi
|
||||
|
||||
echo "OK No forbidden operator-scope env key names hardcoded in writer paths."
|
||||
|
||||
# bp-exempt: advisory RFC#523 lint; PR review gate is review-driven, not BP-driven.
|
||||
# (Carried with the workflow-name rename in PR mc#1593 so the renamed
|
||||
# context emission satisfies lint_required_context_exists_in_bp Tier 2g.)
|
||||
scan-tenant-token-write:
|
||||
name: Scan for repo-host token write into tenant workspace surface
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Find Go files referencing a tenant-writer surface AND a repo-host token
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Repo-host token NAMES — the threat-model subset. Operator-fleet
|
||||
# tokens (CP_ADMIN_API_TOKEN, RAILWAY_TOKEN, INFISICAL_*) are
|
||||
# caught by lint-forbidden-env-keys.yml's broader deny set; this
|
||||
# lint focuses on the git-host class so a single co-occurrence
|
||||
# match has a low false-positive rate.
|
||||
FORBIDDEN_KEYS=(
|
||||
"GITEA_TOKEN"
|
||||
"GITEA_PAT"
|
||||
"GITHUB_TOKEN"
|
||||
"GITHUB_PAT"
|
||||
"GH_TOKEN"
|
||||
)
|
||||
|
||||
# Tenant-writer surface markers. A file matches the surface set
|
||||
# if it references ANY of these strings. This is the "is this
|
||||
# code path writing into a tenant workspace?" heuristic.
|
||||
# Curated to catch the actual code shapes used in this repo
|
||||
# (verified by grep against current main 2026-05-19):
|
||||
# - "workspace_secrets" / "global_secrets" → DB table writes
|
||||
# - "seedAllowList" → CP-side seed table
|
||||
# - "/settings/secrets" → tenant HTTP API write
|
||||
# - "envVars[" → in-memory env map write
|
||||
# - "containerEnv" → docker-run env-set
|
||||
# - "userData" → EC2 user-data script
|
||||
# - "provisionPayload" / "provisionContext" → provision-request shape
|
||||
SURFACE_PATTERN='workspace_secrets|global_secrets|seedAllowList|/settings/secrets|envVars\[|containerEnv|userData|provisionPayload|provisionContext'
|
||||
|
||||
# Files that legitimately reference these names AND a surface
|
||||
# marker, but do so for guard / strip / test / doc-comment
|
||||
# reasons. New entries require reviewer signoff and a one-line
|
||||
# justification in the diff.
|
||||
EXEMPT_FILES=(
|
||||
# RFC#523 L1 deny-set source-of-truth + tests
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env.go"
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env_test.go"
|
||||
# Forensic-#145 silent-strip denylist (defense-in-depth, by design lists the names)
|
||||
"workspace-server/internal/provisioner/provisioner.go"
|
||||
"workspace-server/internal/provisioner/provisioner_test.go"
|
||||
# Pre-RFC#523 persona-fallback / org-helper paths. The L1
|
||||
# fail-closed runs BEFORE these writers; downstream silent-strip
|
||||
# also covers them. See applyAgentGitHTTPCreds doc-comment.
|
||||
"workspace-server/internal/handlers/agent_git_identity.go"
|
||||
"workspace-server/internal/handlers/org_helpers.go"
|
||||
"workspace-server/internal/handlers/org.go"
|
||||
# CP→platform admin auth (NOT a tenant env write).
|
||||
"workspace-server/internal/provisioner/cp_provisioner.go"
|
||||
)
|
||||
|
||||
# Build an extended-regex alternation of forbidden keys.
|
||||
KEY_ALT="$(IFS='|'; echo "${FORBIDDEN_KEYS[*]}")"
|
||||
|
||||
# Find candidate files: Go non-test sources that contain a
|
||||
# tenant-writer surface marker.
|
||||
mapfile -t CANDIDATES < <(
|
||||
grep -rlE --include='*.go' --exclude='*_test.go' \
|
||||
"${SURFACE_PATTERN}" . 2>/dev/null \
|
||||
| sed 's|^\./||' \
|
||||
| sort -u
|
||||
)
|
||||
|
||||
if [ "${#CANDIDATES[@]}" -eq 0 ]; then
|
||||
echo "OK No tenant-writer-surface files found in tree (unexpected, but not a lint failure)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
HITS=""
|
||||
for f in "${CANDIDATES[@]}"; do
|
||||
# Skip exempt files.
|
||||
skip=0
|
||||
for ex in "${EXEMPT_FILES[@]}"; do
|
||||
if [ "$f" = "$ex" ]; then skip=1; break; fi
|
||||
done
|
||||
[ "$skip" = "1" ] && continue
|
||||
|
||||
# File contains a surface marker; now grep for a forbidden
|
||||
# key NAME. We require a QUOTED-literal match to avoid
|
||||
# firing on a comment like "// also handle GITEA_TOKEN".
|
||||
#
|
||||
# The literal form catches:
|
||||
# - os.Getenv("GITEA_TOKEN")
|
||||
# - envVars["GITEA_TOKEN"] = ...
|
||||
# - {envKey: "GITEA_TOKEN", tenantKey: "GITEA_TOKEN"}
|
||||
# but not:
|
||||
# - // see GITEA_TOKEN below (no quotes)
|
||||
found=$(grep -nE "\"(${KEY_ALT})\"" "$f" 2>/dev/null || true)
|
||||
if [ -n "$found" ]; then
|
||||
HITS="${HITS}--- ${f} ---\n${found}\n"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$HITS" ]; then
|
||||
echo "::error::Task #146 lint: repo-host token name(s) quoted in a tenant-writer-surface file:"
|
||||
printf "$HITS"
|
||||
echo ""
|
||||
echo "These files reference a tenant-writer surface (workspace_secrets,"
|
||||
echo "seedAllowList, /settings/secrets, containerEnv, userData, etc.)"
|
||||
echo "AND quote a repo-host token name (GITEA_TOKEN/GITHUB_TOKEN/…)."
|
||||
echo "Per RFC#523 threat model, tenant workspaces MUST NOT receive"
|
||||
echo "operator-scope repo-host tokens. If your code legitimately needs"
|
||||
echo "to reference one of these names in a tenant-writer file (e.g."
|
||||
echo "a deny-set definition or silent-strip list), add the file to"
|
||||
echo "EXEMPT_FILES with a one-line justification — reviewer signoff"
|
||||
echo "required."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "OK No tenant-writer-surface file co-mentions a repo-host token literal."
|
||||
|
||||
@@ -1,182 +0,0 @@
|
||||
name: Lint no tenant GITEA or GITHUB token write
|
||||
|
||||
# Task #146 — CI guardrail companion to RFC#523's `lint-forbidden-env-keys.yml`.
|
||||
#
|
||||
# `lint-forbidden-env-keys.yml` (Layer 3) catches code that hardcodes a
|
||||
# forbidden env-var key NAME as a quoted literal in workspace_secrets
|
||||
# writer paths under workspace-server/internal/.
|
||||
#
|
||||
# This workflow catches a BROADER class: any code path that reads a
|
||||
# repo-host token (GITEA_TOKEN / GITHUB_TOKEN / GH_TOKEN) and then writes
|
||||
# it into a TENANT WORKSPACE's env, secret store, user-data, or
|
||||
# provision payload. This is the actual RFC#523 threat-model statement —
|
||||
# the goal is "no tenant workspace ever receives an operator-scope repo
|
||||
# token," not just "no _quoted_ literal `GITEA_TOKEN`." A future writer
|
||||
# could route the value via a variable, a struct field, or a config key
|
||||
# and slip past the existing literal scan; this lint catches those
|
||||
# routing patterns at PR review time.
|
||||
#
|
||||
# Scope
|
||||
# Scans the WHOLE repo's Go sources (not just workspace-server/) for
|
||||
# co-occurrences of:
|
||||
# - a repo-host token NAME (GITEA_TOKEN / GITHUB_TOKEN / GH_TOKEN /
|
||||
# GITEA_PAT / GITHUB_PAT) used as os.Getenv argument or string
|
||||
# literal
|
||||
# - within a file that ALSO references a tenant-writer surface
|
||||
# (`tenant`, `workspace_secrets`, `global_secrets`, `seedAllowList`,
|
||||
# `/settings/secrets`, `userData`, `provisionPayload`,
|
||||
# `envVars[`, `containerEnv`).
|
||||
#
|
||||
# Co-occurrence (not single-line) is the false-positive control: a
|
||||
# file that just LOGS the variable name (e.g. "missing GITEA_TOKEN")
|
||||
# without touching any tenant surface won't fire.
|
||||
#
|
||||
# Drift contract with lint-forbidden-env-keys.yml
|
||||
# Both lints share the same FORBIDDEN_KEYS list (a subset — only the
|
||||
# repo-host tokens, since this lint's threat model is "tenant gets
|
||||
# write access to operator's git host"). If RFC#523's deny set grows,
|
||||
# update BOTH this file AND lint-forbidden-env-keys.yml AND the Go
|
||||
# source-of-truth in
|
||||
# workspace-server/internal/handlers/workspace_provision_forbidden_env.go.
|
||||
#
|
||||
# Open-source-template-friendly
|
||||
# The patterns scanned are generic (no MOLECULE_-prefix literals).
|
||||
# A fork can copy this workflow as-is and adjust FORBIDDEN_KEYS.
|
||||
#
|
||||
# Path-filter discipline
|
||||
# No `paths:` filter — required-status workflows must run on every PR
|
||||
# per `feedback_path_filtered_workflow_cant_be_required`. Scan is
|
||||
# sub-second.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
push:
|
||||
branches: [main, staging]
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: advisory RFC#523 lint; PR review gate is review-driven, not BP-driven.
|
||||
# (Carried with the workflow-name rename in PR mc#1593 so the renamed
|
||||
# context emission satisfies lint_required_context_exists_in_bp Tier 2g.)
|
||||
scan:
|
||||
name: Scan for repo-host token write into tenant workspace surface
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Find Go files referencing a tenant-writer surface AND a repo-host token
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Repo-host token NAMES — the threat-model subset. Operator-fleet
|
||||
# tokens (CP_ADMIN_API_TOKEN, RAILWAY_TOKEN, INFISICAL_*) are
|
||||
# caught by lint-forbidden-env-keys.yml's broader deny set; this
|
||||
# lint focuses on the git-host class so a single co-occurrence
|
||||
# match has a low false-positive rate.
|
||||
FORBIDDEN_KEYS=(
|
||||
"GITEA_TOKEN"
|
||||
"GITEA_PAT"
|
||||
"GITHUB_TOKEN"
|
||||
"GITHUB_PAT"
|
||||
"GH_TOKEN"
|
||||
)
|
||||
|
||||
# Tenant-writer surface markers. A file matches the surface set
|
||||
# if it references ANY of these strings. This is the "is this
|
||||
# code path writing into a tenant workspace?" heuristic.
|
||||
# Curated to catch the actual code shapes used in this repo
|
||||
# (verified by grep against current main 2026-05-19):
|
||||
# - "workspace_secrets" / "global_secrets" → DB table writes
|
||||
# - "seedAllowList" → CP-side seed table
|
||||
# - "/settings/secrets" → tenant HTTP API write
|
||||
# - "envVars[" → in-memory env map write
|
||||
# - "containerEnv" → docker-run env-set
|
||||
# - "userData" → EC2 user-data script
|
||||
# - "provisionPayload" / "provisionContext" → provision-request shape
|
||||
SURFACE_PATTERN='workspace_secrets|global_secrets|seedAllowList|/settings/secrets|envVars\[|containerEnv|userData|provisionPayload|provisionContext'
|
||||
|
||||
# Files that legitimately reference these names AND a surface
|
||||
# marker, but do so for guard / strip / test / doc-comment
|
||||
# reasons. New entries require reviewer signoff and a one-line
|
||||
# justification in the diff.
|
||||
EXEMPT_FILES=(
|
||||
# RFC#523 L1 deny-set source-of-truth + tests
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env.go"
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env_test.go"
|
||||
# Forensic-#145 silent-strip denylist (defense-in-depth, by design lists the names)
|
||||
"workspace-server/internal/provisioner/provisioner.go"
|
||||
"workspace-server/internal/provisioner/provisioner_test.go"
|
||||
# Pre-RFC#523 persona-fallback / org-helper paths. The L1
|
||||
# fail-closed runs BEFORE these writers; downstream silent-strip
|
||||
# also covers them. See applyAgentGitHTTPCreds doc-comment.
|
||||
"workspace-server/internal/handlers/agent_git_identity.go"
|
||||
"workspace-server/internal/handlers/org_helpers.go"
|
||||
"workspace-server/internal/handlers/org.go"
|
||||
# CP→platform admin auth (NOT a tenant env write).
|
||||
"workspace-server/internal/provisioner/cp_provisioner.go"
|
||||
)
|
||||
|
||||
# Build an extended-regex alternation of forbidden keys.
|
||||
KEY_ALT="$(IFS='|'; echo "${FORBIDDEN_KEYS[*]}")"
|
||||
|
||||
# Find candidate files: Go non-test sources that contain a
|
||||
# tenant-writer surface marker.
|
||||
mapfile -t CANDIDATES < <(
|
||||
grep -rlE --include='*.go' --exclude='*_test.go' \
|
||||
"${SURFACE_PATTERN}" . 2>/dev/null \
|
||||
| sed 's|^\./||' \
|
||||
| sort -u
|
||||
)
|
||||
|
||||
if [ "${#CANDIDATES[@]}" -eq 0 ]; then
|
||||
echo "OK No tenant-writer-surface files found in tree (unexpected, but not a lint failure)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
HITS=""
|
||||
for f in "${CANDIDATES[@]}"; do
|
||||
# Skip exempt files.
|
||||
skip=0
|
||||
for ex in "${EXEMPT_FILES[@]}"; do
|
||||
if [ "$f" = "$ex" ]; then skip=1; break; fi
|
||||
done
|
||||
[ "$skip" = "1" ] && continue
|
||||
|
||||
# File contains a surface marker; now grep for a forbidden
|
||||
# key NAME. We require a QUOTED-literal match to avoid
|
||||
# firing on a comment like "// also handle GITEA_TOKEN".
|
||||
#
|
||||
# The literal form catches:
|
||||
# - os.Getenv("GITEA_TOKEN")
|
||||
# - envVars["GITEA_TOKEN"] = ...
|
||||
# - {envKey: "GITEA_TOKEN", tenantKey: "GITEA_TOKEN"}
|
||||
# but not:
|
||||
# - // see GITEA_TOKEN below (no quotes)
|
||||
found=$(grep -nE "\"(${KEY_ALT})\"" "$f" 2>/dev/null || true)
|
||||
if [ -n "$found" ]; then
|
||||
HITS="${HITS}--- ${f} ---\n${found}\n"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$HITS" ]; then
|
||||
echo "::error::Task #146 lint: repo-host token name(s) quoted in a tenant-writer-surface file:"
|
||||
printf "$HITS"
|
||||
echo ""
|
||||
echo "These files reference a tenant-writer surface (workspace_secrets,"
|
||||
echo "seedAllowList, /settings/secrets, containerEnv, userData, etc.)"
|
||||
echo "AND quote a repo-host token name (GITEA_TOKEN/GITHUB_TOKEN/…)."
|
||||
echo "Per RFC#523 threat model, tenant workspaces MUST NOT receive"
|
||||
echo "operator-scope repo-host tokens. If your code legitimately needs"
|
||||
echo "to reference one of these names in a tenant-writer file (e.g."
|
||||
echo "a deny-set definition or silent-strip list), add the file to"
|
||||
echo "EXEMPT_FILES with a one-line justification — reviewer signoff"
|
||||
echo "required."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "OK No tenant-writer-surface file co-mentions a repo-host token literal."
|
||||
@@ -35,8 +35,26 @@ name: verify-providers-gen
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
# CI-scheduler-overload fix (fix/ci-scheduler-fanout, 2026-06-01):
|
||||
# this gate only verifies that the generated providers artifact is in
|
||||
# sync with the schema SSOT. Its verdict can ONLY change when one of
|
||||
# the codegen inputs/outputs changes, so firing the Go toolchain on
|
||||
# every unrelated PR (docs, canvas, scripts) is pure fan-out cost.
|
||||
# Scoped to the codegen surface. SAFE because this workflow is NOT a
|
||||
# branch-protection status_check_context (see header §ENFORCEMENT
|
||||
# GATING) — lint-required-no-paths only forbids paths filters on
|
||||
# REQUIRED workflows; this is advisory, so a paths filter is allowed.
|
||||
# Mirrors the sibling sync-providers-yaml.yml scoping convention.
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/**'
|
||||
- 'workspace-server/cmd/gen-providers/**'
|
||||
- '.gitea/workflows/verify-providers-gen.yml'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/**'
|
||||
- 'workspace-server/cmd/gen-providers/**'
|
||||
- '.gitea/workflows/verify-providers-gen.yml'
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
@@ -243,7 +243,12 @@ func (h *AdminSchedulesHealthHandler) ReapOrphans(c *gin.Context) {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "re-point failed"})
|
||||
return
|
||||
}
|
||||
repointedN, _ := repointed.RowsAffected()
|
||||
repointedN, err := repointed.RowsAffected()
|
||||
if err != nil {
|
||||
log.Printf("ReapOrphans: repointed rows affected: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "re-point failed"})
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Disable any remaining schedules still bound to a removed/missing
|
||||
// workspace (no live successor, or template schedules on a dead row).
|
||||
@@ -261,7 +266,12 @@ func (h *AdminSchedulesHealthHandler) ReapOrphans(c *gin.Context) {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "disable failed"})
|
||||
return
|
||||
}
|
||||
disabledN, _ := disabled.RowsAffected()
|
||||
disabledN, err := disabled.RowsAffected()
|
||||
if err != nil {
|
||||
log.Printf("ReapOrphans: disabled rows affected: %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "disable failed"})
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("ReapOrphans: re-pointed %d, disabled %d orphaned schedule(s)", repointedN, disabledN)
|
||||
c.JSON(http.StatusOK, gin.H{"repointed": repointedN, "disabled": disabledN})
|
||||
|
||||
@@ -252,6 +252,9 @@ func scanAuditRows(rows *sql.Rows) ([]auditEventRow, error) {
|
||||
}
|
||||
result = append(result, ev)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -377,6 +377,9 @@ func readWorkspaceDeriveInputs(ctx context.Context, workspaceID string) (runtime
|
||||
availableAuthEnv = append(availableAuthEnv, k)
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
log.Printf("llm_billing_mode: read secrets rows error for %s: %v (deriving with partial model/auth-env)", workspaceID, err)
|
||||
}
|
||||
return runtime, model, availableAuthEnv
|
||||
}
|
||||
|
||||
@@ -453,7 +456,10 @@ func SetWorkspaceLLMBillingMode(ctx context.Context, workspaceID, mode string) e
|
||||
if err != nil {
|
||||
return fmt.Errorf("clear workspace llm_billing_mode for %s: %w", workspaceID, err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
n, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
return fmt.Errorf("clear workspace llm_billing_mode rows affected %s: %w", workspaceID, err)
|
||||
}
|
||||
if n == 0 {
|
||||
return sql.ErrNoRows
|
||||
}
|
||||
@@ -470,7 +476,10 @@ func SetWorkspaceLLMBillingMode(ctx context.Context, workspaceID, mode string) e
|
||||
if err != nil {
|
||||
return fmt.Errorf("set workspace llm_billing_mode for %s: %w", workspaceID, err)
|
||||
}
|
||||
n, _ := res.RowsAffected()
|
||||
n, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
return fmt.Errorf("set workspace llm_billing_mode rows affected %s: %w", workspaceID, err)
|
||||
}
|
||||
if n == 0 {
|
||||
return sql.ErrNoRows
|
||||
}
|
||||
|
||||
@@ -750,7 +750,12 @@ func (h *OrgHandler) migrateRuntimeSchedulesFromRemovedPredecessor(ctx context.C
|
||||
log.Printf("Org import: schedule migration %s -> %s (%q) failed: %v", predID, newID, name, err)
|
||||
return
|
||||
}
|
||||
if n, _ := res.RowsAffected(); n > 0 {
|
||||
n, err := res.RowsAffected()
|
||||
if err != nil {
|
||||
log.Printf("Org import: schedule migration rows affected %s -> %s: %v", predID, newID, err)
|
||||
return
|
||||
}
|
||||
if n > 0 {
|
||||
log.Printf("Org import: migrated %d runtime schedule(s) from removed predecessor %s to new workspace %s (%q)", n, predID, newID, name)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ func requireCallerOwnsOrg(c *gin.Context) (string, error) {
|
||||
orgID, err := orgtoken.OrgIDByTokenID(c.Request.Context(), db.DB, tokID)
|
||||
if err != nil {
|
||||
// DB error — deny by default rather than risk cross-org access.
|
||||
return "", fmt.Errorf("allowlist: requireCallerOwnsOrg: %v", err)
|
||||
return "", fmt.Errorf("allowlist: requireCallerOwnsOrg: %w", err)
|
||||
}
|
||||
return orgID, nil
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ func isSafeURL(rawURL string) error {
|
||||
}
|
||||
addrs, err := net.LookupHost(host)
|
||||
if err != nil {
|
||||
return fmt.Errorf("DNS resolution blocked for hostname: %s (%v)", host, err)
|
||||
return fmt.Errorf("DNS resolution blocked for hostname: %s (%w)", host, err)
|
||||
}
|
||||
if len(addrs) == 0 {
|
||||
return fmt.Errorf("DNS returned no addresses for: %s", host)
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseTopLevelRuntime(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
yaml string
|
||||
want string
|
||||
}{
|
||||
{"top-level claude-code", "name: x\nruntime: claude-code\ntier: 2\n", "claude-code"},
|
||||
{"top-level google-adk", "runtime: google-adk\n", "google-adk"},
|
||||
{"quoted value", `runtime: "google-adk"` + "\n", "google-adk"},
|
||||
{"single-quoted value", "runtime: 'codex'\n", "codex"},
|
||||
{"ignores runtime_config nested model", "runtime: google-adk\nruntime_config:\n model: vertex:gemini-2.5-pro\n", "google-adk"},
|
||||
{"runtime_config only, no top-level runtime", "name: y\nruntime_config:\n model: x\n", ""},
|
||||
{"indented runtime is not top-level", "wrapper:\n runtime: claude-code\n", ""},
|
||||
{"empty", "", ""},
|
||||
{"no runtime key", "name: z\ntier: 4\n", ""},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
if got := parseTopLevelRuntime([]byte(tc.yaml)); got != tc.want {
|
||||
t.Fatalf("parseTopLevelRuntime(%q) = %q, want %q", tc.yaml, got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSeededConfigRuntime(t *testing.T) {
|
||||
// in-memory configFiles wins over template dir.
|
||||
t.Run("from configFiles", func(t *testing.T) {
|
||||
cf := map[string][]byte{"config.yaml": []byte("runtime: google-adk\n")}
|
||||
if got := seededConfigRuntime("/nonexistent", cf); got != "google-adk" {
|
||||
t.Fatalf("got %q, want google-adk", got)
|
||||
}
|
||||
})
|
||||
|
||||
// falls back to template dir's config.yaml.
|
||||
t.Run("from template dir", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte("name: a\nruntime: claude-code\n"), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got := seededConfigRuntime(dir, nil); got != "claude-code" {
|
||||
t.Fatalf("got %q, want claude-code", got)
|
||||
}
|
||||
})
|
||||
|
||||
// nothing available → "".
|
||||
t.Run("indeterminate", func(t *testing.T) {
|
||||
if got := seededConfigRuntime("", nil); got != "" {
|
||||
t.Fatalf("got %q, want empty", got)
|
||||
}
|
||||
if got := seededConfigRuntime("/does/not/exist", map[string][]byte{}); got != "" {
|
||||
t.Fatalf("got %q, want empty", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestRuntimeSeedMismatchAbort(t *testing.T) {
|
||||
adkCfg := map[string][]byte{"config.yaml": []byte("runtime: google-adk\n")}
|
||||
ccCfg := map[string][]byte{"config.yaml": []byte("name: Claude Code Agent\nruntime: claude-code\n")}
|
||||
|
||||
t.Run("mismatch fails loud (the #2027 demo bug)", func(t *testing.T) {
|
||||
// requested google-adk, but seeding the claude-code-default config.
|
||||
abort := runtimeSeedMismatchAbort("google-adk", "", ccCfg)
|
||||
if abort == nil {
|
||||
t.Fatal("expected abort for google-adk requested but claude-code seeded, got nil")
|
||||
}
|
||||
if abort.Extra["requested_runtime"] != "google-adk" || abort.Extra["seeded_runtime"] != "claude-code" {
|
||||
t.Fatalf("abort.Extra mismatch: %+v", abort.Extra)
|
||||
}
|
||||
if abort.Extra["issue"] != "2027" {
|
||||
t.Fatalf("expected issue 2027 tag, got %v", abort.Extra["issue"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("match is allowed", func(t *testing.T) {
|
||||
if abort := runtimeSeedMismatchAbort("google-adk", "", adkCfg); abort != nil {
|
||||
t.Fatalf("expected no abort when seeded runtime matches, got %q", abort.Msg)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("empty requested runtime is allowed (org-template default path)", func(t *testing.T) {
|
||||
if abort := runtimeSeedMismatchAbort("", "", ccCfg); abort != nil {
|
||||
t.Fatalf("expected no abort for unspecified runtime, got %q", abort.Msg)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("indeterminate seed is allowed (CP mode, no local config bytes)", func(t *testing.T) {
|
||||
if abort := runtimeSeedMismatchAbort("google-adk", "", nil); abort != nil {
|
||||
t.Fatalf("expected no abort when seeded runtime is indeterminate, got %q", abort.Msg)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("mismatch via template dir also fails loud", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
if err := os.WriteFile(filepath.Join(dir, "config.yaml"), []byte("runtime: claude-code\n"), 0o600); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if abort := runtimeSeedMismatchAbort("hermes", dir, nil); abort == nil {
|
||||
t.Fatal("expected abort for hermes requested but claude-code template seeded")
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -37,8 +37,11 @@ package handlers
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/events"
|
||||
@@ -263,6 +266,22 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
}
|
||||
}
|
||||
|
||||
// Preflight: runtime-seed match (issue #2027). Fail LOUD when a workspace
|
||||
// NAMED a runtime but the config.yaml we're about to seed declares a
|
||||
// different top-level runtime — the symmetric counterpart to selectImage's
|
||||
// ErrUnresolvableRuntime guard, on the config/template side. Pre-fix, when a
|
||||
// runtime's workspace template wasn't in the tenant cache at provision time
|
||||
// (or sanitizeRuntime coerced an unknown runtime), seeding silently fell
|
||||
// back to the claude-code-default template: the image+env said e.g.
|
||||
// google-adk but the seeded config said claude-code, so the agent booted
|
||||
// mislabeled and personaless yet looked 'online' and returned canned
|
||||
// non-answers. Refusing loudly turns that silent wrong-agent into a visible
|
||||
// WORKSPACE_PROVISION_FAILED the operator can act on.
|
||||
if abort := runtimeSeedMismatchAbort(payload.Runtime, templatePath, configFiles); abort != nil {
|
||||
log.Printf("Provisioner: ABORT workspace=%s — %s", workspaceID, abort.Msg)
|
||||
return nil, abort
|
||||
}
|
||||
|
||||
cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, pluginsPath)
|
||||
cfg.ResetClaudeSession = resetClaudeSession
|
||||
|
||||
@@ -273,6 +292,76 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
}, nil
|
||||
}
|
||||
|
||||
// runtimeSeedMismatchAbort returns a non-nil abort when a workspace NAMED a
|
||||
// runtime but the config.yaml about to be seeded declares a *different*
|
||||
// top-level runtime — the fail-loud counterpart to selectImage's
|
||||
// ErrUnresolvableRuntime (issue #2027). It catches the silent
|
||||
// claude-code-default substitution that occurs when a runtime's workspace
|
||||
// template isn't cached at provision time (or sanitizeRuntime coerced an
|
||||
// unknown runtime to claude-code): both surface as a seeded config whose
|
||||
// runtime contradicts the requested one.
|
||||
//
|
||||
// Pure (modulo reading the template dir's config.yaml). An empty
|
||||
// requestedRuntime (unspecified / org-template default path) or an
|
||||
// indeterminate seeded runtime (e.g. CP mode with no local config bytes) is
|
||||
// allowed — we only fail on a concrete, contradictory signal, never on
|
||||
// absence of one.
|
||||
func runtimeSeedMismatchAbort(requestedRuntime, templatePath string, configFiles map[string][]byte) *provisionAbort {
|
||||
if requestedRuntime == "" {
|
||||
return nil
|
||||
}
|
||||
seeded := seededConfigRuntime(templatePath, configFiles)
|
||||
if seeded == "" || seeded == requestedRuntime {
|
||||
return nil
|
||||
}
|
||||
msg := fmt.Sprintf(
|
||||
"runtime seed mismatch: workspace requested runtime %q but the seeded config.yaml declares %q — the %q workspace template was not available at provision time (silent %q fallback). Refusing to launch a mislabeled agent; refresh the template cache (POST /admin/templates/refresh) and re-provision.",
|
||||
requestedRuntime, seeded, requestedRuntime, seeded,
|
||||
)
|
||||
return &provisionAbort{
|
||||
Msg: msg,
|
||||
Extra: map[string]interface{}{
|
||||
"error": msg,
|
||||
"requested_runtime": requestedRuntime,
|
||||
"seeded_runtime": seeded,
|
||||
"issue": "2027",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// seededConfigRuntime extracts the top-level `runtime:` from the config.yaml
|
||||
// that will be seeded into the workspace — preferring the in-memory
|
||||
// configFiles, falling back to the template directory on disk. Returns ""
|
||||
// when no config.yaml is available or it declares no top-level runtime.
|
||||
func seededConfigRuntime(templatePath string, configFiles map[string][]byte) string {
|
||||
if data, ok := configFiles["config.yaml"]; ok {
|
||||
return parseTopLevelRuntime(data)
|
||||
}
|
||||
if templatePath != "" {
|
||||
if data, err := os.ReadFile(filepath.Join(templatePath, "config.yaml")); err == nil {
|
||||
return parseTopLevelRuntime(data)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// parseTopLevelRuntime returns the value of the top-level `runtime:` key in a
|
||||
// config.yaml, ignoring the nested `runtime_config:` block. A small dedicated
|
||||
// line scanner (mirrors the one the Create handler uses to read a template's
|
||||
// runtime) so the provision-time guard needs no YAML dependency.
|
||||
func parseTopLevelRuntime(data []byte) string {
|
||||
for _, raw := range strings.Split(string(data), "\n") {
|
||||
trimmed := strings.TrimLeft(raw, " \t")
|
||||
if len(raw) > len(trimmed) {
|
||||
continue // indented — inside a nested block (e.g. runtime_config:)
|
||||
}
|
||||
if strings.HasPrefix(trimmed, "runtime:") && !strings.HasPrefix(trimmed, "runtime_config") {
|
||||
return strings.Trim(strings.TrimSpace(strings.TrimPrefix(trimmed, "runtime:")), `"'`)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// mintWorkspaceSecrets issues + persists the workspace auth token
|
||||
// AND the platform→workspace inbound secret (#2312). Both modes MUST
|
||||
// call this — Docker mints + writes to local config volume; SaaS
|
||||
|
||||
@@ -16,7 +16,7 @@ const SchemaVersion = 1
|
||||
// Fingerprint is a stable content hash of the generated projection (schema
|
||||
// version + provider catalog + runtime native sets). It changes iff the
|
||||
// registry DATA changes (comment-only YAML edits do not churn it).
|
||||
const Fingerprint = "1100bc3e42a5f425"
|
||||
const Fingerprint = "8f733b112695b926"
|
||||
|
||||
// GenProvider is the generated projection of one provider catalog entry —
|
||||
// the subset a downstream consumer needs to derive + display a provider.
|
||||
@@ -80,8 +80,8 @@ var Runtimes = map[string][]GenRuntimeRef{
|
||||
{Name: "anthropic-oauth", Models: []string{"sonnet", "opus", "haiku", "anthropic:sonnet", "anthropic:opus", "anthropic:haiku"}},
|
||||
{Name: "anthropic-api", Models: []string{"claude-sonnet-4-6", "claude-opus-4-7", "claude-haiku-4-5", "claude-sonnet-4-5", "anthropic:claude-sonnet-4-6", "anthropic:claude-opus-4-7", "anthropic:claude-haiku-4-5", "anthropic:claude-sonnet-4-5"}},
|
||||
{Name: "kimi-coding", Models: []string{"kimi-for-coding", "kimi-k2.5", "kimi-k2", "moonshot:kimi-k2.6", "moonshot:kimi-k2.5"}},
|
||||
{Name: "minimax", Models: []string{"MiniMax-M2", "MiniMax-M2.7", "MiniMax-M2.7-highspeed", "minimax:MiniMax-M2", "minimax:MiniMax-M2.7", "minimax:MiniMax-M2.7-highspeed"}},
|
||||
{Name: "platform", Models: []string{"anthropic/claude-opus-4-7", "anthropic/claude-sonnet-4-6", "moonshot/kimi-k2.6", "moonshot/kimi-k2.5", "minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed"}},
|
||||
{Name: "minimax", Models: []string{"MiniMax-M2", "MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M3", "minimax:MiniMax-M2", "minimax:MiniMax-M2.7", "minimax:MiniMax-M2.7-highspeed", "minimax:MiniMax-M3"}},
|
||||
{Name: "platform", Models: []string{"anthropic/claude-opus-4-7", "anthropic/claude-sonnet-4-6", "moonshot/kimi-k2.6", "moonshot/kimi-k2.5", "minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed", "minimax/MiniMax-M3"}},
|
||||
},
|
||||
"codex": {
|
||||
{Name: "openai-subscription", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
|
||||
|
||||
@@ -348,8 +348,8 @@ providers:
|
||||
vendor_logo: "moonshot"
|
||||
protocol: anthropic
|
||||
auth_mode: third_party_anthropic_compat
|
||||
base_url_template: "https://api.kimi.com/coding/"
|
||||
base_url_anthropic: "https://api.kimi.com/coding/"
|
||||
base_url_template: "https://api.kimi.com/coding/v1"
|
||||
base_url_anthropic: "https://api.kimi.com/coding/v1"
|
||||
auth_env: [KIMI_API_KEY, ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN]
|
||||
# x-api-key header (NOT bearer) per kimi.com's Claude Code integration doc.
|
||||
auth_token_env: ANTHROPIC_API_KEY
|
||||
@@ -403,7 +403,7 @@ providers:
|
||||
vendor_logo: "google"
|
||||
protocol: openai
|
||||
auth_mode: third_party_anthropic_compat
|
||||
base_url_template: null
|
||||
base_url_template: "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
base_url_anthropic: null
|
||||
auth_env: [GEMINI_API_KEY, GOOGLE_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
@@ -733,9 +733,11 @@ runtimes:
|
||||
- MiniMax-M2
|
||||
- MiniMax-M2.7
|
||||
- MiniMax-M2.7-highspeed
|
||||
- MiniMax-M3
|
||||
- minimax:MiniMax-M2
|
||||
- minimax:MiniMax-M2.7
|
||||
- minimax:MiniMax-M2.7-highspeed
|
||||
- minimax:MiniMax-M3
|
||||
# Platform-managed (no tenant key; Molecule owns billing). The
|
||||
# vendor/model-namespaced ids the proxy resolves to the upstream vendor.
|
||||
# Canonical for the template's `provider: platform` model entries — the
|
||||
@@ -749,6 +751,7 @@ runtimes:
|
||||
- moonshot/kimi-k2.5
|
||||
- minimax/MiniMax-M2.7
|
||||
- minimax/MiniMax-M2.7-highspeed
|
||||
- minimax/MiniMax-M3
|
||||
|
||||
# hermes: native Kimi only (kimi-coding gateway). hermes-agent owns its own
|
||||
# broad provider matrix, but the CTO native matrix for the Molecule
|
||||
|
||||
@@ -103,10 +103,10 @@ func TestModelsForRuntime_ExactModelIDs(t *testing.T) {
|
||||
// kimi via platform proxy
|
||||
"moonshot/kimi-k2.6", "moonshot/kimi-k2.5",
|
||||
// minimax BYOK (bare + legacy colon-namespaced)
|
||||
"MiniMax-M2", "MiniMax-M2.7", "MiniMax-M2.7-highspeed",
|
||||
"minimax:MiniMax-M2", "minimax:MiniMax-M2.7", "minimax:MiniMax-M2.7-highspeed",
|
||||
"MiniMax-M2", "MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M3",
|
||||
"minimax:MiniMax-M2", "minimax:MiniMax-M2.7", "minimax:MiniMax-M2.7-highspeed", "minimax:MiniMax-M3",
|
||||
// minimax via platform proxy
|
||||
"minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed",
|
||||
"minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed", "minimax/MiniMax-M3",
|
||||
},
|
||||
// hermes: kimi (BYOK gateway) + platform-managed kimi.
|
||||
"hermes": {
|
||||
|
||||
@@ -29,7 +29,7 @@ import (
|
||||
// canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as
|
||||
// synced from molecule-controlplane. Bumped deliberately on each re-sync (see
|
||||
// file doc). Cross-checked live by the sync-providers-yaml CI workflow.
|
||||
const canonicalProvidersYAMLSHA256 = "dedbb8ccb00155202a54160b38574f9e6688587509e79ffc676e8e2667f76187"
|
||||
const canonicalProvidersYAMLSHA256 = "dec73199e26cee2d395a0acece99771618d3879dc5ca724ba57cb5b38079c6ce"
|
||||
|
||||
func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) {
|
||||
sum := sha256.Sum256(embeddedYAML)
|
||||
|
||||
Reference in New Issue
Block a user