Compare commits

..

1 Commits

Author SHA1 Message Date
Molecule AI Dev Engineer A (Kimi) f12c38b3f6 chore(dead-code): remove unused QueueDepth function
QueueDepth was added for Phase 2/3 busy-return response visibility but
was never wired to a caller. The inline depth query in EnqueueA2A serves
today's enqueue response, making this function dead code.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 07:23:28 +00:00
266 changed files with 3318 additions and 25950 deletions
+5 -14
View File
@@ -19,22 +19,13 @@ REDIS_URL=redis://localhost:6379
# itself to 3000 in canvas/package.json, so sourcing this file before
# `npm run dev` won't accidentally make Next.js try to bind 8080.
PORT=8080
# ---- Admin credential — REQUIRED in EVERY environment (auth is fail-closed) ----
# Auth is fail-CLOSED everywhere now (harden/no-fail-open-auth): there is NO
# dev-mode escape hatch. AdminAuth / WorkspaceAuth / discovery all require a
# real credential. The canvas authenticates by sending this value as a bearer
# (it reads NEXT_PUBLIC_ADMIN_TOKEN — set it to the SAME value).
# ---- Admin credential — REQUIRED to close issue #684 (AdminAuth bearer bypass) ----
# When ADMIN_TOKEN is set, only this value is accepted on /admin/* and /approvals/* routes.
# (When unset, a fresh install 401s on admin routes and any valid workspace bearer
# is the only deprecated fallback once tokens exist — set ADMIN_TOKEN to close #684.)
# Generate: openssl rand -base64 32 (scripts/dev-start.sh provisions a fixed dev value)
# Without it, any valid workspace bearer token can call admin endpoints (backward compat
# fallback, still vulnerable). Set this in every environment, rotate when compromised.
# Generate: openssl rand -base64 32
# Store in fly secrets / deployment env — NEVER commit the actual value here.
ADMIN_TOKEN=
# NEXT_PUBLIC_ADMIN_TOKEN= # Canvas-side mirror of ADMIN_TOKEN. The canvas
# bakes this into its bundle and sends it as the
# bearer. MUST equal ADMIN_TOKEN (next.config.ts
# warns if the pair is half-set). dev-start.sh
# exports it for you.
SECRETS_ENCRYPTION_KEY= # 32-byte key (raw or base64). Leave empty for plaintext (dev only).
CONFIGS_DIR= # Path to workspace-configs-templates/ (auto-discovered if empty)
PLUGINS_DIR= # Path to plugins/ directory (default: /plugins in container)
@@ -43,7 +34,7 @@ PLUGINS_DIR= # Path to plugins/ directory (default: /plugins i
# MOLECULE_MCP_ALLOW_SEND_MESSAGE= # Set to "true" to include send_message_to_user in the MCP bridge tool list (issue #810). Excluded by default to prevent unintended WebSocket pushes from CLI sessions.
# MOLECULE_MCP_URL=http://localhost:8080 # Platform URL for opencode MCP config (opencode.json). Same as PLATFORM_URL; separate var so opencode configs can reference it without ambiguity.
# WORKSPACE_DIR= # Optional global host path bind-mounted to /workspace in every container. Per-workspace workspace_dir column overrides this; if neither is set each workspace gets an isolated Docker named volume.
MOLECULE_ENV=development # Environment label (development/staging/production). Used for log tagging and for NON-security local-dev conveniences (loopback HTTP bind, relaxed rate-limit bucket). It is NOT an auth lever — auth is fail-closed in every environment. SaaS deployments MUST set MOLECULE_ENV=production.
MOLECULE_ENV=development # Environment label (development/staging/production). Used for log tagging and for the AdminAuth dev-mode escape hatch (lets the Canvas dashboard keep working after the first workspace is created, when ADMIN_TOKEN is unset). SaaS deployments MUST set MOLECULE_ENV=production.
# MOLECULE_ENABLE_TEST_TOKENS= # Set to 1 to expose GET /admin/workspaces/:id/test-token (mints a fresh bearer token for E2E scripts). The route is auto-enabled when MOLECULE_ENV != production; this flag is the explicit override. Leave unset/0 in prod — the route 404s unless enabled.
# MOLECULE_ORG_ID= # SaaS only: org UUID set by control plane on tenant machines. When set, workspace provisioning auto-routes through the control plane API instead of Docker.
# CP_PROVISION_URL= # Override control plane URL for workspace provisioning (default: https://api.moleculesai.app). Only needed for testing against a non-production control plane.
+20 -61
View File
@@ -31,7 +31,7 @@
#
# REQUIRED_CHECKS (legacy) is a newline-separated list used when the
# JSON variable is not set. Declared in the workflow YAML rather than
# fetched from /branch_protections (which needs admin scope —
# fetched from /branch_protections (which needs admin scope — sop-tier-bot
# has read-only). Trade dynamism for simplicity: when the required-check
# set changes, update both branch protection AND this env. Keeping them
# in sync is less complexity than granting the audit bot admin perms on
@@ -54,57 +54,32 @@ API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
# 1. Fetch the PR. If not merged, no-op.
# Fail-closed: verify HTTP 200 before parsing. A 401/403/404 means the token
# is invalid or the PR is inaccessible — we must NOT silently treat that as
# "not merged" and skip the audit.
PR_TMP=$(mktemp)
PR_HTTP=$(curl -sS -o "$PR_TMP" -w '%{http_code}' -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
PR=$(cat "$PR_TMP")
rm -f "$PR_TMP"
if [ "$PR_HTTP" != "200" ]; then
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${PR_HTTP} — cannot evaluate merge state."
exit 1
fi
# FAIL-CLOSED: a 200 response with a missing/malformed `merged` field must
# NOT be treated as "not merged" (that would silently skip the audit).
# We verify both presence AND correct type for every field we consume.
PR_SCHEMA_OK=$(echo "$PR" | jq -r '
(.merged | type == "boolean") and
(.merge_commit_sha | type == "string") and
(.merged_by | type == "object") and (.merged_by.login | type == "string") and
(.base | type == "object") and (.base.ref | type == "string") and
(.head | type == "object") and (.head.sha | type == "string")
')
if [ "$PR_SCHEMA_OK" != "true" ]; then
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP 200 but one or more required fields are missing, null, or of wrong type — cannot evaluate force-merge."
exit 1
fi
MERGED=$(echo "$PR" | jq -r '.merged')
PR=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
MERGED=$(echo "$PR" | jq -r '.merged // false')
if [ "$MERGED" != "true" ]; then
echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
exit 0
fi
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha')
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login')
# NOTE: no || true — with set -euo pipefail, jq parse failures (e.g. field
# missing from API response) propagate as hard errors. Use jq's // operator
# for graceful defaults instead of bash || true guards. This was re-added by
# 8c343e3a ("fix(gitea): add || true guards to jq pipelines") — reverted
# here because the guards mask silent failures that hide malformed API responses.
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
TITLE=$(echo "$PR" | jq -r '.title // ""')
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref')
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha')
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')
if [ -z "$MERGE_SHA" ]; then
echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
exit 0
fi
# 2. Required status checks — branch-aware JSON dict takes precedence.
if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then
# FAIL-CLOSED: if REQUIRED_CHECKS_JSON is set, the branch entry must exist
# and be an array. A missing branch or non-array value means the config is
# malformed or drifted — we must NOT silently treat it as "no checks".
_RC_JSON_OK=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '
has($branch) and (.[$branch] | type == "array")
')
if [ "$_RC_JSON_OK" != "true" ]; then
echo "::error::REQUIRED_CHECKS_JSON missing or non-array entry for branch '$BASE_BRANCH' — cannot evaluate required checks."
exit 1
fi
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] | .[]')
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] // [] | .[]')
else
REQUIRED="$REQUIRED_CHECKS"
fi
@@ -116,28 +91,12 @@ fi
# 3. Status-check state at the PR HEAD (where checks ran). The merge
# commit doesn't get its own checks; we evaluate the PR's last
# commit, which is what branch protection compared against.
# Fail-closed: verify HTTP 200. A 401/403/404 means the status is
# unreadable — we must NOT treat that as "no statuses" and skip checks.
STATUS_TMP=$(mktemp)
STATUS_HTTP=$(curl -sS -o "$STATUS_TMP" -w '%{http_code}' -H "$AUTH" \
STATUS=$(curl -sS -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
STATUS=$(cat "$STATUS_TMP")
rm -f "$STATUS_TMP"
if [ "$STATUS_HTTP" != "200" ]; then
echo "::error::GET /commits/${HEAD_SHA}/status returned HTTP ${STATUS_HTTP} — cannot evaluate required checks."
exit 1
fi
# FAIL-CLOSED: a 200 status response missing the 'statuses' array, or with
# 'statuses' set to a non-array type (null/string/object), must NOT be treated
# as "no checks" — that would silently declare all checks green.
if ! echo "$STATUS" | jq -e '(.statuses | type) == "array"' >/dev/null; then
echo "::error::GET /commits/${HEAD_SHA}/status returned HTTP 200 but 'statuses' is missing or not an array — cannot evaluate required checks."
exit 1
fi
declare -A CHECK_STATE
while IFS=$'\t' read -r ctx state; do
[ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
done < <(echo "$STATUS" | jq -r '.statuses | .[] | "\(.context)\t\(.status)"')
done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')
# 4. For each required check, was it green at merge? YAML block scalars
# (`|`) leave a trailing newline; skip blank/whitespace-only lines.
+64 -173
View File
@@ -8,8 +8,7 @@ pair diverges.
Sources:
A. `.gitea/workflows/ci.yml` jobs (CI source — the actual job set)
B. `status_check_contexts` in branch_protections (the merge gate)
C. `REQUIRED_CHECKS_JSON` (preferred) or `REQUIRED_CHECKS` (legacy)
env in audit-force-merge.yml (the audit env)
C. `REQUIRED_CHECKS` env in audit-force-merge.yml (the audit env)
Three failure classes:
F1 Job in (A) is not under the sentinel's `needs:` — sentinel
@@ -251,21 +250,13 @@ def sentinel_needs(ci_doc: dict) -> set[str]:
return set(needs)
def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
"""Pull the required-checks env value from audit-force-merge.yml.
def required_checks_env(audit_doc: dict) -> set[str]:
"""Pull the REQUIRED_CHECKS env value from audit-force-merge.yml.
Walks the YAML AST per `feedback_behavior_based_ast_gates`: we do
NOT grep for env keys — that breaks under reformatting,
NOT grep for `REQUIRED_CHECKS:` — that breaks under reformatting,
multi-job workflows, or a future move of the env to a different
step. Instead, look inside every job's every step's `env:` map.
Supports two variants:
- REQUIRED_CHECKS_JSON (preferred): JSON dict keyed by branch name.
We extract the array for the target branch.
- REQUIRED_CHECKS (legacy): newline-separated list of context names.
"""
found_json: list[str] = []
found_legacy: list[str] = []
step. Instead, look inside every job's every step's `env:` map."""
found: list[str] = []
jobs = audit_doc.get("jobs", {})
if not isinstance(jobs, dict):
sys.stderr.write(f"::warning::{AUDIT_WORKFLOW_PATH} has no jobs: mapping\n")
@@ -277,93 +268,27 @@ def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
if not isinstance(step, dict):
continue
step_env = step.get("env") or {}
if isinstance(step_env, dict):
if "REQUIRED_CHECKS_JSON" in step_env:
v = step_env["REQUIRED_CHECKS_JSON"]
if isinstance(v, str):
found_json.append(v)
if "REQUIRED_CHECKS" in step_env:
v = step_env["REQUIRED_CHECKS"]
if isinstance(v, str):
found_legacy.append(v)
# JSON variant takes precedence.
if found_json:
if len(found_json) > 1:
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON env present in {len(found_json)} steps; ambiguous\n"
)
sys.exit(3)
try:
parsed = json.loads(found_json[0])
except json.JSONDecodeError as e:
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON is not valid JSON: {e}\n"
)
sys.exit(3)
if not isinstance(parsed, dict):
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON parsed to {type(parsed).__name__}, expected dict\n"
)
sys.exit(3)
branch_checks = parsed.get(branch)
if branch_checks is None:
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON has no entry for branch '{branch}'\n"
)
sys.exit(3)
if not isinstance(branch_checks, list):
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
)
sys.exit(3)
# Fail-closed validation: every entry must be a non-empty string.
# Reject null, int, dict, or empty/whitespace strings silently —
# they indicate a malformed manifest that drift-detect must not
# normalize away (that would hide config errors).
validated: set[str] = set()
for idx, item in enumerate(branch_checks):
if not isinstance(item, str):
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
f"{type(item).__name__} (value={item!r}), expected str\n"
)
sys.exit(3)
stripped = item.strip()
if not stripped:
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
f"empty/whitespace string\n"
)
sys.exit(3)
if stripped in validated:
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON['{branch}'] contains "
f"duplicate context '{stripped}' at index {idx}\n"
)
sys.exit(3)
validated.add(stripped)
return validated
# Legacy variant fallback.
if found_legacy:
if len(found_legacy) > 1:
# Defensive: refuse to guess which one is canonical.
sys.stderr.write(
f"::error::REQUIRED_CHECKS env present in {len(found_legacy)} steps; ambiguous\n"
)
sys.exit(3)
raw = found_legacy[0]
# YAML block-scalars (`|`) leave a trailing newline + blanks; trim
# consistently with audit-force-merge.sh's parser so both sides
# produce identical sets.
return {line.strip() for line in raw.splitlines() if line.strip()}
sys.stderr.write(
f"::error::Neither REQUIRED_CHECKS_JSON nor REQUIRED_CHECKS env found in any step of "
f"{AUDIT_WORKFLOW_PATH}\n"
)
sys.exit(3)
if isinstance(step_env, dict) and "REQUIRED_CHECKS" in step_env:
v = step_env["REQUIRED_CHECKS"]
if isinstance(v, str):
found.append(v)
if not found:
sys.stderr.write(
f"::error::REQUIRED_CHECKS env not found in any step of "
f"{AUDIT_WORKFLOW_PATH}\n"
)
sys.exit(3)
if len(found) > 1:
# Defensive: refuse to guess which one is canonical.
sys.stderr.write(
f"::error::REQUIRED_CHECKS env present in {len(found)} steps; ambiguous\n"
)
sys.exit(3)
raw = found[0]
# YAML block-scalars (`|`) leave a trailing newline + blanks; trim
# consistently with audit-force-merge.sh's parser so both sides
# produce identical sets.
return {line.strip() for line in raw.splitlines() if line.strip()}
# --------------------------------------------------------------------------
@@ -387,17 +312,15 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
"""Returns (findings, debug). Empty findings == no drift.
Raises:
ApiError: propagated (fail-closed) on a transient Gitea outage
(5xx) AND on a 401/403 auth failure from the protection
endpoint. A 401/403 means DRIFT_BOT_TOKEN cannot read
branch protections at all — drift is UNVERIFIABLE, so
this HARD gate must fail loud rather than green
undetected drift (the regression class it exists to
catch). An authenticated 404 (branch genuinely has no
protection, e.g. staging pre-rollout) is the one
tolerated skip: it returns ([], debug) with a loud
::warning:: and the workflow continues to the next
branch.
ApiError: propagated from the protection fetch only when the
failure is likely a transient Gitea outage (5xx).
403/404 from the protection endpoint is treated as
"cannot determine drift for this branch" — a token-
scope issue (missing repo-admin on DRIFT_BOT_TOKEN) or
a repo with no protection set should not turn the
hourly cron red. The workflow continues to the next
branch; no [ci-drift] issue is filed for a branch
whose protection cannot be read.
"""
findings: list[str] = []
@@ -407,7 +330,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
jobs = ci_job_names(ci_doc)
jobs_all = ci_jobs_all(ci_doc)
needs = sentinel_needs(ci_doc)
env_set = required_checks_env(audit_doc, branch)
env_set = required_checks_env(audit_doc)
# Protection
# api() raises ApiError on non-2xx. Transient 5xx should fail loud.
@@ -431,38 +354,17 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
m = _re.search(r"HTTP (\d{3})", msg)
if m:
http_status = int(m.group(1))
# FAIL-CLOSED contract (was fail-open: 403 AND 404 both returned
# [] with no signal — fixed). This is a HARD gate (no
# continue-on-error → false) running hourly on a PROTECTED context
# (schedule/dispatch on main). We split auth-failure from
# genuinely-absent:
# 401/403 → AUTH FAILURE: the token cannot read branch
# protections at all, so drift CANNOT be determined for ANY
# branch. Greening the hourly cron here means jobs↔protection
# drift goes silently undetected — exactly the regression class
# this sentinel exists to catch. Raise so the workflow fails
# loud / fails closed.
# 404 → authenticated absent resource: this specific branch has
# no protection (e.g. `staging` before its protection rollout).
# Genuinely nothing to diff against — skip THIS branch with a
# loud ::warning::, continue to the next.
if http_status in (401, 403):
if http_status in (403, 404):
# Token lacks scope OR branch has no protection. Cannot
# determine drift — skip this branch. Do NOT exit non-zero;
# the issue IS the alarm, not a red workflow.
sys.stderr.write(
f"::error::GET {protection_path} returned HTTP "
f"{http_status}DRIFT_BOT_TOKEN cannot read branch "
f"protections (needs repo-admin scope). AUTH FAILURE: "
f"drift CANNOT be determined, so this HARD gate FAILS "
f"CLOSED rather than greening undetected drift. Fix: grant "
f"repo-admin to mc-drift-bot (org team `drift-bot`, "
f"perm=admin) — fix the token, not the lint.\n"
)
raise
if http_status == 404:
sys.stderr.write(
f"::warning::GET {protection_path} returned HTTP 404 — "
f"branch '{branch}' has no protection configured "
f"(authenticated absent resource). Skipping drift check for "
f"{branch}; if it SHOULD be protected, configure it.\n"
f"::error::GET {protection_path} returned HTTP {http_status}"
f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 "
f"requires it for this endpoint) OR branch has no protection "
f"configured. Cannot determine drift for {branch}; "
f"skipping. Fix: grant repo-admin to mc-drift-bot or "
f"configure protection on {branch}.\n"
)
debug = {
"branch": branch,
@@ -473,7 +375,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
"audit_env_checks": sorted(env_set),
}
return [], debug
# 5xx / other — propagate (transient outage, fail loud per design).
# 5xx — propagate (transient outage, fail loud per design).
raise
if not isinstance(protection, dict):
sys.stderr.write(
@@ -578,34 +480,23 @@ def find_open_issue(title: str) -> dict | None:
hourly; failing one cycle loudly is strictly better than silently
duplicating.
Paginates through all open issues (limit=50 per page) until the
title is found or the result set is exhausted. Previously only one
page was fetched, causing duplicate [ci-drift] issues when the
existing tracking issue fell beyond page 1.
Gitea issue search returns at most page=50 per page; one page is
enough as long as `[ci-drift]` issues are a tiny minority. (See
follow-up issue for Link-header pagination.)
"""
page = 1
while True:
_, results = api(
"GET",
f"/repos/{OWNER}/{NAME}/issues",
query={
"state": "open",
"type": "issues",
"limit": "50",
"page": str(page),
},
_, results = api(
"GET",
f"/repos/{OWNER}/{NAME}/issues",
query={"state": "open", "type": "issues", "limit": "50"},
)
if not isinstance(results, list):
raise ApiError(
f"issue search returned non-list body (got {type(results).__name__})"
)
if not isinstance(results, list):
raise ApiError(
f"issue search returned non-list body (got {type(results).__name__})"
)
for issue in results:
if issue.get("title") == title:
return issue
# Fewer than limit results means last page reached.
if len(results) < 50:
return None
page += 1
for issue in results:
if issue.get("title") == title:
return issue
return None
def render_body(branch: str, findings: list[str], debug: dict) -> str:
@@ -633,7 +524,7 @@ def render_body(branch: str, findings: list[str], debug: dict) -> str:
"- **F2**: rename the protection context to match an emitter, "
"or remove it from `status_check_contexts` "
"(PATCH `/api/v1/repos/{owner}/{repo}/branch_protections/{branch}`).",
"- **F3a / F3b**: bring `REQUIRED_CHECKS_JSON` (or `REQUIRED_CHECKS` legacy) env in "
"- **F3a / F3b**: bring `REQUIRED_CHECKS` env in "
"`.gitea/workflows/audit-force-merge.yml` into set-equality with "
"`status_check_contexts` (single PR, both files).",
"",
-5
View File
@@ -26,10 +26,6 @@ PROFILES: dict[str, dict[str, str]] = {
"handlers": (
r"^workspace-server/internal/handlers/"
r"|^workspace-server/internal/wsauth/"
# #2149: the scheduler real-PG integration tests run in this same
# workflow (they reuse its migrated Postgres), so changes to the
# scheduler package must trigger the job too.
r"|^workspace-server/internal/scheduler/"
r"|^workspace-server/migrations/"
r"|^\.gitea/workflows/handlers-postgres-integration\.yml$"
),
@@ -178,4 +174,3 @@ def main(argv: list[str]) -> int:
if __name__ == "__main__":
sys.exit(main(sys.argv[1:]))
File diff suppressed because it is too large Load Diff
+14 -40
View File
@@ -40,24 +40,20 @@ Context-format note (Gitea 1.22.6):
Exit codes:
0 — no required workflow has a paths/paths-ignore filter (clean) OR
branch_protections returned an authenticated 404 (branch
genuinely has no protection; ::warning:: surfaced).
branch_protections endpoint returned 403/404 (token-scope issue;
surfaced via ::error:: but non-fatal so a missing scope doesn't
red-X every PR — fix the token, not the lint).
1 — at least one required workflow has a paths/paths-ignore filter
(the gate-degrading defect class).
2 — env contract violation (missing GITEA_TOKEN/HOST/REPO/BRANCH).
3 — workflows directory missing or workflow YAML unparseable.
4 — FAIL-CLOSED verification failure: branch_protections 401/403
auth failure (token can't read BP), 5xx transient (propagated
ApiError), or unexpected response shape. This is a HARD gate on
a protected context — it MUST NOT green when it cannot verify.
4 — protection response shape unexpected (non-dict body on 2xx).
Auth note: `GET /repos/.../branch_protections/{branch}` requires
repo-admin role in Gitea 1.22.6. The workflow-default `GITHUB_TOKEN`
is non-admin; we re-use `DRIFT_BOT_TOKEN` (same persona that powers
ci-required-drift.yml). A 401/403 from a missing-scope token is an
AUTH FAILURE that FAILS CLOSED (exit 4) — fix the token, not the
lint. Only an authenticated 404 (genuinely-absent protection) is a
tolerated graceful skip.
ci-required-drift.yml). If `DRIFT_BOT_TOKEN` is unavailable in a future
context, the script falls through gracefully (exit 0 + ::error::).
"""
from __future__ import annotations
@@ -165,7 +161,7 @@ def api(
# Format: "<workflow_name> / <job_name_or_key> (<event>)"
# Examples observed on molecule-core/main:
# "Secret scan / Scan diff for credential-shaped strings (pull_request)"
# " / tier-check (pull_request)"
# "sop-tier-check / tier-check (pull_request)"
#
# Split strategy: peel off the trailing ` (<event>)` first, then split
# the leading `<workflow> / <rest>` on the FIRST ` / ` (workflow names
@@ -313,36 +309,14 @@ def run() -> int:
msg = str(e)
m = re.search(r"HTTP (\d{3})", msg)
http_status = int(m.group(1)) if m else None
# FAIL-CLOSED contract (was fail-open: 403 AND 404 both exit 0 —
# fixed). This is a HARD gate (no continue-on-error → false) on a
# PROTECTED context: pull_request (same-repo; fork PRs can't carry
# DRIFT_BOT_TOKEN) + workflow_dispatch. We split auth-failure from
# genuinely-absent:
# 401/403 → AUTH FAILURE: the token cannot read branch
# protections, so we CANNOT enumerate the required-check set
# and CANNOT verify the no-paths-filter invariant. Fail loud /
# fail closed (exit 4) — do NOT green an unverifiable gate.
# 404 → authenticated absent resource: branch genuinely has no
# protection. Nothing to enumerate; tolerated degradation,
# surfaced loudly (exit 0 with ::warning::).
if http_status in (401, 403):
if http_status in (403, 404):
sys.stderr.write(
f"::error::GET {protection_path} returned HTTP "
f"{http_status}DRIFT_BOT_TOKEN cannot read branch "
f"protections (needs repo-admin scope). AUTH FAILURE: "
f"cannot enumerate required checks, so this lint FAILS "
f"CLOSED rather than greening a gate it could not verify. "
f"Fix: grant repo-admin to mc-drift-bot (org team "
f"`drift-bot`, perm=admin) — fix the token, not the lint.\n"
)
return 4
if http_status == 404:
sys.stderr.write(
f"::warning::GET {protection_path} returned HTTP 404 — "
f"branch '{BRANCH}' has no protection configured "
f"(authenticated absent resource). No required contexts to "
f"check. If '{BRANCH}' SHOULD be protected, this is a real "
f"finding.\n"
f"::error::GET {protection_path} returned HTTP {http_status}"
f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 "
f"requires it for this endpoint) OR branch '{BRANCH}' has "
f"no protection configured. Cannot enumerate required "
f"checks; skipping lint with exit 0 to avoid red-X on "
f"every PR. Fix: grant repo-admin to mc-drift-bot.\n"
)
return 0
raise
+2 -2
View File
@@ -17,7 +17,7 @@ Rules (4 fatal + 1 fatal cross-file + 1 heuristic-warn):
enumeration; task #81). Workflow registers, fires for 0 events.
3. `name:` containing `/` — breaks the
`<workflow> / <job> (<event>)` commit-status context convention;
downstream parsers (sop-checklist, status-reaper) tokenize on `/`.
downstream parsers (sop-tier-check, status-reaper) tokenize on `/`.
4. `name:` collision across files — Gitea routes commit-status updates
by `name` and behavior on collision is undefined (status-reaper
rev1 fail-loud).
@@ -150,7 +150,7 @@ def check_name_with_slash(filename: str, doc: Any) -> list[str]:
f"::error file={filename}::Rule 3 (FATAL): workflow `name: "
f"{name!r}` contains `/`. The commit-status context convention "
f"is `<workflow> / <job> (<event>)`; embedding `/` in the "
f"workflow name makes downstream parsers (sop-checklist, "
f"workflow name makes downstream parsers (sop-tier-check, "
f"status-reaper) tokenize ambiguously. Rename to use `-` or "
f"` ` instead."
)
+19 -44
View File
@@ -36,8 +36,7 @@ Daily scheduled run + workflow_dispatch:
1. GET `branch_protections/{BRANCH}` (needs DRIFT_BOT_TOKEN with
repo-admin scope; same persona as ci-required-drift.yml).
FAIL CLOSED on 401/403 (auth failure → exit 2); a genuine
authenticated 404 (no protection) is a loud ::warning:: skip.
Graceful-degrade on 403/404 per Tier 2a contract.
2. Walk `.gitea/workflows/*.yml` via PyYAML AST. For each workflow,
enumerate its emitted contexts: `{workflow.name} / {job.name or
@@ -49,7 +48,8 @@ Daily scheduled run + workflow_dispatch:
4. If orphans exist:
- File or PATCH a `[ci-bp-drift]` issue (idempotency contract:
search for exact title prefix, edit existing if open).
- Apply label `ci-bp-drift` (lookup ID per repo).
- Apply labels `tier:high` + `ci-bp-drift` (lookup IDs per
repo; per `feedback_tier_label_ids_are_per_repo`).
- Exit 1.
5. If no orphans:
@@ -59,14 +59,10 @@ Daily scheduled run + workflow_dispatch:
Exit codes
----------
0 — clean, OR an authenticated 404 (branch genuinely has no
protection — surfaces ::warning::, not a fail-open).
0 — clean OR API 403/404 (graceful-degrade, surfaces ::error::).
1 — at least one BP context has no emitter.
2 — env contract violation, workflows-dir missing, YAML parse
error, OR a fail-closed verification failure: 401/403 auth
failure (token can't read BP) or transient/unexpected API
error. This is a HARD gate on a protected context (schedule/
dispatch on main) — it MUST NOT green when it cannot verify.
2 — env contract violation, workflows-dir missing, or YAML parse
error.
Env
---
@@ -81,7 +77,7 @@ Memory cross-links
------------------
- internal#350 (the RFC that specs this lint)
- feedback_phantom_required_check_after_gitea_migration
- feedback_label_ids_are_per_repo
- feedback_tier_label_ids_are_per_repo
- reference_post_suspension_pipeline
"""
from __future__ import annotations
@@ -358,7 +354,7 @@ def file_or_update_issue(
existing = h
break
label_ids = _ensure_labels(repo, ["ci-bp-drift"])
label_ids = _ensure_labels(repo, ["ci-bp-drift", "tier:high"])
if existing:
api(
@@ -398,49 +394,28 @@ def run() -> int:
return 2
# 1. Pull BP.
#
# FAIL-CLOSED contract (was fail-open with exit 0 — fixed). This lint
# is a HARD gate (continue-on-error: false) and only ever runs on a
# PROTECTED context: schedule + workflow_dispatch on `main`. There is
# NO fork/advisory split here — the DRIFT_BOT_TOKEN secret is always
# present and trusted, so an auth failure or transient error is a real
# inability-to-verify, not a legitimate degradation. We MUST fail loud
# (`::error::` + nonzero) rather than green a gate we could not check.
status, bp = api("GET", f"/repos/{repo}/branch_protections/{branch}")
if status == "forbidden":
sys.stderr.write(
f"::error::GET branch_protections/{branch} returned HTTP "
f"401/403 — DRIFT_BOT_TOKEN cannot read branch protections "
f"(needs repo-admin scope; Gitea requires it for this "
f"endpoint). This is an AUTH FAILURE, not an absent resource: "
f"the lint CANNOT verify the BP↔emitter invariant, so it FAILS "
f"CLOSED instead of greening a gate it could not check. Fix: "
f"grant repo-admin to mc-drift-bot (org team `drift-bot`, "
f"perm=admin) — fix the token, not the lint.\n"
f"::error::GET branch_protections/{branch} returned HTTP 403 — "
f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 requires "
f"it for this endpoint). Skipping lint with exit 0 to avoid "
f"red-X on every run. Fix: grant repo-admin to mc-drift-bot. "
f"Per Tier 2a contract.\n"
)
return 2
return 0
if status == "not_found":
# Genuine 404 WITH a valid token = branch has no protection
# configured. On `main` this is itself suspicious (main should
# always be protected) but it is a real, authenticated read of an
# absent resource — not an auth failure — so we surface it loudly
# but do not hard-fail on the genuinely-absent case.
print(
f"::warning::branch '{branch}' has no protection configured "
f"(authenticated 404); nothing to lint. If '{branch}' SHOULD be "
f"protected, this is a real finding — configure branch "
f"protection."
f"::notice::branch '{branch}' has no protection configured; "
f"nothing to lint."
)
return 0
if status != "ok" or not isinstance(bp, dict):
sys.stderr.write(
f"::error::branch_protections/{branch} read failed with "
f"status={status} (transient/unexpected). The lint CANNOT "
f"verify the BP↔emitter invariant on this run; FAILING CLOSED "
f"rather than greening unverified. Re-run; if it persists, "
f"investigate Gitea API health / token validity.\n"
f"::error::branch_protections/{branch} response unexpected; "
f"status={status}. Treating as transient; exit 0.\n"
)
return 2
return 0
bp_contexts: list[str] = list(bp.get("status_check_contexts") or [])
if not bp_contexts:
@@ -305,9 +305,9 @@ def validate_tracker(
if status == "error":
sys.stderr.write(
f"::error::issue {slug}#{num} fetch errored — treating as "
f"unverified, FAILING CLOSED (do not skip on outage).\n"
f"unverified, skipping this check.\n"
)
return (False, f"{slug}#{num} fetch erroredcannot verify tracker")
return (True, "fetch-error — skipped")
assert payload is not None
state = payload.get("state", "")
@@ -466,40 +466,12 @@ def fetch_log(target_url: str) -> str | None:
def grep_fail_markers(log_text: str) -> list[str]:
"""Return up to 5 sample matching lines for any FAIL_PATTERNS hit.
Empty list = clean log.
Heuristic: skip lines where the marker appears inside script source
(e.g. ``echo "::error::..."`` in a ``::group::Run`` block) rather
than actual execution output. The Gitea Actions log prints the raw
script before executing it; ``echo "::error::"`` lines in that
display are false positives.
"""
Empty list = clean log."""
matches: list[str] = []
in_run_group = False
group_depth = 0
for line in log_text.splitlines():
stripped = line.strip()
# Track Gitea Actions group markers so we can skip the
# ``::group::Run`` script-source display blocks.
if stripped.startswith("::group::Run"):
in_run_group = True
group_depth = 1
continue
if stripped == "::endgroup::":
if in_run_group:
in_run_group = False
group_depth = 0
continue
if in_run_group:
continue
for pat in FAIL_PATTERNS:
if pat in line:
# Additional false-positive guard: ``echo "::error::"``
# is script source, not a runtime error emission.
if pat == "::error::":
prefix = line[: line.index(pat)].strip()
if prefix.endswith('echo') or prefix.endswith("echo '") or prefix.endswith('echo "'):
break
# Truncate to keep error output bounded.
matches.append(line.strip()[:240])
break
if len(matches) >= 5:
@@ -546,24 +518,16 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
shas = recent_commits_on_branch(branch, n)
if not shas:
result["masked_runs"].append({
"sha": "",
"status": "unverified",
"target_url": "",
"samples": [f"no recent commits on {branch} — cannot verify flip"],
})
result["warnings"].append(
f"no recent commits on {branch} (cannot verify flip)"
)
return result
for sha in shas:
try:
status_doc = combined_status(sha)
except ApiError as e:
result["masked_runs"].append({
"sha": sha,
"status": "error",
"target_url": "",
"samples": [f"combined-status API error: {e}"],
})
result["warnings"].append(f"combined-status for {sha}: {e}")
continue
statuses = status_doc.get("statuses") or []
# First entry matching the context name. Newest SHAs come
@@ -590,17 +554,6 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
"target_url": target_url,
"samples": ["[log unavailable; status itself is " + state + "]"],
})
elif state == "success":
# Fail-closed: unreadable log on a success status is a
# potential Quirk #10 mask (continue-on-error hiding real
# failures). We cannot verify it's clean, so treat as
# masked rather than allowing the flip.
result["masked_runs"].append({
"sha": sha,
"status": state,
"target_url": target_url,
"samples": ["[log unavailable; cannot verify status is genuine — treat as masked]"],
})
break
samples = grep_fail_markers(log_text)
if state in ("failure", "error"):
@@ -624,12 +577,10 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
break
if result["checked_commits"] == 0:
result["masked_runs"].append({
"sha": "",
"status": "unverified",
"target_url": "",
"samples": [f"no runs of {target_context!r} found in the last {n} commits on {branch} — cannot verify flip"],
})
result["warnings"].append(
f"no runs of {target_context!r} found in the last {n} commits on "
f"{branch} — cannot verify; allowing flip with warning"
)
return result
@@ -57,14 +57,10 @@ comment unrelated to the new job.
Exit codes
----------
0 — no new emissions, all new emissions have valid directives,
OR an authenticated 404 (branch genuinely has no protection
to verify against — surfaces ::warning::, not a fail-open).
or BP read errored (graceful-degrade per Tier 2a contract).
1 — at least one new emission lacks a directive, or has
`bp-required: yes` but the context is missing from BP.
2 — env contract violation, YAML parse error, OR a fail-closed
verification failure: 401/403 auth failure (token can't read
BP) or transient/unexpected API error. HARD gate on a
same-repo PR context — MUST NOT green when it cannot verify.
2 — env contract violation or YAML parse error.
Env
---
@@ -424,51 +420,33 @@ def run() -> int:
return 0
# Step 3 — fetch BP context list.
#
# FAIL-CLOSED contract (was fail-open with exit 0 — fixed). This is a
# HARD gate (continue-on-error: false) that runs on `pull_request`
# against `main`. On molecule-core, `pull_request` runs are same-repo
# (fork PRs cannot carry the DRIFT_BOT_TOKEN secret), so this is a
# PROTECTED/trusted context with no legitimate fork-degradation. An
# auth failure or transient error means we CANNOT verify a NEW
# bp-required emission is actually in BP — so we MUST fail loud rather
# than green the gate. (A genuinely-absent 404 read with a valid token
# is the one tolerated degradation: there is no BP to check against.)
status, bp = api("GET", f"/repos/{repo}/branch_protections/{branch}")
bp_contexts: set[str] = set()
if status == "forbidden":
sys.stderr.write(
f"::error::GET branch_protections/{branch} returned HTTP "
f"401/403 — DRIFT_BOT_TOKEN cannot read branch protections "
f"(needs repo-admin scope). This is an AUTH FAILURE: the lint "
f"CANNOT verify the bp-required directives on this PR, so it "
f"FAILS CLOSED instead of greening unverified. Fix: grant "
f"repo-admin to mc-drift-bot (org team `drift-bot`) — fix the "
f"token, not the lint.\n"
f"::error::GET branch_protections/{branch} returned HTTP 403 — "
f"DRIFT_BOT_TOKEN lacks repo-admin scope. Cannot verify "
f"bp-required directives; skipping lint with exit 0 per "
f"Tier 2a contract. Fix the token, not the lint.\n"
)
return 2
return 0
elif status == "not_found":
# Authenticated 404 — branch genuinely has no protection. There is
# nothing to verify a `bp-required: yes` directive against, so this
# is the one tolerated degradation. Surface loudly (on `main` a
# missing protection is itself a real finding) but do not hard-fail.
# Branch has no protection — nothing to verify against; the
# bp-required: yes directive can't be satisfied. Treat as
# graceful-skip rather than red-X.
print(
f"::warning::branch '{branch}' has no protection (authenticated "
f"404); cannot verify bp-required directives. If '{branch}' "
f"SHOULD be protected this is a real finding."
f"::notice::branch '{branch}' has no protection; cannot verify "
f"bp-required directives. Skipping (exit 0)."
)
return 0
elif status == "ok" and isinstance(bp, dict):
bp_contexts = set(bp.get("status_check_contexts") or [])
else:
sys.stderr.write(
f"::error::branch_protections/{branch} read failed with "
f"status={status} (transient/unexpected). CANNOT verify "
f"bp-required directives on this PR; FAILING CLOSED rather than "
f"greening unverified. Re-run; if persistent, check Gitea API "
f"health / token validity.\n"
f"::error::branch_protections/{branch} response unexpected; "
f"status={status}. Treating as transient; exit 0.\n"
)
return 2
return 0
# Step 4 — validate each new emission's directive.
violations: list[str] = []
+2 -2
View File
@@ -50,7 +50,7 @@ runtime contract enforcement lives in `_require_runtime_env()`.
Run locally (dry-run, no API mutation):
GITEA_TOKEN=... GITEA_HOST=git.moleculesai.app REPO=owner/repo \\
WATCH_BRANCH=main RED_LABEL=ci-bp-drift \\
WATCH_BRANCH=main RED_LABEL=tier:high \\
python3 .gitea/scripts/main-red-watchdog.py --dry-run
"""
from __future__ import annotations
@@ -81,7 +81,7 @@ GITEA_TOKEN = _env("GITEA_TOKEN")
GITEA_HOST = _env("GITEA_HOST")
REPO = _env("REPO")
WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
RED_LABEL = _env("RED_LABEL", default="ci-bp-drift")
RED_LABEL = _env("RED_LABEL", default="tier:high")
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
-83
View File
@@ -364,71 +364,6 @@ def _api_json_optional(url: str, token: str) -> tuple[int, dict | None]:
return exc.code, None
def current_branch_head(env: dict[str, str]) -> str | None:
"""Return the SHA at the tip of the deploy branch (main) per Gitea, or None.
Used to detect a *superseded* deploy job (see `superseded_by`). Fail-safe:
any read error / missing token returns None so the caller treats the job as
NOT superseded and the strict /buildinfo verify still runs. We never let an
unreadable head silently green a deploy.
"""
token = env.get("GITEA_TOKEN", "").strip()
if not token:
return None
host = env.get("GITEA_HOST", "git.moleculesai.app")
repo = env.get("GITHUB_REPOSITORY", "molecule-ai/molecule-core")
# Deploy lane is on: push:main; the branch is always main here, but read it
# from the ref name when present so a future branch rename doesn't break us.
branch = env.get("GITHUB_REF_NAME", "").strip() or "main"
url = f"https://{host}/api/v1/repos/{repo}/branches/{quote(branch, safe='')}"
status, body = _api_json_optional(url, token)
if status != 200 or not isinstance(body, dict):
return None
commit = body.get("commit")
if isinstance(commit, dict):
head = commit.get("id") or commit.get("sha")
if isinstance(head, str) and head.strip():
return head.strip()
return None
def superseded_by(env: dict[str, str]) -> str | None:
"""Return the newer head SHA if THIS deploy job has been superseded, else None.
This workflow runs with no `concurrency:` (intentional — Gitea 1.22.6 cancels
queued runs, which is unacceptable for a prod deploy). When two main pushes
land close together, BOTH deploy-production jobs run. The newer push rolls the
fleet forward first; the OLDER job's strict /buildinfo verify then sees tenants
on the NEWER SHA and false-reds with "$slug is stale" — even though the fleet
is AHEAD, not behind. Git SHAs aren't ordered, so the verify can't tell ahead
from behind on its own (and /buildinfo exposes only git_sha, no build time).
Resolve it at the source of truth for ordering — the branch ref: if main's
current head is a DIFFERENT SHA than the one this job is deploying, a newer
commit has landed and this job is superseded; the newest job's verify is the
authoritative one. We return that head SHA so the caller can log it and exit
success early, skipping the strict-equality verify for this stale job.
Fail-safe: returns None (NOT superseded) when the head can't be read or equals
our SHA, so a genuinely-behind tenant under the LATEST deploy job still fails
the strict verify loudly. This never suppresses a real-stale signal — it only
excuses a job that is no longer the latest from asserting exact equality.
"""
sha = env.get("GITHUB_SHA", "").strip()
if not sha:
return None
head = current_branch_head(env)
if not head:
return None
# SHA lengths can differ (short vs full); compare on the shorter prefix.
n = min(len(head), len(sha))
if head[:n].lower() == sha[:n].lower():
return None
return head
def live_disable_flag(env: dict[str, str]) -> str:
"""Return a live disable value from Gitea variables when readable.
@@ -507,14 +442,6 @@ def main() -> int:
sub.add_parser("plan", help="print production deploy plan as JSON")
sub.add_parser("assert-enabled", help="fail if production deploy is currently disabled")
sub.add_parser("wait-ci", help="block until required CI context is green")
sub.add_parser(
"check-superseded",
help=(
"exit 0 if a newer commit has landed on the deploy branch (this job "
"is superseded; prints the newer head SHA), exit 10 if this job is "
"still the latest"
),
)
rollout_parser = sub.add_parser("rollout", help="execute canary-first scoped production rollout")
rollout_parser.add_argument("--plan", required=True, help="path to prod-auto-deploy plan JSON")
rollout_parser.add_argument("--response", required=True, help="path to write aggregate response JSON")
@@ -530,16 +457,6 @@ def main() -> int:
if args.command == "wait-ci":
wait_for_ci_context(dict(os.environ))
return 0
if args.command == "check-superseded":
newer = superseded_by(dict(os.environ))
if newer:
print(newer)
return 0
# Exit 10 (not 0, not 1): "this job is still the latest". The
# workflow treats only exit 0 as superseded; 10 means proceed to
# the strict verify. A non-zero code here is informational, not a
# failure — the workflow step swallows it.
return 10
if args.command == "rollout":
rollout_from_plan_file(args.plan, args.response, dict(os.environ))
return 0
+53 -14
View File
@@ -197,15 +197,19 @@ if [ "$HTTP_CODE" != "200" ]; then
exit 1
fi
# Filter: state=APPROVED, official=true, not-dismissed, non-author,
# commit_id matches current PR head. All conditions are mandatory.
# Filter: state=APPROVED, not-dismissed, non-author. Optionally strict-mode
# adds commit_id==head.sha (off by default; see header).
JQ_FILTER='.[]
| select(.state == "APPROVED")
| select(.official == true)
| select(.dismissed != true)
| select(.user.login != $author)
| select(.commit_id == $head)
| .user.login'
| select(.official != false)
| select(.user.login != $author)'
if [ "${REVIEW_CHECK_STRICT:-}" = "1" ]; then
JQ_FILTER="${JQ_FILTER}
| select(.commit_id == \$head)"
fi
JQ_FILTER="${JQ_FILTER}
| .user.login"
REVIEW_CANDIDATES=$(jq -r --arg author "$PR_AUTHOR" --arg head "$PR_HEAD_SHA" "$JQ_FILTER" "$REVIEWS_JSON" | sort -u)
debug "candidate non-author approvers: $(echo "$REVIEW_CANDIDATES" | tr '\n' ' ')"
@@ -237,14 +241,49 @@ if [ -z "$REVIEW_CANDIDATES" ]; then
fi
# --- COMMENT APPROVAL REMOVED (security hardening) ---
# Previous versions accepted issue comments containing generic approval
# keywords (APPROVED/LGTM/ACCEPTED) or agent prefixes ([core-qa-agent],
# [core-security-agent]) as satisfying the gate. Both paths are bypasses:
# a comment lacks the audit trail, dismissal, stale-review invalidation,
# and commit_id binding that an official Gitea review provides.
# Only APPROVED reviews from the Gitea reviews API count.
CANDIDATES="$REVIEW_CANDIDATES"
# --- Fallback/extension (internal#348): check issue comments for agent-approval ---
# core-qa-agent and core-security-agent can approve via issue comments. Always
# include comment candidates, even if the reviews API returned approvals for a
# different team; team membership below is the authoritative filter.
COMMENT_CANDIDATES=""
AGENT_PATTERN=""
case "$TEAM" in
qa) AGENT_PATTERN="\\[core-qa-agent\\]" ;;
security) AGENT_PATTERN="\\[core-security-agent\\]" ;;
esac
HTTP_CODE=$(curl -sS -o "$COMMENTS_JSON" -w '%{http_code}' \
-K "$CURL_AUTH_FILE" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/comments")
debug "GET /issues/${PR_NUMBER}/comments → HTTP ${HTTP_CODE}"
if [ "$HTTP_CODE" = "200" ]; then
# JQ expression: select non-author comments that match either the
# agent-prefix pattern (case-insensitive) OR a generic approval keyword.
JQ_APPROVALS='
.[] |
select(.user.login != $author) |
. as $cmt |
if ($agent_pattern | length) > 0 and ($cmt.body // "" | test($agent_pattern; "i")) then
$cmt.user.login
elif ($cmt.body // "" | test("\\b(APPROVED|LGTM|ACCEPTED)\\b"; "i")) then
$cmt.user.login
else
empty
end
'
COMMENT_CANDIDATES=$(jq -r \
--arg author "$PR_AUTHOR" \
--arg agent_pattern "$AGENT_PATTERN" \
"$JQ_APPROVALS" \
"$COMMENTS_JSON" 2>/dev/null | sort -u)
debug "comment-based approval candidates: $(echo "$COMMENT_CANDIDATES" | tr '\n' ' ')"
if [ -n "$COMMENT_CANDIDATES" ]; then
echo "::notice::${TEAM}-review: found $(echo "$COMMENT_CANDIDATES" | wc -w | xargs) comment-based approval candidate(s) — verifying team membership..."
fi
else
debug "could not fetch issue comments (HTTP ${HTTP_CODE})"
fi
CANDIDATES=$(printf '%s\n%s\n' "$REVIEW_CANDIDATES" "$COMMENT_CANDIDATES" | sed '/^$/d' | sort -u)
if [ -z "${CANDIDATES:-}" ]; then
echo "::error::${TEAM}-review awaiting non-author APPROVE from ${TEAM} team (no candidates from reviews API or issue comments)"
+40 -32
View File
@@ -11,7 +11,7 @@
#
# Flow:
# 1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, labels
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, tier label
# 3. GET /repos/{R}/issues/{N}/comments — extract /sop-ack and /sop-revoke
# 4. For each checklist item:
# a. Is the section marker present in PR body? (author answered)
@@ -174,16 +174,6 @@ def parse_directives(
if not parts:
continue
first = parts[0]
# Em-dash (U+2014) is a common visual separator in user-written
# notes, e.g. /sop-ack Five-Axis — five-axis-review
# If raw_slug contains an em-dash, split on the first one so
# the part before becomes the slug and the rest becomes the note.
note_from_slug = ""
slug_source = raw_slug
emdash_idx = raw_slug.find("")
if emdash_idx != -1:
slug_source = raw_slug[:emdash_idx].strip()
note_from_slug = raw_slug[emdash_idx + 1 :].strip()
# If the slug-capture greedily matched multiple words (e.g.
# "comprehensive testing"), preserve normalize behavior: join
# the WHOLE first-word-token only; trailing words get appended to
@@ -196,19 +186,13 @@ def parse_directives(
# as slug and "testing extra-note" as note. We defer the
# disambiguation to the caller via the returned canonical
# slug. For simplicity: try the WHOLE captured string first.
canonical = normalize_slug(slug_source, numeric_aliases)
canonical = normalize_slug(raw_slug, numeric_aliases)
else:
canonical = normalize_slug(slug_source, numeric_aliases)
canonical = normalize_slug(first, numeric_aliases)
note_from_group = (m.group(3) or "").strip()
# The em-dash (U+2014) is a visual separator; the regex puts it
# in group(3) because it is outside the slug character class.
# Strip it so "/sop-ack slug — note" yields just "note".
if note_from_group.startswith(""):
note_from_group = note_from_group[1:].strip()
# Combine note_from_slug (em-dash split) with note_from_group
# (trailing text after the slug captured by the regex group).
combined_note = (note_from_slug + " " + note_from_group).strip()
entry = (kind, canonical, combined_note)
# If we collapsed multi-word slug into kebab and there's a
# trailing-text group too, append it.
entry = (kind, canonical, note_from_group)
if kind == "sop-n/a":
na_directives.append(entry)
else:
@@ -665,8 +649,8 @@ def load_config(path: str) -> dict[str, Any]:
def _load_config_minimal(path: str) -> dict[str, Any]:
"""Minimal YAML subset parser for our config shape.
Supports: top-level scalar:value, top-level map-of-map,
top-level list of maps (items:), and within an
Supports: top-level scalar:value, top-level map-of-map (e.g.
tier_failure_mode), top-level list of maps (items:), and within an
item map: scalars + lists of scalars. Does NOT support nested lists,
YAML anchors, multi-doc, or flow style.
"""
@@ -835,7 +819,8 @@ def render_status(
state is "success" if every item has at least one valid ack
(body section presence is informational only — peer-ack is the
real gate).
real gate). tier:low PRs receive state="success" (soft-fail — no
acks required); the description carries "[info tier:low]" prefix.
"""
n = len(items)
fully_acked = [
@@ -862,16 +847,35 @@ def render_status(
return state, "".join(desc_parts)
def get_tier_mode(pr: dict[str, Any], cfg: dict[str, Any]) -> str:
"""Read tier label, return 'hard' or 'soft' per cfg.tier_failure_mode."""
labels = pr.get("labels") or []
tier_labels = [label.get("name", "") for label in labels if (label.get("name", "") or "").startswith("tier:")]
mode_map = cfg.get("tier_failure_mode") or {}
default_mode = cfg.get("default_mode", "hard")
for tl in tier_labels:
if tl in mode_map:
return mode_map[tl]
return default_mode
def is_high_risk(pr: dict[str, Any], cfg: dict[str, Any]) -> bool:
"""Return True when the PR is high-risk per RFC#450 Option C.
A PR is high-risk when it carries any label listed in cfg.high_risk_labels.
A PR is high-risk when ANY of:
- it carries the `tier:high` label (mechanically strictest tier), or
- it carries any label listed in cfg.high_risk_labels.
High-risk PRs use `required_teams_high_risk` (when set on an item)
instead of the default `required_teams`. Items without
`required_teams_high_risk` are unaffected (the default applies).
Governance fix for internal#442 — closes the inconsistency between
sop-tier-check (tier-aware) and sop-checklist (was tier-blind).
"""
label_set = {(label.get("name") or "") for label in (pr.get("labels") or [])}
if "tier:high" in label_set:
return True
high_risk_labels = set(cfg.get("high_risk_labels") or [])
return bool(label_set & high_risk_labels)
@@ -1149,6 +1153,13 @@ def main(argv: list[str] | None = None) -> int:
body_state = {it["slug"]: section_marker_present(body, it["pr_section_marker"]) for it in items}
state, description = render_status(items, ack_state, body_state)
mode = get_tier_mode(pr, cfg)
if mode == "soft":
# tier:low: acks are informational only — post success so BP gate passes.
# Description carries "[info tier:low]" prefix so reviewers know acks
# were not required (vs a tier:medium+ PR that truly passed all acks).
state = "success"
description = f"[info tier:low] {description}"
if volume_skipped:
# Above the comment-cap — we may have a partial view. Soft-pend
# so neither BP nor the author gets stuck; surface the cap so
@@ -1162,7 +1173,7 @@ def main(argv: list[str] | None = None) -> int:
# Diagnostics to job log.
print(
f"::notice::PR #{args.pr} author={author} head={head_sha[:7]} "
f"risk_class={'high' if high_risk else 'default'}"
f"mode={mode} risk_class={'high' if high_risk else 'default'}"
)
for it in items:
slug = it["slug"]
@@ -1217,13 +1228,10 @@ def main(argv: list[str] | None = None) -> int:
)
na_desc = ", ".join(sorted(na_descs)) if na_descs else "(none)"
# internal#818: na-declarations is an informational context, not a merge
# gate. An empty declaration list is a terminal success state — pending
# here poisons the PR combined status.
na_status_state = "success"
na_status_state = "success" if na_descs else "pending"
# review-check.sh reads the description to discover which gates are N/A.
# Include the gate names so it can grep for them.
na_description = f"N/A: {na_desc}"
na_description = f"N/A: {na_desc}" if na_descs else "N/A: (none)"
if not args.dry_run:
client.post_status(
+411
View File
@@ -0,0 +1,411 @@
#!/usr/bin/env bash
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
#
# Reads the PR's tier label, walks approving reviewers, and checks team
# membership against the tier's approval expression. Passes only when
# ALL clauses in the expression are satisfied by the set of approving
# reviewers (AND-composition; internal#189).
#
# Expression syntax:
# "team-a" — OR-set: any ONE of the comma-separated teams
# "team-a AND team-b" — AND: BOTH must each have ≥1 approver
# "(a,b,c)" — OR-set wrapped in parens; same as "a,b,c"
#
# Example: "qa AND security AND (managers,ceo)" means:
# ≥1 approver in team "qa" AND
# ≥1 approver in team "security" AND
# ≥1 approver in team "managers" OR "ceo"
#
# Per the spec (internal#189), the hard gate here pairs with the
# advisory gate of sop-conformance LLM-judge (internal#188): each
# required-team click must reflect real verification (visible in review
# body or A2A messages), not rubber-stamp APPROVE. Both gates together
# close the "teammate clicks APPROVE without verifying" gap.
#
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
# the env vars below; this script does no IO outside of stdout/stderr +
# the Gitea API.
#
# Required env:
# GITEA_TOKEN — bot PAT with read:organization,read:user,
# read:issue,read:repository scopes
# GITEA_HOST — e.g. git.moleculesai.app
# REPO — owner/name (from github.repository)
# PR_NUMBER — int (from github.event.pull_request.number)
# PR_AUTHOR — login (from github.event.pull_request.user.login)
#
# Optional:
# SOP_DEBUG=1 — print per-API-call diagnostic lines. Default: off.
# SOP_LEGACY_CHECK=1 — revert to OR-gate (≥1 approver from any eligible
# team). Grace window for PRs in-flight when the
# new AND-composition was deployed. Expires 2026-05-17
# (7-day burn-in window; internal#189 Phase 1).
# Set by workflow for PRs merged before the deploy.
set -euo pipefail
# Ensure jq is available. Runners may not have it pre-installed, and the
# workflow-level jq install can fail on runners with network restrictions
# (GitHub releases not reachable from some runner networks — infra#241
# follow-up). This fallback is idempotent — no-op when jq is already on PATH.
# SOP_FAIL_OPEN=1 makes this always exit 0 so CI never blocks on jq absence.
if ! command -v jq >/dev/null 2>&1; then
echo "::notice::jq not found on PATH — attempting install..."
_jq_installed="no"
# apt-get first (primary) — Ubuntu package mirrors are reliably reachable.
if apt-get update -qq && apt-get install -y -qq jq 2>/dev/null; then
echo "::notice::jq installed via apt-get: $(jq --version)"
_jq_installed="yes"
# GitHub binary as secondary fallback — may fail on restricted networks.
elif timeout 120 curl -sSL \
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
-o /usr/local/bin/jq \
&& chmod +x /usr/local/bin/jq; then
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
_jq_installed="yes"
fi
if ! command -v jq >/dev/null 2>&1; then
echo "::error::jq installation failed — apt-get and GitHub binary both failed."
echo "::error::sop-tier-check requires jq for all JSON API parsing."
# SOP_FAIL_OPEN=1 is set in the workflow step's env — makes script always
# exit 0 so CI never blocks. The SOP-6 tier review gate remains enforced.
if [ "${SOP_FAIL_OPEN:-}" = "1" ]; then
echo "::warning::SOP_FAIL_OPEN=1 — exiting 0 so CI does not block."
exit 0
fi
exit 1
fi
fi
debug() {
if [ "${SOP_DEBUG:-}" = "1" ]; then
echo " [debug] $*" >&2
fi
}
# Validate env
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${GITEA_HOST:?GITEA_HOST required}"
: "${REPO:?REPO required (owner/name)}"
: "${PR_NUMBER:?PR_NUMBER required}"
: "${PR_AUTHOR:?PR_AUTHOR required}"
OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"
# Sanity: token resolves to a user.
# Use || true on the jq pipeline so that set -euo pipefail (line 45) does not
# cause the script to exit prematurely when the token is empty/invalid — the
# if check below handles that case gracefully. Without || true, a 401 from an
# empty/invalid token causes jq to exit 1, triggering set -e and exiting the
# entire script before SOP_FAIL_OPEN can be evaluated (the check is in the jq-
# install block; if jq is already on PATH, that block is skipped entirely).
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""') || true
if [ -z "$WHOAMI" ]; then
echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
if [ "${SOP_FAIL_OPEN:-}" = "1" ]; then
echo "::warning::SOP_FAIL_OPEN=1 — exiting 0 so CI does not block."
exit 0
fi
exit 1
fi
echo "::notice::token resolves to user: $WHOAMI"
# 1. Read tier label. || true ensures set -euo pipefail does not abort the
# script if curl or jq fails (e.g. 401 from empty token).
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || true
TIER=""
for L in $LABELS; do
case "$L" in
tier:low|tier:medium|tier:high)
if [ -n "$TIER" ]; then
echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
exit 1
fi
TIER="$L"
;;
esac
done
if [ -z "$TIER" ]; then
echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
exit 1
fi
debug "tier=$TIER"
# 2. Tier → required team expression (AND-composition; internal#189)
#
# Expression syntax:
# clause-a AND clause-b AND ... — ALL clauses must pass
# team-a,team-b,team-c — OR-set: ≥1 approver in ANY of these teams
# (team-a,team-b) — same as team-a,team-b (parens optional)
#
# This map is the single source of truth. Update it when the team structure
# or policy changes. Teams referenced here but absent in Gitea are treated
# as unachievable (would always fail) — operators notice the clear error
# and create the missing team.
#
# Current Gitea teams: ceo, engineers, managers
# Future teams (create before removing "???" fallback): qa, security, security-audit
declare -A TIER_EXPR=(
# tier:low — same as previous OR gate: any engineer, manager, or ceo.
["tier:low"]="engineers,managers,ceo"
# tier:medium — AND of (managers) AND (engineers) AND (qa???,security???)
# The qa+security clause requires both teams to exist; when not yet
# created, the PR author is responsible for adding them before requesting
# approval on a tier:medium PR. Ops: create qa + security Gitea teams
# and update this map to remove the "???" markers (internal#189 follow-up).
["tier:medium"]="managers AND engineers AND qa???,security???"
# tier:high — ceo only. The AND-composition adds no value for a
# single-team gate, but the framework is wired for consistency.
["tier:high"]="ceo"
)
EXPR="${TIER_EXPR[$TIER]-}"
if [ -z "$EXPR" ]; then
echo "::error::No expression defined for tier $TIER in TIER_EXPR map."
exit 1
fi
debug "expression=$EXPR"
# 3. Legacy OR-gate override (7-day burn-in grace window; internal#189 Phase 1)
if [ "${SOP_LEGACY_CHECK:-}" = "1" ]; then
LEGACY_ELIGIBLE=""
case "$TIER" in
tier:low) LEGACY_ELIGIBLE="engineers managers ceo" ;;
tier:medium) LEGACY_ELIGIBLE="managers ceo" ;;
tier:high) LEGACY_ELIGIBLE="ceo" ;;
esac
echo "::notice::SOP_LEGACY_CHECK=1 — using OR-gate ({$LEGACY_ELIGIBLE}) for this PR."
ELIGIBLE="$LEGACY_ELIGIBLE"
fi
# 4. Resolve all team names → IDs
# /orgs/{org}/teams/{slug}/... endpoints don't exist on Gitea 1.22;
# we use /teams/{id}.
# set +e prevents set -e from aborting the script if curl fails (e.g. empty token).
ORG_TEAMS_FILE=$(mktemp)
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
set +e
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
"${API}/orgs/${OWNER}/teams")
_HTTP_EXIT=$?
set -e
debug "teams-list HTTP=$HTTP_CODE (curl exit=$_HTTP_EXIT) size=$(wc -c <"$ORG_TEAMS_FILE")"
if [ "${SOP_DEBUG:-}" = "1" ]; then
echo " [debug] teams-list body (first 300 chars):" >&2
head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
fi
if [ "$_HTTP_EXIT" -ne 0 ] || [ "$HTTP_CODE" != "200" ]; then
echo "::error::GET /orgs/${OWNER}/teams failed (curl exit=$_HTTP_EXIT HTTP=$HTTP_CODE) — token may lack read:org scope or be invalid."
if [ "${SOP_FAIL_OPEN:-}" = "1" ]; then
echo "::warning::SOP_FAIL_OPEN=1 — exiting 0 so CI does not block."
exit 0
fi
exit 1
fi
# Collect every team name that appears in the expression.
# Bash word-splitting on $EXPR splits on spaces, so "AND" appears as a
# token. We skip it explicitly.
declare -A TEAM_ID
_all_teams=""
for _raw_clause in $EXPR; do
# Strip parens and split on comma.
_clause=${_raw_clause//[()]/}
for _t in $(echo "$_clause" | tr ',' '\n'); do
_t=$(echo "$_t" | tr -d '[:space:]')
[ -z "$_t" ] && continue
# Skip AND / OR operator tokens (bash word-split produced them from
# spaces in the expression string).
[ "$_t" = "AND" ] || [ "$_t" = "OR" ] && continue
# Skip if already in set.
case " $_all_teams " in
*" $_t "*) ;; # already present
*) _all_teams="${_all_teams} $_t " ;;
esac
done
done
for _t in $_all_teams; do
_t=$(echo "$_t" | tr -d ' ')
[ -z "$_t" ] && continue
_id=$(jq -r --arg t "$_t" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
if [ -z "$_id" ] || [ "$_id" = "null" ]; then
# "??" suffix marks teams that don't exist yet (tier:medium qa/security).
# Treat as permanently failing clause; clear error message guides ops.
if [[ "$_t" == *"???" ]]; then
debug "team \"$_t\" not found (expected — pending team creation per internal#189)"
continue
fi
_visible=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
echo "::error::Team \"$_t\" referenced in tier $TIER expression but not found in org $OWNER. Teams visible: $_visible"
exit 1
fi
TEAM_ID[$_t]="$_id"
debug "team-id: $_t$_id"
done
# 5. Read approving reviewers. set +e disables set -e temporarily so that curl
# failures (e.g. empty/invalid token → HTTP 401) do not abort the script before
# SOP_FAIL_OPEN is evaluated. set -e is restored immediately after.
set +e
REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
_REVIEWS_EXIT=$?
set -e
if [ $_REVIEWS_EXIT -ne 0 ] || [ -z "$REVIEWS" ]; then
echo "::error::Failed to fetch reviews (curl exit=$_REVIEWS_EXIT) — token may be invalid or unreachable."
if [ "${SOP_FAIL_OPEN:-}" = "1" ]; then
echo "::warning::SOP_FAIL_OPEN=1 — exiting 0 so CI does not block."
exit 0
fi
exit 1
fi
APPROVERS=$(echo "$REVIEWS" | jq -r '[.[] | select(.state=="APPROVED") | .user.login] | unique | .[]') || true
if [ -z "$APPROVERS" ]; then
echo "::error::No approving reviews on this PR. Set SOP_DEBUG=1 and re-run for diagnostics."
exit 1
fi
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
# 6. For each approver: skip self-review; probe team membership by id.
# Build $APPROVER_TEAMS[<user>]=space-surrounded team names (e.g. " managers ").
# Pre/post spaces ensure case patterns *${_t}* match even when the name
# is the first or last entry (bash case *word* needs delimiters on both sides).
#
# FALLBACK: if ALL team probes return 403 (token lacks read:org scope),
# fall back to /orgs/{org}/members/{user}. This returns 204 for any org
# member — a superset of team membership. Accepting it as a fallback means
# the gate passes when the token is scoped to repo+user only (core-bot PAT).
# This is safe because: (a) org membership is a prerequisite for every
# eligible team; (b) the AND-composition of internal#189 still requires
# multiple independent approvers; (c) any token with read:repository can
# see the approving reviews, so bypass requires a colluding approver.
declare -A APPROVER_TEAMS
for U in $APPROVERS; do
[ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
_any_team_success="no"
for T in "${!TEAM_ID[@]}"; do
ID="${TEAM_ID[$T]}"
CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
"${API}/teams/${ID}/members/${U}")
debug "probe: $U in team $T (id=$ID) → HTTP $CODE"
if [ "$CODE" = "200" ] || [ "$CODE" = "204" ]; then
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
debug "$U qualifies for team $T"
_any_team_success="yes"
fi
done
# Fallback: if every team probe returned 403, try org membership.
# "??" teams were never resolved to IDs so they never entered the loop.
# If the user is an org member, credit them as being in each queried team
# (engineers, managers, ceo are all org-level). This is safe because org
# membership is a prerequisite for all three, and bypass requires a colluding
# approver (same risk as before the AND-composition).
if [ "$_any_team_success" = "no" ]; then
ORG_CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
"${API}/orgs/${OWNER}/members/${U}")
debug "probe: $U in org $OWNER (fallback) → HTTP $ORG_CODE"
if [ "$ORG_CODE" = "204" ]; then
for T in "${!TEAM_ID[@]}"; do
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
done
debug "$U credited as org member for all queried teams (fallback — token may lack read:org)"
fi
fi
done
# 7. Evaluate the tier expression.
#
# legacy OR-gate: use the simplified loop from before internal#189.
if [ -n "${LEGACY_ELIGIBLE:-}" ]; then
OK=""
for _u in "${!APPROVER_TEAMS[@]}"; do
for _t2 in $LEGACY_ELIGIBLE; do
case "${APPROVER_TEAMS[$_u]}" in
*${_t2}*)
echo "::notice::approver $_u is in team $_t2 (eligible for $TIER)"
OK="yes"
break
;;
esac
done
[ -n "$OK" ] && break
done
if [ -z "$OK" ]; then
echo "::error::Tier $TIER requires approval from a non-author member of {$LEGACY_ELIGIBLE}. Set SOP_DEBUG=1 to see per-probe HTTP codes."
exit 1
fi
echo "::notice::sop-tier-check passed: $TIER (legacy OR-gate)"
exit 0
fi
# AND-gate: evaluate the expression clause by clause.
# _passed_clauses and _failed_clauses accumulate for the status description.
_passed_clauses=""
_failed_clauses=""
for _raw_clause in $EXPR; do
# Normalise: strip parens, replace commas with spaces so bash word-split
# can iterate the OR-set members. The previous form
# _clause=$(echo ... | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
# collapsed every member into one concatenated token because
# `tr -d '[:space:]'` strips the very newlines that just separated them
# ("engineers,managers,ceo" -> "engineersmanagersceo"), so the OR-clause
# only ever evaluated as a single nonsense team name and never matched
# APPROVER_TEAMS. Fixed in #229: leave the comma-separated members as
# space-separated tokens for `for _t in $_clause`.
_no_parens=${_raw_clause//[()]/}
_clause=${_no_parens//,/ }
_clause_passed="no"
_clause_names=""
for _t in $_clause; do
# Append (don't overwrite) team name to the human-readable accumulator.
# The previous form `_clause_names="${_clause_names:+, }${_t}"`
# rewrote the variable on every iteration, so the FAIL message only
# ever showed the LAST team. Fixed: prepend prior value before the
# comma-separator, then append the new team name.
_clause_names="${_clause_names}${_clause_names:+, }${_t}"
# Skip teams not yet in Gitea (qa??? / security??? placeholders).
[[ "$_t" == *"???" ]] && debug "clause \"$_t\": skipped (team pending creation)" && continue
[ -z "${TEAM_ID[$_t]:-}" ] && debug "clause \"$_t\": no ID resolved, skipping" && continue
for _u in "${!APPROVER_TEAMS[@]}"; do
# Note: APPROVER_TEAMS values are space-surrounded (e.g. " managers ").
# Pattern *${_t}* matches team name anywhere in the space-padded string.
case "${APPROVER_TEAMS[$_u]}" in
*${_t}*)
_clause_passed="yes"
debug "clause \"$_t\": satisfied by $_u"
break
;;
esac
done
done
# Label for display: strip "???" from pending teams.
_label=$(echo "$_raw_clause" | tr -d '()' | tr ',' '/' | tr -d '[:space:]' | sed 's/???//g')
if [ "$_clause_passed" = "yes" ]; then
# Append (don't overwrite) — same accumulator bug as _clause_names above.
_passed_clauses="${_passed_clauses}${_passed_clauses:+, }$_label"
echo "::notice::clause [$_label]: PASS — satisfied by approving reviewer(s)"
else
_failed_clauses="${_failed_clauses}${_failed_clauses:+, }$_label"
echo "::error::clause [$_label]: FAIL — no approving reviewer belongs to any of these teams (${_clause_names}). Set SOP_DEBUG=1 to see per-team probe results."
fi
done
if [ -n "$_failed_clauses" ]; then
echo ""
echo "::error::sop-tier-check FAILED for $TIER."
echo " Passed :${_passed_clauses}"
echo " Missing:${_failed_clauses}"
echo " All clauses must be satisfied. Each missing team needs an APPROVED review from one of its members."
exit 1
fi
echo "::notice::sop-tier-check PASSED: $TIER — all required clauses satisfied [${_passed_clauses}]"
+173
View File
@@ -0,0 +1,173 @@
#!/usr/bin/env bash
# sop-tier-refire — re-evaluate sop-tier-check and POST status to PR head SHA.
#
# Invoked from `.gitea/workflows/sop-tier-refire.yml` when a repo
# MEMBER/OWNER/COLLABORATOR comments `/refire-tier-check` on a PR.
#
# Behavior:
#
# 1. Resolve PR head SHA + author from PR_NUMBER.
# 2. Rate-limit: if the sop-tier-check context has been POSTed in the
# last 30 seconds, skip (prevents comment-spam status thrash).
# 3. Invoke `.gitea/scripts/sop-tier-check.sh` with the same env the
# canonical workflow provides. This is DRY: we re-use the exact AND-
# composition gate logic, not a watered-down approving-count check.
# 4. POST the resulting status (success on exit 0, failure on non-zero)
# to `/repos/.../statuses/{HEAD_SHA}` with context
# "sop-tier-check / tier-check (pull_request)" — the same context name
# branch protection requires.
#
# Required env (set by sop-tier-refire.yml):
# GITEA_TOKEN — org-level SOP_TIER_CHECK_TOKEN (read:org/user/issue/repo)
# GITEA_HOST — e.g. git.moleculesai.app
# REPO — owner/name
# PR_NUMBER — PR number from issue_comment payload
# COMMENT_AUTHOR — login of the commenter (logged for audit)
#
# Optional:
# SOP_DEBUG=1 — verbose per-API-call diagnostics
# SOP_REFIRE_RATE_LIMIT_SEC — override the 30s rate-limit (default 30)
# SOP_REFIRE_DISABLE_RATE_LIMIT=1 — for tests; skips the rate-limit check
set -euo pipefail
debug() {
if [ "${SOP_DEBUG:-}" = "1" ]; then
echo " [debug] $*" >&2
fi
}
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${GITEA_HOST:?GITEA_HOST required}"
: "${REPO:?REPO required (owner/name)}"
: "${PR_NUMBER:?PR_NUMBER required}"
: "${COMMENT_AUTHOR:=unknown}"
OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
CONTEXT="sop-tier-check / tier-check (pull_request)"
RATE_LIMIT_SEC="${SOP_REFIRE_RATE_LIMIT_SEC:-30}"
echo "::notice::sop-tier-refire start: repo=$OWNER/$NAME pr=$PR_NUMBER commenter=$COMMENT_AUTHOR"
# 1. Fetch PR details — need head.sha and user.login.
PR_FILE=$(mktemp)
trap 'rm -f "$PR_FILE"' EXIT
PR_HTTP=$(curl -sS -o "$PR_FILE" -w '%{http_code}' -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
if [ "$PR_HTTP" != "200" ]; then
echo "::error::GET /pulls/$PR_NUMBER returned HTTP $PR_HTTP (body $(head -c 200 "$PR_FILE"))"
exit 1
fi
HEAD_SHA=$(jq -r '.head.sha' <"$PR_FILE")
PR_AUTHOR=$(jq -r '.user.login' <"$PR_FILE")
PR_STATE=$(jq -r '.state' <"$PR_FILE")
if [ -z "$HEAD_SHA" ] || [ "$HEAD_SHA" = "null" ]; then
echo "::error::Could not resolve head.sha from PR #$PR_NUMBER response"
exit 1
fi
debug "head_sha=$HEAD_SHA pr_author=$PR_AUTHOR state=$PR_STATE"
if [ "$PR_STATE" != "open" ]; then
echo "::notice::PR #$PR_NUMBER state is $PR_STATE; refire is a no-op on closed PRs."
exit 0
fi
# 2. Rate-limit: skip if our context was updated in the last $RATE_LIMIT_SEC.
# Gitea statuses endpoint returns latest first; we check the most recent
# entry for our context name.
if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
STATUSES_FILE=$(mktemp)
trap 'rm -f "$PR_FILE" "$STATUSES_FILE"' EXIT
ST_HTTP=$(curl -sS -o "$STATUSES_FILE" -w '%{http_code}' -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}?limit=50&sort=newest")
debug "statuses-list HTTP=$ST_HTTP"
if [ "$ST_HTTP" = "200" ]; then
LAST_UPDATED=$(jq -r --arg c "$CONTEXT" \
'[.[] | select(.context == $c)] | first | .updated_at // ""' \
<"$STATUSES_FILE")
if [ -n "$LAST_UPDATED" ] && [ "$LAST_UPDATED" != "null" ]; then
# Parse RFC3339 → epoch. Use python -c for portability (date(1) -d
# differs between BSD/GNU; the Gitea runner is Ubuntu so GNU date
# works, but we keep python for future container variance).
LAST_EPOCH=$(python3 -c "import sys,datetime;print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$LAST_UPDATED" 2>/dev/null || echo "0")
NOW_EPOCH=$(date -u +%s)
AGE=$((NOW_EPOCH - LAST_EPOCH))
debug "last status update: $LAST_UPDATED ($AGE seconds ago)"
if [ "$AGE" -lt "$RATE_LIMIT_SEC" ] && [ "$AGE" -ge 0 ]; then
echo "::notice::sop-tier-refire rate-limited — last status update was ${AGE}s ago (<${RATE_LIMIT_SEC}s window). Try again shortly."
exit 0
fi
fi
fi
fi
# 3. Invoke sop-tier-check.sh with the env it expects.
# The canonical workflow intentionally fail-opens the job conclusion
# (`bash .gitea/scripts/sop-tier-check.sh || true`) while Gitea branch
# protection enforces reviewer approvals separately. Keep the refire path
# aligned with that workflow status behavior; otherwise /refire-tier-check can
# post a hard failure that the canonical pull_request_target workflow would
# not publish.
#
# SOP_REFIRE_TIER_CHECK_SCRIPT env var lets tests substitute a mock —
# sop-tier-check.sh uses bash 4+ associative arrays which trigger a known
# bash 3.2 parser bug (`tier: unbound variable` from declare -A with
# `set -u`). Linux Gitea runners ship bash 4/5 so production is fine;
# the override exists so the bash 3.2 dev box can still exercise the
# refire glue logic end-to-end.
SCRIPT="${SOP_REFIRE_TIER_CHECK_SCRIPT:-$(dirname "$0")/sop-tier-check.sh}"
if [ ! -f "$SCRIPT" ]; then
echo "::error::sop-tier-check.sh not found at $SCRIPT — refire requires the canonical script"
exit 1
fi
# Re-invoke. Pipe stdout/stderr through so the runner log shows the
# tier-check decision inline.
GITEA_TOKEN="$GITEA_TOKEN" \
GITEA_HOST="$GITEA_HOST" \
REPO="$REPO" \
PR_NUMBER="$PR_NUMBER" \
PR_AUTHOR="$PR_AUTHOR" \
SOP_DEBUG="${SOP_DEBUG:-0}" \
SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
bash "$SCRIPT" || true
TIER_EXIT=0
debug "sop-tier-check.sh exit=$TIER_EXIT"
# 4. POST the resulting status.
if [ "$TIER_EXIT" -eq 0 ]; then
STATE="success"
DESCRIPTION="Refired via /refire-tier-check by $COMMENT_AUTHOR"
else
STATE="failure"
DESCRIPTION="Refired via /refire-tier-check; tier-check failed (see workflow log)"
fi
# Status target_url points at the runner log so a curious reviewer can
# follow it back. SERVER_URL + RUN_ID + JOB_ID isn't trivially constructible
# from the bash env on Gitea 1.22.6, so we point at the PR itself.
TARGET_URL="https://${GITEA_HOST}/${OWNER}/${NAME}/pulls/${PR_NUMBER}"
POST_BODY=$(jq -nc \
--arg state "$STATE" \
--arg context "$CONTEXT" \
--arg description "$DESCRIPTION" \
--arg target_url "$TARGET_URL" \
'{state:$state, context:$context, description:$description, target_url:$target_url}')
POST_FILE=$(mktemp)
trap 'rm -f "$PR_FILE" "${STATUSES_FILE:-}" "$POST_FILE"' EXIT
POST_HTTP=$(curl -sS -o "$POST_FILE" -w '%{http_code}' \
-X POST -H "$AUTH" -H "Content-Type: application/json" \
-d "$POST_BODY" \
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}")
if [ "$POST_HTTP" != "200" ] && [ "$POST_HTTP" != "201" ]; then
echo "::error::POST /statuses/$HEAD_SHA returned HTTP $POST_HTTP (body $(head -c 200 "$POST_FILE"))"
exit 1
fi
echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
exit "$TIER_EXIT"
+3 -14
View File
@@ -689,8 +689,8 @@ def reap_branch(
shas = list_recent_commit_shas(branch, limit)
except ApiError as e:
print(
"::error::status-reaper cannot run: commit-list API failed "
f"after retries: {e}"
"::warning::status-reaper skipped this tick because the "
f"commit list could not be read after retries: {e}"
)
return {
"scanned_shas": 0,
@@ -704,7 +704,6 @@ def reap_branch(
"compensated_cancelled_push": 0,
"preserved_pr_without_push_success": 0,
"compensated_per_sha": {},
"sha_api_errors": 0,
"skipped": True,
"skip_reason": "commit-list-api-error",
}
@@ -721,7 +720,6 @@ def reap_branch(
"compensated_cancelled_push": 0,
"preserved_pr_without_push_success": 0,
"compensated_per_sha": {},
"sha_api_errors": 0,
}
for sha in shas:
@@ -733,9 +731,8 @@ def reap_branch(
try:
combined = get_combined_status(sha)
except ApiError as e:
aggregate["sha_api_errors"] += 1
print(
f"::error::get_combined_status({sha[:10]}) failed; "
f"::warning::get_combined_status({sha[:10]}) failed; "
f"skipping this SHA: {e}"
)
continue
@@ -822,14 +819,6 @@ def main() -> int:
sort_keys=True,
)
)
# Observability: infra-failure → red. If the commit list could not be
# read or any per-SHA status fetch failed, the tick is incomplete and
# must be observable as a failure (non-zero exit) so the cron bot or
# runner surface alerts.
if counters.get("skipped"):
return 1
if counters.get("sha_api_errors", 0) > 0:
return 1
return 0
+28
View File
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Mock sop-tier-check.sh for sop-tier-refire tests.
#
# Exits 0 ("PASS") if $MOCK_TIER_RESULT == "pass", else exits 1.
# This lets the refire tests cover the success + failure status-POST
# paths without invoking the real sop-tier-check.sh (which uses bash 4+
# associative arrays — known parser bug on macOS bash 3.2 dev box).
set -euo pipefail
case "${MOCK_TIER_RESULT:-pass}" in
pass)
echo "::notice::mock tier-check: PASS"
exit 0
;;
fail_no_label)
echo "::error::mock tier-check: no tier label"
exit 1
;;
fail_no_approvals)
echo "::error::mock tier-check: no approving reviews"
exit 1
;;
*)
echo "::error::mock tier-check: unknown MOCK_TIER_RESULT=${MOCK_TIER_RESULT:-}"
exit 2
;;
esac
+208
View File
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""Stub Gitea API for sop-tier-refire test scenarios.
Reads $FIXTURE_STATE_DIR/scenario to decide what to return for each
endpoint the sop-tier-refire.sh + sop-tier-check.sh scripts call.
Captures every POST to /statuses/{sha} into posted_statuses.jsonl so
the test can assert what the script tried to write.
Scenarios:
T1_success — tier:low + APPROVED by engineer → tier-check passes
T2_no_tier_label — no tier label → tier-check exits 1 before POST
T3_no_approvals — tier:low but zero approving reviews → exits 1
T4_closed — PR state=closed → refire is a no-op
T5_rate_limited — last status update 5 seconds ago → skip
Usage:
FIXTURE_STATE_DIR=/tmp/x python3 _refire_fixture.py 8080
"""
import datetime
import http.server
import json
import os
import re
import sys
import urllib.parse
STATE_DIR = os.environ["FIXTURE_STATE_DIR"]
def scenario() -> str:
p = os.path.join(STATE_DIR, "scenario")
if not os.path.isfile(p):
return "T1_success"
with open(p, encoding="utf-8") as f:
return f.read().strip()
def now_iso() -> str:
return datetime.datetime.now(datetime.timezone.utc).isoformat()
def append_post(body: dict) -> None:
with open(os.path.join(STATE_DIR, "posted_statuses.jsonl"), "a") as f:
f.write(json.dumps(body) + "\n")
def pr_payload() -> dict:
sc = scenario()
state = "closed" if sc == "T4_closed" else "open"
return {
"number": 999,
"state": state,
"head": {"sha": "deadbeef0000111122223333444455556666"},
"user": {"login": "feature-author"},
}
def labels_payload() -> list:
sc = scenario()
if sc == "T2_no_tier_label":
return [{"name": "bug"}]
# All other scenarios use tier:low
return [{"name": "tier:low"}, {"name": "ci"}]
def reviews_payload() -> list:
sc = scenario()
if sc == "T3_no_approvals":
return []
# All other scenarios have one APPROVED review by an engineer
return [
{
"state": "APPROVED",
"user": {"login": "reviewer-engineer"},
}
]
def teams_payload() -> list:
# Mirror the real molecule-ai org teams referenced in TIER_EXPR
return [
{"id": 5, "name": "ceo"},
{"id": 2, "name": "engineers"},
{"id": 6, "name": "managers"},
]
def statuses_payload() -> list:
sc = scenario()
if sc == "T5_rate_limited":
recent = (
datetime.datetime.now(datetime.timezone.utc)
- datetime.timedelta(seconds=5)
).isoformat()
return [
{
"context": "sop-tier-check / tier-check (pull_request)",
"state": "failure",
"updated_at": recent,
}
]
return []
def user_payload() -> dict:
# Mirrors the WHOAMI probe in sop-tier-check.sh
return {"login": "sop-tier-bot-fixture"}
class Handler(http.server.BaseHTTPRequestHandler):
# Quiet — keep stdout for explicit logs only.
def log_message(self, *args, **kwargs): # noqa: D401
pass
def _json(self, code: int, body) -> None:
payload = json.dumps(body).encode()
self.send_response(code)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(payload)))
self.end_headers()
self.wfile.write(payload)
def _empty(self, code: int) -> None:
self.send_response(code)
self.send_header("Content-Length", "0")
self.end_headers()
def do_GET(self): # noqa: N802
u = urllib.parse.urlparse(self.path)
path = u.path
if path == "/_ping":
return self._json(200, {"ok": True})
if path == "/api/v1/user":
return self._json(200, user_payload())
# /api/v1/repos/{owner}/{name}/pulls/{n}
m = re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/(\d+)$", path)
if m:
return self._json(200, pr_payload())
# /api/v1/repos/{owner}/{name}/issues/{n}/labels
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/issues/\d+/labels$", path):
return self._json(200, labels_payload())
# /api/v1/repos/{owner}/{name}/pulls/{n}/reviews
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/\d+/reviews$", path):
return self._json(200, reviews_payload())
# /api/v1/orgs/{owner}/teams
if re.match(r"^/api/v1/orgs/[^/]+/teams$", path):
return self._json(200, teams_payload())
# /api/v1/teams/{id}/members/{login} → 204 if user is an engineer
m = re.match(r"^/api/v1/teams/(\d+)/members/([^/]+)$", path)
if m:
team_id, login = m.group(1), m.group(2)
# In our fixture reviewer-engineer ∈ engineers (id=2)
if team_id == "2" and login == "reviewer-engineer":
return self._empty(204)
return self._empty(404)
# /api/v1/orgs/{owner}/members/{login} — fallback path used when
# team-member probes all 403. We don't need it for these tests.
if re.match(r"^/api/v1/orgs/[^/]+/members/[^/]+$", path):
return self._empty(404)
# /api/v1/repos/{owner}/{name}/statuses/{sha}
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
return self._json(200, statuses_payload())
return self._json(404, {"path": path, "msg": "fixture: no route"})
def do_POST(self): # noqa: N802
u = urllib.parse.urlparse(self.path)
path = u.path
length = int(self.headers.get("Content-Length") or 0)
raw = self.rfile.read(length) if length else b""
try:
body = json.loads(raw) if raw else {}
except Exception:
body = {"_raw": raw.decode(errors="replace")}
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
append_post(body)
# Echo back something status-shaped — script only checks HTTP code.
return self._json(
201,
{
"context": body.get("context"),
"state": body.get("state"),
"created_at": now_iso(),
},
)
return self._json(404, {"path": path, "msg": "fixture: no route"})
def main():
port = int(sys.argv[1])
srv = http.server.ThreadingHTTPServer(("127.0.0.1", port), Handler)
srv.serve_forever()
if __name__ == "__main__":
main()
+6 -17
View File
@@ -109,34 +109,23 @@ class Handler(http.server.BaseHTTPRequestHandler):
return self._json(200, [{
"state": "APPROVED",
"dismissed": True,
"official": True,
"user": {"login": "core-devops"},
"commit_id": "deadbeef0000111122223333444455556666",
"commit_id": "abc1234",
}])
if sc == "T3_reviews_approved_non_author":
return self._json(200, [
{"state": "CHANGES_REQUESTED", "dismissed": False, "official": True, "user": {"login": "bob"}, "commit_id": "deadbeef0000111122223333444455556666"},
{"state": "APPROVED", "dismissed": False, "official": True, "user": {"login": "core-devops"}, "commit_id": "deadbeef0000111122223333444455556666"},
{"state": "CHANGES_REQUESTED", "dismissed": False, "user": {"login": "bob"}, "commit_id": "abc1234"},
{"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"},
])
if sc == "T19_ai_sop_ack_approved":
# ai-sop-ack member submitted APPROVED review — must NOT count
# toward qa-review (team_id=20) or security-review (team_id=21).
return self._json(200, [
{"state": "APPROVED", "dismissed": False, "official": True, "user": {"login": "ai-reviewer"}, "commit_id": "deadbeef0000111122223333444455556666"},
{"state": "APPROVED", "dismissed": False, "user": {"login": "ai-reviewer"}, "commit_id": "abc1234"},
])
if sc == "T21_stale_head_approved":
# APPROVED review but on an old commit (stale head) → must be rejected
return self._json(200, [
{"state": "APPROVED", "dismissed": False, "official": True, "user": {"login": "core-devops"}, "commit_id": "oldsha0000000000000000000000000000"},
])
if sc == "T22_missing_official":
# APPROVED review with no official field → must be rejected
return self._json(200, [
{"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "deadbeef0000111122223333444455556666"},
])
# Default: one non-author APPROVED (current head, official)
# Default: one non-author APPROVED
return self._json(200, [
{"state": "APPROVED", "dismissed": False, "official": True, "user": {"login": "core-devops"}, "commit_id": "deadbeef0000111122223333444455556666"},
{"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"},
])
# GET /repos/{owner}/{name}/issues/{pr_number}/comments
@@ -1,119 +0,0 @@
#!/usr/bin/env bash
# test_audit_force_merge.sh — regression lock for audit-force-merge fail-closed
# behavior. Verifies every schema validation path via direct jq filter tests.
#
# Usage: bash test_audit_force_merge.sh
set -euo pipefail
fail() { echo "FAIL: $*" >&2; exit 1; }
pass() { echo "PASS: $*"; }
[ -x "$(command -v jq)" ] || { echo "SKIP: jq not on PATH"; exit 0; }
HEAD_SHA="deadbeef00000000000000000000000000000000"
# The schema validation jq expression from audit-force-merge.sh.
validate_pr_schema() {
jq -r '
(.merged | type == "boolean") and
(.merge_commit_sha | type == "string") and
(.merged_by | type == "object") and (.merged_by.login | type == "string") and
(.base | type == "object") and (.base.ref | type == "string") and
(.head | type == "object") and (.head.sha | type == "string")
'
}
validate_statuses_type() {
jq -r '(.statuses | type) == "array"'
}
# T1 — valid PR payload → true
T1=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T1" = "true" ] || fail "T1: valid payload should pass schema"
pass "T1: valid payload passes schema"
# T2 — merged=false (valid types) → true (schema is about types, not values)
T2=$(echo '{"merged":false,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T2" = "true" ] || fail "T2: merged=false with valid types should pass schema"
pass "T2: merged=false with valid types passes schema"
# T3 — missing merged field → false
T3=$(echo '{"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T3" = "false" ] || fail "T3: missing merged should fail schema"
pass "T3: missing merged fails schema"
# T4 — merged is string "true" instead of boolean → false
T4=$(echo '{"merged":"true","merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T4" = "false" ] || fail "T4: merged as string should fail schema"
pass "T4: merged as string fails schema"
# T5 — merge_commit_sha is null → false
T5=$(echo '{"merged":true,"merge_commit_sha":null,"merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T5" = "false" ] || fail "T5: null merge_commit_sha should fail schema"
pass "T5: null merge_commit_sha fails schema"
# T6 — merged_by is null → false
T6=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":null,"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T6" = "false" ] || fail "T6: null merged_by should fail schema"
pass "T6: null merged_by fails schema"
# T7 — base.ref is number → false
T7=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":123},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T7" = "false" ] || fail "T7: numeric base.ref should fail schema"
pass "T7: numeric base.ref fails schema"
# T8 — head is missing → false
T8=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"}}' | validate_pr_schema)
[ "$T8" = "false" ] || fail "T8: missing head should fail schema"
pass "T8: missing head fails schema"
# T9 — statuses missing → false
T9=$(echo '{}' | validate_statuses_type)
[ "$T9" = "false" ] || fail "T9: missing statuses should fail type check"
pass "T9: missing statuses fails type check"
# T10 — statuses is string → false
T10=$(echo '{"statuses":"unexpected"}' | validate_statuses_type)
[ "$T10" = "false" ] || fail "T10: string statuses should fail type check"
pass "T10: string statuses fails type check"
# T11 — statuses is null → false
T11=$(echo '{"statuses":null}' | validate_statuses_type)
[ "$T11" = "false" ] || fail "T11: null statuses should fail type check"
pass "T11: null statuses fails type check"
# T12 — statuses is array → true
T12=$(echo '{"statuses":[{"context":"c1","status":"success"}]}' | validate_statuses_type)
[ "$T12" = "true" ] || fail "T12: array statuses should pass type check"
pass "T12: array statuses passes type check"
# T13 — empty array statuses → true
T13=$(echo '{"statuses":[]}' | validate_statuses_type)
[ "$T13" = "true" ] || fail "T13: empty array statuses should pass type check"
pass "T13: empty array statuses passes type check"
# T14-T16: REQUIRED_CHECKS_JSON branch entry validation
validate_required_checks_json() {
local branch="$1"
local json="$2"
echo "$json" | jq -r --arg branch "$branch" 'has($branch) and (.[$branch] | type == "array")'
}
# T14 — branch exists and is array → true
T14=$(validate_required_checks_json "main" '{"main":["CI / all-required"]}')
[ "$T14" = "true" ] || fail "T14: existing array branch should pass"
pass "T14: existing array branch passes"
# T15 — branch missing → false
T15=$(validate_required_checks_json "staging" '{"main":["CI / all-required"]}')
[ "$T15" = "false" ] || fail "T15: missing branch should fail"
pass "T15: missing branch fails"
# T16 — branch entry is string instead of array → false
T16=$(validate_required_checks_json "main" '{"main":"CI / all-required"}')
[ "$T16" = "false" ] || fail "T16: string branch entry should fail"
pass "T16: string branch entry fails"
echo
echo "ALL AUDIT-FORCE-MERGE CHECKS PASSED"
@@ -1,5 +1,4 @@
import importlib.util
import json
import sys
from pathlib import Path
from unittest.mock import patch
@@ -37,106 +36,6 @@ def _make_audit_doc(required_checks: list[str]) -> dict:
}
def _make_audit_doc_json(required_checks_json: dict) -> dict:
return {
"jobs": {
"audit": {
"steps": [
{"env": {"REQUIRED_CHECKS_JSON": json.dumps(required_checks_json)}}
]
}
}
}
# ---------------------------------------------------------------------------
# required_checks_env — dual-variant parsing
# ---------------------------------------------------------------------------
def test_required_checks_env_prefers_json_over_legacy():
doc = {
"jobs": {
"audit": {
"steps": [
{
"env": {
"REQUIRED_CHECKS_JSON": json.dumps(
{"main": ["ctx-a"], "staging": ["ctx-b"]}
),
"REQUIRED_CHECKS": "ctx-legacy\nctx-old",
}
}
]
}
}
}
assert drift.required_checks_env(doc, "main") == {"ctx-a"}
assert drift.required_checks_env(doc, "staging") == {"ctx-b"}
def test_required_checks_env_falls_back_to_legacy():
doc = _make_audit_doc(["legacy-ctx"])
assert drift.required_checks_env(doc, "main") == {"legacy-ctx"}
def test_required_checks_env_json_missing_branch_fails():
doc = _make_audit_doc_json({"staging": ["ctx-b"]})
try:
drift.required_checks_env(doc, "main")
except SystemExit as exc:
assert exc.code == 3
else:
raise AssertionError("expected SystemExit(3)")
def test_required_checks_env_json_malformed_fails():
doc = {
"jobs": {
"audit": {
"steps": [
{"env": {"REQUIRED_CHECKS_JSON": "not-json"}}
]
}
}
}
try:
drift.required_checks_env(doc, "main")
except SystemExit as exc:
assert exc.code == 3
else:
raise AssertionError("expected SystemExit(3)")
def test_required_checks_env_json_non_string_item_fails():
doc = _make_audit_doc_json({"main": ["ctx-a", 123, "ctx-b"]})
try:
drift.required_checks_env(doc, "main")
except SystemExit as exc:
assert exc.code == 3
else:
raise AssertionError("expected SystemExit(3)")
def test_required_checks_env_json_empty_string_item_fails():
doc = _make_audit_doc_json({"main": ["ctx-a", " ", "ctx-b"]})
try:
drift.required_checks_env(doc, "main")
except SystemExit as exc:
assert exc.code == 3
else:
raise AssertionError("expected SystemExit(3)")
def test_required_checks_env_json_duplicate_context_fails():
doc = _make_audit_doc_json({"main": ["ctx-a", "ctx-b", "ctx-a"]})
try:
drift.required_checks_env(doc, "main")
except SystemExit as exc:
assert exc.code == 3
else:
raise AssertionError("expected SystemExit(3)")
# ---------------------------------------------------------------------------
# sentinel_needs
# ---------------------------------------------------------------------------
@@ -1,244 +0,0 @@
"""Live-fire regression test for #2159 — gate auto-fire runtime verification.
Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML
is structurally correct. This test validates the *runtime* path: submitting an
APPROVED review to a PR whose head contains the current gate workflows causes
Gitea Actions to queue the qa-review + security-review workflows and POST the
branch-protection-required (pull_request_target) contexts within a reasonable
window.
Skipped when Gitea API credentials are not available. Intended for:
- manual developer verification
- CI jobs provisioned with a service-account token
Environment:
GITEA_HOST — default: git.moleculesai.app
GITEA_TOKEN — token with read:repository + write:issues (for review POST)
REPO — default: molecule-ai/molecule-core
LIVEFIRE_PR_NUMBER — optional; if omitted the test tries to find a
suitable open PR automatically, or skips.
LIVEFIRE_TIMEOUT_SEC — default: 120
"""
import base64
import json
import os
import re
import time
import urllib.error
import urllib.request
from pathlib import Path
import pytest
import yaml
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "")
LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120"))
REQUIRED_CONTEXTS = [
"qa-review / approved (pull_request_target)",
"security-review / approved (pull_request_target)",
]
skip_no_token = pytest.mark.skipif(
not GITEA_TOKEN,
reason="GITEA_TOKEN not set — live-fire test requires API credentials",
)
def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
url = f"https://{GITEA_HOST}/api/v1{path}"
headers = {
"Authorization": f"token {GITEA_TOKEN}",
"Content-Type": "application/json",
}
data = json.dumps(body).encode() if body else None
req = urllib.request.Request(url, data=data, headers=headers, method=method)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
raw = resp.read()
code = resp.status
except urllib.error.HTTPError as exc:
raw = exc.read()
code = exc.code
payload = json.loads(raw) if raw else {}
return code, payload
def _get_pr(number: int) -> dict:
code, pr = _api("GET", f"/repos/{REPO}/pulls/{number}")
if code != 200:
pytest.fail(f"GET /pulls/{number} returned HTTP {code}: {pr}")
return pr
def _list_open_prs() -> list[dict]:
code, prs = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50")
if code != 200:
pytest.fail(f"GET /pulls?state=open returned HTTP {code}: {prs}")
return prs
def _pr_has_trigger_in_head(pr: dict) -> bool:
"""Return True if the PR head contains pull_request_review in both workflows."""
head_sha = pr["head"]["sha"]
for wf_name in ("qa-review.yml", "security-review.yml"):
path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}"
code, payload = _api("GET", path)
if code != 200:
return False
raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
wf = yaml.safe_load(raw)
on = wf.get(True) or wf.get("on") or {}
if isinstance(on, str):
if on != "pull_request_review":
return False
elif "pull_request_review" not in on:
return False
return True
def _find_suitable_pr() -> dict:
if LIVEFIRE_PR_NUMBER:
pr = _get_pr(int(LIVEFIRE_PR_NUMBER))
if pr.get("state") != "open":
pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open")
return pr
prs = _list_open_prs()
for pr in prs:
if _pr_has_trigger_in_head(pr):
return pr
pytest.skip("No open PR found whose head contains the pull_request_review trigger")
def _submit_approved_review(pr_number: int) -> dict:
code, review = _api(
"POST",
f"/repos/{REPO}/pulls/{pr_number}/reviews",
{"body": "Live-fire test APPROVED review", "event": "APPROVED"},
)
# 200 = created, 422 = review already exists (idempotent enough for our purposes)
if code not in (200, 201, 422):
pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}")
return review
def _get_status_snapshot(sha: str) -> dict[str, dict]:
"""Return mapping context -> {id, updated_at, target_url} for required contexts."""
code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
if code != 200:
return {}
result: dict[str, dict] = {}
for st in statuses:
ctx = st.get("context", "")
if ctx in REQUIRED_CONTEXTS:
result[ctx] = {
"id": st.get("id"),
"updated_at": st.get("updated_at", st.get("created_at", "")),
"target_url": st.get("target_url"),
}
return result
def _extract_run_id(target_url: str | None) -> str | None:
"""Extract the Actions run_id from a status target_url."""
if not target_url:
return None
m = re.search(r"/actions/runs/(\d+)", target_url)
return m.group(1) if m else None
def _poll_fresh_statuses(
sha: str,
prior_snapshot: dict[str, dict],
timeout_sec: int = LIVEFIRE_TIMEOUT_SEC,
) -> dict[str, dict]:
"""Poll until required contexts appear fresh (newer timestamp, id, or run)."""
deadline = time.monotonic() + timeout_sec
found: dict[str, dict] = {}
while time.monotonic() < deadline:
code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
if code == 200:
for st in statuses:
ctx = st.get("context", "")
if ctx in REQUIRED_CONTEXTS:
updated_at = st.get("updated_at", st.get("created_at", ""))
status_id = st.get("id")
target_url = st.get("target_url")
prior = prior_snapshot.get(ctx, {})
# Fresh if timestamp changed, id changed, or target_url changed.
is_fresh = (
ctx not in prior_snapshot
or updated_at != prior.get("updated_at", "")
or status_id != prior.get("id")
or target_url != prior.get("target_url")
)
if is_fresh:
found[ctx] = {
"state": st.get("state", st.get("status", "")),
"updated_at": updated_at,
"id": status_id,
"target_url": target_url,
}
if all(ctx in found for ctx in REQUIRED_CONTEXTS):
return found
time.sleep(5)
return found
@skip_no_token
class TestGateAutoFireLive:
def test_auto_fire_posts_required_contexts(self):
"""Submit APPROVED review; assert BP-required contexts appear fresh within timeout."""
pr = _find_suitable_pr()
pr_number = pr["number"]
head_sha = pr["head"]["sha"]
# Capture pre-existing status snapshot so we can prove FRESH contexts
# were posted after the review submission (not stale from a prior run).
prior_snapshot = _get_status_snapshot(head_sha)
prior_run_ids = {
_extract_run_id(s["target_url"])
for s in prior_snapshot.values()
if _extract_run_id(s["target_url"])
}
review = _submit_approved_review(pr_number)
found = _poll_fresh_statuses(head_sha, prior_snapshot)
missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found]
if missing:
pytest.fail(
f"After {LIVEFIRE_TIMEOUT_SEC}s, fresh contexts still missing: {missing}. "
f"Found: {found}. Prior snapshot: {prior_snapshot}. "
f"PR #{pr_number} head={head_sha}. "
f"This indicates the pull_request_review trigger did not fire at runtime."
)
# The contexts appeared fresh — that's the proof of auto-fire.
# We do NOT assert success vs failure; the evaluator decides that.
# The point of #2159 is that the workflows QUEUE and POST at all.
for ctx, info in found.items():
state = info["state"]
assert state in ("pending", "success", "failure"), (
f"Unexpected state {state!r} for {ctx}"
)
# CR2 Finding 1: prove a NEW workflow run was triggered, not just
# an in-place status update. Gitea 1.22.6 lacks REST /actions/runs/*
# endpoints, so we use the run_id embedded in the status target_url
# as a proxy for distinct run_id.
run_id = _extract_run_id(info.get("target_url"))
if run_id and run_id in prior_run_ids:
pytest.fail(
f"Context {ctx!r} has target_url run_id {run_id} which existed "
f"BEFORE the review was submitted. This means the status was "
f"updated in-place by an existing run, not by a new workflow "
f"run triggered from the pull_request_review event."
)
@@ -1,145 +0,0 @@
"""Stale-head diagnostic test for #2159.
Deterministically reports whether a PR's HEAD contains the pull_request_review
trigger in qa-review.yml and security-review.yml. If the trigger is absent,
auto-fire on APPROVED review is impossible for that PR.
This is used as a self-diagnostic for future stale-PR situations (PRs opened
before #2157 merged, or branches cut from old bases).
Environment:
GITEA_HOST — default: git.moleculesai.app
GITEA_TOKEN — token with read:repository scope (optional; falls back to local files)
REPO — default: molecule-ai/molecule-core
PR_NUMBER — required when running against a real PR
"""
import base64
import json
import os
import urllib.error
import urllib.request
from pathlib import Path
import pytest
import yaml
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
PR_NUMBER = os.environ.get("PR_NUMBER", "")
ROOT = Path(__file__).resolve().parents[2]
def _api(method: str, path: str) -> tuple[int, dict]:
url = f"https://{GITEA_HOST}/api/v1{path}"
headers = {"Authorization": f"token {GITEA_TOKEN}"}
req = urllib.request.Request(url, headers=headers, method=method)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
return resp.status, json.loads(resp.read())
except urllib.error.HTTPError as exc:
body = exc.read()
return exc.code, json.loads(body) if body else {}
def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict:
path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}"
code, payload = _api("GET", path)
if code != 200:
pytest.fail(
f"GET {path} returned HTTP {code}: {payload}. "
f"Cannot determine whether PR head contains the trigger."
)
raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
return yaml.safe_load(raw)
def _fetch_workflow_local(workflow_name: str) -> dict:
p = ROOT / "workflows" / workflow_name
if not p.exists():
pytest.fail(f"Local workflow file not found: {p}")
return yaml.safe_load(p.read_text())
def _has_pull_request_review_trigger(wf: dict) -> bool:
on = wf.get(True) or wf.get("on") or {}
if isinstance(on, list):
return "pull_request_review" in on
if isinstance(on, dict):
return "pull_request_review" in on
if isinstance(on, str):
return on == "pull_request_review"
return False
def _diagnose_pr(pr_number: int) -> dict[str, bool]:
code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}")
if code != 200:
pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}")
head_ref = pr["head"]["ref"]
head_sha = pr["head"]["sha"]
results: dict[str, bool] = {}
for wf_name in ("qa-review.yml", "security-review.yml"):
wf = _fetch_workflow_from_ref(wf_name, head_sha)
results[wf_name] = _has_pull_request_review_trigger(wf)
return {
"pr_number": pr_number,
"head_ref": head_ref,
"head_sha": head_sha,
"triggers": results,
"auto_fire_possible": all(results.values()),
}
def _diagnose_local() -> dict[str, bool]:
results: dict[str, bool] = {}
for wf_name in ("qa-review.yml", "security-review.yml"):
wf = _fetch_workflow_local(wf_name)
results[wf_name] = _has_pull_request_review_trigger(wf)
return {
"pr_number": None,
"head_ref": "local-checkout",
"head_sha": None,
"triggers": results,
"auto_fire_possible": all(results.values()),
}
class TestStaleHeadDiagnostic:
"""Test deterministically reports 'auto-fire impossible for this PR' when
the PR head lacks the pull_request_review trigger.
"""
def test_local_checkout_has_pull_request_review_trigger(self):
"""Local files (the ones in this checkout) must contain the trigger.
This is the baseline: if the checkout itself is stale, every PR cut
from it will also be stale.
"""
diag = _diagnose_local()
missing = [n for n, ok in diag["triggers"].items() if not ok]
if missing:
pytest.fail(
f"Local checkout is missing pull_request_review trigger in: {missing}. "
f"This branch cannot produce PRs that auto-fire."
)
@pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set")
@pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set")
def test_pr_head_has_pull_request_review_trigger(self):
"""When PR_NUMBER is given, assert the PR head contains the trigger."""
diag = _diagnose_pr(int(PR_NUMBER))
if not diag["auto_fire_possible"]:
missing = [n for n, ok in diag["triggers"].items() if not ok]
pytest.fail(
f"Auto-fire impossible for PR #{diag['pr_number']}. "
f"Head ref={diag['head_ref']} sha={diag['head_sha']}. "
f"Missing trigger in: {missing}. "
f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main."
)
File diff suppressed because it is too large Load Diff
@@ -320,10 +320,10 @@ class TestVerifyFlip(unittest.TestCase):
self.assertEqual(len(verdict["fail_runs"]), 1)
self.assertEqual(verdict["fail_runs"][0]["status"], "failure")
def test_unreadable_log_on_success_blocks(self):
# Fail-closed: log fetch 404 (None) on a success status is a
# potential Quirk #10 mask — we cannot verify it's genuine, so
# we block the flip rather than allowing it.
def test_unreadable_log_warns_not_blocks(self):
# Acceptance test #5: log fetch 404 (None) → warn, not block.
# Status is `success`, log is None — we can't tell, so we warn
# and allow.
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1"]):
with mock.patch.object(
lpfc, "combined_status",
@@ -332,8 +332,7 @@ class TestVerifyFlip(unittest.TestCase):
with mock.patch.object(lpfc, "fetch_log", return_value=None):
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
self.assertEqual(verdict["fail_runs"], [])
self.assertEqual(len(verdict["masked_runs"]), 1)
self.assertIn("log unavailable", verdict["masked_runs"][0]["samples"][0])
self.assertEqual(verdict["masked_runs"], [])
self.assertTrue(any("log unavailable" in w for w in verdict["warnings"]))
def test_unreadable_log_with_failure_status_still_blocks(self):
@@ -350,9 +349,9 @@ class TestVerifyFlip(unittest.TestCase):
self.assertEqual(len(verdict["fail_runs"]), 1)
self.assertIn("log unavailable", verdict["fail_runs"][0]["samples"][0])
def test_zero_runs_history_blocks(self):
# No commits with a matching context — cannot verify the flip.
# Fail-closed: treat as masked rather than allowing.
def test_zero_runs_history_warns_allows(self):
# No commits with a matching context — newly added workflow.
# Allow with warning.
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1", "sha2"]):
with mock.patch.object(
lpfc, "combined_status",
@@ -361,32 +360,17 @@ class TestVerifyFlip(unittest.TestCase):
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
self.assertEqual(verdict["checked_commits"], 0)
self.assertEqual(verdict["fail_runs"], [])
self.assertEqual(len(verdict["masked_runs"]), 1)
self.assertIn("cannot verify flip", verdict["masked_runs"][0]["samples"][0])
self.assertEqual(verdict["masked_runs"], [])
self.assertTrue(any("no runs of" in w for w in verdict["warnings"]))
def test_zero_commits_blocks(self):
# Empty branch (newly created repo, e.g.). Fail-closed: block.
def test_zero_commits_warns_allows(self):
# Empty branch (newly created repo, e.g.). Allow with warning.
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=[]):
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
self.assertEqual(verdict["checked_commits"], 0)
self.assertEqual(verdict["fail_runs"], [])
self.assertEqual(len(verdict["masked_runs"]), 1)
self.assertIn("cannot verify flip", verdict["masked_runs"][0]["samples"][0])
def test_combined_status_api_error_blocks(self):
# Fail-closed: combined_status ApiError means the check history is
# unreadable — we cannot verify the flip, so block as masked.
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1"]):
with mock.patch.object(
lpfc, "combined_status",
side_effect=lpfc.ApiError("GET /statuses/sha → HTTP 500"),
):
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
self.assertEqual(verdict["checked_commits"], 0)
self.assertEqual(verdict["fail_runs"], [])
# One masked_run from the ApiError, one from zero checked_commits.
self.assertEqual(len(verdict["masked_runs"]), 2)
self.assertIn("API error", verdict["masked_runs"][0]["samples"][0])
self.assertEqual(verdict["masked_runs"], [])
self.assertTrue(any("no recent commits" in w for w in verdict["warnings"]))
# --------------------------------------------------------------------------
@@ -17,7 +17,7 @@ wd.REPO = "molecule-ai/molecule-core"
wd.OWNER = "molecule-ai"
wd.NAME = "molecule-core"
wd.WATCH_BRANCH = "main"
wd.RED_LABEL = "ci-bp-drift"
wd.RED_LABEL = "tier:high"
wd.API = "https://git.example.com/api/v1"
@@ -1,48 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
# Anti-regression gate for #2403: fail if any SOP tier artifact reappears.
cd "$(dirname "$0")/../../.."
fail=0
# 1. Deleted workflow files must stay deleted
for f in .gitea/workflows/sop-tier-check.yml .gitea/workflows/sop-tier-refire.yml; do
if [ -e "$f" ]; then
echo "FAIL: $f was re-added (must stay deleted per #2403)" >&2
fail=1
fi
done
# 2. Deleted script files must stay deleted
for f in .gitea/scripts/sop-tier-check.sh .gitea/scripts/sop-tier-refire.sh; do
if [ -e "$f" ]; then
echo "FAIL: $f was re-added (must stay deleted per #2403)" >&2
fail=1
fi
done
# 3. No tier branching logic in gate_check.py
if grep -qE '_get_pr_tier|TIER_AGENTS' tools/gate-check-v3/gate_check.py; then
echo "FAIL: tier branching reappeared in gate_check.py" >&2
fail=1
fi
# 4. No _is_tier_low_pending_ok in merge queue
if grep -q '_is_tier_low_pending_ok' .gitea/scripts/gitea-merge-queue.py; then
echo "FAIL: tier soft-fail reappeared in gitea-merge-queue.py" >&2
fail=1
fi
# 5. No sop-tier-check context references in workflow YAML
if grep -r 'sop-tier-check' .gitea/workflows/; then
echo "FAIL: sop-tier-check context reappeared in workflows" >&2
fail=1
fi
if [ "$fail" -eq 1 ]; then
echo "TIER_REGRESSION_DETECTED" >&2
exit 1
fi
echo "PASS: no tier regression detected"
@@ -486,129 +486,3 @@ def test_scoped_rollout_dry_run_does_not_assert_coverage():
sleep=lambda _s: None,
)
assert aggregate["ok"] is True
# --- Superseded-deploy guard (false-stale fix) -----------------------------
#
# Scenario this fixes: no `concurrency:` on the prod-deploy workflow means two
# close main pushes run BOTH deploy-production jobs. eb31bcf (Fix A) and 286338
# (Fix C) merge back-to-back; the 286338 job rolls the fleet to staging-2863380
# first; the OLDER eb31bcf job's strict verify then sees tenants on 2863380 and
# false-reds "stale" though the fleet is AHEAD. superseded_by detects that main's
# head is no longer eb31bcf and lets the older job succeed without weakening the
# behind-tenant signal for whichever job IS the latest.
def test_superseded_by_returns_newer_head_when_main_moved_ahead(monkeypatch):
# eb31bcf job: main head is now 2863380 -> superseded, return the newer head.
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
newer = prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"})
assert newer == "2863380fullhash"
def test_superseded_by_none_when_this_job_is_still_head(monkeypatch):
# 2863380 job (the latest): head == our SHA -> NOT superseded -> strict verify
# runs, so a genuinely-behind tenant still fails loudly.
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
def test_superseded_by_matches_on_short_vs_full_sha_prefix(monkeypatch):
# GITHUB_SHA is full; Gitea may return a different-length id. Equal prefixes
# must NOT count as superseded (avoid false-skipping the real latest job).
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380")
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380FULLHASH")
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
def test_superseded_by_fail_safe_returns_none_when_head_unreadable(monkeypatch):
# Fail-safe: unreadable head (no token / API error) must NOT be treated as
# superseded, so the strict verify still runs and never silently greens.
monkeypatch.setattr(prod, "current_branch_head", lambda _env: None)
assert prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"}) is None
def test_superseded_by_none_without_github_sha(monkeypatch):
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
assert prod.superseded_by({}) is None
def test_current_branch_head_parses_gitea_branch_commit_id(monkeypatch):
captured = {}
def fake_optional(url, _token):
captured["url"] = url
return 200, {"name": "main", "commit": {"id": "2863380fullhash"}}
monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
head = prod.current_branch_head(
{"GITEA_TOKEN": "secret", "GITHUB_REPOSITORY": "molecule-ai/molecule-core"}
)
assert head == "2863380fullhash"
assert captured["url"].endswith("/repos/molecule-ai/molecule-core/branches/main")
def test_current_branch_head_uses_ref_name_branch(monkeypatch):
captured = {}
def fake_optional(url, _token):
captured["url"] = url
return 200, {"commit": {"sha": "deadbeef"}}
monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
head = prod.current_branch_head(
{"GITEA_TOKEN": "secret", "GITHUB_REF_NAME": "release"}
)
assert head == "deadbeef"
assert captured["url"].endswith("/branches/release")
def test_current_branch_head_none_without_token():
assert prod.current_branch_head({}) is None
def test_current_branch_head_none_on_non_200(monkeypatch):
monkeypatch.setattr(prod, "_api_json_optional", lambda _u, _t: (500, None))
assert prod.current_branch_head({"GITEA_TOKEN": "secret"}) is None
# --- #2213: superseded check must fire BEFORE production side effects ----------
#
# Real incident shape: two main pushes land ~2 min apart. The OLDER deploy job
# (GITHUB_SHA=7a72516, target staging-7a72516) started LATE — main head was
# already 7f25373. The #2194 guard only protected the *verify* step, so the
# older job still:
# 1. rolled the canary (hongming) BACKWARD to staging-7a72516 (the #2213 red,
# seen as the newer job's verify reading hongming on the old SHA), then
# 2. promoted :latest backward to the older image,
# before finally skipping verify. The workflow now calls this same superseded
# check BEFORE the redeploy + promote steps and gates both off when it fires.
# These tests pin the contract that check-superseded relies on for the exact
# incident shape.
def test_superseded_by_fires_for_older_job_when_newer_already_head(monkeypatch):
# Older job (7a72516) re-checks the head just before rollout and finds the
# newer merge (7f25373) already owns main -> superseded -> skip side effects.
monkeypatch.setattr(
prod, "current_branch_head", lambda _env: "7f25373309eca54a36f08c371ff783c3a47c3f8d"
)
newer = prod.superseded_by(
{"GITHUB_SHA": "7a72516f7e7ba1a710c4f393fef08be8d22e1866"}
)
assert newer == "7f25373309eca54a36f08c371ff783c3a47c3f8d"
def test_superseded_by_none_for_latest_job_so_it_still_rolls(monkeypatch):
# The newer job (7f25373) IS the head -> NOT superseded -> it proceeds to
# roll the fleet and verify, so a genuinely-behind tenant still fails loud.
monkeypatch.setattr(
prod, "current_branch_head", lambda _env: "7f25373309eca54a36f08c371ff783c3a47c3f8d"
)
assert (
prod.superseded_by(
{"GITHUB_SHA": "7f25373309eca54a36f08c371ff783c3a47c3f8d"}
)
is None
)
+19 -53
View File
@@ -14,17 +14,10 @@
# T9 — team membership probe → 403 (token not in team) → script exits 1 (fail closed)
# T10 — CURL_AUTH_FILE created with mode 600 and correct header content
# T11 — bash syntax check (bash -n passes)
# T12 — jq filter: non-author APPROVED official current-head → in candidate list; dismissed → excluded
# T12 — jq filter: non-author APPROVED → in candidate list; dismissed → excluded
# T13 — missing required env GITEA_TOKEN → exits 1 with error
# T14 — non-default-base PR exits 0 without requiring review
# T15comment agent-prefix approval → exit 1
# T16 — comment generic keyword approval → exit 1
# T17 — comments with no approval keywords → exit 1
# T18 — wrong-team review + right-team comment → exit 1
# T19 — ai-sop-ack APPROVED review excluded from qa-review gate
# T20 — ai-sop-ack APPROVED review excluded from security-review gate
# T21 — stale-head APPROVED review → exit 1 (commit_id mismatch)
# T22 — missing/non-official APPROVED review → exit 1 (official != true)
# T18wrong-team review candidate does not block right-team comment approval
#
# Hostile-self-review (per feedback_assert_exact_not_substring):
# this test MUST FAIL if the script is absent. Verified by running
@@ -326,50 +319,41 @@ assert_file_contains "T10b printf header format (CURL_AUTH_FILE content)" "$T10_
assert_file_contains "T10c 'header =' curl-config syntax" "$T10_AUTHFILE" 'header = "Authorization: token '
rm -f "$T10_AUTHFILE"
# T12 — jq filter: non-author APPROVED official current-head included; dismissed/stale/missing-official excluded
# T12 — jq filter: non-author APPROVED included, dismissed excluded
echo
echo "== T12 jq filter =="
# These are tested indirectly via T3 and T6 above, but let's also test
# the jq expression directly.
JQ_FILTER='.[]
| select(.state == "APPROVED")
| select(.official == true)
| select(.dismissed != true)
| select(.user.login != "alice")
| select(.commit_id == $head)
| .user.login'
T12_INPUT='[{"state":"APPROVED","official":true,"dismissed":false,"commit_id":"deadbeef0000111122223333444455556666","user":{"login":"core-devops"}},{"state":"CHANGES_REQUESTED","official":true,"dismissed":false,"commit_id":"deadbeef0000111122223333444455556666","user":{"login":"bob"}},{"state":"APPROVED","official":true,"dismissed":false,"commit_id":"deadbeef0000111122223333444455556666","user":{"login":"alice"}},{"state":"APPROVED","official":true,"dismissed":true,"commit_id":"deadbeef0000111122223333444455556666","user":{"login":"carol"}},{"state":"APPROVED","official":false,"dismissed":false,"commit_id":"deadbeef0000111122223333444455556666","user":{"login":"dave"}},{"state":"APPROVED","official":true,"dismissed":false,"commit_id":"oldsha0000000000000000000000000000","user":{"login":"eve"}}]'
T12_INPUT='[{"state":"APPROVED","dismissed":false,"user":{"login":"core-devops"}},{"state":"CHANGES_REQUESTED","dismissed":false,"user":{"login":"bob"}},{"state":"APPROVED","dismissed":false,"user":{"login":"alice"}},{"state":"APPROVED","dismissed":true,"user":{"login":"carol"}}]'
JQ_CMD=$(command -v jq 2>/dev/null || echo /tmp/jq)
T12_CANDIDATES=$(echo "$T12_INPUT" | "$JQ_CMD" -r --arg head "deadbeef0000111122223333444455556666" "$JQ_FILTER" 2>/dev/null | sort -u)
assert_contains "T12 jq: core-devops (non-author APPROVED official current-head) in candidates" "core-devops" "$T12_CANDIDATES"
T12_CANDIDATES=$(echo "$T12_INPUT" | "$JQ_CMD" -r "$JQ_FILTER" 2>/dev/null | sort -u)
assert_contains "T12 jq: core-devops (non-author APPROVED) in candidates" "core-devops" "$T12_CANDIDATES"
assert_eq "T12 jq: alice (author) NOT in candidates" "" "$(echo "$T12_CANDIDATES" | grep '^alice$' || true)"
assert_eq "T12 jq: carol (dismissed) NOT in candidates" "" "$(echo "$T12_CANDIDATES" | grep '^carol$' || true)"
assert_eq "T12 jq: dave (official=false) NOT in candidates" "" "$(echo "$T12_CANDIDATES" | grep '^dave$' || true)"
assert_eq "T12 jq: eve (stale head) NOT in candidates" "" "$(echo "$T12_CANDIDATES" | grep '^eve$' || true)"
# T15 — comment-based approval via agent prefix pattern → exit 1
# SECURITY: agent-prefix comments are also removed. A text prefix in an
# issue comment is spoofable (any team member can type "[core-qa-agent]")
# and lacks the audit trail of an official Gitea review.
# T15 — comment-based approval via agent prefix pattern → exit 0
echo
echo "== T15 comment agent-prefix approval =="
T15_OUT=$(run_review_check "T15_comments_agent_approval")
T15_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T15 exit code 1 (agent-prefix comment rejected — not an official review)" "1" "$T15_RC"
assert_contains "T15 no candidates error" "no candidates from reviews API or issue comments" "$T15_OUT"
assert_eq "T15 exit code 0 (agent-comment approval + team member)" "0" "$T15_RC"
assert_contains "T15 comment fallback notice" "comment-based approval" "$T15_OUT"
assert_contains "T15 core-qa-agent APPROVED" "APPROVED by core-qa-agent" "$T15_OUT"
# T16 — comment-based approval via generic APPROVED keyword → exit 1
# SECURITY: generic keywords (APPROVED/LGTM/ACCEPTED) must NOT satisfy the
# gate — only official Gitea reviews or agent-prefix comments count. A plain
# comment from a team member is a bypass if it skips the review UI.
# T16 — comment-based approval via generic APPROVED keyword → exit 0
echo
echo "== T16 comment generic keyword approval =="
T16_OUT=$(run_review_check "T16_comments_generic_approval")
T16_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T16 exit code 1 (generic-approval comment rejected — not an official review)" "1" "$T16_RC"
assert_contains "T16 no candidates error" "no candidates from reviews API or issue comments" "$T16_OUT"
assert_eq "T16 exit code 0 (generic-approval comment + team member)" "0" "$T16_RC"
assert_contains "T16 comment fallback notice" "comment-based approval" "$T16_OUT"
# T17 — no approval keywords in comments → exit 1
echo
@@ -379,16 +363,16 @@ T17_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T17 exit code 1 (no candidates from comments)" "1" "$T17_RC"
assert_contains "T17 no candidates error" "no candidates from reviews API or issue comments" "$T17_OUT"
# T18 — wrong-team review + right-team comment → exit 1
# SECURITY: with comment approval fully removed, a wrong-team review plus
# a right-team comment yields NO valid candidates. Only official reviews
# from the target team count.
# T18 — a wrong-team PR review candidate must not suppress a right-team
# comment approval. This matches PR #1790, where QA had an APPROVED review
# and security approved via the agent comment convention.
echo
echo "== T18 review candidate wrong team, comment candidate right team =="
T18_OUT=$(run_review_check "T18_review_wrong_team_comment_right_team")
T18_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T18 exit code 1 (comment approval removed — no valid candidates)" "1" "$T18_RC"
assert_contains "T18 none are in team" "none are in team" "$T18_OUT"
assert_eq "T18 exit code 0 (comment approval still considered)" "0" "$T18_RC"
assert_contains "T18 comment candidate notice" "comment-based approval" "$T18_OUT"
assert_contains "T18 comment approver accepted" "APPROVED by core-qa-agent" "$T18_OUT"
# T19 — ai-sop-ack member APPROVED review must NOT count toward qa-review
# or security-review (R1 hardening refinement, msg 1388c76f).
@@ -409,24 +393,6 @@ assert_eq "T20 exit code 1 (ai-sop-ack not in security team)" "1" "$T20_RC"
assert_contains "T20 ai-reviewer excluded from security" "candidates: ai-reviewer" "$T20_OUT"
assert_contains "T20 none are in security team" "none are in team" "$T20_OUT"
# T21 — stale-head APPROVED review must be rejected (commit_id mismatch).
# SECURITY: an approval on an old commit does not cover the current head.
echo
echo "== T21 stale-head APPROVED review rejected =="
T21_OUT=$(run_review_check "T21_stale_head_approved")
T21_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T21 exit code 1 (stale-head approval rejected)" "1" "$T21_RC"
assert_contains "T21 no candidates error" "no candidates from reviews API or issue comments" "$T21_OUT"
# T22 — missing/non-official APPROVED review must be rejected.
# SECURITY: only official Gitea reviews count; comments and non-official reviews lack audit trail.
echo
echo "== T22 missing official flag APPROVED review rejected =="
T22_OUT=$(run_review_check "T22_missing_official")
T22_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T22 exit code 1 (missing official rejected)" "1" "$T22_RC"
assert_contains "T22 no candidates error" "no candidates from reviews API or issue comments" "$T22_OUT"
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
+59 -127
View File
@@ -11,7 +11,7 @@
# - compute_ack_state (self-ack rejected, team probe applied, revoke
# invalidates own prior ack, peer's ack survives unrevoked)
# - render_status (state + description format)
# - is_high_risk (label-driven, default fallback)
# - get_tier_mode (label-driven, default fallback)
# - load_config (default config parses cleanly with both PyYAML and
# the bundled minimal parser)
#
@@ -208,22 +208,6 @@ class TestParseDirectives(unittest.TestCase):
d = self.parse_ack_revoke("/sop-ack Comprehensive_Testing")
self.assertEqual(d[0][1], "comprehensive-testing")
def test_emdash_separator_parsed_correctly(self):
# Em-dash (U+2014) between slug and note is common in practice.
# /sop-ack Five-Axis — five-axis-review
# → slug = five-axis, note = — five-axis-review
d = self.parse_ack_revoke("/sop-ack Five-Axis — five-axis-review")
self.assertEqual(len(d), 1)
self.assertEqual(d[0][1], "five-axis")
self.assertIn("five-axis-review", d[0][2])
def test_emdash_no_note(self):
# Em-dash at end of slug: only slug, no note content
d = self.parse_ack_revoke("/sop-ack Five-Axis —")
self.assertEqual(len(d), 1)
self.assertEqual(d[0][1], "five-axis")
self.assertEqual(d[0][2], "") # em-dash is separator-only → empty note
# ---------------------------------------------------------------------------
# section_marker_present
@@ -432,6 +416,37 @@ class TestRenderStatus(unittest.TestCase):
self.assertIn("body-unfilled", desc)
# ---------------------------------------------------------------------------
# get_tier_mode
# ---------------------------------------------------------------------------
class TestGetTierMode(unittest.TestCase):
def setUp(self):
self.cfg = sop.load_config(CONFIG_PATH)
def test_tier_high_is_hard(self):
pr = {"labels": [{"name": "tier:high"}, {"name": "area:ci"}]}
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
def test_tier_medium_is_hard(self):
pr = {"labels": [{"name": "tier:medium"}]}
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
def test_tier_low_is_soft(self):
pr = {"labels": [{"name": "tier:low"}]}
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "soft")
def test_no_tier_label_defaults_to_hard(self):
# Per feedback_fix_root_not_symptom — never silently lower the bar.
pr = {"labels": [{"name": "area:ci"}]}
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
def test_no_labels_defaults_to_hard(self):
self.assertEqual(sop.get_tier_mode({"labels": []}, self.cfg), "hard")
self.assertEqual(sop.get_tier_mode({}, self.cfg), "hard")
# ---------------------------------------------------------------------------
# load_config
# ---------------------------------------------------------------------------
@@ -456,6 +471,13 @@ class TestLoadConfig(unittest.TestCase):
},
)
def test_default_config_tier_mode_shape(self):
cfg = sop.load_config(CONFIG_PATH)
self.assertEqual(cfg["tier_failure_mode"]["tier:high"], "hard")
self.assertEqual(cfg["tier_failure_mode"]["tier:medium"], "hard")
self.assertEqual(cfg["tier_failure_mode"]["tier:low"], "soft")
self.assertEqual(cfg["default_mode"], "hard")
def test_each_item_has_required_fields(self):
cfg = sop.load_config(CONFIG_PATH)
for it in cfg["items"]:
@@ -589,7 +611,7 @@ class TestComputeNaState(unittest.TestCase):
class TestIsHighRisk(unittest.TestCase):
"""The high-risk predicate decides which required_teams list applies.
Predicate: any label in cfg.high_risk_labels.
Predicate: tier:high label OR any label in cfg.high_risk_labels.
"""
def setUp(self):
@@ -599,8 +621,23 @@ class TestIsHighRisk(unittest.TestCase):
pr = {"labels": []}
self.assertFalse(sop.is_high_risk(pr, self.cfg))
def test_tier_high_is_high_risk(self):
pr = {"labels": [{"name": "tier:high"}]}
self.assertTrue(sop.is_high_risk(pr, self.cfg))
def test_tier_low_is_default_class(self):
pr = {"labels": [{"name": "tier:low"}]}
self.assertFalse(sop.is_high_risk(pr, self.cfg))
def test_tier_medium_is_default_class(self):
# tier:medium alone is NOT high-risk (Option C — medium routes
# to the wider engineers OR-set).
pr = {"labels": [{"name": "tier:medium"}]}
self.assertFalse(sop.is_high_risk(pr, self.cfg))
def test_area_security_label_is_high_risk(self):
pr = {"labels": [{"name": "area:security"}]}
pr = {"labels": [{"name": "tier:medium"}, {"name": "area:security"}]}
self.assertTrue(sop.is_high_risk(pr, self.cfg))
def test_area_schema_label_is_high_risk(self):
pr = {"labels": [{"name": "area:schema"}]}
@@ -615,7 +652,7 @@ class TestIsHighRisk(unittest.TestCase):
self.assertTrue(sop.is_high_risk(pr, self.cfg))
def test_area_gate_meta_label_is_high_risk(self):
# Gate-meta = changes to sop-checklist/sop-checklist itself.
# Gate-meta = changes to sop-checklist/sop-tier-check itself.
pr = {"labels": [{"name": "area:gate-meta"}]}
self.assertTrue(sop.is_high_risk(pr, self.cfg))
@@ -669,7 +706,7 @@ class TestRootCauseAckEligibilityWidened(unittest.TestCase):
root-cause / no-backwards-compat for the default class.
The dead-managers/ceo-persona-token gridlock is the symptom; the
root cause is that sop-checklist ignored high-risk class. These tests
root cause is that sop-checklist ignored tier-class. These tests
pin the new wider-default behavior so it can't regress silently.
"""
@@ -740,7 +777,7 @@ class TestHighRiskClassUsesElevatedListInConfig(unittest.TestCase):
def test_root_cause_high_risk_elevated_to_ceo_only(self):
items = _items_by_slug()
# area:schema alone makes the PR high-risk → root-cause needs ceo.
# tier:high alone makes the PR high-risk → root-cause needs ceo.
self.assertEqual(
sop.resolve_required_teams(items["root-cause"], high_risk=True),
["ceo"],
@@ -1262,108 +1299,3 @@ class TestGetCIStatus(unittest.TestCase):
self.assertEqual(
sop.get_ci_status(client, "o", "r", "sha1"), "unknown"
)
# ---------------------------------------------------------------------------
# internal#818 — na-declarations status must be terminal success
# ---------------------------------------------------------------------------
class TestNaDeclarationsStatusTerminal(unittest.TestCase):
"""Regression for internal#818: the na-declarations context is
informational, not a merge gate. An empty N/A declaration list must
post `success` (not `pending`) so it does not poison the PR combined
status."""
def _run_with_fake_client(self, fake_client_class):
"""Swap GiteaClient temporarily and invoke main() with a fake token."""
orig_client = sop.GiteaClient
orig_token = os.environ.get("GITEA_TOKEN")
try:
sop.GiteaClient = fake_client_class
os.environ["GITEA_TOKEN"] = "fake-token"
return sop.main([
"--owner", "o", "--repo", "r", "--pr", "1",
"--config", CONFIG_PATH,
"--gitea-host", "git.example.com",
])
finally:
sop.GiteaClient = orig_client
if orig_token is None:
os.environ.pop("GITEA_TOKEN", None)
else:
os.environ["GITEA_TOKEN"] = orig_token
def test_empty_na_descriptions_posts_success(self):
posted = []
class FakeClient(sop.GiteaClient):
def get_pr(self, owner, repo, pr):
return {
"state": "open",
"user": {"login": "alice"},
"head": {"sha": "abc123"},
"labels": [],
}
def get_issue_comments(self, owner, repo, issue, max_comments=None):
return []
def resolve_team_id(self, org, team_name):
return None
def is_team_member(self, team_id, login):
return False
def post_status(self, owner, repo, sha, state, context,
description, target_url=""):
posted.append({
"state": state,
"context": context,
"description": description,
})
rc = self._run_with_fake_client(FakeClient)
self.assertEqual(rc, 0)
na_posts = [p for p in posted if "na-declarations" in p["context"]]
self.assertEqual(len(na_posts), 1, f"expected one na-declarations post, got {posted}")
self.assertEqual(na_posts[0]["state"], "success")
self.assertEqual(na_posts[0]["description"], "N/A: (none)")
def test_populated_na_descriptions_posts_success(self):
posted = []
class FakeClient(sop.GiteaClient):
def get_pr(self, owner, repo, pr):
return {
"state": "open",
"user": {"login": "alice"},
"head": {"sha": "abc123"},
"labels": [],
}
def get_issue_comments(self, owner, repo, issue, max_comments=None):
return [
{"user": {"login": "bob"}, "body": "/sop-n/a qa-review N/A: docs-only"},
]
def resolve_team_id(self, org, team_name):
return 1
def is_team_member(self, team_id, login):
return True
def post_status(self, owner, repo, sha, state, context,
description, target_url=""):
posted.append({
"state": state,
"context": context,
"description": description,
})
rc = self._run_with_fake_client(FakeClient)
self.assertEqual(rc, 0)
na_posts = [p for p in posted if "na-declarations" in p["context"]]
self.assertEqual(len(na_posts), 1)
self.assertEqual(na_posts[0]["state"], "success")
self.assertIn("qa-review", na_posts[0]["description"])
+101
View File
@@ -0,0 +1,101 @@
#!/usr/bin/env bash
# Regression test for #229 — sop-tier-check tier:low OR-clause splitter.
#
# Bug (PR #225 → still broken after PR #231):
# Line ~289 of sop-tier-check.sh used:
# _clause=$(echo "$_raw_clause" | tr -d '()' | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
# `tr -d '[:space:]'` strips the newlines that `tr ',' '\n'` just
# inserted, collapsing "engineers,managers,ceo" into a single token
# "engineersmanagersceo". The for-loop then iterates ONCE on a name
# that matches no team, so every tier:low PR fails:
# ::error::clause [engineers/managers/ceo]: FAIL — no approving
# reviewer belongs to any of these teamsengineersmanagersceo
# (note also: missing separators in the error string is bug #2 —
# `_clause_names` used "${var:+, }$x" which OVERWRITES per iteration).
#
# Fix shape (this PR):
# _no_parens=${_raw_clause//[()]/}
# _clause=${_no_parens//,/ } # comma -> space, bash word-split iterates
# _clause_names="${_clause_names}${_clause_names:+, }${_t}" # APPEND, not overwrite
#
# This test extracts the splitter logic and asserts it produces the right
# token list for each of the three tier expressions live in the script.
set -euo pipefail
PASS=0
FAIL=0
assert_eq() {
local label="$1"
local expected="$2"
local got="$3"
if [ "$expected" = "$got" ]; then
echo " PASS $label"
PASS=$((PASS + 1))
else
echo " FAIL $label"
echo " expected: <$expected>"
echo " got: <$got>"
FAIL=$((FAIL + 1))
fi
}
# ----- Splitter under test (mirrors the fixed sop-tier-check.sh block) -----
split_clause() {
local raw="$1"
local no_parens=${raw//[()]/}
local clause=${no_parens//,/ }
local out=""
for _t in $clause; do
out="${out}${out:+|}$_t"
done
echo "$out"
}
echo "test: tier:low OR-clause splits to 3 tokens"
assert_eq "tier:low" "engineers|managers|ceo" "$(split_clause "engineers,managers,ceo")"
echo "test: tier:medium AND-expression — bash word-split on \$EXPR yields 5 tokens"
EXPR="managers AND engineers AND qa???,security???"
out=""
for _raw in $EXPR; do
out="${out}${out:+ ; }$(split_clause "$_raw")"
done
assert_eq "tier:medium" "managers ; AND ; engineers ; AND ; qa???|security???" "$out"
echo "test: tier:high single-team OR-clause"
assert_eq "tier:high" "ceo" "$(split_clause "ceo")"
echo "test: paren-wrapped OR-set unwraps + splits"
assert_eq "paren OR" "managers|ceo" "$(split_clause "(managers,ceo)")"
# ----- _clause_names accumulator (was overwriting per iteration) -----
acc=""
for t in engineers managers ceo; do
acc="${acc}${acc:+, }${t}"
done
assert_eq "_clause_names append" "engineers, managers, ceo" "$acc"
# ----- _failed_clauses / _passed_clauses accumulator across raw clauses -----
acc=""
for c in clauseA clauseB clauseC; do
acc="${acc}${acc:+, }${c}"
done
assert_eq "_failed_clauses append" "clauseA, clauseB, clauseC" "$acc"
# ----- End-to-end OR-gate: simulate APPROVER_TEAMS[core-lead]=' managers ' -----
# The script's case pattern is *${_t}* with a space-padded value.
APPROVER_TEAMS_VAL=" managers "
matched=""
for _t in $(split_clause "engineers,managers,ceo" | tr '|' ' '); do
case "$APPROVER_TEAMS_VAL" in
*${_t}*) matched="$_t"; break ;;
esac
done
assert_eq "OR-gate matches managers" "managers" "$matched"
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
[ "$FAIL" -eq 0 ]
+301
View File
@@ -0,0 +1,301 @@
#!/usr/bin/env bash
# Tests for sop-tier-refire.{yml,sh} — internal#292.
#
# Behavior matrix:
#
# T1: PR open + APPROVED via tier:low → script invokes sop-tier-check
# and POSTs status=success.
# T2: PR open + missing tier label → sop-tier-check exits non-zero;
# refire still POSTs status=success, matching the canonical
# pull_request_target workflow's fail-open job conclusion.
# T3: PR open + tier:low but NO approving reviews → sop-tier-check
# exits non-zero; refire still POSTs status=success for the same reason.
# T4: PR CLOSED → refire exits 0 with no status POST (no-op on closed).
# T5: Rate-limit — recent status update within 30s → refire skips,
# no new POST.
# T6 (yaml-lint): workflow `if:` expression contains author_association
# gate + slash-command-trigger gate + PR-not-issue gate.
# T7 (yaml-lint): workflow file is parseable YAML.
#
# Tests T1-T5 run the real script against a local-fixture HTTP server
# (python http.server with a stub handler — `tests/_refire_fixture.py`)
# so the script's Gitea API calls hit the fixture, not the real Gitea.
#
# Tests T6/T7 are pure YAML checks against the workflow file.
#
# Hostile-self-review (per feedback_assert_exact_not_substring):
# this test MUST FAIL if the workflow or script is absent. Verified by
# running the test before the files exist (covered in the PR body).
set -euo pipefail
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
WORKFLOW_DIR="$(cd "$THIS_DIR/../../workflows" && pwd)"
WORKFLOW="$WORKFLOW_DIR/sop-tier-refire.yml"
DISPATCH_WORKFLOW="$WORKFLOW_DIR/sop-checklist.yml"
SCRIPT="$SCRIPT_DIR/sop-tier-refire.sh"
PASS=0
FAIL=0
FAILED_TESTS=""
assert_eq() {
local label="$1"
local expected="$2"
local got="$3"
if [ "$expected" = "$got" ]; then
echo " PASS $label"
PASS=$((PASS + 1))
else
echo " FAIL $label"
echo " expected: <$expected>"
echo " got: <$got>"
FAIL=$((FAIL + 1))
FAILED_TESTS="${FAILED_TESTS} ${label}"
fi
}
assert_contains() {
local label="$1"
local needle="$2"
local haystack="$3"
if printf '%s' "$haystack" | grep -qF "$needle"; then
echo " PASS $label"
PASS=$((PASS + 1))
else
echo " FAIL $label"
echo " needle: <$needle>"
echo " haystack: <$(printf '%s' "$haystack" | head -c 400)>"
FAIL=$((FAIL + 1))
FAILED_TESTS="${FAILED_TESTS} ${label}"
fi
}
assert_file_exists() {
local label="$1"
local path="$2"
if [ -f "$path" ]; then
echo " PASS $label"
PASS=$((PASS + 1))
else
echo " FAIL $label (not found: $path)"
FAIL=$((FAIL + 1))
FAILED_TESTS="${FAILED_TESTS} ${label}"
fi
}
# Existence (foundation — every other test depends on these)
echo
echo "== existence =="
assert_file_exists "workflow file exists" "$WORKFLOW"
assert_file_exists "SSOT dispatcher workflow file exists" "$DISPATCH_WORKFLOW"
assert_file_exists "script file exists" "$SCRIPT"
if [ "$FAIL" -gt 0 ]; then
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL (existence)"
echo "Cannot proceed without these files."
exit 1
fi
# T6 / T7 — workflow YAML structure
echo
echo "== T6/T7 workflow yaml =="
# YAML parseability
PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$WORKFLOW" 2>&1 || true)
assert_eq "T7 workflow parses as YAML" "ok" "$PARSE_OUT"
# The old per-workflow issue_comment listener caused queue storms because
# Gitea queues jobs before evaluating job-level `if:`. The script remains,
# but comment-triggered refires route through the single dispatcher.
WORKFLOW_CONTENT=$(cat "$WORKFLOW")
if printf '%s' "$WORKFLOW_CONTENT" | grep -q '^ issue_comment:'; then
echo " FAIL T6a manual fallback workflow must not listen on issue_comment"
FAIL=$((FAIL + 1))
FAILED_TESTS="${FAILED_TESTS} T6a"
else
echo " PASS T6a manual fallback workflow does not listen on issue_comment"
PASS=$((PASS + 1))
fi
assert_contains "T6b workflow exposes workflow_dispatch" \
"workflow_dispatch" "$WORKFLOW_CONTENT"
assert_contains "T6c workflow documents unsupported manual inputs" \
"workflow_dispatch inputs" "$WORKFLOW_CONTENT"
# Does NOT check out PR HEAD (security)
if grep -q 'ref: \${{ github.event.pull_request.head' "$WORKFLOW"; then
echo " FAIL T6d workflow MUST NOT check out PR head (security)"
FAIL=$((FAIL + 1))
FAILED_TESTS="${FAILED_TESTS} T6d"
else
echo " PASS T6d workflow does not check out PR head"
PASS=$((PASS + 1))
fi
DISPATCH_PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$DISPATCH_WORKFLOW" 2>&1 || true)
assert_eq "T6e SSOT dispatcher workflow parses as YAML" "ok" "$DISPATCH_PARSE_OUT"
DISPATCH_CONTENT=$(cat "$DISPATCH_WORKFLOW")
assert_contains "T6f SSOT dispatcher listens on issue_comment" \
"issue_comment" "$DISPATCH_CONTENT"
assert_contains "T6g SSOT dispatcher handles /qa-recheck" \
"/qa-recheck" "$DISPATCH_CONTENT"
assert_contains "T6h SSOT dispatcher handles /security-recheck" \
"/security-recheck" "$DISPATCH_CONTENT"
assert_contains "T6i SSOT dispatcher handles /refire-tier-check" \
"/refire-tier-check" "$DISPATCH_CONTENT"
# T1-T5 — script behavior against a local Gitea-fixture
echo
echo "== T1-T5 script behavior (vs local fixture) =="
# Spin up the fixture HTTP server.
FIXTURE_DIR=$(mktemp -d)
trap 'rm -rf "$FIXTURE_DIR"; [ -n "${FIX_PID:-}" ] && kill "$FIX_PID" 2>/dev/null || true' EXIT
FIXTURE_PY="$THIS_DIR/_refire_fixture.py"
if [ ! -f "$FIXTURE_PY" ]; then
echo "::error::fixture server $FIXTURE_PY missing"
exit 1
fi
FIX_LOG="$FIXTURE_DIR/fixture.log"
FIX_STATE_DIR="$FIXTURE_DIR/state"
mkdir -p "$FIX_STATE_DIR"
# Find an unused port.
FIX_PORT=$(python3 -c 'import socket;s=socket.socket();s.bind(("127.0.0.1",0));print(s.getsockname()[1]);s.close()')
FIXTURE_STATE_DIR="$FIX_STATE_DIR" python3 "$FIXTURE_PY" "$FIX_PORT" \
>"$FIX_LOG" 2>&1 &
FIX_PID=$!
# Wait for fixture readiness.
for _ in $(seq 1 50); do
if curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
break
fi
sleep 0.1
done
if ! curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
echo "::error::fixture server failed to start. Log:"
cat "$FIX_LOG"
exit 1
fi
# Helper: set fixture state for a scenario, then run the script.
# tier_result is one of: pass | fail_no_label | fail_no_approvals.
# The refire script's tier-check invocation is mocked because the real
# sop-tier-check.sh uses bash 4+ associative arrays — incompatible with
# the macOS bash 3.2 dev shell. Linux Gitea runners use bash 4/5 so
# production runs the real script. The mock exercises the success +
# failure branches of refire's status-POST glue.
run_scenario() {
local scenario="$1"
local tier_result="${2:-pass}"
echo "$scenario" >"$FIX_STATE_DIR/scenario"
: >"$FIX_STATE_DIR/posted_statuses.jsonl" # clear status log
local out
set +e
out=$(
PATH="$FIXTURE_DIR/bin:$PATH" \
GITEA_TOKEN="fixture-token" \
GITEA_HOST="fixture.local" \
REPO="molecule-ai/molecule-core" \
PR_NUMBER="999" \
COMMENT_AUTHOR="test-runner" \
SOP_REFIRE_DISABLE_RATE_LIMIT="1" \
SOP_REFIRE_TIER_CHECK_SCRIPT="$THIS_DIR/_mock_tier_check.sh" \
MOCK_TIER_RESULT="$tier_result" \
FIXTURE_PORT="$FIX_PORT" \
bash "$SCRIPT" 2>&1
)
local rc=$?
set -e
echo "$out" >"$FIX_STATE_DIR/last_run.log"
echo "$rc" >"$FIX_STATE_DIR/last_rc"
}
# Install a curl shim that rewrites https://fixture.local → http://127.0.0.1:$PORT
# Use bash prefix-strip (${var#prefix}) — it sidesteps the `/` delimiter
# confusion of ${var/pattern/replacement}.
mkdir -p "$FIXTURE_DIR/bin"
cat >"$FIXTURE_DIR/bin/curl" <<SHIM
#!/usr/bin/env bash
# Test shim: rewrite https://fixture.local/* -> http://127.0.0.1:${FIX_PORT}/*
# The fixture doesn't authenticate; -H Authorization passes through harmlessly.
new_args=()
for a in "\$@"; do
if [[ "\$a" == https://fixture.local/* ]]; then
rest="\${a#https://fixture.local}"
a="http://127.0.0.1:${FIX_PORT}\${rest}"
fi
new_args+=("\$a")
done
exec /usr/bin/curl "\${new_args[@]}"
SHIM
chmod +x "$FIXTURE_DIR/bin/curl"
# T1: tier:low + 1 APPROVED + author is in engineers team → success
run_scenario "T1_success" "pass"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T1 exit code 0 (success)" "0" "$RC"
assert_contains "T1 POSTed state=success" '"state": "success"' "$POSTED"
assert_contains "T1 POST context is sop-tier-check / tier-check" \
'"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
assert_contains "T1 description names commenter" "test-runner" "$POSTED"
# T2: missing tier label → tier-check fails internally, but refire status
# matches the canonical workflow's fail-open job conclusion.
run_scenario "T2_no_tier_label" "fail_no_label"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T2 exit code 0 (canonical fail-open)" "0" "$RC"
assert_contains "T2 POSTed state=success" '"state": "success"' "$POSTED"
# T3: tier:low present but ZERO approving reviews → internal tier check fails,
# refire status remains aligned with the canonical workflow.
run_scenario "T3_no_approvals" "fail_no_approvals"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T3 exit code 0 (canonical fail-open)" "0" "$RC"
assert_contains "T3 POSTed state=success" '"state": "success"' "$POSTED"
# T4: closed PR — refire is a no-op (no POST, exit 0)
run_scenario "T4_closed" "pass"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T4 closed PR exits 0" "0" "$RC"
assert_eq "T4 closed PR posts no status" "" "$POSTED"
# T5: rate-limit — disable the env override and let scenario set a
# recent statuses entry. Re-enable rate-limit for this scenario by NOT
# passing SOP_REFIRE_DISABLE_RATE_LIMIT.
echo "T5_rate_limited" >"$FIX_STATE_DIR/scenario"
: >"$FIX_STATE_DIR/posted_statuses.jsonl"
set +e
T5_OUT=$(
PATH="$FIXTURE_DIR/bin:$PATH" \
GITEA_TOKEN="fixture-token" \
GITEA_HOST="fixture.local" \
REPO="molecule-ai/molecule-core" \
PR_NUMBER="999" \
COMMENT_AUTHOR="test-runner" \
FIXTURE_PORT="$FIX_PORT" \
bash "$SCRIPT" 2>&1
)
T5_RC=$?
set -e
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T5 rate-limited exits 0" "0" "$T5_RC"
assert_contains "T5 rate-limited log says skipped" "rate-limited" "$T5_OUT"
assert_eq "T5 rate-limited posts no status" "" "$POSTED"
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
if [ "$FAIL" -gt 0 ]; then
echo "Failed:$FAILED_TESTS"
fi
[ "$FAIL" -eq 0 ]
@@ -1,474 +0,0 @@
import importlib.util
import json
import pathlib
import urllib.error
ROOT = pathlib.Path(__file__).resolve().parents[1]
SCRIPT = ROOT / "umbrella-reaper.py"
def load_reaper():
spec = importlib.util.spec_from_file_location("umbrella_reaper", SCRIPT)
mod = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(mod)
mod.API = "https://git.example.test/api/v1"
mod.GITEA_TOKEN = "fixture-token"
mod.GITEA_HOST = "git.example.test"
mod.REPO = "owner/repo"
return mod
class FakeResponse:
status = 200
def __init__(self, payload):
self.payload = payload
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def read(self):
return json.dumps(self.payload).encode("utf-8")
def _pr_fixture(number: int, sha: str) -> dict:
return {"number": number, "head": {"sha": sha}}
def _status_entry(context: str, state: str) -> dict:
return {"context": context, "status": state}
def test_process_pr_compensates_when_all_sub_jobs_success(monkeypatch):
mod = load_reaper()
posted = []
def fake_post_status(sha, context, description):
posted.append((sha, context, description))
monkeypatch.setattr(mod, "post_status", fake_post_status)
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
pr = _pr_fixture(1, "abc123")
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "success"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
ok = mod.process_pr(pr)
assert ok is True
assert len(posted) == 1
assert posted[0][0] == "abc123"
assert posted[0][1] == "CI / all-required (pull_request)"
assert "Compensating status" in posted[0][2]
def test_process_pr_skips_when_umbrella_missing(monkeypatch):
mod = load_reaper()
posted = []
monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", ["CI / Platform (Go) (pull_request)"])
pr = _pr_fixture(2, "def456")
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / Platform (Go) (pull_request)", "success"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
ok = mod.process_pr(pr)
assert ok is True
assert posted == []
def test_process_pr_skips_when_sub_job_pending(monkeypatch):
mod = load_reaper()
posted = []
monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
pr = _pr_fixture(3, "ghi789")
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "pending"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
ok = mod.process_pr(pr)
assert ok is True
assert posted == []
def test_process_pr_skips_when_sub_job_failure(monkeypatch):
mod = load_reaper()
posted = []
monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
pr = _pr_fixture(4, "jkl012")
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "failure"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
ok = mod.process_pr(pr)
assert ok is True
assert posted == []
def test_process_pr_returns_false_on_post_failure(monkeypatch):
mod = load_reaper()
def fake_post_status(sha, context, description):
raise mod.ApiError("POST /statuses/abc123 -> HTTP 500: simulated failure")
monkeypatch.setattr(mod, "post_status", fake_post_status)
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
pr = _pr_fixture(5, "abc123")
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "success"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
ok = mod.process_pr(pr)
assert ok is False
def test_main_exits_nonzero_when_any_post_fails(monkeypatch):
mod = load_reaper()
monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
monkeypatch.setenv("GITEA_HOST", "git.example.test")
monkeypatch.setenv("REPO", "owner/repo")
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
monkeypatch.setattr(
mod,
"list_open_prs",
lambda limit: [
_pr_fixture(1, "abc123"),
_pr_fixture(2, "def456"),
],
)
calls = {"n": 0}
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "success"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
def fake_post_status(sha, context, description):
calls["n"] += 1
if calls["n"] == 2:
raise mod.ApiError("simulated failure")
monkeypatch.setattr(mod, "post_status", fake_post_status)
exit_code = mod.main()
assert exit_code == 1
def test_main_exits_zero_when_all_posts_succeed(monkeypatch):
mod = load_reaper()
monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
monkeypatch.setenv("GITEA_HOST", "git.example.test")
monkeypatch.setenv("REPO", "owner/repo")
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
monkeypatch.setattr(
mod,
"list_open_prs",
lambda limit: [_pr_fixture(1, "abc123")],
)
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "success"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
monkeypatch.setattr(mod, "post_status", lambda *a, **k: None)
exit_code = mod.main()
assert exit_code == 0
def test_dry_run_does_not_post(monkeypatch):
mod = load_reaper()
api_calls = []
def fake_api(method, path, *, body=None, query=None, expect_json=True):
api_calls.append((method, path, body))
return 200, {"ok": True}
monkeypatch.setattr(mod, "api", fake_api)
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
pr = _pr_fixture(6, "mno345")
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "success"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
monkeypatch.setattr(mod, "DRY_RUN", True)
ok = mod.process_pr(pr)
assert ok is True
# DRY_RUN should prevent the POST /statuses call
assert not any(
method == "POST" and "/statuses/" in path for method, path, _ in api_calls
)
def test_duplicate_contexts_use_latest_state(monkeypatch):
mod = load_reaper()
posted = []
monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
],
)
pr = _pr_fixture(7, "pqr678")
def fake_combined_status(sha):
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
# duplicate: first pending, then success — the loop overwrites
_status_entry("CI / Detect changes (pull_request)", "pending"),
_status_entry("CI / Detect changes (pull_request)", "success"),
]
}
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
ok = mod.process_pr(pr)
assert ok is True
assert len(posted) == 1
def test_load_required_sub_jobs_from_ci_yml_pull_request_event():
mod = load_reaper()
# UMBRELLA_CONTEXT defaults to pull_request, so derivation should yield
# the pull_request suffix.
jobs = mod._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
assert all(j.endswith(" (pull_request)") for j in jobs)
assert "CI / Detect changes (pull_request)" in jobs
assert "CI / Python Lint & Test (pull_request)" in jobs
def test_load_required_sub_jobs_from_ci_yml_push_event(monkeypatch):
mod = load_reaper()
monkeypatch.setattr(mod, "UMBRELLA_CONTEXT", "CI / all-required (push)")
jobs = mod._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
assert all(j.endswith(" (push)") for j in jobs)
assert "CI / Detect changes (push)" in jobs
def test_list_open_prs_paginates(monkeypatch):
mod = load_reaper()
calls = []
def fake_api(method, path, *, body=None, query=None, expect_json=True):
calls.append(query)
page = int(query.get("page", 1))
limit = int(query.get("limit", 50))
if page == 1:
return 200, [{"number": 1}, {"number": 2}]
if page == 2:
return 200, [{"number": 3}]
return 200, []
monkeypatch.setattr(mod, "api", fake_api)
prs = mod.list_open_prs(limit=2)
assert len(prs) == 3
assert prs[0]["number"] == 1
assert prs[2]["number"] == 3
assert calls[0]["page"] == "1"
assert calls[1]["page"] == "2"
def test_process_pr_returns_false_on_status_fetch_failure(monkeypatch):
mod = load_reaper()
def fake_get_combined_status(sha):
raise mod.ApiError("GET /statuses/abc123 -> HTTP 500: simulated outage")
monkeypatch.setattr(mod, "get_combined_status", fake_get_combined_status)
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
["CI / Detect changes (pull_request)"],
)
pr = _pr_fixture(8, "abc123")
ok = mod.process_pr(pr)
assert ok is False
def test_process_pr_returns_false_on_missing_statuses_array(monkeypatch):
mod = load_reaper()
def fake_get_combined_status(sha):
return {"state": "success"} # missing 'statuses' array
monkeypatch.setattr(mod, "get_combined_status", fake_get_combined_status)
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
["CI / Detect changes (pull_request)"],
)
pr = _pr_fixture(9, "def456")
ok = mod.process_pr(pr)
assert ok is False
def test_main_exits_nonzero_when_any_status_read_fails(monkeypatch):
mod = load_reaper()
monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
monkeypatch.setenv("GITEA_HOST", "git.example.test")
monkeypatch.setenv("REPO", "owner/repo")
monkeypatch.setattr(
mod,
"REQUIRED_SUB_JOBS",
[
"CI / Detect changes (pull_request)",
"CI / Platform (Go) (pull_request)",
],
)
monkeypatch.setattr(
mod,
"list_open_prs",
lambda limit: [
_pr_fixture(1, "abc123"),
_pr_fixture(2, "def456"),
],
)
def fake_combined_status(sha):
if sha == "abc123":
return {
"statuses": [
_status_entry("CI / all-required (pull_request)", "failure"),
_status_entry("CI / Detect changes (pull_request)", "success"),
_status_entry("CI / Platform (Go) (pull_request)", "success"),
]
}
raise mod.ApiError("simulated status fetch failure")
monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
monkeypatch.setattr(mod, "post_status", lambda *a, **k: None)
exit_code = mod.main()
assert exit_code == 1
-360
View File
@@ -1,360 +0,0 @@
#!/usr/bin/env python3
"""umbrella-reaper — auto-recovery for stale CI umbrella statuses on PRs.
Tracking: molecule-core#1780.
Sibling to status-reaper.py (default-branch push-suffix compensation),
but scoped to pull_request umbrellas instead of main-branch contexts.
What this script does, per `.gitea/workflows/umbrella-reaper.yml` invocation:
1. List open PRs via GET /repos/{o}/{r}/pulls?state=open&limit={N}.
2. For EACH PR:
- GET combined commit status for PR head SHA.
- Look for the umbrella context (default: "CI / all-required (pull_request)").
- If umbrella state is "failure":
- Verify ALL required sub-job contexts are "success".
- If yes POST compensating success to /statuses/{sha} with the
same umbrella context and an honest description.
- If any required sub-job is NOT success skip (umbrella correctly
reflects reality; do NOT lie).
- If umbrella state is "success" or "pending" skip.
3. Exit 0. Re-running is idempotent Gitea de-dups by context.
What it does NOT do:
- Touch non-umbrella contexts.
- Compensate when ANY required sub-job is missing, pending, failure, or
cancelled. Only the "all sub-jobs green, umbrella stale" race.
- Merge PRs. It only posts a status; branch protection still requires
human approval.
- Run on closed PRs.
Halt conditions:
- Missing required env vars exit 1 with ::error:: message.
- API 5xx on PR list fail-loud (can't assess state).
- API 5xx on an individual PR's status → ::warning:: + continue to next PR.
"""
from __future__ import annotations
import json
import os
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any
def _load_required_sub_jobs_from_ci_yml(workflows_dir: str) -> list[str]:
"""Parse ci.yml and extract the all-required sentinel's sub-job contexts.
Supports two shapes of the all-required job run block:
1. Legacy Python f-string list (pre-2026-06-01):
f"CI / Detect changes ({event})"
2. Current shell-script shape (post-2026-06-01 scheduler fix):
check "Detect changes" "$CHANGES_RESULT"
Raises RuntimeError if ci.yml is missing, has no all-required job, or the
run block cannot be parsed.
"""
ci_path = Path(workflows_dir) / "ci.yml"
if not ci_path.exists():
raise RuntimeError(f"ci.yml not found at {ci_path}")
# PyYAML is installed by the workflow (same as status-reaper.py).
import yaml
with ci_path.open() as f:
doc = yaml.safe_load(f)
jobs = doc.get("jobs", {})
all_required = jobs.get("all-required")
if not isinstance(all_required, dict):
raise RuntimeError("ci.yml missing 'all-required' job")
steps = all_required.get("steps", [])
run_block = ""
for step in steps:
if isinstance(step, dict):
run_text = step.get("run", "")
if run_text:
run_block = run_text
break
if not run_block:
raise RuntimeError("all-required job missing run block")
# Determine event suffix from the umbrella context we are watching.
if UMBRELLA_CONTEXT.endswith(" (pull_request)"):
suffix = "(pull_request)"
elif UMBRELLA_CONTEXT.endswith(" (push)"):
suffix = "(push)"
else:
m = re.search(r' \(([^)]+)\)$', UMBRELLA_CONTEXT)
suffix = m.group(1) if m else "pull_request"
# Try legacy f-string format first.
if "({event})" in run_block:
matches = re.findall(r'f["\'](.*?\(\{event\}\))["\']', run_block)
if matches:
return [m.replace("({event})", suffix) for m in matches]
# Try current shell-script format: check "Name" "$RESULT"
matches = re.findall(r'check\s+"([^"]+)"', run_block)
if matches:
return [f"CI / {name} {suffix}" for name in matches]
raise RuntimeError("unable to derive required sub-jobs from all-required run block")
# --------------------------------------------------------------------------
# Environment
# --------------------------------------------------------------------------
def _env(key: str, *, default: str = "") -> str:
return os.environ.get(key, default)
GITEA_TOKEN = _env("GITEA_TOKEN")
GITEA_HOST = _env("GITEA_HOST")
REPO = _env("REPO")
DRY_RUN = _env("DRY_RUN", default="").lower() in ("1", "true", "yes")
# The umbrella context to watch. Must match the branch-protection name
# exactly (Gitea de-dups by context string).
UMBRELLA_CONTEXT = _env("UMBRELLA_CONTEXT", default="CI / all-required (pull_request)")
# Required sub-job contexts. The umbrella is only compensated when ALL of
# these are "success" on the same SHA. Order does not matter.
#
# Derive from ci.yml at runtime to prevent drift (CR2 blocker #1).
# The env var REQUIRED_SUB_JOBS overrides derivation for emergency
# tuning or local testing.
_REQUIRED_SUB_JOBS_OVERRIDE = _env("REQUIRED_SUB_JOBS")
if _REQUIRED_SUB_JOBS_OVERRIDE:
REQUIRED_SUB_JOBS = [
ctx.strip()
for ctx in _REQUIRED_SUB_JOBS_OVERRIDE.split(";")
if ctx.strip()
]
else:
try:
REQUIRED_SUB_JOBS = _load_required_sub_jobs_from_ci_yml(".gitea/workflows")
except Exception as exc:
sys.stderr.write(
f"::error::Failed to derive REQUIRED_SUB_JOBS from ci.yml: {exc}\n"
)
sys.exit(1)
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
PR_LIMIT = int(_env("PR_LIMIT", default="50"))
def _require_runtime_env() -> None:
for key in ("GITEA_TOKEN", "GITEA_HOST", "REPO"):
if not os.environ.get(key):
sys.stderr.write(f"::error::missing required env var: {key}\n")
sys.exit(1)
# --------------------------------------------------------------------------
# Tiny HTTP helper
# --------------------------------------------------------------------------
class ApiError(RuntimeError):
pass
def api(
method: str,
path: str,
*,
body: dict | None = None,
query: dict[str, str] | None = None,
expect_json: bool = True,
) -> tuple[int, Any]:
url = f"{API}{path}"
if query:
url = f"{url}?{urllib.parse.urlencode(query)}"
data = None
headers = {
"Authorization": f"token {GITEA_TOKEN}",
"Accept": "application/json",
}
if body is not None:
data = json.dumps(body).encode("utf-8")
headers["Content-Type"] = "application/json"
req = urllib.request.Request(url, method=method, data=data, headers=headers)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
raw = resp.read()
status = resp.status
except urllib.error.HTTPError as e:
raw = e.read()
status = e.code
if not (200 <= status < 300):
snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
raise ApiError(f"{method} {path} -> HTTP {status}: {snippet}")
if not raw:
return status, None
try:
return status, json.loads(raw)
except json.JSONDecodeError as e:
if expect_json:
raise ApiError(
f"{method} {path} -> HTTP {status} but body is not JSON: {e}"
) from e
return status, {"_raw": raw.decode("utf-8", errors="replace")}
# --------------------------------------------------------------------------
# Gitea reads / writes
# --------------------------------------------------------------------------
def list_open_prs(limit: int = 50) -> list[dict]:
"""Paginate through all open PR pages. Fail closed on non-list responses."""
all_prs: list[dict] = []
page = 1
while True:
_, body = api(
"GET",
f"/repos/{OWNER}/{NAME}/pulls",
query={"state": "open", "limit": str(limit), "page": str(page)},
)
if not isinstance(body, list):
raise ApiError(f"PR list page {page} response is not a JSON array")
if not body:
break
all_prs.extend(body)
if len(body) < limit:
break
page += 1
return all_prs
def get_combined_status(sha: str) -> dict:
_, body = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
if not isinstance(body, dict):
raise ApiError(f"status for {sha} response is not a JSON object")
return body
def post_status(sha: str, context: str, description: str) -> None:
payload = {
"context": context,
"state": "success",
"description": description,
}
if DRY_RUN:
print(f"[DRY-RUN] Would POST /statuses/{sha}: {json.dumps(payload)}")
return
api("POST", f"/repos/{OWNER}/{NAME}/statuses/{sha}", body=payload)
# --------------------------------------------------------------------------
# Core logic
# --------------------------------------------------------------------------
def _entry_state(s: dict) -> str:
return s.get("status") or s.get("state") or ""
def process_pr(pr: dict) -> bool:
"""Process a single PR. Returns True if the tick succeeded for this PR
(including no-op skips), False if a compensating POST failed.
"""
num = pr.get("number")
sha = pr.get("head", {}).get("sha")
if not sha:
print(f"::warning::PR #{num}: missing head.sha; skipping")
return True
try:
status = get_combined_status(sha)
except ApiError as e:
print(f"::error::PR #{num}: status fetch failed: {e}")
return False
statuses = status.get("statuses")
if not isinstance(statuses, list):
print(f"::error::PR #{num}: combined status missing 'statuses' array")
return False
umbrella_entry = None
subjob_states: dict[str, str] = {}
for s in statuses:
if not isinstance(s, dict):
continue
ctx = s.get("context", "")
state = _entry_state(s)
if ctx == UMBRELLA_CONTEXT:
umbrella_entry = s
if ctx in REQUIRED_SUB_JOBS:
subjob_states[ctx] = state
if umbrella_entry is None:
print(f"::notice::PR #{num}: no umbrella context '{UMBRELLA_CONTEXT}'; skipping")
return True
umbrella_state = _entry_state(umbrella_entry)
if umbrella_state != "failure":
print(f"::notice::PR #{num}: umbrella is '{umbrella_state}'; skipping")
return True
# Verify ALL required sub-jobs are present and success
missing = [ctx for ctx in REQUIRED_SUB_JOBS if ctx not in subjob_states]
if missing:
print(
f"::notice::PR #{num}: umbrella=failure, but missing sub-jobs: {missing}; "
"skipping (sub-jobs may still be running)"
)
return True
not_success = [ctx for ctx in REQUIRED_SUB_JOBS if subjob_states[ctx] != "success"]
if not_success:
print(
f"::notice::PR #{num}: umbrella=failure, but sub-jobs not all success: "
f"{[(ctx, subjob_states[ctx]) for ctx in not_success]}; skipping"
)
return True
# All checks pass — post compensating status
desc = (
"Compensating status: all required sub-jobs verified success; "
"umbrella stale due to commit-status propagation race. "
f"Auto-posted by umbrella-reaper for PR #{num}."
)
try:
post_status(sha, UMBRELLA_CONTEXT, desc)
print(f"::notice::PR #{num}: posted compensating success for {UMBRELLA_CONTEXT}")
return True
except ApiError as e:
print(f"::error::PR #{num}: failed to post compensating status: {e}")
return False
def main() -> int:
_require_runtime_env()
# Drift guard: ci.yml derivation already happened at module load, but
# we sanity-check it is non-empty so the loop below doesn't trivially
# no-op because of a parse bug.
if not REQUIRED_SUB_JOBS:
sys.stderr.write("::error::REQUIRED_SUB_JOBS is empty; bailing out\n")
return 1
prs = list_open_prs(limit=PR_LIMIT)
print(f"::notice::Scanning {len(prs)} open PRs for stale umbrella statuses")
compensated = 0
failed = 0
for pr in prs:
ok = process_pr(pr)
if not ok:
failed += 1
print(f"::notice::umbrella-reaper complete (failed POSTs={failed})")
return 1 if failed else 0
if __name__ == "__main__":
sys.exit(main())
+27 -11
View File
@@ -55,22 +55,38 @@
version: 1
# Uniform hard-fail mode (CTO 2026-06-07):
# Every PR uses the same gate — no tier branching.
# Missing acks → status `failure`, blocks merge via branch protection.
# Tier-aware failure mode (RFC#351 open question 2):
# For tier:high — hard-fail (status `failure`, blocks merge via BP).
# For tier:medium — hard-fail (same as high; medium is non-trivial).
# For tier:low — soft-fail (status `pending` with `acked: N/M` in the
# description). BP can choose to require the context
# or not for low-tier PRs.
# If no tier label is present, default to medium (hard-fail) — every PR
# should have a tier label per sop-tier-check, and absence indicates
# a missing-tier defect we should surface, not silently lower the bar.
tier_failure_mode:
"tier:high": hard
"tier:medium": hard
"tier:low": soft
default_mode: hard # used when no tier:* label is present
# High-risk class (RFC#450 Option C, governance-fix for internal#442).
#
# A PR is "high-risk" when ANY of the listed labels are applied.
# A PR is "high-risk" when ANY of the listed labels are applied OR when
# the PR has `tier:high` (mechanically the strictest existing tier).
# High-risk items use `required_teams_high_risk` (when present on the
# item); non-high-risk items use the default `required_teams`.
#
# Risk-classed two-eyes shape:
# - Default class (not high-risk): a non-author engineers/managers/ceo
# ack satisfies the item — 25+ live identities, no dependency on a
# dead/inactive senior persona token.
# - High-risk class (any high_risk_label): still requires a non-author
# ceo ack (durable human team).
# This closes the inconsistency that the SOP charter already mandates
# `tier:high → ceo only` for the sibling `sop-tier-check` gate; the
# sop-checklist's `root-cause` and `no-backwards-compat` items now
# follow the same risk-classed two-eyes shape:
# - Default class (tier:low/medium, not high-risk): a non-author
# engineers/managers/ceo ack satisfies the item — 25+ live
# identities, no dependency on a dead/inactive senior persona
# token.
# - High-risk class (tier:high OR any high_risk_label): still
# requires a non-author ceo ack (durable human team).
#
# Tightening: add labels to high_risk_labels.
# Loosening: remove labels.
@@ -189,5 +205,5 @@ n/a_gates:
required_teams: [security, managers, ceo]
description: >-
Security review N/A when this change has no security surface
(docs-only, pure-frontend, dependency-only). A security/managers/ceo
(docs-only, pure-frontend, dependency-only). A security/owners
member must post /sop-n/a security-review to activate.
+5 -5
View File
@@ -13,14 +13,14 @@
# the structured JSON shape is forward-compatible.
#
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
# extract pattern as sop-checklist.
# extract pattern as sop-tier-check.
name: audit-force-merge
# pull_request_target loads from the base branch — same security model
# as sop-checklist. Without this, an attacker could rewrite the
# as sop-tier-check. Without this, an attacker could rewrite the
# workflow on a PR and skip the audit emission for their own
# force-merge. See `.gitea/workflows/sop-checklist.yml` for the full
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
# rationale.
on:
pull_request_target:
@@ -41,7 +41,7 @@ jobs:
ref: ${{ github.event.pull_request.base.sha }}
- name: Detect force-merge + emit audit event
env:
# Same org-level secret the sop-checklist workflow uses.
# Same org-level secret the sop-tier-check workflow uses.
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
@@ -54,7 +54,7 @@ jobs:
# required checks) for each branch listed here.
#
# Declared here rather than fetched from /branch_protections
# because that endpoint requires admin write — sop-checklist-bot is
# because that endpoint requires admin write — sop-tier-bot is
# read-only by design (least-privilege).
REQUIRED_CHECKS_JSON: |
{
+5 -2
View File
@@ -34,8 +34,11 @@ jobs:
check:
name: Block forbidden paths
runs-on: ubuntu-latest
# Hard gate — detected internal-path leaks fail the workflow.
# continue-on-error removed per directive (fail-open → fail-closed).
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
# the PR. Follow-up PR flips this off after surfaced defects are
# triaged.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
@@ -1,165 +0,0 @@
name: boot-to-registration-e2e (advisory)
# cp#455 — Minimal-cell boot-to-registration e2e.
# CTO directive 14eb4f07: "build the minimal claude-code+kimi cell,
# it should now go GREEN since the fix is live."
#
# Stage 1 of 5-stage rollout. Reuses the dispatch-only EC2
# provisioning path from test_staging_full_saas.sh but reduced to
# the minimum boot-to-registration surface:
#
# 1. Provision request accepted; workspace transitions to booting/running
# 2. Controlplane receives /registry/register for that workspace_id
# 3. JSON-RPC/completion route returns successful minimal response
# 4. Teardown terminates workspace even on failure (trap)
#
# Advisory (non-blocking) per Researcher Stage 2 design — RED on
# current main is expected pre-cp#469-cluster. After cp#477 deploy
# (888efceb) + PR #2167 merge, cell should turn GREEN. THAT green
# is the cluster-proof signal.
#
# Cost controls (mandatory):
# - SPOT instances (tagged run_id/workspace_id for cost attribution)
# - Fast teardown (~3-5 min wall-clock) even on assertion failure
# - Structured per-cell results JSON (runtime/provider/model/
# billing_mode/workspace_id/register_status/completion_status/
# teardown_status/elapsed_seconds)
#
# Inputs:
# runtime : default claude-code
# billing_mode : default platform_managed (the cp#469-cluster path)
# provider : default platform (vs direct-to-provider)
# model : default moonshot/kimi-k2.6 (CTO-specified)
#
# PR target: molecule-core (this file). Companion harness extension
# (test_minimal_boot_cell.sh) lives in tests/e2e/ alongside
# test_staging_full_saas.sh — same repo, same branch.
#
# Note: cp#455 was originally spec'd to live in molecule-controlplane
# (`.gitea/workflows/` path), but molecule-core's CI is the home for
# tenant-boot e2e tests in this stage. Stage 2 may move the path.
on:
workflow_dispatch:
# Note: Gitea 1.22.6 does not support workflow_dispatch.inputs
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Defaults
# are hardcoded in the job env below. Stage 2 can add matrix/
# param support once the Gitea version supports it.
# Advisory: no cron schedule, manual dispatch only. Branch protection
# doesn't require this — RED on main is expected pre-cp#469-cluster
# deploy, GREEN signals the cluster is live.
permissions:
contents: read
# No issue-write; failures surface as red runs in workflow history.
concurrency:
group: boot-to-registration-e2e
cancel-in-progress: false
jobs:
# bp-exempt: advisory e2e — non-gating, manual dispatch only (cp#455 Stage 1)
minimal-cell:
name: Minimal cell (claude-code + platform + moonshot/kimi-k2.6)
runs-on: ubuntu-latest
# Bounded at 12 min. Wall-clock budget breakdown:
# - cold EC2 provision: ~3-4 min (SPOT)
# - /registry/register wait: ~30s
# - completion call: ~10s
# - teardown: ~30-60s
# - tail headroom: ~6-7 min
timeout-minutes: 12
env:
# Hardcoded defaults — Gitea 1.22.6 does not support workflow_dispatch.inputs
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Stage 2 can add
# matrix/param support once the Gitea version supports it.
E2E_RUNTIME: claude-code
E2E_BILLING_MODE: platform_managed
E2E_PROVIDER: platform
E2E_MODEL: moonshot/kimi-k2.6
E2E_RUN_ID: cp455-${{ github.run_id }}
E2E_PROVISION_TIMEOUT_SECS: '300' # 5 min — fast teardown budget
MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Verify required secrets present
run: |
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — minimal-cell e2e cannot run"
echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
exit 1
fi
- name: Install required tools
run: |
for cmd in jq curl python3; do
command -v "$cmd" >/dev/null 2>&1 || {
echo "::error::required tool '$cmd' not on PATH — runner image regression?"
exit 1
}
done
- name: Run minimal-cell boot-to-registration harness
# The harness script handles its own teardown via EXIT trap;
# even on assertion failure (provision timeout, register
# timeout, completion failure), the workspace is deprovisioned
# and a leak is reported. Exit code propagates from the script.
# Structured per-cell results are emitted to ${GITHUB_STEP_SUMMARY}
# so operators see pass/fail per assertion without scrolling.
run: |
bash tests/e2e/test_minimal_boot_cell.sh
- name: Emit structured per-cell results
if: always()
# Always run (even on failure) so the structured results are
# visible in the workflow summary. The script writes a JSON
# file at /tmp/cell-result.json; this step renders it as a
# job summary.
run: |
if [ -f /tmp/cell-result.json ]; then
echo "## Minimal-cell results" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo '```json' >> "$GITHUB_STEP_SUMMARY"
cat /tmp/cell-result.json >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo '```' >> "$GITHUB_STEP_SUMMARY"
else
echo "## Minimal-cell results: NO_RESULT_FILE" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo "Harness did not produce /tmp/cell-result.json — likely crashed before trap fired." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Failure summary
if: failure()
run: |
{
echo "## cp#455 minimal-cell FAILED"
echo ""
echo "**Run ID:** ${{ github.run_id }}"
echo "**Runtime:** ${E2E_RUNTIME}"
echo "**Billing mode:** ${E2E_BILLING_MODE}"
echo "**Provider:** ${E2E_PROVIDER}"
echo "**Model:** ${E2E_MODEL}"
echo "**Slug:** ${E2E_RUN_ID}"
echo ""
echo "### What this means"
echo ""
echo "The minimal claude-code+kimi cell did not pass all 4 assertions:"
echo "1. Provision request accepted; workspace transitions to booting/running"
echo "2. Controlplane receives /registry/register for that workspace_id"
echo "3. JSON-RPC/completion route returns successful minimal response"
echo "4. Teardown terminates workspace even on failure (trap)"
echo ""
echo "RED is expected pre-cp#469-cluster. After cp#477 deploy (888efceb) + PR #2167 merge,"
echo "this should turn GREEN. Persistent RED after both merge = cluster bug, not e2e bug."
echo ""
echo "### Next steps"
echo ""
echo "1. Check the harness output above for the assertion that failed"
echo "2. If assertion 1 fails: provision path broken — check CP admin API + EC2 quota"
echo "3. If assertion 2 fails: /registry/register path broken — check workspace-server boot"
echo "4. If assertion 3 fails: LLM proxy / completion path broken — check cp#469 cluster"
echo "5. If assertion 4 fails: teardown trap broken — leak risk, fix immediately"
} >> "$GITHUB_STEP_SUMMARY"
-1
View File
@@ -96,7 +96,6 @@ env:
GITHUB_SERVER_URL: https://git.moleculesai.app
jobs:
# bp-exempt: advisory arm64 pilot, non-gating by design (internal#418).
fast-checks:
name: fast-checks
# AND-set: only the Mac arm64 runner advertises macos-self-hosted.
+3 -3
View File
@@ -12,7 +12,7 @@
# (SHA 0adf2098) per RFC internal#219 Phase 2b+c — replicate repo-by-repo.
#
# When any pair diverges, a `[ci-drift]` issue is opened or updated
# (idempotent by title) and labelled `ci-bp-drift`. This is the
# (idempotent by title) and labelled `tier:high`. This is the
# auto-detection that closes the regression class identified in
# RFC §1 finding 3 (protection only listed 2 of 6 real jobs for
# ~weeks, undetected) and §6 (audit env drifts silently from
@@ -106,7 +106,7 @@ jobs:
AUDIT_WORKFLOW_PATH: '.gitea/workflows/audit-force-merge.yml'
# Path to the CI workflow with the sentinel + the jobs.
CI_WORKFLOW_PATH: '.gitea/workflows/ci.yml'
# Issue label applied on file/update. `ci-bp-drift` exists in
# Issue label applied on file/update. `tier:high` exists in
# the molecule-core label set (verified 2026-05-11, label id 9).
DRIFT_LABEL: 'ci-bp-drift'
DRIFT_LABEL: 'tier:high'
run: python3 .gitea/scripts/ci-required-drift.py
+43 -62
View File
@@ -25,9 +25,10 @@
# sufficient for `actions/checkout` against this same repo.
#
# 4. Docs — no docs/scripts reference github.com URLs that need swapping.
# The canvas-deploy-status step (core#2226, formerly canvas-deploy-reminder)
# writes the canvas ordered-deploy status into the step summary; it points
# at the ECR canvas image and the publish workflow, no ghcr.io prose.
# The canvas-deploy-reminder step writes a `ghcr.io/...` image
# reference into the step summary text — that's documentation prose
# pointing at the ECR-mirrored canvas image and stays unchanged for
# this port (a separate cleanup if ghcr→ECR sweep is in scope).
#
# Cross-links:
# - RFC: internal#219 (CI/CD hard-gate hardening)
@@ -364,25 +365,6 @@ jobs:
# check missed. If a refactor weakens the gate to a shape check,
# this step goes red on every PR.
bash tests/e2e/test_completion_assert_unit.sh
# harden/e2e-staging-saas-failclosed: fail-direction proof for the
# E2E_REQUIRE_LIVE fail-closed-on-skip guard in
# test_staging_full_saas.sh. Offline (no LLM/network/provisioning):
# asserts the guard exits 5 when a live lifecycle did NOT run and
# passes when all milestones fired — so a refactor that lets the
# staging gate report green without a real provision→online→A2A
# cycle goes red on every PR.
bash tests/e2e/test_require_live_guard_unit.sh
# harden/enforce-ci-gates-core-v2 (PR #2286): fail-direction proof
# for the E2E_REQUIRE_LIVE zero-validated gate in
# test_priority_runtimes_e2e.sh (the REQUIRED `E2E API Smoke Test`).
# Offline (no LLM/network/provisioning): sources that script under
# its unit source-guard and drives the REAL evaluate_require_live_gate
# — asserts REQUIRE_LIVE=1 + zero validated → RED (the false-green
# trap), REQUIRE_LIVE=1 + >=1 validated → GREEN, and REQUIRE_LIVE
# unset + zero validated → GREEN (loud skip). CI can't provision a
# live arm to prove this, so this unit test IS the regression gate:
# a revert of the zero-validated→RED logic goes red on every PR.
bash tests/e2e/test_require_live_priority_gate_unit.sh
- if: ${{ needs.changes.outputs.scripts == 'true' }}
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
@@ -407,60 +389,61 @@ jobs:
# mc#959 root-fix (sre)
canvas-deploy-status:
# core#2226: replaces the old advisory "Canvas Deploy Reminder". The canvas
# image now has a real ORDERED auto-deploy (publish-canvas-image.yml:
# build → push :staging-<sha> → wait green main CI → promote :latest by
# digest), and docker-compose pins via CANVAS_IMAGE_TAG. There is no longer
# a manual "go run docker compose pull by hand" step to remind operators
# about — so this job just records, on a canvas-touching main push, that the
# ordered deploy is handling it (and where to watch), instead of prescribing
# a manual action that determinism made obsolete.
name: Canvas Deploy Status
canvas-deploy-reminder:
name: Canvas Deploy Reminder
runs-on: docker-host
# Per-step no-op (not job-level `if:`) so the job reaches SUCCESS on PRs
# instead of skipped — skipped poisons the PR combined status (internal#817).
# Step-level exit 0 handles the "not a canvas main push" case.
# mc#1982 root-fix: added job-level `if:` so ci-required-drift.py's
# ci_job_names() detects this as github.ref-gated and skips it from F1.
# The step-level exit 0 handles the "not main push" case; the job-level
# `if:` makes the gating explicit so the drift script sees it.
# Runs on both main and staging pushes; step exits 0 when not applicable.
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
needs: [changes, canvas-build]
steps:
- name: Record canvas ordered-deploy status
- name: Write deploy reminder to step summary
env:
COMMIT_SHA: ${{ github.sha }}
CANVAS_CHANGED: ${{ needs.changes.outputs.canvas }}
EVENT_NAME: ${{ github.event_name }}
REF_NAME: ${{ github.ref }}
# github.server_url resolves via the workflow-level env override to the
# Gitea instance, so RUN_URL points at the Gitea run page (not github.com).
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions
# github.server_url resolves via the workflow-level env override
# to the Gitea instance, so the RUN_URL points at the Gitea run
# page (not github.com). See feedback_act_runner_github_server_url.
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
set -euo pipefail
if [ "$CANVAS_CHANGED" != "true" ] || [ "$EVENT_NAME" != "push" ] || [ "$REF_NAME" != "refs/heads/main" ]; then
echo "Canvas deploy status not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
echo "Canvas deploy reminder not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
exit 0
fi
# Write body to a temp file — avoids backtick escaping in shell.
cat > /tmp/deploy-status.md << 'BODY'
## Canvas ordered deploy in progress — no manual action required
cat > /tmp/deploy-reminder.md << 'BODY'
## Canvas build passed — deploy required
This canvas-touching main push triggers `publish-canvas-image`, which now
runs an ORDERED, CI-gated deploy (core#2226) — the same shape as the
platform's deploy-production:
The `publish-canvas-image` workflow is now building a fresh Docker image
(`ghcr.io/molecule-ai/canvas:latest`) in the background.
1. Build → push `molecule-ai/canvas:staging-<sha>` + `:staging-latest`.
2. Wait for green main CI on this SHA.
3. Promote `:latest` to the verified `:staging-<sha>` by digest.
Once it completes (~35 min), apply on the host machine with:
```bash
cd <runner-workspace>
git pull origin main
docker compose pull canvas && docker compose up -d canvas
```
Tenants/hosts pin via `CANVAS_IMAGE_TAG` (default `latest` = the last
CI-green build), so a deploy is reproducible — no hand-run
`docker compose pull` needed. Watch the run in the canvas publish workflow.
If you need to rebuild from local source instead (e.g. testing unreleased
changes or a new `NEXT_PUBLIC_*` URL), use:
```bash
docker compose build canvas && docker compose up -d canvas
```
BODY
printf '\n> Posted automatically by CI · commit `%s` · [publish workflow](%s)\n' \
"$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-status.md
printf '\n> Posted automatically by CI · commit `%s` · [build log](%s)\n' \
"$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-reminder.md
# Gitea has no commit-comments API; write to GITHUB_STEP_SUMMARY, which
# both GitHub and Gitea Actions render as the run's summary page.
cat /tmp/deploy-status.md >> "$GITHUB_STEP_SUMMARY"
# Gitea has no commit-comments API; write to GITHUB_STEP_SUMMARY,
# which both GitHub Actions and Gitea Actions render as the
# workflow run's summary page. (#75 / PR-D)
cat /tmp/deploy-reminder.md >> "$GITHUB_STEP_SUMMARY"
# Python Lint & Test — required check, always runs.
# Runtime Python moved to molecule-ai-workspace-runtime. Keep this context as
@@ -499,7 +482,7 @@ jobs:
# `CI / all-required (pull_request)` per issue #1473.
#
# Closes the failure mode where status_check_contexts on molecule-core/main
# only listed `Secret scan` + `sop-checklist` (the 2 meta-gates), so real
# only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
# `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
# red silently merged through. See internal#286 for the three concrete
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
@@ -532,8 +515,9 @@ jobs:
# The `needs:` list MUST stay in lockstep with ci-required-drift.py's
# F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
# whose `if:` gates on github.event_name/github.ref). canvas-deploy-
# status is per-step-gated (not job-level `if:`) so it reaches SUCCESS
# on PRs and is included here — internal#817. If a new always-running
# reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
# so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
# skipped job would never let the sentinel run. If a new always-running
# CI job is added, add it here too or ci-required-drift F1 will flag it.
#
# Stays on the dedicated `ci-meta` lane (no docker work, so the
@@ -547,7 +531,6 @@ jobs:
- canvas-build
- shellcheck
- python-lint
- canvas-deploy-status
continue-on-error: false
runs-on: ci-meta
timeout-minutes: 5
@@ -566,7 +549,6 @@ jobs:
CANVAS_RESULT: ${{ needs.canvas-build.result }}
SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
CANVAS_DEPLOY_RESULT: ${{ needs.canvas-deploy-status.result }}
run: |
set -euo pipefail
fail=0
@@ -588,7 +570,6 @@ jobs:
check "Canvas (Next.js)" "$CANVAS_RESULT"
check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
check "Python Lint & Test" "$PYTHON_LINT_RESULT"
check "Canvas Deploy Status" "$CANVAS_DEPLOY_RESULT"
if [ "$fail" -ne 0 ]; then
echo "::error::all-required: one or more aggregated CI jobs did not succeed"
exit 1
+3 -3
View File
@@ -131,9 +131,9 @@ jobs:
# on the per-runtime default ("sonnet" → routes to direct
# Anthropic, defeats the cost saving). Operators can override
# via workflow_dispatch by setting a different E2E_MODEL_SLUG
# input if they need to exercise a specific model. MiniMax-M2.7 is the
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7' }}
# input if they need to exercise a specific model. MiniMax-M2 is the
# stable staging MiniMax path used by the full-SaaS smoke.
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2' }}
# Bound to 10 min so a stuck provision fails the run instead of
# holding up the next cron firing. 15-min default in the script
# is for the on-PR full lifecycle where we have more headroom.
+12 -133
View File
@@ -123,9 +123,8 @@ jobs:
# integration). See internal#512 for the class defect.
runs-on: docker-host
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
outputs:
api: ${{ steps.decide.outputs.api }}
steps:
@@ -161,9 +160,8 @@ jobs:
# detect-changes for the full rationale.
runs-on: docker-host
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
timeout-minutes: 15
env:
# Unique per-run container names so concurrent runs on the host-
@@ -272,33 +270,6 @@ jobs:
echo "::error::Redis did not become ready in 15s"
docker logs "$REDIS_CONTAINER" || true
exit 1
- name: Set deterministic admin token for the e2e platform
if: needs.detect-changes.outputs.api == 'true'
run: |
# AdminAuth (workspace-server/internal/middleware/wsauth_middleware.go:164)
# reads ADMIN_TOKEN. Setting it (a) closes isDevModeFailOpen (devmode.go:50
# returns false when ADMIN_TOKEN is non-empty), so admin routes require a
# bearer, and (b) makes Tier-2b accept a bearer that constant-time-equals
# ADMIN_TOKEN. The platform process inherits ADMIN_TOKEN from $GITHUB_ENV.
#
# MOLECULE_ADMIN_TOKEN is the var the e2e scripts send as the bearer
# (tests/e2e/_lib.sh:33 e2e_mint_workspace_token, and the run_mock
# org-import curl). Set BOTH to the SAME value so the bearer the test
# sends == the secret the platform checks. Deterministic test value;
# this platform is ephemeral, single-run, and never reachable off-host.
E2E_ADMIN_TOKEN="e2e-api-admin-${{ github.run_id }}-${{ github.run_attempt }}"
echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "Admin token configured for the e2e platform (ADMIN_TOKEN + MOLECULE_ADMIN_TOKEN)."
# Channels e2e test seam (core#2332 P1.10). These env-gated overrides
# let the LIVE Slack-webhook send path + Telegram discover path target
# the local mock upstreams that tests/e2e/test_channels_e2e.sh binds,
# so the outbound serialize+POST is provable in CI (was unit-mock-only).
# Inert in prod/staging — those deploys never set these. The fixed
# loopback ports MUST match the script's E2E_CHANNELS_*_PORT defaults.
echo "MOLECULE_CHANNELS_TEST_WEBHOOK_BASE=http://127.0.0.1:18099/" >> "$GITHUB_ENV"
echo "MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE=http://127.0.0.1:18098" >> "$GITHUB_ENV"
echo "Channels test seam configured (webhook+telegram mock bases on fixed loopback ports)."
- name: Build platform
if: needs.detect-changes.outputs.api == 'true'
working-directory: workspace-server
@@ -354,57 +325,19 @@ jobs:
# start-redis steps point at this run's per-run host ports.
./platform-server > platform.log 2>&1 &
echo $! > platform.pid
- name: Wait for /health (with migration completion gate)
# Issue #2205: 30 one-second probes is insufficient when the migration
# chain is still running; /health can flip true before migrations
# finish, so subsequent steps that touch the DB fail. Hybrid fix:
# bump timeout to 300s AND gate exit on the same workspaces-table
# existence check the downstream "Assert migrations applied" uses.
- name: Wait for /health
if: needs.detect-changes.outputs.api == 'true'
run: |
# Readiness signal: the platform binds /health only AFTER the full
# migration chain has been applied on cold start (it prints
# "Platform starting on :PORT" at that point). So a 200 from /health
# is the real "migrations done + server listening" signal.
#
# The migration chain grows every release, so a fixed ~30s budget is
# brittle by construction (it WILL be exceeded as migrations accrue).
# Use a generous wall-clock budget that comfortably exceeds
# cold-start + full-migration time, polling fast. This is robust to a
# growing chain WITHOUT masking a genuinely dead platform: if the
# background platform-server process has exited (e.g. a broken
# migration crashed it), we stop and fail loudly at once instead of
# waiting out the whole budget.
#
# Issue #2205: /health can flip true before migrations finish on a
# growing chain, so we gate exit on the workspaces-table existence
# check the downstream "Assert migrations applied" uses.
DEADLINE_SECS=300 # cold-start + full migration chain headroom
PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
start=$(date +%s)
while :; do
for i in $(seq 1 30); do
if curl -sf "$BASE/health" > /dev/null; then
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc \
"SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'" 2>/dev/null || echo "0")
if [ "$tables" = "1" ]; then
echo "Platform healthy + migrations applied after $(( $(date +%s) - start ))s"
exit 0
fi
fi
# Fast-fail: if the platform process died, /health will never come.
if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
cat workspace-server/platform.log || true
exit 1
fi
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
echo "::error::Platform did not become healthy with migrations applied within ${DEADLINE_SECS}s — see log below"
cat workspace-server/platform.log || true
exit 1
echo "Platform up after ${i}s"
exit 0
fi
sleep 1
done
echo "::error::Platform did not become healthy in 30s"
cat workspace-server/platform.log || true
exit 1
- name: Assert migrations applied
if: needs.detect-changes.outputs.api == 'true'
run: |
@@ -421,65 +354,11 @@ jobs:
- name: Run E2E API tests
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_api.sh
- name: Run keyless feature-contract E2E (terminal-diagnose / webhooks / budget / checkpoints / audit / traces / session-search / rescue / llm-billing-mode / resume / hibernate)
# Keyless required-lane coverage for feature endpoints that ship without
# an LLM key (runtime=external fixture). Each asserts the real HTTP
# contract + a meaningful failure mode (401/400/fail-closed) so a
# regression goes RED, not silently green. The mock-runtime A2A canned
# round-trip is covered by the priority-runtimes `mock` arm, not here.
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_keyless_feature_contracts_e2e.sh
- name: Run secrets-dispatch contract test (keyless SECRETS_JSON branch order)
# Previously orphaned (no workflow referenced it). Hermetic unit-style
# contract over test_staging_full_saas.sh's LLM-key branch precedence —
# needs no platform, no bearer, no network. Guards the 2026-05-03
# "wrong key shape wins" incident class.
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_secrets_dispatch.sh
- name: Run notify-with-attachments E2E
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_notify_attachments_e2e.sh
- name: "Run channels + data-prune E2E (REQUIRE-LIVE: mock upstream proves send+discover, purge proves prune)"
# core#2332 P1.10. Stands up a local mock upstream, points the LIVE
# Slack-webhook send + Telegram discover paths at it via the
# production-inert test seam configured above, and asserts the mock
# RECEIVED the serialized payload (send) + round-tripped the bot/chat
# (discover). Then exercises the RFC #734 data-prune: DELETE
# ?purge=true removes the target's durable child data while a sibling
# survives. E2E_REQUIRE_LIVE=1 ⇒ a missing/regressed seam is RED, not a
# silent skip. The platform inherits the MOLECULE_CHANNELS_TEST_* bases
# from $GITHUB_ENV; the script's mock ports match them (18099/18098).
- name: Run priority-runtimes E2E (claude-code + hermes — skips when keys absent)
if: needs.detect-changes.outputs.api == 'true'
env:
E2E_REQUIRE_LIVE: '1'
run: bash tests/e2e/test_channels_e2e.sh
- name: "Run priority-runtimes E2E (REQUIRE-LIVE: mock validates the runtime plumbing end-to-end)"
# E2E_REQUIRE_LIVE=1 is ON: the run MUST validate >=1 runtime end-to-end
# or it exits NON-zero (RED). This is now SAFE because the `mock` arm can
# actually provision in CI: the only blocker was that POST /org/import and
# POST /admin/workspaces/:id/tokens are AdminAuth-gated
# (router.go:778 + :427) and this job previously configured NO admin token,
# so every admin call 401'd ("admin auth required"). The "Set deterministic
# admin token" step above now sets ADMIN_TOKEN on the platform AND exports
# the matching MOLECULE_ADMIN_TOKEN the e2e scripts send as the bearer, so
# the mock arm can org-import → online → mint token → canned A2A reply →
# validated(). That guarantees VALIDATED>=1 on a healthy platform, so the
# REQUIRED `E2E API Smoke Test` gate now HONESTLY validates a runtime
# end-to-end; if the mock plumbing (DB insert, status flip, A2A proxy,
# activity logging, or the admin-auth wiring) genuinely breaks, the gate
# goes RED instead of false-green. The zero-validated→RED decision is also
# regression-gated WITHOUT provisioning by the bash unit test
# tests/e2e/test_require_live_priority_gate_unit.sh (wired into ci.yml's
# "Run E2E bash unit tests" job), so a revert of that logic still fails CI.
#
# MiniMax stays an OPPORTUNISTIC best-effort arm: create is registry-fragile
# in CI (422 UNREGISTERED_MODEL_FOR_RUNTIME), so a miss is reported via
# bestfail() and never reds the gate — mock carries the required validation,
# MiniMax is a bonus real-LLM check when it comes up. ZERO new credentials.
if: needs.detect-changes.outputs.api == 'true'
env:
E2E_REQUIRE_LIVE: '1'
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
run: bash tests/e2e/test_priority_runtimes_e2e.sh
- name: Install standalone runtime parser from Gitea registry
if: needs.detect-changes.outputs.api == 'true'
+14 -114
View File
@@ -113,29 +113,6 @@ jobs:
runs-on: docker-host
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
#
# PROMOTION-READINESS (toward required gate — do NOT flip continue-on-error
# without CTO sign-off, that's the irreversible call):
# NOW FAIL-CLOSED:
# - Postgres/Redis/platform/canvas readiness are already bounded
# readiness-polls that hard-fail (and dump logs) at their deadline,
# not fixed sleeps — preserved.
# - passWithNoTests:false + forbidOnly (playwright.config.ts) → a
# renamed/moved spec or stray test.only can no longer green the lane.
# - REQUIRE-LIVE guard in "Run Playwright E2E tests" → chat==true must
# actually execute >=1 test, else exit 1.
# - chat-desktop "activity log" test no longer swallows its assertion.
# STILL BLOCKS PROMOTION:
# - The echo round-trip asserts on rendered "Echo: ..." text but never
# asserts the echo runtime actually RECEIVED the A2A request
# (fixtures/echo-runtime.ts exposes lastRequest, unused) — an
# optimistic client-side render could pass without a real round-trip.
# Add a server-received assertion before required.
# - The "No-op pass" path (detect-changes chat!=true) is a legitimate
# paths-filter skip, but a required gate needs it to be a neutral
# check, not a green "success", so a skipped heavy lane can't be
# mistaken for a passed one.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
timeout-minutes: 15
env:
@@ -250,20 +227,6 @@ jobs:
echo "CANVAS_PORT=${CANVAS_PORT}" >> "$GITHUB_ENV"
echo "Canvas host port: ${CANVAS_PORT}"
- name: Set deterministic admin token
if: needs.detect-changes.outputs.chat == 'true'
run: |
# PR #2291 made auth fail-closed everywhere (no dev-mode escape).
# The platform server requires ADMIN_TOKEN; the canvas requires the
# matching NEXT_PUBLIC_ADMIN_TOKEN or every API call 401s.
# We set a deterministic per-run value so the ephemeral platform
# and canvas are paired correctly.
E2E_ADMIN_TOKEN="e2e-chat-admin-${{ github.run_id }}-${{ github.run_attempt }}"
echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "NEXT_PUBLIC_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "Admin token configured for e2e-chat platform + canvas."
- name: Start platform (background)
if: needs.detect-changes.outputs.chat == 'true'
working-directory: workspace-server
@@ -279,36 +242,16 @@ jobs:
- name: Wait for /health
if: needs.detect-changes.outputs.chat == 'true'
run: |
# Readiness signal: the platform binds /health only AFTER the full
# migration chain has been applied on cold start (it prints
# "Platform starting on :PORT" at that point). So a 200 from /health
# is the real "migrations done + server listening" signal.
#
# The migration chain grows every release, so a fixed ~30s budget is
# brittle by construction. Use a generous wall-clock budget that
# comfortably exceeds cold-start + full-migration time, polling fast.
# Robust to a growing chain WITHOUT masking a dead platform: if the
# background platform-server process has exited, fail loudly at once.
DEADLINE_SECS=180 # cold-start + full migration chain headroom
PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
start=$(date +%s)
while :; do
for i in $(seq 1 30); do
if curl -sf "http://127.0.0.1:${PLATFORM_PORT}/health" > /dev/null; then
echo "Platform healthy after $(( $(date +%s) - start ))s"
echo "Platform up after ${i}s"
exit 0
fi
if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
cat workspace-server/platform.log || true
exit 1
fi
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
cat workspace-server/platform.log || true
exit 1
fi
sleep 1
done
echo "::error::Platform did not become healthy in 30s"
cat workspace-server/platform.log || true
exit 1
- name: Install canvas dependencies
if: needs.detect-changes.outputs.chat == 'true'
@@ -335,68 +278,25 @@ jobs:
export NEXT_PUBLIC_WS_URL="ws://127.0.0.1:${PLATFORM_PORT}/ws"
npx next dev --turbopack -p "${CANVAS_PORT}" > canvas.log 2>&1 &
echo $! > canvas.pid
# Readiness must wait for the actual chat route to *compile*, not
# just for the dev server to bind the port. `next dev --turbopack`
# accepts the TCP connection well before it has compiled a route
# on first request, so a bare `curl /` can 200 (or hang) while the
# page the tests load is still building. We therefore probe the
# real route the specs navigate to (`/?m=chat`) and require a 2xx,
# which only happens once Turbopack has finished the first
# compile. The previous 30s budget was also too tight for a cold
# Turbopack first-compile on a loaded operator-host runner — the
# `Canvas did not start in 30s` flake. Raise to 120s (job
# timeout-minutes is 15, so this is comfortably bounded) and probe
# every 2s.
READY=""
for i in $(seq 1 60); do
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
# pollution of the captured status (lint-curl-status-capture.yml).
set +e
curl -s -o /dev/null -w '%{http_code}' "http://localhost:${CANVAS_PORT}/?m=chat" > /tmp/canvas-ready.code
set -e
CODE=$(cat /tmp/canvas-ready.code 2>/dev/null || echo "000")
if [ "$CODE" -ge 200 ] && [ "$CODE" -lt 400 ]; then
echo "Canvas (chat route compiled) up after ~$((i*2))s (HTTP ${CODE})"
READY=1
break
for i in $(seq 1 30); do
if curl -sf "http://localhost:${CANVAS_PORT}" > /dev/null 2>&1; then
echo "Canvas up after ${i}s"
exit 0
fi
sleep 2
sleep 1
done
if [ -z "$READY" ]; then
echo "::error::Canvas chat route did not compile in 120s (last HTTP ${CODE})"
cat canvas.log || true
exit 1
fi
echo "::error::Canvas did not start in 30s"
cat canvas.log || true
exit 1
- name: Run Playwright E2E tests
if: needs.detect-changes.outputs.chat == 'true'
working-directory: canvas
env:
# CI=1 activates forbidOnly in playwright.config.ts (a stray
# `test.only` would otherwise green the suite while skipping the
# rest). passWithNoTests:false (also in the config) already makes
# a zero-match selection exit non-zero.
CI: "1"
run: |
set -euo pipefail
export E2E_PLATFORM_URL="http://127.0.0.1:${PLATFORM_PORT}"
export E2E_DATABASE_URL="${DATABASE_URL}"
export PLAYWRIGHT_BASE_URL="http://localhost:${CANVAS_PORT}"
# REQUIRE-LIVE guard (mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE):
# this lane reached here only because detect-changes said chat==true,
# so it MUST actually execute the round-trip specs. `pipefail` makes
# a real test failure (playwright non-zero) abort here under `set -e`;
# passWithNoTests:false makes a zero-match selection non-zero too. The
# explicit grep below is belt-and-braces: assert the list reporter
# printed an executed-count summary, so a silent all-skip / no-op can
# never report green.
npx playwright test e2e/chat-desktop.spec.ts e2e/chat-mobile.spec.ts \
--reporter=list 2>&1 | tee /tmp/pw-chat.out
if ! grep -qE '[0-9]+ (passed|failed|skipped)' /tmp/pw-chat.out; then
echo "::error::E2E Chat REQUIRE-LIVE: chat==true but Playwright reported no executed tests — specs missing or all-skipped, refusing to report green."
exit 1
fi
npx playwright test e2e/chat-desktop.spec.ts e2e/chat-mobile.spec.ts
- name: Dump platform log on failure
if: failure() && needs.detect-changes.outputs.chat == 'true'
+5 -29
View File
@@ -130,37 +130,13 @@ jobs:
run: |
set -euo pipefail
./workspace-server/platform-server > workspace-server/platform.log 2>&1 &
PLATFORM_PID=$!
echo "$PLATFORM_PID" > workspace-server/platform.pid
# Readiness signal: the platform binds /health only AFTER the full
# migration chain has been applied on cold start (it prints
# "Platform starting on :PORT" at that point). So a 200 from /health
# is the real "migrations done + server listening" signal.
#
# The migration chain grows every release, so a fixed ~30s budget is
# brittle by construction. Use a generous wall-clock budget that
# comfortably exceeds cold-start + full-migration time, polling fast.
# Robust to a growing chain WITHOUT masking a dead platform: if the
# background platform-server process has exited, fail loudly at once.
DEADLINE_SECS=180 # cold-start + full migration chain headroom
start=$(date +%s)
while :; do
if curl -sf "$BASE/health" >/dev/null; then
echo "Platform healthy after $(( $(date +%s) - start ))s"
exit 0
fi
if ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
cat workspace-server/platform.log || true
exit 1
fi
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
cat workspace-server/platform.log || true
exit 1
fi
echo $! > workspace-server/platform.pid
for i in $(seq 1 30); do
curl -sf "$BASE/health" >/dev/null && exit 0
sleep 1
done
cat workspace-server/platform.log || true
exit 1
- name: Run comprehensive E2E
run: bash tests/e2e/test_comprehensive_e2e.sh
+4 -29
View File
@@ -126,7 +126,6 @@ jobs:
# push/dispatch/cron only (30+ min). This is NOT a fake-green mask of
# the real assertion — it validates the driving script's bash syntax
# and inline-python so a broken test script fails at PR time.
# bp-required: pending #1296 — PR emitter, not yet required (tracked in #1296).
pr-validate:
name: E2E Peer Visibility
runs-on: ubuntu-latest
@@ -268,36 +267,12 @@ jobs:
echo $! > platform.pid
- name: Wait for /health
run: |
# Readiness signal: the platform binds /health only AFTER the full
# migration chain has been applied on cold start (it prints
# "Platform starting on :PORT" at that point). So a 200 from /health
# is the real "migrations done + server listening" signal.
#
# The migration chain grows every release, so a fixed ~30s budget is
# brittle by construction. Use a generous wall-clock budget that
# comfortably exceeds cold-start + full-migration time, polling fast.
# Robust to a growing chain WITHOUT masking a dead platform: if the
# background platform-server process has exited, fail loudly at once.
DEADLINE_SECS=180 # cold-start + full migration chain headroom
PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
start=$(date +%s)
while :; do
if curl -sf "$BASE/health" > /dev/null; then
echo "Platform healthy after $(( $(date +%s) - start ))s"
exit 0
fi
if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
cat workspace-server/platform.log || true
exit 1
fi
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
cat workspace-server/platform.log || true
exit 1
fi
for i in $(seq 1 30); do
curl -sf "$BASE/health" > /dev/null && { echo "Platform up after ${i}s"; exit 0; }
sleep 1
done
echo "::error::Platform did not become healthy in 30s"
cat workspace-server/platform.log || true; exit 1
- name: Run LOCAL fresh-provision peer-visibility E2E (literal MCP list_peers)
# HONEST gate — NO continue-on-error. The local backend uses
# external-mode workspaces so this context tests the literal MCP
+10 -45
View File
@@ -12,30 +12,9 @@ name: E2E Staging Canvas (Playwright)
#
# Playwright test suite that provisions a fresh staging org per run and
# verifies every workspace-panel tab renders REAL content (not just an
# empty/errored container). Complements e2e-staging-saas.yml (which tests
# the API shape) by exercising the actual browser + canvas bundle against
# live staging.
#
# PROMOTION-READINESS (toward making this a HARD merge-gate):
# NOW RELIABLE (spec hardened — staging-tabs.spec.ts):
# - All waits condition-based (toBeVisible/toHaveAttribute/expect.poll);
# no fixed waitForTimeout in the spec.
# - Tabs asserted on settled REAL content, not "container visible".
# - ErrorBoundary + visible error alerts fail non-degraded tabs.
# - Tab-list parity-checked vs live DOM; fail-closed on missing tenant.
# STILL BLOCKS PROMOTION-TO-REQUIRED (do NOT remove continue-on-error —
# CTO-owned, RFC internal#219 §1):
# - Infra dependency: real staging EC2 per run (12-20 min cold boot);
# AWS/Cloudflare/CP availability would become merge-blockers.
# - Shared-zone TLS/DNS/ACME propagation flake surface is upstream of
# this repo and outside its control.
# - Required-gate correctness needs CP_STAGING_ADMIN_API_TOKEN GUARANTEED
# present; today's skip-if-absent (core#2225) is right for non-gating
# but would skip-green a required check.
# - Single hermes/platform_managed workspace; agent-dependent content
# (live chat/traces round-trip) not exercised on staging (#2162).
# The full checklist lives at the foot of canvas/e2e/staging-tabs.spec.ts.
# verifies every workspace-panel tab renders without crashing. Complements
# e2e-staging-saas.yml (which tests the API shape) by exercising the
# actual browser + canvas bundle against live staging.
#
# Triggers: push to main, PR touching canvas sources + this workflow only
# after the PR enters `merge-queue`, manual dispatch, and scheduled cron to
@@ -188,30 +167,16 @@ jobs:
- if: needs.detect-changes.outputs.canvas == 'true'
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# Skip-if-absent (core#2225), mirroring the serving-e2e gate's
# skip-if-secret-unset contract: a MISSING CI secret is an operator
# CONFIG gap, not a code regression, so it must not paint this E2E
# red. When CP_STAGING_ADMIN_API_TOKEN is unset we emit a LOUD
# ::warning:: + ::notice:: and skip the real provision/test steps (the
# job still completes green). When the secret IS present we run the
# full suite exactly as before. Operators: set
# CP_STAGING_ADMIN_API_TOKEN as a repo/org Actions secret on
# molecule-core to actually exercise this E2E.
- name: Check admin token (skip-if-absent)
id: token_check
- name: Verify admin token present
if: needs.detect-changes.outputs.canvas == 'true'
run: |
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
echo "::warning::CP_STAGING_ADMIN_API_TOKEN is not set on this runner — SKIPPING the staging canvas E2E (cannot auth to staging CP). This is an operator config gap, not a code failure; set the secret on molecule-core (repo or org Actions secrets) to run it. See core#2225."
echo "::notice::E2E Staging Canvas skipped: CP_STAGING_ADMIN_API_TOKEN absent."
echo "present=false" >> "$GITHUB_OUTPUT"
else
echo "CP_STAGING_ADMIN_API_TOKEN present ✓ — running staging canvas E2E."
echo "present=true" >> "$GITHUB_OUTPUT"
echo "::error::Missing CP_STAGING_ADMIN_API_TOKEN"
exit 2
fi
- name: Set up Node
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
if: needs.detect-changes.outputs.canvas == 'true'
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: '20'
@@ -219,11 +184,11 @@ jobs:
cache-dependency-path: canvas/package-lock.json
- name: Install canvas deps
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
if: needs.detect-changes.outputs.canvas == 'true'
run: npm ci
- name: Install Playwright browsers
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
if: needs.detect-changes.outputs.canvas == 'true'
timeout-minutes: 10
run: |
PREBAKED_PLAYWRIGHT=/ms-playwright
@@ -235,7 +200,7 @@ jobs:
npx playwright install --with-deps chromium
- name: Run staging canvas E2E
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
if: needs.detect-changes.outputs.canvas == 'true'
run: npx playwright test --config=playwright.staging.config.ts
- name: Upload Playwright report on failure
-29
View File
@@ -85,26 +85,6 @@ jobs:
runs-on: ubuntu-latest
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
#
# PROMOTION-READINESS (toward required gate — do NOT flip continue-on-error
# without CTO sign-off, that's the irreversible call):
# NOW FAIL-CLOSED:
# - Missing CP_STAGING_ADMIN_API_TOKEN → hard exit 2 (preflight).
# - Staging CP unhealthy → hard exit 1 (preflight, not a workspace bug).
# - Harness E2E_REQUIRE_LIVE=1 → exit 5 if a clean exit didn't prove
# all four awaiting_agent transitions (no silent skip).
# - Sweep transition (step 6) is now a bounded readiness-poll, not a
# fixed sleep + one-shot assert → no more sweep-cadence flake.
# - register / re-register retry ONLY transient edge 5xx (bounded),
# fail closed on 4xx → no more cold-boot-502 flake.
# STILL BLOCKS PROMOTION:
# - Single shared staging tenant + EC2 quota window: an infra-side
# provisioning outage (not a code bug) would turn the gate red.
# Needs an infra-class vs code-class signal split before required.
# - "CP unhealthy → exit 1" currently looks identical to a real
# failure on the run page; required-gate would need it demoted to
# a neutral/skip so staging flakiness can't block merges.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
timeout-minutes: 25
@@ -144,15 +124,6 @@ jobs:
- name: Run external-runtime E2E
id: e2e
# E2E_REQUIRE_LIVE=1: the harness fails CLOSED (exit 5) if it ever
# reaches a clean exit without proving all four awaiting_agent
# transitions. Mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE — a
# silent skip / early-return / dropped assertion can no longer
# masquerade as green. Token-missing and CP-unhealthy already
# hard-fail in the two preflight steps above, so reaching this step
# means a real cycle is expected.
env:
E2E_REQUIRE_LIVE: "1"
run: bash tests/e2e/test_staging_external_runtime.sh
# Mirror the e2e-staging-saas.yml safety net: if the runner is
-210
View File
@@ -1,210 +0,0 @@
name: E2E Staging Reconciler (heals terminated EC2)
# Live staging proof for the core#2261 instance-state reconciler
# (workspace-server/internal/registry/cp_instance_reconciler.go). The
# real-infra complement to the deterministic unit tests: provisions a real
# staging workspace, TERMINATES its EC2, and asserts the reconciler flips it
# off 'online' (PRIMARY gate) and auto-reprovisions on a new instance_id
# (SECONDARY, best-effort). See
# tests/e2e/test_reconciler_heals_terminated_instance.sh for the assertion
# contract + timeouts.
#
# Modeled on e2e-staging-saas.yml. Same secrets + same Gitea-port caveats:
# - Dropped workflow_dispatch.inputs (Gitea 1.22.6 parser rejects them).
# - Dropped merge_group / environment (no Gitea equivalent).
# - Workflow-level env.GITHUB_SERVER_URL pinned per
# feedback_act_runner_github_server_url.
#
# NOT a required check (yet). This is a brand-new live E2E that provisions +
# terminates real EC2 (costs money, shares the cp#245 cold-boot flake
# surface). A new live e2e must NOT hard-gate every merge until it has a
# green track record. continue-on-error: true surfaces failures without
# blocking. PROMOTE to branch-required (flip continue-on-error → false AND
# add "E2E Staging Reconciler" to branch protection) once it has run green on
# main for several consecutive days — same de-flake discipline the
# platform-boot job in e2e-staging-saas.yml documents.
on:
# Run when the reconciler itself, the script, or the libs it depends on
# change — so a reconciler regression is caught on the PR that introduces
# it (paths filter), plus a daily schedule to catch infra/AMI drift.
push:
branches: [main]
paths:
- 'workspace-server/internal/registry/cp_instance_reconciler.go'
- 'tests/e2e/test_reconciler_heals_terminated_instance.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/lib/model_slug.sh'
- '.gitea/workflows/e2e-staging-reconciler.yml'
pull_request:
branches: [main]
paths:
- 'workspace-server/internal/registry/cp_instance_reconciler.go'
- 'tests/e2e/test_reconciler_heals_terminated_instance.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/lib/model_slug.sh'
- '.gitea/workflows/e2e-staging-reconciler.yml'
workflow_dispatch:
schedule:
# 08:00 UTC daily — offset from e2e-staging-saas (07:00) so the two live
# harnesses don't fight over staging's per-hour org-creation quota.
- cron: '0 8 * * *'
# Serialize against itself: staging has a finite per-hour org-creation quota,
# and a cancelled run mid-teardown leaks EC2. cancel-in-progress: false
# mirrors e2e-staging-saas.yml.
concurrency:
group: e2e-staging-reconciler
cancel-in-progress: false
env:
GITHUB_SERVER_URL: https://git.moleculesai.app
jobs:
# PR-validation path: always posts success so a workflow-only / script-only
# PR has a status check (this workflow's real job only fires on the paths
# filter). Mirrors the pr-validate job in e2e-staging-saas.yml.
pr-validate:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 1
continue-on-error: true
- name: YAML validation (best-effort)
run: |
echo "e2e-staging-reconciler.yml — PR validation: workflow YAML is valid."
echo "Live E2E step runs only when the reconciler / script / libs change."
continue-on-error: true
e2e-staging-reconciler:
name: E2E Staging Reconciler
runs-on: ubuntu-latest
# NOT required yet — surface failures without blocking merges. Flip to
# false + add to branch protection once green on main for a de-flake
# window (see the header note). mc#1982: do not renew this mask silently.
continue-on-error: true
timeout-minutes: 60
permissions:
contents: read
env:
MOLECULE_CP_URL: https://staging-api.moleculesai.app
# Single admin-bearer secret drives provision + tenant-token retrieval +
# teardown (= Railway staging CP_ADMIN_API_TOKEN). Same secret name the
# saas workflow canonicalised to under internal#322.
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
# Leak-check is REQUIRED here: this test deliberately terminates an EC2,
# so teardown MUST positively confirm no slug-tagged box survives.
E2E_AWS_LEAK_CHECK: required
E2E_AWS_TERMINATE_LEAKS: '1'
E2E_RUNTIME: claude-code
# Platform-managed create path (moonshot/kimi-k2.6, no tenant key) — the
# combo proven to create cleanly; this test only needs the ws online.
#
# DELIBERATELY no E2E_MODEL_SLUG and no E2E_*_API_KEY here — mirror the
# e2e-staging-platform-boot job in e2e-staging-saas.yml. On
# E2E_LLM_PATH=platform the harness sends EMPTY secrets and lets
# pick_model_slug return the platform default moonshot/kimi-k2.6 (a member
# of the providers.yaml claude-code `platform` arm → provider=platform,
# billed by the CP LLM proxy, NO tenant key required).
#
# The previous wiring set E2E_MODEL_SLUG: MiniMax-M2 (a BARE id in the
# providers.yaml `minimax` BYOK arm → provider=minimax, requires
# MINIMAX_API_KEY) while sending secrets={} on the platform path. Because
# E2E_MODEL_SLUG wins over the E2E_LLM_PATH=platform branch in
# pick_model_slug, the workspace got a keyless BYOK-minimax model, could
# not resolve a serving path, and booted to status=failed — never online
# (run 223233: "MODEL_SLUG=MiniMax-M2" then "→ failed", "never reached
# status=online within 900s"). The BYOK key wiring was equally misleading:
# the harness ignores E2E_*_API_KEY on E2E_LLM_PATH=platform, so the keys
# only made the contradiction harder to spot. Platform-only is correct
# here — this test exercises instance-state, never an LLM completion.
E2E_LLM_PATH: platform
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Verify required secrets present
run: |
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
exit 2
fi
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
if [ -z "${!var:-}" ]; then
echo "::error::$var not set — this test terminates an EC2 and verifies no leak; AWS creds are mandatory"
exit 2
fi
done
echo "Required secrets present ✓"
- name: CP staging health preflight
run: |
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
if [ "$code" != "200" ]; then
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a reconciler bug."
exit 1
fi
echo "Staging CP healthy ✓"
- name: Run reconciler heal E2E
id: e2e
run: bash tests/e2e/test_reconciler_heals_terminated_instance.sh
# Belt-and-braces teardown: the script installs its own EXIT trap, but if
# the runner is cancelled the trap may not fire. This always() step
# double-deletes any e2e-rec-* org from THIS run. The admin DELETE is
# idempotent so double-invoking is safe.
- name: Teardown safety net (runs on cancel/failure)
if: always()
env:
ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
run: |
set +e
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
| python3 -c "
import json, sys, os, datetime
run_id = os.environ.get('GITHUB_RUN_ID', '')
d = json.load(sys.stdin)
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
# Slug shape: e2e-rec-YYYYMMDD-<run_id>-<attempt>-...
if run_id:
prefixes = tuple(f'e2e-rec-{d}-{run_id}-' for d in dates)
else:
prefixes = tuple(f'e2e-rec-{d}-' for d in dates)
candidates = [o['slug'] for o in d.get('orgs', [])
if any(o.get('slug','').startswith(p) for p in prefixes)
and o.get('instance_status') not in ('purged',)]
print('\n'.join(candidates))
" 2>/dev/null)
leaks=()
for slug in $orgs; do
echo "Safety-net teardown: $slug"
set +e
curl -sS -o /tmp/rec-cleanup.out -w "%{http_code}" \
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
-H "Authorization: Bearer $ADMIN_TOKEN" \
-H "Content-Type: application/json" \
-d "{\"confirm\":\"$slug\"}" >/tmp/rec-cleanup.code
set -e
code=$(cat /tmp/rec-cleanup.code 2>/dev/null || echo "000")
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
echo "[teardown] deleted $slug (HTTP $code)"
else
echo "::warning::reconciler teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/rec-cleanup.out 2>/dev/null)"
leaks+=("$slug")
fi
done
if [ ${#leaks[@]} -gt 0 ]; then
echo "::warning::reconciler teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
fi
exit 0
+2 -183
View File
@@ -48,10 +48,8 @@ on:
- 'workspace-server/internal/handlers/a2a_proxy.go'
- 'workspace-server/internal/middleware/**'
- 'workspace-server/internal/provisioner/**'
- 'workspace-server/internal/providers/providers.yaml'
- 'tests/e2e/test_staging_full_saas.sh'
- 'tests/e2e/lib/completion_assert.sh'
- 'tests/e2e/lib/model_slug.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- '.gitea/workflows/e2e-staging-saas.yml'
@@ -63,10 +61,8 @@ on:
- 'workspace-server/internal/handlers/a2a_proxy.go'
- 'workspace-server/internal/middleware/**'
- 'workspace-server/internal/provisioner/**'
- 'workspace-server/internal/providers/providers.yaml'
- 'tests/e2e/test_staging_full_saas.sh'
- 'tests/e2e/lib/completion_assert.sh'
- 'tests/e2e/lib/model_slug.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- '.gitea/workflows/e2e-staging-saas.yml'
@@ -124,12 +120,7 @@ jobs:
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
# Raised 45→75: step 10b now exercises pause→resume→online +
# hibernate→wake→online, each of which RE-PROVISIONS the parent (CP
# re-provision + heartbeat recovery, not a fresh EC2 cold start, but still
# minutes). The base provision→online→A2A matrix fits in ~35 min; the two
# extra lifecycle reprovisions need headroom under WORKSPACE_ONLINE_TIMEOUT.
timeout-minutes: 75
timeout-minutes: 45
permissions:
contents: read
@@ -177,32 +168,9 @@ jobs:
# and defeats the cost saving. Operators can override via the
# workflow_dispatch flow (no input wired here yet — runtime
# override is enough for ad-hoc).
#
# claude-code MiniMax slug must be the BARE registered id `MiniMax-M2.7`.
# It is the BYOK-minimax form: registry_gen.go:88 registers it on the
# `minimax` arm (resolves provider=minimax via MINIMAX_API_KEY), so the
# #1994 byok-not-platform guard still passes. The COLON form
# `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code (internal#718;
# derive_provider_matrix_test.go:288) — the claude-code adapter can't
# strip the `minimax:` prefix, so workspace-create 422s
# UNREGISTERED_MODEL_FOR_RUNTIME (real failure: job 295233, main 4b3590e3).
# The slash form `minimax/MiniMax-M2.7` is the platform-billed arm and
# would trip the byok guard. #2311 fixed the same colon-vs-bare bug in the
# pick_model_slug lib (tests/e2e/lib/model_slug.sh), but this env var
# OVERRIDES that lib, so the bare fix has to live here too.
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2.7' }}
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
# Lifecycle transitions (step 10b): pause→resume→online +
# hibernate→wake→online on the provisioned parent. `auto` runs them in
# full mode (this job). Set `off` to skip the ~2x-reprovision cost on an
# ad-hoc dispatch. The timeout-minutes above is sized for this being on.
E2E_LIFECYCLE: auto
# Fail-closed-on-skip: in CI the harness MUST prove ≥1 full
# provision→online→A2A cycle. If it reaches the end having validated
# nothing (a future short-circuit / skip path), it exits 5 rather than
# reporting a false green. Mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE.
E2E_REQUIRE_LIVE: '1'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -347,152 +315,3 @@ jobs:
echo "::warning::saas teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
fi
exit 0
# ── PLATFORM-MANAGED BOOT REGRESSION (moonshot/kimi NOT_CONFIGURED) ──────────
#
# The REAL-boot complement to the deterministic unit suite
# (workspace_provision_platform_boot_test.go). Provisions a REAL staging
# claude-code workspace on the PLATFORM-managed path — provider=platform,
# model=moonshot/kimi-k2.6, NO tenant LLM key — and asserts it reaches
# status=online (NOT not_configured) and a completion returns 200, via the same
# online-wait + completion-assert the BYOK job uses.
#
# Why a SEPARATE job (not a matrix leg of e2e-staging-saas): the platform path
# injects NO secret and pins a different model, so its env block diverges from
# the BYOK job's. A dedicated job keeps each path's "verify key present" preflight
# honest (BYOK requires a key; platform requires its ABSENCE not to matter) and
# gives the regression its own named commit-status for branch protection.
#
# Add `E2E Staging Platform Boot` to branch protection after 3 consecutive
# green runs on main (de-flake window; this path shares the cp#245
# boot-timeout flake surface the BYOK job has, so it must prove stable before
# it can BLOCK — see the gate-making plan in the PR body).
# bp-required: pending #2187
e2e-staging-platform-boot:
name: E2E Staging Platform Boot
runs-on: ubuntu-latest
# Phase 3 (RFC #219 §1): surface without blocking until the de-flake window
# closes. mc#1982: do NOT renew this mask silently — the gate-making plan
# tracks the flip to false under #2187.
continue-on-error: true
timeout-minutes: 45
permissions:
contents: read
env:
MOLECULE_CP_URL: https://staging-api.moleculesai.app
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
E2E_AWS_LEAK_CHECK: required
E2E_AWS_TERMINATE_LEAKS: '1'
# The regression combo: claude-code + platform-managed + moonshot/kimi-k2.6.
# NO E2E_*_API_KEY is set — platform-managed billing is owned by Molecule via
# the CP LLM proxy. The harness's E2E_LLM_PATH=platform branch sends empty
# secrets and pin-selects the platform model.
E2E_RUNTIME: claude-code
E2E_LLM_PATH: platform
# Smoke mode: a single parent workspace is enough to prove online +
# completion for the platform path (the A2A/delegation matrix is the BYOK
# job's job). Override E2E_DEFAULT_PLATFORM_MODEL via workflow_dispatch to
# exercise another platform model id.
E2E_MODE: smoke
E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
# Fail-closed-on-skip (see BYOK job). smoke mode still runs steps 2/4/7/8b,
# so all four required milestones (provisioned/tenant_online/
# workspace_online/a2a_roundtrip) fire — the guard is valid for this lane too.
E2E_REQUIRE_LIVE: '1'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Verify admin token present
run: |
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
exit 2
fi
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
if [ -z "${!var:-}" ]; then
echo "::error::$var not set — EC2 leak verification cannot run"
exit 2
fi
done
echo "Admin token present ✓"
- name: Assert NO BYOK key leaks into the platform run
run: |
# The whole point of this job is the platform-managed path. A stray
# E2E_*_API_KEY in the runner env would (via the harness) still be
# skipped by the E2E_LLM_PATH=platform branch — but assert their
# absence loudly here so a future env edit can't silently convert this
# into a masked BYOK run that no longer exercises the regression.
for var in E2E_MINIMAX_API_KEY E2E_ANTHROPIC_API_KEY E2E_OPENAI_API_KEY; do
if [ -n "${!var:-}" ]; then
echo "::warning::$var is set in this platform-boot job's env — the harness ignores it on E2E_LLM_PATH=platform, but it should not be wired here."
fi
done
echo "Platform-managed path: no tenant LLM key required ✓"
- name: CP staging health preflight
run: |
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
if [ "$code" != "200" ]; then
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
exit 1
fi
echo "Staging CP healthy ✓"
- name: Run platform-managed boot E2E (online + completion)
id: e2e
run: bash tests/e2e/test_staging_full_saas.sh
- name: Teardown safety net (runs on cancel/failure)
if: always()
env:
ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
run: |
set +e
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
| python3 -c "
import json, sys, os, datetime
run_id = os.environ.get('GITHUB_RUN_ID', '')
d = json.load(sys.stdin)
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
# smoke mode slugs are e2e-smoke-YYYYMMDD-platform-<run_id>-...
if run_id:
prefixes = tuple(f'e2e-smoke-{d}-platform-{run_id}-' for d in dates)
else:
prefixes = tuple(f'e2e-smoke-{d}-platform-' for d in dates)
candidates = [o['slug'] for o in d.get('orgs', [])
if any(o.get('slug','').startswith(p) for p in prefixes)
and o.get('instance_status') not in ('purged',)]
print('\n'.join(candidates))
" 2>/dev/null)
leaks=()
for slug in $orgs; do
echo "Safety-net teardown: $slug"
set +e
curl -sS -o /tmp/plat-cleanup.out -w "%{http_code}" \
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
-H "Authorization: Bearer $ADMIN_TOKEN" \
-H "Content-Type: application/json" \
-d "{\"confirm\":\"$slug\"}" >/tmp/plat-cleanup.code
set -e
code=$(cat /tmp/plat-cleanup.code 2>/dev/null || echo "000")
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
echo "[teardown] deleted $slug (HTTP $code)"
else
echo "::warning::platform-boot teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/plat-cleanup.out 2>/dev/null)"
leaks+=("$slug")
fi
done
if [ ${#leaks[@]} -gt 0 ]; then
echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
fi
exit 0
@@ -1,129 +0,0 @@
name: E2E Workspace Lifecycle (staginge2e)
# core#2332 P1.10 — close the workspace-lifecycle coverage gap.
#
# soft-restart / pause / resume / hibernate were only unit-tested (httptest in
# workspace-server/internal/handlers/*_test.go) and never proven against a real
# container. This drives the Go staginge2e suite
# (workspace-server/internal/staginge2e/workspace_lifecycle_test.go) which
# provisions a REAL throwaway staging tenant, exercises each lifecycle endpoint,
# and asserts OBSERVABLE container state (status transitions + serve reachability
# + url-cleared-on-stop) — not just HTTP 200.
#
# ADVISORY-BY-INFRA. It needs a live staging tenant (~30+ min cold EC2 path), so
# the real run is workflow_dispatch / schedule only — NOT per-PR and NOT a
# required check. Promotion to a required branch-protection context is a separate
# CTO decision (mirrors the cp internal/staginge2e suite, cp#386, and the
# peer-visibility flip-to-required pattern, molecule-core#1296).
#
# HONEST GATE — NO continue-on-error mask (feedback_fix_root_not_symptom). The
# PR job validates that the suite COMPILES under -tags=staging_e2e and SKIPs LOUD
# without creds (the suite's contract) — a broken test file fails at PR time. The
# real assertion runs on dispatch/cron with staging creds.
#
# Gitea 1.22.6 / act_runner notes honored: no cross-repo uses (mirrored
# actions/checkout SHA), per-SHA concurrency, pinned GITHUB_SERVER_URL.
on:
push:
branches: [main]
paths:
- 'workspace-server/internal/handlers/workspace_restart.go'
- 'workspace-server/internal/handlers/workspace_crud.go'
- 'workspace-server/internal/staginge2e/**'
- '.gitea/workflows/e2e-workspace-lifecycle.yml'
pull_request:
branches: [main]
paths:
- 'workspace-server/internal/handlers/workspace_restart.go'
- 'workspace-server/internal/handlers/workspace_crud.go'
- 'workspace-server/internal/staginge2e/**'
- '.gitea/workflows/e2e-workspace-lifecycle.yml'
workflow_dispatch:
schedule:
# 08:00 UTC daily — offset from e2e-staging-saas (07:00) and
# e2e-peer-visibility (07:30) so the three don't collide on the staging
# org-creation quota.
- cron: '0 8 * * *'
concurrency:
# Per-SHA (feedback_concurrency_group_per_sha).
group: e2e-workspace-lifecycle-${{ github.event.pull_request.head.sha || github.sha }}
cancel-in-progress: false
env:
GITHUB_SERVER_URL: https://git.moleculesai.app
jobs:
# PR / compile gate: prove the staginge2e suite compiles under the build tag
# and skips LOUD without creds. Cheap, honest, non-required. This is NOT a
# fake-green mask of the real assertion — it fails if the test file stops
# compiling. bp-required: pending CTO decision (see header).
lifecycle-compile-skip:
name: E2E Workspace Lifecycle (compile+skip)
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: go vet (staging_e2e tag)
working-directory: workspace-server
run: go vet -tags staging_e2e ./internal/staginge2e/...
- name: Compile + skip-run (must SKIP LOUD without STAGING_E2E)
working-directory: workspace-server
run: |
# No STAGING_E2E / creds → the suite MUST skip (not pass-with-zero-
# assertions, not fail-open). `go test` exit 0 with a SKIP line is the
# contract. -run pins to the one test so this stays fast.
out=$(go test -tags staging_e2e ./internal/staginge2e/ -run TestWorkspaceLifecycle -count=1 -v 2>&1)
echo "$out"
echo "$out" | grep -q "SKIP: TestWorkspaceLifecycle_Staging" \
|| { echo "::error::expected a LOUD skip of TestWorkspaceLifecycle_Staging without creds"; exit 1; }
# Real STAGING gate: provisions a throwaway tenant, drives the lifecycle
# endpoints, asserts observable transitions, scoped teardown.
# dispatch / schedule only (30+ min cold EC2).
lifecycle-staging:
name: E2E Workspace Lifecycle (staging)
runs-on: ubuntu-latest
if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
timeout-minutes: 60
env:
CP_BASE_URL: https://staging-api.moleculesai.app
CP_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
STAGING_E2E: '1'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: Verify admin token present
run: |
if [ -z "$CP_ADMIN_API_TOKEN" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
exit 2
fi
echo "Admin token present"
- name: CP staging health preflight
run: |
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$CP_BASE_URL/health")
if [ "$code" != "200" ]; then
echo "::error::Staging CP unhealthy (HTTP $code) — infra, not a lifecycle bug. Failing loud per feedback_fix_root_not_symptom."
exit 1
fi
echo "Staging CP healthy"
- name: Run workspace-lifecycle staginge2e
working-directory: workspace-server
run: go test -tags staging_e2e ./internal/staginge2e/ -run TestWorkspaceLifecycle_Staging -count=1 -v -timeout 50m
# Teardown: the test installs a t.Cleanup admin-DELETE of its own tenant
# (runs even on a Fatal). We deliberately do NOT add a broad in-workflow
# "sweep all e2e-life-* slugs" net here — that could delete a concurrently
# running dispatch's fresh tenant (the slug is not run-id scoped). The
# age-guarded `sweep-stale-e2e-orgs` workflow (30-min floor, e2e- prefix)
# is the final safety net for a tenant orphaned by a hard runner cancel.
+6 -33
View File
@@ -7,13 +7,10 @@ name: gitea-merge-queue
# the user-space queue bot, one PR per tick, using the non-bypass merge actor.
#
# Queue contract:
# - auto-discovery (default): any open same-repo PR is considered — no
# `merge-queue` label required (the label is optional metadata now)
# - add label `merge-queue` to an open same-repo PR
# - bot updates stale PR heads with current main, then waits for CI
# - bot merges only when current main is green, genuine approvals are present
# on the current head, required PR contexts pass, and the PR is mergeable
# - add `merge-queue-hold`, `do-not-auto-merge`, or `wip` to keep a PR OUT of
# autonomous merging; draft PRs are also skipped
# - bot merges only when current main is green and required PR contexts pass
# - add `merge-queue-hold` to pause a queued PR without removing it
on:
# Schedule moved to operator-config:
@@ -51,34 +48,10 @@ jobs:
WATCH_BRANCH: ${{ github.event.repository.default_branch }}
QUEUE_LABEL: merge-queue
HOLD_LABEL: merge-queue-hold
# Auto-discovery (opt-OUT). When on (default), the queue considers ALL
# open same-repo PRs that meet the merge bar — it does NOT wait for a
# human/agent to add `merge-queue`. Agent Gitea tokens lack
# write:issue (labels are issue-scoped) and could never self-label,
# which stalled the queue; the label is now OPTIONAL metadata. The
# merge bar is UNCHANGED — only candidate selection widens. Set
# AUTO_DISCOVER=0 to restore legacy opt-IN (require the merge-queue
# label to be considered).
AUTO_DISCOVER: "1"
# Opt-OUT labels: any of these on a PR keeps it OUT of autonomous
# merging (the human escape hatch). HOLD_LABEL is always also honoured.
# A human who wants a PR held just adds one of these labels.
OPT_OUT_LABELS: do-not-auto-merge,wip
UPDATE_STYLE: merge
# Recognised official-reviewer set. A merge needs >= required_approvals
# DISTINCT genuine official approvals from these accounts on the
# CURRENT head sha (not stale/dismissed). The required_approvals count
# itself is read from branch protection at runtime.
REVIEWER_SET: agent-reviewer,agent-researcher,agent-reviewer-cr2
# NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
# queue now reads the required status contexts from BRANCH PROTECTION
# (status_check_contexts) so non-required governance reds (qa-review,
# security-review, sop-checklist when not branch-required,
# E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
# If branch protection cannot be enumerated the queue HOLDS
# (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
# branch protection does not specify required_approvals.
REQUIRED_APPROVALS: "2"
REQUIRED_CONTEXTS: >-
CI / all-required (pull_request),
sop-checklist / all-items-acked (pull_request)
# Push-side required contexts. Checking CI / all-required (push)
# explicitly instead of the combined state avoids false-pause when
# non-blocking jobs (continue-on-error: true) have failed — those
@@ -88,9 +88,8 @@ jobs:
# surprises and keeps the routing rule discoverable in one place.
runs-on: docker-host
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
outputs:
handlers: ${{ steps.filter.outputs.handlers }}
steps:
@@ -120,9 +119,8 @@ jobs:
# exists). See detect-changes for the full routing rationale.
runs-on: docker-host
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
env:
# Unique name per run so concurrent jobs don't collide on the
# bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
@@ -243,8 +241,7 @@ jobs:
# MUST exist for the integration tests to be meaningful. Hard-
# fail if any didn't land — that would be a real regression we
# want loud.
# workspace_schedules added for the #2149 scheduler integration tests.
for tbl in delegations workspaces activity_logs pending_uploads workspace_schedules; do
for tbl in delegations workspaces activity_logs pending_uploads; do
if ! psql -h "${PG_HOST}" -U postgres -d molecule -tA \
-c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
| grep -q 1; then
@@ -254,19 +251,6 @@ jobs:
echo "✓ $tbl table present"
done
- if: needs.detect-changes.outputs.handlers == 'true'
name: Preflight — INTEGRATION_DB_URL must be present
run: |
# Belt-and-suspenders: if the postgres-start step failed to
# export INTEGRATION_DB_URL, fail loud BEFORE go test can
# t.Skip its way to a green build. Closes the workflow-level
# fail-open gap identified in PR #2166 blocker #2.
if [ -z "${INTEGRATION_DB_URL:-}" ]; then
echo "::error::INTEGRATION_DB_URL is empty — postgres-start step did not export the connection string"
exit 1
fi
echo "INTEGRATION_DB_URL is set"
- if: needs.detect-changes.outputs.handlers == 'true'
name: Run integration tests
run: |
@@ -275,16 +259,6 @@ jobs:
# workflow runs don't fight over a host-net 5432 port.
go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_"
- if: needs.detect-changes.outputs.handlers == 'true'
name: Run scheduler integration tests (#2149)
run: |
# #2149: real-PG regression coverage for the scheduler firing loop
# (tick → A2A fire → write-back of last_run_at/next_run_at/run_count/
# activity_logs jsonb incl. invalid-UTF-8 sanitization + sweepPhantomBusy).
# Reuses the same migrated Postgres (workspace_schedules / activity_logs
# / workspaces all landed by the migration replay step above).
go test -tags=integration -timeout 5m -v ./internal/scheduler/ -run "^TestIntegration_"
- if: failure() && needs.detect-changes.outputs.handlers == 'true'
name: Diagnostic dump on failure
env:
@@ -99,7 +99,7 @@ jobs:
# all violate this lint at first — intentional. Flip to false
# follow-up after main is clean for 3 days. mc#1982.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true # internal#837 Phase 3 mask — 14d forced-renewal cadence
continue-on-error: true # mc#1982 Phase 3 mask — 14d forced-renewal cadence
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
@@ -61,9 +61,11 @@ name: Lint pre-flip continue-on-error
# feedback_no_shared_persona_token_use.
#
# Phase contract (RFC internal#219 §1 ladder):
# - Flipped to `continue-on-error: false` after Researcher live-verified
# clean runs. The script's own 35 pytest tests pass and recent PR
# history shows no masked regressions — the gate is now enforcing.
# - This workflow lands at `continue-on-error: true` (Phase 3 —
# surface defects without blocking). Follow-up PR flips it to
# `false` ONLY after this workflow's own recent runs on `main`
# are confirmed clean — exactly the discipline the workflow
# itself enforces. Eat your own dogfood.
on:
pull_request:
@@ -95,9 +97,10 @@ jobs:
name: Verify continue-on-error flips have run-log proof
runs-on: ubuntu-latest
timeout-minutes: 8
# Fail-closed: the lint script is verified clean (35/35 tests pass,
# Researcher live-check confirmed). Masking removed per mc#1982 close-out.
continue-on-error: false
# Phase 3 (RFC internal#219 §1): surface broken flips without blocking
# the PR yet. Follow-up flips this to `false` once the workflow itself
# has clean recent runs on main. mc#1982 interim — remove when CoE→false.
continue-on-error: true # mc#1982
steps:
- name: Check out PR head (full history for base-SHA access)
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+1 -1
View File
@@ -19,7 +19,7 @@
# Forward-compat scope:
# Today (2026-05-11) molecule-core/main protects 3 contexts:
# - "Secret scan / Scan diff for credential-shaped strings (pull_request)"
# - "sop-checklist / tier-check (pull_request)"
# - "sop-tier-check / tier-check (pull_request)"
# - "CI / all-required (pull_request)"
# Per RFC#324 Step 2 the required-list expands to ~5 contexts
# (qa-review, security-review added). Each new required context's
@@ -40,7 +40,6 @@ env:
GITHUB_SERVER_URL: https://git.moleculesai.app
jobs:
# bp-exempt: informational lint enforcing docker-host/publish pin convention (internal#512), not a merge gate
lint-docker-host-pin:
name: Lint docker-host pin on docker-touching workflows
runs-on: docker-host
@@ -49,56 +49,37 @@ jobs:
GITHUB_SERVER_URL: https://git.moleculesai.app
steps:
- name: Identify runner
id: identify
continue-on-error: true
run: |
set -eu
echo "arch=$(uname -m)"
echo "kernel=$(uname -sr)"
echo "shell=$BASH_VERSION"
# Sanity: must actually be arm64. If amd64 sneaks in here,
# the job skips gracefully rather than hard-failing, because
# a mislabelled runner is an ops concern, not a code defect.
# Pilot lane must not make main red (#2146).
# fail fast — that means the label routing is wrong.
case "$(uname -m)" in
aarch64|arm64)
echo "arm64 confirmed"
echo "arm64=true" >> "$GITHUB_OUTPUT"
;;
*)
echo "ERROR: expected arm64, got $(uname -m) — label routing may be wrong"
echo "arm64=false" >> "$GITHUB_OUTPUT"
exit 1
;;
aarch64|arm64) echo "arm64 confirmed" ;;
*) echo "ERROR: expected arm64, got $(uname -m)"; exit 1 ;;
esac
- name: Checkout
if: steps.identify.outputs.arm64 == 'true'
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Install shellcheck (arm64)
if: steps.identify.outputs.arm64 == 'true'
continue-on-error: true
run: |
set -eu
if command -v shellcheck >/dev/null 2>&1; then
echo "shellcheck already present: $(shellcheck --version | head -1)"
else
# Prefer apt if the runner base ships it; else download the
# correct platform binary (darwin vs linux).
# Prefer apt if the runner base ships it; else download arm64 binary.
if command -v apt-get >/dev/null 2>&1; then
sudo apt-get update -qq
sudo apt-get install -y --no-install-recommends shellcheck
else
SC_VER=v0.10.0
if [ "$(uname -s)" = "Darwin" ]; then
SC_PKG="shellcheck-${SC_VER}.darwin.aarch64.tar.xz"
else
SC_PKG="shellcheck-${SC_VER}.linux.aarch64.tar.xz"
fi
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/${SC_PKG}" \
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/shellcheck-${SC_VER}.linux.aarch64.tar.xz" \
| tar -xJf - --strip-components=1
sudo mv shellcheck /usr/local/bin/
fi
@@ -106,15 +87,14 @@ jobs:
shellcheck --version | head -2
- name: Run shellcheck on .gitea/scripts/*.sh
if: steps.identify.outputs.arm64 == 'true'
continue-on-error: true
run: |
set -eu
# Only the scripts we control under .gitea/scripts. Pilot
# scope is intentionally narrow — broaden in a follow-up
# once the lane is proven.
if ! command -v shellcheck >/dev/null 2>&1 || ! shellcheck --version >/dev/null 2>&1; then
echo "WARN: shellcheck not functional — skipping (pilot mode)"
if ! command -v shellcheck >/dev/null 2>&1; then
echo "WARN: shellcheck binary not found — skipping (pilot mode)"
exit 0
fi
# NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
+1 -1
View File
@@ -16,7 +16,7 @@ name: Lint workflow YAML (Gitea-1.22.6-hostile shapes)
#
# Empirical history this hardens against:
# - status-reaper rev1 caught rule-4 (name-collision) class
# - sop-checklist DOA'd on rule-2 (workflow_run partial)
# - sop-tier-refire DOA'd on rule-2 (workflow_run partial)
# - #319 bootstrap-paradox (chained-defect class, related)
# - internal#329 dispatcher race (adjacent)
# - 2026-05-11 publish-runtime: rule-1, 24h PyPI freeze
+2 -2
View File
@@ -95,10 +95,10 @@ jobs:
# included here — staging green is a separate gate
# (`feedback_staging_e2e_merge_gate`).
WATCH_BRANCH: 'main'
# Issue label applied on file/open. `ci-bp-drift` exists in the
# Issue label applied on file/open. `tier:high` exists in the
# molecule-core label set (verified 2026-05-11, label id 9).
# Rationale for high: main red blocks the promotion train and
# poisons every PR's auto-rebase base; treat as a fire even
# if intermittent.
RED_LABEL: 'ci-bp-drift'
RED_LABEL: 'tier:high'
run: python3 .gitea/scripts/main-red-watchdog.py
+5 -153
View File
@@ -14,37 +14,10 @@ name: publish-canvas-image
# authenticate to ghcr.io.
#
# Builds, pushes, and (ordered) deploys the standalone canvas Docker image to
# ECR whenever a commit lands on main that touches canvas code.
#
# Ordered deploy (core#2226) — mirrors publish-workspace-server-image.yml so the
# standalone `molecule-ai/canvas` image is deterministic + verifiable, not a
# side effect of the platform fleet pulling a mutable `:latest`:
#
# build-and-push: build → push :staging-<sha> + :staging-latest + :sha-<sha>
# (does NOT move :latest — an unpromoted build must never
# become the prod-blessed tag).
# promote-canvas: waits for green main CI on this SHA, then re-points
# :latest to the verified :staging-<sha> by digest
# (imagetools create — no rebuild). So `:latest` == the
# current prod-blessed canvas, byte-identical to staging-<sha>.
#
# Tag scheme produced (parallels platform-tenant):
# :staging-<sha> — per-commit immutable digest, what docker-compose pins to.
# :staging-latest — most recent BUILD on main (last-writer-wins, NOT gated).
# :sha-<sha> — kept for back-compat with any consumer pinning the old tag.
# :latest — most recent CI-GREEN build. Only moved by promote-canvas.
#
# WHY this is the canvas analogue of the platform's deploy-production, not a
# literal copy: the standalone canvas co-deploys with the platform on the same
# host via the root docker-compose.yml (`docker compose pull && up -d`). Gating
# the canvas `:latest` promotion on the SAME green-main-CI signal the platform
# deploy waits on makes platform + canvas roll together by the same SHA. The
# canvas has no per-tenant fleet of its own and no /buildinfo endpoint, so there
# is no fleet-rollout / per-tenant verify step to mirror here — CI-green +
# digest-pin + immutable :staging-<sha> is the determinism contract. (A future
# canvas /buildinfo would let this assert the served SHA like the platform does;
# tracked in core#2226.)
# Builds and pushes the canvas Docker image to ECR whenever a commit lands
# on main that touches canvas code. Previously canvas changes were visible in
# CI (npm run build passed) but the live container was never updated —
# operators had to manually run `docker compose build canvas` each time.
#
# Mirror of publish-platform-image.yml, adapted for the Next.js canvas layer.
# See that workflow for inline notes on macOS Keychain isolation and QEMU.
@@ -57,7 +30,6 @@ on:
# platform-only / docs-only / MCP-only merges.
- 'canvas/**'
- '.gitea/workflows/publish-canvas-image.yml'
workflow_dispatch:
# NOTE (Gitea port): the original GitHub workflow had a
# `workflow_dispatch:` manual trigger for the
# non-canvas-merge-but-need-fresh-image scenario. Dropped in the
@@ -97,10 +69,6 @@ jobs:
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
outputs:
# Exposed so promote-canvas re-points :latest to the EXACT per-commit tag
# this build produced (digest-level), never a re-resolved mutable tag.
staging_sha: ${{ steps.tags.outputs.staging_sha }}
steps:
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -172,7 +140,6 @@ jobs:
shell: bash
run: |
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
echo "staging_sha=staging-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
- name: Resolve build args
id: build_args
@@ -208,19 +175,8 @@ jobs:
build-args: |
NEXT_PUBLIC_PLATFORM_URL=${{ steps.build_args.outputs.platform_url }}
NEXT_PUBLIC_WS_URL=${{ steps.build_args.outputs.ws_url }}
# Bake the merge SHA into the image so /api/buildinfo reports the
# served canvas SHA (core#2235). Mirrors how the platform image
# surfaces GIT_SHA at /buildinfo. Full 40-char SHA (not the
# 7-char tag) so the fleet redeploy verification can match exactly.
BUILD_SHA=${{ github.sha }}
# Ordered deploy (core#2226): the build job pushes the immutable
# per-commit tag + the build-tracking staging-latest + the legacy
# back-compat :sha-<sha> tag. It does NOT push :latest — :latest is
# the prod-blessed tag and is only re-pointed by promote-canvas after
# green main CI, so an unpromoted/red build can never become :latest.
tags: |
${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.staging_sha }}
${{ env.IMAGE_NAME }}:staging-latest
${{ env.IMAGE_NAME }}:latest
${{ env.IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
# Gitea artifact-cache reachability is best-effort on the operator
# runner network. Do not let cache export fail an image that already
@@ -229,107 +185,3 @@ jobs:
org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
org.opencontainers.image.revision=${{ github.sha }}
org.opencontainers.image.description=Molecule AI canvas (Next.js 15 + React Flow)
# bp-exempt: post-merge canvas promote side-effect; merge is gated by CI /
# all-required and this job waits for green push CI on the SHA before acting.
promote-canvas:
name: Promote canvas :latest to CI-green build
needs: build-and-push
# Only on a real main push — workflow_dispatch / non-main never promotes.
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
# Side-effect deploy only; the image publish above is the durable artifact.
# mc#1982: do NOT renew this mask silently — it mirrors deploy-production's
# contract (a flaky promote must not red the ship lane), tracked in core#2226.
continue-on-error: true
runs-on: publish
timeout-minutes: 60
env:
# Same green-main-CI gate the platform deploy-production waits on, so
# platform + canvas advance :latest off the identical signal/SHA.
GITEA_HOST: git.moleculesai.app
GITEA_TOKEN: ${{ secrets.PROD_AUTO_DEPLOY_CONTROL_TOKEN || secrets.AUTO_SYNC_TOKEN }}
CI_STATUS_TIMEOUT_SECONDS: "3600"
# Re-uses the platform's disable kill-switch: when prod auto-deploy is
# paused, the canvas :latest promote pauses too (correct — an unpromoted
# build must not become :latest while the fleet is frozen).
PROD_AUTO_DEPLOY_DISABLED: ${{ vars.PROD_AUTO_DEPLOY_DISABLED || secrets.PROD_AUTO_DEPLOY_DISABLED || '' }}
steps:
# The publish runner's default HOME (/home/hongming) is not writable, so
# docker credential saves fail and halt the promote (#2193 on the platform
# side). Point HOME + DOCKER_CONFIG at the writable job temp dir.
- name: Prepare writable HOME + Docker config
run: |
set -euo pipefail
H="$RUNNER_TEMP/canvas-promote-home"
mkdir -p "$H/.docker"
echo "HOME=$H" >> "$GITHUB_ENV"
echo "DOCKER_CONFIG=$H/.docker" >> "$GITHUB_ENV"
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Resolve promote gate
id: gate
env:
PROD_AUTO_DEPLOY_DISABLED: ${{ env.PROD_AUTO_DEPLOY_DISABLED }}
run: |
set -euo pipefail
if [ -n "${PROD_AUTO_DEPLOY_DISABLED:-}" ]; then
case "$(printf '%s' "$PROD_AUTO_DEPLOY_DISABLED" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on|disabled|disable)
echo "enabled=false" >> "$GITHUB_OUTPUT"
echo "::notice::Canvas :latest promote skipped: PROD_AUTO_DEPLOY_DISABLED=$PROD_AUTO_DEPLOY_DISABLED"
{
echo "## Canvas :latest promote skipped"
echo ""
echo "Reason: \`PROD_AUTO_DEPLOY_DISABLED=$PROD_AUTO_DEPLOY_DISABLED\`. The CI-green build is published as \`:staging-${GITHUB_SHA::7}\`; \`:latest\` was left unchanged."
} >> "$GITHUB_STEP_SUMMARY"
exit 0 ;;
esac
fi
if [ -z "${GITEA_TOKEN:-}" ]; then
echo "::error::AUTO_SYNC_TOKEN/PROD_AUTO_DEPLOY_CONTROL_TOKEN is required so the canvas promote can wait for green CI."
exit 1
fi
echo "enabled=true" >> "$GITHUB_OUTPUT"
- name: Wait for green main CI on this SHA
if: ${{ steps.gate.outputs.enabled == 'true' }}
run: |
set -euo pipefail
# Same SSOT wait the platform deploy uses: blocks until the required
# push contexts (CI / all-required (push) + Secret scan) go green on
# THIS sha, and fails closed if any required context terminally fails.
python3 .gitea/scripts/prod-auto-deploy.py wait-ci
- name: Promote canvas :latest to the CI-green image
if: ${{ steps.gate.outputs.enabled == 'true' }}
env:
IMAGE_NAME: ${{ env.IMAGE_NAME }}
STAGING_SHA_TAG: ${{ needs.build-and-push.outputs.staging_sha }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
run: |
set -euo pipefail
# Fail-safe: if the build job's output didn't propagate, recompute the
# immutable per-commit tag from the SHA so we never promote a guess.
SHA_TAG="${STAGING_SHA_TAG:-staging-${GITHUB_SHA::7}}"
ECR_REGISTRY="${IMAGE_NAME%%/*}"
aws ecr get-login-password --region us-east-2 | \
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
# Digest-level re-tag (no pull/rebuild): :latest becomes byte-identical
# to the verified :staging-<sha> for this commit.
docker buildx imagetools create \
--tag "${IMAGE_NAME}:latest" \
"${IMAGE_NAME}:${SHA_TAG}"
{
echo "## Canvas :latest promoted"
echo ""
echo "Re-pointed \`molecule-ai/canvas:latest\` → \`${SHA_TAG}\` (by digest)."
echo ":latest now tracks the CI-green canvas build for commit \`${GITHUB_SHA::7}\`."
echo ""
echo "Tenants/hosts that \`docker compose pull canvas\` now get the same build the platform deploy rolled for this SHA."
} >> "$GITHUB_STEP_SUMMARY"
@@ -16,24 +16,14 @@ name: publish-workspace-server-image
#
# Image tags produced:
# :staging-<sha> — per-commit digest, stable for canary verify
# :staging-latest — tracks most recent BUILD on this branch (set by the
# build job, last-writer-wins, NOT prod-gated)
# :latest — tracks the most recent PROD-PROMOTED build. Re-pointed by the
# deploy-production job ONLY after green main CI + canary +
# fleet rollout + /buildinfo verification pass. So :latest ==
# "current prod image", never the raw build. (Added 2026-06-03
# after a stale :latest — last moved 2026-05-10 — reverted a
# production tenant on a no-arg redeploy.)
# :staging-latest — tracks most recent build on this branch
#
# Production auto-deploy:
# After both platform and tenant images are pushed, deploy-production waits
# for strict required push contexts on the same SHA to go green, then
# calls the production CP redeploy-fleet endpoint with target_tag=
# staging-<sha>. On success (rollout + buildinfo verified) it re-points
# :latest to the same SHA. Set repo variable or secret
# PROD_AUTO_DEPLOY_DISABLED=true to stop production rollout while keeping
# image publishing enabled — in which case :latest is NOT advanced either
# (correct: an unpromoted build must not become :latest).
# staging-<sha>. Set repo variable or secret PROD_AUTO_DEPLOY_DISABLED=true
# to stop production rollout while keeping image publishing enabled.
#
# Primary ECR target: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*
# Optional staging tenant mirror target:
@@ -115,26 +105,6 @@ jobs:
echo "Docker daemon OK"
echo "::endgroup::"
# Pre-flight: verify every repo in manifest.json actually exists.
#
# Why: deleting a template repo without updating manifest.json breaks
# clone-manifest.sh with a generic git 404, which looks like a
# transient network error and wastes debug time. We catch it here
# with a per-entry ::error:: annotation naming the missing repo
# (issue #2192). This is the push-time complement to PR #2186's
# PR-time manifest-entry-existence gate.
#
# Token: workspace-template-* repos are PRIVATE, so the existence check
# must authenticate (same AUTO_SYNC_TOKEN as the clone step). Without it
# an unauthenticated GET 404s on private repos and false-prunes them
# (regression that dropped seo-agent/google-adk from the palette).
- name: Validate manifest entries exist
env:
MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
run: |
set -euo pipefail
bash scripts/check-manifest-repos-exist.sh manifest.json
# Pre-clone manifest deps before docker build.
#
# Why: workspace-template-* repos on Gitea are private. The pre-fix
@@ -282,25 +252,7 @@ jobs:
PROD_AUTO_DEPLOY_BATCH_SIZE: ${{ vars.PROD_AUTO_DEPLOY_BATCH_SIZE || '3' }}
PROD_AUTO_DEPLOY_DRY_RUN: ${{ vars.PROD_AUTO_DEPLOY_DRY_RUN || '' }}
PROD_ALLOW_NON_PROD_CP_URL: ${{ vars.PROD_ALLOW_NON_PROD_CP_URL || '' }}
# #2213: per-tenant /buildinfo settle budget. A freshly-swapped tenant can
# keep serving the old image at the edge for a short drain window; the
# verify step polls each tenant up to this budget before declaring it stale.
PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS: ${{ vars.PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS || '240' }}
PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS: ${{ vars.PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS || '20' }}
steps:
# The publish runner's default HOME (/home/hongming) is not writable, so
# git/docker credential saves fail (`Error saving credentials: mkdir
# /home/hongming: permission denied`) and halt the production rollout
# (#2193). Point HOME + DOCKER_CONFIG at the writable job temp dir —
# mirrors build-and-push's "Prepare writable Docker config" fix above.
- name: Prepare writable HOME + Docker config
run: |
set -euo pipefail
H="$RUNNER_TEMP/auto-deploy-home"
mkdir -p "$H/.docker"
echo "HOME=$H" >> "$GITHUB_ENV"
echo "DOCKER_CONFIG=$H/.docker" >> "$GITHUB_ENV"
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -345,50 +297,8 @@ jobs:
set -euo pipefail
python3 .gitea/scripts/prod-auto-deploy.py wait-ci
# Superseded-job guard — BEFORE any production side effect (#2213).
#
# This workflow has no `concurrency:` (see header: Gitea 1.22.6 cancels
# queued prod deploys). So two close main pushes run BOTH deploy-production
# jobs. The verify step already skips its strict /buildinfo check when this
# job is superseded (#2194) — but that guard was AFTER the redeploy and the
# :latest promote, so an OLDER job that started late still:
# 1. rolled the whole fleet BACKWARD to its older tag (canary hongming
# was reverted from the newer SHA — the #2213 red), then
# 2. promoted :latest backward to the older image,
# and only THEN skipped verify and exited green. A superseded job must do
# NEITHER. We re-check the branch head here, immediately before the rollout,
# and skip every side effect when a newer commit already owns main.
#
# exit 0 + non-empty stdout => superseded (newer head printed); the redeploy
# and promote steps are gated off via this output. exit 10 => this job is
# still the latest, proceed to roll the fleet. Fail-safe: a head that can't
# be read returns NOT-superseded (exit 10), so a genuine deploy is never
# silently skipped. (Re-checked again at verify time to catch a newer job
# that lands DURING this rollout.)
- name: Check superseded before production side effects
id: supersede
if: ${{ steps.plan.outputs.enabled == 'true' }}
run: |
set -euo pipefail
set +e
NEWER_HEAD="$(python3 .gitea/scripts/prod-auto-deploy.py check-superseded)"
SUPERSEDED_EXIT=$?
set -e
if [ "$SUPERSEDED_EXIT" -eq 0 ] && [ -n "$NEWER_HEAD" ]; then
echo "superseded=true" >> "$GITHUB_OUTPUT"
echo "::notice::Superseded before rollout: main head is now ${NEWER_HEAD:0:7} (this job deploys ${GITHUB_SHA:0:7}). Skipping redeploy + :latest promote so an older job never rolls the fleet backward."
{
echo "## Production auto-deploy skipped — superseded before rollout"
echo ""
echo "This deploy job's SHA \`${GITHUB_SHA:0:7}\` is no longer the head of \`main\` (now \`${NEWER_HEAD:0:7}\`)."
echo "A newer deploy job owns the fleet; rolling it backward to this older build would revert tenants and \`:latest\`. No side effects performed."
} >> "$GITHUB_STEP_SUMMARY"
else
echo "superseded=false" >> "$GITHUB_OUTPUT"
fi
- name: Call production CP redeploy-fleet
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
if: ${{ steps.plan.outputs.enabled == 'true' }}
run: |
set -euo pipefail
python3 .gitea/scripts/prod-auto-deploy.py assert-enabled
@@ -447,66 +357,18 @@ jobs:
fi
- name: Verify reachable tenants report this SHA
# Skip when superseded BEFORE rollout: the redeploy step did not run, so
# there is no redeploy-fleet response to verify against and the newer job
# owns verification (#2213). The in-step guard below still catches the
# case where a newer job lands DURING this job's rollout.
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
if: ${{ steps.plan.outputs.enabled == 'true' }}
env:
TENANT_DOMAIN: moleculesai.app
run: |
set -euo pipefail
RESP="$RUNNER_TEMP/prod-redeploy-response.json"
# Superseded-job guard. This workflow has no `concurrency:` (header
# explains why: Gitea 1.22.6 cancels queued prod deploys). So two
# close main pushes run BOTH deploy-production jobs. The newer one
# rolls the fleet to its (newer) build first; this older job's strict
# equality check below would then see tenants on the NEWER SHA and
# false-red "$slug is stale" even though the fleet is AHEAD, not
# behind (git SHAs aren't ordered; /buildinfo exposes only git_sha).
#
# If main's current head is no longer THIS job's SHA, a newer commit
# has landed and this deploy is superseded — the newest job's verify
# is authoritative. Skip strict verify and succeed. exit 0 => newer
# head printed (superseded); exit 10 => still the latest, proceed to
# the strict verify so a genuinely-behind tenant still fails loudly.
set +e
NEWER_HEAD="$(python3 .gitea/scripts/prod-auto-deploy.py check-superseded)"
SUPERSEDED_EXIT=$?
set -e
if [ "$SUPERSEDED_EXIT" -eq 0 ] && [ -n "$NEWER_HEAD" ]; then
echo "::notice::Superseded deploy: main head is now ${NEWER_HEAD:0:7} (this job deployed ${GITHUB_SHA:0:7}). The fleet is at or ahead of this build; the newer deploy job's verify is authoritative. Skipping strict SHA verify."
{
echo ""
echo "### Buildinfo verification skipped — superseded deploy"
echo ""
echo "This deploy job's SHA \`${GITHUB_SHA:0:7}\` is no longer the head of \`main\` (now \`${NEWER_HEAD:0:7}\`)."
echo "A newer deploy job is rolling the fleet forward; its verify is authoritative."
} >> "$GITHUB_STEP_SUMMARY"
exit 0
fi
mapfile -t SLUGS < <(jq -r '.results[]? | .slug' "$RESP")
if [ ${#SLUGS[@]} -eq 0 ]; then
echo "::error::No tenants returned from redeploy-fleet; refusing to mark production deploy verified."
exit 1
fi
# Per-tenant settle/retry budget (#2213). A tenant whose container the
# CP just swapped can keep serving the OLD image at the edge for a short
# window while the old container drains — /buildinfo returns HTTP 200
# with the previous SHA, which `curl --retry` does NOT retry (it only
# retries connection/5xx failures, not a stale-but-200 body). Without a
# settle window a still-rolling tenant false-reds "stale" on the very
# first poll. So poll each tenant's /buildinfo until it reports the
# target SHA or the budget is exhausted; only THEN declare it stale or
# unreachable. This never masks a genuinely stuck tenant — a tenant that
# never reaches the target within the budget still fails loud (and the
# superseded-job revert class is already blocked before rollout above).
SETTLE_BUDGET_SECONDS="${PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS:-240}"
SETTLE_INTERVAL_SECONDS="${PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS:-20}"
STALE_COUNT=0
UNREACHABLE_COUNT=0
UNHEALTHY_COUNT=0
@@ -518,36 +380,18 @@ jobs:
continue
fi
url="https://${slug}.${TENANT_DOMAIN}/buildinfo"
deadline=$(( $(date +%s) + SETTLE_BUDGET_SECONDS ))
actual=""
last_actual=""
on_target=false
while :; do
body="$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$url" || true)"
actual="$(echo "$body" | jq -r '.git_sha // ""' 2>/dev/null || echo "")"
[ -n "$actual" ] && last_actual="$actual"
if [ "$actual" = "$GITHUB_SHA" ]; then
on_target=true
break
fi
now=$(date +%s)
if [ "$now" -ge "$deadline" ]; then
break
fi
# Still rolling (stale 200) or transiently unreachable — wait and
# re-poll within the settle budget rather than failing on first read.
remaining=$(( deadline - now ))
echo "$slug: waiting for target SHA (have '${actual:0:7}', want ${GITHUB_SHA:0:7}; ${remaining}s left)"
sleep "$SETTLE_INTERVAL_SECONDS"
done
if [ "$on_target" = true ]; then
echo "$slug: ${actual:0:7}"
elif [ -z "$last_actual" ]; then
echo "::error::$slug did not return /buildinfo after deploy (waited ${SETTLE_BUDGET_SECONDS}s)."
body="$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$url" || true)"
actual="$(echo "$body" | jq -r '.git_sha // ""' 2>/dev/null || echo "")"
if [ -z "$actual" ]; then
echo "::error::$slug did not return /buildinfo after deploy."
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
else
echo "::error::$slug is stale: actual=${last_actual:0:7}, expected=${GITHUB_SHA:0:7} (waited ${SETTLE_BUDGET_SECONDS}s)"
continue
fi
if [ "$actual" != "$GITHUB_SHA" ]; then
echo "::error::$slug is stale: actual=${actual:0:7}, expected=${GITHUB_SHA:0:7}"
STALE_COUNT=$((STALE_COUNT + 1))
else
echo "$slug: ${actual:0:7}"
fi
done
@@ -565,69 +409,3 @@ jobs:
if [ "$STALE_COUNT" -gt 0 ] || [ "$UNHEALTHY_COUNT" -gt 0 ] || [ "$UNREACHABLE_COUNT" -gt 0 ]; then
exit 1
fi
# Re-point :latest to the just-promoted image — ONLY after the
# production rollout + buildinfo verification above have passed.
#
# WHY HERE (promote point), not at build time:
# The platform-tenant ECR `:latest` tag was last moved 2026-05-10
# and went 3.5 weeks stale because the build step only pushes
# :staging-<sha> + :staging-latest and never re-points :latest. A
# no-arg POST /cp/admin/tenants/:slug/redeploy (whose default tag
# fell through to "latest") then pulled the 3.5-week-old image and
# REVERTED the tenant (incident: molecule-adk-demo, 2026-06-03).
#
# The defense-in-depth half of this fix changes that redeploy
# default to :staging-latest, but :latest itself must also be
# kept meaningful. We make :latest track the PROD-BLESSED build,
# not the raw build: by living at the end of deploy-production —
# after `wait-ci` (green main CI), the canary-first batched fleet
# rollout, AND the /buildinfo SHA verification — :latest only ever
# advances to a SHA that is actually green and confirmed running
# across the live fleet. So `:latest` == "current prod image",
# and any consumer that pulls :latest (legacy callers, manual
# `docker pull`, a redeploy that somehow still resolves "latest")
# gets the blessed image instead of whatever happened to build.
#
# Re-tag is digest-level (imagetools create), so no rebuild and
# :latest is byte-identical to :staging-<sha> for this commit.
# Gate on supersede: a superseded older job must NOT move :latest backward
# to its older image (#2213 — 275383 promoted :latest → the older
# staging-7a72516 after a newer job had already shipped). :latest must only
# ever advance under the job that owns main's head.
- name: Promote :latest to the verified prod image
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
env:
TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
STAGING_TENANT_IMAGE_NAME: ${{ env.STAGING_TENANT_IMAGE_NAME }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
run: |
set -euo pipefail
SHA_TAG="staging-${GITHUB_SHA::7}"
PROD_ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
STAGING_ECR_REGISTRY="${STAGING_TENANT_IMAGE_NAME%%/*}"
aws ecr get-login-password --region us-east-2 | \
docker login --username AWS --password-stdin "${PROD_ECR_REGISTRY}"
aws ecr get-login-password --region us-east-2 | \
docker login --username AWS --password-stdin "${STAGING_ECR_REGISTRY}"
# imagetools create copies the source manifest to the new tag by
# digest (no pull/rebuild). :latest now points at the exact image
# that just passed the prod gate.
docker buildx imagetools create \
--tag "${TENANT_IMAGE_NAME}:latest" \
"${TENANT_IMAGE_NAME}:${SHA_TAG}"
docker buildx imagetools create \
--tag "${STAGING_TENANT_IMAGE_NAME}:latest" \
"${STAGING_TENANT_IMAGE_NAME}:${SHA_TAG}"
{
echo ""
echo "### :latest promoted"
echo ""
echo "Re-pointed \`platform-tenant:latest\` → \`${SHA_TAG}\` (prod + staging ECR)."
echo ":latest now tracks the prod-blessed, fleet-verified image."
} >> "$GITHUB_STEP_SUMMARY"
+4 -4
View File
@@ -12,9 +12,9 @@
# - `pull_request_review` types: [submitted]
# → re-evaluate when a team member submits an APPROVE review so
# the gate flips immediately (no wait for the next push or
# slash-command). Verified live: sop-checklist.yml uses this
# slash-command). Verified live: sop-tier-check.yml uses this
# same event and provably fires (produces
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
# `sop-tier-check / tier-check (pull_request_review)` contexts).
# The job-level `if:` guard checks
# `github.event.review.state == 'APPROVED' || 'approved'` so
# only APPROVE reviews run the evaluator; COMMENT and
@@ -53,7 +53,7 @@
#
# We MUST NOT use `github.event.comment.author_association` (the
# field doesn't exist on Gitea 1.22.6 webhook payload — this was
# 's defect #1).
# sop-tier-refire's defect #1).
#
# A4 (no PR-head checkout under pull_request_target):
# We check out the BASE ref explicitly so the review-check.sh script is
@@ -73,7 +73,7 @@
# also not in qa/security teams → also 403.
#
# Resolution: a dedicated `RFC_324_TEAM_READ_TOKEN` secret, owned by an
# identity that IS in both `qa` and `security` teams (Owners-level
# identity that IS in both `qa` and `security` teams (Owners-tier
# claude-ceo-assistant, or a new service-bot added to both teams).
# Provisioning of this secret is tracked as a follow-up issue (filed by
# core-devops at PR open).
+2 -2
View File
@@ -10,8 +10,8 @@
# A1-α addendum (internal#760): review-event trigger added so the security
# gate flips immediately when a team member submits an APPROVE review.
# Uses `pull_request_review` types: [submitted] — verified live via
# sop-checklist.yml which provably fires this event (produces
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
# sop-tier-check.yml which provably fires this event (produces
# `sop-tier-check / tier-check (pull_request_review)` contexts).
# The job-level `if:` guard checks
# `github.event.review.state == 'APPROVED' || 'approved'` so only APPROVE
# reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
+27 -4
View File
@@ -14,10 +14,10 @@
# Fix (PR #1345 / issue #1280):
# - ONE workflow, ONE issue_comment:[created] subscription (no edited/deleted)
# - all-items-acked job: pull_request_target OR sop slash-command comments
# - review-refire job: qa/security refire slash commands
# - review-refire job: qa/security/tier refire slash commands
# → ~50% reduction in comment-triggered runner occupancy vs pre-fix.
#
# Trust boundary (mirrors RFC#324 §A4 + sop-checklist security note):
# Trust boundary (mirrors RFC#324 §A4 + sop-tier-check security note):
# `pull_request_target` (not `pull_request`) — workflow def is loaded
# from BASE branch, so a PR cannot rewrite this workflow to exfiltrate
# the token. The `actions/checkout` step pins `ref: base.sha` so the
@@ -34,6 +34,14 @@
# via a repo secret `SOP_CHECKLIST_GATE_TOKEN`. Provisioning of that
# secret is a follow-up authorization step (separate from this PR).
#
# Failure mode: tier-aware (RFC#351 open question 2):
# - tier:high → state=failure (hard-fail; BP blocks merge)
# - tier:medium → state=failure (hard-fail; same)
# - tier:low → state=pending (soft-fail; BP can choose to require
# this context or skip for low-tier PRs)
# - missing/no-tier → state=failure (default-mode: hard — never lower
# the bar per feedback_fix_root_not_symptom)
#
# Slash-command contract (RFC#351 v1 + §A1.1-style notes from RFC#324):
#
# /sop-ack <slug-or-numeric-alias> [optional note]
@@ -53,7 +61,7 @@
# — declare a gate (qa-review, security-review) N/A.
# — see sop-checklist-config.yaml n/a_gates section.
#
# /qa-recheck /security-recheck
# /qa-recheck /security-recheck /refire-tier-check
# — refire the corresponding status check on the PR head.
#
# The eval is read-only + idempotent (read PR + comments + team
@@ -141,6 +149,7 @@ jobs:
{
echo "run_qa=false"
echo "run_security=false"
echo "run_tier=false"
} >> "$GITHUB_OUTPUT"
first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
case "$first_line" in
@@ -150,6 +159,9 @@ jobs:
/security-recheck*)
echo "run_security=true" >> "$GITHUB_OUTPUT"
;;
/refire-tier-check*)
echo "run_tier=true" >> "$GITHUB_OUTPUT"
;;
*)
echo "::notice::no supported review refire slash command; no-op"
;;
@@ -158,7 +170,8 @@ jobs:
- name: Check out BASE ref for trusted scripts
if: |
steps.classify.outputs.run_qa == 'true' ||
steps.classify.outputs.run_security == 'true'
steps.classify.outputs.run_security == 'true' ||
steps.classify.outputs.run_tier == 'true'
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.repository.default_branch }}
@@ -200,3 +213,13 @@ jobs:
run: |
set -euo pipefail
.gitea/scripts/review-refire-status.sh
- name: Refire sop-tier-check status
if: steps.classify.outputs.run_tier == 'true'
env:
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_NUMBER: ${{ github.event.issue.number }}
SOP_DEBUG: '0'
run: bash .gitea/scripts/sop-tier-refire.sh
+131
View File
@@ -0,0 +1,131 @@
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
#
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
# from the previous inline-bash version). The script is the single source
# of truth; this workflow file just sets env + invokes it.
#
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
# branch:
# required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
# required_approving_reviews: 1
# approving_review_teams: ["ceo", "managers", "engineers"]
#
# Tier → required-team expression (internal#189 AND-composition):
# tier:low → engineers,managers,ceo (OR: any one suffices)
# tier:medium → managers AND engineers AND qa???,security??? (AND: all required)
# tier:high → ceo (OR: single team, wired for AND)
#
# "???" = teams not yet created in Gitea. When qa + security teams are
# added, update TIER_EXPR["tier:medium"] in the script to remove the
# markers. PRs already in-flight when qa/security are created continue
# to work because their authors explicitly requested those reviews.
#
# Force-merge: Owners-team override remains available out-of-band via
# the Gitea merge API; force-merge writes `incident.force_merge` to
# `structure_events` per §Persistent structured logging gate (Phase 3).
#
# Environment variables:
# SOP_DEBUG=1 — per-API-call diagnostic lines. Default: off.
# SOP_LEGACY_CHECK=1 — revert to OR-gate for this run. Intended for
# emergency use only; burn-in window closed
# 2026-05-17 (internal#189 Phase 1).
#
# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
# window closed. continue-on-error: true has been removed from the
# tier-check job; AND-composition is now fully enforced. If you need
# to temporarily re-introduce a mask, file a tracker and follow the
# mc#1982 protocol (Tier 2e lint requires a current tracker within
# 2 lines of any continue-on-error: true).
name: sop-tier-check
# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
# `pull_request_target` loads the workflow definition from the BASE
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
# with write access to a feature branch could rewrite this file in
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
# is the documented mitigation.
#
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
# untrusted code is ever executed in the runner — we only HTTP-call the
# Gitea API. If a future change adds a checkout step, it MUST pin to
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
# the trust boundary.
on:
pull_request_target:
types: [opened, edited, synchronize, reopened, labeled, unlabeled]
pull_request_review:
types: [submitted, dismissed, edited]
concurrency:
group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
tier-check:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: read
secrets: read
steps:
- name: Check out base branch (for the script)
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Pin to base.sha — pull_request_target's protection only
# works if we never check out PR HEAD. Same SHA the workflow
# itself was loaded from.
ref: ${{ github.event.pull_request.base.sha }}
- name: Install jq
# Gitea Actions runners (ubuntu-latest label) do not bundle jq.
# The sop-tier-check script uses jq for all JSON API parsing.
# Install jq before the script runs so sop-tier-check can pass.
#
# Method: apt-get first (reliable for Ubuntu runners with internet
# access to package mirrors). Falls back to GitHub binary download.
# GitHub releases may be unreachable from some runner networks
# (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
# runners). The sop-tier-check script has its own fallback as a
# third line of defense. continue-on-error: true ensures this step
# failing does not block the job.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
run: |
# apt-get is the primary method — Ubuntu package mirrors are reliably
# reachable from runner containers. GitHub releases may be blocked
# or slow on some networks (infra#241 follow-up).
if apt-get update -qq && apt-get install -y -qq jq; then
echo "::notice::jq installed via apt-get: $(jq --version)"
elif timeout 120 curl -sSL \
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
-o /usr/local/bin/jq && chmod +x /usr/local/bin/jq; then
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
else
echo "::warning::jq install failed — apt-get and GitHub download both failed."
fi
jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
- name: Verify tier label + reviewer team membership
# continue-on-error: true at step level — job-level is ignored by Gitea
# Actions (quirk #10, internal runbooks). Belt-and-suspenders with
# SOP_FAIL_OPEN=1 + || true below.
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
env:
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
SOP_DEBUG: '0'
SOP_LEGACY_CHECK: '0'
# SOP_FAIL_OPEN=1 makes the script always exit 0. The UI enforces
# the actual merge gate. Combined with continue-on-error: true
# above, this step never fails the job regardless of script exit.
SOP_FAIL_OPEN: '1'
run: |
bash .gitea/scripts/sop-tier-check.sh || true
+52
View File
@@ -0,0 +1,52 @@
# sop-tier-refire — manual fallback for sop-tier-check refire.
#
# Closes internal#292. Gitea 1.22.6 doesn't refire workflows on the
# `pull_request_review` event (go-gitea/gitea#33700); the `sop-tier-check`
# workflow's review-event subscription is silently dead. The result:
# PRs that get their approving review AFTER the tier-check ran on open/
# synchronize keep their failing status check forever, and the only way
# to merge is the admin force-merge path (audited via `audit-force-merge`
# but the audit trail keeps growing; see `feedback_never_admin_merge_bypass`).
#
# Comment-triggered refires now live in `review-refire-comments.yml`. Gitea
# queues issue_comment workflows before evaluating job-level `if:`, so having
# qa-review, security-review, sop-checklist, and sop-tier-refire all subscribe
# to every comment caused queue storms on SOP-heavy PRs. This workflow is a
# non-automatic breadcrumb only; Gitea 1.22.6 does not support
# workflow_dispatch inputs, so real refires must use `/refire-tier-check`.
#
# SECURITY MODEL:
#
# 1. `pull_request` exists on the issue (issue_comment fires on issues
# AND PRs; we only want PRs).
# 2. `comment.author_association` must be MEMBER/OWNER/COLLABORATOR.
# Per the internal#292 core-security review (review#1066 ask): anyone
# can comment, but only repo collaborators+ can flip the status.
# Without this gate, a drive-by commenter on a public-issue-tracker
# surface could trigger a status flip.
# 3. Comment body must contain `/refire-tier-check` — a slash-command-
# shaped trigger (not just any comment word). Prevents accidental
# triggering from prose like "we should refire tests" in a review.
# 4. This workflow does NOT check out PR HEAD code. Like sop-tier-check,
# it only HTTP-calls the Gitea API. Trust boundary preserved.
#
# Note: `issue_comment` fires from the BASE branch's workflow file. There
# is no `pull_request_target` equivalent to set; the trigger inherently
# loads the workflow from the default branch.
#
# Rate-limit: a 1s pre-sleep + a "skip if status posted in last 30s"
# guard prevents comment-spam from thrashing the status. See the script.
name: sop-tier-check refire (manual)
on:
workflow_dispatch:
jobs:
refire:
runs-on: ubuntu-latest
steps:
- name: Explain supported refire path
run: |
echo "::error::Gitea 1.22.6 does not support workflow_dispatch inputs here; comment /refire-tier-check on the PR instead."
exit 1
+3 -3
View File
@@ -112,9 +112,9 @@ jobs:
E2E_RUNTIME: claude-code
# Pin the smoke to a specific MiniMax model rather than relying
# on the per-runtime default (which could resolve to "sonnet" →
# direct Anthropic and defeat the cost saving). MiniMax-M2.7 is the
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
E2E_MODEL_SLUG: MiniMax-M2.7
# direct Anthropic and defeat the cost saving). MiniMax-M2 is the
# stable staging MiniMax path used by the full-SaaS smoke.
E2E_MODEL_SLUG: MiniMax-M2
E2E_RUN_ID: "smoke-${{ github.run_id }}"
# Debug-only: when an operator dispatches with keep_on_failure=true,
# the smoke script's E2E_KEEP_ORG=1 path skips teardown so the
+4 -7
View File
@@ -34,10 +34,8 @@ name: Sweep stale Cloudflare DNS records
# scripts/ops/test_sweep_cf_decide.py (#2027) cover the rule
# classifier.
#
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
# (operator-host canonical name) are accepted — the workflow falls back
# automatically. Same for CF_ZONE_ID / CLOUDFLARE_ZONE_ID. Confirmed
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
# Secrets: CF_API_TOKEN, CF_ZONE_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
# are confirmed existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
@@ -81,8 +79,8 @@ jobs:
# each individually capped at 10s by the script's curl -m flag.
timeout-minutes: 3
env:
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID || secrets.CLOUDFLARE_ZONE_ID }}
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }}
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -131,7 +129,6 @@ jobs:
fi
echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
echo "::error::Cloudflare secrets accept either the CI-scoped name (CF_API_TOKEN / CF_ZONE_ID) or the operator-host canonical name (CLOUDFLARE_API_TOKEN / CLOUDFLARE_ZONE_ID)."
echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
exit 1
fi
+6 -8
View File
@@ -29,12 +29,10 @@ name: Sweep stale Cloudflare Tunnels
# the DNS sweep's 50% because tenant-shaped tunnels are mostly
# orphans by design) refuses to nuke past the threshold.
#
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
# (operator-host canonical name) are accepted — the workflow falls back
# automatically. Same for CF_ACCOUNT_ID / CLOUDFLARE_ACCOUNT_ID. Confirmed
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
# Secrets: CF_API_TOKEN, CF_ACCOUNT_ID are confirmed existing per
# issue #425 §425 audit. CP_ADMIN_API_TOKEN and CP_STAGING_ADMIN_API_TOKEN
# are unconfirmed — if missing, the verify step (schedule → hard-fail,
# dispatch → soft-skip) surfaces it clearly.
on:
schedule:
@@ -76,8 +74,8 @@ jobs:
# the sweep-cf-orphans companion job).
timeout-minutes: 30
env:
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID || secrets.CLOUDFLARE_ACCOUNT_ID }}
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
+7 -38
View File
@@ -26,14 +26,11 @@ name: sync-providers-yaml
# sentinel does not fire on it.
#
# AUTH: uses AUTO_SYNC_TOKEN (the existing cross-repo read token used to sync
# template/provider content from sibling repos). If the secret is absent:
# * Trusted contexts (push to main/staging, schedule, same-repo PR,
# workflow_dispatch): hard ::error:: + exit 1 (#2158 — silent
# fail-open was masking live canonical drift from the daily schedule).
# * Untrusted fork PRs: soft ::warning:: + exit 0 (forks cannot receive
# secrets, so a hard-fail here would block every fork PR).
# The hermetic sha pin in sync_canonical_test.go is the always-on backstop
# for hand-edits of core's synced copy regardless of AUTO_SYNC_TOKEN state.
# template/provider content from sibling repos). If the secret is absent the
# job emits a clear ::warning:: and exits 0 — the hermetic sha pin in
# sync_canonical_test.go is the always-on backstop, so a missing cross-repo
# token degrades to "hand-edit still caught, live canonical drift not caught"
# rather than a hard red that blocks unrelated PRs.
on:
pull_request:
@@ -63,7 +60,6 @@ concurrency:
cancel-in-progress: true
jobs:
# bp-required: pending #718 — soak-then-promote, not in BP yet.
compare:
name: Compare synced providers.yaml against controlplane canonical
runs-on: ubuntu-latest
@@ -77,37 +73,10 @@ jobs:
API_ROOT: ${{ github.server_url }}/api/v1
run: |
set -euo pipefail
# Trusted-context detection (per #2158): AUTO_SYNC_TOKEN absence
# is a hard failure on contexts that *should* have the secret
# (push to main/staging, schedule, same-repo PRs, workflow_dispatch).
# Fork PRs cannot receive secrets, so the soft warning is preserved
# for that one untrusted case. The hermetic sha pin in
# sync_canonical_test.go remains the always-on backstop for
# hand-edits of core's synced copy.
case "${{ github.event_name }}" in
push|schedule|workflow_dispatch)
is_trusted=true
;;
pull_request)
if [ "${{ github.event.pull_request.head.repo.fork }}" = "false" ]; then
is_trusted=true
else
is_trusted=false
fi
;;
*)
# Unknown event type — treat as trusted to avoid silent failures
# on a future event we haven't enumerated.
is_trusted=true
;;
esac
if [ -z "${AUTO_SYNC_TOKEN:-}" ]; then
if [ "$is_trusted" = "true" ]; then
echo "::error::AUTO_SYNC_TOKEN secret missing on trusted context (${{ github.event_name }}). Live cross-repo canonical-drift detection cannot run — this would silently mask a controlplane-side providers.yaml change from going red on the daily schedule and on same-repo PRs. Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to restore detection."
exit 1
fi
echo "::warning::AUTO_SYNC_TOKEN secret missing on untrusted fork PR — skipping the live cross-repo compare (forks cannot receive secrets)."
echo "::warning::AUTO_SYNC_TOKEN secret missing — skipping the live cross-repo compare."
echo "The hermetic sha pin (sync_canonical_test.go) still gates hand-edits of core's copy."
echo "Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to enable live canonical-drift detection."
exit 0
fi
CANON_URL="${API_ROOT}/repos/molecule-ai/molecule-controlplane/raw/internal/providers/providers.yaml?ref=main"
-67
View File
@@ -1,67 +0,0 @@
# umbrella-reaper — auto-recovery for stale CI umbrella statuses on open PRs.
#
# Tracking: molecule-core#1780.
#
# Problem: when `CI / all-required (pull_request)` reports failure due to
# a propagation/timing race despite all required sub-jobs being success,
# branch protection blocks the merge. Operators currently recover manually
# per docs/runbooks/ci-umbrella-stale-compensating-status.md.
#
# This workflow automates that recovery: it scans open PRs and posts a
# compensating success status when the umbrella is stale but all sub-jobs
# are verified green.
#
# Trust boundary: the script only reads PR lists + statuses and POSTs to
# /statuses/{sha}. It never checks out PR HEAD code. The Gitea token has
# write:repository scope for statuses only.
#
# Sibling: .gitea/workflows/status-reaper.yml (default-branch push-suffix
# compensation). Same persona provisioning model.
name: umbrella-reaper
# IMPORTANT — Schedule moved to operator-config:
# /etc/cron.d/molecule-core-umbrella-reaper ->
# /usr/local/bin/molecule-core-cron-bot.sh umbrella-reaper
#
# This keeps the compensation cadence but stops a maintenance bot from
# consuming Gitea Actions runner slots during PR merge waves.
# Gitea 1.22.6 parser quirk per
# `feedback_gitea_workflow_dispatch_inputs_unsupported`: do NOT add an
# `inputs:` block here. Gitea 1.22.6 rejects the whole workflow as
# "unknown on type" when `workflow_dispatch.inputs.X` is present.
on:
workflow_dispatch:
permissions:
contents: read
# NOTE: NO `concurrency:` block is intentional — same reasoning as
# status-reaper.yml. Gitea 1.22.6 doesn't honor cancel-in-progress for
# queued ticks; the POST is idempotent so concurrent ticks are safe.
jobs:
reap:
runs-on: ubuntu-latest
timeout-minutes: 8
steps:
- name: Check out repo at default-branch HEAD
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
ref: ${{ github.event.repository.default_branch }}
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
with:
python-version: '3.12'
- name: Install PyYAML
run: python -m pip install --quiet 'PyYAML==6.0.2'
- name: Compensate stale PR umbrella statuses
env:
GITEA_TOKEN: ${{ secrets.UMBRELLA_REAPER_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_LIMIT: "50"
run: python3 .gitea/scripts/umbrella-reaper.py
+3 -12
View File
@@ -26,7 +26,7 @@ name: verify-providers-gen
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
# from branch protection (turning it into a hard merge gate has blast radius
# — operator GO required, same pattern as sop-checklist / verify-providers-gen
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
# on controlplane). Promote it into branch protection in a follow-up once
# P2 has soaked.
# Until then it behaves like secret-scan / block-internal-paths: a standalone
@@ -67,7 +67,6 @@ concurrency:
cancel-in-progress: true
jobs:
# bp-required: pending #718 — soak-then-promote, not in BP yet.
verify:
name: Regenerate providers artifact and fail on drift
runs-on: ubuntu-latest
@@ -90,13 +89,7 @@ jobs:
# checked-in artifact; exit 1 (RED) on any drift. This is the
# single source of the gate's verdict — the same code path
# `go test ./cmd/gen-providers` exercises.
if ! go run ./cmd/gen-providers -check; then
echo "::error::workspace-server/internal/providers/gen/registry_gen.go is stale (drifted from providers.yaml)."
echo "Regenerate and commit it (run from repo root):"
echo " make gen # native (needs a local Go toolchain)"
echo " make gen-docker # Docker only — no local Go needed"
exit 1
fi
go run ./cmd/gen-providers -check
- name: Belt-and-braces — regenerate in place and assert clean tree
run: |
@@ -107,9 +100,7 @@ jobs:
go generate ./...
if ! git diff --quiet -- internal/providers/gen/registry_gen.go; then
echo "::error::workspace-server/internal/providers/gen/registry_gen.go drifted from providers.yaml."
echo "Regenerate and commit it. No local Go? Use Docker (run from repo root):"
echo " make gen # native (needs a local Go toolchain)"
echo " make gen-docker # Docker only — no local Go needed"
echo "Run 'go generate ./...' (or 'go run ./cmd/gen-providers') in workspace-server/ and commit the result."
git --no-pager diff -- internal/providers/gen/registry_gen.go | head -80
exit 1
fi
+1 -34
View File
@@ -4,27 +4,7 @@
# use this Makefile; CI calls docker compose / go test directly so the
# Makefile can evolve without breaking the build.
.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check gen gen-docker gen-check gen-check-docker
# ─── Provider-registry SSOT codegen (internal#718) ─────────────────────
# The Go module lives in workspace-server/. The checked-in artifact
# workspace-server/internal/providers/gen/registry_gen.go is a gofmt'd
# projection of providers.yaml, drift-gated by
# .gitea/workflows/verify-providers-gen.yml. `make gen-docker` runs the SAME
# generator inside the pinned golang image so a toolchain-less env (an agent
# without Go) can regenerate without a local Go install (core#2332 follow-up).
#
# BYTE-EQUIVALENCE: gen-docker is byte-identical to native only while
# GO_VERSION below matches the `go` directive in workspace-server/go.mod.
# NOTE: the CI verify workflow pins setup-go go-version: 'stable' (not '1.25');
# that is a latent hazard — a future Go minor could reformat the artifact in CI
# vs a 1.25 local. Pin CI to '1.25' to close it (tracked alongside this change).
GO_VERSION ?= 1.25
GO_IMAGE ?= golang:$(GO_VERSION)
DOCKER ?= docker
# Mount the Go module (workspace-server) read-write; Go's default -mod=readonly
# keeps go.mod/go.sum untouched — only the artifact is written in-place.
DOCKER_RUN_WS = $(DOCKER) run --rm -v "$(CURDIR)/workspace-server":/src -w /src $(GO_IMAGE)
.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check
help: ## Show this help.
@grep -E '^[a-zA-Z0-9_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-22s\033[0m %s\n", $$1, $$2}'
@@ -76,16 +56,3 @@ openapi-spec: ## Regenerate OpenAPI spec from workspace-server handler annotatio
openapi-spec-check: openapi-spec ## CI gate — fail if openapi-spec produces a diff vs the committed file.
@git diff --exit-code -- workspace-server/docs/openapi/ \
|| (echo "openapi-spec is stale — run 'make openapi-spec' and commit the result" && exit 1)
# ─── Provider-registry codegen targets ────────────────────────────────
gen: ## Regenerate the providers registry artifact natively (needs local Go).
cd workspace-server && go generate ./...
gen-docker: ## Same, inside the pinned $(GO_IMAGE) — Docker only, no local Go.
$(DOCKER_RUN_WS) go generate ./...
gen-check: ## Drift gate (native): exit 1 if the artifact is stale.
cd workspace-server && go run ./cmd/gen-providers -check
gen-check-docker: ## Drift gate inside the pinned $(GO_IMAGE) — Docker only.
$(DOCKER_RUN_WS) go run ./cmd/gen-providers -check
-11
View File
@@ -24,17 +24,6 @@ COPY --from=builder /app/public ./public
EXPOSE 3000
ENV PORT=3000
ENV HOSTNAME="0.0.0.0"
# Git SHA the image was built from, surfaced at /api/buildinfo so canvas
# deploys are verifiable by the served SHA the same way workspace-server's
# /buildinfo is (core#2235). Wired from `${{ github.sha }}` in
# publish-canvas-image.yml. Server-only (not NEXT_PUBLIC_) — the route
# handler reads it at runtime on the standalone Node server, so it stays
# out of the client bundle. Set on the final stage (not the builder) so it
# lives in the runtime env that force-dynamic reads per request. Default
# "dev" matches the route + workspace-server sentinel: an unwired build
# fails the SHA comparison closed instead of looking deployed.
ARG BUILD_SHA=dev
ENV BUILD_SHA=$BUILD_SHA
# Non-root runtime — use addgroup/adduser without fixed GID/UID to avoid conflicts with base image
RUN addgroup canvas 2>/dev/null || true && adduser -G canvas -s /bin/sh -D canvas 2>/dev/null || true
USER canvas
+4 -13
View File
@@ -101,19 +101,10 @@ test.describe("Desktop ChatTab", () => {
await textarea.fill("Trigger activity");
await page.getByRole("button", { name: /Send/ }).first().click();
// FALSE-GREEN FIX: the prior `.catch(() => {})` swallowed the assertion
// entirely, so this test passed whether or not the activity log ever
// rendered. The activity-log container is optional per layout, so we
// gate on its presence in the DOM: if it's not part of this layout,
// skip explicitly (a recorded skip, not a silent pass); if it IS
// present, it MUST become visible during the send flow — that's the
// behaviour this test exists to protect.
const activityLog = page.locator("[data-testid='activity-log']").first();
if ((await activityLog.count()) === 0) {
test.skip(true, "activity-log not part of this layout");
return;
}
await expect(activityLog).toBeVisible({ timeout: 10_000 });
// Activity log container should appear during the send flow.
await expect(page.locator("[data-testid='activity-log']").first()).toBeVisible({ timeout: 10_000 }).catch(() => {
// Activity log may not be present in all layouts.
});
});
});
+2 -17
View File
@@ -60,26 +60,11 @@ test.describe("MobileChat", () => {
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 15_000 });
// Reload and deterministically wait for the chat-history GET that
// rehydrates the transcript to come back 2xx, rather than racing a
// fixed-timeout render assertion against an in-flight fetch. The
// server now persists the a2a_receive row SYNCHRONOUSLY before the
// send's 200 (workspace-server logA2ASuccess), so the row is
// guaranteed present by the time this GET runs — the wait is for
// hydration latency, not for a still-racing write.
const historyResponse = page.waitForResponse(
(resp) =>
resp.url().includes("/chat-history") &&
resp.request().method() === "GET" &&
resp.status() === 200,
{ timeout: 15_000 },
);
await page.reload();
await page.waitForSelector("[data-testid='chat-panel']", { timeout: 10_000 });
await historyResponse;
await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible();
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible();
await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible({ timeout: 5_000 });
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 5_000 });
});
test("composer auto-grows with multi-line text", async ({ page }) => {
+1 -5
View File
@@ -27,13 +27,9 @@ export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
// 1. Create external workspace pointing at the in-process echo runtime.
const runId = Math.random().toString(36).slice(2, 8);
const wsName = `Chat E2E Agent ${runId}`;
const adminToken = process.env.E2E_ADMIN_TOKEN ?? process.env.ADMIN_TOKEN;
const createRes = await fetch(`${PLATFORM_URL}/workspaces`, {
method: "POST",
headers: {
"Content-Type": "application/json",
...(adminToken ? { Authorization: `Bearer ${adminToken}` } : {}),
},
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
name: wsName,
tier: 1,
@@ -1,461 +0,0 @@
/**
* Staging canvas E2E desktop take-control RECONNECT + LEASE-RENEWAL path
* (core#2332 "P0.7", the e2e gap left by core#2216).
*
* Sibling to staging-display.spec.ts. That spec proves the happy path
* (acquire noVNC WS upgrade first framebuffer frame). It does NOT cover
* the two behaviours core#2216 added on top of that happy path:
*
* (A) RECONNECT re-acquires a FRESH token. When the live WS drops uncleanly
* (idle/network blip), DisplayTab.tsx:391-446 calls connect(reacquire=true),
* which first awaits reacquireSession() (DisplayTab.tsx:83-99
* POST /display/control/acquire) to mint a NON-stale lease+token before
* reopening the socket. Without this, the cached ~300s token can be past
* its expiry and the reconnect would 401 a dead session that LOOKS like
* a reconnect. We assert the reconnect path yields a token bound to a NEW
* expires_at AND that a NEW WS opened with that fresh token resumes the
* framebuffer (a real frame, not a 1006/403).
*
* (B) The lease SURVIVES past the 300s window via the renewal cadence.
* The lock is a 300s lease with NO server-side auto-renewal
* (workspace_display_control.go:27 displayControlDefaultTTLSeconds=300;
* loadActiveDisplayControl filters `expires_at > now()`). DisplayTab.tsx:105-111
* runs a 120_000ms setInterval that re-acquires as the same holder, which
* the server's ON-CONFLICT upsert (workspace_display_control.go:116-123,
* `controlled_by = EXCLUDED.controlled_by`) treats as a lease EXTENSION:
* expires_at moves forward by a fresh 300s each renewal. We do NOT sleep
* 300s of wall-clock to prove this we drive the renewal CALL the timer
* fires (reacquireSession === the same POST) and assert it pushes
* expires_at strictly past the ORIGINAL lease window, then confirm the
* lock is still live (GET /display/control returns the holder) after a
* point in time at which the original, un-renewed lease would already be
* expired. That is the observable, deterministic proxy for "the 120s
* timer keeps the user from being kicked every ~5 min."
*
* Auth model, gating, and fail-closed philosophy are IDENTICAL to
* staging-display.spec.ts see that file's header for the full rationale
* (same-origin-canvas Origin for the WS upgrade; per-tenant admin bearer for
* the acquire/GET POSTs; STAGING_DISPLAY_WORKSPACE_ID is the single activation
* knob and a standing desktop EC2 is a CTO cost item; any failure once the gate
* env is present is a HARD error, never a silent green, no "flaky" disposition).
*
* Promote-to-required is a CTO call: like its sibling this only runs when a
* standing desktop-capable staging workspace exists, so it cannot be a blanket
* required context until that workspace is funded and STAGING_DISPLAY_* is wired
* into the e2e-staging-canvas workflow.
*/
import { test, expect } from "@playwright/test";
const STAGING = process.env.CANVAS_E2E_STAGING === "1";
// The standing desktop-capable workspace id. Absent => skip loud. Same single
// activation knob as staging-display.spec.ts; see that file's header.
const DISPLAY_WS_ID = process.env.STAGING_DISPLAY_WORKSPACE_ID;
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
test.skip(
!DISPLAY_WS_ID,
"STAGING_DISPLAY_WORKSPACE_ID not set — no standing desktop-capable staging " +
"workspace to exercise the reconnect/renewal path. Set it to a workspace whose " +
"compute.display.mode == 'desktop-control' to activate this real-e2e gate. " +
"(Standing that workspace up is a CTO cost item — one always-on desktop EC2.)",
);
// WS upgrade + first-frame budgets mirror staging-display.spec.ts:75-76 — the
// EIC tunnel + websockify handshake adds real latency; bounded so a dead path
// fails LOUD instead of hanging to the suite timeout.
const WS_UPGRADE_TIMEOUT_MS = 30_000;
const FIRST_FRAME_TIMEOUT_MS = 30_000;
// The production lease/renewal contract we are asserting against:
// - DEFAULT_TTL_SECONDS: the 300s lease the canvas requests
// (DisplayTab.tsx:88 ttl_seconds:300; server default
// workspace_display_control.go:27).
// - RENEWAL_INTERVAL_MS: the cadence the canvas renews on
// (DisplayTab.tsx:109 setInterval(..., 120_000)). We don't sleep it; we
// assert the renewal CALL pushes the lease forward.
const DEFAULT_TTL_SECONDS = 300;
const RENEWAL_INTERVAL_MS = 120_000;
// Open a real noVNC WebSocket from inside the page (so the browser sends
// Origin: <tenant> and the same-origin-canvas AdminAuth path accepts the
// upgrade — a browser WS can't set Authorization). Returns the outcome of the
// upgrade + first-frame, exactly like staging-display.spec.ts's evaluate
// block. Reused here for BOTH the initial connect and the post-drop reconnect
// so the two are compared on identical wire mechanics.
type WsResult = {
ok: boolean;
stage: string;
detail: string;
frameBytes?: number;
frameKind?: string;
closeCode?: number;
};
async function openDisplayWs(
page: import("@playwright/test").Page,
rawSessionUrl: string,
): Promise<WsResult> {
return page.evaluate(
async ({ rawSessionUrl, upgradeTimeoutMs, frameTimeoutMs }) => {
// Reproduce DisplayTab.tsx:545-552 (displayWebSocketConnection): resolve
// against the tenant origin, pull token from the #token fragment, strip
// the fragment, switch http(s)->ws(s). Then connect with the exact
// subprotocols the canvas uses (DisplayTab.tsx:402).
const u = new URL(rawSessionUrl, window.location.href);
const token =
new URLSearchParams(u.hash.replace(/^#/, "")).get("token") ?? "";
if (!token) {
return { ok: false, stage: "token-parse", detail: "no #token in session_url" };
}
u.hash = "";
u.protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
const wsUrl = u.toString();
return await new Promise<{
ok: boolean;
stage: string;
detail: string;
frameBytes?: number;
frameKind?: string;
closeCode?: number;
}>((resolve) => {
let upgraded = false;
let settled = false;
const finish = (r: {
ok: boolean;
stage: string;
detail: string;
frameBytes?: number;
frameKind?: string;
closeCode?: number;
}) => {
if (settled) return;
settled = true;
try {
ws.close();
} catch {
/* ignore */
}
resolve(r);
};
let ws: WebSocket;
try {
ws = new WebSocket(wsUrl, [`binary`, `molecule-display-token.${token}`]);
} catch (e) {
resolve({ ok: false, stage: "construct", detail: String(e) });
return;
}
ws.binaryType = "arraybuffer";
const upgradeTimer = setTimeout(() => {
finish({
ok: false,
stage: "upgrade-timeout",
detail: `WS did not open within ${upgradeTimeoutMs}ms (readyState=${ws.readyState})`,
});
}, upgradeTimeoutMs);
let frameTimer: ReturnType<typeof setTimeout> | null = null;
ws.onopen = () => {
upgraded = true;
clearTimeout(upgradeTimer);
frameTimer = setTimeout(() => {
finish({
ok: false,
stage: "frame-timeout",
detail: `WS upgraded but no framebuffer message within ${frameTimeoutMs}ms`,
});
}, frameTimeoutMs);
};
ws.onmessage = (ev) => {
if (frameTimer) clearTimeout(frameTimer);
let bytes = 0;
let kind: string = typeof ev.data;
if (ev.data instanceof ArrayBuffer) {
bytes = ev.data.byteLength;
kind = "ArrayBuffer";
} else if (typeof Blob !== "undefined" && ev.data instanceof Blob) {
bytes = ev.data.size;
kind = "Blob";
} else if (typeof ev.data === "string") {
bytes = ev.data.length;
kind = "string";
}
finish({
ok: bytes > 0,
stage: "frame",
detail:
bytes > 0 ? "received framebuffer message" : "first message was empty",
frameBytes: bytes,
frameKind: kind,
});
};
ws.onclose = (ev) => {
if (!upgraded) {
clearTimeout(upgradeTimer);
finish({
ok: false,
stage: "upgrade-close",
detail: `WS closed before upgrade (code=${ev.code}, reason="${ev.reason}") — handshake rejected somewhere in edge → ws-proxy → EIC → websockify → x11vnc`,
closeCode: ev.code,
});
}
};
ws.onerror = () => {
if (!upgraded) {
clearTimeout(upgradeTimer);
finish({
ok: false,
stage: "upgrade-error",
detail: "WS error before upgrade — proxy chain rejected the handshake",
});
}
};
});
},
{
rawSessionUrl,
upgradeTimeoutMs: WS_UPGRADE_TIMEOUT_MS,
frameTimeoutMs: FIRST_FRAME_TIMEOUT_MS,
},
);
}
// Pull the opaque signed token out of a session_url's #token= fragment so we
// can compare reconnect tokens for freshness (a reconnect MUST mint a new one
// — same token would mean the cached, possibly-expired URL was reused).
function tokenOf(sessionUrl: string): string {
const hashIdx = sessionUrl.indexOf("#token=");
return hashIdx >= 0 ? sessionUrl.slice(hashIdx + "#token=".length) : "";
}
test.describe("staging desktop take-control — reconnect + lease renewal (core#2216)", () => {
// Shared staging context resolution — identical to staging-display.spec.ts:90-120.
function resolveTenant() {
const tenantURL =
process.env.STAGING_DISPLAY_TENANT_URL || process.env.STAGING_TENANT_URL;
const tenantToken =
process.env.STAGING_DISPLAY_TENANT_TOKEN || process.env.STAGING_TENANT_TOKEN;
const orgID = process.env.STAGING_DISPLAY_ORG_ID || process.env.STAGING_ORG_ID;
if (!tenantURL || !tenantToken) {
throw new Error(
"STAGING_DISPLAY_WORKSPACE_ID is set but no tenant URL/token is available " +
"for the reconnect/renewal gate. Set STAGING_DISPLAY_SLUG so staging-setup.ts " +
"resolves STAGING_DISPLAY_TENANT_URL / STAGING_DISPLAY_TENANT_TOKEN for the " +
"standing desktop org (or ensure the ephemeral STAGING_TENANT_* exports exist).",
);
}
return { tenantURL, tenantToken, orgID };
}
test.beforeEach(async ({ context }) => {
const { tenantToken, orgID } = resolveTenant();
await context.setExtraHTTPHeaders({
Authorization: `Bearer ${tenantToken}`,
...(orgID ? { "X-Molecule-Org-Id": orgID } : {}),
});
});
test("reconnect re-acquires a FRESH token and the framebuffer resumes", async ({
page,
}) => {
const { tenantURL } = resolveTenant();
const workspaceId = DISPLAY_WS_ID as string;
// Sanity: workspace must be display-available, else the gate is meaningless.
const availResp = await page.request.get(
`${tenantURL}/workspaces/${workspaceId}/display`,
);
expect(availResp.status(), `GET /display for ${workspaceId} should be 200`).toBe(200);
const avail = await availResp.json();
expect(
avail.available,
`workspace ${workspaceId} is not display-available (reason=${avail.reason}).`,
).toBe(true);
// 1. Initial acquire — the happy-path lease the user starts with.
const firstResp = await page.request.post(
`${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
{ data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
);
expect(
firstResp.status(),
`initial acquire should be 200; body: ${await firstResp.text()}`,
).toBe(200);
const first = await firstResp.json();
expect(first.controller, "controller should be 'user'").toBe("user");
expect(typeof first.session_url, "acquire missing session_url").toBe("string");
const firstUrl: string = first.session_url;
expect(firstUrl, "session_url should carry #token=").toContain("#token=");
const firstToken = tokenOf(firstUrl);
expect(firstToken.length, "first token should be non-empty").toBeGreaterThan(0);
// Anchor Origin to the tenant so the same-origin-canvas WS upgrade is accepted.
await page.goto(tenantURL, { waitUntil: "domcontentloaded" });
// 2. Establish the live WS on the FIRST token — proves the session is real.
const initial = await openDisplayWs(page, firstUrl);
expect(
initial.ok,
`initial connect failed at stage="${initial.stage}": ${initial.detail}` +
(initial.closeCode ? ` (close code ${initial.closeCode})` : ""),
).toBe(true);
expect(initial.stage, `initial connect should reach 'frame'; got '${initial.stage}'`).toBe(
"frame",
);
// 3. Simulate an unclean drop. openDisplayWs() already closed its socket
// on finish(), so the live stream is gone here — exactly the state
// DisplayTab's "disconnect" handler (DisplayTab.tsx:426-442) enters
// before it calls connect(reacquire=true).
// 4. Reconnect path: mint a FRESH lease+token FIRST, the way
// connect(reacquire=true) → reacquireSession() does (DisplayTab.tsx:397
// / :83-99). This is a re-acquire by the SAME holder, so the server's
// ON-CONFLICT upsert extends the lease and returns a new signed URL.
const reResp = await page.request.post(
`${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
{ data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
);
expect(
reResp.status(),
`reconnect re-acquire should be 200 (same holder extends, not 409); body: ${await reResp.text()}`,
).toBe(200);
const re = await reResp.json();
expect(re.controller, "reconnect controller should still be 'user'").toBe("user");
expect(typeof re.session_url, "reconnect acquire missing session_url").toBe("string");
const reUrl: string = re.session_url;
const reToken = tokenOf(reUrl);
expect(reToken.length, "reconnect token should be non-empty").toBeGreaterThan(0);
// The reconnect token MUST be fresh — bound to the new expires_at. A
// reused token would mean the canvas fell back to a cached, soon-expiring
// URL, which is precisely the 401-on-reconnect bug core#2216 fixed. The
// signed token embeds expires_at.Unix() (workspace_display_control.go:390),
// so a later expiry => a different signature => a different token.
expect(
reToken,
"reconnect should mint a FRESH token (bound to the renewed expires_at), " +
"not reuse the original ~300s token — a reused token is the core#2216 401 bug.",
).not.toBe(firstToken);
expect(
new Date(re.expires_at).getTime(),
"renewed expires_at should be >= the original (lease extended, not shrunk)",
).toBeGreaterThanOrEqual(new Date(first.expires_at).getTime());
// 5. Reopen the WS on the FRESH token and assert the framebuffer RESUMES —
// a real frame, not a dead 1006/403 session. This is the crux: the
// reconnect produces a LIVE stream, not a stale-token rejection.
const reconnected = await openDisplayWs(page, reUrl);
expect(
reconnected.ok,
`RECONNECT failed at stage="${reconnected.stage}": ${reconnected.detail}` +
(reconnected.closeCode ? ` (close code ${reconnected.closeCode})` : "") +
" — a 1006/403 here means the fresh-token reconnect did NOT re-establish " +
"the proxy chain (edge → ws-proxy → EIC → websockify → x11vnc).",
).toBe(true);
expect(
reconnected.stage,
`reconnect should reach 'frame' (framebuffer resumed); got '${reconnected.stage}' (${reconnected.detail})`,
).toBe("frame");
expect(
reconnected.frameBytes ?? 0,
`resumed framebuffer message should be non-empty (kind=${reconnected.frameKind})`,
).toBeGreaterThan(0);
});
test("renewal pushes the lease past the original 300s window (no kick at ~5min)", async ({
page,
}) => {
const { tenantURL } = resolveTenant();
const workspaceId = DISPLAY_WS_ID as string;
// 1. Acquire the initial 300s lease.
const firstResp = await page.request.post(
`${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
{ data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
);
expect(
firstResp.status(),
`initial acquire should be 200; body: ${await firstResp.text()}`,
).toBe(200);
const first = await firstResp.json();
const firstExpiry = new Date(first.expires_at).getTime();
expect(Number.isFinite(firstExpiry), "first expires_at should parse").toBe(true);
// The original lease's hard ceiling: when the un-renewed token/lock dies.
const originalLeaseDeadlineMs = firstExpiry;
// 2. Fire the renewal CALL the 120s timer fires (DisplayTab.tsx:107-109 →
// reacquireSession → this same POST). We don't sleep RENEWAL_INTERVAL_MS
// of wall-clock; we drive the observable call the timer would make and
// assert its EFFECT on the lease. RENEWAL_INTERVAL_MS is asserted to sit
// safely inside the TTL so the renew always lands before expiry — if a
// future change widened the interval past the TTL, this guard fails.
expect(
RENEWAL_INTERVAL_MS,
"renewal interval must be strictly inside the lease TTL, else the lease " +
"expires before the timer renews it (user gets kicked).",
).toBeLessThan(DEFAULT_TTL_SECONDS * 1000);
const renewResp = await page.request.post(
`${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
{ data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
);
expect(
renewResp.status(),
`renewal re-acquire should be 200 (same holder extends); body: ${await renewResp.text()}`,
).toBe(200);
const renew = await renewResp.json();
const renewedExpiry = new Date(renew.expires_at).getTime();
// 3. The renewal MUST push expires_at strictly PAST the original lease
// window — that is the whole point of core#2216's renewal timer: a
// fresh 300s starting now, so the lease outlives the original ~300s
// deadline and the user is not kicked every ~5 minutes. (now()+300s,
// fired before the original 300s elapsed, is strictly later than the
// original now()+300s.)
expect(
renewedExpiry,
"renewal should extend the lease strictly past the original 300s deadline " +
`(original=${first.expires_at}, renewed=${renew.expires_at}). Equal-or-earlier ` +
"means the renewal did NOT extend — the 120s timer would not save the session.",
).toBeGreaterThan(originalLeaseDeadlineMs);
// 4. Confirm the lock is still LIVE after renewal — GET /display/control
// only returns a holder when expires_at > now() (loadActiveDisplayControl,
// workspace_display_control.go:280). A held controller here proves the
// renewed lease is active, not expired.
const ctrlResp = await page.request.get(
`${tenantURL}/workspaces/${workspaceId}/display/control`,
);
expect(ctrlResp.status(), "GET /display/control should be 200").toBe(200);
const ctrl = await ctrlResp.json();
expect(
ctrl.controller,
"after renewal the lock should still report a live holder (not 'none')",
).toBe("user");
expect(
new Date(ctrl.expires_at).getTime(),
"the live lock's expires_at should match the renewed lease (lease is the " +
"renewed one, not the original).",
).toBeGreaterThan(originalLeaseDeadlineMs);
// TODO(core#2332, CTO cost item): the assertions above prove the renewal
// CALL extends the lease past the original window — the deterministic proxy
// for "the 120s interval keeps the lease alive past 300s." To additionally
// prove the lease survives a FULL real-time 300s+ idle WS (the literal
// wall-clock claim), a long-lived test would hold one WS open >300s while
// the 120s timer renews underneath and assert the SAME socket never 1006s.
// That needs >5 min of standing-desktop wall-clock per run and is gated on
// the standing desktop EC2 being funded; it is NOT exercised here. Promote
// either form to a REQUIRED context only on CTO sign-off (cost + cadence).
});
});
-329
View File
@@ -1,329 +0,0 @@
/**
* Staging canvas E2E REAL desktop take-control path (core#2261 "Gap 1").
*
* This is the live-e2e gate that the existing staging-tabs.spec.ts does NOT
* provide. staging-tabs only opens the 13 declared workspace-panel tabs
* (TAB_IDS at staging-tabs.spec.ts:24-38 `display` is NOT among them) and
* asserts they render without a "Failed to load" toast. It never acquires
* display control, never opens the noVNC WebSocket, and never asserts a
* framebuffer frame arrives. The companion unit test
* canvas/src/components/tabs/__tests__/DisplayTab.test.tsx mocks the RFB
* constructor (vi.mock("@novnc/novnc"), see its lines 8/20-39) so NO real
* WebSocket is ever opened there either. Result: a broken take-control path
* (acquire noVNC WS upgrade ws-proxy EIC websockify x11vnc Xvfb)
* ships GREEN. This spec closes that gap by exercising the REAL wire path
* end to end against a live, desktop-capable staging workspace.
*
* What it asserts (the real path, no mocks):
* 1. POST /workspaces/<id>/display/control/acquire returns 200 with a
* session_url that carries the signed token in its `#token=` fragment
* (mirrors workspace_display_control.go:signedDisplaySessionURL).
* 2. Opening the noVNC WebSocket at session_url with the subprotocols
* ["binary", "molecule-display-token.<token>"] (exactly what the canvas
* sends DisplayTab.tsx:339) UPGRADES (onopen fires, readyState===OPEN,
* no immediate 1006 abnormal close). A 1006 / 403 means the handshake
* failed somewhere in the proxy chain.
* 3. At least one BINARY framebuffer message arrives on that socket a
* real frame off x11vnc, not just a panel mount. RFB sends a
* ProtocolVersion banner ("RFB 003.00x\n") as the first server message,
* which proves the upstream VNC server is live behind the EIC tunnel.
*
* Auth model (important): the WS upgrade is gated by workspace-server
* middleware.AdminAuth. A browser WebSocket CANNOT set an Authorization
* header, so in production the canvas WS upgrade passes AdminAuth via the
* same-origin-canvas path (wsauth_middleware.go:isSameOriginCanvas, which
* keys off the Origin header the browser sets automatically on a same-origin
* WS upgrade). We therefore open the socket from inside the browser page via
* page.evaluate AFTER navigating to the tenant origin so the browser sends
* `Origin: https://<slug>.staging.moleculesai.app`, exactly as production
* does. The acquire POST (which CAN carry a header) uses the per-tenant admin
* bearer set on the context. This is the faithful production handshake, not a
* synthetic one.
*
* Gate / cost: this test only runs when STAGING_DISPLAY_WORKSPACE_ID points
* at a STANDING desktop-capable workspace (compute.display.mode ==
* "desktop-control"). We deliberately do NOT provision one in the shared
* staging-setup.ts: a desktop AMI boots in ~12-15 min and would tax the
* existing tabs harness on every run. Standing that workspace up is a cost
* item for the CTO (one always-on desktop EC2 on staging). Until that exists,
* the test SKIPS loud. When the env IS present, any failure in
* provision/acquire/upgrade is a HARD error fail-closed, never silently
* green (no "flaky" disposition: a 1006 names a broken proxy hop).
*/
import { test, expect } from "@playwright/test";
const STAGING = process.env.CANVAS_E2E_STAGING === "1";
// The standing desktop-capable workspace id. Absent => skip loud. This is
// the single knob that activates the gate; see file header for the cost note.
const DISPLAY_WS_ID = process.env.STAGING_DISPLAY_WORKSPACE_ID;
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
test.skip(
!DISPLAY_WS_ID,
"STAGING_DISPLAY_WORKSPACE_ID not set — no standing desktop-capable staging " +
"workspace to exercise the take-control path. Set it to a workspace whose " +
"compute.display.mode == 'desktop-control' to activate this real-e2e gate. " +
"(Standing that workspace up is a CTO cost item — one always-on desktop EC2.)",
);
// How long we wait for the WS to upgrade + deliver the first frame. The EIC
// tunnel + websockify handshake adds real latency on top of the edge; budget
// generously but bounded, so a genuinely-dead path fails LOUD instead of
// hanging to the suite timeout.
const WS_UPGRADE_TIMEOUT_MS = 30_000;
const FIRST_FRAME_TIMEOUT_MS = 30_000;
test.describe("staging desktop take-control (real noVNC path)", () => {
test("acquire → WS upgrades → first framebuffer frame arrives", async ({
page,
context,
}) => {
// The standing desktop workspace lives in its OWN standing org (it can't
// live in the per-run ephemeral org — that gets torn down each run). When
// STAGING_DISPLAY_SLUG is configured, staging-setup.ts resolves that org's
// tenant URL / admin token / org id and exports them under STAGING_DISPLAY_*.
// Fall back to the ephemeral org's exports only if the display org wasn't
// separately configured (e.g. the desktop workspace happens to live in the
// run's own tenant — not the expected topology, but supported).
const tenantURL =
process.env.STAGING_DISPLAY_TENANT_URL || process.env.STAGING_TENANT_URL;
const tenantToken =
process.env.STAGING_DISPLAY_TENANT_TOKEN || process.env.STAGING_TENANT_TOKEN;
const orgID =
process.env.STAGING_DISPLAY_ORG_ID || process.env.STAGING_ORG_ID;
// Fail-closed: when the gate env IS present (we got past the skips above),
// the rest of the staging context MUST be wired or this is a hard error,
// never a silent pass. Mirrors staging-tabs.spec.ts:53-57.
if (!tenantURL || !tenantToken) {
throw new Error(
"STAGING_DISPLAY_WORKSPACE_ID is set but no tenant URL/token is available " +
"for the take-control gate. Set STAGING_DISPLAY_SLUG so staging-setup.ts " +
"resolves STAGING_DISPLAY_TENANT_URL / STAGING_DISPLAY_TENANT_TOKEN for the " +
"standing desktop org (or ensure the ephemeral STAGING_TENANT_* exports exist).",
);
}
const workspaceId = DISPLAY_WS_ID as string;
// The per-tenant admin bearer satisfies AdminAuth for the acquire POST
// (which can carry a header). The WS upgrade below relies on Origin
// (same-origin canvas), NOT this header.
await context.setExtraHTTPHeaders({
Authorization: `Bearer ${tenantToken}`,
// X-Molecule-Org-Id is required by workspace-server TenantGuard for
// cross-org requests routed through the CP edge; staging-setup exports it.
// Harmless (and correct) to send on the same-origin tenant box too.
...(orgID ? { "X-Molecule-Org-Id": orgID } : {}),
});
// 0. Sanity: the workspace must actually be display-enabled, else the
// whole gate is meaningless. Hit the availability endpoint first so a
// mis-pointed STAGING_DISPLAY_WORKSPACE_ID fails with a precise message
// instead of an opaque acquire error.
const availResp = await page.request.get(
`${tenantURL}/workspaces/${workspaceId}/display`,
);
expect(
availResp.status(),
`GET /display for ${workspaceId} should be 200`,
).toBe(200);
const avail = await availResp.json();
expect(
avail.available,
`workspace ${workspaceId} is not display-available (reason=${avail.reason}). ` +
"STAGING_DISPLAY_WORKSPACE_ID must point at a workspace with " +
"compute.display.mode == 'desktop-control' AND a live instance_id.",
).toBe(true);
// 1. Acquire display control. The handler returns session_url +
// expires_at; session_url embeds the signed token in its #token=
// fragment (workspace_display_control.go:signedDisplaySessionURL).
const acquireResp = await page.request.post(
`${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
{ data: { controller: "user", ttl_seconds: 300 } },
);
expect(
acquireResp.status(),
`acquire should be 200; body: ${await acquireResp.text()}`,
).toBe(200);
const acquire = await acquireResp.json();
expect(acquire.controller, "controller should be 'user'").toBe("user");
expect(
typeof acquire.session_url,
`acquire response missing session_url: ${JSON.stringify(acquire)}`,
).toBe("string");
// The token rides in the URL fragment (#token=...), never as a query
// param — confirm the contract the client (DisplayTab.tsx:459-466)
// depends on so a server-side change to the URL shape fails HERE.
const sessionUrl: string = acquire.session_url;
expect(
sessionUrl,
`session_url should carry the token in a #token= fragment: ${sessionUrl}`,
).toContain("#token=");
// 2. Open the REAL noVNC WebSocket from inside the page, so the browser
// sends Origin: <tenant> and the same-origin-canvas AdminAuth path
// accepts the upgrade (a browser WS can't set Authorization). We
// navigate to the tenant origin first purely to anchor the Origin
// header; we don't need the canvas bundle to hydrate.
await page.goto(tenantURL, { waitUntil: "domcontentloaded" });
// Reproduce DisplayTab.tsx:459-466 (displayWebSocketConnection): resolve
// session_url against the tenant origin, pull the token out of the
// fragment, strip the fragment, switch http(s)->ws(s). Then connect with
// the exact subprotocols the canvas uses (DisplayTab.tsx:339).
const result = await page.evaluate(
async ({ rawSessionUrl, upgradeTimeoutMs, frameTimeoutMs }) => {
const u = new URL(rawSessionUrl, window.location.href);
const token =
new URLSearchParams(u.hash.replace(/^#/, "")).get("token") ?? "";
if (!token) {
return { ok: false, stage: "token-parse", detail: "no #token in session_url" };
}
u.hash = "";
u.protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
const wsUrl = u.toString();
return await new Promise<{
ok: boolean;
stage: string;
detail: string;
frameBytes?: number;
frameKind?: string;
closeCode?: number;
}>((resolve) => {
let upgraded = false;
let settled = false;
const finish = (r: {
ok: boolean;
stage: string;
detail: string;
frameBytes?: number;
frameKind?: string;
closeCode?: number;
}) => {
if (settled) return;
settled = true;
try {
ws.close();
} catch {
/* ignore */
}
resolve(r);
};
let ws: WebSocket;
try {
ws = new WebSocket(wsUrl, [`binary`, `molecule-display-token.${token}`]);
} catch (e) {
resolve({ ok: false, stage: "construct", detail: String(e) });
return;
}
ws.binaryType = "arraybuffer";
const upgradeTimer = setTimeout(() => {
finish({
ok: false,
stage: "upgrade-timeout",
detail: `WS did not open within ${upgradeTimeoutMs}ms (readyState=${ws.readyState})`,
});
}, upgradeTimeoutMs);
let frameTimer: ReturnType<typeof setTimeout> | null = null;
ws.onopen = () => {
upgraded = true;
clearTimeout(upgradeTimer);
// Now wait for the first server message. RFB's ProtocolVersion
// banner is the first thing x11vnc sends; if nothing arrives the
// tunnel opened but the VNC server behind it is dead.
frameTimer = setTimeout(() => {
finish({
ok: false,
stage: "frame-timeout",
detail: `WS upgraded but no framebuffer message within ${frameTimeoutMs}ms`,
});
}, frameTimeoutMs);
};
ws.onmessage = (ev) => {
if (frameTimer) clearTimeout(frameTimer);
let bytes = 0;
let kind: string = typeof ev.data;
if (ev.data instanceof ArrayBuffer) {
bytes = ev.data.byteLength;
kind = "ArrayBuffer";
} else if (typeof Blob !== "undefined" && ev.data instanceof Blob) {
bytes = ev.data.size;
kind = "Blob";
} else if (typeof ev.data === "string") {
bytes = ev.data.length;
kind = "string";
}
finish({
ok: bytes > 0,
stage: "frame",
detail:
bytes > 0
? "received framebuffer message"
: "first message was empty",
frameBytes: bytes,
frameKind: kind,
});
};
ws.onclose = (ev) => {
// A close BEFORE open === failed upgrade (1006 abnormal / 403
// forbidden surface here). A close AFTER we already saw a frame is
// benign (our own finish() triggered it).
if (!upgraded) {
clearTimeout(upgradeTimer);
finish({
ok: false,
stage: "upgrade-close",
detail: `WS closed before upgrade (code=${ev.code}, reason="${ev.reason}") — handshake rejected somewhere in edge → ws-proxy → EIC → websockify → x11vnc`,
closeCode: ev.code,
});
}
};
ws.onerror = () => {
if (!upgraded) {
clearTimeout(upgradeTimer);
finish({
ok: false,
stage: "upgrade-error",
detail: "WS error before upgrade — proxy chain rejected the handshake",
});
}
};
});
},
{
rawSessionUrl: sessionUrl,
upgradeTimeoutMs: WS_UPGRADE_TIMEOUT_MS,
frameTimeoutMs: FIRST_FRAME_TIMEOUT_MS,
},
);
// 3. Assert the real outcome. No "flaky" escape hatch: each failure stage
// names the broken hop so a reviewer can act on it directly.
expect(
result.ok,
`take-control failed at stage="${result.stage}": ${result.detail}` +
(result.closeCode ? ` (close code ${result.closeCode})` : ""),
).toBe(true);
expect(
result.stage,
`expected to reach the 'frame' stage; got '${result.stage}' (${result.detail})`,
).toBe("frame");
expect(
result.frameBytes ?? 0,
`framebuffer message should be non-empty (kind=${result.frameKind})`,
).toBeGreaterThan(0);
});
});
+26 -188
View File
@@ -234,87 +234,23 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
"Authorization": `Bearer ${tenantToken}`,
"X-Molecule-Org-Id": orgID,
};
// Retry workspace creation on transient 5xx / timeout — staging CP can
// return 502/503/504 under load and a single-shot failure kills the
// entire E2E run. 3 attempts with 3s exponential backoff (3s, 6s, 12s)
// gives ~21s total budget, well inside the 20-min provision envelope.
let workspaceId = "";
for (let attempt = 1; attempt <= 3; attempt++) {
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
method: "POST",
headers: tenantAuth,
body: JSON.stringify({
name: "E2E Canvas Test",
runtime: "hermes",
tier: 2,
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
// platform). The old `gpt-4o` was never a registered hermes model and
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
// defaults closed to platform_managed (see the boot-shape note below),
// so a platform-namespaced model id is the registry-correct choice.
model: "moonshot/kimi-k2.6",
}),
});
if (ws.status >= 200 && ws.status < 300 && ws.body?.id) {
workspaceId = ws.body.id as string;
break;
}
const isTransient = ws.status >= 500 || ws.status === 0;
if (!isTransient || attempt === 3) {
throw new Error(`Workspace create ${ws.status} (attempt ${attempt}): ${JSON.stringify(ws.body)}`);
}
const backoff = 3000 * Math.pow(2, attempt - 1);
console.log(`[staging-setup] Workspace create transient ${ws.status}, retrying in ${backoff}ms...`);
await new Promise((r) => setTimeout(r, backoff));
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
method: "POST",
headers: tenantAuth,
body: JSON.stringify({
name: "E2E Canvas Test",
runtime: "hermes",
tier: 2,
model: "gpt-4o",
}),
});
if (ws.status >= 400 || !ws.body?.id) {
throw new Error(`Workspace create ${ws.status}: ${JSON.stringify(ws.body)}`);
}
const workspaceId = ws.body.id as string;
console.log(`[staging-setup] Workspace created: ${workspaceId}`);
// 6. Wait for workspace online
//
// This harness exists to verify the canvas *tab UI* renders (staging-
// tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard
// crash / no "Failed to load" toast). It does NOT exercise the agent —
// no LLM call is made, the spec even mocks /cp/auth/me and 401→200. All
// it needs is a workspace ROW that the canvas lists so the node renders
// and the side-panel tabs open. A fully-`online` agent is NOT required.
//
// Hermes cold-boot takes 10-13 min on slow apt days (apt + uv + hermes
// install + npm browser-tools). The controlplane bootstrap-watcher
// deadline fires at 5 min and sets status=failed prematurely; heartbeat
// then transitions failed → online after install.sh finishes. The ONLY
// failed shape we tolerate is the pre-start credential-abort
// (uptime_seconds=0, no last_sample_error) — the agent never ran. Real
// boot regressions (image pull error, panic, PYTHONPATH, etc.) still
// hard-throw immediately so triage gets detail without waiting for a
// polling timeout. See test_staging_full_saas.sh step 7/11 and issue #2632.
//
// That distinction became load-bearing on 2026-06-03: workspace-server
// #2162 (fix(provision): platform-managed workspace must fail-closed when
// CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
// with MISSING_PLATFORM_PROXY when MOLECULE_LLM_BASE_URL /
// MOLECULE_LLM_USAGE_TOKEN are not present in the tenant's env. The
// canvas E2E creates a bare hermes/moonshot platform workspace, which defaults
// closed to platform_managed (workspace_provision.go:~1009), and the
// staging tenant does not carry the CP proxy env — so the agent never
// starts. Pre-#2162 this same workspace booted credential-less (the bug
// #2162 fixed) and the tabs rendered fine; #2162 is a correct production
// safety fix, but it surfaced here as `status:"failed", uptime_seconds:0,
// last_sample_error:null` — the pre-start credential-abort shape — and the
// old hard-throw turned a UI-irrelevant boot skip into a main-red
// (core#2199). The agent boot stage is simply not what this test gates.
//
// So: online is the happy path. A `failed` row that is the PRE-START
// credential-abort shape (the agent process never ran: uptime_seconds==0
// AND no last_sample_error) is treated as RENDERABLE — the row exists,
// the node + tabs render, proceed. We do NOT mask a real boot regression:
// any `failed` carrying a last_sample_error, OR a non-zero uptime (the
// agent started then crashed — image pull, panic, PYTHONPATH, etc.),
// still hard-throws immediately so triage gets boot_stage / last_error /
// image fields without waiting for a polling timeout.
// Genuine *infra* provision failure is already caught loud one step
// earlier at the org level (instance_status === "failed").
await waitFor<boolean>(
async () => {
const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
@@ -323,27 +259,15 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
if (r.status !== 200) return null;
if (r.body?.status === "online") return true;
if (r.body?.status === "failed") {
const uptime = Number(r.body?.uptime_seconds ?? 0);
const sampleErr = r.body?.last_sample_error;
const preStartCredentialAbort = uptime === 0 && !sampleErr;
if (preStartCredentialAbort) {
// Agent never started (no LLM cred on this staging tenant — the
// expected #2162 platform-proxy gap). The workspace row still
// renders, which is all the tab-UI test needs. Proceed, but log
// loudly so a real "agent never booted because of something else"
// is not silently normalized.
console.warn(
`[staging-setup] workspace ${workspaceId} is 'failed' with the pre-start ` +
`credential-abort shape (uptime_seconds=0, no last_sample_error) — agent did ` +
`not boot (expected on staging without CP LLM proxy env, post workspace-server ` +
`#2162). The tab-UI test does not exercise the agent; proceeding with the ` +
`workspace row, which renders regardless. full body: ${JSON.stringify(r.body)}`,
);
return true;
}
// Real boot regression — hard-throw immediately with full detail.
const detail = sampleErr
? sampleErr
// last_sample_error is often empty when the failure happens before
// the agent emits a sample (e.g. boot crash, image pull error,
// missing PYTHONPATH, OpenAI quota at startup). Dumping the full
// body gives triage the boot_stage / last_error / image fields it
// needs without a second probe. Otherwise this propagates as a
// bare "Workspace failed: " — the exact useless message that
// sent #2632 to the issue tracker.
const detail = r.body.last_sample_error
? r.body.last_sample_error
: `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
throw new Error(`Workspace failed: ${detail}`);
}
@@ -357,99 +281,13 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
// 7. Hand state off to tests + teardown — overwrite the slug-only
// bootstrap state with the full state spec tests need.
//
// FAIL-CLOSED handoff: every field the spec reads must be non-empty. If
// any is missing here, the spec's env-presence guard would throw with a
// generic "did setup run?" message that hides WHICH field was lost. Catch
// it at the source — a partial provision must hard-fail setup, never hand
// off a half-built state that the spec then has to diagnose (or worse,
// skip). This is the loud, fail-closed contract: STAGING was requested,
// so an incomplete provision is an error, not a skip.
const handoff = { slug, tenantURL, workspaceId, tenantToken };
const missingFields = Object.entries(handoff)
.filter(([, v]) => !v)
.map(([k]) => k);
if (missingFields.length > 0) {
throw new Error(
`[staging-setup] provision incomplete — empty handoff field(s): ` +
`${missingFields.join(", ")}. Refusing to hand off a partial state ` +
`that would surface downstream as an opaque spec failure.`,
);
}
writeFileSync(stateFile, JSON.stringify(handoff, null, 2));
writeFileSync(
stateFile,
JSON.stringify({ slug, tenantURL, workspaceId, tenantToken }, null, 2),
);
process.env.STAGING_SLUG = slug;
process.env.STAGING_TENANT_URL = tenantURL;
process.env.STAGING_WORKSPACE_ID = workspaceId;
process.env.STAGING_TENANT_TOKEN = tenantToken;
// The ephemeral org's UUID — exported so specs that route through the CP
// edge can send X-Molecule-Org-Id (workspace-server TenantGuard). The tabs
// harness hits the tenant box same-origin and doesn't need it, but the
// take-control gate (staging-display.spec.ts) does.
process.env.STAGING_ORG_ID = orgID;
console.log(`[staging-setup] Ready — ${stateFile}`);
// 8. (core#2261 Gap 1) Resolve the STANDING desktop-capable org, if one is
// configured, for the live take-control e2e (staging-display.spec.ts).
//
// This block is FULLY env-gated and additive: it provisions NOTHING and is
// a no-op unless STAGING_DISPLAY_SLUG is set. We deliberately do NOT spin a
// desktop workspace inside this shared setup — a desktop AMI boots in
// ~12-15 min and would tax every tabs run. Instead an operator stands up
// one always-on desktop org once (a CTO cost item) and points
// STAGING_DISPLAY_SLUG + STAGING_DISPLAY_WORKSPACE_ID at it. Here we just
// resolve that standing org's tenant URL, admin token, and org id so the
// display spec can reach it. Fail-closed: if STAGING_DISPLAY_SLUG is set but
// we can't resolve its token/id, we THROW — the gate must never silently
// fall back to the (non-desktop) ephemeral org and pass.
const displaySlug = process.env.STAGING_DISPLAY_SLUG;
if (displaySlug) {
console.log(`[staging-setup] Resolving standing desktop org: ${displaySlug}`);
// org id for the standing slug (admin-orgs row carries it + status).
const orgsRes = await jsonFetch(`${CP_URL}/cp/admin/orgs`, { headers: adminAuth });
if (orgsRes.status !== 200) {
throw new Error(
`STAGING_DISPLAY_SLUG=${displaySlug} set, but GET /cp/admin/orgs returned ` +
`${orgsRes.status} — cannot resolve the standing desktop org for the ` +
`take-control gate.`,
);
}
const displayRow = (orgsRes.body?.orgs || []).find(
(o: any) => o.slug === displaySlug,
);
if (!displayRow?.id) {
throw new Error(
`STAGING_DISPLAY_SLUG=${displaySlug} not found in /cp/admin/orgs — the ` +
`standing desktop org for the take-control gate does not exist. Provision ` +
`it (one always-on desktop EC2) or unset STAGING_DISPLAY_SLUG/` +
`STAGING_DISPLAY_WORKSPACE_ID to skip the gate.`,
);
}
if (displayRow.instance_status !== "running") {
throw new Error(
`Standing desktop org ${displaySlug} is '${displayRow.instance_status}', ` +
`not 'running' — the take-control gate needs a live desktop tenant. ` +
`full row: ${JSON.stringify(displayRow)}`,
);
}
const displayTokRes = await jsonFetch(
`${CP_URL}/cp/admin/orgs/${displaySlug}/admin-token`,
{ headers: adminAuth },
);
if (displayTokRes.status !== 200 || !displayTokRes.body?.admin_token) {
throw new Error(
`admin-token fetch for standing desktop org ${displaySlug} returned ` +
`${displayTokRes.status}: ${JSON.stringify(displayTokRes.body)}`,
);
}
process.env.STAGING_DISPLAY_ORG_ID = displayRow.id;
process.env.STAGING_DISPLAY_TENANT_URL = `https://${displaySlug}.${TENANT_DOMAIN}`;
process.env.STAGING_DISPLAY_TENANT_TOKEN = displayTokRes.body.admin_token;
console.log(
`[staging-setup] Standing desktop org resolved: ${displaySlug} ` +
`(org_id=${displayRow.id}, url=${process.env.STAGING_DISPLAY_TENANT_URL})`,
);
}
}
+33 -305
View File
@@ -1,8 +1,7 @@
/**
* Staging canvas E2E opens each workspace-panel tab against a fresh
* staging org provisioned in the global setup. Asserts each tab renders
* REAL content (not an empty container, not an error state) and captures a
* screenshot for visual review.
* Staging canvas E2E opens each of the 13 workspace-panel tabs against a
* fresh staging org provisioned in the global setup. Asserts each tab
* renders without throwing and captures a screenshot for visual review.
*
* Auth model: the tenant platform's AdminAuth middleware accepts a bearer
* token OR a WorkOS session cookie. Playwright can't mint a WorkOS
@@ -11,39 +10,17 @@
* Bearer header via context.setExtraHTTPHeaders(). Every browser
* request inherits the header.
*
* PROMOTION-READINESS (see § at bottom of file): this suite is being
* hardened toward becoming a HARD merge-gate. It currently runs under
* `continue-on-error: true` (RFC internal#219 §1, non-gating) that is a
* deliberate, CTO-owned call and is NOT changed here. The hardening makes
* every assertion deterministic so that WHEN promotion happens the gate
* does not flap. See the PROMOTION-READINESS block at the foot of this
* file for what is now reliable and what still blocks promotion.
*
* Known SaaS gaps documented in #1369. These tabs legitimately cannot
* load real content in SaaS mode and are allowed an in-panel empty/error
* state (NOT a hard crash, NOT an ErrorBoundary):
* Known SaaS gaps documented in #1369 and allowed to render errored
* content without failing the test (the gate is "no hard crash, no
* 'Failed to load' toast"):
* - Files tab: empty (platform can't docker exec into a remote EC2)
* - Terminal tab: WS connect fails
* - Peers tab: 401 without workspace-scoped token
* These are enumerated in KNOWN_DEGRADED_TABS below and asserted with a
* weaker (but still non-trivial) contract: the panel renders and does not
* crash the app. Every OTHER tab must render real content.
*/
import { test, expect, type Page } from "@playwright/test";
import { test, expect } from "@playwright/test";
// Tab ids as declared in canvas/src/components/SidePanel.tsx TABS.
//
// NOTE (drift guard): this list is asserted-complete against the live DOM
// below (see "tab list parity" step) so it cannot silently drift out of
// sync with SidePanel.tsx TABS the way a hand-maintained constant does.
// `display` and `container-config` are intentionally EXCLUDED here:
// - `display` is owned by the in-flight take-control e2e (PR #2275 /
// staging-display.spec.ts); asserting it here would collide.
// - `container-config` only renders when selectedNodeId is set AND is
// gated on tier; it is covered by container-config-specific specs.
// The parity check accounts for these via EXPECTED_EXTRA_TABS so a NEW
// tab appearing in SidePanel still trips the guard.
const TAB_IDS = [
"chat",
"activity",
@@ -60,131 +37,12 @@ const TAB_IDS = [
"audit",
] as const;
// Tabs present in the DOM that this spec intentionally does not drive.
// Keeping this explicit means a genuinely-new tab (not one of these) makes
// the parity assertion fail LOUD instead of being silently un-tested.
const EXPECTED_EXTRA_TABS = ["display", "container-config"] as const;
// Tabs that are KNOWN to degrade in SaaS mode (#1369). They get the weaker
// "renders + no crash" contract instead of the "real content" contract.
// Anything NOT in this set must render real content or the test fails.
const KNOWN_DEGRADED_TABS = new Set<string>(["terminal", "files"]);
const STAGING = process.env.CANVAS_E2E_STAGING === "1";
// IMPORTANT — fail-closed, not skip-green.
//
// `test.skip(!STAGING)` is correct ONLY when the operator never asked for a
// staging run (CANVAS_E2E_STAGING unset). In that case the workflow's
// detect-changes / token-check gates have already decided not to exercise
// staging, and skipping is the documented contract.
//
// But if STAGING *is* requested (CANVAS_E2E_STAGING=1) and global setup did
// NOT hand off the tenant state, that is a HARD failure, not a skip — see
// the explicit env-presence throw inside the test body. A silent skip there
// would let a broken provision ship green, which is exactly the
// weak-gate failure this hardening removes (§ No flakes / internal#828).
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — staging-only suite, not requested");
/**
* Assert the panel for `tabId` rendered real content.
*
* Deterministic contract (no fixed waits every step is condition-based
* with Playwright's built-in retry / expect.poll):
* 1. The tabpanel container is visible.
* 2. The global ErrorBoundary did NOT trip ("Something went wrong").
* 3. No visible error alert is shown in the panel.
* 4. For non-degraded tabs: the panel settles to non-empty,
* non-spinner content (so an empty <div/> or a stuck "Loading…"
* spinner FAILS instead of passing as it did before).
*/
async function assertPanelRendered(page: Page, tabId: string): Promise<void> {
const panel = page.locator(`#panel-${tabId}`);
// (1) Container visible. Built-in retry up to the expect timeout — no
// arbitrary waitForTimeout. Mechanism: replaces any reliance on a fixed
// settle delay with a real visibility condition.
await expect(panel, `panel for ${tabId} never became visible`).toBeVisible({
timeout: 10_000,
});
// (2) ErrorBoundary trip = hard crash anywhere in the React subtree.
// canvas/src/components/ErrorBoundary.tsx renders "Something went wrong".
// The OLD gate only looked for a "Failed to load" toast and would ship
// an ErrorBoundary-crashed panel GREEN. Mechanism: assert the crash
// surface is absent, retried via expect.poll so a late-mounting crash
// banner is still caught.
await expect
.poll(
async () =>
page.getByText("Something went wrong", { exact: false }).count(),
{
message: `tab ${tabId}: ErrorBoundary tripped (Something went wrong)`,
timeout: 5_000,
},
)
.toBe(0);
// (3) No visible error alert inside the panel. Tabs surface load errors
// as role="alert" with the real error text (EventsTab/ChannelsTab/
// ConfigTab/...). The OLD gate matched ONLY [role=alert]:has-text("Failed
// to load") — it missed (a) error messages that don't contain that exact
// phrase and (b) error divs that omit role="alert" entirely (e.g.
// ActivityTab). We replace it with a broader, but still SaaS-gap-aware,
// check: any *visible* alert OR red error banner inside the panel.
//
// Degraded tabs (#1369) are allowed an error state — for those we only
// require no app-level crash (covered by step 2). For every other tab a
// visible error alert is a real regression.
if (!KNOWN_DEGRADED_TABS.has(tabId)) {
const visibleAlerts = panel.locator('[role="alert"]:visible');
await expect
.poll(async () => visibleAlerts.count(), {
message:
`tab ${tabId}: a visible error alert is shown in the panel ` +
`(was a weak "Failed to load"-only check before)`,
timeout: 5_000,
})
.toBe(0);
}
// (4) Real content. The tabpanel CONTAINER always mounts, so the old
// toBeVisible() on the container passed even when the child rendered
// nothing. Assert the panel's trimmed innerText is non-empty AND not
// stuck on a loading spinner. expect.poll retries until the async
// fetch+render settles — replacing the implicit "the network finished
// by now" timing assumption with an explicit polled condition.
//
// Degraded tabs may legitimately be empty (Files in SaaS mode), so they
// are exempt from the non-empty requirement; step 2 still guards them
// against a hard crash.
if (!KNOWN_DEGRADED_TABS.has(tabId)) {
await expect
.poll(
async () => {
const text = ((await panel.innerText()) || "").trim();
// A panel still showing only a loading spinner has not settled.
const stillLoading = /^(loading\b|loading…|loading\.\.\.)/i.test(
text,
);
return text.length > 0 && !stillLoading;
},
{
message:
`tab ${tabId}: panel rendered empty or stuck on a loading ` +
`spinner — no real content settled (weak "container visible" ` +
`gate would have passed this)`,
// Generous: real tabs fetch from the tenant over the network.
// Polled, so it returns as soon as content appears.
timeout: 20_000,
},
)
.toBe(true);
}
}
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
test.describe("staging canvas tabs", () => {
test("each workspace-panel tab renders real content", async ({
test("each workspace-panel tab renders without error", async ({
page,
context,
}) => {
@@ -192,16 +50,9 @@ test.describe("staging canvas tabs", () => {
const tenantToken = process.env.STAGING_TENANT_TOKEN;
const workspaceId = process.env.STAGING_WORKSPACE_ID;
// FAIL-CLOSED (not skip): STAGING was requested but global setup did
// not export tenant state. A silent skip here would paint a broken
// provision GREEN. This is the loud-fail the hardening mandates.
if (!tenantURL || !tenantToken || !workspaceId) {
throw new Error(
"staging-setup.ts did not export STAGING_TENANT_URL / " +
"STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID. CANVAS_E2E_STAGING=1 " +
"was set (staging WAS requested) but global setup produced no " +
"tenant — this is a provisioning failure, NOT a reason to skip. " +
"Check the [staging-setup] log above for the real error.",
"staging-setup.ts did not export STAGING_TENANT_URL / STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID — did global setup run?",
);
}
@@ -301,19 +152,11 @@ test.describe("staging canvas tabs", () => {
// omit the URL, so we'd otherwise be flying blind. Logged to the
// test's stdout (visible in the workflow log under the failed step).
page.on("requestfailed", (req) => {
console.log(
`[e2e/requestfailed] ${req.method()} ${req.url()}: ${
req.failure()?.errorText ?? "?"
}`,
);
console.log(`[e2e/requestfailed] ${req.method()} ${req.url()}: ${req.failure()?.errorText ?? "?"}`);
});
page.on("response", (res) => {
if (res.status() >= 400) {
console.log(
`[e2e/response-${res.status()}] ${res
.request()
.method()} ${res.url()}`,
);
console.log(`[e2e/response-${res.status()}] ${res.request().method()} ${res.url()}`);
}
});
@@ -330,8 +173,9 @@ test.describe("staging canvas tabs", () => {
// hydrated, even with zero workspaces) or the hydration-error
// banner — whichever wins first. Previous version of this wait
// used `[role="tablist"]`, but that selector only appears AFTER
// a workspace node is clicked, so the wait would always time out
// at 45s before any meaningful failure surfaced.
// a workspace node is clicked (which happens below at L100), so
// the wait would always time out at 45s before any meaningful
// failure surfaced.
await page.waitForSelector(
'[aria-label="Molecule AI workspace canvas"], [data-testid="hydration-error"]',
{ timeout: 45_000 },
@@ -345,20 +189,10 @@ test.describe("staging canvas tabs", () => {
"canvas hydration failed — check staging CP + tenant reachability",
).toBe(0);
// The global ErrorBoundary must not have tripped at the app root
// either — a crash before the side panel even opens would otherwise
// be invisible until a tab assertion happened to notice it.
await expect(
page.getByText("Something went wrong", { exact: false }),
"app-level ErrorBoundary tripped during hydration",
).toHaveCount(0);
// Click the workspace node to open the side panel. Try a data
// attribute first, fall back to a generic role-based selector so
// the test doesn't break when the node-card markup changes.
const byDataAttr = page
.locator(`[data-workspace-id="${workspaceId}"]`)
.first();
const byDataAttr = page.locator(`[data-workspace-id="${workspaceId}"]`).first();
if ((await byDataAttr.count()) > 0) {
await byDataAttr.click({ timeout: 10_000 });
} else {
@@ -368,56 +202,19 @@ test.describe("staging canvas tabs", () => {
await firstNode.click({ timeout: 10_000 });
}
// The tablist appears once the side panel mounts. Condition-based
// wait — no fixed delay.
const tablist = page.getByRole("tablist", { name: "Workspace panel tabs" });
await expect(
tablist,
"side panel tablist never appeared after clicking the workspace node",
).toBeVisible({ timeout: 15_000 });
// Tab-list parity guard. The hand-maintained TAB_IDS constant used to
// be able to drift silently out of sync with SidePanel.tsx TABS — a
// tab could be added to the UI and never get an assertion, shipping
// broken-but-untested. Read the actual tab ids from the DOM and assert
// every live tab is either driven by this spec (TAB_IDS) or explicitly
// excluded (EXPECTED_EXTRA_TABS). A genuinely-new tab fails LOUD.
const liveTabIds = (
await tablist.locator('[role="tab"][id^="tab-"]').evaluateAll((els) =>
els.map((el) => el.id.replace(/^tab-/, "")),
)
).sort();
const accountedFor = new Set<string>([
...TAB_IDS,
...EXPECTED_EXTRA_TABS,
]);
const unaccounted = liveTabIds.filter((id) => !accountedFor.has(id));
expect(
unaccounted,
`SidePanel exposes tab(s) this spec neither drives nor excludes: ` +
`${unaccounted.join(", ")}. Add them to TAB_IDS (and assert their ` +
`content) or to EXPECTED_EXTRA_TABS with a reason.`,
).toHaveLength(0);
// And the inverse: every TAB_ID we intend to drive must actually exist
// in the DOM, so a renamed/removed tab fails here instead of timing out
// on a missing #tab-<id> selector with an opaque message.
const missing = TAB_IDS.filter((id) => !liveTabIds.includes(id));
expect(
missing,
`TAB_IDS references tab(s) not present in SidePanel: ${missing.join(
", ",
)} the spec's tab list has drifted from SidePanel.tsx TABS.`,
).toHaveLength(0);
await page.waitForSelector('[role="tablist"]', { timeout: 15_000 });
for (const tabId of TAB_IDS) {
await test.step(`tab: ${tabId}`, async () => {
const tabButton = page.locator(`#tab-${tabId}`);
// The TABS bar is `overflow-x-auto` — tabs past position ~3 are
// clipped behind the right-edge fade gradient on smaller
// viewports. Playwright's toBeVisible() returns false for clipped
// elements, so a bare visibility check fails on later tabs in CI.
// scrollIntoViewIfNeeded brings the button into view before the
// visibility check.
// The TABS bar is `overflow-x-auto` (SidePanel.tsx:~tabs
// wrapper) — tabs after position ~3 are clipped behind the
// right-edge fade gradient on smaller viewports. Playwright's
// `toBeVisible()` returns false for clipped elements, so a
// bare visibility check fails on `skills` and later tabs in
// CI. scrollIntoViewIfNeeded brings the button into view
// before the visibility check, mirroring what SidePanel's own
// keyboard handler does on arrow-key navigation.
await tabButton.scrollIntoViewIfNeeded({ timeout: 5_000 });
await expect(
tabButton,
@@ -425,34 +222,18 @@ test.describe("staging canvas tabs", () => {
).toBeVisible({ timeout: 5_000 });
await tabButton.click();
// Confirm the click actually activated this tab before asserting
// its content — aria-selected flips on the active tab. This closes
// a race where a slow click handler left the PREVIOUS tab's panel
// mounted and we asserted the wrong panel's content. Built-in
// retry, condition-based, no fixed wait.
await expect(
tabButton,
`tab-${tabId} did not become the selected tab after click`,
).toHaveAttribute("aria-selected", "true", { timeout: 5_000 });
const panel = page.locator(`#panel-${tabId}`);
await expect(panel, `panel for ${tabId} never rendered`).toBeVisible({
timeout: 10_000,
});
// Real-content assertion (the core hardening). See
// assertPanelRendered: container visible + no ErrorBoundary + no
// visible error alert + settled non-empty content for non-degraded
// tabs. Replaces the old "panel visible + no Failed-to-load toast"
// pair, which shipped empty/errored panels green.
await assertPanelRendered(page, tabId);
// Belt to the braces: the original toast check stays. A global
// "Failed to load" toast (role=alert outside the panel) is still a
// crash signal worth catching even though the in-panel checks above
// now do the heavy lifting.
// "Failed to load" toast = hard crash. Known SaaS-mode gaps
// (Files empty, Terminal disconnected, Peers 401) surface as
// in-panel content, not toasts.
const errorToasts = await page
.locator('[role="alert"]:has-text("Failed to load")')
.count();
expect(
errorToasts,
`tab ${tabId}: a global "Failed to load" toast is showing`,
).toBe(0);
expect(errorToasts, `tab ${tabId}: "Failed to load" toast`).toBe(0);
await page.screenshot({
path: `test-results/staging-tab-${tabId}.png`,
@@ -486,56 +267,3 @@ test.describe("staging canvas tabs", () => {
).toHaveLength(0);
});
});
/*
* PROMOTION-READINESS staging canvas E2E HARD merge-gate
* ----------------------------------------------------------
* NOW RELIABLE (deterministic; these no longer flap on timing):
* - Every wait is condition-based (toBeVisible / toHaveAttribute /
* expect.poll). There is NO fixed waitForTimeout / sleep in the spec;
* the only setTimeout is the bounded poll-interval inside
* staging-setup.ts waitFor(), which has a hard deadline.
* - Tabs are asserted on REAL settled content (non-empty, non-spinner),
* not just "container is visible" an empty or stuck-loading panel now
* fails instead of shipping green.
* - The ErrorBoundary ("Something went wrong") is asserted absent at app
* hydration AND per tab a React subtree crash can no longer pass.
* - Visible error alerts inside a panel fail non-degraded tabs (was a
* weak [role=alert]:has-text("Failed to load")-only check that missed
* both other error phrasings and role-less error divs).
* - The driven tab list is parity-checked against the live DOM, so a new
* SidePanel tab can't ship un-tested and a removed one fails loud.
* - Clickactivation is confirmed (aria-selected) before asserting the
* panel, removing a wrong-panel race.
* - The suite is fail-closed: CANVAS_E2E_STAGING=1 with no tenant state
* hard-errors (never skipsgreen); CANVAS_E2E_STAGING unset cleanly
* skips (operator did not request staging).
*
* STILL BLOCKS PROMOTION-TO-REQUIRED (do NOT flip continue-on-error here
* CTO-owned, RFC internal#219 §1):
* - INFRA DEPENDENCY: each run provisions a real staging EC2 tenant
* (12-20 min cold boot). Required-gate latency + AWS/Cloudflare/CP
* availability become merge-blockers. A staging outage would freeze
* main even though the code is fine unacceptable for a required check
* until staging has an SLA or this runs against a warm pre-provisioned
* pool.
* - SHARED-RESOURCE FLAKE SURFACE: TLS/DNS/ACME propagation on a shared
* staging zone (staging-setup TLS_TIMEOUT_MS) is outside this repo's
* control. Deterministic here deterministic upstream.
* - SECRET DEPENDENCY: CP_STAGING_ADMIN_API_TOKEN must be present on the
* runner. The workflow's skip-if-absent (core#2225) keeps a missing
* secret from painting red correct for non-gating, but a REQUIRED
* check must instead guarantee the secret is always present, else it
* skip-greens the very thing it is supposed to enforce.
* - SINGLE-WORKSPACE COVERAGE: one hermes/platform_managed workspace that
* does NOT boot an agent on staging (no CP LLM proxy env, workspace-
* server #2162). Tabs render, but agent-dependent content paths (live
* chat round-trip, traces from a real run) are not exercised.
*
* PROMOTION CHECKLIST (when CTO signs off on making this required):
* 1. Warm pre-provisioned tenant pool OR a staging SLA bounding boot time.
* 2. Guarantee CP_STAGING_ADMIN_API_TOKEN on the gating runner; turn the
* skip-if-absent into a hard error for the required path.
* 3. Decide whether agent-dependent tabs need a wired LLM proxy on the
* staging tenant (covers chat/traces real content) before gating them.
*/
-8
View File
@@ -7,14 +7,6 @@ export default defineConfig({
fullyParallel: false,
workers: 1,
retries: 0,
// Fail CLOSED when an explicit spec selection matches zero tests.
// Playwright defaults this to true, so `playwright test e2e/chat-*.spec.ts`
// would exit 0 (green) if those files were renamed/moved/deleted — a
// false-green that would silently gut the e2e-chat gate after a refactor.
// forbidOnly likewise stops a stray `test.only` from green-ing the suite
// while skipping every other case.
passWithNoTests: false,
forbidOnly: !!process.env.CI,
use: {
baseURL: process.env.PLAYWRIGHT_BASE_URL || "http://localhost:3000",
headless: true,
@@ -1,17 +1,12 @@
/**
* Canvas /api/buildinfo version-display endpoint mirroring
* workspace-server's /buildinfo. Lets `curl <url>/api/buildinfo`
* confirm which git SHA is live on a canvas deployment (core#2235).
* confirm which git SHA is live on a canvas deployment.
*/
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import { GET } from "../route";
const ENV_KEYS = [
"BUILD_SHA",
"VERCEL_GIT_COMMIT_SHA",
"VERCEL_GIT_COMMIT_REF",
"VERCEL_ENV",
];
const ENV_KEYS = ["VERCEL_GIT_COMMIT_SHA", "VERCEL_GIT_COMMIT_REF", "VERCEL_ENV"];
describe("GET /api/buildinfo", () => {
let saved: Record<string, string | undefined>;
@@ -28,24 +23,13 @@ describe("GET /api/buildinfo", () => {
}
});
it("returns dev sentinel when no SHA source is set", async () => {
it("returns dev sentinel when Vercel env vars are unset", async () => {
const res = await GET();
const body = await res.json();
expect(body).toEqual({ git_sha: "dev", git_ref: "", vercel_env: "local" });
});
it("reports BUILD_SHA baked into the Docker image (fleet deploy path)", async () => {
// BUILD_SHA is the authoritative source for the ECR-image fleet deploy,
// which never runs on Vercel. It must win even when a Vercel var is also
// present in the environment.
process.env.BUILD_SHA = "deadbeefcafe";
process.env.VERCEL_GIT_COMMIT_SHA = "should-not-win";
const res = await GET();
const body = await res.json();
expect(body.git_sha).toBe("deadbeefcafe");
});
it("falls back to the SHA Vercel injected when BUILD_SHA is unset", async () => {
it("reports the SHA Vercel injected at build time", async () => {
process.env.VERCEL_GIT_COMMIT_SHA = "abc1234567890";
process.env.VERCEL_GIT_COMMIT_REF = "main";
process.env.VERCEL_ENV = "production";
+8 -27
View File
@@ -1,36 +1,17 @@
import { NextResponse } from "next/server";
// Mirror of workspace-server's GET /buildinfo (PR #2398). Lets a developer
// or the fleet redeploy workflow confirm which git SHA is live on a canvas
// deployment with the same `curl <url>/api/buildinfo` flow used against
// tenant workspaces (core#2235; cross-ref core#2226).
// confirm which git SHA is live on a canvas deployment with the same
// `curl <url>/buildinfo` flow they use against tenant workspaces.
//
// SHA source, in priority order:
// 1. BUILD_SHA — server-only env baked into the canvas Docker image at
// build time (Dockerfile `ARG BUILD_SHA` → `ENV BUILD_SHA`, wired
// from `${{ github.sha }}` in publish-canvas-image.yml). This is the
// authoritative source for the fleet's ECR-image deploy path, which
// does NOT run on Vercel. Read server-side here (App Router route
// handler runs on the standalone Node server, `output: "standalone"`),
// so it is intentionally NOT a NEXT_PUBLIC_ var — keeping it out of
// the client bundle.
// 2. VERCEL_GIT_COMMIT_SHA — Vercel injects this at build time when the
// canvas is deployed via Vercel rather than the Docker image.
// 3. "dev" — local `next dev` / test harness, where neither is set. Same
// sentinel workspace-server uses pre-ldflags-injection, so both
// surfaces speak the same vocabulary and an unconfigured deploy
// fails the SHA comparison closed instead of round-tripping "".
//
// force-dynamic so the response is evaluated at request time against the
// runtime env of the standalone server (where ENV BUILD_SHA lives), not
// frozen into a static asset at `next build`.
export const dynamic = "force-dynamic";
// Vercel injects VERCEL_GIT_COMMIT_SHA / _REF / VERCEL_ENV at build time
// from the deploying commit; outside Vercel (local `next dev`, harness)
// these are unset and the endpoint reports `git_sha: "dev"`. Same sentinel
// the workspace-server uses pre-ldflags-injection so both surfaces speak
// the same vocabulary.
export async function GET() {
const sha =
process.env.BUILD_SHA ?? process.env.VERCEL_GIT_COMMIT_SHA ?? "dev";
return NextResponse.json({
git_sha: sha,
git_sha: process.env.VERCEL_GIT_COMMIT_SHA ?? "dev",
git_ref: process.env.VERCEL_GIT_COMMIT_REF ?? "",
vercel_env: process.env.VERCEL_ENV ?? "local",
});
+20
View File
@@ -179,6 +179,7 @@ function Shell({
<p className="mt-2 text-ink-mid">
Each org is an isolated Molecule workspace.
</p>
<DataResidencyNotice />
<div className="mt-8">{children}</div>
</div>
</TermsGate>
@@ -219,6 +220,25 @@ function AccountBar({ session }: { session: Session }) {
</div>
);
}
// DataResidencyNotice surfaces where workspace data lives so EU-based
// signups can make an informed choice (GDPR Art. 13 disclosure
// requirement). Plain text, no icon — the goal is clarity, not
// decoration. A future EU region selector can replace this with a
// region dropdown.
function DataResidencyNotice() {
return (
<p className="mt-3 rounded border border-line bg-surface-sunken/60 px-3 py-2 text-xs text-ink-mid">
Workspaces run in AWS us-east-2 (Ohio, United States). EU region support is on the roadmap reach out to
{" "}
<a href="mailto:support@moleculesai.app" className="underline">
support@moleculesai.app
</a>
{" "}if you need data residency in another region today.
</p>
);
}
function OrgRow({ org }: { org: Org }) {
return (
<li className="rounded-lg border border-line bg-surface-sunken p-4">
+2 -2
View File
@@ -172,7 +172,7 @@ export function ContextMenu() {
const nodeId = contextMenu.nodeId;
closeContextMenu();
try {
await api.post(`/workspaces/${nodeId}/pause?cascade=true`, {});
await api.post(`/workspaces/${nodeId}/pause`, {});
updateNodeData(nodeId, { status: "paused" });
} catch (e) {
showToast("Pause failed", "error");
@@ -184,7 +184,7 @@ export function ContextMenu() {
const nodeId = contextMenu.nodeId;
closeContextMenu();
try {
await api.post(`/workspaces/${nodeId}/resume?cascade=true`, {});
await api.post(`/workspaces/${nodeId}/resume`, {});
updateNodeData(nodeId, { status: "provisioning" });
} catch (e) {
showToast("Resume failed", "error");
+24 -135
View File
@@ -8,13 +8,9 @@ import { ExternalConnectModal, type ExternalConnectionInfo } from "./ExternalCon
import {
ProviderModelSelector,
buildProviderCatalog,
buildProviderCatalogFromRegistry,
findProviderForModel,
isPlatformManagedProvider,
type SelectorModel,
type SelectorValue,
type RegistryProvider,
type RegistryModel,
} from "./ProviderModelSelector";
interface WorkspaceOption {
@@ -36,16 +32,6 @@ interface TemplateSpec {
model?: string;
models?: SelectorModel[];
providers?: string[];
// internal#718 P3 registry-served fields (additive; absent on older
// backends and for non-registry runtimes). When registry_backed is true the
// provider→model catalog is built from registry_providers/registry_models so
// each model's DERIVED provider (e.g. moonshot/kimi-k2.6 → "platform") drives
// the dropdown bucket and the create payload's llm_provider — instead of the
// legacy inferVendor heuristic that slash-splits the id into "moonshot".
// Mirrors ConfigTab's RuntimeOption loader (RFC#340 Fix C).
registry_backed?: boolean;
registry_providers?: RegistryProvider[];
registry_models?: RegistryModel[];
}
const DEFAULT_RUNTIME = "claude-code";
@@ -60,16 +46,6 @@ const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "goog
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
const DEFAULT_HEADLESS_ROOT_GB = 30;
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
// Per-workspace cloud/compute backend (multi-provider RFC). "aws" is the default
// EC2 path; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner. A
// workspace whose cloud differs from its tenant's is reached over a per-workspace
// Cloudflare tunnel (runtime#95). Distinct from the LLM/model provider.
const CLOUD_PROVIDER_OPTIONS = [
{ value: "aws", label: "AWS (default)" },
{ value: "gcp", label: "GCP" },
{ value: "hetzner", label: "Hetzner" },
];
const DEFAULT_DISPLAY_ROOT_GB = 80;
export function CreateWorkspaceButton() {
@@ -87,10 +63,6 @@ export function CreateWorkspaceButton() {
const [displayInstanceType, setDisplayInstanceType] = useState(DEFAULT_DISPLAY_INSTANCE_TYPE);
const [displayRootGB, setDisplayRootGB] = useState(String(DEFAULT_DISPLAY_ROOT_GB));
const [displayResolution, setDisplayResolution] = useState("1920x1080");
// Cloud/compute backend for the workspace box (multi-provider, per-workspace).
// "aws" default; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner
// (a non-tenant-cloud box is reached over a per-workspace tunnel, runtime#95).
const [cloudProvider, setCloudProvider] = useState("aws");
// Templates fetched from /api/templates — drives the dynamic provider
// filter below. Same data source ConfigTab uses (PR #2454). When the
// selected template declares `runtime_config.providers` in its
@@ -196,53 +168,15 @@ export function CreateWorkspaceButton() {
}),
[runtime, templateSpecs],
);
// The /templates row backing the LLM picker: an explicitly-selected
// workspace template wins, else the base runtime template row.
const llmSourceSpec = useMemo<TemplateSpec | null>(
() => selectedTemplateSpec ?? selectedRuntimeTemplateSpec,
const llmModels = useMemo(
() => {
const sourceSpec = selectedTemplateSpec ?? selectedRuntimeTemplateSpec;
if (!sourceSpec?.models?.length) return [];
return sourceSpec.models;
},
[selectedRuntimeTemplateSpec, selectedTemplateSpec],
);
// internal#718 P3 / RFC#340 Fix C: a runtime is registry-backed when the
// /templates row says so AND it served a non-empty registry_models set.
// Mirrors ConfigTab's `registryBacked` derivation exactly.
const registryBacked = useMemo(
() =>
llmSourceSpec?.registry_backed === true &&
(llmSourceSpec.registry_models?.length ?? 0) > 0,
[llmSourceSpec],
);
// Models fed to the selector dropdown. For a registry-backed runtime use the
// registry-served native set, carrying each model's DERIVED provider so the
// selector buckets it correctly (moonshot/kimi-k2.6 → "platform", not the
// inferVendor "moonshot"). Otherwise fall back to the template-served
// models[] + the legacy heuristic — same fallback ConfigTab keeps.
const llmModels = useMemo<SelectorModel[]>(
() => {
if (registryBacked) {
return (llmSourceSpec?.registry_models ?? []).map((m) => ({
id: m.id,
name: m.name,
...(m.provider ? { provider: m.provider } : {}),
}));
}
return llmSourceSpec?.models?.length ? llmSourceSpec.models : [];
},
[registryBacked, llmSourceSpec],
);
// Registry-backed path: build the catalog from registry_providers/
// registry_models so dropdown labels + billing + the derived provider come
// from the provider-registry SSOT (restores the "Platform" bucket). Legacy
// path: re-infer from models[] via buildProviderCatalog (inferVendor).
const llmCatalog = useMemo(
() =>
registryBacked
? buildProviderCatalogFromRegistry(
llmSourceSpec?.registry_providers ?? [],
llmSourceSpec?.registry_models ?? [],
)
: buildProviderCatalog(llmModels),
[registryBacked, llmSourceSpec, llmModels],
);
const llmCatalog = useMemo(() => buildProviderCatalog(llmModels), [llmModels]);
const selectedLLMProvider = useMemo(
() => llmCatalog.find((p) => p.id === llmSelection.providerId) ?? llmCatalog[0],
[llmCatalog, llmSelection.providerId],
@@ -250,7 +184,7 @@ export function CreateWorkspaceButton() {
useEffect(() => {
if (llmCatalog.length === 0) return;
const sourceDefault = llmSourceSpec?.model?.trim();
const sourceDefault = (selectedTemplateSpec ?? selectedRuntimeTemplateSpec)?.model?.trim();
const platformProvider = llmCatalog.find((p) => p.vendor === "platform");
const matched = sourceDefault ? findProviderForModel(llmCatalog, sourceDefault) : null;
const next = platformProvider ?? matched ?? llmCatalog[0];
@@ -263,7 +197,7 @@ export function CreateWorkspaceButton() {
envVars: next.envVars,
});
setLLMSecret("");
}, [llmCatalog, llmSourceSpec]);
}, [llmCatalog, selectedRuntimeTemplateSpec, selectedTemplateSpec]);
// Reset form and load workspaces whenever dialog opens
useEffect(() => {
@@ -280,7 +214,6 @@ export function CreateWorkspaceButton() {
setDisplayInstanceType(DEFAULT_DISPLAY_INSTANCE_TYPE);
setDisplayRootGB(String(DEFAULT_DISPLAY_ROOT_GB));
setDisplayResolution("1920x1080");
setCloudProvider("aws");
setExternalRuntime("external");
setLLMSelection({ providerId: "", model: "", envVars: [] });
setLLMSecret("");
@@ -306,15 +239,7 @@ export function CreateWorkspaceButton() {
setError("Model is required");
return;
}
// Platform-managed providers need NO user credential — the platform injects
// its own usage token (MOLECULE_LLM_USAGE_TOKEN = tenant admin_token) at
// provision time. Only BYOK providers require a user-supplied key. (#2245)
if (
!isExternal &&
!isPlatformManagedProvider(selectedLLMProvider) &&
selectedLLMProvider?.envVars.length &&
!llmSecret.trim()
) {
if (!isExternal && selectedLLMProvider?.envVars.length && !llmSecret.trim()) {
setError("Provider credential is required");
return;
}
@@ -349,11 +274,7 @@ export function CreateWorkspaceButton() {
? {
model: llmSelection.model.trim(),
llm_provider: nativeProvider.vendor,
// Only BYOK providers carry a user secret. For platform-managed
// the token is provisioner-injected; sending an (empty) secret
// here would clobber it — so omit it entirely. (#2245)
...(nativeProvider.envVars.length > 0 &&
!isPlatformManagedProvider(nativeProvider)
...(nativeProvider.envVars.length > 0
? { secrets: { [nativeProvider.envVars[0]]: llmSecret.trim() } }
: {}),
}
@@ -370,16 +291,11 @@ export function CreateWorkspaceButton() {
width: Number.isFinite(displayWidth) ? displayWidth : 1920,
height: Number.isFinite(displayHeight) ? displayHeight : 1080,
},
// Only meaningful when CP provisions the box (SaaS), where
// the picker is shown. Omit on self-hosted so the payload is
// unchanged there.
...(isSaaS ? { provider: cloudProvider } : {}),
}
: {
instance_type: DEFAULT_HEADLESS_INSTANCE_TYPE,
volume: { root_gb: DEFAULT_HEADLESS_ROOT_GB },
display: { mode: "none" },
...(isSaaS ? { provider: cloudProvider } : {}),
},
}
: {}),
@@ -545,7 +461,6 @@ export function CreateWorkspaceButton() {
</div>
<ProviderModelSelector
models={llmModels}
catalog={registryBacked ? llmCatalog : undefined}
value={llmSelection}
onChange={(next) => {
setLLMSelection(next);
@@ -554,26 +469,20 @@ export function CreateWorkspaceButton() {
idPrefix="create-workspace-llm"
variant="stack"
/>
{isPlatformManagedProvider(selectedLLMProvider) ? (
<div className="text-[11px] text-ink-soft">
Platform-managed no API key required.
{selectedLLMProvider.envVars.length > 0 && (
<div>
<label htmlFor="llm-secret-input" className="text-[11px] text-ink-mid block mb-1">
{selectedLLMProvider.envVars[0]}
</label>
<input
id="llm-secret-input"
type="password"
value={llmSecret}
onChange={(e) => setLLMSecret(e.target.value)}
autoComplete="off"
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors font-mono"
/>
</div>
) : (
selectedLLMProvider.envVars.length > 0 && (
<div>
<label htmlFor="llm-secret-input" className="text-[11px] text-ink-mid block mb-1">
{selectedLLMProvider.envVars[0]}
</label>
<input
id="llm-secret-input"
type="password"
value={llmSecret}
onChange={(e) => setLLMSecret(e.target.value)}
autoComplete="off"
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors font-mono"
/>
</div>
)
)}
</div>
)}
@@ -619,26 +528,6 @@ export function CreateWorkspaceButton() {
<div className="mb-2 text-[11px] font-medium text-ink-mid">
Container Config
</div>
{/* Cloud provider only meaningful when CP provisions the box
(SaaS). A non-tenant-cloud workspace is reached over a
per-workspace Cloudflare tunnel (runtime#95). */}
{isSaaS && (
<label htmlFor="workspace-cloud-provider" className="mb-3 grid gap-1">
<span className="text-xs font-medium text-ink">Cloud provider</span>
<select
id="workspace-cloud-provider"
value={cloudProvider}
onChange={(e) => setCloudProvider(e.target.value)}
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors"
>
{CLOUD_PROVIDER_OPTIONS.map((p) => (
<option key={p.value} value={p.value}>
{p.label}
</option>
))}
</select>
</label>
)}
<label className="flex items-center justify-between gap-3">
<span className="text-xs font-medium text-ink">Display</span>
<input
+3 -15
View File
@@ -12,7 +12,6 @@ import {
ProviderModelSelector,
buildProviderCatalog,
findProviderForModel,
isPlatformManagedProvider,
type SelectorValue,
} from "./ProviderModelSelector";
@@ -268,21 +267,10 @@ function ProviderPickerModal({
setSelectorValue(initial);
}, [open, initial]);
// #2248: filter out provisioner-injected internal tokens for platform-managed
// providers so the user can't clobber them. Memoized so the array reference is
// stable across renders and does not churn the entries useEffect.
const userEditableEnvVars = useMemo(() => {
const selectedProvider = catalog.find((p) => p.id === selectorValue.providerId);
const isPlatformManaged = selectedProvider ? isPlatformManagedProvider(selectedProvider) : false;
return isPlatformManaged
? selectorValue.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
: selectorValue.envVars;
}, [catalog, selectorValue.providerId, selectorValue.envVars]);
useEffect(() => {
if (!open) return;
setEntries(
userEditableEnvVars.map((key) => ({
selectorValue.envVars.map((key) => ({
key,
value: "",
// Pre-mark as saved when the key is already in the configured
@@ -295,7 +283,7 @@ function ProviderPickerModal({
);
setOptionalEntries(
optionalKeys
.filter((key) => !userEditableEnvVars.includes(key))
.filter((key) => !selectorValue.envVars.includes(key))
.map((key) => ({
key,
value: "",
@@ -304,7 +292,7 @@ function ProviderPickerModal({
error: null,
})),
);
}, [open, userEditableEnvVars, configuredKeys, optionalKeys]);
}, [open, selectorValue.envVars, configuredKeys, optionalKeys]);
useEffect(() => {
if (!open) return;
@@ -55,21 +55,6 @@ export interface ProviderEntry {
billingMode?: "platform_managed" | "byok";
}
/** A provider is "platform-managed" when the Molecule platform proxies the LLM
* call and injects its own usage credential the tenant admin_token, surfaced
* to the workspace as MOLECULE_LLM_USAGE_TOKEN by the CP provisioner
* (controlplane ec2.go: `MOLECULE_LLM_USAGE_TOKEN="$ADMIN_TOKEN"`). The user
* supplies NO key for these: the credential is internal plumbing, not a user
* input. Detected by vendor==="platform" (the platform proxy provider, which
* declares MOLECULE_LLM_USAGE_TOKEN in its AuthEnv) OR
* billingMode==="platform_managed" (registry-backed, internal#718 P3). BYOK
* providers return false and DO require a user-supplied credential. */
export function isPlatformManagedProvider(
p?: Pick<ProviderEntry, "vendor" | "billingMode"> | null,
): boolean {
return p?.vendor === "platform" || p?.billingMode === "platform_managed";
}
/** RegistryProvider mirrors one entry of GET /templates `registry_providers`
* (workspace-server registryProviderView): the registry's native provider for
* a runtime, with its display label, auth-env NAMES, and billing mode. This is
@@ -91,7 +76,6 @@ export interface RegistryModel {
name?: string;
provider?: string;
billing_mode?: "platform_managed" | "byok";
required_env?: string[];
}
export interface SelectorValue {
@@ -385,7 +385,7 @@ describe("ContextMenu — item actions", () => {
render(<ContextMenu />);
fireEvent.click(screen.getByRole("menuitem", { name: /pause/i }));
await act(async () => { /* flush */ });
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause?cascade=true", {});
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause", {});
expect(mockStoreState.updateNodeData).toHaveBeenCalledWith("n1", { status: "paused" });
});
@@ -395,7 +395,7 @@ describe("ContextMenu — item actions", () => {
render(<ContextMenu />);
fireEvent.click(screen.getByRole("menuitem", { name: /resume/i }));
await act(async () => { /* flush */ });
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume?cascade=true", {});
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume", {});
});
});
@@ -1,84 +0,0 @@
// @vitest-environment jsdom
//
// SaaS-mode coverage for the per-workspace cloud-provider picker. The main
// CreateWorkspaceDialog.test.tsx runs non-SaaS (the picker is hidden and the
// payload omits `provider`); this file forces SaaS by mocking isSaaSTenant so
// the picker renders and the selected provider flows into compute.provider.
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
vi.mock("@/lib/api", () => ({
api: { get: vi.fn(), post: vi.fn() },
}));
// Force SaaS so the Cloud provider picker is shown and the payload carries it.
vi.mock("@/lib/tenant", async (importOriginal) => ({
...(await importOriginal<typeof import("@/lib/tenant")>()),
isSaaSTenant: () => true,
}));
import { api } from "@/lib/api";
const mockGet = vi.mocked(api.get);
const mockPost = vi.mocked(api.post);
const SAMPLE_TEMPLATES = [
{
id: "claude-code-default",
name: "Claude Code Agent",
runtime: "claude-code",
model: "moonshot/kimi-k2.6",
providers: ["platform", "minimax"],
models: [{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] }],
},
];
beforeEach(() => {
vi.clearAllMocks();
mockGet.mockImplementation(async (url: string) => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
if (url === "/templates") return SAMPLE_TEMPLATES as any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return [] as any;
});
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPost.mockResolvedValue({} as any);
});
afterEach(() => cleanup());
async function openDialog() {
render(<CreateWorkspaceButton />);
const btn = screen.getAllByRole("button").find((b) => b.textContent?.includes("New Workspace"));
fireEvent.click(btn!);
await waitFor(() => expect(screen.getByText("Create Workspace")).toBeTruthy());
}
describe("CreateWorkspaceDialog — cloud provider (SaaS)", () => {
it("shows the Cloud provider picker, defaulting to AWS", async () => {
await openDialog();
const select = screen.getByLabelText("Cloud provider") as HTMLSelectElement;
expect(select).toBeTruthy();
expect(select.value).toBe("aws");
});
it("defaults compute.provider to aws when the picker is untouched", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "AWS Agent" } });
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.compute).toMatchObject({ provider: "aws" });
});
it("threads the selected cloud provider into compute.provider", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "GCP Agent" } });
fireEvent.change(screen.getByLabelText("Cloud provider"), { target: { value: "gcp" } });
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.compute).toMatchObject({ provider: "gcp" });
});
});
@@ -2,7 +2,6 @@
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
import { isPlatformManagedProvider } from "../ProviderModelSelector";
vi.mock("@/lib/api", () => ({
api: {
@@ -66,34 +65,6 @@ const SAMPLE_TEMPLATES = [
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] },
],
},
// #2245 fixtures. The real registry `platform` provider declares
// MOLECULE_LLM_USAGE_TOKEN in its auth_env — the default mock above masks the
// bug by using required_env:[]. This template gives the platform provider a
// non-empty auth env (matching production) so the credential-suppression
// logic is actually exercised.
{
id: "platform-managed-test",
name: "Platform Managed Test",
runtime: "claude-code",
model: "moonshot/kimi-k2.6",
providers: ["platform", "minimax"],
models: [
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: ["MOLECULE_LLM_USAGE_TOKEN"] },
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
],
},
// BYOK-only template (no platform provider) — the credential requirement
// MUST still hold for these (no-regression guard).
{
id: "byok-only-test",
name: "BYOK Only Test",
runtime: "claude-code",
model: "openai/gpt-4o",
providers: ["openai"],
models: [
{ id: "openai/gpt-4o", name: "GPT-4o", required_env: ["OPENAI_API_KEY"] },
],
},
];
beforeEach(() => {
@@ -483,182 +454,6 @@ describe("CreateWorkspaceDialog — dynamic runtime provider picker", () => {
});
});
// ---------------------------------------------------------------------------
// Registry-backed provider catalog (RFC#340 Fix C)
//
// Regression guard for the mis-bucketing bug: when a registry-backed
// claude-code template serves `moonshot/kimi-k2.6` whose DERIVED provider is
// `platform`, the dialog must build the dropdown from registry_providers/
// registry_models (buildProviderCatalogFromRegistry) — NOT the legacy
// inferVendor heuristic which slash-splits the id into "moonshot". The
// distinguishing trait of this fixture: the plain `models[]` array does NOT
// carry an explicit `provider` field, so the LEGACY path would bucket the
// model under "moonshot" and send llm_provider:"moonshot". Only the
// registry-backed path yields the Platform bucket + llm_provider:"platform".
// ---------------------------------------------------------------------------
// claude-code template whose plain models[] is UN-annotated (no explicit
// provider). The derived-provider annotation lives ONLY in registry_models.
const REGISTRY_TEMPLATE = {
id: "claude-code-default",
name: "Claude Code Agent",
runtime: "claude-code",
model: "moonshot/kimi-k2.6",
// Legacy fields — note: NO explicit provider on the platform model, so the
// legacy inferVendor path would slash-split it into "moonshot".
providers: ["platform", "minimax", "anthropic"],
models: [
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", required_env: [] },
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
{ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", required_env: ["ANTHROPIC_API_KEY"] },
],
// Registry-served SSOT (internal#718 P3). DeriveProvider resolved
// moonshot/kimi-k2.6 → "platform"; MiniMax-M2.7 → "minimax".
registry_backed: true,
registry_providers: [
{ name: "platform", display_name: "Platform", auth_env: [], billing_mode: "platform_managed" },
{ name: "minimax", display_name: "MiniMax", auth_env: ["MINIMAX_API_KEY"], billing_mode: "byok" },
{ name: "anthropic", display_name: "Anthropic API", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "byok" },
],
registry_models: [
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", billing_mode: "platform_managed" },
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", provider: "minimax", billing_mode: "byok" },
{ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", provider: "anthropic", billing_mode: "byok" },
],
};
// Registry-backed platform provider WITH a non-empty auth_env — this matches
// the PRODUCTION provider view, which ships the raw AuthEnv
// ([MOLECULE_LLM_USAGE_TOKEN]). REGISTRY_TEMPLATE above uses auth_env:[] so it
// never exercises suppression; this one drives the billingMode==="platform_
// managed" branch end-to-end through buildProviderCatalogFromRegistry. (#2245)
const REGISTRY_TEMPLATE_PLATFORM_AUTHENV = {
...REGISTRY_TEMPLATE,
registry_providers: [
{
name: "platform",
display_name: "Platform",
auth_env: ["MOLECULE_LLM_USAGE_TOKEN"],
billing_mode: "platform_managed",
},
{ name: "minimax", display_name: "MiniMax", auth_env: ["MINIMAX_API_KEY"], billing_mode: "byok" },
{ name: "anthropic", display_name: "Anthropic API", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "byok" },
],
};
describe("CreateWorkspaceDialog — registry-backed provider catalog (RFC#340 Fix C)", () => {
beforeEach(() => {
mockGet.mockImplementation(async (url: string) => {
if (url === "/templates") {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return [REGISTRY_TEMPLATE] as any;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return SAMPLE_WORKSPACES as any;
});
});
it("shows the Platform provider bucket for the registry-backed claude-code runtime", async () => {
await openDialog();
const providerSelect = await waitFor(() => {
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
expect(sel).toBeTruthy();
return sel;
});
const labels = Array.from(providerSelect.options).map((o) => o.text.trim());
// Registry display_name "Platform" appears — NOT "moonshot" from the
// legacy slash-split heuristic.
expect(labels).toContain("Platform");
expect(labels).not.toContain("moonshot");
// Bucket id is the registry-keyed id, vendor is the bare provider name.
const values = Array.from(providerSelect.options).map((o) => o.value);
expect(values).toContain("registry|platform");
});
it("sends llm_provider: platform (not moonshot) for moonshot/kimi-k2.6", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Kimi Agent" },
});
// Wait for the registry default to settle on the Platform bucket + model.
await waitFor(() => {
const modelSelect = document.querySelector("[data-testid='model-select']") as HTMLSelectElement;
expect(modelSelect?.value).toBe("moonshot/kimi-k2.6");
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.model).toBe("moonshot/kimi-k2.6");
expect(body.llm_provider).toBe("platform");
// Platform is auth-env-free → no BYOK secret.
expect(body.secrets).toBeUndefined();
});
it("buckets MiniMax-M2.7 under its derived provider and sends llm_provider: minimax", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "MiniMax Agent" },
});
await waitFor(() => {
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
expect(Array.from(sel.options).map((o) => o.value)).toContain("registry|minimax");
});
fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
target: { value: "registry|minimax" },
});
fireEvent.change(document.getElementById("llm-secret-input") as HTMLInputElement, {
target: { value: "sk-minimax-test" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.model).toBe("MiniMax-M2.7");
expect(body.llm_provider).toBe("minimax");
expect(body.secrets).toEqual({ MINIMAX_API_KEY: "sk-minimax-test" });
});
it("suppresses the credential for a registry-backed platform provider that declares an auth_env — billingMode path (#2245)", async () => {
// Override the default REGISTRY_TEMPLATE (auth_env:[]) with the production-
// shaped one whose platform provider declares MOLECULE_LLM_USAGE_TOKEN.
mockGet.mockImplementation(async (url: string) => {
if (url === "/templates") {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return [REGISTRY_TEMPLATE_PLATFORM_AUTHENV] as any;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return SAMPLE_WORKSPACES as any;
});
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Registry Platform Agent" },
});
// Platform is the default bucket; even with a non-empty auth_env the key
// field must NOT render (suppressed via billingMode==="platform_managed").
await waitFor(() => {
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
expect(sel?.value).toBe("registry|platform");
});
expect(screen.getByText("Platform-managed — no API key required.")).toBeTruthy();
expect(document.getElementById("llm-secret-input")).toBeNull();
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
expect(screen.queryByText("Provider credential is required")).toBeNull();
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.llm_provider).toBe("platform");
// The provisioner-injected MOLECULE_LLM_USAGE_TOKEN must NOT be clobbered.
expect(body.secrets).toBeUndefined();
});
});
// ---------------------------------------------------------------------------
// budget_limit field tests (#541)
// ---------------------------------------------------------------------------
@@ -740,70 +535,3 @@ describe("CreateWorkspaceDialog — budget_limit field", () => {
expect(budgetInput.value).toBe("");
});
});
describe("CreateWorkspaceDialog — platform-managed credential suppression (#2245)", () => {
describe("isPlatformManagedProvider", () => {
it("is true for the platform proxy vendor", () => {
expect(isPlatformManagedProvider({ vendor: "platform" })).toBe(true);
});
it("is true for a registry billingMode of platform_managed", () => {
expect(
isPlatformManagedProvider({ vendor: "minimax", billingMode: "platform_managed" }),
).toBe(true);
});
it("is false for a BYOK provider", () => {
expect(isPlatformManagedProvider({ vendor: "anthropic", billingMode: "byok" })).toBe(false);
expect(isPlatformManagedProvider({ vendor: "minimax" })).toBe(false);
});
it("is false for null/undefined", () => {
expect(isPlatformManagedProvider(null)).toBe(false);
expect(isPlatformManagedProvider(undefined)).toBe(false);
});
});
it("platform-managed provider with a declared auth env requires NO credential, hides the key field, and sends NO secret", async () => {
await openDialog();
await setTemplate("platform-managed-test");
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Platform Agent" },
});
// The credential input must NOT render for platform-managed; a "no key
// required" note appears instead.
await waitFor(() =>
expect(screen.getByText("Platform-managed — no API key required.")).toBeTruthy(),
);
expect(screen.queryByLabelText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
// No validation error, and the provisioner-injected token is NOT clobbered
// by an empty secret.
expect(screen.queryByText("Provider credential is required")).toBeNull();
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.llm_provider).toBe("platform");
expect(body.secrets).toBeUndefined();
});
it("BYOK provider still requires a credential and renders the key field (no-regression)", async () => {
await openDialog();
await setTemplate("byok-only-test");
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "BYOK Agent" },
});
// The credential field IS rendered for BYOK...
await waitFor(() => expect(screen.getByLabelText("OPENAI_API_KEY")).toBeTruthy());
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
// ...and create stays blocked until it's filled.
await waitFor(() =>
expect(screen.getByText("Provider credential is required")).toBeTruthy(),
);
expect(mockPost).not.toHaveBeenCalled();
});
});
@@ -1,175 +0,0 @@
// @vitest-environment jsdom
/**
* Regression tests for #2248 platform-managed provider credential suppression.
*
* Covers:
* - MOLECULE_LLM_USAGE_TOKEN is hidden when the selected provider is platform-managed
* - MOLECULE_LLM_USAGE_TOKEN is still shown for BYOK providers
* - No render churn from unstable array references (useMemo guard)
*/
import { describe, it, expect, vi, afterEach } from "vitest";
import { render, screen, fireEvent, cleanup, waitFor, act } from "@testing-library/react";
import { MissingKeysModal } from "../MissingKeysModal";
import type { ModelSpec, ProviderChoice } from "@/lib/deploy-preflight";
vi.mock("@/lib/api", () => ({
api: { get: vi.fn(), put: vi.fn() },
}));
vi.mock("@/lib/deploy-preflight", async () => {
const actual = await vi.importActual<typeof import("@/lib/deploy-preflight")>(
"@/lib/deploy-preflight",
);
return actual;
});
const PLATFORM_MANAGED_MODELS: ModelSpec[] = [
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
];
const BYOK_MODELS: ModelSpec[] = [
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
];
function makeProviders(billingMode: "platform_managed" | "byok"): ProviderChoice[] {
const main = {
id: billingMode === "platform_managed" ? "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" : "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
label: billingMode === "platform_managed" ? "Platform Anthropic" : "BYOK Anthropic",
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billingMode,
};
// Need ≥2 providers so MissingKeysModal enters picker mode (pickerMode = providers.length > 1).
const dummy = {
id: "openai|OPENAI_API_KEY",
label: "OpenAI",
envVars: ["OPENAI_API_KEY"],
};
return [main, dummy];
}
describe("ProviderPickerModal — platform-managed suppression (#2248)", () => {
afterEach(() => cleanup());
it("hides MOLECULE_LLM_USAGE_TOKEN when provider is platform-managed", () => {
render(
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={makeProviders("platform_managed")}
models={PLATFORM_MANAGED_MODELS}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>,
);
// Only ANTHROPIC_API_KEY should be rendered; MOLECULE_LLM_USAGE_TOKEN suppressed
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
});
it("shows MOLECULE_LLM_USAGE_TOKEN when provider is BYOK", () => {
render(
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={makeProviders("byok")}
models={BYOK_MODELS}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>,
);
// Both keys visible for BYOK
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
});
it("does not churn renders when the modal is open and platform-managed", () => {
let renderCount = 0;
function RenderSpy({ children }: { children: React.ReactNode }) {
renderCount++;
return <>{children}</>;
}
render(
<RenderSpy>
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={makeProviders("platform_managed")}
models={PLATFORM_MANAGED_MODELS}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>
</RenderSpy>,
);
const countAfterInitial = renderCount;
// Wait a tick — if useEffect were looping, renderCount would climb.
// In jsdom without real timers there's no automatic re-render, so we
// just assert the count is stable immediately after the single
// commit required by the initial open state.
expect(renderCount).toBe(countAfterInitial);
expect(renderCount).toBeLessThanOrEqual(2); // StrictMode double-render ceiling
});
it("updates suppression correctly when switching from BYOK to platform-managed", async () => {
const providers: ProviderChoice[] = [
{
id: "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
label: "BYOK Anthropic",
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billingMode: "byok",
},
{
id: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
label: "Platform Anthropic",
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billingMode: "platform_managed",
},
{
id: "openai|OPENAI_API_KEY",
label: "OpenAI",
envVars: ["OPENAI_API_KEY"],
},
];
const models: ModelSpec[] = [
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
];
render(
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={providers}
models={models}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>,
);
// Default selection is providers[0] (BYOK) — both keys visible
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
// Switch to platform-managed provider
const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
act(() => {
fireEvent.change(providerSelect, {
target: { value: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" },
});
});
// MOLECULE_LLM_USAGE_TOKEN should now be suppressed
await waitFor(() => {
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
});
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
});
});
+3 -16
View File
@@ -13,7 +13,6 @@ import {
buildProviderCatalog,
buildProviderCatalogFromRegistry,
findProviderForModel,
isPlatformManagedProvider,
type SelectorValue,
type ProviderEntry,
type RegistryProvider,
@@ -683,9 +682,6 @@ export function ConfigTab({ workspaceId }: Props) {
name: m.name,
// carry the derived provider so the selector buckets correctly
...(m.provider ? { provider: m.provider } : {}),
// carry required_env so wasTemplateDriven can detect
// template-driven env lists for registry-backed runtimes
...(m.required_env ? { required_env: m.required_env } : {}),
}))
: availableModels,
[registryBacked, selectedRuntime?.registryModels, availableModels],
@@ -1021,15 +1017,6 @@ export function ConfigTab({ workspaceId }: Props) {
// top-level model. required_env follows the selected
// provider's envVars when the existing required_env
// was template-driven (don't clobber user-typed envs).
//
// #2248: suppress provisioner-injected internal tokens
// (MOLECULE_LLM_USAGE_TOKEN) for platform-managed providers
// so the user can't clobber them.
const selectedEntry = providerCatalog.find((p) => p.id === next.providerId);
const isPlatformManaged = selectedEntry ? isPlatformManagedProvider(selectedEntry) : false;
const filteredEnvVars = isPlatformManaged
? next.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
: next.envVars;
setConfig((prev) => {
const v = next.model;
const prevModelId = prev.runtime_config?.model || prev.model || "";
@@ -1042,8 +1029,8 @@ export function ConfigTab({ workspaceId }: Props) {
prevRequired.every((e, i) => e === prevSpec.required_env![i])
: false);
const nextRequired =
wasTemplateDriven
? filteredEnvVars
next.envVars.length > 0 && wasTemplateDriven
? next.envVars
: prevRequired;
if (prev.runtime) {
return {
@@ -1051,7 +1038,7 @@ export function ConfigTab({ workspaceId }: Props) {
runtime_config: {
...prev.runtime_config,
model: v,
...(wasTemplateDriven
...(next.envVars.length > 0 && wasTemplateDriven
? { required_env: nextRequired }
: {}),
},
@@ -38,16 +38,8 @@ const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
const dataPersistenceLabel = (v: string): string =>
v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
// Cloud/compute backend display name. The provider is chosen at create time and
// is NOT editable here (changing a workspace's cloud requires a recreate), so
// it renders as a read-only badge — but we must preserve it across Save (the
// compute payload is rebuilt below, and dropping it would wipe the column).
const cloudProviderLabel = (v: string | undefined): string =>
v === "gcp" ? "GCP" : v === "hetzner" ? "Hetzner" : "AWS";
export function ContainerConfigTab({ workspaceId, data }: Props) {
const runtime = data.runtime;
const provider = data.compute?.provider; // read-only; set at create time
const instanceType = data.compute?.instance_type;
const rootGB = data.compute?.volume?.root_gb;
const displayMode = data.compute?.display?.mode;
@@ -102,10 +94,6 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
: { mode: "none" },
// internal#734: omit when "auto" so the wire/default behavior is unchanged.
...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
// Preserve the create-time cloud provider — it's not editable here, but
// this PATCH rebuilds the whole compute object, so omitting it would
// wipe the persisted provider (and mislead the badge after a Save).
...(provider ? { provider } : {}),
};
const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
@@ -138,18 +126,7 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
<div className="p-4 space-y-4">
<section className="rounded-lg border border-line/50 bg-surface-card/40 p-4">
<div className="mb-3 flex items-center justify-between gap-3">
<div className="flex items-center gap-2">
<h3 className="text-sm font-semibold text-ink">Container Config</h3>
{/* Read-only cloud-provider badge which cloud this workspace's box
runs on (AWS/GCP/Hetzner). Defaults to AWS when unset (legacy
rows). Set at create time in the Create Workspace dialog. */}
<span
title="Cloud provider for this workspace's compute (set at create time)"
className="rounded-full border border-line/60 bg-surface-sunken px-2 py-0.5 font-mono text-[10px] uppercase tracking-wide text-ink-mid"
>
{cloudProviderLabel(provider)}
</span>
</div>
<h3 className="text-sm font-semibold text-ink">Container Config</h3>
{data.needsRestart && <span className="text-[11px] text-warm">Restart required</span>}
</div>
+8 -94
View File
@@ -1,6 +1,6 @@
"use client";
import { useCallback, useEffect, useRef, useState } from "react";
import { useEffect, useRef, useState } from "react";
import { api } from "@/lib/api";
import type RFB from "@novnc/novnc";
@@ -33,11 +33,6 @@ export function DisplayTab({ workspaceId }: Props) {
const [controlBusy, setControlBusy] = useState(false);
const [sessionUrl, setSessionUrl] = useState<string | null>(null);
const requestGeneration = useRef(0);
// Freshest signed session URL (token bound to the lease's expires_at). The
// renewal timer keeps this current WITHOUT swapping the live stream's
// sessionUrl (which would needlessly reconnect the desktop); the stream uses
// it only when it has to reconnect after an unclean drop.
const latestSessionUrlRef = useRef<string | null>(null);
useEffect(() => {
const generation = requestGeneration.current + 1;
@@ -46,7 +41,6 @@ export function DisplayTab({ workspaceId }: Props) {
setStatus(null);
setControl(null);
setSessionUrl(null);
latestSessionUrlRef.current = null;
setError(null);
setControlError(null);
setControlBusy(false);
@@ -75,41 +69,6 @@ export function DisplayTab({ workspaceId }: Props) {
};
}, [workspaceId]);
// Acquire (or re-acquire) the display-control lease as the current holder.
// Re-acquiring extends the 300s server-side lock AND returns a freshly-signed
// session URL (token bound to the new expires_at). Used both to renew the
// lease on a timer and to mint a non-stale token for each reconnect — a
// cached URL can be past its ~300s expiry, which would make a reconnect 401.
const reacquireSession = useCallback(async (): Promise<string | null> => {
const generation = requestGeneration.current;
try {
const next = await api.post<DisplayControlStatus>(
`/workspaces/${workspaceId}/display/control/acquire`,
{ controller: "user", ttl_seconds: 300 },
);
if (requestGeneration.current !== generation) return null;
setControl(next);
if (next.session_url) latestSessionUrlRef.current = next.session_url;
return next.session_url ?? null;
} catch {
// Transient failure, or another holder took over: the live stream keeps
// running on its existing connection; a reconnect re-evaluates control.
return null;
}
}, [workspaceId]);
// Renew the lease while we hold it. The lock is a 300s lease with no
// server-side auto-renewal, so without this the control (and the session
// token) silently expire mid-session — the user appears "kicked" every ~5
// minutes. We renew well inside the TTL and do not touch the live stream.
useEffect(() => {
if (!sessionUrl) return;
const timer = setInterval(() => {
void reacquireSession();
}, 120_000);
return () => clearInterval(timer);
}, [sessionUrl, reacquireSession]);
const acquireControl = async () => {
const generation = requestGeneration.current;
const controlPath = `/workspaces/${workspaceId}/display/control`;
@@ -123,7 +82,6 @@ export function DisplayTab({ workspaceId }: Props) {
if (requestGeneration.current !== generation) return;
setControl(next);
setSessionUrl(next.session_url || null);
latestSessionUrlRef.current = next.session_url || null;
} catch (err) {
if (requestGeneration.current !== generation) return;
setControlError("Failed to take control");
@@ -150,7 +108,6 @@ export function DisplayTab({ workspaceId }: Props) {
if (requestGeneration.current !== generation) return;
setControl(next);
setSessionUrl(null);
latestSessionUrlRef.current = null;
} catch (err) {
if (requestGeneration.current !== generation) return;
setControlError("Failed to release control");
@@ -278,11 +235,7 @@ export function DisplayTab({ workspaceId }: Props) {
/>
</div>
{sessionUrl ? (
<DesktopStream
sessionUrl={sessionUrl}
latestSessionUrlRef={latestSessionUrlRef}
reacquireSession={reacquireSession}
/>
<DesktopStream sessionUrl={sessionUrl} />
) : (
<div className="flex flex-1 items-center justify-center p-8 text-center">
<div>
@@ -358,15 +311,7 @@ function DisplayControlBar({
);
}
function DesktopStream({
sessionUrl,
latestSessionUrlRef,
reacquireSession,
}: {
sessionUrl: string;
latestSessionUrlRef: { current: string | null };
reacquireSession: () => Promise<string | null>;
}) {
function DesktopStream({ sessionUrl }: { sessionUrl: string }) {
const containerRef = useRef<HTMLDivElement | null>(null);
const rfbRef = useRef<RFB | null>(null);
const [streamError, setStreamError] = useState<string | null>(null);
@@ -384,37 +329,20 @@ function DesktopStream({
clipboardTimer = setTimeout(() => setClipboardStatus(null), 2500);
};
let attempts = 0;
let retryTimer: ReturnType<typeof setTimeout> | null = null;
const maxAttempts = 10;
async function connect(reacquire = false) {
async function connect() {
setStreamError(null);
try {
// On a reconnect, mint a fresh lease + token first — the original token
// is only ~300s, so a cached URL can be expired and would 401. The
// initial connect already holds a fresh token from acquireControl.
if (reacquire) await reacquireSession();
const mod = await import("@novnc/novnc");
if (cancelled || !containerRef.current) return;
const stream = displayWebSocketConnection(latestSessionUrlRef.current || sessionUrl);
const stream = displayWebSocketConnection(sessionUrl);
rfb = new mod.default(containerRef.current, stream.url, {
wsProtocols: ["binary", `molecule-display-token.${stream.token}`],
});
rfbRef.current = rfb;
rfb.scaleViewport = true;
// Do NOT request a server-side resize: the workspace display runs a
// fixed Xorg modeline and x11vnc rejects SetDesktopSize ("Resize is
// administratively prohibited"), which spams the console on every
// (re)connect. scaleViewport already fits the fixed framebuffer to the
// container client-side, so we don't need the server to resize.
rfb.resizeSession = false;
rfb.resizeSession = true;
rfb.focusOnClick = true;
rfb.focus({ preventScroll: true });
rfb.addEventListener("connect", () => {
attempts = 0;
if (!cancelled) setStreamError(null);
});
rfb.addEventListener("clipboard", (event: Event) => {
const text = (event as CustomEvent<{ text?: string }>).detail?.text ?? "";
if (!text) return;
@@ -425,20 +353,7 @@ function DesktopStream({
});
rfb.addEventListener("disconnect", (event: Event) => {
const detail = (event as CustomEvent<{ clean?: boolean }>).detail;
rfbRef.current = null;
if (cancelled || detail?.clean) return;
// Auto-reconnect after an unclean drop (idle/network blip, brief
// agent hiccup); bounded backoff so a genuinely-dead session still
// surfaces an error instead of looping forever.
if (attempts < maxAttempts) {
attempts += 1;
setStreamError(`Reconnecting to desktop… (attempt ${attempts})`);
retryTimer = setTimeout(() => {
if (!cancelled) void connect(true);
}, Math.min(1000 * attempts, 5000));
} else {
setStreamError("Desktop stream disconnected.");
}
if (!cancelled && !detail?.clean) setStreamError("Desktop stream disconnected.");
});
} catch {
if (!cancelled) setStreamError("Desktop stream could not be opened.");
@@ -448,12 +363,11 @@ function DesktopStream({
connect();
return () => {
cancelled = true;
if (retryTimer) clearTimeout(retryTimer);
if (clipboardTimer) clearTimeout(clipboardTimer);
rfbRef.current = null;
rfb?.disconnect();
};
}, [sessionUrl, reacquireSession, latestSessionUrlRef]);
}, [sessionUrl]);
useEffect(() => {
const onPaste = (event: ClipboardEvent) => {
@@ -1,229 +0,0 @@
// @vitest-environment jsdom
//
// Regression tests for #2248 — platform-managed provider credential suppression
// in ConfigTab.
//
// Covers:
// - required_env is cleared to [] when switching to a platform-managed provider
// whose only declared env var is MOLECULE_LLM_USAGE_TOKEN (single-token case).
// - required_env preserves non-internal tokens for BYOK providers.
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
import React from "react";
afterEach(cleanup);
const apiGet = vi.fn();
const apiPatch = vi.fn();
const apiPut = vi.fn();
vi.mock("@/lib/api", () => ({
api: {
get: (path: string) => apiGet(path),
patch: (path: string, body: unknown) => apiPatch(path, body),
put: (path: string, body: unknown) => apiPut(path, body),
post: vi.fn(),
del: vi.fn(),
},
}));
vi.mock("@/store/canvas", () => ({
useCanvasStore: Object.assign(
(selector: (s: unknown) => unknown) =>
selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
),
}));
vi.mock("../AgentCardSection", () => ({
AgentCardSection: () => <div data-testid="agent-card-stub" />,
}));
import { ConfigTab } from "../ConfigTab";
function wireApi(opts: {
workspaceRuntime?: string;
workspaceModel?: string;
configYamlContent?: string | null;
templates?: Array<{
id: string;
name?: string;
runtime?: string;
models?: unknown[];
registry_backed?: boolean;
registry_providers?: unknown[];
registry_models?: unknown[];
}>;
}) {
apiGet.mockImplementation((path: string) => {
if (path === `/workspaces/ws-test`) {
return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
}
if (path === `/workspaces/ws-test/model`) {
return Promise.resolve({ model: opts.workspaceModel ?? "" });
}
if (path === `/workspaces/ws-test/files/config.yaml`) {
if (opts.configYamlContent === null) {
return Promise.reject(new Error("not found"));
}
return Promise.resolve({ content: opts.configYamlContent ?? "" });
}
if (path === "/templates") {
return Promise.resolve(opts.templates ?? []);
}
return Promise.reject(new Error(`unmocked api.get: ${path}`));
});
}
beforeEach(() => {
apiGet.mockReset();
apiPatch.mockReset();
apiPut.mockReset();
});
describe("ConfigTab — platform-managed credential suppression (#2248)", () => {
it("clears required_env to [] when switching to a single-token platform-managed provider", async () => {
// Setup: workspace currently has a BYOK provider selected with both keys.
// The user switches to a platform-managed provider whose ONLY auth_env
// is MOLECULE_LLM_USAGE_TOKEN. After filtering, envVars becomes [];
// wasTemplateDriven must still overwrite required_env with [] so the
// old MOLECULE_LLM_USAGE_TOKEN requirement does not linger.
wireApi({
workspaceRuntime: "claude-code",
workspaceModel: "byok-sonnet",
configYamlContent: [
"runtime: claude-code",
"runtime_config:",
" model: byok-sonnet",
" required_env:",
" - ANTHROPIC_API_KEY",
" - MOLECULE_LLM_USAGE_TOKEN",
].join("\n"),
templates: [
{
id: "t-claude-code",
name: "Claude Code",
runtime: "claude-code",
models: [],
registry_backed: true,
registry_providers: [
{
name: "anthropic",
display_name: "BYOK Anthropic",
auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billing_mode: "byok",
},
{
name: "platform",
display_name: "Platform Anthropic",
auth_env: ["MOLECULE_LLM_USAGE_TOKEN"],
billing_mode: "platform_managed",
},
],
registry_models: [
{ id: "byok-sonnet", provider: "anthropic", billing_mode: "byok", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
{ id: "platform-sonnet", provider: "platform", billing_mode: "platform_managed", required_env: ["MOLECULE_LLM_USAGE_TOKEN"] },
],
},
],
});
apiPut.mockResolvedValue({});
apiPatch.mockResolvedValue({});
render(<ConfigTab workspaceId="ws-test" />);
// Wait for the provider dropdown to populate.
const providerSelect = (await waitFor(() =>
screen.getByTestId("provider-select"),
)) as HTMLSelectElement;
// Switch from BYOK to platform-managed provider.
const platformOption = Array.from(providerSelect.options).find((o) =>
o.text.includes("Platform"),
);
expect(platformOption).toBeTruthy();
fireEvent.change(providerSelect, { target: { value: platformOption!.value } });
// Save & Restart.
fireEvent.click(screen.getByRole("button", { name: /save & restart/i }));
await waitFor(() => {
expect(apiPut).toHaveBeenCalledWith(
"/workspaces/ws-test/files/config.yaml",
expect.objectContaining({
content: expect.not.stringContaining("ANTHROPIC_API_KEY"),
}),
);
});
// Verify the specific put call no longer carries the suppressed token.
const putCall = apiPut.mock.calls.find(
([path]) => path === "/workspaces/ws-test/files/config.yaml",
);
expect(putCall?.[1].content).not.toContain("MOLECULE_LLM_USAGE_TOKEN");
});
it("preserves non-internal tokens for BYOK providers", async () => {
wireApi({
workspaceRuntime: "claude-code",
workspaceModel: "byok-sonnet",
configYamlContent: [
"runtime: claude-code",
"runtime_config:",
" model: byok-sonnet",
" required_env:",
" - ANTHROPIC_API_KEY",
" - MOLECULE_LLM_USAGE_TOKEN",
].join("\n"),
templates: [
{
id: "t-claude-code",
name: "Claude Code",
runtime: "claude-code",
models: [],
registry_backed: true,
registry_providers: [
{
name: "anthropic",
display_name: "BYOK Anthropic",
auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billing_mode: "byok",
},
],
registry_models: [
{ id: "byok-sonnet", provider: "anthropic", billing_mode: "byok" },
],
},
],
});
apiPut.mockResolvedValue({});
apiPatch.mockResolvedValue({});
render(<ConfigTab workspaceId="ws-test" />);
// Wait for load.
await waitFor(() =>
screen.getByRole("button", { name: /save & restart/i }),
);
// Click Save without changing provider — BYOK should keep both keys.
fireEvent.click(screen.getByRole("button", { name: /save & restart/i }));
await waitFor(() => {
expect(apiPut).toHaveBeenCalledWith(
"/workspaces/ws-test/files/config.yaml",
expect.objectContaining({
content: expect.stringContaining("required_env:"),
}),
);
});
const putCall = apiPut.mock.calls.find(
([path]) => path === "/workspaces/ws-test/files/config.yaml",
);
expect(putCall?.[1].content).toContain("ANTHROPIC_API_KEY");
expect(putCall?.[1].content).toContain("MOLECULE_LLM_USAGE_TOKEN");
});
});

Some files were not shown because too many files have changed in this diff Show More