Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b948784cda | |||
| 9b911d803d | |||
| 801e53a52e |
@@ -31,7 +31,7 @@
|
||||
#
|
||||
# REQUIRED_CHECKS (legacy) is a newline-separated list used when the
|
||||
# JSON variable is not set. Declared in the workflow YAML rather than
|
||||
# fetched from /branch_protections (which needs admin scope —
|
||||
# fetched from /branch_protections (which needs admin scope — sop-tier-bot
|
||||
# has read-only). Trade dynamism for simplicity: when the required-check
|
||||
# set changes, update both branch protection AND this env. Keeping them
|
||||
# in sync is less complexity than granting the audit bot admin perms on
|
||||
@@ -54,57 +54,32 @@ API="https://${GITEA_HOST}/api/v1"
|
||||
AUTH="Authorization: token ${GITEA_TOKEN}"
|
||||
|
||||
# 1. Fetch the PR. If not merged, no-op.
|
||||
# Fail-closed: verify HTTP 200 before parsing. A 401/403/404 means the token
|
||||
# is invalid or the PR is inaccessible — we must NOT silently treat that as
|
||||
# "not merged" and skip the audit.
|
||||
PR_TMP=$(mktemp)
|
||||
PR_HTTP=$(curl -sS -o "$PR_TMP" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
|
||||
PR=$(cat "$PR_TMP")
|
||||
rm -f "$PR_TMP"
|
||||
if [ "$PR_HTTP" != "200" ]; then
|
||||
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${PR_HTTP} — cannot evaluate merge state."
|
||||
exit 1
|
||||
fi
|
||||
# FAIL-CLOSED: a 200 response with a missing/malformed `merged` field must
|
||||
# NOT be treated as "not merged" (that would silently skip the audit).
|
||||
# We verify both presence AND correct type for every field we consume.
|
||||
PR_SCHEMA_OK=$(echo "$PR" | jq -r '
|
||||
(.merged | type == "boolean") and
|
||||
(.merge_commit_sha | type == "string") and
|
||||
(.merged_by | type == "object") and (.merged_by.login | type == "string") and
|
||||
(.base | type == "object") and (.base.ref | type == "string") and
|
||||
(.head | type == "object") and (.head.sha | type == "string")
|
||||
')
|
||||
if [ "$PR_SCHEMA_OK" != "true" ]; then
|
||||
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP 200 but one or more required fields are missing, null, or of wrong type — cannot evaluate force-merge."
|
||||
exit 1
|
||||
fi
|
||||
MERGED=$(echo "$PR" | jq -r '.merged')
|
||||
PR=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
|
||||
MERGED=$(echo "$PR" | jq -r '.merged // false')
|
||||
if [ "$MERGED" != "true" ]; then
|
||||
echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha')
|
||||
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login')
|
||||
# NOTE: no || true — with set -euo pipefail, jq parse failures (e.g. field
|
||||
# missing from API response) propagate as hard errors. Use jq's // operator
|
||||
# for graceful defaults instead of bash || true guards. This was re-added by
|
||||
# 8c343e3a ("fix(gitea): add || true guards to jq pipelines") — reverted
|
||||
# here because the guards mask silent failures that hide malformed API responses.
|
||||
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
|
||||
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
|
||||
TITLE=$(echo "$PR" | jq -r '.title // ""')
|
||||
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref')
|
||||
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha')
|
||||
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
|
||||
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')
|
||||
|
||||
if [ -z "$MERGE_SHA" ]; then
|
||||
echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 2. Required status checks — branch-aware JSON dict takes precedence.
|
||||
if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then
|
||||
# FAIL-CLOSED: if REQUIRED_CHECKS_JSON is set, the branch entry must exist
|
||||
# and be an array. A missing branch or non-array value means the config is
|
||||
# malformed or drifted — we must NOT silently treat it as "no checks".
|
||||
_RC_JSON_OK=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '
|
||||
has($branch) and (.[$branch] | type == "array")
|
||||
')
|
||||
if [ "$_RC_JSON_OK" != "true" ]; then
|
||||
echo "::error::REQUIRED_CHECKS_JSON missing or non-array entry for branch '$BASE_BRANCH' — cannot evaluate required checks."
|
||||
exit 1
|
||||
fi
|
||||
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] | .[]')
|
||||
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] // [] | .[]')
|
||||
else
|
||||
REQUIRED="$REQUIRED_CHECKS"
|
||||
fi
|
||||
@@ -116,28 +91,12 @@ fi
|
||||
# 3. Status-check state at the PR HEAD (where checks ran). The merge
|
||||
# commit doesn't get its own checks; we evaluate the PR's last
|
||||
# commit, which is what branch protection compared against.
|
||||
# Fail-closed: verify HTTP 200. A 401/403/404 means the status is
|
||||
# unreadable — we must NOT treat that as "no statuses" and skip checks.
|
||||
STATUS_TMP=$(mktemp)
|
||||
STATUS_HTTP=$(curl -sS -o "$STATUS_TMP" -w '%{http_code}' -H "$AUTH" \
|
||||
STATUS=$(curl -sS -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
|
||||
STATUS=$(cat "$STATUS_TMP")
|
||||
rm -f "$STATUS_TMP"
|
||||
if [ "$STATUS_HTTP" != "200" ]; then
|
||||
echo "::error::GET /commits/${HEAD_SHA}/status returned HTTP ${STATUS_HTTP} — cannot evaluate required checks."
|
||||
exit 1
|
||||
fi
|
||||
# FAIL-CLOSED: a 200 status response missing the 'statuses' array, or with
|
||||
# 'statuses' set to a non-array type (null/string/object), must NOT be treated
|
||||
# as "no checks" — that would silently declare all checks green.
|
||||
if ! echo "$STATUS" | jq -e '(.statuses | type) == "array"' >/dev/null; then
|
||||
echo "::error::GET /commits/${HEAD_SHA}/status returned HTTP 200 but 'statuses' is missing or not an array — cannot evaluate required checks."
|
||||
exit 1
|
||||
fi
|
||||
declare -A CHECK_STATE
|
||||
while IFS=$'\t' read -r ctx state; do
|
||||
[ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
|
||||
done < <(echo "$STATUS" | jq -r '.statuses | .[] | "\(.context)\t\(.status)"')
|
||||
done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')
|
||||
|
||||
# 4. For each required check, was it green at merge? YAML block scalars
|
||||
# (`|`) leave a trailing newline; skip blank/whitespace-only lines.
|
||||
|
||||
@@ -317,33 +317,7 @@ def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
# Fail-closed validation: every entry must be a non-empty string.
|
||||
# Reject null, int, dict, or empty/whitespace strings loudly —
|
||||
# they indicate a malformed manifest that drift-detect must not
|
||||
# normalize away (that would hide config errors).
|
||||
validated: set[str] = set()
|
||||
for idx, item in enumerate(branch_checks):
|
||||
if not isinstance(item, str):
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
|
||||
f"{type(item).__name__} (value={item!r}), expected str\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
stripped = item.strip()
|
||||
if not stripped:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
|
||||
f"empty/whitespace string\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
if stripped in validated:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'] contains "
|
||||
f"duplicate context '{stripped}' at index {idx}\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
validated.add(stripped)
|
||||
return validated
|
||||
return {str(item).strip() for item in branch_checks if str(item).strip()}
|
||||
|
||||
# Legacy variant fallback.
|
||||
if found_legacy:
|
||||
@@ -578,34 +552,23 @@ def find_open_issue(title: str) -> dict | None:
|
||||
hourly; failing one cycle loudly is strictly better than silently
|
||||
duplicating.
|
||||
|
||||
Paginates through all open issues (limit=50 per page) until the
|
||||
title is found or the result set is exhausted. Previously only one
|
||||
page was fetched, causing duplicate [ci-drift] issues when the
|
||||
existing tracking issue fell beyond page 1.
|
||||
Gitea issue search returns at most limit=50 issues per page; one page is
|
||||
enough as long as `[ci-drift]` issues are a tiny minority. (See
|
||||
follow-up issue for Link-header pagination.)
|
||||
"""
|
||||
page = 1
|
||||
while True:
|
||||
_, results = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/issues",
|
||||
query={
|
||||
"state": "open",
|
||||
"type": "issues",
|
||||
"limit": "50",
|
||||
"page": str(page),
|
||||
},
|
||||
_, results = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/issues",
|
||||
query={"state": "open", "type": "issues", "limit": "50"},
|
||||
)
|
||||
if not isinstance(results, list):
|
||||
raise ApiError(
|
||||
f"issue search returned non-list body (got {type(results).__name__})"
|
||||
)
|
||||
if not isinstance(results, list):
|
||||
raise ApiError(
|
||||
f"issue search returned non-list body (got {type(results).__name__})"
|
||||
)
|
||||
for issue in results:
|
||||
if issue.get("title") == title:
|
||||
return issue
|
||||
# Fewer than limit results means last page reached.
|
||||
if len(results) < 50:
|
||||
return None
|
||||
page += 1
|
||||
for issue in results:
|
||||
if issue.get("title") == title:
|
||||
return issue
|
||||
return None
|
||||
|
||||
|
||||
def render_body(branch: str, findings: list[str], debug: dict) -> str:
|
||||
|
||||
@@ -9,43 +9,27 @@ queue. This script provides the missing serialized policy in user space:
|
||||
candidate (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals,
|
||||
or red required CI) is SKIPPED so it cannot head-of-line block newer ready
|
||||
PRs; the scan continues to the next candidate.
|
||||
2. Refuse to act unless main's BP-required contexts are green. This is also
|
||||
the serialized backstop for direct-merge (see below): after a direct merge,
|
||||
main re-runs push CI and this gate PAUSES the queue if main goes red, so no
|
||||
merge piles onto an unverified/red main (issue #2358).
|
||||
2. Refuse to act unless main's BP-required contexts are green.
|
||||
3. Refuse fork PRs; the queue may only mutate same-repo branches.
|
||||
4. DIRECT-MERGE when conflict-free (issue #2358). When Gitea reports the PR
|
||||
conflict-free (mergeable is True) and the merge bar below is met, MERGE IT
|
||||
DIRECTLY — even if its head does not contain current main. We do NOT call
|
||||
/pulls/{n}/update first: branch protection does not require strict
|
||||
up-to-date, so behind-main conflict-free PRs merge cleanly, and calling
|
||||
/update would trigger Gitea dismiss_stale_approvals (dismissing the genuine
|
||||
approvals and forcing a re-review every tick — the rebase-churn bottleneck).
|
||||
The /update path is used ONLY when the PR is DEFINITIVELY not mergeable
|
||||
(mergeable is literal False) AND its head lacks current main — refreshing the
|
||||
branch may resolve a behind-main non-conflict; a real conflict returns HTTP
|
||||
409 and the PR is HELD per #2352. mergeable=None/missing (Gitea STILL
|
||||
COMPUTING conflict state) is a distinct fail-closed WAIT: never merged AND
|
||||
never /update'd — calling /update during the compute window would dismiss the
|
||||
PR's genuine approvals (dismiss_stale_approvals) and re-introduce the exact
|
||||
rebase-churn this queue eliminates. None is re-checked next tick.
|
||||
4. If the PR branch does not contain current main, call Gitea's
|
||||
/pulls/{n}/update endpoint and stop. CI must rerun on the updated head.
|
||||
5. Merge ONLY when, on the PR's CURRENT head sha:
|
||||
- >= REQUIRED_APPROVALS distinct GENUINE official APPROVED reviews from
|
||||
the recognised reviewer set (not stale, not dismissed, commit_id ==
|
||||
current head), AND
|
||||
- no open official REQUEST_CHANGES on the current head, AND
|
||||
- every BP-required status context is green, AND
|
||||
- the PR is mergeable (Gitea reports it conflict-free).
|
||||
- the PR is mergeable.
|
||||
|
||||
Authoritative gates (fail-closed):
|
||||
- The REQUIRED status contexts come from BRANCH PROTECTION
|
||||
(`status_check_contexts`) PLUS the hardcoded governance checks
|
||||
(qa-review, security-review, sop-checklist). If branch protection
|
||||
cannot be enumerated, the queue HOLDS (does not merge blindly).
|
||||
- NON-required reds (E2E Chat, Staging SaaS, ci-arm64-advisory, any
|
||||
(`status_check_contexts`), not a hand-maintained env list. If branch
|
||||
protection cannot be enumerated, the queue HOLDS (does not merge blindly).
|
||||
- NON-required reds (qa-review, security-review, sop-tier, sop-checklist
|
||||
when not branch-required, E2E Chat, Staging SaaS, ci-arm64-advisory, any
|
||||
continue-on-error job) MUST NOT block. They are reported, never gating.
|
||||
- `force_merge=true` is used ONLY when the merge is blocked *solely* by
|
||||
missing-but-non-required advisory contexts (required are green + genuine
|
||||
missing-but-non-required governance contexts (required are green + genuine
|
||||
approvals present). It is NEVER used to bypass a failing REQUIRED context
|
||||
or missing approvals.
|
||||
|
||||
@@ -144,15 +128,6 @@ OPT_OUT_LABELS = {
|
||||
).split(",")
|
||||
if name.strip()
|
||||
} | ({HOLD_LABEL} if HOLD_LABEL else set())
|
||||
# Governance checks that are ALWAYS required for every PR, regardless of
|
||||
# branch-protection configuration. These are the uniform-gate checks that
|
||||
# must pass before any PR can merge (SOP tier removal makes them mandatory
|
||||
# for all PRs, not just tier:medium/tier:high).
|
||||
GOVERNANCE_REQUIRED_CONTEXTS = [
|
||||
"qa-review / approved (pull_request)",
|
||||
"security-review / approved (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request)",
|
||||
]
|
||||
REQUIRED_CONTEXTS_RAW = _env(
|
||||
"REQUIRED_CONTEXTS",
|
||||
default=(
|
||||
@@ -346,15 +321,41 @@ def latest_statuses_by_context(statuses: list[dict]) -> dict[str, dict]:
|
||||
return latest
|
||||
|
||||
|
||||
def _is_tier_low_pending_ok(
|
||||
latest_statuses: dict[str, dict],
|
||||
context: str,
|
||||
pr_labels: set[str],
|
||||
) -> bool:
|
||||
"""Return True if tier:low PR can tolerate sop-checklist pending state.
|
||||
|
||||
GENERIC PENDING-AS-GREEN REMOVED (Researcher + CR2 RC on #2368):
|
||||
The prior soft-fail accepted ANY pending sop-checklist for tier:low,
|
||||
which allowed required checks to pass without genuine verification.
|
||||
Pending required sop-checklist must now always HOLD and appear in
|
||||
missing_or_bad. This function is retained as a policy hook but
|
||||
currently always returns False so pending never counts green.
|
||||
|
||||
If a positively identifiable genuine soft-fail state is defined in
|
||||
future (e.g., a specific check-run conclusion), implement it here
|
||||
with strict positive identification — never default to pass.
|
||||
"""
|
||||
return False
|
||||
|
||||
|
||||
def required_contexts_green(
    latest_statuses: dict[str, dict],
    contexts: list[str],
    pr_labels: set[str] | None = None,
) -> tuple[bool, list[str]]:
    """Check that every required status context is in the "success" state.

    Args:
        latest_statuses: Map of status context name to its status record.
        contexts: Required context names, evaluated in the given order.
        pr_labels: Optional PR label names. When truthy they are handed to
            the `_is_tier_low_pending_ok` policy hook, which may exempt a
            non-green context.

    Returns:
        A pair `(ok, missing_or_bad)`. `missing_or_bad` holds one
        "<context>=<state>" entry per non-green context, using the literal
        "missing" when `status_state` yields a falsy state. `ok` is True
        exactly when that list is empty.
    """
    missing_or_bad: list[str] = []
    for context in contexts:
        # A context with no recorded status is evaluated as an empty record.
        state = status_state(latest_statuses.get(context) or {})
        if state == "success":
            continue
        if pr_labels and _is_tier_low_pending_ok(latest_statuses, context, pr_labels):
            # Exemption granted by the policy hook; it is only consulted
            # when the PR carries labels.
            continue
        missing_or_bad.append(f"{context}={state or 'missing'}")
    return not missing_or_bad, missing_or_bad
|
||||
|
||||
@@ -621,32 +622,29 @@ def evaluate_merge_readiness(
|
||||
approvers: set[str],
|
||||
request_changes: list[str],
|
||||
pr_has_current_base: bool,
|
||||
mergeable: bool | None,
|
||||
mergeable: bool,
|
||||
pr_labels: set[str] | None = None,
|
||||
) -> MergeDecision:
|
||||
# 1) Main's push-required contexts must be green. Combined state can be
|
||||
# "failure" due to non-blocking jobs (continue-on-error: true) that do
|
||||
# not gate merges, so check the explicit required set, not combined.
|
||||
#
|
||||
# This main-green gate is ALSO the serialized backstop that makes the
|
||||
# direct-merge (no update) path safe (issue #2358): after a direct merge
|
||||
# of a behind-main PR, main re-runs its push CI; if a semantic main-break
|
||||
# slips through (PR green standalone but broken when combined with newer
|
||||
# main), main's required contexts go red and this gate PAUSES the queue —
|
||||
# no further merge piles onto an unverified/red main until it is green.
|
||||
main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
|
||||
main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
|
||||
if not main_ok:
|
||||
return MergeDecision(False, "pause", "main required contexts not green: " + ", ".join(main_bad))
|
||||
|
||||
# 2) No open official REQUEST_CHANGES on the current head.
|
||||
# 2) PR head must contain current main.
|
||||
if not pr_has_current_base:
|
||||
return MergeDecision(False, "update", "PR head does not contain current main")
|
||||
|
||||
# 3) No open official REQUEST_CHANGES on the current head.
|
||||
if request_changes:
|
||||
return MergeDecision(
|
||||
False, "wait",
|
||||
"open REQUEST_CHANGES on current head from: " + ", ".join(sorted(request_changes)),
|
||||
)
|
||||
|
||||
# 3) Enough distinct genuine official approvals on the current head.
|
||||
# 4) Enough distinct genuine official approvals on the current head.
|
||||
if len(approvers) < required_approvals:
|
||||
return MergeDecision(
|
||||
False, "wait",
|
||||
@@ -655,63 +653,26 @@ def evaluate_merge_readiness(
|
||||
f"need {required_approvals}",
|
||||
)
|
||||
|
||||
# 4) Every REQUIRED status context must be green. This includes both
|
||||
# branch-protection-required contexts AND the hardcoded governance checks
|
||||
# (qa-review, security-review, sop-checklist). NON-required reds (E2E
|
||||
# Chat, Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
|
||||
# 5) Every BRANCH-PROTECTION-REQUIRED status context must be green. This is
|
||||
# the authoritative status gate — NON-required reds (qa-review,
|
||||
# security-review, sop-tier/sop-checklist when not BP-required, E2E Chat,
|
||||
# Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
|
||||
# consulted here and must not block.
|
||||
latest = latest_statuses_by_context(pr_status.get("statuses") or [])
|
||||
ok, missing_or_bad = required_contexts_green(latest, required_contexts)
|
||||
ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
|
||||
if not ok:
|
||||
return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))
|
||||
|
||||
# 5) DIRECT-MERGE when conflict-free (issue #2358 — throughput fix).
|
||||
# If Gitea reports the PR conflict-free (mergeable is True), MERGE IT
|
||||
# DIRECTLY even if its head does not yet contain current main. Branch
|
||||
# protection does NOT require strict up-to-date, so a behind-main but
|
||||
# conflict-free PR merges cleanly. We deliberately do NOT call
|
||||
# /pulls/{n}/update first: update triggers Gitea dismiss_stale_approvals,
|
||||
# which would dismiss the PR's genuine approvals and force a full
|
||||
# re-review every tick — the rebase-churn bottleneck that collapsed
|
||||
# throughput to ~0/hr with dozens of mergeable PRs open.
|
||||
#
|
||||
# The merge bar is UNCHANGED: we only reach here with main green +
|
||||
# >= required genuine approvals on the current head + no open
|
||||
# REQUEST_CHANGES + every BP-required context green. The trade-off is
|
||||
# that the PR's CI ran on a possibly-behind base, so a SEMANTIC main-break
|
||||
# is caught by POST-merge main CI (step 1's pause backstop) rather than
|
||||
# pre-merge. force_merge is used ONLY for missing-but-non-required
|
||||
# governance reds (required are green + approvals genuine), never to
|
||||
# bypass a failing required context or an approval shortfall.
|
||||
if mergeable is True:
|
||||
force = _non_required_red_present(latest, required_contexts)
|
||||
return MergeDecision(True, "merge", "ready", force=force)
|
||||
# 6) Gitea must consider the PR mergeable (no conflicts).
|
||||
if not mergeable:
|
||||
return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
|
||||
|
||||
# 6) NOT (yet) mergeable. TRI-STATE, fail-closed — never merge on an unknown.
|
||||
# We MUST distinguish "still computing" (None/missing) from a "definitive
|
||||
# conflict" (False); collapsing them would route a behind-main but
|
||||
# STILL-COMPUTING PR into the /update path, whose dismiss_stale_approvals
|
||||
# is the rebase-churn this change eliminates.
|
||||
#
|
||||
# mergeable is None → Gitea has NOT finished computing conflict state.
|
||||
# WAIT: do nothing this tick — never /update (would dismiss genuine
|
||||
# approvals during the compute window → churn), never merge. Re-check next
|
||||
# tick once Gitea reports a decisive True/False.
|
||||
if mergeable is None:
|
||||
return MergeDecision(
|
||||
False, "wait",
|
||||
"PR mergeability is still being computed (mergeable=None) — waiting",
|
||||
)
|
||||
|
||||
# mergeable is False → DEFINITIVE not-mergeable. If the head also does not
|
||||
# contain current main, try the /update path to refresh the branch (this
|
||||
# may resolve a behind-main non-conflict; a real conflict returns HTTP 409
|
||||
# and process_once HOLDs the PR per #2352). If the head already contains
|
||||
# current main yet Gitea still reports not-mergeable, there is nothing the
|
||||
# queue can do (genuine conflict against current main) — WAIT.
|
||||
if not pr_has_current_base:
|
||||
return MergeDecision(False, "update", "PR not mergeable and head does not contain current main")
|
||||
return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
|
||||
# Ready. Use force_merge ONLY if the merge would otherwise be blocked by
|
||||
# missing-but-non-required governance contexts. Required are green and
|
||||
# approvals are genuine, so force only bypasses non-required reds — never a
|
||||
# failing required context or missing approval.
|
||||
force = _non_required_red_present(latest, required_contexts)
|
||||
return MergeDecision(True, "merge", "ready", force=force)
|
||||
|
||||
|
||||
def get_branch_head(branch: str) -> str:
|
||||
@@ -928,9 +889,7 @@ def process_once(*, dry_run: bool = False) -> int:
|
||||
f"unavailable (fail-closed): {exc}\n"
|
||||
)
|
||||
return 0
|
||||
# Uniform gate: governance checks are ALWAYS required, even if branch
|
||||
# protection does not enumerate them. Deduplicate against BP list.
|
||||
contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
|
||||
contexts = bp.required_contexts
|
||||
required_approvals = bp.required_approvals
|
||||
print(
|
||||
f"::notice::queue policy from branch protection: "
|
||||
@@ -1117,20 +1076,12 @@ def _evaluate_candidate(
|
||||
# never treated as green).
|
||||
pr_status = get_combined_status(head_sha)
|
||||
pr_labels = label_names(pr)
|
||||
# FAIL-CLOSED, TRI-STATE: Gitea returns mergeable=None (or omits the field)
|
||||
# while it is still COMPUTING conflict state, mergeable=False for a definitive
|
||||
# conflict, and mergeable=True only when it has proven the PR conflict-free.
|
||||
# We preserve all THREE states (do NOT collapse None/missing into False):
|
||||
# - True → direct-merge eligible (step 5).
|
||||
# - None / missing → still computing → WAIT (never merge, never update,
|
||||
# never dismiss approvals); re-check next tick.
|
||||
# - False → definitive conflict → the update/hold path (step 6).
|
||||
# Collapsing None→False would route a behind-main but STILL-COMPUTING PR into
|
||||
# the /update path, which triggers dismiss_stale_approvals — the exact
|
||||
# rebase-churn this change eliminates. Normalize only to the literal True /
|
||||
# False / None set (some Gitea versions omit the key entirely → None).
|
||||
raw_mergeable = pr.get("mergeable")
|
||||
mergeable: bool | None = raw_mergeable if isinstance(raw_mergeable, bool) else None
|
||||
# FAIL-CLOSED: Gitea returns mergeable=None (or omits the field) while it is
|
||||
# still COMPUTING conflict state. Only the literal True is decisive proof the
|
||||
# PR is conflict-free; None and False both mean "not (yet) mergeable". We must
|
||||
# NOT autonomously merge on an unknown — treat anything but True as not-yet-
|
||||
# mergeable so evaluate_merge_readiness returns a "wait" decision.
|
||||
mergeable = pr.get("mergeable") is True
|
||||
|
||||
reviews = get_pull_reviews(pr_number)
|
||||
approvers, request_changes = genuine_approvals(
|
||||
@@ -1151,122 +1102,12 @@ def _evaluate_candidate(
|
||||
return decision, ctx
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class ReadinessEntry:
    """Readiness record for one merge-queue candidate.

    Instances are immutable (frozen dataclass).
    """

    # Number of the pull request this record describes.
    pr_number: int
    # Merge decision for the PR, or None when no decision is available.
    # The reason field says why.
    decision: MergeDecision | None
    # Human-readable explanation of the state.
    reason: str
|
||||
|
||||
|
||||
def enumerate_readiness(*, dry_run: bool = False) -> list[ReadinessEntry]:
    """Evaluate ALL candidates and return their readiness states.

    Unlike process_once, this does NOT stop at the first actionable
    candidate; it evaluates every eligible PR and returns the full list so
    a post-batch summary can be printed.

    Fail-closed: errors from fetching branch protection, the watched
    branch head, or its combined status are not caught here and propagate
    to the caller (get_branch_protection is expected to raise
    BranchProtectionUnavailable when protection cannot be read). An
    ApiError raised while evaluating a single candidate is recorded on
    that candidate's entry instead of aborting the scan.

    Args:
        dry_run: Forwarded unchanged to _evaluate_candidate.

    Returns:
        One ReadinessEntry per candidate, in candidate order. `decision`
        is None for candidates that could not be verified or are not
        merge-eligible; `reason` says which.
    """
    bp = get_branch_protection(WATCH_BRANCH)
    # Use the same required set process_once enforces: branch-protection
    # contexts plus the always-required governance checks, de-duplicated in
    # order. Evaluating against branch protection alone would let the
    # summary report PRs as ready that the queue itself would hold.
    contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
    required_approvals = bp.required_approvals

    main_sha = get_branch_head(WATCH_BRANCH)
    main_status = get_combined_status(main_sha)

    candidates = choose_candidate_issues(
        list_candidate_issues(auto_discover=AUTO_DISCOVER),
        queue_label=QUEUE_LABEL,
        opt_out_labels=OPT_OUT_LABELS,
        auto_discover=AUTO_DISCOVER,
    )

    entries: list[ReadinessEntry] = []
    for issue in candidates:
        pr_number = int(issue["number"])
        try:
            decision, _ctx = _evaluate_candidate(
                issue,
                main_sha=main_sha,
                main_status=main_status,
                required_contexts=contexts,
                required_approvals=required_approvals,
                dry_run=dry_run,
            )
        except ApiError as exc:
            # Fail-closed per candidate: an unreadable PR is recorded as
            # unverifiable, not skipped silently.
            decision = None
            reason = f"unverifiable (API error: {exc})"
        else:
            if decision is None:
                reason = "not merge-eligible (opt-out/draft/fork/wrong-base)"
            else:
                reason = decision.reason
        entries.append(
            ReadinessEntry(pr_number=pr_number, decision=decision, reason=reason)
        )
    return entries
|
||||
|
||||
|
||||
def print_post_batch_summary(entries: list[ReadinessEntry]) -> None:
    """Print a structured readiness summary for all candidates to stdout.

    The output is wrapped in a `::group::` / `::endgroup::` pair and
    contains four `key=value` counter lines, a blank line, and then one
    `PR #<n>: state=... action=... reason=...` line per entry in input
    order.

    Args:
        entries: Readiness records to summarise. Only the `pr_number`,
            `decision` and `reason` attributes are read, plus
            `decision.ready` / `decision.action` when a decision is set.
    """
    ready = waiting = ineligible = 0
    detail_lines: list[str] = []
    for e in entries:
        decision = e.decision
        if decision and decision.ready:
            state = "ready"
            ready += 1
        elif decision:
            state = "waiting"
            waiting += 1
        else:
            # Unverifiable entries are also labelled "ineligible" per line.
            state = "ineligible"
        if decision is None:
            ineligible += 1
        action = decision.action if decision else "n/a"
        detail_lines.append(
            f"PR #{e.pr_number}: state={state} action={action} reason={e.reason}"
        )

    print("::group::merge-queue readiness summary")
    print(f"total_candidates={len(entries)}")
    print(f"ready={ready}")
    print(f"waiting={waiting}")
    print(f"ineligible/unverifiable={ineligible}")
    print("")
    for line in detail_lines:
        print(line)
    print("::endgroup::")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument(
|
||||
"--enumerate",
|
||||
action="store_true",
|
||||
help="Evaluate all candidates and print a readiness summary without merging.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
_require_runtime_env()
|
||||
try:
|
||||
if args.enumerate:
|
||||
entries = enumerate_readiness(dry_run=args.dry_run)
|
||||
print_post_batch_summary(entries)
|
||||
return 0
|
||||
return process_once(dry_run=args.dry_run)
|
||||
except ApiError as exc:
|
||||
# FAIL-CLOSED: API errors are not "transient success" — they mean
|
||||
|
||||
@@ -165,7 +165,7 @@ def api(
|
||||
# Format: "<workflow_name> / <job_name_or_key> (<event>)"
|
||||
# Examples observed on molecule-core/main:
|
||||
# "Secret scan / Scan diff for credential-shaped strings (pull_request)"
|
||||
# " / tier-check (pull_request)"
|
||||
# "sop-tier-check / tier-check (pull_request)"
|
||||
#
|
||||
# Split strategy: peel off the trailing ` (<event>)` first, then split
|
||||
# the leading `<workflow> / <rest>` on the FIRST ` / ` (workflow names
|
||||
|
||||
@@ -17,7 +17,7 @@ Rules (4 fatal + 1 fatal cross-file + 1 heuristic-warn):
|
||||
enumeration; task #81). Workflow registers, fires for 0 events.
|
||||
3. `name:` containing `/` — breaks the
|
||||
`<workflow> / <job> (<event>)` commit-status context convention;
|
||||
downstream parsers (sop-checklist, status-reaper) tokenize on `/`.
|
||||
downstream parsers (sop-tier-check, status-reaper) tokenize on `/`.
|
||||
4. `name:` collision across files — Gitea routes commit-status updates
|
||||
by `name` and behavior on collision is undefined (status-reaper
|
||||
rev1 fail-loud).
|
||||
@@ -150,7 +150,7 @@ def check_name_with_slash(filename: str, doc: Any) -> list[str]:
|
||||
f"::error file={filename}::Rule 3 (FATAL): workflow `name: "
|
||||
f"{name!r}` contains `/`. The commit-status context convention "
|
||||
f"is `<workflow> / <job> (<event>)`; embedding `/` in the "
|
||||
f"workflow name makes downstream parsers (sop-checklist, "
|
||||
f"workflow name makes downstream parsers (sop-tier-check, "
|
||||
f"status-reaper) tokenize ambiguously. Rename to use `-` or "
|
||||
f"` ` instead."
|
||||
)
|
||||
|
||||
@@ -49,7 +49,8 @@ Daily scheduled run + workflow_dispatch:
|
||||
4. If orphans exist:
|
||||
- File or PATCH a `[ci-bp-drift]` issue (idempotency contract:
|
||||
search for exact title prefix, edit existing if open).
|
||||
- Apply label `ci-bp-drift` (lookup ID per repo).
|
||||
- Apply labels `tier:high` + `ci-bp-drift` (lookup IDs per
|
||||
repo; per `feedback_tier_label_ids_are_per_repo`).
|
||||
- Exit 1.
|
||||
|
||||
5. If no orphans:
|
||||
@@ -81,7 +82,7 @@ Memory cross-links
|
||||
------------------
|
||||
- internal#350 (the RFC that specs this lint)
|
||||
- feedback_phantom_required_check_after_gitea_migration
|
||||
- feedback_label_ids_are_per_repo
|
||||
- feedback_tier_label_ids_are_per_repo
|
||||
- reference_post_suspension_pipeline
|
||||
"""
|
||||
from __future__ import annotations
|
||||
@@ -358,7 +359,7 @@ def file_or_update_issue(
|
||||
existing = h
|
||||
break
|
||||
|
||||
label_ids = _ensure_labels(repo, ["ci-bp-drift"])
|
||||
label_ids = _ensure_labels(repo, ["ci-bp-drift", "tier:high"])
|
||||
|
||||
if existing:
|
||||
api(
|
||||
|
||||
@@ -305,9 +305,9 @@ def validate_tracker(
|
||||
if status == "error":
|
||||
sys.stderr.write(
|
||||
f"::error::issue {slug}#{num} fetch errored — treating as "
|
||||
f"unverified, FAILING CLOSED (do not skip on outage).\n"
|
||||
f"unverified, skipping this check.\n"
|
||||
)
|
||||
return (False, f"{slug}#{num} fetch errored — cannot verify tracker")
|
||||
return (True, "fetch-error — skipped")
|
||||
|
||||
assert payload is not None
|
||||
state = payload.get("state", "")
|
||||
|
||||
@@ -50,7 +50,7 @@ runtime contract enforcement lives in `_require_runtime_env()`.
|
||||
|
||||
Run locally (dry-run, no API mutation):
|
||||
GITEA_TOKEN=... GITEA_HOST=git.moleculesai.app REPO=owner/repo \\
|
||||
WATCH_BRANCH=main RED_LABEL=ci-bp-drift \\
|
||||
WATCH_BRANCH=main RED_LABEL=tier:high \\
|
||||
python3 .gitea/scripts/main-red-watchdog.py --dry-run
|
||||
"""
|
||||
from __future__ import annotations
|
||||
@@ -81,7 +81,7 @@ GITEA_TOKEN = _env("GITEA_TOKEN")
|
||||
GITEA_HOST = _env("GITEA_HOST")
|
||||
REPO = _env("REPO")
|
||||
WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
|
||||
RED_LABEL = _env("RED_LABEL", default="ci-bp-drift")
|
||||
RED_LABEL = _env("RED_LABEL", default="tier:high")
|
||||
|
||||
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
|
||||
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#
|
||||
# Flow:
|
||||
# 1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
|
||||
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, labels
|
||||
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, tier label
|
||||
# 3. GET /repos/{R}/issues/{N}/comments — extract /sop-ack and /sop-revoke
|
||||
# 4. For each checklist item:
|
||||
# a. Is the section marker present in PR body? (author answered)
|
||||
@@ -665,8 +665,8 @@ def load_config(path: str) -> dict[str, Any]:
|
||||
def _load_config_minimal(path: str) -> dict[str, Any]:
|
||||
"""Minimal YAML subset parser for our config shape.
|
||||
|
||||
Supports: top-level scalar:value, top-level map-of-map,
|
||||
top-level list of maps (items:), and within an
|
||||
Supports: top-level scalar:value, top-level map-of-map (e.g.
|
||||
tier_failure_mode), top-level list of maps (items:), and within an
|
||||
item map: scalars + lists of scalars. Does NOT support nested lists,
|
||||
YAML anchors, multi-doc, or flow style.
|
||||
"""
|
||||
@@ -835,7 +835,8 @@ def render_status(
|
||||
|
||||
state is "success" if every item has at least one valid ack
|
||||
(body section presence is informational only — peer-ack is the
|
||||
real gate).
|
||||
real gate). tier:low PRs receive state="success" (soft-fail — no
|
||||
acks required); the description carries "[info tier:low]" prefix.
|
||||
"""
|
||||
n = len(items)
|
||||
fully_acked = [
|
||||
@@ -862,16 +863,35 @@ def render_status(
|
||||
return state, " — ".join(desc_parts)
|
||||
|
||||
|
||||
def get_tier_mode(pr: dict[str, Any], cfg: dict[str, Any]) -> str:
|
||||
"""Read tier label, return 'hard' or 'soft' per cfg.tier_failure_mode."""
|
||||
labels = pr.get("labels") or []
|
||||
tier_labels = [label.get("name", "") for label in labels if (label.get("name", "") or "").startswith("tier:")]
|
||||
mode_map = cfg.get("tier_failure_mode") or {}
|
||||
default_mode = cfg.get("default_mode", "hard")
|
||||
for tl in tier_labels:
|
||||
if tl in mode_map:
|
||||
return mode_map[tl]
|
||||
return default_mode
|
||||
|
||||
|
||||
def is_high_risk(pr: dict[str, Any], cfg: dict[str, Any]) -> bool:
|
||||
"""Return True when the PR is high-risk per RFC#450 Option C.
|
||||
|
||||
A PR is high-risk when it carries any label listed in cfg.high_risk_labels.
|
||||
A PR is high-risk when ANY of:
|
||||
- it carries the `tier:high` label (mechanically strictest tier), or
|
||||
- it carries any label listed in cfg.high_risk_labels.
|
||||
|
||||
High-risk PRs use `required_teams_high_risk` (when set on an item)
|
||||
instead of the default `required_teams`. Items without
|
||||
`required_teams_high_risk` are unaffected (the default applies).
|
||||
|
||||
Governance fix for internal#442 — closes the inconsistency between
|
||||
sop-tier-check (tier-aware) and sop-checklist (was tier-blind).
|
||||
"""
|
||||
label_set = {(label.get("name") or "") for label in (pr.get("labels") or [])}
|
||||
if "tier:high" in label_set:
|
||||
return True
|
||||
high_risk_labels = set(cfg.get("high_risk_labels") or [])
|
||||
return bool(label_set & high_risk_labels)
|
||||
|
||||
@@ -1149,6 +1169,13 @@ def main(argv: list[str] | None = None) -> int:
|
||||
body_state = {it["slug"]: section_marker_present(body, it["pr_section_marker"]) for it in items}
|
||||
|
||||
state, description = render_status(items, ack_state, body_state)
|
||||
mode = get_tier_mode(pr, cfg)
|
||||
if mode == "soft":
|
||||
# tier:low: acks are informational only — post success so BP gate passes.
|
||||
# Description carries "[info tier:low]" prefix so reviewers know acks
|
||||
# were not required (vs a tier:medium+ PR that truly passed all acks).
|
||||
state = "success"
|
||||
description = f"[info tier:low] {description}"
|
||||
if volume_skipped:
|
||||
# Above the comment-cap — we may have a partial view. Soft-pend
|
||||
# so neither BP nor the author gets stuck; surface the cap so
|
||||
@@ -1162,7 +1189,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||
# Diagnostics to job log.
|
||||
print(
|
||||
f"::notice::PR #{args.pr} author={author} head={head_sha[:7]} "
|
||||
f"risk_class={'high' if high_risk else 'default'}"
|
||||
f"mode={mode} risk_class={'high' if high_risk else 'default'}"
|
||||
)
|
||||
for it in items:
|
||||
slug = it["slug"]
|
||||
|
||||
Executable
+427
@@ -0,0 +1,427 @@
|
||||
#!/usr/bin/env bash
|
||||
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
|
||||
#
|
||||
# Reads the PR's tier label, walks approving reviewers, and checks team
|
||||
# membership against the tier's approval expression. Passes only when
|
||||
# ALL clauses in the expression are satisfied by the set of approving
|
||||
# reviewers (AND-composition; internal#189).
|
||||
#
|
||||
# Expression syntax:
|
||||
# "team-a" — OR-set: any ONE of the comma-separated teams
|
||||
# "team-a AND team-b" — AND: BOTH must each have ≥1 approver
|
||||
# "(a,b,c)" — OR-set wrapped in parens; same as "a,b,c"
|
||||
#
|
||||
# Example: "qa AND security AND (managers,ceo)" means:
|
||||
# ≥1 approver in team "qa" AND
|
||||
# ≥1 approver in team "security" AND
|
||||
# ≥1 approver in team "managers" OR "ceo"
|
||||
#
|
||||
# Per the spec (internal#189), the hard gate here pairs with the
|
||||
# advisory gate of sop-conformance LLM-judge (internal#188): each
|
||||
# required-team click must reflect real verification (visible in review
|
||||
# body or A2A messages), not rubber-stamp APPROVE. Both gates together
|
||||
# close the "teammate clicks APPROVE without verifying" gap.
|
||||
#
|
||||
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
|
||||
# the env vars below; this script does no IO outside of stdout/stderr +
|
||||
# the Gitea API.
|
||||
#
|
||||
# Required env:
|
||||
# GITEA_TOKEN — bot PAT with read:organization,read:user,
|
||||
# read:issue,read:repository scopes
|
||||
# GITEA_HOST — e.g. git.moleculesai.app
|
||||
# REPO — owner/name (from github.repository)
|
||||
# PR_NUMBER — int (from github.event.pull_request.number)
|
||||
# PR_AUTHOR — login (from github.event.pull_request.user.login)
|
||||
#
|
||||
# Optional:
|
||||
# SOP_DEBUG=1 — print per-API-call diagnostic lines. Default: off.
|
||||
# SOP_LEGACY_CHECK=1 — revert to OR-gate (≥1 approver from any eligible
|
||||
# team). Grace window for PRs in-flight when the
|
||||
# new AND-composition was deployed. Expires 2026-05-17
|
||||
# (7-day burn-in window; internal#189 Phase 1).
|
||||
# Set by workflow for PRs merged before the deploy.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure jq is available. Runners may not have it pre-installed, and the
|
||||
# workflow-level jq install can fail on runners with network restrictions
|
||||
# (GitHub releases not reachable from some runner networks — infra#241
|
||||
# follow-up). This fallback is idempotent — no-op when jq is already on PATH.
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
echo "::notice::jq not found on PATH — attempting install..."
|
||||
_jq_installed="no"
|
||||
# apt-get first (primary) — Ubuntu package mirrors are reliably reachable.
|
||||
if apt-get update -qq && apt-get install -y -qq jq 2>/dev/null; then
|
||||
echo "::notice::jq installed via apt-get: $(jq --version)"
|
||||
_jq_installed="yes"
|
||||
# GitHub binary as secondary fallback — may fail on restricted networks.
|
||||
elif timeout 120 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq \
|
||||
&& chmod +x /usr/local/bin/jq; then
|
||||
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
|
||||
_jq_installed="yes"
|
||||
fi
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
echo "::error::jq installation failed — apt-get and GitHub binary both failed."
|
||||
echo "::error::sop-tier-check requires jq for all JSON API parsing."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
debug() {
|
||||
if [ "${SOP_DEBUG:-}" = "1" ]; then
|
||||
echo " [debug] $*" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
# Validate env
|
||||
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
|
||||
: "${GITEA_HOST:?GITEA_HOST required}"
|
||||
: "${REPO:?REPO required (owner/name)}"
|
||||
: "${PR_NUMBER:?PR_NUMBER required}"
|
||||
: "${PR_AUTHOR:?PR_AUTHOR required}"
|
||||
|
||||
OWNER="${REPO%%/*}"
|
||||
NAME="${REPO##*/}"
|
||||
API="https://${GITEA_HOST}/api/v1"
|
||||
AUTH="Authorization: token ${GITEA_TOKEN}"
|
||||
echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"
|
||||
|
||||
# Sanity: token resolves to a user.
|
||||
# Use || true on the jq pipeline so that set -euo pipefail (line 45) does not
|
||||
# cause the script to exit prematurely when the token is empty/invalid — the
|
||||
# if check below handles that case gracefully. Without || true, a 401 from an
|
||||
# empty/invalid token causes jq to exit 1, triggering set -e and exiting the
|
||||
# entire script before the error can be logged.
|
||||
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""') || true
|
||||
if [ -z "$WHOAMI" ]; then
|
||||
echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::token resolves to user: $WHOAMI"
|
||||
|
||||
# 0.5 Read PR head SHA so we can reject stale approvals after head moves
|
||||
# (internal#816). Reviews carry the commit_id they were submitted against.
|
||||
HEAD_SHA=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}" | jq -r '.head.sha // ""') || true
|
||||
if [ -z "$HEAD_SHA" ]; then
|
||||
echo "::error::Failed to fetch PR head SHA — token may be invalid."
|
||||
exit 1
|
||||
fi
|
||||
debug "pr-head-sha=$HEAD_SHA"
|
||||
|
||||
# 1. Read tier label. || true ensures set -euo pipefail does not abort the
|
||||
# script if curl or jq fails (e.g. 401 from empty token).
|
||||
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || true
|
||||
TIER=""
|
||||
for L in $LABELS; do
|
||||
case "$L" in
|
||||
tier:low|tier:medium|tier:high)
|
||||
if [ -n "$TIER" ]; then
|
||||
echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
|
||||
exit 1
|
||||
fi
|
||||
TIER="$L"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
if [ -z "$TIER" ]; then
|
||||
echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
|
||||
exit 1
|
||||
fi
|
||||
debug "tier=$TIER"
|
||||
|
||||
# 2. Tier → required team expression (AND-composition; internal#189)
|
||||
#
|
||||
# Expression syntax:
|
||||
# clause-a AND clause-b AND ... — ALL clauses must pass
|
||||
# team-a,team-b,team-c — OR-set: ≥1 approver in ANY of these teams
|
||||
# (team-a,team-b) — same as team-a,team-b (parens optional)
|
||||
#
|
||||
# This map is the single source of truth. Update it when the team structure
|
||||
# or policy changes. Teams referenced here but absent in Gitea are treated
|
||||
# as unachievable (would always fail) — operators notice the clear error
|
||||
# and create the missing team.
|
||||
#
|
||||
# Current Gitea teams: ceo, engineers, managers
|
||||
# Future teams (create before removing "???" fallback): qa, security, security-audit
|
||||
declare -A TIER_EXPR=(
|
||||
# tier:low — same as previous OR gate: any engineer, manager, or ceo.
|
||||
["tier:low"]="engineers,managers,ceo"
|
||||
|
||||
# tier:medium — AND of (managers) AND (engineers) AND (qa???,security???)
|
||||
# The qa+security clause requires both teams to exist; when not yet
|
||||
# created, the PR author is responsible for adding them before requesting
|
||||
# approval on a tier:medium PR. Ops: create qa + security Gitea teams
|
||||
# and update this map to remove the "???" markers (internal#189 follow-up).
|
||||
["tier:medium"]="managers AND engineers AND qa???,security???"
|
||||
|
||||
# tier:high — ceo only. The AND-composition adds no value for a
|
||||
# single-team gate, but the framework is wired for consistency.
|
||||
["tier:high"]="ceo"
|
||||
)
|
||||
|
||||
EXPR="${TIER_EXPR[$TIER]-}"
|
||||
if [ -z "$EXPR" ]; then
|
||||
echo "::error::No expression defined for tier $TIER in TIER_EXPR map."
|
||||
exit 1
|
||||
fi
|
||||
debug "expression=$EXPR"
|
||||
|
||||
# 3. Legacy OR-gate override (7-day burn-in grace window; internal#189 Phase 1)
|
||||
if [ "${SOP_LEGACY_CHECK:-}" = "1" ]; then
|
||||
LEGACY_ELIGIBLE=""
|
||||
case "$TIER" in
|
||||
tier:low) LEGACY_ELIGIBLE="engineers managers ceo" ;;
|
||||
tier:medium) LEGACY_ELIGIBLE="managers ceo" ;;
|
||||
tier:high) LEGACY_ELIGIBLE="ceo" ;;
|
||||
esac
|
||||
echo "::notice::SOP_LEGACY_CHECK=1 — using OR-gate ({$LEGACY_ELIGIBLE}) for this PR."
|
||||
ELIGIBLE="$LEGACY_ELIGIBLE"
|
||||
fi
|
||||
|
||||
# 4. Resolve all team names → IDs
|
||||
# /orgs/{org}/teams/{slug}/... endpoints don't exist on Gitea 1.22;
|
||||
# we use /teams/{id}.
|
||||
# set +e prevents set -e from aborting the script if curl fails (e.g. empty token).
|
||||
ORG_TEAMS_FILE=$(mktemp)
|
||||
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
|
||||
set +e
|
||||
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/orgs/${OWNER}/teams")
|
||||
_HTTP_EXIT=$?
|
||||
set -e
|
||||
debug "teams-list HTTP=$HTTP_CODE (curl exit=$_HTTP_EXIT) size=$(wc -c <"$ORG_TEAMS_FILE")"
|
||||
if [ "${SOP_DEBUG:-}" = "1" ]; then
|
||||
echo " [debug] teams-list body (first 300 chars):" >&2
|
||||
head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
|
||||
fi
|
||||
if [ "$_HTTP_EXIT" -ne 0 ] || [ "$HTTP_CODE" != "200" ]; then
|
||||
echo "::error::GET /orgs/${OWNER}/teams failed (curl exit=$_HTTP_EXIT HTTP=$HTTP_CODE) — token may lack read:org scope or be invalid."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Collect every team name that appears in the expression.
|
||||
# Bash word-splitting on $EXPR splits on spaces, so "AND" appears as a
|
||||
# token. We skip it explicitly.
|
||||
declare -A TEAM_ID
|
||||
_all_teams=""
|
||||
for _raw_clause in $EXPR; do
|
||||
# Strip parens and split on comma.
|
||||
_clause=${_raw_clause//[()]/}
|
||||
for _t in $(echo "$_clause" | tr ',' '\n'); do
|
||||
_t=$(echo "$_t" | tr -d '[:space:]')
|
||||
[ -z "$_t" ] && continue
|
||||
# Skip AND / OR operator tokens (bash word-split produced them from
|
||||
# spaces in the expression string).
|
||||
[ "$_t" = "AND" ] || [ "$_t" = "OR" ] && continue
|
||||
# Skip if already in set.
|
||||
case " $_all_teams " in
|
||||
*" $_t "*) ;; # already present
|
||||
*) _all_teams="${_all_teams} $_t " ;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
for _t in $_all_teams; do
|
||||
_t=$(echo "$_t" | tr -d ' ')
|
||||
[ -z "$_t" ] && continue
|
||||
_id=$(jq -r --arg t "$_t" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
|
||||
if [ -z "$_id" ] || [ "$_id" = "null" ]; then
|
||||
# "???" suffix marks teams that don't exist yet (tier:medium qa/security).
|
||||
# Treat as permanently failing clause; clear error message guides ops.
|
||||
if [[ "$_t" == *"???" ]]; then
|
||||
debug "team \"$_t\" not found (expected — pending team creation per internal#189)"
|
||||
continue
|
||||
fi
|
||||
_visible=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
|
||||
echo "::error::Team \"$_t\" referenced in tier $TIER expression but not found in org $OWNER. Teams visible: $_visible"
|
||||
exit 1
|
||||
fi
|
||||
TEAM_ID[$_t]="$_id"
|
||||
debug "team-id: $_t → $_id"
|
||||
done
|
||||
|
||||
# 5. Read approving reviewers. set +e disables set -e temporarily so that curl
|
||||
# failures (e.g. empty/invalid token → HTTP 401) do not abort the script before
|
||||
# the failure can be reported below; set -e is restored immediately after.
|
||||
set +e
|
||||
REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
|
||||
_REVIEWS_EXIT=$?
|
||||
set -e
|
||||
if [ $_REVIEWS_EXIT -ne 0 ] || [ -z "$REVIEWS" ]; then
|
||||
echo "::error::Failed to fetch reviews (curl exit=$_REVIEWS_EXIT) — token may be invalid or unreachable."
|
||||
exit 1
|
||||
fi
|
||||
APPROVERS=$(echo "$REVIEWS" | jq -r --arg head_sha "$HEAD_SHA" '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]') || true
|
||||
if [ -z "$APPROVERS" ]; then
|
||||
echo "::error::No approving reviews on this PR. Set SOP_DEBUG=1 and re-run for diagnostics."
|
||||
exit 1
|
||||
fi
|
||||
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
|
||||
|
||||
# 6. For each approver: skip self-review; probe team membership by id.
|
||||
# Build $APPROVER_TEAMS[<user>]=space-surrounded team names (e.g. " managers ").
|
||||
# Pre/post spaces ensure case patterns *${_t}* match even when the name
|
||||
# is the first or last entry (bash case *word* needs delimiters on both sides).
|
||||
#
|
||||
# FAIL-CLOSED AUTHORIZATION (security: SOP tier gate is an AUTHORIZATION gate).
|
||||
#
|
||||
# This used to fall back to /orgs/{org}/members/{user} whenever every team
|
||||
# probe failed and credit any org member as a member of EVERY queried team.
|
||||
# That was a privilege-escalation: org membership is NOT team membership, so
|
||||
# a 403/visibility/token-scope gap on the team probes silently promoted a
|
||||
# plain org member to satisfy tier:high (ceo). An inability-to-verify became
|
||||
# an authorization GRANT. The fallback is REMOVED — org membership must never
|
||||
# satisfy a team-gated tier.
|
||||
#
|
||||
# A team-membership probe has exactly three meaningful outcomes:
|
||||
# 200 / 204 → the user IS a member of that team (credit it)
|
||||
# 404 → the user is definitively NOT a member (no credit, verified)
|
||||
# anything else (403 / 401 / 5xx / curl failure / non-numeric)
|
||||
# → membership CANNOT be read (cannot-verify)
|
||||
#
|
||||
# Per the dev-sop fail-closed rule (inability-to-verify = failure, never a
|
||||
# pass — and here, never an authorization grant), a cannot-verify outcome on
|
||||
# ANY probe is a HARD infra failure: we publish a loud cannot-verify error and
|
||||
# exit non-zero. We do NOT proceed to evaluate the tier expression on a partial
|
||||
# / unverifiable membership picture, because doing so could let an unverifiable
|
||||
# approver's clause silently fail-or-pass on incomplete data. Fix the token
|
||||
# scope (read:organization) or the runner network — not the gate.
|
||||
declare -A APPROVER_TEAMS
|
||||
_verify_failed="" # accumulates "<user>:<team>(HTTP <code>)" for probes we could not read
|
||||
for U in $APPROVERS; do
|
||||
[ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
|
||||
for T in "${!TEAM_ID[@]}"; do
|
||||
ID="${TEAM_ID[$T]}"
|
||||
set +e
|
||||
CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/teams/${ID}/members/${U}")
|
||||
_curl_exit=$?
|
||||
set -e
|
||||
debug "probe: $U in team $T (id=$ID) → HTTP $CODE (curl exit=$_curl_exit)"
|
||||
if [ "$_curl_exit" -ne 0 ]; then
|
||||
# curl itself failed (DNS, connection refused, timeout) — unreachable.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(curl exit ${_curl_exit})"
|
||||
continue
|
||||
fi
|
||||
case "$CODE" in
|
||||
200|204)
|
||||
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
|
||||
debug "$U qualifies for team $T"
|
||||
;;
|
||||
404)
|
||||
# Definitively not a member of this team — a verified negative.
|
||||
debug "$U is NOT a member of team $T (verified 404)"
|
||||
;;
|
||||
*)
|
||||
# 403/401/5xx/etc — membership is unreadable. Do NOT treat as "not a
|
||||
# member" and do NOT fall back to org membership. This is cannot-verify.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(HTTP ${CODE})"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
# Fail-closed: if ANY membership probe could not be read, we cannot make an
|
||||
# authorization decision. Publish a loud cannot-verify / infra-failed status
|
||||
# and exit non-zero. Never grant the tier on unverifiable membership.
|
||||
if [ -n "$_verify_failed" ]; then
|
||||
echo "::error::sop-tier-check CANNOT VERIFY team membership — gate FAILS CLOSED."
|
||||
echo "::error::Unreadable membership probe(s): ${_verify_failed}"
|
||||
echo "::error::A team-membership probe returned 403/401/5xx (or curl failed). The SOP tier gate is an authorization gate; an inability to verify team membership is treated as a FAILURE, never a pass. Org membership is NOT team membership and is never credited as a fallback."
|
||||
echo "::error::Fix: ensure GITEA_TOKEN (SOP_TIER_CHECK_TOKEN) has read:organization scope and the Gitea API is reachable from the runner, then re-run. Do NOT relax this gate."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 7. Evaluate the tier expression.
|
||||
#
|
||||
# legacy OR-gate: use the simplified loop from before internal#189.
|
||||
if [ -n "${LEGACY_ELIGIBLE:-}" ]; then
|
||||
OK=""
|
||||
# Legacy OR-gate scan: the gate passes as soon as ANY verified approver is a
# member of ANY team listed in $LEGACY_ELIGIBLE (sets OK="yes").
#
# APPROVER_TEAMS values are space-surrounded lists of team names, so the match
# is anchored on the surrounding spaces. A bare *${_t2}* pattern is a substring
# match and would credit a member of e.g. "security-audit" as a member of
# "security"; the delimiters make the comparison whole-name only.
for _u in "${!APPROVER_TEAMS[@]}"; do
  for _t2 in $LEGACY_ELIGIBLE; do
    case "${APPROVER_TEAMS[$_u]}" in
      *" ${_t2} "*)
        echo "::notice::approver $_u is in team $_t2 (eligible for $TIER)"
        OK="yes"
        break
        ;;
    esac
  done
  # One eligible approver is sufficient — stop scanning further approvers.
  [ -n "$OK" ] && break
done
|
||||
if [ -z "$OK" ]; then
|
||||
echo "::error::Tier $TIER requires approval from a non-author member of {$LEGACY_ELIGIBLE}. Set SOP_DEBUG=1 to see per-probe HTTP codes."
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::sop-tier-check passed: $TIER (legacy OR-gate)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# AND-gate: evaluate the expression clause by clause.
|
||||
# _passed_clauses and _failed_clauses accumulate for the status description.
|
||||
_passed_clauses=""
|
||||
_failed_clauses=""
|
||||
|
||||
for _raw_clause in $EXPR; do
|
||||
# Normalise: strip parens, replace commas with spaces so bash word-split
|
||||
# can iterate the OR-set members. The previous form
|
||||
# _clause=$(echo ... | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
|
||||
# collapsed every member into one concatenated token because
|
||||
# `tr -d '[:space:]'` strips the very newlines that just separated them
|
||||
# ("engineers,managers,ceo" -> "engineersmanagersceo"), so the OR-clause
|
||||
# only ever evaluated as a single nonsense team name and never matched
|
||||
# APPROVER_TEAMS. Fixed in #229: leave the comma-separated members as
|
||||
# space-separated tokens for `for _t in $_clause`.
|
||||
_no_parens=${_raw_clause//[()]/}
|
||||
_clause=${_no_parens//,/ }
|
||||
_clause_passed="no"
|
||||
_clause_names=""
|
||||
for _t in $_clause; do
|
||||
# Append (don't overwrite) team name to the human-readable accumulator.
|
||||
# The previous form `_clause_names="${_clause_names:+, }${_t}"`
|
||||
# rewrote the variable on every iteration, so the FAIL message only
|
||||
# ever showed the LAST team. Fixed: prepend prior value before the
|
||||
# comma-separator, then append the new team name.
|
||||
_clause_names="${_clause_names}${_clause_names:+, }${_t}"
|
||||
# Skip teams not yet in Gitea (qa??? / security??? placeholders).
|
||||
[[ "$_t" == *"???" ]] && debug "clause \"$_t\": skipped (team pending creation)" && continue
|
||||
[ -z "${TEAM_ID[$_t]:-}" ] && debug "clause \"$_t\": no ID resolved, skipping" && continue
|
||||
# Scan the verified approvers for one who belongs to team $_t; the first hit
# marks the enclosing clause as satisfied and ends the scan for this team.
#
# APPROVER_TEAMS values are space-surrounded lists of team names (e.g.
# " managers "), so the match is anchored on the surrounding spaces. A bare
# *${_t}* pattern is a substring match and would let membership of a
# longer-named team (e.g. "security-audit") satisfy a clause for "security".
for _u in "${!APPROVER_TEAMS[@]}"; do
  case "${APPROVER_TEAMS[$_u]}" in
    *" ${_t} "*)
      _clause_passed="yes"
      debug "clause \"$_t\": satisfied by $_u"
      break
      ;;
  esac
done
|
||||
done
|
||||
|
||||
# Label for display: strip "???" from pending teams.
|
||||
_label=$(echo "$_raw_clause" | tr -d '()' | tr ',' '/' | tr -d '[:space:]' | sed 's/???//g')
|
||||
|
||||
if [ "$_clause_passed" = "yes" ]; then
|
||||
# Append (don't overwrite) — same append-style accumulator as _clause_names above.
|
||||
_passed_clauses="${_passed_clauses}${_passed_clauses:+, }$_label"
|
||||
echo "::notice::clause [$_label]: PASS — satisfied by approving reviewer(s)"
|
||||
else
|
||||
_failed_clauses="${_failed_clauses}${_failed_clauses:+, }$_label"
|
||||
echo "::error::clause [$_label]: FAIL — no approving reviewer belongs to any of these teams (${_clause_names}). Set SOP_DEBUG=1 to see per-team probe results."
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$_failed_clauses" ]; then
|
||||
echo ""
|
||||
echo "::error::sop-tier-check FAILED for $TIER."
|
||||
echo " Passed :${_passed_clauses}"
|
||||
echo " Missing:${_failed_clauses}"
|
||||
echo " All clauses must be satisfied. Each missing team needs an APPROVED review from one of its members."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::sop-tier-check PASSED: $TIER — all required clauses satisfied [${_passed_clauses}]"
|
||||
Executable
+199
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env bash
|
||||
# sop-tier-refire — re-evaluate sop-tier-check and POST status to PR head SHA.
|
||||
#
|
||||
# Invoked from `.gitea/workflows/sop-tier-refire.yml` when a repo
|
||||
# MEMBER/OWNER/COLLABORATOR comments `/refire-tier-check` on a PR.
|
||||
#
|
||||
# Behavior:
|
||||
#
|
||||
# 1. Resolve PR head SHA + author from PR_NUMBER.
|
||||
# 2. Rate-limit: if the sop-tier-check context has been POSTed in the
|
||||
# last 30 seconds, skip (prevents comment-spam status thrash).
|
||||
# 3. Invoke `.gitea/scripts/sop-tier-check.sh` with the same env the
|
||||
# canonical workflow provides. This is DRY: we re-use the exact AND-
|
||||
# composition gate logic, not a watered-down approving-count check.
|
||||
# 4. POST the resulting status (success on exit 0, failure on non-zero)
|
||||
# to `/repos/.../statuses/{HEAD_SHA}` with context
|
||||
# "sop-tier-check / tier-check (pull_request)" — the same context name
|
||||
# branch protection requires.
|
||||
#
|
||||
# Required env (set by sop-tier-refire.yml):
|
||||
# GITEA_TOKEN — org-level SOP_TIER_CHECK_TOKEN (read:org/user/issue/repo)
|
||||
# GITEA_HOST — e.g. git.moleculesai.app
|
||||
# REPO — owner/name
|
||||
# PR_NUMBER — PR number from issue_comment payload
|
||||
# COMMENT_AUTHOR — login of the commenter (logged for audit)
|
||||
#
|
||||
# Optional:
|
||||
# SOP_DEBUG=1 — verbose per-API-call diagnostics
|
||||
# SOP_REFIRE_RATE_LIMIT_SEC — override the 30s rate-limit (default 30)
|
||||
# SOP_REFIRE_DISABLE_RATE_LIMIT=1 — for tests; skips the rate-limit check
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
debug() {
|
||||
if [ "${SOP_DEBUG:-}" = "1" ]; then
|
||||
echo " [debug] $*" >&2
|
||||
fi
|
||||
}
|
||||
|
||||
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
|
||||
: "${GITEA_HOST:?GITEA_HOST required}"
|
||||
: "${REPO:?REPO required (owner/name)}"
|
||||
: "${PR_NUMBER:?PR_NUMBER required}"
|
||||
: "${COMMENT_AUTHOR:=unknown}"
|
||||
|
||||
OWNER="${REPO%%/*}"
|
||||
NAME="${REPO##*/}"
|
||||
API="https://${GITEA_HOST}/api/v1"
|
||||
AUTH="Authorization: token ${GITEA_TOKEN}"
|
||||
CONTEXT="sop-tier-check / tier-check (pull_request)"
|
||||
RATE_LIMIT_SEC="${SOP_REFIRE_RATE_LIMIT_SEC:-30}"
|
||||
|
||||
echo "::notice::sop-tier-refire start: repo=$OWNER/$NAME pr=$PR_NUMBER commenter=$COMMENT_AUTHOR"
|
||||
|
||||
# 1. Fetch PR details — need head.sha and user.login.
|
||||
PR_FILE=$(mktemp)
|
||||
trap 'rm -f "$PR_FILE"' EXIT
|
||||
PR_HTTP=$(curl -sS -o "$PR_FILE" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
|
||||
if [ "$PR_HTTP" != "200" ]; then
|
||||
echo "::error::GET /pulls/$PR_NUMBER returned HTTP $PR_HTTP (body $(head -c 200 "$PR_FILE"))"
|
||||
exit 1
|
||||
fi
|
||||
HEAD_SHA=$(jq -r '.head.sha' <"$PR_FILE")
|
||||
PR_AUTHOR=$(jq -r '.user.login' <"$PR_FILE")
|
||||
PR_STATE=$(jq -r '.state' <"$PR_FILE")
|
||||
if [ -z "$HEAD_SHA" ] || [ "$HEAD_SHA" = "null" ]; then
|
||||
echo "::error::Could not resolve head.sha from PR #$PR_NUMBER response"
|
||||
exit 1
|
||||
fi
|
||||
debug "head_sha=$HEAD_SHA pr_author=$PR_AUTHOR state=$PR_STATE"
|
||||
|
||||
if [ "$PR_STATE" != "open" ]; then
|
||||
echo "::notice::PR #$PR_NUMBER state is $PR_STATE; refire is a no-op on closed PRs."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 2. Rate-limit: skip if our context was updated in the last $RATE_LIMIT_SEC.
|
||||
# Gitea statuses endpoint returns latest first; we check the most recent
|
||||
# entry for our context name.
|
||||
if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
|
||||
STATUSES_FILE=$(mktemp)
|
||||
trap 'rm -f "$PR_FILE" "$STATUSES_FILE"' EXIT
|
||||
ST_HTTP=$(curl -sS -o "$STATUSES_FILE" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}?limit=50&sort=newest")
|
||||
debug "statuses-list HTTP=$ST_HTTP"
|
||||
if [ "$ST_HTTP" = "200" ]; then
|
||||
LAST_UPDATED=$(jq -r --arg c "$CONTEXT" \
|
||||
'[.[] | select(.context == $c)] | first | .updated_at // ""' \
|
||||
<"$STATUSES_FILE")
|
||||
if [ -n "$LAST_UPDATED" ] && [ "$LAST_UPDATED" != "null" ]; then
|
||||
# Parse RFC3339 → epoch. Use python -c for portability (date(1) -d
|
||||
# differs between BSD/GNU; the Gitea runner is Ubuntu so GNU date
|
||||
# works, but we keep python for future container variance).
|
||||
LAST_EPOCH=$(python3 -c "import sys,datetime;print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$LAST_UPDATED" 2>/dev/null || echo "0")
|
||||
NOW_EPOCH=$(date -u +%s)
|
||||
AGE=$((NOW_EPOCH - LAST_EPOCH))
|
||||
debug "last status update: $LAST_UPDATED ($AGE seconds ago)"
|
||||
if [ "$AGE" -lt "$RATE_LIMIT_SEC" ] && [ "$AGE" -ge 0 ]; then
|
||||
echo "::notice::sop-tier-refire rate-limited — last status update was ${AGE}s ago (<${RATE_LIMIT_SEC}s window). Try again shortly."
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# 3. Invoke sop-tier-check.sh with the env it expects.
|
||||
#
|
||||
# FAIL-CLOSED contract (was fail-open — fixed 2026-06-05,
|
||||
# fix/core-ci-fail-closed). The previous shape was:
|
||||
# bash "$SCRIPT" || true
|
||||
# TIER_EXIT=0 # <-- hardcoded success
|
||||
# which discarded the real verdict and ALWAYS POSTed
|
||||
# `state=success` for the REQUIRED context
|
||||
# `sop-tier-check / tier-check (pull_request)`. That meant ANY
|
||||
# collaborator could comment `/refire-tier-check` to forcibly green
|
||||
# the SOP-6 approval gate on the PR head SHA — a fail-open AND a
|
||||
# privilege bypass of branch protection. The canonical
|
||||
# pull_request_target workflow's conclusion publishes the same
|
||||
# context honestly (red on a real violation); the refire MUST mirror
|
||||
# THAT honesty, not a discarded exit code.
|
||||
#
|
||||
# We now capture the script's real exit code under `set +e` and POST
|
||||
# success ONLY when it actually exited 0. sop-tier-check.sh itself
|
||||
# fails closed on infra faults (no SOP_FAIL_OPEN in this refire env),
|
||||
# so a bad token / unreachable API / missing jq → non-zero → we POST
|
||||
# `state=failure`, never a false green.
|
||||
#
|
||||
# SOP_REFIRE_TIER_CHECK_SCRIPT env var lets tests substitute a mock —
|
||||
# sop-tier-check.sh uses bash 4+ associative arrays which trigger a known
|
||||
# bash 3.2 parser bug (`tier: unbound variable` from declare -A with
|
||||
# `set -u`). Linux Gitea runners ship bash 4/5 so production is fine;
|
||||
# the override exists so the bash 3.2 dev box can still exercise the
|
||||
# refire glue logic end-to-end.
|
||||
SCRIPT="${SOP_REFIRE_TIER_CHECK_SCRIPT:-$(dirname "$0")/sop-tier-check.sh}"
|
||||
if [ ! -f "$SCRIPT" ]; then
|
||||
echo "::error::sop-tier-check.sh not found at $SCRIPT — refire requires the canonical script"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Re-invoke. Pipe stdout/stderr through so the runner log shows the
|
||||
# tier-check decision inline. Capture the REAL exit code (set +e so a
|
||||
# non-zero verdict doesn't abort this script under set -e) — the POST
|
||||
# below keys off it, so a failed tier-check posts state=failure.
|
||||
set +e
|
||||
GITEA_TOKEN="$GITEA_TOKEN" \
|
||||
GITEA_HOST="$GITEA_HOST" \
|
||||
REPO="$REPO" \
|
||||
PR_NUMBER="$PR_NUMBER" \
|
||||
PR_AUTHOR="$PR_AUTHOR" \
|
||||
SOP_DEBUG="${SOP_DEBUG:-0}" \
|
||||
SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
|
||||
bash "$SCRIPT"
|
||||
TIER_EXIT=$?
|
||||
set -e
|
||||
debug "sop-tier-check.sh exit=$TIER_EXIT"
|
||||
|
||||
# 4. POST the resulting status.
|
||||
if [ "$TIER_EXIT" -eq 0 ]; then
|
||||
STATE="success"
|
||||
DESCRIPTION="Refired via /refire-tier-check by $COMMENT_AUTHOR"
|
||||
else
|
||||
STATE="failure"
|
||||
DESCRIPTION="Refired via /refire-tier-check; tier-check failed (see workflow log)"
|
||||
fi
|
||||
|
||||
# Status target_url points at the runner log so a curious reviewer can
|
||||
# follow it back. SERVER_URL + RUN_ID + JOB_ID isn't trivially constructible
|
||||
# from the bash env on Gitea 1.22.6, so we point at the PR itself.
|
||||
TARGET_URL="https://${GITEA_HOST}/${OWNER}/${NAME}/pulls/${PR_NUMBER}"
|
||||
|
||||
POST_BODY=$(jq -nc \
|
||||
--arg state "$STATE" \
|
||||
--arg context "$CONTEXT" \
|
||||
--arg description "$DESCRIPTION" \
|
||||
--arg target_url "$TARGET_URL" \
|
||||
'{state:$state, context:$context, description:$description, target_url:$target_url}')
|
||||
|
||||
POST_FILE=$(mktemp)
|
||||
trap 'rm -f "$PR_FILE" "${STATUSES_FILE:-}" "$POST_FILE"' EXIT
|
||||
POST_HTTP=$(curl -sS -o "$POST_FILE" -w '%{http_code}' \
|
||||
-X POST -H "$AUTH" -H "Content-Type: application/json" \
|
||||
-d "$POST_BODY" \
|
||||
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}")
|
||||
if [ "$POST_HTTP" != "200" ] && [ "$POST_HTTP" != "201" ]; then
|
||||
echo "::error::POST /statuses/$HEAD_SHA returned HTTP $POST_HTTP (body $(head -c 200 "$POST_FILE"))"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
|
||||
# Exit 0: the refire JOB succeeded — it re-evaluated the gate and posted
|
||||
# an HONEST status. The gate VERDICT is carried by the POSTed status
|
||||
# ($STATE), which is what branch protection reads; a failing tier-check
|
||||
# posts state=failure (red on the PR), so there is no fail-open. We do
|
||||
# NOT also exit non-zero on a failing verdict — that would double-signal
|
||||
# the same failure as both a red status AND a red refire job. The
|
||||
# fail-open that mattered (TIER_EXIT hardcoded to 0 → always state=success)
|
||||
# is fixed above by capturing the real exit code.
|
||||
exit 0
|
||||
Executable
+28
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env bash
# Stand-in for sop-tier-check.sh, used by the sop-tier-refire tests.
#
# The verdict is selected with $MOCK_TIER_RESULT (unset or empty means "pass"):
#   pass               -> prints a ::notice:: line, exits 0
#   fail_no_label      -> prints an ::error:: line, exits 1
#   fail_no_approvals  -> prints an ::error:: line, exits 1
#   anything else      -> prints an ::error:: line, exits 2
#
# This lets the refire tests drive both the success and the failure
# status-POST paths without running the real sop-tier-check.sh, which needs
# bash 4+ associative arrays (known parser bug on the macOS bash 3.2 dev box).

set -euo pipefail

verdict="${MOCK_TIER_RESULT:-pass}"

if [ "$verdict" = "pass" ]; then
  echo "::notice::mock tier-check: PASS"
  exit 0
fi

if [ "$verdict" = "fail_no_label" ]; then
  echo "::error::mock tier-check: no tier label"
  exit 1
fi

if [ "$verdict" = "fail_no_approvals" ]; then
  echo "::error::mock tier-check: no approving reviews"
  exit 1
fi

# Unrecognised selector: distinct exit code so a typo in a test is visible.
echo "::error::mock tier-check: unknown MOCK_TIER_RESULT=${MOCK_TIER_RESULT:-}"
exit 2
|
||||
Executable
+208
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Stub Gitea API for sop-tier-refire test scenarios.
|
||||
|
||||
Reads $FIXTURE_STATE_DIR/scenario to decide what to return for each
|
||||
endpoint the sop-tier-refire.sh + sop-tier-check.sh scripts call.
|
||||
Captures every POST to /statuses/{sha} into posted_statuses.jsonl so
|
||||
the test can assert what the script tried to write.
|
||||
|
||||
Scenarios:
|
||||
T1_success — tier:low + APPROVED by engineer → tier-check passes
|
||||
T2_no_tier_label — no tier label → tier-check exits 1 before POST
|
||||
T3_no_approvals — tier:low but zero approving reviews → exits 1
|
||||
T4_closed — PR state=closed → refire is a no-op
|
||||
T5_rate_limited — last status update 5 seconds ago → skip
|
||||
|
||||
Usage:
|
||||
FIXTURE_STATE_DIR=/tmp/x python3 _refire_fixture.py 8080
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import http.server
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.parse
|
||||
|
||||
|
||||
STATE_DIR = os.environ["FIXTURE_STATE_DIR"]
|
||||
|
||||
|
||||
def scenario() -> str:
    """Return the active scenario name.

    The name is read from the file ``scenario`` inside STATE_DIR with
    surrounding whitespace stripped. When that file does not exist the
    fixture falls back to ``"T1_success"``.
    """
    marker = os.path.join(STATE_DIR, "scenario")
    if os.path.isfile(marker):
        with open(marker, encoding="utf-8") as handle:
            return handle.read().strip()
    return "T1_success"
|
||||
|
||||
|
||||
def now_iso() -> str:
    """Return the current time as a timezone-aware (UTC) ISO-8601 string."""
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat()
|
||||
|
||||
|
||||
def append_post(body: dict) -> None:
    """Append ``body`` as one JSON line to STATE_DIR/posted_statuses.jsonl.

    The file is opened in append mode, so successive calls accumulate one
    record per line for the test to inspect afterwards.
    """
    log_path = os.path.join(STATE_DIR, "posted_statuses.jsonl")
    with open(log_path, "a") as log:
        print(json.dumps(body), file=log)
|
||||
|
||||
|
||||
def pr_payload() -> dict:
    """Build the canned pull-request object returned for GET /pulls/{n}.

    The PR is reported as ``open`` in every scenario except ``T4_closed``,
    where its state is ``closed``.
    """
    payload = {
        "number": 999,
        "state": "open",
        "head": {"sha": "deadbeef0000111122223333444455556666"},
        "user": {"login": "feature-author"},
    }
    if scenario() == "T4_closed":
        payload["state"] = "closed"
    return payload
|
||||
|
||||
|
||||
def labels_payload() -> list:
    """Build the label list returned for GET /issues/{n}/labels.

    ``T2_no_tier_label`` carries only a non-tier label; every other
    scenario carries ``tier:low`` plus ``ci``.
    """
    names = ["bug"] if scenario() == "T2_no_tier_label" else ["tier:low", "ci"]
    return [{"name": label} for label in names]
|
||||
|
||||
|
||||
def reviews_payload() -> list:
    """Build the review list returned for GET /pulls/{n}/reviews.

    ``T3_no_approvals`` has no reviews at all; every other scenario has a
    single APPROVED review from ``reviewer-engineer``.
    """
    approval = {
        "state": "APPROVED",
        "user": {"login": "reviewer-engineer"},
    }
    return [] if scenario() == "T3_no_approvals" else [approval]
|
||||
|
||||
|
||||
def teams_payload() -> list:
    """Build the team list returned for GET /orgs/{owner}/teams.

    Per the original author's note, these entries mirror the real
    molecule-ai org teams referenced in TIER_EXPR.
    """
    roster = ((5, "ceo"), (2, "engineers"), (6, "managers"))
    return [{"id": team_id, "name": team_name} for team_id, team_name in roster]
|
||||
|
||||
|
||||
def statuses_payload() -> list:
    """Build the commit-status list returned for GET /statuses/{sha}.

    Only ``T5_rate_limited`` has history: one ``failure`` entry for the
    tier-check context whose ``updated_at`` is five seconds in the past.
    All other scenarios return an empty list.
    """
    if scenario() != "T5_rate_limited":
        return []

    five_seconds_ago = datetime.datetime.now(
        datetime.timezone.utc
    ) - datetime.timedelta(seconds=5)
    entry = {
        "context": "sop-tier-check / tier-check (pull_request)",
        "state": "failure",
        "updated_at": five_seconds_ago.isoformat(),
    }
    return [entry]
|
||||
|
||||
|
||||
def user_payload() -> dict:
    """Build the identity object returned for GET /api/v1/user.

    Per the original author's note, this mirrors the WHOAMI probe made by
    sop-tier-check.sh.
    """
    return dict(login="sop-tier-bot-fixture")
|
||||
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
    """Request handler that serves the canned Gitea API responses.

    GET routes return scenario-dependent payloads built by the module's
    ``*_payload()`` helpers. POSTs to ``/statuses/{sha}`` are recorded with
    ``append_post()`` and answered with 201. Any unrecognised route gets a
    JSON 404 that names the requested path.
    """

    # Route patterns, compiled once instead of on every request.
    _RE_PULL = re.compile(r"^/api/v1/repos/[^/]+/[^/]+/pulls/(\d+)$")
    _RE_LABELS = re.compile(r"^/api/v1/repos/[^/]+/[^/]+/issues/\d+/labels$")
    _RE_REVIEWS = re.compile(r"^/api/v1/repos/[^/]+/[^/]+/pulls/\d+/reviews$")
    _RE_ORG_TEAMS = re.compile(r"^/api/v1/orgs/[^/]+/teams$")
    _RE_TEAM_MEMBER = re.compile(r"^/api/v1/teams/(\d+)/members/([^/]+)$")
    _RE_ORG_MEMBER = re.compile(r"^/api/v1/orgs/[^/]+/members/[^/]+$")
    _RE_STATUSES = re.compile(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$")

    # Quiet — keep stdout for explicit logs only.
    def log_message(self, *args, **kwargs):  # noqa: D401
        pass

    def _json(self, code: int, body) -> None:
        """Send ``body`` serialised as JSON with HTTP status ``code``."""
        payload = json.dumps(body).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _empty(self, code: int) -> None:
        """Send a body-less response with HTTP status ``code``."""
        self.send_response(code)
        self.send_header("Content-Length", "0")
        self.end_headers()

    def do_GET(self):  # noqa: N802
        """Dispatch a GET request to the matching canned payload."""
        path = urllib.parse.urlparse(self.path).path

        if path == "/_ping":
            return self._json(200, {"ok": True})
        if path == "/api/v1/user":
            return self._json(200, user_payload())

        # /api/v1/repos/{owner}/{name}/pulls/{n}
        if self._RE_PULL.match(path):
            return self._json(200, pr_payload())

        # /api/v1/repos/{owner}/{name}/issues/{n}/labels
        if self._RE_LABELS.match(path):
            return self._json(200, labels_payload())

        # /api/v1/repos/{owner}/{name}/pulls/{n}/reviews
        if self._RE_REVIEWS.match(path):
            return self._json(200, reviews_payload())

        # /api/v1/orgs/{owner}/teams
        if self._RE_ORG_TEAMS.match(path):
            return self._json(200, teams_payload())

        # /api/v1/teams/{id}/members/{login} → 204 if user is an engineer
        member = self._RE_TEAM_MEMBER.match(path)
        if member:
            team_id, login = member.group(1), member.group(2)
            # In our fixture reviewer-engineer ∈ engineers (id=2)
            if team_id == "2" and login == "reviewer-engineer":
                return self._empty(204)
            return self._empty(404)

        # /api/v1/orgs/{owner}/members/{login} — fallback path used when
        # team-member probes all 403. We don't need it for these tests.
        if self._RE_ORG_MEMBER.match(path):
            return self._empty(404)

        # /api/v1/repos/{owner}/{name}/statuses/{sha}
        if self._RE_STATUSES.match(path):
            return self._json(200, statuses_payload())

        return self._json(404, {"path": path, "msg": "fixture: no route"})

    def do_POST(self):  # noqa: N802
        """Record a status POST and echo a status-shaped 201 response.

        The request body is normalised to a dict before it is recorded:
        unparseable input and valid-but-non-object JSON (list, number,
        string, null) are both stored as ``{"_raw": <decoded text>}``.
        A malformed or negative Content-Length is rejected with 400.
        """
        path = urllib.parse.urlparse(self.path).path

        try:
            length = int(self.headers.get("Content-Length") or 0)
        except ValueError:
            length = -1
        if length < 0:
            # A negative length would make rfile.read() block until EOF.
            return self._json(
                400, {"path": path, "msg": "fixture: bad Content-Length"}
            )

        raw = self.rfile.read(length) if length else b""
        try:
            body = json.loads(raw) if raw else {}
        except Exception:
            body = None
        if not isinstance(body, dict):
            # The code below calls body.get(); keep the raw text for the test.
            body = {"_raw": raw.decode(errors="replace")}

        if self._RE_STATUSES.match(path):
            append_post(body)
            # Echo back something status-shaped — script only checks HTTP code.
            return self._json(
                201,
                {
                    "context": body.get("context"),
                    "state": body.get("state"),
                    "created_at": now_iso(),
                },
            )

        return self._json(404, {"path": path, "msg": "fixture: no route"})
|
||||
|
||||
|
||||
def main():
    """Serve the fixture on 127.0.0.1 at the port given as the first argument.

    Exits with status 2 and a usage line on stderr when the port argument
    is missing or is not an integer. Otherwise serves until the process is
    stopped; the listening socket is closed when the server exits.
    """
    try:
        port = int(sys.argv[1])
    except (IndexError, ValueError):
        print("usage: _refire_fixture.py PORT", file=sys.stderr)
        sys.exit(2)

    with http.server.ThreadingHTTPServer(("127.0.0.1", port), Handler) as srv:
        srv.serve_forever()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,119 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# test_audit_force_merge.sh — regression lock for audit-force-merge fail-closed
|
||||
# behavior. Verifies every schema validation path via direct jq filter tests.
|
||||
#
|
||||
# Usage: bash test_audit_force_merge.sh
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# fail MESSAGE... — report a failed check on stderr and abort the run.
fail() {
  echo "FAIL: $*" >&2
  exit 1
}

# pass MESSAGE... — report a passed check on stdout.
pass() {
  echo "PASS: $*"
}
|
||||
|
||||
[ -x "$(command -v jq)" ] || { echo "SKIP: jq not on PATH"; exit 0; }
|
||||
|
||||
HEAD_SHA="deadbeef00000000000000000000000000000000"
|
||||
|
||||
# The schema validation jq expression from audit-force-merge.sh.
|
||||
# validate_pr_schema — read a PR JSON object on stdin and print "true" when
# every field the audit consumes is present with the expected type
# (merged: boolean; merge_commit_sha, merged_by.login, base.ref, head.sha:
# strings), otherwise "false".
validate_pr_schema() {
  local schema_filter='
(.merged | type == "boolean") and
(.merge_commit_sha | type == "string") and
(.merged_by | type == "object") and (.merged_by.login | type == "string") and
(.base | type == "object") and (.base.ref | type == "string") and
(.head | type == "object") and (.head.sha | type == "string")
'
  jq -r "$schema_filter"
}
|
||||
|
||||
# validate_statuses_type — read a JSON object on stdin and print "true" when
# its .statuses member is an array, otherwise "false".
validate_statuses_type() {
  local type_filter='(.statuses | type) == "array"'
  jq -r "$type_filter"
}
|
||||
|
||||
# T1–T13: each case pipes one JSON payload through a validator and compares
# the printed verdict with the expected one.
#
# assert_verdict VALIDATOR EXPECTED PAYLOAD FAIL_MESSAGE PASS_MESSAGE
assert_verdict() {
  local validator="$1" expected="$2" payload="$3" on_fail="$4" on_pass="$5"
  local verdict
  verdict=$(echo "$payload" | "$validator")
  [ "$verdict" = "$expected" ] || fail "$on_fail"
  pass "$on_pass"
}

# T1 — valid PR payload → true
assert_verdict validate_pr_schema true \
  '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' \
  "T1: valid payload should pass schema" \
  "T1: valid payload passes schema"

# T2 — merged=false (valid types) → true (schema is about types, not values)
assert_verdict validate_pr_schema true \
  '{"merged":false,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' \
  "T2: merged=false with valid types should pass schema" \
  "T2: merged=false with valid types passes schema"

# T3 — missing merged field → false
assert_verdict validate_pr_schema false \
  '{"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' \
  "T3: missing merged should fail schema" \
  "T3: missing merged fails schema"

# T4 — merged is string "true" instead of boolean → false
assert_verdict validate_pr_schema false \
  '{"merged":"true","merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' \
  "T4: merged as string should fail schema" \
  "T4: merged as string fails schema"

# T5 — merge_commit_sha is null → false
assert_verdict validate_pr_schema false \
  '{"merged":true,"merge_commit_sha":null,"merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' \
  "T5: null merge_commit_sha should fail schema" \
  "T5: null merge_commit_sha fails schema"

# T6 — merged_by is null → false
assert_verdict validate_pr_schema false \
  '{"merged":true,"merge_commit_sha":"abc","merged_by":null,"base":{"ref":"main"},"head":{"sha":"def"}}' \
  "T6: null merged_by should fail schema" \
  "T6: null merged_by fails schema"

# T7 — base.ref is number → false
assert_verdict validate_pr_schema false \
  '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":123},"head":{"sha":"def"}}' \
  "T7: numeric base.ref should fail schema" \
  "T7: numeric base.ref fails schema"

# T8 — head is missing → false
assert_verdict validate_pr_schema false \
  '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"}}' \
  "T8: missing head should fail schema" \
  "T8: missing head fails schema"

# T9 — statuses missing → false
assert_verdict validate_statuses_type false \
  '{}' \
  "T9: missing statuses should fail type check" \
  "T9: missing statuses fails type check"

# T10 — statuses is string → false
assert_verdict validate_statuses_type false \
  '{"statuses":"unexpected"}' \
  "T10: string statuses should fail type check" \
  "T10: string statuses fails type check"

# T11 — statuses is null → false
assert_verdict validate_statuses_type false \
  '{"statuses":null}' \
  "T11: null statuses should fail type check" \
  "T11: null statuses fails type check"

# T12 — statuses is array → true
assert_verdict validate_statuses_type true \
  '{"statuses":[{"context":"c1","status":"success"}]}' \
  "T12: array statuses should pass type check" \
  "T12: array statuses passes type check"

# T13 — empty array statuses → true
assert_verdict validate_statuses_type true \
  '{"statuses":[]}' \
  "T13: empty array statuses should pass type check" \
  "T13: empty array statuses passes type check"
|
||||
|
||||
# T14-T16: REQUIRED_CHECKS_JSON branch entry validation
|
||||
# validate_required_checks_json BRANCH JSON — print "true" when the JSON
# object has a key named BRANCH whose value is an array, otherwise "false".
validate_required_checks_json() {
  local target="$1" document="$2"
  jq -r --arg branch "$target" \
    'has($branch) and (.[$branch] | type == "array")' <<<"$document"
}
|
||||
|
||||
# T14 — branch exists and is array → true
verdict=$(validate_required_checks_json "main" '{"main":["CI / all-required"]}')
if [ "$verdict" != "true" ]; then
  fail "T14: existing array branch should pass"
fi
pass "T14: existing array branch passes"

# T15 — branch missing → false
verdict=$(validate_required_checks_json "staging" '{"main":["CI / all-required"]}')
if [ "$verdict" != "false" ]; then
  fail "T15: missing branch should fail"
fi
pass "T15: missing branch fails"

# T16 — branch entry is string instead of array → false
verdict=$(validate_required_checks_json "main" '{"main":"CI / all-required"}')
if [ "$verdict" != "false" ]; then
  fail "T16: string branch entry should fail"
fi
pass "T16: string branch entry fails"

# Blank line, then the summary banner: only reached when no check failed.
echo
echo "ALL AUDIT-FORCE-MERGE CHECKS PASSED"
|
||||
@@ -107,36 +107,6 @@ def test_required_checks_env_json_malformed_fails():
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_non_string_item_fails():
|
||||
doc = _make_audit_doc_json({"main": ["ctx-a", 123, "ctx-b"]})
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_empty_string_item_fails():
|
||||
doc = _make_audit_doc_json({"main": ["ctx-a", " ", "ctx-b"]})
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_duplicate_context_fails():
|
||||
doc = _make_audit_doc_json({"main": ["ctx-a", "ctx-b", "ctx-a"]})
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# sentinel_needs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -46,12 +46,12 @@ def test_required_contexts_green_rejects_missing_and_pending():
|
||||
]
|
||||
|
||||
|
||||
def test_required_contexts_green_rejects_volume_skipped():
|
||||
def test_required_contexts_green_rejects_volume_skipped_even_for_tier_low():
|
||||
"""volume-skipped pending is a partial view, not a genuine soft-fail.
|
||||
|
||||
Per sop-checklist.py:1179-1187, volume_skipped posts pending with a
|
||||
'[volume-skipped]' prefix. The merge queue must NOT treat this as an
|
||||
acceptable soft-fail — the gate did not finish evaluating.
|
||||
acceptable soft-fail for tier:low — the gate did not finish evaluating.
|
||||
"""
|
||||
latest = mq.latest_statuses_by_context([
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
@@ -68,6 +68,7 @@ def test_required_contexts_green_rejects_volume_skipped():
|
||||
"CI / all-required (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request)",
|
||||
],
|
||||
pr_labels={"tier:low"},
|
||||
)
|
||||
|
||||
assert ok is False
|
||||
@@ -113,13 +114,7 @@ def test_pr_needs_update_when_base_sha_absent_from_commits():
|
||||
|
||||
|
||||
def _ready_kwargs(**overrides):
|
||||
"""Default kwargs for a fully-ready merge; override per test.
|
||||
|
||||
Includes the uniform governance checks (qa-review, security-review,
|
||||
sop-checklist) as required contexts and green statuses, matching the
|
||||
behaviour of process_once which merges GOVERNANCE_REQUIRED_CONTEXTS
|
||||
with branch-protection contexts.
|
||||
"""
|
||||
"""Default kwargs for a fully-ready merge; override per test."""
|
||||
base = dict(
|
||||
main_status={
|
||||
"state": "success",
|
||||
@@ -127,19 +122,9 @@ def _ready_kwargs(**overrides):
|
||||
},
|
||||
pr_status={
|
||||
"state": "success",
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
],
|
||||
"statuses": [{"context": "CI / all-required (pull_request)", "status": "success"}],
|
||||
},
|
||||
required_contexts=[
|
||||
"CI / all-required (pull_request)",
|
||||
"qa-review / approved (pull_request)",
|
||||
"security-review / approved (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request)",
|
||||
],
|
||||
required_contexts=["CI / all-required (pull_request)"],
|
||||
required_approvals=2,
|
||||
approvers={"agent-reviewer-cr2", "agent-researcher"},
|
||||
request_changes=[],
|
||||
@@ -158,72 +143,13 @@ def test_merge_decision_requires_main_green_pr_green_and_current_base():
|
||||
assert decision.force is False # no non-required reds present
|
||||
|
||||
|
||||
def test_behind_main_but_mergeable_pr_merges_directly():
|
||||
"""§SOP-22 (#2358): a behind-main but CONFLICT-FREE PR (mergeable is True)
|
||||
merges DIRECTLY — no update step. Branch protection does not require strict
|
||||
up-to-date, and calling /update would dismiss the genuine approvals
|
||||
(dismiss_stale_approvals), forcing re-review every tick (the throughput
|
||||
bottleneck). This replaces the old update-before-merge behavior."""
|
||||
decision = mq.evaluate_merge_readiness(
|
||||
**_ready_kwargs(pr_has_current_base=False, mergeable=True)
|
||||
)
|
||||
|
||||
assert decision.ready is True
|
||||
assert decision.action == "merge"
|
||||
|
||||
|
||||
def test_behind_main_and_not_mergeable_pr_updates():
|
||||
"""The /update path is reached ONLY when the PR is NOT mergeable AND its head
|
||||
lacks current main — refreshing the branch may resolve a behind-main
|
||||
non-conflict; a real conflict 409s and is held (#2352)."""
|
||||
decision = mq.evaluate_merge_readiness(
|
||||
**_ready_kwargs(pr_has_current_base=False, mergeable=False)
|
||||
)
|
||||
def test_merge_decision_updates_stale_pr_before_merge():
|
||||
decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_has_current_base=False))
|
||||
|
||||
assert decision.ready is False
|
||||
assert decision.action == "update"
|
||||
|
||||
|
||||
def test_current_base_but_not_mergeable_pr_waits():
|
||||
"""Up-to-date with main yet Gitea reports not-mergeable → genuine conflict
|
||||
against current main (or still computing). The queue cannot act: WAIT,
|
||||
never update (update would not help) and never merge (fail-closed)."""
|
||||
decision = mq.evaluate_merge_readiness(
|
||||
**_ready_kwargs(pr_has_current_base=True, mergeable=False)
|
||||
)
|
||||
|
||||
assert decision.ready is False
|
||||
assert decision.action == "wait"
|
||||
assert "not mergeable" in decision.reason
|
||||
|
||||
|
||||
def test_behind_main_and_mergeable_none_waits_not_update():
|
||||
"""§SOP-22 (CR2 #2374) — the churn-residual fix. A BEHIND-MAIN PR whose
|
||||
mergeability Gitea is STILL COMPUTING (mergeable is None) must WAIT, NOT take
|
||||
the /update path. The old code collapsed None→False, so a behind-main +
|
||||
None PR returned action="update" → /pulls/{n}/update → dismiss_stale_approvals
|
||||
→ the exact rebase-churn this change eliminates, fired during the compute
|
||||
window. None and False are now DISTINCT: None waits, False updates."""
|
||||
decision = mq.evaluate_merge_readiness(
|
||||
**_ready_kwargs(pr_has_current_base=False, mergeable=None)
|
||||
)
|
||||
|
||||
assert decision.ready is False
|
||||
assert decision.action == "wait" # NOT "update" — no churn during compute
|
||||
assert "computed" in decision.reason
|
||||
|
||||
|
||||
def test_current_base_and_mergeable_none_waits():
|
||||
"""Up-to-date with main + mergeable None (still computing) → WAIT (unchanged
|
||||
fail-closed; just confirming None is never merged regardless of base)."""
|
||||
decision = mq.evaluate_merge_readiness(
|
||||
**_ready_kwargs(pr_has_current_base=True, mergeable=None)
|
||||
)
|
||||
|
||||
assert decision.ready is False
|
||||
assert decision.action == "wait"
|
||||
|
||||
|
||||
def test_MergePermissionError_inherits_from_ApiError():
|
||||
assert issubclass(mq.MergePermissionError, mq.ApiError)
|
||||
|
||||
@@ -314,35 +240,16 @@ def test_merge_blocked_when_insufficient_genuine_approvals():
|
||||
assert "insufficient genuine approvals" in decision.reason
|
||||
|
||||
|
||||
def test_governance_red_blocks_merge():
|
||||
# Uniform gate: qa-review, security-review, sop-checklist are ALWAYS
|
||||
# required. If any of them fail/pending, the PR is blocked.
|
||||
def test_non_required_red_does_not_block_merge():
|
||||
# Required (CI) green; non-required governance reds present → still merge,
|
||||
# and force is set so force_merge bypasses ONLY those non-required reds.
|
||||
pr_status = {
|
||||
"state": "failure",
|
||||
"state": "failure", # combined polluted by non-required reds
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "failure"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "pending"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "failure"},
|
||||
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
|
||||
],
|
||||
}
|
||||
decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_status=pr_status))
|
||||
assert decision.ready is False
|
||||
assert decision.action == "wait"
|
||||
assert "required contexts not green" in decision.reason
|
||||
|
||||
|
||||
def test_non_required_advisory_red_does_not_block_merge():
|
||||
# Governance checks are green; only advisory non-required reds (Staging SaaS)
|
||||
# are present → PR is still mergeable with force_merge bypassing the advisory.
|
||||
pr_status = {
|
||||
"state": "failure", # combined polluted by advisory non-required reds
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
{"context": "sop-tier-check / tier-check (pull_request)", "status": "failure"},
|
||||
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
|
||||
],
|
||||
}
|
||||
@@ -446,14 +353,8 @@ def test_process_once_holds_pr_on_permanent_merge_error(monkeypatch):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
|
||||
@@ -519,14 +420,8 @@ def _fully_ready_process_once_monkeypatch(monkeypatch, mergeable, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
|
||||
@@ -611,131 +506,6 @@ def test_process_once_merges_when_mergeable_is_true(monkeypatch):
|
||||
assert calls["hold_label"] is None
|
||||
|
||||
|
||||
def test_process_once_behind_main_mergeable_none_waits_no_update(monkeypatch):
|
||||
"""§SOP-22 (CR2 #2374) — end-to-end churn-residual regression. A BEHIND-MAIN
|
||||
PR (commits do NOT contain main_sha) whose mergeability Gitea is STILL
|
||||
COMPUTING (mergeable=None) must WAIT: process_once returns 0 and NEVER calls
|
||||
update_pull (which dismisses genuine approvals via dismiss_stale_approvals)
|
||||
NOR merge_pull NOR hold. The old None→False collapse routed this exact case
|
||||
into the /update path → approval-dismissing rebase churn during the compute
|
||||
window. This proves the durable churn elimination: no update, approvals
|
||||
preserved, re-checked next tick."""
|
||||
calls = {"merge_attempts": 0, "hold_label": None, "updated": False}
|
||||
_fully_ready_process_once_monkeypatch(monkeypatch, mergeable=None, calls=calls)
|
||||
# Make the head BEHIND main: commits do NOT contain main_sha. This is the
|
||||
# case the bug missed (the prior None test had current base, masking it).
|
||||
behind_head = "a" * 40
|
||||
monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": behind_head}])
|
||||
|
||||
rc = mq.process_once(dry_run=False)
|
||||
|
||||
assert rc == 0
|
||||
assert calls["updated"] is False # NO /update → approvals NOT dismissed
|
||||
assert calls["merge_attempts"] == 0 # never merge on an unknown
|
||||
assert calls["hold_label"] is None # transient → not held, retried next tick
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# §SOP-22: DIRECT-MERGE throughput fix (#2358). A conflict-free 2-genuine PR
|
||||
# merges WITHOUT a pre-merge /update call, so its approvals are NOT dismissed by
|
||||
# dismiss_stale_approvals. The merge bar (2-genuine-on-current-head +
|
||||
# BP-required green + mergeable + no RC + opt-out) is UNCHANGED; only the
|
||||
# unnecessary update-before-merge churn is removed. The /update path survives
|
||||
# for the genuine case it is needed (not-mergeable + behind-main), where a real
|
||||
# conflict 409s and is held per #2352. mergeable=None stays fail-closed.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_process_once_merges_conflict_free_pr_without_update(monkeypatch):
    """§SOP-22(a): a conflict-free, fully-approved PR is merged directly.

    This is the core throughput fix. The previous behavior called /update
    first whenever the head lacked current main, which dismissed the two
    genuine approvals (dismiss_stale_approvals) and forced a re-review on
    every tick. The test requires that merge_pull runs and update_pull does
    not.
    """
    recorded = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=recorded)

    # Put the head BEHIND main: the PR's commit list must not contain main's
    # SHA (assumes the shared fixture wires a different main SHA — verify).
    # The old logic issued update_pull for this alone; the fix merges directly.
    stale_sha = 40 * "a"

    def _commits_behind_main(n):
        return [{"sha": stale_sha}]

    monkeypatch.setattr(mq, "get_pull_commits", _commits_behind_main)

    exit_code = mq.process_once(dry_run=False)

    assert exit_code == 0
    assert recorded["merge_attempts"] == 1  # merged directly
    assert recorded["updated"] is False  # no update_pull, so approvals are kept
    assert recorded["hold_label"] is None
|
||||
|
||||
|
||||
def test_process_once_behind_main_conflict_free_merges_directly(monkeypatch):
    """§SOP-22(b): the explicit behind-main + conflict-free case.

    The PR still merges directly, because branch protection does not require
    a strictly up-to-date head.
    """
    tally = {"merge_attempts": 0, "hold_label": None, "updated": False}
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=tally)

    # Single commit whose SHA stands in for a head that is behind main.
    lagging_commits = [{"sha": "a" * 40}]
    monkeypatch.setattr(mq, "get_pull_commits", lambda n: list(lagging_commits))

    outcome = mq.process_once(dry_run=False)

    assert outcome == 0
    assert tally["merge_attempts"] == 1
    assert tally["updated"] is False
|
||||
|
||||
|
||||
def test_process_once_pauses_when_main_not_green_no_direct_merge(monkeypatch):
    """§SOP-22 backstop: the serialized safety that makes direct-merge safe.

    When main's required push contexts are NOT green (for example a prior
    direct merge introduced a semantic main-break that post-merge main CI
    caught), the queue pauses: the next PR is not merged onto an
    unverified/red main.
    """
    observed = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=observed)

    # presumably the main SHA the shared fixture reports — verify against it
    red_main = 40 * "b"

    def _status_with_red_main(sha):
        # main reports a failing push context; every other SHA is green.
        on_main = sha == red_main
        verdict = "failure" if on_main else "success"
        context = "CI / all-required (push)" if on_main else "CI / all-required (pull_request)"
        return {"state": verdict, "statuses": [{"context": context, "status": verdict}]}

    monkeypatch.setattr(mq, "get_combined_status", _status_with_red_main)

    exit_code = mq.process_once(dry_run=False)

    assert exit_code == 0
    assert observed["merge_attempts"] == 0  # paused — no merge onto red main
    assert observed["updated"] is False
|
||||
|
||||
|
||||
def test_direct_merge_bar_unchanged_behind_main(monkeypatch):
    """§SOP-22(d): the merge bar is unchanged on the direct-merge path.

    A behind-main + conflict-free PR must still be refused (action "wait",
    not ready) when a gate fails. Three gates are exercised here:

    * fewer than two genuine approvals,
    * a red required status context,
    * an open REQUEST_CHANGES review.

    Direct-merge removes the update churn; it does not weaken the bar.

    NOTE(review): the opt-out label gate is not exercised by this test —
    confirm it is covered elsewhere or add a case here.
    """
    # Shared shape of the direct-merge candidate: behind main, no conflicts.
    behind_main = dict(pr_has_current_base=False, mergeable=True)

    red_required = {"state": "failure", "statuses": [
        {"context": "CI / all-required (pull_request)", "status": "failure"}]}

    # One override per failing gate; everything else stays merge-ready.
    failing_gates = {
        "<2 genuine approvals": dict(approvers={"agent-researcher"}),
        "red required context": dict(pr_status=red_required),
        "open REQUEST_CHANGES on current head": dict(
            request_changes=["agent-reviewer-cr2"]),
    }

    for gate, override in failing_gates.items():
        d = mq.evaluate_merge_readiness(**_ready_kwargs(**override, **behind_main))
        # The gate name is the assertion message so a failure says which
        # gate was weakened.
        assert d.action == "wait" and d.ready is False, gate
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Fix 3: status fetch is fail-closed (failed fetch != green)
|
||||
# --------------------------------------------------------------------------
|
||||
@@ -930,30 +700,20 @@ def _stale_pr_update_409_monkeypatch(monkeypatch, queued_issues, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
# Scan-loop process_once enumerates candidates via list_candidate_issues.
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: queued_issues)
|
||||
monkeypatch.setattr(mq, "get_pull", lambda n: {
|
||||
"state": "open", "number": n, "mergeable": False,
|
||||
"state": "open", "number": n, "mergeable": True,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": head_sha, "repo_id": 1},
|
||||
"labels": [{"name": "merge-queue"}],
|
||||
})
|
||||
# NOTE: mergeable is False (real conflict) AND commits do NOT contain
|
||||
# main_sha → pr_has_current_base is False → decision.action == "update".
|
||||
# Under the #2358 direct-merge fix the update path is reached ONLY when the
|
||||
# PR is NOT mergeable; a mergeable=True behind-main PR would merge directly,
|
||||
# so this fixture sets mergeable=False to exercise the #2352 409-on-update
|
||||
# hold path.
|
||||
# NOTE: commits do NOT contain main_sha → pr_has_current_base is False →
|
||||
# decision.action == "update".
|
||||
monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": head_sha}])
|
||||
monkeypatch.setattr(mq, "get_pull_reviews", lambda n: [
|
||||
{"state": "APPROVED", "user": {"login": "agent-researcher"},
|
||||
@@ -1205,16 +965,8 @@ def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (push)", "status": "success"},
|
||||
]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
|
||||
monkeypatch.setattr(mq, "get_pull", lambda n: dict(pr_payload, number=n))
|
||||
@@ -1395,14 +1147,8 @@ def _wire_multi_candidate_process_once(monkeypatch, *, issues, pulls, reviews, c
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: MAIN_SHA)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == MAIN_SHA:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == MAIN_SHA else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
|
||||
@@ -1534,12 +1280,7 @@ def test_hol_unready_red_required_ci_is_skipped_for_ready_pr(monkeypatch):
|
||||
"statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
state = "failure" if sha == red_head else "success"
|
||||
return {"state": state,
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": state},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "success"},
|
||||
]}
|
||||
"statuses": [{"context": "CI / all-required (pull_request)", "status": state}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
rc = mq.process_once(dry_run=False)
|
||||
@@ -1634,126 +1375,3 @@ def test_process_once_defensive_skip_when_pull_payload_opted_out(monkeypatch):
|
||||
|
||||
assert rc == 0
|
||||
assert calls["merged"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# readiness-enumeration + post-batch summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_enumerate_readiness_evaluates_all_candidates(monkeypatch):
    """enumerate_readiness reports every candidate's state.

    It must not stop at the first actionable candidate: both queued PRs get
    an entry, one not ready and one ready.
    """
    blocked_sha, clean_sha = 40 * "a", 40 * "c"

    def _open_pull(sha, mergeable):
        # Same-repo, non-draft PR against main; only head SHA and
        # mergeability vary between the two candidates.
        return {"state": "open", "mergeable": mergeable, "draft": False,
                "base": {"ref": "main", "repo_id": 1},
                "head": {"sha": sha, "repo_id": 1}, "labels": []}

    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=[
            _issue(500, labels=[], created="2026-06-01T01:00:00Z"),
            _issue(501, labels=[], created="2026-06-01T02:00:00Z"),
        ],
        pulls={
            500: _open_pull(blocked_sha, False),
            501: _open_pull(clean_sha, True),
        },
        reviews={500: _two_approvals(blocked_sha), 501: _two_approvals(clean_sha)},
        calls={},
    )

    entries = mq.enumerate_readiness(dry_run=False)

    assert len(entries) == 2
    by_num = {entry.pr_number: entry for entry in entries}
    for number, expect_ready in ((500, False), (501, True)):
        assert by_num[number].decision is not None
        assert by_num[number].decision.ready is expect_ready
|
||||
|
||||
|
||||
def test_enumerate_readiness_includes_ineligible_pr(monkeypatch):
    """A fork / wrong-base PR is recorded as ineligible (decision=None).

    The remaining candidates are still evaluated.
    """
    sha = 40 * "a"

    def _open_pull(head_repo_id):
        # Mergeable, non-draft PR against main; the head repo id decides
        # whether it comes from the same repo (1) or a fork (anything else).
        return {"state": "open", "mergeable": True, "draft": False,
                "base": {"ref": "main", "repo_id": 1},
                "head": {"sha": sha, "repo_id": head_repo_id}, "labels": []}

    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=[
            _issue(600, labels=[], created="2026-06-01T01:00:00Z"),
            _issue(601, labels=[], created="2026-06-01T02:00:00Z"),
        ],
        pulls={
            600: _open_pull(2),  # fork
            601: _open_pull(1),
        },
        reviews={600: _two_approvals(sha), 601: _two_approvals(sha)},
        calls={},
    )

    entries = mq.enumerate_readiness(dry_run=False)

    by_num = {entry.pr_number: entry for entry in entries}
    fork_entry, same_repo_entry = by_num[600], by_num[601]
    assert fork_entry.decision is None
    assert "not merge-eligible" in fork_entry.reason
    assert same_repo_entry.decision is not None
    assert same_repo_entry.decision.ready is True
|
||||
|
||||
|
||||
def test_enumerate_readiness_fail_closed_on_api_error(monkeypatch):
    """A get_pull failure marks only that candidate as unverifiable.

    The other candidates are still evaluated.
    """
    sha = 40 * "a"

    def _ready_pull():
        # Fresh payload per PR so the two candidates never share a dict.
        return {"state": "open", "mergeable": True, "draft": False,
                "base": {"ref": "main", "repo_id": 1},
                "head": {"sha": sha, "repo_id": 1}, "labels": []}

    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=[
            _issue(700, labels=[], created="2026-06-01T01:00:00Z"),
            _issue(701, labels=[], created="2026-06-01T02:00:00Z"),
        ],
        pulls={700: _ready_pull(), 701: _ready_pull()},
        reviews={700: _two_approvals(sha), 701: _two_approvals(sha)},
        calls={},
    )

    # Wrap the get_pull installed by the wiring helper so that only PR 700
    # raises; every other PR number is delegated to it.
    wired_get_pull = mq.get_pull

    def _get_pull_failing_for_700(n):
        if n != 700:
            return wired_get_pull(n)
        raise mq.ApiError("simulated API failure")

    monkeypatch.setattr(mq, "get_pull", _get_pull_failing_for_700)

    entries = mq.enumerate_readiness(dry_run=False)

    by_num = {entry.pr_number: entry for entry in entries}
    broken, healthy = by_num[700], by_num[701]
    assert broken.decision is None
    assert "unverifiable" in broken.reason
    assert healthy.decision is not None
    assert healthy.decision.ready is True
|
||||
|
||||
|
||||
def test_print_post_batch_summary_counts_correctly(capsys):
    """print_post_batch_summary prints the totals and one state line per PR.

    The input has one ready, one waiting and one decision-less entry; stdout
    must contain the matching counters and per-PR state lines.
    """
    entries = [
        mq.ReadinessEntry(pr_number=1, decision=mq.MergeDecision(True, "merge", "ready"), reason="ready"),
        mq.ReadinessEntry(pr_number=2, decision=mq.MergeDecision(False, "wait", "CI red"), reason="CI red"),
        mq.ReadinessEntry(pr_number=3, decision=None, reason="draft"),
    ]

    mq.print_post_batch_summary(entries)

    out = capsys.readouterr().out
    expected_fragments = (
        "total_candidates=3",
        "ready=1",
        "waiting=1",
        "ineligible/unverifiable=1",
        "PR #1: state=ready",
        "PR #2: state=waiting",
        "PR #3: state=ineligible",
    )
    for fragment in expected_fragments:
        assert fragment in out
|
||||
|
||||
@@ -17,7 +17,7 @@ wd.REPO = "molecule-ai/molecule-core"
|
||||
wd.OWNER = "molecule-ai"
|
||||
wd.NAME = "molecule-core"
|
||||
wd.WATCH_BRANCH = "main"
|
||||
wd.RED_LABEL = "ci-bp-drift"
|
||||
wd.RED_LABEL = "tier:high"
|
||||
wd.API = "https://git.example.com/api/v1"
|
||||
|
||||
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
#!/usr/bin/env bash
# Anti-regression gate for #2403: fail if any SOP tier artifact reappears.
#
# Runs from the repository root (three directories above this script),
# collects every violation on stderr, and exits 1 if at least one was found.
set -euo pipefail

cd "$(dirname "$0")/../../.."

fail=0

# Print one violation to stderr and remember that the gate must fail.
flag() {
  echo "FAIL: $1" >&2
  fail=1
}

# 1 + 2. Deleted workflow files, then deleted script files, must stay deleted.
for f in \
  .gitea/workflows/sop-tier-check.yml \
  .gitea/workflows/sop-tier-refire.yml \
  .gitea/scripts/sop-tier-check.sh \
  .gitea/scripts/sop-tier-refire.sh; do
  [ ! -e "$f" ] || flag "$f was re-added (must stay deleted per #2403)"
done

# 3. No tier branching logic in gate_check.py
if grep -qE '_get_pr_tier|TIER_AGENTS' tools/gate-check-v3/gate_check.py; then
  flag "tier branching reappeared in gate_check.py"
fi

# 4. No _is_tier_low_pending_ok in merge queue
if grep -q '_is_tier_low_pending_ok' .gitea/scripts/gitea-merge-queue.py; then
  flag "tier soft-fail reappeared in gitea-merge-queue.py"
fi

# 5. No sop-tier-check context references in workflow YAML. grep is not
#    silenced here, so the offending lines are printed to stdout.
if grep -r 'sop-tier-check' .gitea/workflows/; then
  flag "sop-tier-check context reappeared in workflows"
fi

if [ "$fail" -eq 1 ]; then
  echo "TIER_REGRESSION_DETECTED" >&2
  exit 1
fi

echo "PASS: no tier regression detected"
|
||||
@@ -11,7 +11,7 @@
|
||||
# - compute_ack_state (self-ack rejected, team probe applied, revoke
|
||||
# invalidates own prior ack, peer's ack survives unrevoked)
|
||||
# - render_status (state + description format)
|
||||
# - is_high_risk (label-driven, default fallback)
|
||||
# - get_tier_mode (label-driven, default fallback)
|
||||
# - load_config (default config parses cleanly with both PyYAML and
|
||||
# the bundled minimal parser)
|
||||
#
|
||||
@@ -432,6 +432,37 @@ class TestRenderStatus(unittest.TestCase):
|
||||
self.assertIn("body-unfilled", desc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_tier_mode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetTierMode(unittest.TestCase):
    """get_tier_mode maps a PR's tier label to a failure mode.

    Expected with the default config: tier:high and tier:medium are "hard",
    tier:low is "soft", and a PR without any tier label falls back to "hard".
    """

    def setUp(self):
        self.cfg = sop.load_config(CONFIG_PATH)

    def _mode_for(self, *label_names):
        # Build a PR payload carrying exactly the given labels and resolve it.
        pr = {"labels": [{"name": label} for label in label_names]}
        return sop.get_tier_mode(pr, self.cfg)

    def test_tier_high_is_hard(self):
        self.assertEqual(self._mode_for("tier:high", "area:ci"), "hard")

    def test_tier_medium_is_hard(self):
        self.assertEqual(self._mode_for("tier:medium"), "hard")

    def test_tier_low_is_soft(self):
        self.assertEqual(self._mode_for("tier:low"), "soft")

    def test_no_tier_label_defaults_to_hard(self):
        # Per feedback_fix_root_not_symptom — never silently lower the bar.
        self.assertEqual(self._mode_for("area:ci"), "hard")

    def test_no_labels_defaults_to_hard(self):
        # Covers both an empty label list and a payload without the key.
        for pr in ({"labels": []}, {}):
            self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# load_config
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -456,6 +487,13 @@ class TestLoadConfig(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
def test_default_config_tier_mode_shape(self):
    """The default config maps each tier label to its failure mode."""
    cfg = sop.load_config(CONFIG_PATH)
    modes = cfg["tier_failure_mode"]
    for label, expected in (("tier:high", "hard"),
                            ("tier:medium", "hard"),
                            ("tier:low", "soft")):
        self.assertEqual(modes[label], expected)
    self.assertEqual(cfg["default_mode"], "hard")
|
||||
|
||||
def test_each_item_has_required_fields(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
for it in cfg["items"]:
|
||||
@@ -589,7 +627,7 @@ class TestComputeNaState(unittest.TestCase):
|
||||
class TestIsHighRisk(unittest.TestCase):
|
||||
"""The high-risk predicate decides which required_teams list applies.
|
||||
|
||||
Predicate: any label in cfg.high_risk_labels.
|
||||
Predicate: tier:high label OR any label in cfg.high_risk_labels.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
@@ -599,8 +637,23 @@ class TestIsHighRisk(unittest.TestCase):
|
||||
pr = {"labels": []}
|
||||
self.assertFalse(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_tier_high_is_high_risk(self):
    # A lone tier:high label must classify the PR as high-risk.
    labelled = dict(labels=[dict(name="tier:high")])
    self.assertTrue(sop.is_high_risk(labelled, self.cfg))
|
||||
|
||||
def test_tier_low_is_default_class(self):
    # tier:low on its own must not classify the PR as high-risk.
    labelled = dict(labels=[dict(name="tier:low")])
    self.assertFalse(sop.is_high_risk(labelled, self.cfg))
|
||||
|
||||
def test_tier_medium_is_default_class(self):
    # tier:medium alone is NOT high-risk (Option C — medium routes to the
    # wider engineers OR-set).
    labelled = dict(labels=[dict(name="tier:medium")])
    self.assertFalse(sop.is_high_risk(labelled, self.cfg))
|
||||
|
||||
def test_area_security_label_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "area:security"}]}
|
||||
pr = {"labels": [{"name": "tier:medium"}, {"name": "area:security"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_area_schema_label_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "area:schema"}]}
|
||||
@@ -615,7 +668,7 @@ class TestIsHighRisk(unittest.TestCase):
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_area_gate_meta_label_is_high_risk(self):
|
||||
# Gate-meta = changes to sop-checklist/sop-checklist itself.
|
||||
# Gate-meta = changes to sop-checklist/sop-tier-check itself.
|
||||
pr = {"labels": [{"name": "area:gate-meta"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
@@ -669,7 +722,7 @@ class TestRootCauseAckEligibilityWidened(unittest.TestCase):
|
||||
root-cause / no-backwards-compat for the default class.
|
||||
|
||||
The dead-managers/ceo-persona-token gridlock is the symptom; the
|
||||
root cause is that sop-checklist ignored high-risk class. These tests
|
||||
root cause is that sop-checklist ignored tier-class. These tests
|
||||
pin the new wider-default behavior so it can't regress silently.
|
||||
"""
|
||||
|
||||
@@ -740,7 +793,7 @@ class TestHighRiskClassUsesElevatedListInConfig(unittest.TestCase):
|
||||
|
||||
def test_root_cause_high_risk_elevated_to_ceo_only(self):
|
||||
items = _items_by_slug()
|
||||
# area:schema alone makes the PR high-risk → root-cause needs ceo.
|
||||
# tier:high alone makes the PR high-risk → root-cause needs ceo.
|
||||
self.assertEqual(
|
||||
sop.resolve_required_teams(items["root-cause"], high_risk=True),
|
||||
["ceo"],
|
||||
|
||||
+272
@@ -0,0 +1,272 @@
|
||||
#!/usr/bin/env bash
|
||||
# Security regression test for the SOP tier-gate AUTHORIZATION bypass.
|
||||
#
|
||||
# Bug (fixed in fix/sop-tier-authz-no-org-fallback):
|
||||
# sop-tier-check.sh probed team membership at /teams/{id}/members/{user}.
|
||||
# If EVERY team probe failed (e.g. 403 — token lacks read:organization, or
|
||||
# any visibility/flakiness gap), it FELL BACK to /orgs/{org}/members/{user}
|
||||
# and credited that org member as a member of EVERY queried team. The
|
||||
# evaluator then treated those synthetic memberships as real, so a plain
|
||||
# NON-CEO org member satisfied tier:high (ceo). A visibility/auth gap became
|
||||
# a real highest-tier authorization PASS — privilege escalation.
|
||||
#
|
||||
# Fix (fail-closed authorization):
|
||||
# - The org-member ⇒ "member of all teams" fallback is REMOVED. Org
|
||||
# membership is never credited as team membership.
|
||||
# - A team probe that returns anything other than 200/204 (member) or 404
|
||||
# (verified non-member) is a CANNOT-VERIFY condition: the gate fails loud
|
||||
# (exit 1) with a cannot-verify status and never grants the tier.
|
||||
#
|
||||
# Method: this is a true end-to-end test. It prepends a fake `curl` to PATH
|
||||
# that serves canned Gitea API responses keyed by URL, then runs the REAL
|
||||
# sop-tier-check.sh. The fake exercises the genuine probe→credit→evaluate
|
||||
# path — no logic is re-implemented in the test.
|
||||
|
||||
set -euo pipefail

# Locate the script under test: it lives one directory above this test file.
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
SCRIPT="$SCRIPT_DIR/sop-tier-check.sh"

# Hard prerequisites: abort with an error annotation rather than run a
# meaningless test.
if ! command -v jq >/dev/null 2>&1; then
  echo "::error::jq required but not found"
  exit 1
fi
if ! [ -f "$SCRIPT" ]; then
  echo "::error::sop-tier-check.sh not found at $SCRIPT — test must fail loudly if the script is absent"
  exit 1
fi
|
||||
|
||||
# sop-tier-check.sh uses `declare -A` (associative arrays), which require
|
||||
# bash >= 4. CI runners (Ubuntu) ship bash 5; macOS ships 3.2. Resolve a
|
||||
# bash >= 4 to run the script under.
|
||||
# Print the path of the first candidate bash whose major version is >= 4.
# Candidates are tried in order: `bash` from PATH, the two Homebrew
# locations, then /bin/bash. Returns 1 (printing nothing) when none qualifies.
pick_bash() {
  local candidate resolved major
  for candidate in bash /opt/homebrew/bin/bash /usr/local/bin/bash /bin/bash; do
    resolved="$(command -v "$candidate" 2>/dev/null || true)"
    if [ -z "$resolved" ]; then
      continue
    fi
    # Ask the candidate itself for its major version; a candidate that
    # cannot answer is treated as version 0.
    major="$("$resolved" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null || echo 0)"
    if ! [ "${major:-0}" -ge 4 ]; then
      continue
    fi
    echo "$resolved"
    return 0
  done
  return 1
}
|
||||
# Resolve the interpreter used to run the script under test; abort when no
# bash >= 4 is available.
if ! BASH4="$(pick_bash)"; then
  echo "::error::need bash >= 4 to run sop-tier-check.sh (associative arrays); none found"
  exit 1
fi
echo "using bash: $BASH4 ($("$BASH4" -c 'echo $BASH_VERSION'))"

# Assertion counters, incremented by the assert_* helpers.
PASS=0
FAIL=0
|
||||
|
||||
# assert_eq LABEL EXPECTED GOT
# Record a PASS when EXPECTED and GOT are the same string; otherwise record
# a FAIL and print both values. Never aborts the run: the verdict is the
# global PASS/FAIL counters.
assert_eq() {
  local name="$1" want="$2" have="$3"
  if [ "$want" != "$have" ]; then
    echo " FAIL $name"
    echo " expected: <$want>"
    echo " got: <$have>"
    FAIL=$((FAIL + 1))
    return
  fi
  echo " PASS $name"
  PASS=$((PASS + 1))
}
|
||||
|
||||
# assert_contains LABEL HAYSTACK NEEDLE
# Record a PASS when NEEDLE occurs in HAYSTACK as a fixed string (no regex),
# otherwise record a FAIL naming the missing substring. Never aborts the
# run: the verdict is the global PASS/FAIL counters.
assert_contains() {
  local label="$1" haystack="$2" needle="$3"
  # grep runs WITHOUT -q on purpose. With -q it exits on the first match;
  # on a large haystack the printf writer is then killed by SIGPIPE, and
  # under `set -o pipefail` the pipeline reports failure even though the
  # needle was found. Letting grep read all input keeps the status truthful.
  if printf '%s' "$haystack" | grep -F -- "$needle" >/dev/null; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label (missing substring: <$needle>)"
    FAIL=$((FAIL + 1))
  fi
}
|
||||
|
||||
# assert_not_contains LABEL HAYSTACK NEEDLE
# Record a PASS when NEEDLE does NOT occur in HAYSTACK as a fixed string,
# otherwise record a FAIL naming the unexpected substring. Never aborts the
# run: the verdict is the global PASS/FAIL counters.
assert_not_contains() {
  local label="$1" haystack="$2" needle="$3"
  # grep runs WITHOUT -q on purpose. With -q it exits on the first match;
  # on a large haystack the printf writer is then killed by SIGPIPE, and
  # under `set -o pipefail` the pipeline reports failure. Here that would
  # fail OPEN: a needle that is present would be reported as absent.
  if printf '%s' "$haystack" | grep -F -- "$needle" >/dev/null; then
    echo " FAIL $label (unexpected substring present: <$needle>)"
    FAIL=$((FAIL + 1))
  else
    echo " PASS $label"
    PASS=$((PASS + 1))
  fi
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake-curl harness.
|
||||
#
|
||||
# The real script calls curl in three shapes:
|
||||
# (a) body capture: curl -sS -H AUTH URL -> prints JSON body
|
||||
# (b) http-code: curl -sS -o FILE -w '%{http_code}' -H AUTH URL
|
||||
# (c) http-code only: curl -sS -o /dev/null -w '%{http_code}' -H AUTH URL
|
||||
#
|
||||
# Our fake reads the URL (last non-flag arg), looks up a response in fixture
|
||||
# files under $FIXDIR, and emits body and/or http-code accordingly.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# make_harness FIXDIR
# Install a fake `curl` executable at FIXDIR/bin/curl and print the bin
# directory so callers can prepend it to PATH. The fake reads
# SOP_TEST_FIXDIR at run time and serves FIXDIR/body<slug> and
# FIXDIR/code<slug>, where <slug> is the URL path after /api/v1 with each of
# `/ ? = &` replaced by `_`. A missing code fixture yields 200; a missing
# body fixture yields an empty body.
make_harness() {
  local fixture_root="$1"
  local shim_dir="$fixture_root/bin"
  mkdir -p "$shim_dir"
  # Quoted delimiter: the payload is written verbatim, nothing is expanded.
  cat > "$shim_dir/curl" <<'FAKE'
#!/usr/bin/env bash
# Fake curl for sop-tier-check authz tests. Looks up canned responses by URL.
set -u
FIXDIR="${SOP_TEST_FIXDIR:?SOP_TEST_FIXDIR unset}"

url=""
out=""
want_code="no"
prev=""
for a in "$@"; do
  case "$prev" in
    -o) out="$a" ;;
  esac
  case "$a" in
    http*://*) url="$a" ;;
    '%{http_code}') want_code="yes" ;;
  esac
  # -w '%{http_code}' arrives as the value of the -w flag
  if [ "$prev" = "-w" ] && [ "$a" = '%{http_code}' ]; then want_code="yes"; fi
  prev="$a"
done

# Map URL -> fixture key (a filename-safe slug).
# We only need the path after /api/v1.
path="${url#*/api/v1}"
slug="$(printf '%s' "$path" | tr '/?=&' '____')"

body_file="$FIXDIR/body${slug}"
code_file="$FIXDIR/code${slug}"

# Emit body to -o target (or capture for stdout) when a body fixture exists.
body=""
if [ -f "$body_file" ]; then body="$(cat "$body_file")"; fi
if [ -n "$out" ]; then
  printf '%s' "$body" > "$out"
else
  printf '%s' "$body"
fi

# Emit http code when requested.
if [ "$want_code" = "yes" ]; then
  if [ -f "$code_file" ]; then
    printf '%s' "$(cat "$code_file")"
  else
    printf '200'
  fi
fi
exit 0
FAKE
  chmod +x "$shim_dir/curl"
  echo "$shim_dir"
}
|
||||
|
||||
# Common fixtures shared by scenarios. $1 = FIXDIR, $2 = approver login,
|
||||
# $3 = tier label name (e.g. tier:high), $4 = teams JSON.
|
||||
# seed_common FIXDIR APPROVER TIER TEAMS_JSON
# Write the fixtures every scenario needs into FIXDIR (created if missing):
# the /user identity, PR 42 with its head SHA, its single TIER label, the
# org team list (body + HTTP 200), and one APPROVED review by APPROVER on
# the current head.
seed_common() {
  local root="$1" reviewer="$2" tier_label="$3" teams="$4"
  local repo_stem="$root/body_repos_molecule-ai_molecule-core"
  mkdir -p "$root"

  # /user -> whoami
  printf '%s' '{"login":"sop-bot"}' > "$root/body_user"

  # PR head sha
  printf '%s' '{"head":{"sha":"headsha1"}}' > "${repo_stem}_pulls_42"

  # labels
  printf '[{"name":"%s"}]' "$tier_label" > "${repo_stem}_issues_42_labels"

  # org teams list
  printf '%s' "$teams" > "$root/body_orgs_molecule-ai_teams"
  printf '%s' '200' > "$root/code_orgs_molecule-ai_teams"

  # reviews: one APPROVED on the current head by the given approver
  printf '[{"state":"APPROVED","commit_id":"headsha1","user":{"login":"%s"}}]' "$reviewer" \
    > "${repo_stem}_pulls_42_reviews"
}
|
||||
|
||||
# run_script FIXDIR
# Run the script under test ($SCRIPT, interpreted by $BASH4) with the fake
# curl from FIXDIR/bin first on PATH and a fixed PR environment. Prints the
# script's combined stdout+stderr and returns the script's exit status. The
# output and status are also left in the OUT and RC variables of the shell
# that runs this function.
run_script() {
  local fixture_root="$1"
  local shim_dir="$fixture_root/bin"
  # A failing script must not abort the test run: its status is the result.
  set +e
  OUT=$(
    PR_NUMBER="42" \
    PR_AUTHOR="pr-author" \
    REPO="molecule-ai/molecule-core" \
    GITEA_HOST="git.moleculesai.app" \
    GITEA_TOKEN="faketoken" \
    SOP_DEBUG="0" \
    SOP_LEGACY_CHECK="0" \
    SOP_TEST_FIXDIR="$fixture_root" \
    PATH="$shim_dir:$PATH" \
      "$BASH4" "$SCRIPT" 2>&1
  )
  RC=$?
  set -e
  printf '%s' "$OUT"
  return $RC
}
|
||||
|
||||
# Org team list served to every scenario: ceo=10, engineers=11, managers=12.
TEAMS_JSON='[{"name":"ceo","id":10},{"name":"engineers","id":11},{"name":"managers","id":12}]'

cat <<'BANNER'
==============================================================
Scenario 1: tier:high, team probe 403 (cannot read), approver
 is a plain org member but NOT in ceo team.
 EXPECT: tier NOT granted (fail-closed cannot-verify).
==============================================================
BANNER
S1="$(mktemp -d)"
make_harness "$S1" >/dev/null
seed_common "$S1" "org-only-bob" "tier:high" "$TEAMS_JSON"
# Team membership probe for ceo (id=10) returns 403 — cannot read.
printf '%s' '403' > "$S1/code_teams_10_members_org-only-bob"
# The OLD bug path: org membership probe would 204 and synthetic-credit.
printf '%s' '204' > "$S1/code_orgs_molecule-ai_members_org-only-bob"
# Capture output and exit status without letting a non-zero exit abort us.
RC1=0
OUT1="$(run_script "$S1")" || RC1=$?
echo "$OUT1" | sed 's/^/ /'
echo " (exit=$RC1)"
# Collapse the exit status to 1 (denied) / 0 (granted) for the comparison.
if [ "$RC1" -ne 0 ]; then S1_DENIED=1; else S1_DENIED=0; fi
assert_eq "S1 exit non-zero (tier NOT granted)" "1" "$S1_DENIED"
assert_not_contains "S1 did NOT print PASSED" "$OUT1" "sop-tier-check PASSED"
assert_contains "S1 cannot-verify error surfaced" "$OUT1" "CANNOT VERIFY"
assert_contains "S1 names the unreadable probe (403)" "$OUT1" "HTTP 403"
rm -rf "$S1"
|
||||
|
||||
echo
cat <<'BANNER'
==============================================================
Scenario 2: tier:high, genuine ceo team member (probe 204).
 EXPECT: tier GRANTED.
==============================================================
BANNER
S2="$(mktemp -d)"
make_harness "$S2" >/dev/null
seed_common "$S2" "real-ceo" "tier:high" "$TEAMS_JSON"
# ceo team (id=10): the approver is a member.
printf '%s' '204' > "$S2/code_teams_10_members_real-ceo"
# Capture output and exit status without letting a non-zero exit abort us.
RC2=0
OUT2="$(run_script "$S2")" || RC2=$?
echo "$OUT2" | sed 's/^/ /'
echo " (exit=$RC2)"
assert_eq "S2 exit zero (granted)" "0" "$RC2"
assert_contains "S2 printed PASSED" "$OUT2" "sop-tier-check PASSED"
rm -rf "$S2"
|
||||
|
||||
echo
cat <<'BANNER'
==============================================================
Scenario 3: tier:high, approver is an org member but a VERIFIED
 non-member of ceo (team probe 404). Org probe would
 204 — must NEVER be synthetic-credited.
 EXPECT: tier NOT granted (clause FAIL), no fallback.
==============================================================
BANNER
S3="$(mktemp -d)"
make_harness "$S3" >/dev/null
seed_common "$S3" "org-member-carol" "tier:high" "$TEAMS_JSON"
# ceo team (id=10): verified NOT a member.
printf '%s' '404' > "$S3/code_teams_10_members_org-member-carol"
# Org member — this answer must be ignored by the script.
printf '%s' '204' > "$S3/code_orgs_molecule-ai_members_org-member-carol"
# Capture output and exit status without letting a non-zero exit abort us.
RC3=0
OUT3="$(run_script "$S3")" || RC3=$?
echo "$OUT3" | sed 's/^/ /'
echo " (exit=$RC3)"
# Collapse the exit status to 1 (denied) / 0 (granted) for the comparison.
if [ "$RC3" -ne 0 ]; then S3_DENIED=1; else S3_DENIED=0; fi
assert_eq "S3 exit non-zero (tier NOT granted)" "1" "$S3_DENIED"
assert_not_contains "S3 did NOT print PASSED" "$OUT3" "sop-tier-check PASSED"
assert_contains "S3 reported a real clause FAIL (not cannot-verify)" "$OUT3" "FAILED for tier:high"
assert_not_contains "S3 did NOT cannot-verify (404 is a verified negative)" "$OUT3" "CANNOT VERIFY"
rm -rf "$S3"

# Final tally; the exit status of the test is that of the last command.
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
[ "$FAIL" -eq 0 ]
|
||||
+101
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for #229 — sop-tier-check tier:low OR-clause splitter.
|
||||
#
|
||||
# Bug (PR #225 → still broken after PR #231):
|
||||
# Line ~289 of sop-tier-check.sh used:
|
||||
# _clause=$(echo "$_raw_clause" | tr -d '()' | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
|
||||
# `tr -d '[:space:]'` strips the newlines that `tr ',' '\n'` just
|
||||
# inserted, collapsing "engineers,managers,ceo" into a single token
|
||||
# "engineersmanagersceo". The for-loop then iterates ONCE on a name
|
||||
# that matches no team, so every tier:low PR fails:
|
||||
# ::error::clause [engineers/managers/ceo]: FAIL — no approving
|
||||
# reviewer belongs to any of these teamsengineersmanagersceo
|
||||
# (note also: missing separators in the error string is bug #2 —
|
||||
# `_clause_names` used "${var:+, }$x" which OVERWRITES per iteration).
|
||||
#
|
||||
# Fix shape (this PR):
|
||||
# _no_parens=${_raw_clause//[()]/}
|
||||
# _clause=${_no_parens//,/ } # comma -> space, bash word-split iterates
|
||||
# _clause_names="${_clause_names}${_clause_names:+, }${_t}" # APPEND, not overwrite
|
||||
#
|
||||
# This test extracts the splitter logic and asserts it produces the right
|
||||
# token list for each of the three tier expressions live in the script.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# assert_eq LABEL EXPECTED GOT
# Compares EXPECTED and GOT as strings. Prints a PASS line or a FAIL line with
# both values, and bumps the global PASS or FAIL counter. Always returns 0.
assert_eq() {
  local label="$1" want="$2" actual="$3"

  if [ "$want" != "$actual" ]; then
    printf '%s\n' " FAIL $label" " expected: <$want>" " got: <$actual>"
    FAIL=$((FAIL + 1))
    return 0
  fi

  echo " PASS $label"
  PASS=$((PASS + 1))
}
|
||||
|
||||
# ----- Splitter under test (mirrors the fixed sop-tier-check.sh block) -----
|
||||
# split_clause RAW
# Strips parentheses from RAW, turns commas into spaces, and prints the
# resulting words joined with "|" (e.g. "(managers,ceo)" -> "managers|ceo").
#
# Two safeguards over a bare `for _t in $clause` loop:
#   * the loop variable is local, so a direct call does not leak it into the
#     caller's scope;
#   * pathname expansion is switched off while the unquoted expansion is
#     word-split, so a token containing glob characters (such as "qa???") is
#     kept literally instead of being replaced by matching file names in the
#     current directory.
split_clause() {
  local raw="$1"
  local no_parens=${raw//[()]/}
  local clause=${no_parens//,/ }
  local out="" tok restore_glob=0

  # Remember whether noglob was already on so the caller's setting survives.
  case $- in
    *f*) ;;
    *) restore_glob=1; set -f ;;
  esac

  for tok in $clause; do
    out="${out}${out:+|}$tok"
  done

  if [ "$restore_glob" -eq 1 ]; then
    set +f
  fi

  echo "$out"
}
|
||||
|
||||
# --- tier:low: a comma-separated OR-set must come back as three tokens ---
echo "test: tier:low OR-clause splits to 3 tokens"
assert_eq "tier:low" "engineers|managers|ceo" "$(split_clause "engineers,managers,ceo")"

# --- tier:medium: word-split the expression first, then split every word ---
echo 'test: tier:medium AND-expression — bash word-split on $EXPR yields 5 tokens'
EXPR="managers AND engineers AND qa???,security???"
medium_joined=""
for raw_word in $EXPR; do
  medium_joined+="${medium_joined:+ ; }$(split_clause "$raw_word")"
done
assert_eq "tier:medium" "managers ; AND ; engineers ; AND ; qa???|security???" "$medium_joined"

# --- tier:high: a single team passes through unchanged ---
echo "test: tier:high single-team OR-clause"
assert_eq "tier:high" "ceo" "$(split_clause "ceo")"

# --- parentheses around an OR-set are dropped before splitting ---
echo "test: paren-wrapped OR-set unwraps + splits"
assert_eq "paren OR" "managers|ceo" "$(split_clause "(managers,ceo)")"

# ----- _clause_names accumulator (was overwriting per iteration) -----
# The append idiom is kept verbatim because it is the expression under test.
names_acc=""
for team_name in engineers managers ceo; do
  names_acc="${names_acc}${names_acc:+, }${team_name}"
done
assert_eq "_clause_names append" "engineers, managers, ceo" "$names_acc"

# ----- _failed_clauses / _passed_clauses accumulator across raw clauses -----
clauses_acc=""
for clause_name in clauseA clauseB clauseC; do
  clauses_acc="${clauses_acc}${clauses_acc:+, }${clause_name}"
done
assert_eq "_failed_clauses append" "clauseA, clauseB, clauseC" "$clauses_acc"

# ----- End-to-end OR-gate: simulate APPROVER_TEAMS[core-lead]=' managers ' -----
# The script's case pattern is *${_t}* with a space-padded value.
APPROVER_TEAMS_VAL=" managers "
matched=""
for team in $(split_clause "engineers,managers,ceo" | tr '|' ' '); do
  case "$APPROVER_TEAMS_VAL" in
    *${team}*)
      matched="$team"
      break
      ;;
  esac
done
assert_eq "OR-gate matches managers" "managers" "$matched"

# Totals; the final test is the script's exit status.
printf '\n%s\n%s\n' "------" "PASS=$PASS FAIL=$FAIL"
test "$FAIL" -eq 0
|
||||
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for internal#816 — sop-tier-check must ignore APPROVED
|
||||
# reviews that were submitted against an old PR head SHA.
|
||||
#
|
||||
# Bug: the script collected approvers with
|
||||
# jq '[.[] | select(.state=="APPROVED") | .user.login]'
|
||||
# without filtering on .commit_id == HEAD_SHA. After a PR head moved,
|
||||
# stale approvals looked valid to the tier gate.
|
||||
#
|
||||
# Fix: the jq filter now includes
|
||||
# select(.state=="APPROVED" and .commit_id == $head_sha)
|
||||
# where $head_sha is the current PR head fetched from the API.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# jq may not be on PATH in all environments (e.g. dev containers), so a copy
# placed in /tmp/bin is accepted as a fallback. The directory is appended, not
# prepended: /tmp is world-writable, and putting it first would let anything
# dropped there shadow jq and every other command this test runs.
PATH="$PATH:/tmp/bin"
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# assert_eq LABEL EXPECTED GOT
# String-equality assertion: reports PASS or FAIL (with both values) on stdout
# and increments the matching global counter. Always returns 0.
assert_eq() {
  local label="$1"
  local expected="$2"
  local got="$3"

  # A quoted case pattern matches literally, i.e. plain string equality.
  case "$got" in
    "$expected")
      echo " PASS $label"
      PASS=$((PASS + 1))
      ;;
    *)
      echo " FAIL $label"
      echo " expected: <$expected>"
      echo " got: <$got>"
      FAIL=$((FAIL + 1))
      ;;
  esac
}
|
||||
|
||||
# Sample reviews matching the shape from Gitea API
REVIEWS_JSON='[
{"state":"APPROVED","commit_id":"abc123","user":{"login":"bob"}},
{"state":"APPROVED","commit_id":"old456","user":{"login":"alice"}},
{"state":"COMMENT","commit_id":"abc123","user":{"login":"carol"}},
{"state":"APPROVED","commit_id":"abc123","user":{"login":"dave"}},
{"state":"REQUEST_CHANGES","commit_id":"abc123","user":{"login":"eve"}}
]'

# approvers_on_head REVIEWS HEAD_SHA
# Runs the filter under test: prints, one per line, the unique logins of
# reviews whose state is APPROVED and whose commit_id equals HEAD_SHA.
approvers_on_head() {
  echo "$1" | jq -r --arg head_sha "$2" \
    '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]'
}

echo "test: jq filter keeps only APPROVED on current head"
GOT=$(approvers_on_head "$REVIEWS_JSON" "abc123")
# Flatten the one-login-per-line output into a single space-separated string.
assert_eq "current-head approvers" "bob dave" "$(echo "$GOT" | tr '\n' ' ' | sed 's/ $//')"

echo "test: jq filter with all-stale reviews yields empty"
GOT=$(approvers_on_head "$REVIEWS_JSON" "new789")
assert_eq "all-stale yields empty" "" "$GOT"

echo "test: jq filter handles null commit_id gracefully"
NULL_JSON='[{"state":"APPROVED","commit_id":null,"user":{"login":"mallory"}}]'
GOT=$(approvers_on_head "$NULL_JSON" "abc123")
assert_eq "null commit_id excluded" "" "$GOT"

# Totals; the final test is the script's exit status.
printf '\n%s\n%s\n' "------" "PASS=$PASS FAIL=$FAIL"
test "$FAIL" -eq 0
|
||||
Executable
+304
@@ -0,0 +1,304 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tests for sop-tier-refire.{yml,sh} — internal#292.
|
||||
#
|
||||
# Behavior matrix:
|
||||
#
|
||||
# T1: PR open + APPROVED via tier:low → script invokes sop-tier-check
|
||||
# and POSTs status=success.
|
||||
# T2: PR open + missing tier label → sop-tier-check exits non-zero;
# refire POSTs status=failure (fail-closed: it never forges a green
# on the required context) while the refire job itself exits 0.
# T3: PR open + tier:low but NO approving reviews → sop-tier-check
# exits non-zero; refire POSTs status=failure for the same reason.
|
||||
# T4: PR CLOSED → refire exits 0 with no status POST (no-op on closed).
|
||||
# T5: Rate-limit — recent status update within 30s → refire skips,
|
||||
# no new POST.
|
||||
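# T6 (yaml-lint): the refire workflow does not listen on issue_comment,
# exposes workflow_dispatch, and does not check out the PR head; the
# dispatcher workflow (sop-checklist.yml) parses as YAML, listens on
# issue_comment, and handles the three slash commands.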
|
||||
# T7 (yaml-lint): workflow file is parseable YAML.
|
||||
#
|
||||
# Tests T1-T5 run the real script against a local-fixture HTTP server
|
||||
# (python http.server with a stub handler — `tests/_refire_fixture.py`)
|
||||
# so the script's Gitea API calls hit the fixture, not the real Gitea.
|
||||
#
|
||||
# Tests T6/T7 are pure YAML checks against the workflow file.
|
||||
#
|
||||
# Hostile-self-review (per feedback_assert_exact_not_substring):
|
||||
# this test MUST FAIL if the workflow or script is absent. Verified by
|
||||
# running the test before the files exist (covered in the PR body).
|
||||
|
||||
set -e -u -o pipefail

# Locate the files under test relative to this script: the refire script sits
# one directory up, the workflows two directories up under workflows/.
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
WORKFLOW_DIR="$(cd "$THIS_DIR/../../workflows" && pwd)"

SCRIPT="$SCRIPT_DIR/sop-tier-refire.sh"
WORKFLOW="$WORKFLOW_DIR/sop-tier-refire.yml"
DISPATCH_WORKFLOW="$WORKFLOW_DIR/sop-checklist.yml"

# Result counters plus a space-separated list of failed assertion labels.
PASS=0
FAIL=0
FAILED_TESTS=""
|
||||
|
||||
# assert_eq LABEL EXPECTED GOT
# String-equality assertion. On success prints a PASS line and bumps PASS; on
# mismatch prints both values, bumps FAIL and appends LABEL to FAILED_TESTS.
# Always returns 0.
assert_eq() {
  local label="$1" want="$2" actual="$3"

  if [[ "$want" == "$actual" ]]; then
    echo " PASS $label"
    PASS=$((PASS + 1))
    return 0
  fi

  printf '%s\n' " FAIL $label" " expected: <$want>" " got: <$actual>"
  FAIL=$((FAIL + 1))
  FAILED_TESTS="${FAILED_TESTS} ${label}"
}
|
||||
|
||||
# assert_contains LABEL NEEDLE HAYSTACK
# Passes when HAYSTACK contains NEEDLE as a fixed string (grep -F). Prints a
# PASS line, or a FAIL line with the needle and the first 400 bytes of the
# haystack; updates PASS / FAIL / FAILED_TESTS accordingly. Always returns 0.
assert_contains() {
  local label="$1"
  local needle="$2"
  local haystack="$3"
  # The haystack is fed through process substitution instead of a
  # `printf | grep -q` pipeline: this script runs under `pipefail`, and when
  # grep -q exits on the first match the writer can be killed by SIGPIPE,
  # which would turn a successful match into a reported FAIL.
  # `--` keeps a needle that starts with "-" from being read as a grep option.
  if grep -qF -- "$needle" < <(printf '%s' "$haystack"); then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label"
    echo " needle: <$needle>"
    echo " haystack: <$(printf '%s' "$haystack" | head -c 400)>"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${label}"
  fi
}
|
||||
|
||||
# assert_file_exists LABEL PATH
# Passes when PATH is a regular file. On failure the path is shown, FAIL is
# bumped and LABEL is appended to FAILED_TESTS. Always returns 0.
assert_file_exists() {
  local label="$1" path="$2"

  if ! test -f "$path"; then
    echo " FAIL $label (not found: $path)"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${label}"
    return 0
  fi

  echo " PASS $label"
  PASS=$((PASS + 1))
}
|
||||
|
||||
# Existence (foundation — every other test depends on these).
# If any of the three files is missing the run stops here with exit 1.
printf '\n%s\n' "== existence =="
assert_file_exists "workflow file exists" "$WORKFLOW"
assert_file_exists "SSOT dispatcher workflow file exists" "$DISPATCH_WORKFLOW"
assert_file_exists "script file exists" "$SCRIPT"
if [ "$FAIL" -gt 0 ]; then
  printf '\n%s\n' "------"
  echo "PASS=$PASS FAIL=$FAIL (existence)"
  echo "Cannot proceed without these files."
  exit 1
fi
|
||||
|
||||
# T6 / T7 — workflow YAML structure
printf '\n%s\n' "== T6/T7 workflow yaml =="

# yaml_parse_result FILE
# Prints "ok" when FILE loads with yaml.safe_load; otherwise prints whatever
# python3 wrote to stdout/stderr. Never fails, so it is safe under errexit.
yaml_parse_result() {
  python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$1" 2>&1 || true
}

# YAML parseability
PARSE_OUT=$(yaml_parse_result "$WORKFLOW")
assert_eq "T7 workflow parses as YAML" "ok" "$PARSE_OUT"

# The old per-workflow issue_comment listener caused queue storms because
# Gitea queues jobs before evaluating job-level `if:`. The script remains,
# but comment-triggered refires route through the single dispatcher.
WORKFLOW_CONTENT=$(cat "$WORKFLOW")
if ! printf '%s' "$WORKFLOW_CONTENT" | grep -q '^ issue_comment:'; then
  echo " PASS T6a manual fallback workflow does not listen on issue_comment"
  PASS=$((PASS + 1))
else
  echo " FAIL T6a manual fallback workflow must not listen on issue_comment"
  FAIL=$((FAIL + 1))
  FAILED_TESTS="${FAILED_TESTS} T6a"
fi
assert_contains "T6b workflow exposes workflow_dispatch" \
  "workflow_dispatch" "$WORKFLOW_CONTENT"
assert_contains "T6c workflow documents unsupported manual inputs" \
  "workflow_dispatch inputs" "$WORKFLOW_CONTENT"

# Does NOT check out PR HEAD (security)
if ! grep -q 'ref: \${{ github.event.pull_request.head' "$WORKFLOW"; then
  echo " PASS T6d workflow does not check out PR head"
  PASS=$((PASS + 1))
else
  echo " FAIL T6d workflow MUST NOT check out PR head (security)"
  FAIL=$((FAIL + 1))
  FAILED_TESTS="${FAILED_TESTS} T6d"
fi

# The dispatcher workflow owns the comment trigger and the slash commands.
DISPATCH_PARSE_OUT=$(yaml_parse_result "$DISPATCH_WORKFLOW")
assert_eq "T6e SSOT dispatcher workflow parses as YAML" "ok" "$DISPATCH_PARSE_OUT"
DISPATCH_CONTENT=$(cat "$DISPATCH_WORKFLOW")
assert_contains "T6f SSOT dispatcher listens on issue_comment" \
  "issue_comment" "$DISPATCH_CONTENT"
assert_contains "T6g SSOT dispatcher handles /qa-recheck" \
  "/qa-recheck" "$DISPATCH_CONTENT"
assert_contains "T6h SSOT dispatcher handles /security-recheck" \
  "/security-recheck" "$DISPATCH_CONTENT"
assert_contains "T6i SSOT dispatcher handles /refire-tier-check" \
  "/refire-tier-check" "$DISPATCH_CONTENT"
|
||||
|
||||
# T1-T5 — script behavior against a local Gitea-fixture
printf '\n%s\n' "== T1-T5 script behavior (vs local fixture) =="

# Spin up the fixture HTTP server. The EXIT trap removes the scratch
# directory and kills the server if one was started.
FIXTURE_DIR=$(mktemp -d)
trap 'rm -rf "$FIXTURE_DIR"; [ -n "${FIX_PID:-}" ] && kill "$FIX_PID" 2>/dev/null || true' EXIT
FIXTURE_PY="$THIS_DIR/_refire_fixture.py"
[ -f "$FIXTURE_PY" ] || {
  echo "::error::fixture server $FIXTURE_PY missing"
  exit 1
}

FIX_STATE_DIR="$FIXTURE_DIR/state"
FIX_LOG="$FIXTURE_DIR/fixture.log"
mkdir -p "$FIX_STATE_DIR"

# Find an unused port.
FIX_PORT=$(python3 -c 'import socket;s=socket.socket();s.bind(("127.0.0.1",0));print(s.getsockname()[1]);s.close()')

FIXTURE_STATE_DIR="$FIX_STATE_DIR" python3 "$FIXTURE_PY" "$FIX_PORT" \
  >"$FIX_LOG" 2>&1 &
FIX_PID=$!

# fixture_alive — succeeds once the fixture answers its /_ping endpoint.
fixture_alive() {
  curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1
}

# Wait for fixture readiness: at most 50 probes, 0.1s apart.
probe=0
while [ "$probe" -lt 50 ] && ! fixture_alive; do
  sleep 0.1
  probe=$((probe + 1))
done
if ! fixture_alive; then
  echo "::error::fixture server failed to start. Log:"
  cat "$FIX_LOG"
  exit 1
fi
|
||||
|
||||
# run_scenario SCENARIO [TIER_RESULT]
# Selects a fixture scenario, clears the posted-status log, runs the refire
# script with the curl shim first on PATH and rate limiting disabled, then
# stores the combined output in last_run.log and the exit code in last_rc
# (both under $FIX_STATE_DIR).
#
# TIER_RESULT is one of: pass | fail_no_label | fail_no_approvals
# (default: pass) and is handed to the mock through MOCK_TIER_RESULT.
#
# The refire script's tier-check invocation is mocked because the real
# sop-tier-check.sh uses bash 4+ associative arrays — incompatible with
# the macOS bash 3.2 dev shell. Linux Gitea runners use bash 4/5 so
# production runs the real script. The mock exercises the success +
# failure branches of refire's status-POST glue.
run_scenario() {
  local scenario="$1"
  local tier_result="${2:-pass}"
  local out rc

  echo "$scenario" >"$FIX_STATE_DIR/scenario"
  : >"$FIX_STATE_DIR/posted_statuses.jsonl" # clear status log

  # errexit is lifted so a failing script run is recorded, not fatal.
  set +e
  out=$(
    PATH="$FIXTURE_DIR/bin:$PATH" \
    FIXTURE_PORT="$FIX_PORT" \
    GITEA_HOST="fixture.local" \
    GITEA_TOKEN="fixture-token" \
    REPO="molecule-ai/molecule-core" \
    PR_NUMBER="999" \
    COMMENT_AUTHOR="test-runner" \
    SOP_REFIRE_DISABLE_RATE_LIMIT="1" \
    SOP_REFIRE_TIER_CHECK_SCRIPT="$THIS_DIR/_mock_tier_check.sh" \
    MOCK_TIER_RESULT="$tier_result" \
    bash "$SCRIPT" 2>&1
  )
  rc=$?
  set -e

  echo "$out" >"$FIX_STATE_DIR/last_run.log"
  echo "$rc" >"$FIX_STATE_DIR/last_rc"
}
|
||||
|
||||
# Install a curl shim that rewrites https://fixture.local → http://127.0.0.1:$PORT
# Use bash prefix-strip (${var#prefix}) — it sidesteps the `/` delimiter
# confusion of ${var/pattern/replacement}.
#
# The real curl is resolved here, before the shim directory is ever put on
# PATH, and baked into the shim shell-quoted. A hard-coded /usr/bin/curl
# would break on hosts where curl is installed elsewhere.
REAL_CURL_Q=$(printf '%q' "$(command -v curl)")
mkdir -p "$FIXTURE_DIR/bin"
cat >"$FIXTURE_DIR/bin/curl" <<SHIM
#!/usr/bin/env bash
# Test shim: rewrite https://fixture.local/* -> http://127.0.0.1:${FIX_PORT}/*
# The fixture doesn't authenticate; -H Authorization passes through harmlessly.
new_args=()
for a in "\$@"; do
  if [[ "\$a" == https://fixture.local/* ]]; then
    rest="\${a#https://fixture.local}"
    a="http://127.0.0.1:${FIX_PORT}\${rest}"
  fi
  new_args+=("\$a")
done
exec ${REAL_CURL_Q} "\${new_args[@]}"
SHIM
chmod +x "$FIXTURE_DIR/bin/curl"
|
||||
|
||||
# load_last_result — after run_scenario, read the recorded exit code into RC
# and the posted-status log into POSTED (empty if the log is unreadable).
load_last_result() {
  RC=$(cat "$FIX_STATE_DIR/last_rc")
  POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
}

# T1: tier:low + 1 APPROVED + author is in engineers team → success
run_scenario "T1_success" "pass"
load_last_result
assert_eq "T1 exit code 0 (success)" "0" "$RC"
assert_contains "T1 POSTed state=success" '"state": "success"' "$POSTED"
assert_contains "T1 POST context is sop-tier-check / tier-check" \
  '"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
assert_contains "T1 description names commenter" "test-runner" "$POSTED"

# T2: missing tier label → tier-check fails internally (mock exits 1).
# FAIL-CLOSED contract (fix/core-ci-fail-closed): refire now captures the
# REAL exit code and POSTs state=failure — it does NOT forge a green on
# the required context. The refire job itself still exits 0 (it succeeded
# at posting an honest failure status).
run_scenario "T2_no_tier_label" "fail_no_label"
load_last_result
assert_eq "T2 exit code 0 (posted an honest status)" "0" "$RC"
assert_contains "T2 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"

# T3: tier:low present but ZERO approving reviews → internal tier check
# fails (mock exits 1). Refire POSTs state=failure, never a false green.
run_scenario "T3_no_approvals" "fail_no_approvals"
load_last_result
assert_eq "T3 exit code 0 (posted an honest status)" "0" "$RC"
assert_contains "T3 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"

# T4: closed PR — refire is a no-op (no POST, exit 0)
run_scenario "T4_closed" "pass"
load_last_result
assert_eq "T4 closed PR exits 0" "0" "$RC"
assert_eq "T4 closed PR posts no status" "" "$POSTED"
|
||||
|
||||
# T5: rate-limit — the scenario makes the fixture report a recent status
# entry, and the script must skip without posting. Rate limiting has to be
# active here, so SOP_REFIRE_DISABLE_RATE_LIMIT is not passed and is also
# explicitly unset inside the subshell: otherwise a value exported in the
# caller's environment would leak in and silently disable the behavior under
# test. The unset is confined to the command substitution.
echo "T5_rate_limited" >"$FIX_STATE_DIR/scenario"
: >"$FIX_STATE_DIR/posted_statuses.jsonl"
set +e
T5_OUT=$(
  unset SOP_REFIRE_DISABLE_RATE_LIMIT
  PATH="$FIXTURE_DIR/bin:$PATH" \
  GITEA_TOKEN="fixture-token" \
  GITEA_HOST="fixture.local" \
  REPO="molecule-ai/molecule-core" \
  PR_NUMBER="999" \
  COMMENT_AUTHOR="test-runner" \
  FIXTURE_PORT="$FIX_PORT" \
  bash "$SCRIPT" 2>&1
)
T5_RC=$?
set -e
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T5 rate-limited exits 0" "0" "$T5_RC"
assert_contains "T5 rate-limited log says skipped" "rate-limited" "$T5_OUT"
assert_eq "T5 rate-limited posts no status" "" "$POSTED"
|
||||
|
||||
# Totals, then the labels of any failed assertions; the final test is the
# script's exit status.
printf '\n%s\n%s\n' "------" "PASS=$PASS FAIL=$FAIL"
if [ "$FAIL" -gt 0 ]; then
  echo "Failed:$FAILED_TESTS"
fi
test "$FAIL" -eq 0
|
||||
@@ -1,474 +0,0 @@
|
||||
import importlib.util
|
||||
import json
|
||||
import pathlib
|
||||
import urllib.error
|
||||
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parents[1]
|
||||
SCRIPT = ROOT / "umbrella-reaper.py"
|
||||
|
||||
|
||||
def load_reaper():
    """Import umbrella-reaper.py from SCRIPT as a fresh module object.

    After executing the file, four module globals (API, GITEA_TOKEN,
    GITEA_HOST, REPO) are overwritten with fixture values. Returns the module.
    """
    module_spec = importlib.util.spec_from_file_location("umbrella_reaper", SCRIPT)
    module = importlib.util.module_from_spec(module_spec)
    assert module_spec.loader is not None
    module_spec.loader.exec_module(module)

    fixture_globals = {
        "API": "https://git.example.test/api/v1",
        "GITEA_TOKEN": "fixture-token",
        "GITEA_HOST": "git.example.test",
        "REPO": "owner/repo",
    }
    for attr, value in fixture_globals.items():
        setattr(module, attr, value)
    return module
|
||||
|
||||
|
||||
class FakeResponse:
    """Stand-in for an HTTP response object.

    Usable as a context manager; ``read()`` returns the payload serialized as
    UTF-8 encoded JSON. ``status`` is fixed at 200.
    """

    status = 200

    def __init__(self, payload):
        self.payload = payload

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # False: exceptions raised inside the ``with`` body are not suppressed.
        return False

    def read(self):
        serialized = json.dumps(self.payload)
        return serialized.encode("utf-8")
|
||||
|
||||
|
||||
def _pr_fixture(number: int, sha: str) -> dict:
|
||||
return {"number": number, "head": {"sha": sha}}
|
||||
|
||||
|
||||
def _status_entry(context: str, state: str) -> dict:
|
||||
return {"context": context, "status": state}
|
||||
|
||||
|
||||
def test_process_pr_compensates_when_all_sub_jobs_success(monkeypatch):
    """Umbrella context failing, every required sub-job green: exactly one
    status is posted, for the umbrella context on the PR head SHA, with a
    description containing "Compensating status"."""
    reaper = load_reaper()
    recorded = []

    def fake_post_status(sha, context, description):
        recorded.append((sha, context, description))

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(reaper, "post_status", fake_post_status)
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)

    result = reaper.process_pr(_pr_fixture(1, "abc123"))

    assert result is True
    assert len(recorded) == 1
    sha, context, description = recorded[0]
    assert sha == "abc123"
    assert context == "CI / all-required (pull_request)"
    assert "Compensating status" in description
|
||||
|
||||
|
||||
def test_process_pr_skips_when_umbrella_missing(monkeypatch):
    """No umbrella entry among the statuses: process_pr returns True and
    posts nothing."""
    reaper = load_reaper()
    sent = []
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: sent.append(a))
    monkeypatch.setattr(reaper, "REQUIRED_SUB_JOBS", ["CI / Platform (Go) (pull_request)"])
    monkeypatch.setattr(
        reaper,
        "get_combined_status",
        lambda sha: {
            "statuses": [
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        },
    )

    result = reaper.process_pr(_pr_fixture(2, "def456"))

    assert result is True
    assert sent == []
|
||||
|
||||
|
||||
def test_process_pr_skips_when_sub_job_pending(monkeypatch):
    """One required sub-job still pending: process_pr returns True and posts
    nothing."""
    reaper = load_reaper()
    sent = []

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "pending"),
            ]
        }

    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: sent.append(a))
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)

    result = reaper.process_pr(_pr_fixture(3, "ghi789"))

    assert result is True
    assert sent == []
|
||||
|
||||
|
||||
def test_process_pr_skips_when_sub_job_failure(monkeypatch):
    """One required sub-job failed: process_pr returns True and posts
    nothing."""
    reaper = load_reaper()
    sent = []

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "failure"),
            ]
        }

    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: sent.append(a))
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)

    result = reaper.process_pr(_pr_fixture(4, "jkl012"))

    assert result is True
    assert sent == []
|
||||
|
||||
|
||||
def test_process_pr_returns_false_on_post_failure(monkeypatch):
    """post_status raising ApiError makes process_pr return False."""
    reaper = load_reaper()

    def failing_post_status(sha, context, description):
        raise reaper.ApiError("POST /statuses/abc123 -> HTTP 500: simulated failure")

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(reaper, "post_status", failing_post_status)
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)

    result = reaper.process_pr(_pr_fixture(5, "abc123"))

    assert result is False
|
||||
|
||||
|
||||
def test_main_exits_nonzero_when_any_post_fails(monkeypatch):
    """Two open PRs, the second post_status call raises ApiError: main()
    returns 1."""
    reaper = load_reaper()

    for name, value in (
        ("GITEA_TOKEN", "fixture-token"),
        ("GITEA_HOST", "git.example.test"),
        ("REPO", "owner/repo"),
    ):
        monkeypatch.setenv(name, value)

    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(
        reaper,
        "list_open_prs",
        lambda limit: [
            _pr_fixture(1, "abc123"),
            _pr_fixture(2, "def456"),
        ],
    )

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)

    # Every call is recorded; only the second one fails.
    attempts = []

    def fake_post_status(sha, context, description):
        attempts.append(sha)
        if len(attempts) == 2:
            raise reaper.ApiError("simulated failure")

    monkeypatch.setattr(reaper, "post_status", fake_post_status)

    assert reaper.main() == 1
|
||||
|
||||
|
||||
def test_main_exits_zero_when_all_posts_succeed(monkeypatch):
    """One open PR and a post_status that never raises: main() returns 0."""
    reaper = load_reaper()

    for name, value in (
        ("GITEA_TOKEN", "fixture-token"),
        ("GITEA_HOST", "git.example.test"),
        ("REPO", "owner/repo"),
    ):
        monkeypatch.setenv(name, value)

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(
        reaper,
        "list_open_prs",
        lambda limit: [_pr_fixture(1, "abc123")],
    )
    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: None)

    assert reaper.main() == 0
|
||||
|
||||
|
||||
def test_dry_run_does_not_post(monkeypatch):
    """With DRY_RUN set, process_pr returns True and no POST to a
    /statuses/ path goes through the patched api()."""
    reaper = load_reaper()
    seen_calls = []

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        seen_calls.append((method, path, body))
        return 200, {"ok": True}

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(reaper, "api", fake_api)
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)
    monkeypatch.setattr(reaper, "DRY_RUN", True)

    result = reaper.process_pr(_pr_fixture(6, "mno345"))

    assert result is True
    # DRY_RUN should prevent the POST /statuses call
    status_posts = [
        (method, path)
        for method, path, _ in seen_calls
        if method == "POST" and "/statuses/" in path
    ]
    assert not status_posts
|
||||
|
||||
|
||||
def test_duplicate_contexts_use_latest_state(monkeypatch):
    """The same sub-job context appears twice (pending, then success); the
    test expects exactly one status to be posted."""
    reaper = load_reaper()
    sent = []

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                # duplicate: first pending, then success — the loop overwrites
                _status_entry("CI / Detect changes (pull_request)", "pending"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: sent.append(a))
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", fake_combined_status)

    result = reaper.process_pr(_pr_fixture(7, "pqr678"))

    assert result is True
    assert len(sent) == 1
|
||||
|
||||
|
||||
def test_load_required_sub_jobs_from_ci_yml_pull_request_event():
    """Derived sub-job contexts all carry the " (pull_request)" suffix and
    include two known jobs."""
    reaper = load_reaper()
    # UMBRELLA_CONTEXT defaults to pull_request, so derivation should yield
    # the pull_request suffix.
    derived = reaper._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    for job in derived:
        assert job.endswith(" (pull_request)")
    for expected in (
        "CI / Detect changes (pull_request)",
        "CI / Python Lint & Test (pull_request)",
    ):
        assert expected in derived
|
||||
|
||||
|
||||
def test_load_required_sub_jobs_from_ci_yml_push_event(monkeypatch):
    """With UMBRELLA_CONTEXT patched to the push variant, derived contexts
    all carry the " (push)" suffix."""
    reaper = load_reaper()
    monkeypatch.setattr(reaper, "UMBRELLA_CONTEXT", "CI / all-required (push)")
    derived = reaper._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    for job in derived:
        assert job.endswith(" (push)")
    assert "CI / Detect changes (push)" in derived
|
||||
|
||||
|
||||
def test_list_open_prs_paginates(monkeypatch):
    """list_open_prs(limit=2) gathers PRs across pages.

    The fake API serves two PRs on page 1, one on page 2 and nothing after
    that. The test expects all three PRs back in order, and the first two
    requests to carry page "1" and "2" as strings.
    """
    mod = load_reaper()
    calls = []

    # Canned responses keyed by page number; any other page is empty.
    pages = {
        1: [{"number": 1}, {"number": 2}],
        2: [{"number": 3}],
    }

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        calls.append(query)
        page = int(query.get("page", 1))
        # Hand out a fresh list per call so the canned data is never shared.
        return 200, list(pages.get(page, []))

    monkeypatch.setattr(mod, "api", fake_api)
    prs = mod.list_open_prs(limit=2)
    assert len(prs) == 3
    assert prs[0]["number"] == 1
    assert prs[2]["number"] == 3
    assert calls[0]["page"] == "1"
    assert calls[1]["page"] == "2"
|
||||
|
||||
|
||||
def test_process_pr_returns_false_on_status_fetch_failure(monkeypatch):
    """An ApiError raised while reading the combined status yields False."""
    mod = load_reaper()

    def failing_status(sha):
        raise mod.ApiError("GET /statuses/abc123 -> HTTP 500: simulated outage")

    monkeypatch.setattr(mod, "get_combined_status", failing_status)
    required = ["CI / Detect changes (pull_request)"]
    monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", required)

    result = mod.process_pr(_pr_fixture(8, "abc123"))
    assert result is False
|
||||
|
||||
|
||||
def test_process_pr_returns_false_on_missing_statuses_array(monkeypatch):
    """A combined-status payload without a 'statuses' list yields False."""
    mod = load_reaper()

    # Payload deliberately lacks the 'statuses' array.
    monkeypatch.setattr(mod, "get_combined_status", lambda sha: {"state": "success"})
    required = ["CI / Detect changes (pull_request)"]
    monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", required)

    result = mod.process_pr(_pr_fixture(9, "def456"))
    assert result is False
|
||||
|
||||
|
||||
def test_main_exits_nonzero_when_any_status_read_fails(monkeypatch):
    """main() returns 1 when the status read fails for at least one PR."""
    mod = load_reaper()

    for key, value in (
        ("GITEA_TOKEN", "fixture-token"),
        ("GITEA_HOST", "git.example.test"),
        ("REPO", "owner/repo"),
    ):
        monkeypatch.setenv(key, value)

    required = [
        "CI / Detect changes (pull_request)",
        "CI / Platform (Go) (pull_request)",
    ]
    monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", required)

    def two_open_prs(limit):
        return [_pr_fixture(1, "abc123"), _pr_fixture(2, "def456")]

    monkeypatch.setattr(mod, "list_open_prs", two_open_prs)

    def fake_combined_status(sha):
        # Only the first PR's SHA resolves; every other SHA simulates an outage.
        if sha != "abc123":
            raise mod.ApiError("simulated status fetch failure")
        entries = [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "success"),
        ]
        return {"statuses": entries}

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
    monkeypatch.setattr(mod, "post_status", lambda *a, **k: None)

    assert mod.main() == 1
|
||||
@@ -1,360 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""umbrella-reaper — auto-recovery for stale CI umbrella statuses on PRs.
|
||||
|
||||
Tracking: molecule-core#1780.
|
||||
|
||||
Sibling to status-reaper.py (default-branch push-suffix compensation),
|
||||
but scoped to pull_request umbrellas instead of main-branch contexts.
|
||||
|
||||
What this script does, per `.gitea/workflows/umbrella-reaper.yml` invocation:
|
||||
|
||||
1. List open PRs via GET /repos/{o}/{r}/pulls?state=open&limit={N}.
|
||||
2. For EACH PR:
|
||||
- GET combined commit status for PR head SHA.
|
||||
- Look for the umbrella context (default: "CI / all-required (pull_request)").
|
||||
- If umbrella state is "failure":
|
||||
- Verify ALL required sub-job contexts are "success".
|
||||
- If yes → POST compensating success to /statuses/{sha} with the
|
||||
same umbrella context and an honest description.
|
||||
- If any required sub-job is NOT success → skip (umbrella correctly
|
||||
reflects reality; do NOT lie).
|
||||
- If umbrella state is "success" or "pending" → skip.
|
||||
3. Exit 0 when every PR was handled cleanly, 1 otherwise. Re-running is idempotent — Gitea de-dups by context.
|
||||
|
||||
What it does NOT do:
|
||||
- Touch non-umbrella contexts.
|
||||
- Compensate when ANY required sub-job is missing, pending, failure, or
|
||||
cancelled. Only the "all sub-jobs green, umbrella stale" race.
|
||||
- Merge PRs. It only posts a status; branch protection still requires
|
||||
human approval.
|
||||
- Run on closed PRs.
|
||||
|
||||
Halt conditions:
|
||||
- Missing required env vars → exit 1 with ::error:: message.
|
||||
- API 5xx on PR list → fail-loud (can't assess state).
|
||||
- API 5xx on an individual PR's status → ::error:: + continue to next PR; the run then exits 1.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _load_required_sub_jobs_from_ci_yml(workflows_dir: str) -> list[str]:
    """Parse ci.yml and extract the all-required sentinel's sub-job contexts.

    The first step of the ``all-required`` job that has a non-empty ``run``
    block is inspected. Two shapes of that block are supported:
      1. Legacy Python f-string list (pre-2026-06-01):
           f"CI / Detect changes ({event})"
      2. Current shell-script shape (post-2026-06-01 scheduler fix):
           check "Detect changes" "$CHANGES_RESULT"

    The event suffix is taken from the trailing ``(...)`` of UMBRELLA_CONTEXT
    and defaults to ``(pull_request)`` when the context has none.

    Args:
        workflows_dir: directory that contains ``ci.yml``.

    Returns:
        The derived status contexts, e.g. ``"CI / Detect changes (pull_request)"``.

    Raises:
        RuntimeError: if ci.yml is missing, has no all-required job, or the
            run block cannot be parsed.
    """
    ci_path = Path(workflows_dir) / "ci.yml"
    if not ci_path.exists():
        raise RuntimeError(f"ci.yml not found at {ci_path}")

    # PyYAML is installed by the workflow (same as status-reaper.py).
    import yaml

    with ci_path.open() as f:
        doc = yaml.safe_load(f)

    # An empty or non-mapping document must surface as the documented
    # RuntimeError rather than an AttributeError on ``.get``.
    jobs = doc.get("jobs") if isinstance(doc, dict) else None
    all_required = jobs.get("all-required") if isinstance(jobs, dict) else None
    if not isinstance(all_required, dict):
        raise RuntimeError("ci.yml missing 'all-required' job")

    run_block = ""
    # ``steps:`` may be present but null; treat that like an empty list.
    for step in all_required.get("steps") or []:
        if isinstance(step, dict):
            run_text = step.get("run", "")
            if run_text:
                run_block = run_text
                break

    if not run_block:
        raise RuntimeError("all-required job missing run block")

    # Determine the event suffix from the umbrella context we are watching.
    # The suffix always keeps its parentheses so that both output shapes
    # below produce "CI / <name> (<event>)".
    suffix_match = re.search(r' \(([^)]+)\)$', UMBRELLA_CONTEXT)
    suffix = f"({suffix_match.group(1)})" if suffix_match else "(pull_request)"

    # Try legacy f-string format first.
    if "({event})" in run_block:
        matches = re.findall(r'f["\'](.*?\(\{event\}\))["\']', run_block)
        if matches:
            return [ctx.replace("({event})", suffix) for ctx in matches]

    # Try current shell-script format: check "Name" "$RESULT"
    matches = re.findall(r'check\s+"([^"]+)"', run_block)
    if matches:
        return [f"CI / {name} {suffix}" for name in matches]

    raise RuntimeError("unable to derive required sub-jobs from all-required run block")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Environment
|
||||
# --------------------------------------------------------------------------
|
||||
def _env(key: str, *, default: str = "") -> str:
|
||||
return os.environ.get(key, default)
|
||||
|
||||
|
||||
GITEA_TOKEN = _env("GITEA_TOKEN")
GITEA_HOST = _env("GITEA_HOST")
REPO = _env("REPO")
# Dry-run is enabled only by the explicit truthy spellings (case-insensitive).
DRY_RUN = _env("DRY_RUN").lower() in {"1", "true", "yes"}

# The umbrella context to watch. Must match the branch-protection name
# exactly (Gitea de-dups by context string).
UMBRELLA_CONTEXT = _env("UMBRELLA_CONTEXT", default="CI / all-required (pull_request)")

# Required sub-job contexts. The umbrella is only compensated when ALL of
# these are "success" on the same SHA. Order does not matter.
#
# Derived from ci.yml at import time to prevent drift (CR2 blocker #1).
# A non-empty REQUIRED_SUB_JOBS env var (";"-separated) overrides the
# derivation for emergency tuning or local testing.
_REQUIRED_SUB_JOBS_OVERRIDE = _env("REQUIRED_SUB_JOBS")
if _REQUIRED_SUB_JOBS_OVERRIDE:
    REQUIRED_SUB_JOBS = [
        part
        for part in map(str.strip, _REQUIRED_SUB_JOBS_OVERRIDE.split(";"))
        if part
    ]
else:
    try:
        REQUIRED_SUB_JOBS = _load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    except Exception as exc:
        # Without a trustworthy sub-job list nothing can be verified, so the
        # process stops here with exit status 1.
        sys.stderr.write(
            f"::error::Failed to derive REQUIRED_SUB_JOBS from ci.yml: {exc}\n"
        )
        sys.exit(1)

# "owner/name" -> ("owner", "name"); a value without "/" leaves NAME empty.
OWNER, NAME = REPO.partition("/")[::2]
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
PR_LIMIT = int(_env("PR_LIMIT", default="50"))
|
||||
|
||||
|
||||
def _require_runtime_env() -> None:
|
||||
for key in ("GITEA_TOKEN", "GITEA_HOST", "REPO"):
|
||||
if not os.environ.get(key):
|
||||
sys.stderr.write(f"::error::missing required env var: {key}\n")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Tiny HTTP helper
|
||||
# --------------------------------------------------------------------------
|
||||
class ApiError(RuntimeError):
    """Raised when a Gitea API call fails or returns an unexpected payload."""
|
||||
|
||||
|
||||
def api(
    method: str,
    path: str,
    *,
    body: dict | None = None,
    query: dict[str, str] | None = None,
    expect_json: bool = True,
) -> tuple[int, Any]:
    """Send one request to the Gitea API and return ``(status, parsed_body)``.

    Args:
        method: HTTP method.
        path: path appended to the ``API`` base URL.
        body: optional payload, sent JSON-encoded.
        query: optional query parameters, URL-encoded onto the path.
        expect_json: when False, a non-JSON 2xx body is returned as
            ``{"_raw": <text>}`` instead of raising.

    Returns:
        The HTTP status and the decoded JSON body; the body is ``None`` when
        the response is empty.

    Raises:
        ApiError: on a non-2xx status, on a transport failure (connection
            error, timeout), or on a non-JSON body while ``expect_json`` is
            true.
    """
    url = f"{API}{path}"
    if query:
        url = f"{url}?{urllib.parse.urlencode(query)}"
    data = None
    headers = {
        "Authorization": f"token {GITEA_TOKEN}",
        "Accept": "application/json",
    }
    if body is not None:
        data = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, method=method, data=data, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
            status = resp.status
    except urllib.error.HTTPError as e:
        # Non-2xx responses still carry a body worth reporting below.
        raw = e.read()
        status = e.code
    except OSError as e:
        # URLError and socket timeouts are OSError subclasses. Convert them
        # so callers that catch ApiError also handle transport failures
        # instead of crashing on an unrelated exception type.
        raise ApiError(f"{method} {path} -> transport error: {e}") from e

    if not (200 <= status < 300):
        # Cap the echoed body so error messages stay readable.
        snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
        raise ApiError(f"{method} {path} -> HTTP {status}: {snippet}")

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except json.JSONDecodeError as e:
        if expect_json:
            raise ApiError(
                f"{method} {path} -> HTTP {status} but body is not JSON: {e}"
            ) from e
        return status, {"_raw": raw.decode("utf-8", errors="replace")}
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Gitea reads / writes
|
||||
# --------------------------------------------------------------------------
|
||||
def list_open_prs(limit: int = 50) -> list[dict]:
    """Paginate through all open PR pages. Fail closed on non-list responses.

    Args:
        limit: page size requested from the API.

    Returns:
        Every open PR object, in the order the pages were returned.

    Raises:
        ApiError: if any page is not a JSON array (or the request fails).
    """
    all_prs: list[dict] = []
    page = 1
    while True:
        _, body = api(
            "GET",
            f"/repos/{OWNER}/{NAME}/pulls",
            query={"state": "open", "limit": str(limit), "page": str(page)},
        )
        if not isinstance(body, list):
            raise ApiError(f"PR list page {page} response is not a JSON array")
        # Only an empty page ends the scan. A page shorter than ``limit`` is
        # not proof of the last page: the server may cap its page size below
        # the requested limit, and stopping there would silently drop PRs.
        if not body:
            return all_prs
        all_prs.extend(body)
        page += 1
|
||||
|
||||
|
||||
def get_combined_status(sha: str) -> dict:
    """Fetch the combined commit status for ``sha``.

    Raises:
        ApiError: if the response body is not a JSON object.
    """
    payload = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")[1]
    if isinstance(payload, dict):
        return payload
    raise ApiError(f"status for {sha} response is not a JSON object")
|
||||
|
||||
|
||||
def post_status(sha: str, context: str, description: str) -> None:
    """POST a ``success`` status for ``context`` on ``sha``.

    When DRY_RUN is set, the payload is printed and no request is made.
    """
    payload = dict(context=context, state="success", description=description)
    if not DRY_RUN:
        api("POST", f"/repos/{OWNER}/{NAME}/statuses/{sha}", body=payload)
        return
    print(f"[DRY-RUN] Would POST /statuses/{sha}: {json.dumps(payload)}")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Core logic
|
||||
# --------------------------------------------------------------------------
|
||||
def _entry_state(s: dict) -> str:
|
||||
return s.get("status") or s.get("state") or ""
|
||||
|
||||
|
||||
def process_pr(pr: dict) -> bool:
    """Inspect one open PR and compensate its umbrella status when it is stale.

    A compensating ``success`` is posted only when the umbrella context is
    ``failure`` while every context in REQUIRED_SUB_JOBS is present and
    ``success`` on the PR head SHA. Every other situation is a no-op skip.

    Args:
        pr: PR object as returned by the pulls API; ``number`` and
            ``head.sha`` are read.

    Returns:
        True when the PR was handled (including no-op skips). False when the
        combined status could not be fetched, lacked a ``statuses`` list, or
        the compensating POST failed.
    """
    num = pr.get("number")
    # "head" can be present but null; treat that like a missing SHA instead
    # of crashing on ``None.get``.
    head = pr.get("head")
    sha = head.get("sha") if isinstance(head, dict) else None
    if not sha:
        print(f"::warning::PR #{num}: missing head.sha; skipping")
        return True

    try:
        status = get_combined_status(sha)
    except ApiError as e:
        print(f"::error::PR #{num}: status fetch failed: {e}")
        return False

    statuses = status.get("statuses")
    if not isinstance(statuses, list):
        print(f"::error::PR #{num}: combined status missing 'statuses' array")
        return False

    umbrella_entry = None
    subjob_states: dict[str, str] = {}

    # When a context appears more than once, the later entry in the list
    # overwrites the earlier one.
    for s in statuses:
        if not isinstance(s, dict):
            continue
        ctx = s.get("context", "")
        state = _entry_state(s)
        if ctx == UMBRELLA_CONTEXT:
            umbrella_entry = s
        if ctx in REQUIRED_SUB_JOBS:
            subjob_states[ctx] = state

    if umbrella_entry is None:
        print(f"::notice::PR #{num}: no umbrella context '{UMBRELLA_CONTEXT}'; skipping")
        return True

    umbrella_state = _entry_state(umbrella_entry)
    if umbrella_state != "failure":
        print(f"::notice::PR #{num}: umbrella is '{umbrella_state}'; skipping")
        return True

    # Verify ALL required sub-jobs are present and success
    missing = [ctx for ctx in REQUIRED_SUB_JOBS if ctx not in subjob_states]
    if missing:
        print(
            f"::notice::PR #{num}: umbrella=failure, but missing sub-jobs: {missing}; "
            "skipping (sub-jobs may still be running)"
        )
        return True

    not_success = [ctx for ctx in REQUIRED_SUB_JOBS if subjob_states[ctx] != "success"]
    if not_success:
        print(
            f"::notice::PR #{num}: umbrella=failure, but sub-jobs not all success: "
            f"{[(ctx, subjob_states[ctx]) for ctx in not_success]}; skipping"
        )
        return True

    # All checks pass — post compensating status
    desc = (
        "Compensating status: all required sub-jobs verified success; "
        "umbrella stale due to commit-status propagation race. "
        f"Auto-posted by umbrella-reaper for PR #{num}."
    )
    try:
        post_status(sha, UMBRELLA_CONTEXT, desc)
        print(f"::notice::PR #{num}: posted compensating success for {UMBRELLA_CONTEXT}")
        return True
    except ApiError as e:
        print(f"::error::PR #{num}: failed to post compensating status: {e}")
        return False
|
||||
|
||||
|
||||
def main() -> int:
    """Scan every open PR once and return the process exit code.

    Returns:
        0 when every PR was handled cleanly; 1 when REQUIRED_SUB_JOBS is
        empty or ``process_pr`` reported a failure for at least one PR.
    """
    _require_runtime_env()

    # Drift guard: ci.yml derivation already happened at module load, but
    # we sanity-check it is non-empty so the loop below doesn't trivially
    # no-op because of a parse bug.
    if not REQUIRED_SUB_JOBS:
        sys.stderr.write("::error::REQUIRED_SUB_JOBS is empty; bailing out\n")
        return 1

    prs = list_open_prs(limit=PR_LIMIT)
    print(f"::notice::Scanning {len(prs)} open PRs for stale umbrella statuses")
    # A False result covers both failed status reads and failed compensating
    # POSTs, so the summary counts PRs rather than POSTs.
    failed = sum(1 for pr in prs if not process_pr(pr))
    print(f"::notice::umbrella-reaper complete (failed PRs={failed})")
    return 1 if failed else 0
|
||||
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -55,22 +55,38 @@
|
||||
|
||||
version: 1
|
||||
|
||||
# Uniform hard-fail mode (CTO 2026-06-07):
|
||||
# Every PR uses the same gate — no tier branching.
|
||||
# Missing acks → status `failure`, blocks merge via branch protection.
|
||||
# Tier-aware failure mode (RFC#351 open question 2):
|
||||
# For tier:high — hard-fail (status `failure`, blocks merge via BP).
|
||||
# For tier:medium — hard-fail (same as high; medium is non-trivial).
|
||||
# For tier:low — soft-fail (status `pending` with `acked: N/M` in the
|
||||
# description). BP can choose to require the context
|
||||
# or not for low-tier PRs.
|
||||
# If no tier label is present, default to medium (hard-fail) — every PR
|
||||
# should have a tier label per sop-tier-check, and absence indicates
|
||||
# a missing-tier defect we should surface, not silently lower the bar.
|
||||
tier_failure_mode:
|
||||
"tier:high": hard
|
||||
"tier:medium": hard
|
||||
"tier:low": soft
|
||||
default_mode: hard # used when no tier:* label is present
|
||||
|
||||
# High-risk class (RFC#450 Option C, governance-fix for internal#442).
|
||||
#
|
||||
# A PR is "high-risk" when ANY of the listed labels are applied.
|
||||
# A PR is "high-risk" when ANY of the listed labels are applied OR when
|
||||
# the PR has `tier:high` (mechanically the strictest existing tier).
|
||||
# High-risk items use `required_teams_high_risk` (when present on the
|
||||
# item); non-high-risk items use the default `required_teams`.
|
||||
#
|
||||
# Risk-classed two-eyes shape:
|
||||
# - Default class (not high-risk): a non-author engineers/managers/ceo
|
||||
# ack satisfies the item — 25+ live identities, no dependency on a
|
||||
# dead/inactive senior persona token.
|
||||
# - High-risk class (any high_risk_label): still requires a non-author
|
||||
# ceo ack (durable human team).
|
||||
# This closes the inconsistency that the SOP charter already mandates
|
||||
# `tier:high → ceo only` for the sibling `sop-tier-check` gate; the
|
||||
# sop-checklist's `root-cause` and `no-backwards-compat` items now
|
||||
# follow the same risk-classed two-eyes shape:
|
||||
# - Default class (tier:low/medium, not high-risk): a non-author
|
||||
# engineers/managers/ceo ack satisfies the item — 25+ live
|
||||
# identities, no dependency on a dead/inactive senior persona
|
||||
# token.
|
||||
# - High-risk class (tier:high OR any high_risk_label): still
|
||||
# requires a non-author ceo ack (durable human team).
|
||||
#
|
||||
# Tightening: add labels to high_risk_labels.
|
||||
# Loosening: remove labels.
|
||||
|
||||
@@ -13,14 +13,14 @@
|
||||
# the structured JSON shape is forward-compatible.
|
||||
#
|
||||
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
|
||||
# extract pattern as sop-checklist.
|
||||
# extract pattern as sop-tier-check.
|
||||
|
||||
name: audit-force-merge
|
||||
|
||||
# pull_request_target loads from the base branch — same security model
|
||||
# as sop-checklist. Without this, an attacker could rewrite the
|
||||
# as sop-tier-check. Without this, an attacker could rewrite the
|
||||
# workflow on a PR and skip the audit emission for their own
|
||||
# force-merge. See `.gitea/workflows/sop-checklist.yml` for the full
|
||||
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
|
||||
# rationale.
|
||||
on:
|
||||
pull_request_target:
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
ref: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Detect force-merge + emit audit event
|
||||
env:
|
||||
# Same org-level secret the sop-checklist workflow uses.
|
||||
# Same org-level secret the sop-tier-check workflow uses.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
# required checks) for each branch listed here.
|
||||
#
|
||||
# Declared here rather than fetched from /branch_protections
|
||||
# because that endpoint requires admin write — sop-checklist-bot is
|
||||
# because that endpoint requires admin write — sop-tier-bot is
|
||||
# read-only by design (least-privilege).
|
||||
REQUIRED_CHECKS_JSON: |
|
||||
{
|
||||
|
||||
@@ -34,8 +34,6 @@ jobs:
|
||||
check:
|
||||
name: Block forbidden paths
|
||||
runs-on: ubuntu-latest
|
||||
# Hard gate — detected internal-path leaks fail the workflow.
|
||||
# continue-on-error removed per directive (fail-open → fail-closed).
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
|
||||
@@ -1,165 +0,0 @@
|
||||
name: boot-to-registration-e2e (advisory)
|
||||
|
||||
# cp#455 — Minimal-cell boot-to-registration e2e.
|
||||
# CTO directive 14eb4f07: "build the minimal claude-code+kimi cell,
|
||||
# it should now go GREEN since the fix is live."
|
||||
#
|
||||
# Stage 1 of 5-stage rollout. Reuses the dispatch-only EC2
|
||||
# provisioning path from test_staging_full_saas.sh but reduced to
|
||||
# the minimum boot-to-registration surface:
|
||||
#
|
||||
# 1. Provision request accepted; workspace transitions to booting/running
|
||||
# 2. Controlplane receives /registry/register for that workspace_id
|
||||
# 3. JSON-RPC/completion route returns successful minimal response
|
||||
# 4. Teardown terminates workspace even on failure (trap)
|
||||
#
|
||||
# Advisory (non-blocking) per Researcher Stage 2 design — RED on
|
||||
# current main is expected pre-cp#469-cluster. After cp#477 deploy
|
||||
# (888efceb) + PR #2167 merge, cell should turn GREEN. THAT green
|
||||
# is the cluster-proof signal.
|
||||
#
|
||||
# Cost controls (mandatory):
|
||||
# - SPOT instances (tagged run_id/workspace_id for cost attribution)
|
||||
# - Fast teardown (~3-5 min wall-clock) even on assertion failure
|
||||
# - Structured per-cell results JSON (runtime/provider/model/
|
||||
# billing_mode/workspace_id/register_status/completion_status/
|
||||
# teardown_status/elapsed_seconds)
|
||||
#
|
||||
# Inputs:
|
||||
# runtime : default claude-code
|
||||
# billing_mode : default platform_managed (the cp#469-cluster path)
|
||||
# provider : default platform (vs direct-to-provider)
|
||||
# model : default moonshot/kimi-k2.6 (CTO-specified)
|
||||
#
|
||||
# PR target: molecule-core (this file). Companion harness extension
|
||||
# (test_minimal_boot_cell.sh) lives in tests/e2e/ alongside
|
||||
# test_staging_full_saas.sh — same repo, same branch.
|
||||
#
|
||||
# Note: cp#455 was originally spec'd to live in molecule-controlplane
|
||||
# (`.gitea/workflows/` path), but molecule-core's CI is the home for
|
||||
# tenant-boot e2e tests in this stage. Stage 2 may move the path.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
# Note: Gitea 1.22.6 does not support workflow_dispatch.inputs
|
||||
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Defaults
|
||||
# are hardcoded in the job env below. Stage 2 can add matrix/
|
||||
# param support once the Gitea version supports it.
|
||||
|
||||
# Advisory: no cron schedule, manual dispatch only. Branch protection
|
||||
# doesn't require this — RED on main is expected pre-cp#469-cluster
|
||||
# deploy, GREEN signals the cluster is live.
|
||||
permissions:
|
||||
contents: read
|
||||
# No issue-write; failures surface as red runs in workflow history.
|
||||
|
||||
concurrency:
|
||||
group: boot-to-registration-e2e
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# bp-exempt: advisory e2e — non-gating, manual dispatch only (cp#455 Stage 1)
|
||||
minimal-cell:
|
||||
name: Minimal cell (claude-code + platform + moonshot/kimi-k2.6)
|
||||
runs-on: ubuntu-latest
|
||||
# Bounded at 12 min. Wall-clock budget breakdown:
|
||||
# - cold EC2 provision: ~3-4 min (SPOT)
|
||||
# - /registry/register wait: ~30s
|
||||
# - completion call: ~10s
|
||||
# - teardown: ~30-60s
|
||||
# - tail headroom: ~6-7 min
|
||||
timeout-minutes: 12
|
||||
env:
|
||||
# Hardcoded defaults — Gitea 1.22.6 does not support workflow_dispatch.inputs
|
||||
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Stage 2 can add
|
||||
# matrix/param support once the Gitea version supports it.
|
||||
E2E_RUNTIME: claude-code
|
||||
E2E_BILLING_MODE: platform_managed
|
||||
E2E_PROVIDER: platform
|
||||
E2E_MODEL: moonshot/kimi-k2.6
|
||||
E2E_RUN_ID: cp455-${{ github.run_id }}
|
||||
E2E_PROVISION_TIMEOUT_SECS: '300' # 5 min — fast teardown budget
|
||||
MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Verify required secrets present
|
||||
run: |
|
||||
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — minimal-cell e2e cannot run"
|
||||
echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install required tools
|
||||
run: |
|
||||
for cmd in jq curl python3; do
|
||||
command -v "$cmd" >/dev/null 2>&1 || {
|
||||
echo "::error::required tool '$cmd' not on PATH — runner image regression?"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
|
||||
- name: Run minimal-cell boot-to-registration harness
|
||||
# The harness script handles its own teardown via EXIT trap;
|
||||
# even on assertion failure (provision timeout, register
|
||||
# timeout, completion failure), the workspace is deprovisioned
|
||||
# and a leak is reported. Exit code propagates from the script.
|
||||
# Structured per-cell results are emitted to ${GITHUB_STEP_SUMMARY}
|
||||
# so operators see pass/fail per assertion without scrolling.
|
||||
run: |
|
||||
bash tests/e2e/test_minimal_boot_cell.sh
|
||||
|
||||
- name: Emit structured per-cell results
|
||||
if: always()
|
||||
# Always run (even on failure) so the structured results are
|
||||
# visible in the workflow summary. The script writes a JSON
|
||||
# file at /tmp/cell-result.json; this step renders it as a
|
||||
# job summary.
|
||||
run: |
|
||||
if [ -f /tmp/cell-result.json ]; then
|
||||
echo "## Minimal-cell results" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo '```json' >> "$GITHUB_STEP_SUMMARY"
|
||||
cat /tmp/cell-result.json >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo '```' >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "## Minimal-cell results: NO_RESULT_FILE" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "Harness did not produce /tmp/cell-result.json — likely crashed before trap fired." >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
|
||||
- name: Failure summary
|
||||
if: failure()
|
||||
run: |
|
||||
{
|
||||
echo "## cp#455 minimal-cell FAILED"
|
||||
echo ""
|
||||
echo "**Run ID:** ${{ github.run_id }}"
|
||||
echo "**Runtime:** ${E2E_RUNTIME}"
|
||||
echo "**Billing mode:** ${E2E_BILLING_MODE}"
|
||||
echo "**Provider:** ${E2E_PROVIDER}"
|
||||
echo "**Model:** ${E2E_MODEL}"
|
||||
echo "**Slug:** ${E2E_RUN_ID}"
|
||||
echo ""
|
||||
echo "### What this means"
|
||||
echo ""
|
||||
echo "The minimal claude-code+kimi cell did not pass all 4 assertions:"
|
||||
echo "1. Provision request accepted; workspace transitions to booting/running"
|
||||
echo "2. Controlplane receives /registry/register for that workspace_id"
|
||||
echo "3. JSON-RPC/completion route returns successful minimal response"
|
||||
echo "4. Teardown terminates workspace even on failure (trap)"
|
||||
echo ""
|
||||
echo "RED is expected pre-cp#469-cluster. After cp#477 deploy (888efceb) + PR #2167 merge,"
|
||||
echo "this should turn GREEN. Persistent RED after both merge = cluster bug, not e2e bug."
|
||||
echo ""
|
||||
echo "### Next steps"
|
||||
echo ""
|
||||
echo "1. Check the harness output above for the assertion that failed"
|
||||
echo "2. If assertion 1 fails: provision path broken — check CP admin API + EC2 quota"
|
||||
echo "3. If assertion 2 fails: /registry/register path broken — check workspace-server boot"
|
||||
echo "4. If assertion 3 fails: LLM proxy / completion path broken — check cp#469 cluster"
|
||||
echo "5. If assertion 4 fails: teardown trap broken — leak risk, fix immediately"
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
@@ -12,7 +12,7 @@
|
||||
# (SHA 0adf2098) per RFC internal#219 Phase 2b+c — replicate repo-by-repo.
|
||||
#
|
||||
# When any pair diverges, a `[ci-drift]` issue is opened or updated
|
||||
# (idempotent by title) and labelled `ci-bp-drift`. This is the
|
||||
# (idempotent by title) and labelled `tier:high`. This is the
|
||||
# auto-detection that closes the regression class identified in
|
||||
# RFC §1 finding 3 (protection only listed 2 of 6 real jobs for
|
||||
# ~weeks, undetected) and §6 (audit env drifts silently from
|
||||
@@ -106,7 +106,7 @@ jobs:
|
||||
AUDIT_WORKFLOW_PATH: '.gitea/workflows/audit-force-merge.yml'
|
||||
# Path to the CI workflow with the sentinel + the jobs.
|
||||
CI_WORKFLOW_PATH: '.gitea/workflows/ci.yml'
|
||||
# Issue label applied on file/update. `ci-bp-drift` exists in
|
||||
# Issue label applied on file/update. `tier:high` exists in
|
||||
# the molecule-core label set (verified 2026-05-11, label id 9).
|
||||
DRIFT_LABEL: 'ci-bp-drift'
|
||||
DRIFT_LABEL: 'tier:high'
|
||||
run: python3 .gitea/scripts/ci-required-drift.py
|
||||
|
||||
@@ -418,9 +418,10 @@ jobs:
|
||||
# a manual action that determinism made obsolete.
|
||||
name: Canvas Deploy Status
|
||||
runs-on: docker-host
|
||||
# Per-step no-op (not job-level `if:`) so the job reaches SUCCESS on PRs
|
||||
# instead of skipped — skipped poisons the PR combined status (internal#817).
|
||||
# Job-level `if:` so ci-required-drift.py's ci_job_names() detects this as
|
||||
# github.ref-gated and skips it from the required-context F1 set (mc#1982).
|
||||
# Step-level exit 0 handles the "not a canvas main push" case.
|
||||
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
|
||||
needs: [changes, canvas-build]
|
||||
steps:
|
||||
- name: Record canvas ordered-deploy status
|
||||
@@ -499,7 +500,7 @@ jobs:
|
||||
# `CI / all-required (pull_request)` per issue #1473.
|
||||
#
|
||||
# Closes the failure mode where status_check_contexts on molecule-core/main
|
||||
# only listed `Secret scan` + `sop-checklist` (the 2 meta-gates), so real
|
||||
# only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
|
||||
# `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
|
||||
# red silently merged through. See internal#286 for the three concrete
|
||||
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
|
||||
@@ -532,8 +533,9 @@ jobs:
|
||||
# The `needs:` list MUST stay in lockstep with ci-required-drift.py's
|
||||
# F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
|
||||
# whose `if:` gates on github.event_name/github.ref). canvas-deploy-
|
||||
# status is per-step-gated (not job-level `if:`) so it reaches SUCCESS
|
||||
# on PRs and is included here — internal#817. If a new always-running
|
||||
# reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
|
||||
# so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
|
||||
# skipped job would never let the sentinel run. If a new always-running
|
||||
# CI job is added, add it here too or ci-required-drift F1 will flag it.
|
||||
#
|
||||
# Stays on the dedicated `ci-meta` lane (no docker work, so the
|
||||
@@ -547,7 +549,6 @@ jobs:
|
||||
- canvas-build
|
||||
- shellcheck
|
||||
- python-lint
|
||||
- canvas-deploy-status
|
||||
continue-on-error: false
|
||||
runs-on: ci-meta
|
||||
timeout-minutes: 5
|
||||
@@ -566,7 +567,6 @@ jobs:
|
||||
CANVAS_RESULT: ${{ needs.canvas-build.result }}
|
||||
SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
|
||||
PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
|
||||
CANVAS_DEPLOY_RESULT: ${{ needs.canvas-deploy-status.result }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
fail=0
|
||||
@@ -588,7 +588,6 @@ jobs:
|
||||
check "Canvas (Next.js)" "$CANVAS_RESULT"
|
||||
check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
|
||||
check "Python Lint & Test" "$PYTHON_LINT_RESULT"
|
||||
check "Canvas Deploy Status" "$CANVAS_DEPLOY_RESULT"
|
||||
if [ "$fail" -ne 0 ]; then
|
||||
echo "::error::all-required: one or more aggregated CI jobs did not succeed"
|
||||
exit 1
|
||||
|
||||
@@ -131,9 +131,9 @@ jobs:
|
||||
# on the per-runtime default ("sonnet" → routes to direct
|
||||
# Anthropic, defeats the cost saving). Operators can override
|
||||
# via workflow_dispatch by setting a different E2E_MODEL_SLUG
|
||||
# input if they need to exercise a specific model. MiniMax-M2.7 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7' }}
|
||||
# input if they need to exercise a specific model. MiniMax-M2 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke.
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2' }}
|
||||
# Bound to 10 min so a stuck provision fails the run instead of
|
||||
# holding up the next cron firing. 15-min default in the script
|
||||
# is for the on-PR full lifecycle where we have more headroom.
|
||||
|
||||
@@ -250,20 +250,6 @@ jobs:
|
||||
echo "CANVAS_PORT=${CANVAS_PORT}" >> "$GITHUB_ENV"
|
||||
echo "Canvas host port: ${CANVAS_PORT}"
|
||||
|
||||
- name: Set deterministic admin token
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
run: |
|
||||
# PR #2291 made auth fail-closed everywhere (no dev-mode escape).
|
||||
# The platform server requires ADMIN_TOKEN; the canvas requires the
|
||||
# matching NEXT_PUBLIC_ADMIN_TOKEN or every API call 401s.
|
||||
# We set a deterministic per-run value so the ephemeral platform
|
||||
# and canvas are paired correctly.
|
||||
E2E_ADMIN_TOKEN="e2e-chat-admin-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "NEXT_PUBLIC_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "Admin token configured for e2e-chat platform + canvas."
|
||||
|
||||
- name: Start platform (background)
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
working-directory: workspace-server
|
||||
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
# NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
|
||||
# queue now reads the required status contexts from BRANCH PROTECTION
|
||||
# (status_check_contexts) so non-required governance reds (qa-review,
|
||||
# security-review, sop-checklist when not branch-required,
|
||||
# security-review, sop-tier, sop-checklist when not branch-required,
|
||||
# E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
|
||||
# If branch protection cannot be enumerated the queue HOLDS
|
||||
# (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
# Forward-compat scope:
|
||||
# Today (2026-05-11) molecule-core/main protects 3 contexts:
|
||||
# - "Secret scan / Scan diff for credential-shaped strings (pull_request)"
|
||||
# - "sop-checklist / tier-check (pull_request)"
|
||||
# - "sop-tier-check / tier-check (pull_request)"
|
||||
# - "CI / all-required (pull_request)"
|
||||
# Per RFC#324 Step 2 the required-list expands to ~5 contexts
|
||||
# (qa-review, security-review added). Each new required context's
|
||||
|
||||
@@ -40,7 +40,6 @@ env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: informational lint enforcing docker-host/publish pin convention (internal#512), not a merge gate
|
||||
lint-docker-host-pin:
|
||||
name: Lint docker-host pin on docker-touching workflows
|
||||
runs-on: docker-host
|
||||
|
||||
@@ -16,7 +16,7 @@ name: Lint workflow YAML (Gitea-1.22.6-hostile shapes)
|
||||
#
|
||||
# Empirical history this hardens against:
|
||||
# - status-reaper rev1 caught rule-4 (name-collision) class
|
||||
# - sop-checklist DOA'd on rule-2 (workflow_run partial)
|
||||
# - sop-tier-refire DOA'd on rule-2 (workflow_run partial)
|
||||
# - #319 bootstrap-paradox (chained-defect class, related)
|
||||
# - internal#329 dispatcher race (adjacent)
|
||||
# - 2026-05-11 publish-runtime: rule-1, 24h PyPI freeze
|
||||
|
||||
@@ -95,10 +95,10 @@ jobs:
|
||||
# included here — staging green is a separate gate
|
||||
# (`feedback_staging_e2e_merge_gate`).
|
||||
WATCH_BRANCH: 'main'
|
||||
# Issue label applied on file/open. `ci-bp-drift` exists in the
|
||||
# Issue label applied on file/open. `tier:high` exists in the
|
||||
# molecule-core label set (verified 2026-05-11, label id 9).
|
||||
# Rationale for high: main red blocks the promotion train and
|
||||
# poisons every PR's auto-rebase base; treat as a fire even
|
||||
# if intermittent.
|
||||
RED_LABEL: 'ci-bp-drift'
|
||||
RED_LABEL: 'tier:high'
|
||||
run: python3 .gitea/scripts/main-red-watchdog.py
|
||||
|
||||
@@ -12,9 +12,9 @@
|
||||
# - `pull_request_review` types: [submitted]
|
||||
# → re-evaluate when a team member submits an APPROVE review so
|
||||
# the gate flips immediately (no wait for the next push or
|
||||
# slash-command). Verified live: sop-checklist.yml uses this
|
||||
# slash-command). Verified live: sop-tier-check.yml uses this
|
||||
# same event and provably fires (produces
|
||||
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# `github.event.review.state == 'APPROVED' || 'approved'` so
|
||||
# only APPROVE reviews run the evaluator; COMMENT and
|
||||
@@ -53,7 +53,7 @@
|
||||
#
|
||||
# We MUST NOT use `github.event.comment.author_association` (the
|
||||
# field doesn't exist on Gitea 1.22.6 webhook payload — this was
|
||||
# 's defect #1).
|
||||
# sop-tier-refire's defect #1).
|
||||
#
|
||||
# A4 (no PR-head checkout under pull_request_target):
|
||||
# We check out the BASE ref explicitly so the review-check.sh script is
|
||||
@@ -73,7 +73,7 @@
|
||||
# also not in qa/security teams → also 403.
|
||||
#
|
||||
# Resolution: a dedicated `RFC_324_TEAM_READ_TOKEN` secret, owned by an
|
||||
# identity that IS in both `qa` and `security` teams (Owners-level
|
||||
# identity that IS in both `qa` and `security` teams (Owners-tier
|
||||
# claude-ceo-assistant, or a new service-bot added to both teams).
|
||||
# Provisioning of this secret is tracked as a follow-up issue (filed by
|
||||
# core-devops at PR open).
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
# A1-α addendum (internal#760): review-event trigger added so the security
|
||||
# gate flips immediately when a team member submits an APPROVE review.
|
||||
# Uses `pull_request_review` types: [submitted] — verified live via
|
||||
# sop-checklist.yml which provably fires this event (produces
|
||||
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
|
||||
# sop-tier-check.yml which provably fires this event (produces
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# `github.event.review.state == 'APPROVED' || 'approved'` so only APPROVE
|
||||
# reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
# Fix (PR #1345 / issue #1280):
|
||||
# - ONE workflow, ONE issue_comment:[created] subscription (no edited/deleted)
|
||||
# - all-items-acked job: pull_request_target OR sop slash-command comments
|
||||
# - review-refire job: qa/security refire slash commands
|
||||
# - review-refire job: qa/security/tier refire slash commands
|
||||
# → ~50% reduction in comment-triggered runner occupancy vs pre-fix.
|
||||
#
|
||||
# Trust boundary (mirrors RFC#324 §A4 + sop-checklist security note):
|
||||
# Trust boundary (mirrors RFC#324 §A4 + sop-tier-check security note):
|
||||
# `pull_request_target` (not `pull_request`) — workflow def is loaded
|
||||
# from BASE branch, so a PR cannot rewrite this workflow to exfiltrate
|
||||
# the token. The `actions/checkout` step pins `ref: base.sha` so the
|
||||
@@ -34,6 +34,14 @@
|
||||
# via a repo secret `SOP_CHECKLIST_GATE_TOKEN`. Provisioning of that
|
||||
# secret is a follow-up authorization step (separate from this PR).
|
||||
#
|
||||
# Failure mode: tier-aware (RFC#351 open question 2):
|
||||
# - tier:high → state=failure (hard-fail; BP blocks merge)
|
||||
# - tier:medium → state=failure (hard-fail; same)
|
||||
# - tier:low → state=pending (soft-fail; BP can choose to require
|
||||
# this context or skip for low-tier PRs)
|
||||
# - missing/no-tier → state=failure (default-mode: hard — never lower
|
||||
# the bar per feedback_fix_root_not_symptom)
|
||||
#
|
||||
# Slash-command contract (RFC#351 v1 + §A1.1-style notes from RFC#324):
|
||||
#
|
||||
# /sop-ack <slug-or-numeric-alias> [optional note]
|
||||
@@ -53,7 +61,7 @@
|
||||
# — declare a gate (qa-review, security-review) N/A.
|
||||
# — see sop-checklist-config.yaml n/a_gates section.
|
||||
#
|
||||
# /qa-recheck /security-recheck
|
||||
# /qa-recheck /security-recheck /refire-tier-check
|
||||
# — refire the corresponding status check on the PR head.
|
||||
#
|
||||
# The eval is read-only + idempotent (read PR + comments + team
|
||||
@@ -141,6 +149,7 @@ jobs:
|
||||
{
|
||||
echo "run_qa=false"
|
||||
echo "run_security=false"
|
||||
echo "run_tier=false"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
|
||||
case "$first_line" in
|
||||
@@ -150,6 +159,9 @@ jobs:
|
||||
/security-recheck*)
|
||||
echo "run_security=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
/refire-tier-check*)
|
||||
echo "run_tier=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
*)
|
||||
echo "::notice::no supported review refire slash command; no-op"
|
||||
;;
|
||||
@@ -158,7 +170,8 @@ jobs:
|
||||
- name: Check out BASE ref for trusted scripts
|
||||
if: |
|
||||
steps.classify.outputs.run_qa == 'true' ||
|
||||
steps.classify.outputs.run_security == 'true'
|
||||
steps.classify.outputs.run_security == 'true' ||
|
||||
steps.classify.outputs.run_tier == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
@@ -200,3 +213,13 @@ jobs:
|
||||
run: |
|
||||
set -euo pipefail
|
||||
.gitea/scripts/review-refire-status.sh
|
||||
|
||||
- name: Refire sop-tier-check status
|
||||
if: steps.classify.outputs.run_tier == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
SOP_DEBUG: '0'
|
||||
run: bash .gitea/scripts/sop-tier-refire.sh
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
|
||||
#
|
||||
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
|
||||
# from the previous inline-bash version). The script is the single source
|
||||
# of truth; this workflow file just sets env + invokes it.
|
||||
#
|
||||
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
|
||||
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
|
||||
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
|
||||
# branch:
|
||||
# required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
|
||||
# required_approving_reviews: 1
|
||||
# approving_review_teams: ["ceo", "managers", "engineers"]
|
||||
#
|
||||
# Tier → required-team expression (internal#189 AND-composition):
|
||||
# tier:low → engineers,managers,ceo (OR: any one suffices)
|
||||
# tier:medium → managers AND engineers AND qa???,security??? (AND: all required)
|
||||
# tier:high → ceo (OR: single team, wired for AND)
|
||||
#
|
||||
# "???" = teams not yet created in Gitea. When qa + security teams are
|
||||
# added, update TIER_EXPR["tier:medium"] in the script to remove the
|
||||
# markers. PRs already in-flight when qa/security are created continue
|
||||
# to work because their authors explicitly requested those reviews.
|
||||
#
|
||||
# Force-merge: Owners-team override remains available out-of-band via
|
||||
# the Gitea merge API; force-merge writes `incident.force_merge` to
|
||||
# `structure_events` per §Persistent structured logging gate (Phase 3).
|
||||
#
|
||||
# Environment variables:
|
||||
# SOP_DEBUG=1 — per-API-call diagnostic lines. Default: off.
|
||||
# SOP_LEGACY_CHECK=1 — revert to OR-gate for this run. Intended for
|
||||
# emergency use only; burn-in window closed
|
||||
# 2026-05-17 (internal#189 Phase 1).
|
||||
#
|
||||
# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
|
||||
# window closed. As of 2026-06-04 the residual masks left behind by the
|
||||
# burn-in are removed for real (the comment previously claimed this while
|
||||
# the masks still persisted — that was stale):
|
||||
# - continue-on-error: true on the jq-install step (redundant; the step
|
||||
# already exits 0) and on the tier-check step (the burn-in mask).
|
||||
# - the `|| true` after the sop-tier-check.sh invocation, which masked
|
||||
# real tier-gate verdicts.
|
||||
# AND-composition is now fully enforced and the tier-check step can
|
||||
# honestly red CI on a real SOP-6 violation.
|
||||
#
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed): this is a
|
||||
# REQUIRED branch-protected gate on `pull_request_target` (always
|
||||
# same-repo, secrets always present — no fork/advisory split). Failing
|
||||
# open on a token/network/jq fault greened the SOP-6 approval gate
|
||||
# WITHOUT verifying approvals — a fail-open on a required context. The
|
||||
# gate now FAILS CLOSED on infra faults too: fix the token/runner, not
|
||||
# the gate. If you ever need to temporarily re-introduce a mask, file a
|
||||
# tracker and follow the mc#1982 protocol.
|
||||
|
||||
name: sop-tier-check
|
||||
|
||||
# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
|
||||
# `pull_request_target` loads the workflow definition from the BASE
|
||||
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
|
||||
# with write access to a feature branch could rewrite this file in
|
||||
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
|
||||
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
|
||||
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
|
||||
# is the documented mitigation.
|
||||
#
|
||||
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
|
||||
# untrusted code is ever executed in the runner — we only HTTP-call the
|
||||
# Gitea API. If a future change adds a checkout step, it MUST pin to
|
||||
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
|
||||
# the trust boundary.
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, edited, synchronize, reopened, labeled, unlabeled]
|
||||
pull_request_review:
|
||||
types: [submitted, dismissed, edited]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
tier-check:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
secrets: read
|
||||
steps:
|
||||
- name: Check out base branch (for the script)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
# Pin to base.sha — pull_request_target's protection only
|
||||
# works if we never check out PR HEAD. Same SHA the workflow
|
||||
# itself was loaded from.
|
||||
ref: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Install jq
|
||||
# Gitea Actions runners (ubuntu-latest label) do not bundle jq.
|
||||
# The sop-tier-check script uses jq for all JSON API parsing.
|
||||
# Install jq before the script runs so sop-tier-check can pass.
|
||||
#
|
||||
# Method: apt-get first (reliable for Ubuntu runners with internet
|
||||
# access to package mirrors). Falls back to GitHub binary download.
|
||||
# GitHub releases may be unreachable from some runner networks
|
||||
# (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
|
||||
# runners). The sop-tier-check script has its own fallback as a
|
||||
# third line of defense, and this step's final command
|
||||
# (`jq --version ... || echo`) already exits 0 unconditionally — so
|
||||
# the step cannot fail the job on its own.
|
||||
# continue-on-error REMOVED 2026-06-04 (mc#1982 directive: root-fix
|
||||
# and remove, do not renew). It was redundant masking, not a gate.
|
||||
run: |
|
||||
# apt-get is the primary method — Ubuntu package mirrors are reliably
|
||||
# reachable from runner containers. GitHub releases may be blocked
|
||||
# or slow on some networks (infra#241 follow-up).
|
||||
if apt-get update -qq && apt-get install -y -qq jq; then
|
||||
echo "::notice::jq installed via apt-get: $(jq --version)"
|
||||
elif timeout 120 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq && chmod +x /usr/local/bin/jq; then
|
||||
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
|
||||
else
|
||||
echo "::warning::jq install failed — apt-get and GitHub download both failed."
|
||||
fi
|
||||
jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
|
||||
|
||||
- name: Verify tier label + reviewer team membership
|
||||
# continue-on-error REMOVED 2026-06-04 (expired internal#189 Phase 1
|
||||
# burn-in, window closed 2026-05-17; mc#1982 directive: root-fix and
|
||||
# remove, do not renew). SOP_FAIL_OPEN REMOVED 2026-06-05
|
||||
# (fix/core-ci-fail-closed): the gate now fails CLOSED on infra
|
||||
# faults too (see the env block below), not just on a real verdict.
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
SOP_DEBUG: '0'
|
||||
SOP_LEGACY_CHECK: '0'
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed).
|
||||
#
|
||||
# This is the REQUIRED branch-protected gate
|
||||
# `sop-tier-check / tier-check (pull_request)`. It runs on
|
||||
# `pull_request_target`, which ALWAYS executes from the base
|
||||
# branch WITH secrets present — there is NO fork/advisory split
|
||||
# and no legitimate "secrets genuinely absent" degradation here.
|
||||
#
|
||||
# SOP_FAIL_OPEN=1 made the script `exit 0` on an empty/invalid
|
||||
# token, an unreachable Gitea API, or missing jq — i.e. an AUTH
|
||||
# FAILURE or unreachable-dependency would green the SOP-6
|
||||
# approval gate WITHOUT verifying that the required teams
|
||||
# actually approved. That is a fail-open on a required gate: a
|
||||
# mis-wired or under-scoped SOP_TIER_CHECK_TOKEN would let any PR
|
||||
# merge past the approval requirement.
|
||||
#
|
||||
# Removing the env unsets it → `${SOP_FAIL_OPEN:-}` is empty in
|
||||
# sop-tier-check.sh → every guarded `exit 0` branch instead falls
|
||||
# through to `exit 1`. Infra faults (bad token / API down / no
|
||||
# jq) now FAIL CLOSED with a loud `::error::`, exactly like a real
|
||||
# SOP-6 violation. Fix the token/runner, not the gate.
|
||||
run: |
|
||||
bash .gitea/scripts/sop-tier-check.sh
|
||||
@@ -0,0 +1,52 @@
|
||||
# sop-tier-refire — manual fallback for sop-tier-check refire.
|
||||
#
|
||||
# Closes internal#292. Gitea 1.22.6 doesn't refire workflows on the
|
||||
# `pull_request_review` event (go-gitea/gitea#33700); the `sop-tier-check`
|
||||
# workflow's review-event subscription is silently dead. The result:
|
||||
# PRs that get their approving review AFTER the tier-check ran on open/
|
||||
# synchronize keep their failing status check forever, and the only way
|
||||
# to merge is the admin force-merge path (audited via `audit-force-merge`
|
||||
# but the audit trail keeps growing; see `feedback_never_admin_merge_bypass`).
|
||||
#
|
||||
# Comment-triggered refires now live in `review-refire-comments.yml`. Gitea
|
||||
# queues issue_comment workflows before evaluating job-level `if:`, so having
|
||||
# qa-review, security-review, sop-checklist, and sop-tier-refire all subscribe
|
||||
# to every comment caused queue storms on SOP-heavy PRs. This workflow is a
|
||||
# non-automatic breadcrumb only; Gitea 1.22.6 does not support
|
||||
# workflow_dispatch inputs, so real refires must use `/refire-tier-check`.
|
||||
#
|
||||
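# SECURITY MODEL (for the comment-triggered `/refire-tier-check` path, which now lives in `review-refire-comments.yml`; this file itself only declares `workflow_dispatch`):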
|
||||
#
|
||||
# 1. `pull_request` exists on the issue (issue_comment fires on issues
|
||||
# AND PRs; we only want PRs).
|
||||
# 2. `comment.author_association` must be MEMBER/OWNER/COLLABORATOR.
|
||||
# Per the internal#292 core-security review (review#1066 ask): anyone
|
||||
# can comment, but only repo collaborators+ can flip the status.
|
||||
# Without this gate, a drive-by commenter on a public-issue-tracker
|
||||
# surface could trigger a status flip.
|
||||
# 3. Comment body must contain `/refire-tier-check` — a slash-command-
|
||||
# shaped trigger (not just any comment word). Prevents accidental
|
||||
# triggering from prose like "we should refire tests" in a review.
|
||||
# 4. This workflow does NOT check out PR HEAD code. Like sop-tier-check,
|
||||
# it only HTTP-calls the Gitea API. Trust boundary preserved.
|
||||
#
|
||||
# Note: `issue_comment` fires from the BASE branch's workflow file. There
|
||||
# is no `pull_request_target` equivalent to set; the trigger inherently
|
||||
# loads the workflow from the default branch.
|
||||
#
|
||||
# Rate-limit: a 1s pre-sleep + a "skip if status posted in last 30s"
|
||||
# guard prevents comment-spam from thrashing the status. See the script.
|
||||
|
||||
name: sop-tier-check refire (manual)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
refire:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Explain supported refire path
|
||||
run: |
|
||||
echo "::error::Gitea 1.22.6 does not support workflow_dispatch inputs here; comment /refire-tier-check on the PR instead."
|
||||
exit 1
|
||||
@@ -112,9 +112,9 @@ jobs:
|
||||
E2E_RUNTIME: claude-code
|
||||
# Pin the smoke to a specific MiniMax model rather than relying
|
||||
# on the per-runtime default (which could resolve to "sonnet" →
|
||||
# direct Anthropic and defeat the cost saving). MiniMax-M2.7 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
|
||||
E2E_MODEL_SLUG: MiniMax-M2.7
|
||||
# direct Anthropic and defeat the cost saving). MiniMax-M2 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke.
|
||||
E2E_MODEL_SLUG: MiniMax-M2
|
||||
E2E_RUN_ID: "smoke-${{ github.run_id }}"
|
||||
# Debug-only: when an operator dispatches with keep_on_failure=true,
|
||||
# the smoke script's E2E_KEEP_ORG=1 path skips teardown so the
|
||||
|
||||
@@ -34,10 +34,8 @@ name: Sweep stale Cloudflare DNS records
|
||||
# scripts/ops/test_sweep_cf_decide.py (#2027) cover the rule
|
||||
# classifier.
|
||||
#
|
||||
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
|
||||
# (operator-host canonical name) are accepted — the workflow falls back
|
||||
# automatically. Same for CF_ZONE_ID / CLOUDFLARE_ZONE_ID. Confirmed
|
||||
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
|
||||
# Secrets: CF_API_TOKEN, CF_ZONE_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
|
||||
# are confirmed existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
|
||||
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
|
||||
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
|
||||
|
||||
@@ -81,8 +79,8 @@ jobs:
|
||||
# each individually capped at 10s by the script's curl -m flag.
|
||||
timeout-minutes: 3
|
||||
env:
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
|
||||
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID || secrets.CLOUDFLARE_ZONE_ID }}
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
|
||||
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
@@ -131,7 +129,6 @@ jobs:
|
||||
fi
|
||||
echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
|
||||
echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
|
||||
echo "::error::Cloudflare secrets accept either the CI-scoped name (CF_API_TOKEN / CF_ZONE_ID) or the operator-host canonical name (CLOUDFLARE_API_TOKEN / CLOUDFLARE_ZONE_ID)."
|
||||
echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -29,12 +29,10 @@ name: Sweep stale Cloudflare Tunnels
|
||||
# the DNS sweep's 50% because tenant-shaped tunnels are mostly
|
||||
# orphans by design) refuses to nuke past the threshold.
|
||||
#
|
||||
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
|
||||
# (operator-host canonical name) are accepted — the workflow falls back
|
||||
# automatically. Same for CF_ACCOUNT_ID / CLOUDFLARE_ACCOUNT_ID. Confirmed
|
||||
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
|
||||
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
|
||||
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
|
||||
# Secrets: CF_API_TOKEN, CF_ACCOUNT_ID are confirmed existing per
|
||||
# issue #425 §425 audit. CP_ADMIN_API_TOKEN and CP_STAGING_ADMIN_API_TOKEN
|
||||
# are unconfirmed — if missing, the verify step (schedule → hard-fail,
|
||||
# dispatch → soft-skip) surfaces it clearly.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -76,8 +74,8 @@ jobs:
|
||||
# the sweep-cf-orphans companion job).
|
||||
timeout-minutes: 30
|
||||
env:
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
|
||||
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID || secrets.CLOUDFLARE_ACCOUNT_ID }}
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
|
||||
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
# umbrella-reaper — auto-recovery for stale CI umbrella statuses on open PRs.
|
||||
#
|
||||
# Tracking: molecule-core#1780.
|
||||
#
|
||||
# Problem: when `CI / all-required (pull_request)` reports failure due to
|
||||
# a propagation/timing race despite all required sub-jobs being success,
|
||||
# branch protection blocks the merge. Operators currently recover manually
|
||||
# per docs/runbooks/ci-umbrella-stale-compensating-status.md.
|
||||
#
|
||||
# This workflow automates that recovery: it scans open PRs and posts a
|
||||
# compensating success status when the umbrella is stale but all sub-jobs
|
||||
# are verified green.
|
||||
#
|
||||
# Trust boundary: the script only reads PR lists + statuses and POSTs to
|
||||
# /statuses/{sha}. It never checks out PR HEAD code. The Gitea token has
|
||||
# write:repository scope for statuses only.
|
||||
#
|
||||
# Sibling: .gitea/workflows/status-reaper.yml (default-branch push-suffix
|
||||
# compensation). Same persona provisioning model.
|
||||
|
||||
name: umbrella-reaper
|
||||
|
||||
# IMPORTANT — Schedule moved to operator-config:
|
||||
# /etc/cron.d/molecule-core-umbrella-reaper ->
|
||||
# /usr/local/bin/molecule-core-cron-bot.sh umbrella-reaper
|
||||
#
|
||||
# This keeps the compensation cadence but stops a maintenance bot from
|
||||
# consuming Gitea Actions runner slots during PR merge waves.
|
||||
# Gitea 1.22.6 parser quirk per
|
||||
# `feedback_gitea_workflow_dispatch_inputs_unsupported`: do NOT add an
|
||||
# `inputs:` block here. Gitea 1.22.6 rejects the whole workflow as
|
||||
# "unknown on type" when `workflow_dispatch.inputs.X` is present.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# NOTE: NO `concurrency:` block is intentional — same reasoning as
|
||||
# status-reaper.yml. Gitea 1.22.6 doesn't honor cancel-in-progress for
|
||||
# queued ticks; the POST is idempotent so concurrent ticks are safe.
|
||||
|
||||
jobs:
|
||||
reap:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 8
|
||||
steps:
|
||||
- name: Check out repo at default-branch HEAD
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install PyYAML
|
||||
run: python -m pip install --quiet 'PyYAML==6.0.2'
|
||||
|
||||
- name: Compensate stale PR umbrella statuses
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.UMBRELLA_REAPER_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_LIMIT: "50"
|
||||
run: python3 .gitea/scripts/umbrella-reaper.py
|
||||
@@ -26,7 +26,7 @@ name: verify-providers-gen
|
||||
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
|
||||
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
|
||||
# from branch protection (turning it into a hard merge gate has blast radius
|
||||
# — operator GO required, same pattern as sop-checklist / verify-providers-gen
|
||||
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
|
||||
# on controlplane). Promote it into branch protection in a follow-up once
|
||||
# P2 has soaked.
|
||||
# Until then it behaves like secret-scan / block-internal-paths: a standalone
|
||||
|
||||
@@ -27,13 +27,9 @@ export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
|
||||
// 1. Create external workspace pointing at the in-process echo runtime.
|
||||
const runId = Math.random().toString(36).slice(2, 8);
|
||||
const wsName = `Chat E2E Agent ${runId}`;
|
||||
const adminToken = process.env.E2E_ADMIN_TOKEN ?? process.env.ADMIN_TOKEN;
|
||||
const createRes = await fetch(`${PLATFORM_URL}/workspaces`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
...(adminToken ? { Authorization: `Bearer ${adminToken}` } : {}),
|
||||
},
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
name: wsName,
|
||||
tier: 1,
|
||||
|
||||
+31
-51
@@ -234,44 +234,30 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
"Authorization": `Bearer ${tenantToken}`,
|
||||
"X-Molecule-Org-Id": orgID,
|
||||
};
|
||||
// Retry workspace creation on transient 5xx / timeout — staging CP can
|
||||
// return 502/503/504 under load and a single-shot failure kills the
|
||||
// entire E2E run. Up to 3 attempts with exponential backoff between them
|
||||
// (3s, then 6s — ~9s of waiting in total), well inside the 20-min provision envelope.
|
||||
let workspaceId = "";
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
|
||||
method: "POST",
|
||||
headers: tenantAuth,
|
||||
body: JSON.stringify({
|
||||
name: "E2E Canvas Test",
|
||||
runtime: "hermes",
|
||||
tier: 2,
|
||||
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
|
||||
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
|
||||
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
|
||||
// platform). The old `gpt-4o` was never a registered hermes model and
|
||||
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
|
||||
// defaults closed to platform_managed (see the boot-shape note below),
|
||||
// so a platform-namespaced model id is the registry-correct choice.
|
||||
model: "moonshot/kimi-k2.6",
|
||||
}),
|
||||
});
|
||||
if (ws.status >= 200 && ws.status < 300 && ws.body?.id) {
|
||||
workspaceId = ws.body.id as string;
|
||||
break;
|
||||
}
|
||||
const isTransient = ws.status >= 500 || ws.status === 0;
|
||||
if (!isTransient || attempt === 3) {
|
||||
throw new Error(`Workspace create ${ws.status} (attempt ${attempt}): ${JSON.stringify(ws.body)}`);
|
||||
}
|
||||
const backoff = 3000 * Math.pow(2, attempt - 1);
|
||||
console.log(`[staging-setup] Workspace create transient ${ws.status}, retrying in ${backoff}ms...`);
|
||||
await new Promise((r) => setTimeout(r, backoff));
|
||||
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
|
||||
method: "POST",
|
||||
headers: tenantAuth,
|
||||
body: JSON.stringify({
|
||||
name: "E2E Canvas Test",
|
||||
runtime: "hermes",
|
||||
tier: 2,
|
||||
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
|
||||
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
|
||||
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
|
||||
// platform). The old `gpt-4o` was never a registered hermes model and
|
||||
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
|
||||
// defaults closed to platform_managed (see the boot-shape note below),
|
||||
// so a platform-namespaced model id is the registry-correct choice.
|
||||
model: "moonshot/kimi-k2.6",
|
||||
}),
|
||||
});
|
||||
if (ws.status >= 400 || !ws.body?.id) {
|
||||
throw new Error(`Workspace create ${ws.status}: ${JSON.stringify(ws.body)}`);
|
||||
}
|
||||
const workspaceId = ws.body.id as string;
|
||||
console.log(`[staging-setup] Workspace created: ${workspaceId}`);
|
||||
|
||||
// 6. Wait for workspace online
|
||||
// 6. Wait for workspace RENDERABLE.
|
||||
//
|
||||
// This harness exists to verify the canvas *tab UI* renders (staging-
|
||||
// tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard
|
||||
@@ -280,16 +266,6 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
// it needs is a workspace ROW that the canvas lists so the node renders
|
||||
// and the side-panel tabs open. A fully-`online` agent is NOT required.
|
||||
//
|
||||
// Hermes cold-boot takes 10-13 min on slow apt days (apt + uv + hermes
|
||||
// install + npm browser-tools). The controlplane bootstrap-watcher
|
||||
// deadline fires at 5 min and sets status=failed prematurely; heartbeat
|
||||
// then transitions failed → online after install.sh finishes. The ONLY
|
||||
// failed shape we tolerate is the pre-start credential-abort
|
||||
// (uptime_seconds=0, no last_sample_error) — the agent never ran. Real
|
||||
// boot regressions (image pull error, panic, PYTHONPATH, etc.) still
|
||||
// hard-throw immediately so triage gets detail without waiting for a
|
||||
// polling timeout. See test_staging_full_saas.sh step 7/11 and issue #2632.
|
||||
//
|
||||
// That distinction became load-bearing on 2026-06-03: workspace-server
|
||||
// #2162 (fix(provision): platform-managed workspace must fail-closed when
|
||||
// CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
|
||||
@@ -311,10 +287,8 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
// the node + tabs render, proceed. We do NOT mask a real boot regression:
|
||||
// any `failed` carrying a last_sample_error, OR a non-zero uptime (the
|
||||
// agent started then crashed — image pull, panic, PYTHONPATH, etc.),
|
||||
// still hard-throws immediately so triage gets boot_stage / last_error /
|
||||
// image fields without waiting for a polling timeout.
|
||||
// Genuine *infra* provision failure is already caught loud one step
|
||||
// earlier at the org level (instance_status === "failed").
|
||||
// still hard-throws. Genuine *infra* provision failure is already caught
|
||||
// loud one step earlier at the org level (instance_status === "failed").
|
||||
await waitFor<boolean>(
|
||||
async () => {
|
||||
const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
|
||||
@@ -341,7 +315,13 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
);
|
||||
return true;
|
||||
}
|
||||
// Real boot regression — hard-throw immediately with full detail.
|
||||
// last_sample_error is often empty when the failure happens before
|
||||
// the agent emits a sample (e.g. boot crash, image pull error,
|
||||
// missing PYTHONPATH, OpenAI quota at startup). Dumping the full
|
||||
// body gives triage the boot_stage / last_error / image fields it
|
||||
// needs without a second probe. Otherwise this propagates as a
|
||||
// bare "Workspace failed: " — the exact useless message that
|
||||
// sent #2632 to the issue tracker.
|
||||
const detail = sampleErr
|
||||
? sampleErr
|
||||
: `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
|
||||
@@ -353,7 +333,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
10_000,
|
||||
"workspace online",
|
||||
);
|
||||
console.log(`[staging-setup] Workspace online`);
|
||||
console.log(`[staging-setup] Workspace renderable`);
|
||||
|
||||
// 7. Hand state off to tests + teardown — overwrite the slug-only
|
||||
// bootstrap state with the full state spec tests need.
|
||||
|
||||
@@ -370,7 +370,7 @@ test.describe("staging canvas tabs", () => {
|
||||
|
||||
// The tablist appears once the side panel mounts. Condition-based
|
||||
// wait — no fixed delay.
|
||||
const tablist = page.getByRole("tablist", { name: "Workspace panel tabs" });
|
||||
const tablist = page.locator('[role="tablist"]');
|
||||
await expect(
|
||||
tablist,
|
||||
"side panel tablist never appeared after clicking the workspace node",
|
||||
|
||||
@@ -179,6 +179,7 @@ function Shell({
|
||||
<p className="mt-2 text-ink-mid">
|
||||
Each org is an isolated Molecule workspace.
|
||||
</p>
|
||||
<DataResidencyNotice />
|
||||
<div className="mt-8">{children}</div>
|
||||
</div>
|
||||
</TermsGate>
|
||||
@@ -219,6 +220,25 @@ function AccountBar({ session }: { session: Session }) {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// DataResidencyNotice surfaces where workspace data lives so EU-based
// signups can make an informed choice (GDPR Art. 13 disclosure
// requirement). Plain text, no icon — the goal is clarity, not
// decoration. A future EU region selector can replace this with a
// region dropdown.
//
// Takes no props and renders a single static paragraph with a mailto link.
function DataResidencyNotice() {
  return (
    <p className="mt-3 rounded border border-line bg-surface-sunken/60 px-3 py-2 text-xs text-ink-mid">
      Workspaces run in AWS us-east-2 (Ohio, United States). EU region support is on the roadmap — reach out to
      {/* Explicit spaces: JSX drops whitespace at line breaks, so without
          these the link would butt up against the surrounding words. */}
      {" "}
      <a href="mailto:support@moleculesai.app" className="underline">
        support@moleculesai.app
      </a>
      {" "}if you need data residency in another region today.
    </p>
  );
}
|
||||
|
||||
function OrgRow({ org }: { org: Org }) {
|
||||
return (
|
||||
<li className="rounded-lg border border-line bg-surface-sunken p-4">
|
||||
|
||||
@@ -172,7 +172,7 @@ export function ContextMenu() {
|
||||
const nodeId = contextMenu.nodeId;
|
||||
closeContextMenu();
|
||||
try {
|
||||
await api.post(`/workspaces/${nodeId}/pause?cascade=true`, {});
|
||||
await api.post(`/workspaces/${nodeId}/pause`, {});
|
||||
updateNodeData(nodeId, { status: "paused" });
|
||||
} catch (e) {
|
||||
showToast("Pause failed", "error");
|
||||
@@ -184,7 +184,7 @@ export function ContextMenu() {
|
||||
const nodeId = contextMenu.nodeId;
|
||||
closeContextMenu();
|
||||
try {
|
||||
await api.post(`/workspaces/${nodeId}/resume?cascade=true`, {});
|
||||
await api.post(`/workspaces/${nodeId}/resume`, {});
|
||||
updateNodeData(nodeId, { status: "provisioning" });
|
||||
} catch (e) {
|
||||
showToast("Resume failed", "error");
|
||||
|
||||
@@ -60,16 +60,6 @@ const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "goog
|
||||
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
|
||||
const DEFAULT_HEADLESS_ROOT_GB = 30;
|
||||
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
|
||||
|
||||
// Per-workspace cloud/compute backend (multi-provider RFC). "aws" is the default
|
||||
// EC2 path; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner. A
|
||||
// workspace whose cloud differs from its tenant's is reached over a per-workspace
|
||||
// Cloudflare tunnel (runtime#95). Distinct from the LLM/model provider.
|
||||
const CLOUD_PROVIDER_OPTIONS = [
|
||||
{ value: "aws", label: "AWS (default)" },
|
||||
{ value: "gcp", label: "GCP" },
|
||||
{ value: "hetzner", label: "Hetzner" },
|
||||
];
|
||||
const DEFAULT_DISPLAY_ROOT_GB = 80;
|
||||
|
||||
export function CreateWorkspaceButton() {
|
||||
@@ -87,10 +77,6 @@ export function CreateWorkspaceButton() {
|
||||
const [displayInstanceType, setDisplayInstanceType] = useState(DEFAULT_DISPLAY_INSTANCE_TYPE);
|
||||
const [displayRootGB, setDisplayRootGB] = useState(String(DEFAULT_DISPLAY_ROOT_GB));
|
||||
const [displayResolution, setDisplayResolution] = useState("1920x1080");
|
||||
// Cloud/compute backend for the workspace box (multi-provider, per-workspace).
|
||||
// "aws" default; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner
|
||||
// (a non-tenant-cloud box is reached over a per-workspace tunnel, runtime#95).
|
||||
const [cloudProvider, setCloudProvider] = useState("aws");
|
||||
// Templates fetched from /api/templates — drives the dynamic provider
|
||||
// filter below. Same data source ConfigTab uses (PR #2454). When the
|
||||
// selected template declares `runtime_config.providers` in its
|
||||
@@ -280,7 +266,6 @@ export function CreateWorkspaceButton() {
|
||||
setDisplayInstanceType(DEFAULT_DISPLAY_INSTANCE_TYPE);
|
||||
setDisplayRootGB(String(DEFAULT_DISPLAY_ROOT_GB));
|
||||
setDisplayResolution("1920x1080");
|
||||
setCloudProvider("aws");
|
||||
setExternalRuntime("external");
|
||||
setLLMSelection({ providerId: "", model: "", envVars: [] });
|
||||
setLLMSecret("");
|
||||
@@ -370,16 +355,11 @@ export function CreateWorkspaceButton() {
|
||||
width: Number.isFinite(displayWidth) ? displayWidth : 1920,
|
||||
height: Number.isFinite(displayHeight) ? displayHeight : 1080,
|
||||
},
|
||||
// Only meaningful when CP provisions the box (SaaS), where
|
||||
// the picker is shown. Omit on self-hosted so the payload is
|
||||
// unchanged there.
|
||||
...(isSaaS ? { provider: cloudProvider } : {}),
|
||||
}
|
||||
: {
|
||||
instance_type: DEFAULT_HEADLESS_INSTANCE_TYPE,
|
||||
volume: { root_gb: DEFAULT_HEADLESS_ROOT_GB },
|
||||
display: { mode: "none" },
|
||||
...(isSaaS ? { provider: cloudProvider } : {}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
@@ -619,26 +599,6 @@ export function CreateWorkspaceButton() {
|
||||
<div className="mb-2 text-[11px] font-medium text-ink-mid">
|
||||
Container Config
|
||||
</div>
|
||||
{/* Cloud provider — only meaningful when CP provisions the box
|
||||
(SaaS). A non-tenant-cloud workspace is reached over a
|
||||
per-workspace Cloudflare tunnel (runtime#95). */}
|
||||
{isSaaS && (
|
||||
<label htmlFor="workspace-cloud-provider" className="mb-3 grid gap-1">
|
||||
<span className="text-xs font-medium text-ink">Cloud provider</span>
|
||||
<select
|
||||
id="workspace-cloud-provider"
|
||||
value={cloudProvider}
|
||||
onChange={(e) => setCloudProvider(e.target.value)}
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors"
|
||||
>
|
||||
{CLOUD_PROVIDER_OPTIONS.map((p) => (
|
||||
<option key={p.value} value={p.value}>
|
||||
{p.label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</label>
|
||||
)}
|
||||
<label className="flex items-center justify-between gap-3">
|
||||
<span className="text-xs font-medium text-ink">Display</span>
|
||||
<input
|
||||
|
||||
@@ -12,7 +12,6 @@ import {
|
||||
ProviderModelSelector,
|
||||
buildProviderCatalog,
|
||||
findProviderForModel,
|
||||
isPlatformManagedProvider,
|
||||
type SelectorValue,
|
||||
} from "./ProviderModelSelector";
|
||||
|
||||
@@ -268,21 +267,10 @@ function ProviderPickerModal({
|
||||
setSelectorValue(initial);
|
||||
}, [open, initial]);
|
||||
|
||||
// #2248: filter out provisioner-injected internal tokens for platform-managed
|
||||
// providers so the user can't clobber them. Memoized so the array reference is
|
||||
// stable across renders and does not churn the entries useEffect.
|
||||
const userEditableEnvVars = useMemo(() => {
|
||||
const selectedProvider = catalog.find((p) => p.id === selectorValue.providerId);
|
||||
const isPlatformManaged = selectedProvider ? isPlatformManagedProvider(selectedProvider) : false;
|
||||
return isPlatformManaged
|
||||
? selectorValue.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
|
||||
: selectorValue.envVars;
|
||||
}, [catalog, selectorValue.providerId, selectorValue.envVars]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
setEntries(
|
||||
userEditableEnvVars.map((key) => ({
|
||||
selectorValue.envVars.map((key) => ({
|
||||
key,
|
||||
value: "",
|
||||
// Pre-mark as saved when the key is already in the configured
|
||||
@@ -295,7 +283,7 @@ function ProviderPickerModal({
|
||||
);
|
||||
setOptionalEntries(
|
||||
optionalKeys
|
||||
.filter((key) => !userEditableEnvVars.includes(key))
|
||||
.filter((key) => !selectorValue.envVars.includes(key))
|
||||
.map((key) => ({
|
||||
key,
|
||||
value: "",
|
||||
@@ -304,7 +292,7 @@ function ProviderPickerModal({
|
||||
error: null,
|
||||
})),
|
||||
);
|
||||
}, [open, userEditableEnvVars, configuredKeys, optionalKeys]);
|
||||
}, [open, selectorValue.envVars, configuredKeys, optionalKeys]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
|
||||
@@ -91,7 +91,6 @@ export interface RegistryModel {
|
||||
name?: string;
|
||||
provider?: string;
|
||||
billing_mode?: "platform_managed" | "byok";
|
||||
required_env?: string[];
|
||||
}
|
||||
|
||||
export interface SelectorValue {
|
||||
|
||||
@@ -385,7 +385,7 @@ describe("ContextMenu — item actions", () => {
|
||||
render(<ContextMenu />);
|
||||
fireEvent.click(screen.getByRole("menuitem", { name: /pause/i }));
|
||||
await act(async () => { /* flush */ });
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause?cascade=true", {});
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause", {});
|
||||
expect(mockStoreState.updateNodeData).toHaveBeenCalledWith("n1", { status: "paused" });
|
||||
});
|
||||
|
||||
@@ -395,7 +395,7 @@ describe("ContextMenu — item actions", () => {
|
||||
render(<ContextMenu />);
|
||||
fireEvent.click(screen.getByRole("menuitem", { name: /resume/i }));
|
||||
await act(async () => { /* flush */ });
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume?cascade=true", {});
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume", {});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// SaaS-mode coverage for the per-workspace cloud-provider picker. The main
|
||||
// CreateWorkspaceDialog.test.tsx runs non-SaaS (the picker is hidden and the
|
||||
// payload omits `provider`); this file forces SaaS by mocking isSaaSTenant so
|
||||
// the picker renders and the selected provider flows into compute.provider.
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
|
||||
import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), post: vi.fn() },
|
||||
}));
|
||||
|
||||
// Force SaaS so the Cloud provider picker is shown and the payload carries it.
|
||||
vi.mock("@/lib/tenant", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("@/lib/tenant")>()),
|
||||
isSaaSTenant: () => true,
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
const mockPost = vi.mocked(api.post);
|
||||
|
||||
const SAMPLE_TEMPLATES = [
|
||||
{
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code Agent",
|
||||
runtime: "claude-code",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
providers: ["platform", "minimax"],
|
||||
models: [{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] }],
|
||||
},
|
||||
];
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
if (url === "/templates") return SAMPLE_TEMPLATES as any;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return [] as any;
|
||||
});
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPost.mockResolvedValue({} as any);
|
||||
});
|
||||
|
||||
afterEach(() => cleanup());
|
||||
|
||||
// Test helper: mounts <CreateWorkspaceButton />, clicks the button whose text
// contains "New Workspace", and resolves once the "Create Workspace" text is
// present (i.e. the dialog has opened).
async function openDialog() {
  render(<CreateWorkspaceButton />);
  const btn = screen.getAllByRole("button").find((b) => b.textContent?.includes("New Workspace"));
  // Non-null assertion: if the trigger is missing, fireEvent fails the test here.
  fireEvent.click(btn!);
  await waitFor(() => expect(screen.getByText("Create Workspace")).toBeTruthy());
}
|
||||
|
||||
// SaaS-mode suite for the Cloud provider picker. Each test opens the dialog
// via openDialog() and then checks either the picker's default value or the
// `compute.provider` field of the body passed to the mocked api.post.
describe("CreateWorkspaceDialog — cloud provider (SaaS)", () => {
  it("shows the Cloud provider picker, defaulting to AWS", async () => {
    await openDialog();
    const select = screen.getByLabelText("Cloud provider") as HTMLSelectElement;
    expect(select).toBeTruthy();
    expect(select.value).toBe("aws");
  });

  it("defaults compute.provider to aws when the picker is untouched", async () => {
    await openDialog();
    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "AWS Agent" } });
    fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
    await waitFor(() => expect(mockPost).toHaveBeenCalled());
    // Second argument of the first api.post call is the request body.
    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
    expect(body.compute).toMatchObject({ provider: "aws" });
  });

  it("threads the selected cloud provider into compute.provider", async () => {
    await openDialog();
    fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "GCP Agent" } });
    fireEvent.change(screen.getByLabelText("Cloud provider"), { target: { value: "gcp" } });
    fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
    await waitFor(() => expect(mockPost).toHaveBeenCalled());
    // Second argument of the first api.post call is the request body.
    const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
    expect(body.compute).toMatchObject({ provider: "gcp" });
  });
});
|
||||
@@ -1,175 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* Regression tests for #2248 — platform-managed provider credential suppression.
|
||||
*
|
||||
* Covers:
|
||||
* - MOLECULE_LLM_USAGE_TOKEN is hidden when the selected provider is platform-managed
|
||||
* - MOLECULE_LLM_USAGE_TOKEN is still shown for BYOK providers
|
||||
* - No render churn from unstable array references (useMemo guard)
|
||||
*/
|
||||
import { describe, it, expect, vi, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, cleanup, waitFor, act } from "@testing-library/react";
|
||||
import { MissingKeysModal } from "../MissingKeysModal";
|
||||
import type { ModelSpec, ProviderChoice } from "@/lib/deploy-preflight";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), put: vi.fn() },
|
||||
}));
|
||||
|
||||
vi.mock("@/lib/deploy-preflight", async () => {
|
||||
const actual = await vi.importActual<typeof import("@/lib/deploy-preflight")>(
|
||||
"@/lib/deploy-preflight",
|
||||
);
|
||||
return actual;
|
||||
});
|
||||
|
||||
const PLATFORM_MANAGED_MODELS: ModelSpec[] = [
|
||||
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
const BYOK_MODELS: ModelSpec[] = [
|
||||
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
// Builds the two-entry provider list used by the suppression tests.
//
// The first entry is the provider under test: its id/label are "platform…" /
// "Platform Anthropic" for "platform_managed" and "anthropic…" / "BYOK
// Anthropic" for "byok"; it always declares both ANTHROPIC_API_KEY and
// MOLECULE_LLM_USAGE_TOKEN and carries the requested billingMode. The second
// entry is a fixed OpenAI filler with no billingMode.
function makeProviders(billingMode: "platform_managed" | "byok"): ProviderChoice[] {
  const isPlatform = billingMode === "platform_managed";
  const main = {
    id: isPlatform
      ? "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN"
      : "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
    label: isPlatform ? "Platform Anthropic" : "BYOK Anthropic",
    envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
    billingMode,
  };
  // Need ≥2 providers so MissingKeysModal enters picker mode (pickerMode = providers.length > 1).
  const dummy = {
    id: "openai|OPENAI_API_KEY",
    label: "OpenAI",
    envVars: ["OPENAI_API_KEY"],
  };
  return [main, dummy];
}
|
||||
|
||||
// Suite for #2248: MOLECULE_LLM_USAGE_TOKEN must not be offered as an editable
// key when the selected provider is platform-managed, and must still be
// offered for BYOK providers. Every test renders <MissingKeysModal open …/>
// with at least two providers and asserts on the key names in the DOM.
describe("ProviderPickerModal — platform-managed suppression (#2248)", () => {
  afterEach(() => cleanup());

  it("hides MOLECULE_LLM_USAGE_TOKEN when provider is platform-managed", () => {
    render(
      <MissingKeysModal
        open
        missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
        providers={makeProviders("platform_managed")}
        models={PLATFORM_MANAGED_MODELS}
        runtime="claude-code"
        onKeysAdded={vi.fn()}
        onCancel={vi.fn()}
      />,
    );
    // Only ANTHROPIC_API_KEY should be rendered; MOLECULE_LLM_USAGE_TOKEN suppressed
    expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
    expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
  });

  it("shows MOLECULE_LLM_USAGE_TOKEN when provider is BYOK", () => {
    render(
      <MissingKeysModal
        open
        missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
        providers={makeProviders("byok")}
        models={BYOK_MODELS}
        runtime="claude-code"
        onKeysAdded={vi.fn()}
        onCancel={vi.fn()}
      />,
    );
    // Both keys visible for BYOK
    expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
    expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
  });

  it("does not churn renders when the modal is open and platform-managed", () => {
    let renderCount = 0;

    // Counts how many times this wrapper component function is invoked.
    function RenderSpy({ children }: { children: React.ReactNode }) {
      renderCount++;
      return <>{children}</>;
    }

    render(
      <RenderSpy>
        <MissingKeysModal
          open
          missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
          providers={makeProviders("platform_managed")}
          models={PLATFORM_MANAGED_MODELS}
          runtime="claude-code"
          onKeysAdded={vi.fn()}
          onCancel={vi.fn()}
        />
      </RenderSpy>,
    );

    const countAfterInitial = renderCount;

    // Wait a tick — if useEffect were looping, renderCount would climb.
    // In jsdom without real timers there's no automatic re-render, so we
    // just assert the count is stable immediately after the single
    // commit required by the initial open state.
    //
    // NOTE(review): countAfterInitial is read on the line directly above the
    // expectation with nothing in between, so the first expectation cannot
    // fail. renderCount also only moves when RenderSpy itself renders, which
    // presumably does not happen when the modal re-renders from its own state
    // updates. To actually guard against effect churn this likely needs to
    // observe the modal's commits (e.g. a React Profiler onRender counter)
    // and flush effects before asserting — TODO confirm.
    expect(renderCount).toBe(countAfterInitial);
    expect(renderCount).toBeLessThanOrEqual(2); // StrictMode double-render ceiling
  });

  it("updates suppression correctly when switching from BYOK to platform-managed", async () => {
    const providers: ProviderChoice[] = [
      {
        id: "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
        label: "BYOK Anthropic",
        envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
        billingMode: "byok",
      },
      {
        id: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
        label: "Platform Anthropic",
        envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
        billingMode: "platform_managed",
      },
      {
        id: "openai|OPENAI_API_KEY",
        label: "OpenAI",
        envVars: ["OPENAI_API_KEY"],
      },
    ];

    const models: ModelSpec[] = [
      { id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
      { id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
    ];

    render(
      <MissingKeysModal
        open
        missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
        providers={providers}
        models={models}
        runtime="claude-code"
        onKeysAdded={vi.fn()}
        onCancel={vi.fn()}
      />,
    );

    // Default selection is providers[0] (BYOK) — both keys visible
    expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
    expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();

    // Switch to platform-managed provider
    const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
    act(() => {
      fireEvent.change(providerSelect, {
        target: { value: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" },
      });
    });

    // MOLECULE_LLM_USAGE_TOKEN should now be suppressed
    await waitFor(() => {
      expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
    });
    expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
  });
});
|
||||
@@ -13,7 +13,6 @@ import {
|
||||
buildProviderCatalog,
|
||||
buildProviderCatalogFromRegistry,
|
||||
findProviderForModel,
|
||||
isPlatformManagedProvider,
|
||||
type SelectorValue,
|
||||
type ProviderEntry,
|
||||
type RegistryProvider,
|
||||
@@ -683,9 +682,6 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
name: m.name,
|
||||
// carry the derived provider so the selector buckets correctly
|
||||
...(m.provider ? { provider: m.provider } : {}),
|
||||
// carry required_env so wasTemplateDriven can detect
|
||||
// template-driven env lists for registry-backed runtimes
|
||||
...(m.required_env ? { required_env: m.required_env } : {}),
|
||||
}))
|
||||
: availableModels,
|
||||
[registryBacked, selectedRuntime?.registryModels, availableModels],
|
||||
@@ -1021,15 +1017,6 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
// top-level model. required_env follows the selected
|
||||
// provider's envVars when the existing required_env
|
||||
// was template-driven (don't clobber user-typed envs).
|
||||
//
|
||||
// #2248: suppress provisioner-injected internal tokens
|
||||
// (MOLECULE_LLM_USAGE_TOKEN) for platform-managed providers
|
||||
// so the user can't clobber them.
|
||||
const selectedEntry = providerCatalog.find((p) => p.id === next.providerId);
|
||||
const isPlatformManaged = selectedEntry ? isPlatformManagedProvider(selectedEntry) : false;
|
||||
const filteredEnvVars = isPlatformManaged
|
||||
? next.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
|
||||
: next.envVars;
|
||||
setConfig((prev) => {
|
||||
const v = next.model;
|
||||
const prevModelId = prev.runtime_config?.model || prev.model || "";
|
||||
@@ -1042,8 +1029,8 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
prevRequired.every((e, i) => e === prevSpec.required_env![i])
|
||||
: false);
|
||||
const nextRequired =
|
||||
wasTemplateDriven
|
||||
? filteredEnvVars
|
||||
next.envVars.length > 0 && wasTemplateDriven
|
||||
? next.envVars
|
||||
: prevRequired;
|
||||
if (prev.runtime) {
|
||||
return {
|
||||
@@ -1051,7 +1038,7 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
runtime_config: {
|
||||
...prev.runtime_config,
|
||||
model: v,
|
||||
...(wasTemplateDriven
|
||||
...(next.envVars.length > 0 && wasTemplateDriven
|
||||
? { required_env: nextRequired }
|
||||
: {}),
|
||||
},
|
||||
|
||||
@@ -38,16 +38,8 @@ const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
|
||||
// Display label for a data-persistence option value. "persist" and
// "ephemeral" get explicit labels; every other value (including the empty
// string) is shown as "Auto".
const dataPersistenceLabel = (v: string): string => {
  switch (v) {
    case "persist":
      return "Always keep (persist)";
    case "ephemeral":
      return "Don't keep (ephemeral)";
    default:
      return "Auto";
  }
};
|
||||
|
||||
// Cloud/compute backend display name. The provider is chosen at create time and
// is NOT editable here (changing a workspace's cloud requires a recreate), so
// it renders as a read-only badge — but we must preserve it across Save (the
// compute payload is rebuilt below, and dropping it would wipe the column).
//
// "gcp" and "hetzner" map to their display names; any other value, including
// undefined, is labelled "AWS".
const cloudProviderLabel = (v: string | undefined): string => {
  switch (v) {
    case "gcp":
      return "GCP";
    case "hetzner":
      return "Hetzner";
    default:
      return "AWS";
  }
};
|
||||
|
||||
export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
const runtime = data.runtime;
|
||||
const provider = data.compute?.provider; // read-only; set at create time
|
||||
const instanceType = data.compute?.instance_type;
|
||||
const rootGB = data.compute?.volume?.root_gb;
|
||||
const displayMode = data.compute?.display?.mode;
|
||||
@@ -102,10 +94,6 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
: { mode: "none" },
|
||||
// internal#734: omit when "auto" so the wire/default behavior is unchanged.
|
||||
...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
|
||||
// Preserve the create-time cloud provider — it's not editable here, but
|
||||
// this PATCH rebuilds the whole compute object, so omitting it would
|
||||
// wipe the persisted provider (and mislead the badge after a Save).
|
||||
...(provider ? { provider } : {}),
|
||||
};
|
||||
|
||||
const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
|
||||
@@ -138,18 +126,7 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
<div className="p-4 space-y-4">
|
||||
<section className="rounded-lg border border-line/50 bg-surface-card/40 p-4">
|
||||
<div className="mb-3 flex items-center justify-between gap-3">
|
||||
<div className="flex items-center gap-2">
|
||||
<h3 className="text-sm font-semibold text-ink">Container Config</h3>
|
||||
{/* Read-only cloud-provider badge — which cloud this workspace's box
|
||||
runs on (AWS/GCP/Hetzner). Defaults to AWS when unset (legacy
|
||||
rows). Set at create time in the Create Workspace dialog. */}
|
||||
<span
|
||||
title="Cloud provider for this workspace's compute (set at create time)"
|
||||
className="rounded-full border border-line/60 bg-surface-sunken px-2 py-0.5 font-mono text-[10px] uppercase tracking-wide text-ink-mid"
|
||||
>
|
||||
{cloudProviderLabel(provider)}
|
||||
</span>
|
||||
</div>
|
||||
<h3 className="text-sm font-semibold text-ink">Container Config</h3>
|
||||
{data.needsRestart && <span className="text-[11px] text-warm">Restart required</span>}
|
||||
</div>
|
||||
|
||||
|
||||
@@ -1,229 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression tests for #2248 — platform-managed provider credential suppression
|
||||
// in ConfigTab.
|
||||
//
|
||||
// Covers:
|
||||
// - required_env is cleared to [] when switching to a platform-managed provider
|
||||
// whose only declared env var is MOLECULE_LLM_USAGE_TOKEN (single-token case).
|
||||
// - required_env preserves non-internal tokens for BYOK providers.
|
||||
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) =>
|
||||
selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
|
||||
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
/**
 * Installs a path-based fake for `api.get` covering the four endpoints
 * ConfigTab reads on mount for workspace `ws-test`.
 *
 * - `workspaceRuntime` / `workspaceModel` default to the empty string.
 * - `configYamlContent: null` makes the config.yaml fetch reject
 *   ("not found"); `undefined` resolves with empty content.
 * - `templates` defaults to an empty list.
 *
 * Any other path rejects so an unexpected request fails the test loudly.
 */
function wireApi(opts: {
  workspaceRuntime?: string;
  workspaceModel?: string;
  configYamlContent?: string | null;
  templates?: Array<{
    id: string;
    name?: string;
    runtime?: string;
    models?: unknown[];
    registry_backed?: boolean;
    registry_providers?: unknown[];
    registry_models?: unknown[];
  }>;
}) {
  const { workspaceRuntime, workspaceModel, configYamlContent, templates } = opts;

  apiGet.mockImplementation((path: string) => {
    switch (path) {
      case `/workspaces/ws-test`:
        return Promise.resolve({ runtime: workspaceRuntime ?? "" });

      case `/workspaces/ws-test/model`:
        return Promise.resolve({ model: workspaceModel ?? "" });

      case `/workspaces/ws-test/files/config.yaml`:
        // Explicit null simulates a workspace without a config.yaml.
        return configYamlContent === null
          ? Promise.reject(new Error("not found"))
          : Promise.resolve({ content: configYamlContent ?? "" });

      case "/templates":
        return Promise.resolve(templates ?? []);

      default:
        return Promise.reject(new Error(`unmocked api.get: ${path}`));
    }
  });
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
});
|
||||
|
||||
describe("ConfigTab — platform-managed credential suppression (#2248)", () => {
  // ── Fixture factories (fresh objects per call so tests never share state) ──

  const CONFIG_YAML_PATH = "/workspaces/ws-test/files/config.yaml";

  // config.yaml of a workspace that currently runs the BYOK model and still
  // lists both env vars under required_env.
  const byokConfigYaml = () =>
    [
      "runtime: claude-code",
      "runtime_config:",
      " model: byok-sonnet",
      " required_env:",
      " - ANTHROPIC_API_KEY",
      " - MOLECULE_LLM_USAGE_TOKEN",
    ].join("\n");

  const byokProvider = () => ({
    name: "anthropic",
    display_name: "BYOK Anthropic",
    auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
    billing_mode: "byok",
  });

  // Platform-managed provider whose ONLY auth_env is the internal usage token.
  const platformProvider = () => ({
    name: "platform",
    display_name: "Platform Anthropic",
    auth_env: ["MOLECULE_LLM_USAGE_TOKEN"],
    billing_mode: "platform_managed",
  });

  const claudeCodeTemplate = (providers: unknown[], models: unknown[]) => ({
    id: "t-claude-code",
    name: "Claude Code",
    runtime: "claude-code",
    models: [],
    registry_backed: true,
    registry_providers: providers,
    registry_models: models,
  });

  // Content of the config.yaml PUT, or undefined when no such call was made.
  const savedConfigYaml = () =>
    apiPut.mock.calls.find(([path]) => path === CONFIG_YAML_PATH)?.[1].content;

  const saveAndRestartButton = () =>
    screen.getByRole("button", { name: /save & restart/i });

  it("clears required_env to [] when switching to a single-token platform-managed provider", async () => {
    // The workspace starts on a BYOK provider with both keys required. The
    // user then picks the platform-managed provider. Once the internal token
    // is filtered out, envVars is []; wasTemplateDriven must still overwrite
    // required_env with [] so the stale MOLECULE_LLM_USAGE_TOKEN requirement
    // does not survive the save.
    wireApi({
      workspaceRuntime: "claude-code",
      workspaceModel: "byok-sonnet",
      configYamlContent: byokConfigYaml(),
      templates: [
        claudeCodeTemplate(
          [byokProvider(), platformProvider()],
          [
            { id: "byok-sonnet", provider: "anthropic", billing_mode: "byok", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
            { id: "platform-sonnet", provider: "platform", billing_mode: "platform_managed", required_env: ["MOLECULE_LLM_USAGE_TOKEN"] },
          ],
        ),
      ],
    });
    apiPut.mockResolvedValue({});
    apiPatch.mockResolvedValue({});

    render(<ConfigTab workspaceId="ws-test" />);

    // The provider dropdown appears once templates have loaded.
    const providerSelect = (await waitFor(() =>
      screen.getByTestId("provider-select"),
    )) as HTMLSelectElement;

    // BYOK → platform-managed.
    const platformOption = Array.from(providerSelect.options).find((o) =>
      o.text.includes("Platform"),
    );
    expect(platformOption).toBeTruthy();
    fireEvent.change(providerSelect, { target: { value: platformOption!.value } });

    fireEvent.click(saveAndRestartButton());

    await waitFor(() => {
      expect(apiPut).toHaveBeenCalledWith(
        CONFIG_YAML_PATH,
        expect.objectContaining({
          content: expect.not.stringContaining("ANTHROPIC_API_KEY"),
        }),
      );
    });

    // The saved config must not carry the suppressed internal token either.
    expect(savedConfigYaml()).not.toContain("MOLECULE_LLM_USAGE_TOKEN");
  });

  it("preserves non-internal tokens for BYOK providers", async () => {
    wireApi({
      workspaceRuntime: "claude-code",
      workspaceModel: "byok-sonnet",
      configYamlContent: byokConfigYaml(),
      templates: [
        claudeCodeTemplate(
          [byokProvider()],
          [{ id: "byok-sonnet", provider: "anthropic", billing_mode: "byok" }],
        ),
      ],
    });
    apiPut.mockResolvedValue({});
    apiPatch.mockResolvedValue({});

    render(<ConfigTab workspaceId="ws-test" />);

    // Loaded once the Save & Restart button is present.
    await waitFor(() => saveAndRestartButton());

    // Save with the provider untouched — a BYOK workspace keeps both keys.
    fireEvent.click(saveAndRestartButton());

    await waitFor(() => {
      expect(apiPut).toHaveBeenCalledWith(
        CONFIG_YAML_PATH,
        expect.objectContaining({
          content: expect.stringContaining("required_env:"),
        }),
      );
    });

    expect(savedConfigYaml()).toContain("ANTHROPIC_API_KEY");
    expect(savedConfigYaml()).toContain("MOLECULE_LLM_USAGE_TOKEN");
  });
});
|
||||
@@ -324,7 +324,7 @@ export const useCanvasStore = create<CanvasState>((set, get) => ({
|
||||
batchPause: async () => {
|
||||
const ids = Array.from(get().selectedNodeIds);
|
||||
const results = await Promise.allSettled(
|
||||
ids.map((id) => api.post(`/workspaces/${id}/pause?cascade=true`))
|
||||
ids.map((id) => api.post(`/workspaces/${id}/pause`))
|
||||
);
|
||||
const failed: string[] = [];
|
||||
results.forEach((r, i) => {
|
||||
|
||||
@@ -371,12 +371,6 @@ export interface WorkspaceCompute {
|
||||
// internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
|
||||
// undefined (auto). Controls whether the data volume survives recreate.
|
||||
data_persistence?: string;
|
||||
// Cloud/compute backend for this workspace box (multi-provider, per-workspace):
|
||||
// "aws" (default EC2) | "gcp" | "hetzner". Distinct from the LLM/model provider.
|
||||
// Set at create time; routed by CP to the matching WorkspaceProvisioner. A
|
||||
// workspace whose provider differs from its tenant's cloud is reached over a
|
||||
// per-workspace Cloudflare tunnel (runtime#95).
|
||||
provider?: string;
|
||||
}
|
||||
|
||||
let socket: ReconnectingSocket | null = null;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
**Status:** living document — update when you ship a feature that touches one backend.
|
||||
**Owner:** workspace-server + controlplane teams.
|
||||
**Last audit:** 2026-05-31 (Claude agent — drift risk #6 verified resolved; nil guards present, contract tests run without Skip).
|
||||
**Last audit:** 2026-05-07 (plugin install/uninstall closed for EC2 backend via EIC SSH push to the bind-mounted `/configs/plugins/<name>/`, mirroring the Files API PR #1702 pattern).
|
||||
|
||||
## Why this exists
|
||||
|
||||
@@ -93,12 +93,12 @@ For "do we have any backend?", use `HasProvisioner()`, never bare `h.provisioner
|
||||
3. **Restart divergence on runtime changes.** Docker re-reads `/configs/config.yaml` from the container before stop, so a changed `runtime:` survives a restart even if the DB isn't synced. EC2 trusts the DB only. If you change the runtime via the Config tab and the handler races the restart, Docker will land on the new runtime, EC2 will land on the old one. **Fix path:** make the Config-tab save explicitly flush to DB before kicking off a restart, not deferred.
|
||||
4. **Console-output asymmetry.** Users debugging a stuck workspace on Docker see `docker logs`; on EC2 they see `GetConsoleOutput`. The two outputs look nothing alike. **Fix path:** expose a unified `GET /workspaces/:id/boot-log` that proxies to whichever backend serves the data. Already partly there via `cp_provisioner.Console`.
|
||||
5. **Template script drift.** `install.sh` and `start.sh` in each template repo do the same high-level work (install hermes-agent, write .env, write config.yaml, start gateway) but must be kept byte-level consistent on the provider-key forwarding block. Easy to forget. Enforced now by `tools/check-template-parity.sh` (see below) — run it in each template repo's CI.
|
||||
6. ~~**Both backends panic when underlying client is nil.**~~ **RESOLVED** — nil guards landed in `Provisioner` (`Start`, `Stop`, `IsRunning`, `ExecRead`, `RemoveVolume`, `VolumeHasFile`, `WriteAuthTokenToVolume`) and `CPProvisioner` (`Stop`, `IsRunning`), all returning `ErrNoBackend`. Contract tests (`TestDockerBackend_Contract`, `TestCPProvisionerBackend_Contract`, `TestZeroValuedBackends_NoPanic`) run in CI without `t.Skip`.
|
||||
6. **Both backends panic when underlying client is nil.** ✅ **Resolved** (`fix/provisioner-nil-guards-1813`). `Provisioner.{Stop,IsRunning}` and `CPProvisioner.{Stop,IsRunning}` now guard against nil clients with `ErrNoBackend`, so the contract-test runner executes scenarios against zero-valued backends without panic.
|
||||
|
||||
## Enforcement
|
||||
|
||||
- **`tools/check-template-parity.sh`** (this repo) — ensures `install.sh` and `start.sh` in a template repo forward identical sets of provider keys. Wire into each template repo's CI as `bash $MONOREPO/tools/check-template-parity.sh install.sh start.sh`.
|
||||
- **Contract tests** — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Compilation fails when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs (`TestDockerBackend_Contract`, `TestCPProvisionerBackend_Contract`, `TestZeroValuedBackends_NoPanic`) execute in CI — drift risk #6 resolved.
|
||||
- **Contract tests** — `workspace-server/internal/provisioner/backend_contract_test.go` defines the behaviors every `provisioner.Provisioner` implementation must satisfy. Compilation fails when a method drifts between `Docker` and `CPProvisioner`. Scenario-level runs execute against zero-valued backends since drift risk #6 was resolved (`fix/provisioner-nil-guards-1813`).
|
||||
- **Source-level dispatcher pins** — `workspace_provision_auto_test.go` enforces the SoT pattern documented above:
|
||||
- `TestNoCallSiteCallsDirectProvisionerExceptAuto` — no handler calls `.provisionWorkspace(` or `.provisionWorkspaceCP(` directly outside the dispatcher's allowlist.
|
||||
- `TestNoCallSiteCallsBareStop` — no handler calls `.provisioner.Stop(` or `.cpProv.Stop(` directly outside the dispatcher's allowlist (strips Go comments before substring match so archaeology in code comments doesn't trip the gate).
|
||||
|
||||
@@ -19,10 +19,7 @@
|
||||
#
|
||||
# Env vars required:
|
||||
# CF_API_TOKEN — Cloudflare token with zone:dns:edit
|
||||
# (falls back to CLOUDFLARE_API_TOKEN if CF_API_TOKEN is unset;
|
||||
# the workflow YAML maps both secret names into CF_API_TOKEN)
|
||||
# CF_ZONE_ID — the zone (moleculesai.app)
|
||||
# (falls back to CLOUDFLARE_ZONE_ID if CF_ZONE_ID is unset)
|
||||
# CP_ADMIN_API_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_API_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
# AWS_* — standard AWS creds (default region us-east-2)
|
||||
@@ -59,12 +56,6 @@ need() {
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
# Fallback: operator-host canonical names → CI-scoped names.
|
||||
# The workflow YAML already maps both, but direct script invocation
|
||||
# (e.g. local ops) may only have the canonical names set.
|
||||
CF_API_TOKEN="${CF_API_TOKEN:-${CLOUDFLARE_API_TOKEN:-}}"
|
||||
CF_ZONE_ID="${CF_ZONE_ID:-${CLOUDFLARE_ZONE_ID:-}}"
|
||||
|
||||
need CF_API_TOKEN
|
||||
need CF_ZONE_ID
|
||||
need CP_ADMIN_API_TOKEN
|
||||
@@ -130,7 +121,7 @@ if not payload.get("success", False) or not isinstance(payload.get("result"), li
|
||||
print(f"ERROR: Cloudflare DNS list failed: {detail}", file=sys.stderr)
|
||||
raise SystemExit(1)
|
||||
'; then
|
||||
log "Cloudflare DNS list failed; verify CF_API_TOKEN (or CLOUDFLARE_API_TOKEN) has Zone:DNS:Edit and CF_ZONE_ID (or CLOUDFLARE_ZONE_ID) is the moleculesai.app zone."
|
||||
log "Cloudflare DNS list failed; verify CF_API_TOKEN has Zone:DNS:Edit and CF_ZONE_ID is the moleculesai.app zone."
|
||||
exit 1
|
||||
fi
|
||||
TOTAL_CF=$(echo "$CF_JSON" | python3 -c "import json,sys; print(len(json.load(sys.stdin)['result']))")
|
||||
|
||||
@@ -29,11 +29,8 @@
|
||||
# account:cloudflare_tunnel:edit scope.
|
||||
# (Same secret as sweep-cf-orphans, but the
|
||||
# token must include the tunnel scope.)
|
||||
# (falls back to CLOUDFLARE_API_TOKEN if CF_API_TOKEN is unset;
|
||||
# the workflow YAML maps both secret names into CF_API_TOKEN)
|
||||
# CF_ACCOUNT_ID — the account that owns the tunnels (visible
|
||||
# in dash.cloudflare.com URL path)
|
||||
# (falls back to CLOUDFLARE_ACCOUNT_ID if CF_ACCOUNT_ID is unset)
|
||||
# CP_ADMIN_API_TOKEN — CP admin bearer for api.moleculesai.app
|
||||
# CP_STAGING_ADMIN_API_TOKEN — CP admin bearer for staging-api.moleculesai.app
|
||||
#
|
||||
@@ -73,12 +70,6 @@ need() {
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
# Fallback: operator-host canonical names → CI-scoped names.
|
||||
# The workflow YAML already maps both, but direct script invocation
|
||||
# (e.g. local ops) may only have the canonical names set.
|
||||
CF_API_TOKEN="${CF_API_TOKEN:-${CLOUDFLARE_API_TOKEN:-}}"
|
||||
CF_ACCOUNT_ID="${CF_ACCOUNT_ID:-${CLOUDFLARE_ACCOUNT_ID:-}}"
|
||||
|
||||
need CF_API_TOKEN
|
||||
need CF_ACCOUNT_ID
|
||||
need CP_ADMIN_API_TOKEN
|
||||
|
||||
@@ -1,299 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# cp#455 — Minimal-cell boot-to-registration harness.
|
||||
# CTO directive 14eb4f07: "build the minimal claude-code+kimi cell,
|
||||
# it should now go GREEN since the fix is live."
|
||||
#
|
||||
# Stage 1 of a 5-stage rollout. Reduced to the minimum boot-to-
|
||||
# registration surface so each cell run is ~3-5 min wall-clock.
|
||||
#
|
||||
# Four assertions (per Researcher Task #79 spec):
|
||||
# 1. Provision request accepted; workspace transitions to booting/running
|
||||
# 2. Controlplane receives /registry/register for that workspace_id
|
||||
# 3. JSON-RPC/completion route returns successful minimal response
|
||||
# 4. Teardown terminates workspace even on failure (trap)
|
||||
#
|
||||
# Cost controls (mandatory):
|
||||
# - SPOT instances (via the dispatch-only EC2 provisioning path;
|
||||
# we don't set instance type — that's the provisioner's call)
|
||||
# - Fast teardown ~3-5 min wall-clock
|
||||
# - Structured per-cell results JSON output
|
||||
#
|
||||
# Auth model (mirrors test_staging_full_saas.sh):
|
||||
# Single MOLECULE_ADMIN_TOKEN drives everything.
|
||||
# - POST /cp/admin/orgs to provision
|
||||
# - GET /cp/admin/orgs/:slug/admin-token for per-tenant token
|
||||
# - DELETE /cp/admin/tenants/:slug for teardown
|
||||
# Per-tenant admin token drives tenant API calls (workspaces,
|
||||
# /registry/register, JSON-RPC completion).
|
||||
#
|
||||
# Required env:
|
||||
# MOLECULE_CP_URL default: https://staging-api.moleculesai.app
|
||||
# MOLECULE_ADMIN_TOKEN CP admin bearer
|
||||
#
|
||||
# Optional env (passed from workflow_dispatch inputs):
|
||||
# E2E_RUNTIME default claude-code
|
||||
# E2E_BILLING_MODE default platform_managed
|
||||
# E2E_PROVIDER default platform
|
||||
# E2E_MODEL default moonshot/kimi-k2.6
|
||||
# E2E_RUN_ID Slug suffix; CI: cp455-${GITHUB_RUN_ID}
|
||||
# E2E_PROVISION_TIMEOUT_SECS default 300 (5 min — fast teardown budget)
|
||||
# E2E_KEEP_ORG 1 → skip teardown (debugging only)
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 happy path
|
||||
# 1 generic failure
|
||||
# 2 missing required env
|
||||
# 3 provisioning timed out (assertion 1)
|
||||
# 4 register timeout (assertion 2)
|
||||
# 5 completion failure (assertion 3)
|
||||
# 6 teardown left orphan (assertion 4)
|
||||
|
||||
set -uo pipefail
|
||||
|
||||
# ── Configuration and run state ───────────────────────────────────────
# Required env is validated explicitly so a missing value exits with the
# documented code 2. The ${VAR:?msg} expansion makes a non-interactive bash
# exit with status 1, which the exit-code table reserves for "generic
# failure". This check runs before the EXIT trap is installed, so no
# teardown or result file is produced for a run that never started.
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
  echo "MOLECULE_ADMIN_TOKEN required — Railway staging CP_ADMIN_API_TOKEN" >&2
  exit 2
fi

# Inputs (env overrides with defaults).
CP_URL="${MOLECULE_CP_URL:-https://staging-api.moleculesai.app}"
ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN}"
RUNTIME="${E2E_RUNTIME:-claude-code}"
BILLING_MODE="${E2E_BILLING_MODE:-platform_managed}"
PROVIDER="${E2E_PROVIDER:-platform}"
MODEL="${E2E_MODEL:-moonshot/kimi-k2.6}"
PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-300}"
KEEP_ORG="${E2E_KEEP_ORG:-}"

# Slug is unique per run: explicit E2E_RUN_ID, else wall-clock time + PID.
RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
SLUG="cp455-${RUNTIME}-${RUN_ID_SUFFIX}"

# Mutable run state, reported by write_result from the EXIT trap.
WORKSPACE_ID=""
TENANT_TOKEN=""
RESULT_JSON="/tmp/cell-result.json"
PROVISION_START_EPOCH=""
PROVISION_END_EPOCH=""
REGISTER_STATUS="not_attempted"
COMPLETION_STATUS="not_attempted"
TEARDOWN_STATUS="not_attempted"
EXIT_CODE=0
|
||||
|
||||
# Structured per-cell results writer. Emits JSON with all 4
|
||||
# assertion statuses + elapsed timing. Called from EXIT trap so
|
||||
# results are captured even on early failure.
|
||||
# write_result [ELAPSED_SECONDS]
#
# Serialises the current run state (RUNTIME, BILLING_MODE, PROVIDER, MODEL,
# WORKSPACE_ID, the three *_STATUS variables and EXIT_CODE) plus a UTC
# timestamp to ${RESULT_JSON}, overwriting any previous content.
# ELAPSED_SECONDS defaults to 0 and is written as a bare JSON number, as is
# EXIT_CODE; all other values are interpolated as-is inside double quotes.
write_result() {
  local secs="${1:-0}"
  local stamp
  stamp="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

  cat <<JSON > "${RESULT_JSON}"
{
"runtime": "${RUNTIME}",
"billing_mode": "${BILLING_MODE}",
"provider": "${PROVIDER}",
"model": "${MODEL}",
"workspace_id": "${WORKSPACE_ID}",
"register_status": "${REGISTER_STATUS}",
"completion_status": "${COMPLETION_STATUS}",
"teardown_status": "${TEARDOWN_STATUS}",
"elapsed_seconds": ${secs},
"exit_code": ${EXIT_CODE},
"ts": "${stamp}"
}
JSON
}
|
||||
|
||||
# EXIT trap — ALWAYS run. Writes structured results, tears down
|
||||
# workspace if we have one, never lets the script exit without
|
||||
# emitting /tmp/cell-result.json.
|
||||
# on_exit — EXIT trap handler (assertion 4: teardown even on failure).
#
# 1. Captures the script's exit status into EXIT_CODE.
# 2. Unless KEEP_ORG is set, DELETEs the tenant for ${SLUG} and records the
#    outcome in TEARDOWN_STATUS. HTTP 200/204/404 count as success; anything
#    else is an orphan risk and promotes a clean run to exit code 6.
# 3. Writes the structured result via write_result, prints it, and exits
#    with EXIT_CODE.
on_exit() {
  # Must be the first statement so $? is still the script's exit status.
  local exit_code=$?
  EXIT_CODE=${exit_code}

  local now elapsed=0
  now=$(date +%s)
  # The numeric test's stderr is silenced so a non-numeric epoch leaves
  # elapsed at 0 instead of printing a diagnostic.
  if [ -n "${PROVISION_START_EPOCH:-}" ] && [ "${PROVISION_START_EPOCH}" -gt 0 ] 2>/dev/null; then
    elapsed=$(( now - PROVISION_START_EPOCH ))
  fi

  if [ -n "${KEEP_ORG:-}" ]; then
    TEARDOWN_STATUS="skipped_keep_org"
  elif [ -n "${SLUG:-}" ]; then
    echo "::group::Teardown (trap)"
    echo "DELETE ${CP_URL}/cp/admin/tenants/${SLUG}"
    local teardown_http_code
    # curl's -w already prints "000" when the transfer fails, so the failure
    # fallback must not echo a second "000" (that produced "000000").
    teardown_http_code=$(curl -sS -o /dev/null -w '%{http_code}' \
      -X DELETE \
      -H "Authorization: Bearer ${ADMIN_TOKEN}" \
      --max-time 60 \
      "${CP_URL}/cp/admin/tenants/${SLUG}" || true)
    teardown_http_code="${teardown_http_code:-000}"

    case "${teardown_http_code}" in
      200|204|404)
        TEARDOWN_STATUS="ok"
        echo "Teardown OK (HTTP ${teardown_http_code})"
        ;;
      *)
        TEARDOWN_STATUS="leak_risk_http_${teardown_http_code}"
        echo "::error::Teardown returned HTTP ${teardown_http_code} — orphan risk"
        # Only report 6 when teardown is the sole failure; an earlier
        # assertion's exit code is more informative and is kept.
        if [ "${EXIT_CODE}" -eq 0 ]; then
          EXIT_CODE=6
        fi
        ;;
    esac
    echo "::endgroup::"
  fi
  # With no SLUG there is nothing to delete; TEARDOWN_STATUS keeps its
  # current value rather than being mislabelled as "skipped_keep_org".

  write_result "${elapsed}"
  echo "Structured results written to ${RESULT_JSON}"
  cat "${RESULT_JSON}"
  exit "${EXIT_CODE}"
}
|
||||
trap on_exit EXIT
|
||||
trap 'echo "::error::Script aborted on signal"; exit 130' INT TERM
|
||||
|
||||
PROVISION_START_EPOCH=$(date +%s)
|
||||
|
||||
# Assertion 1: Provision request accepted; workspace transitions to
# booting/running.
#
# POSTs the org-create request to the CP admin API. HTTP 200 or 202 counts
# as accepted; anything else (including a transport failure, reported as
# 000) fails the run with exit code 1.
echo "::group::Assertion 1: Provision"
echo "POST ${CP_URL}/cp/admin/orgs slug=${SLUG} runtime=${RUNTIME} billing_mode=${BILLING_MODE} provider=${PROVIDER} model=${MODEL}"

PROVISION_PAYLOAD=$(cat <<JSON
{
  "slug": "${SLUG}",
  "runtime": "${RUNTIME}",
  "billing_mode": "${BILLING_MODE}",
  "provider": "${PROVIDER}",
  "model": "${MODEL}",
  "tier": "spot",
  "tags": {
    "cp455_minimal_cell": "1",
    "run_id": "${RUN_ID_SUFFIX}"
  }
}
JSON
)

# curl's -w already prints "000" when the transfer fails, so the failure
# fallback must not echo a second "000" (that logged "HTTP 000000").
PROVISION_HTTP_CODE=$(curl -sS -o /tmp/provision-resp.json -w '%{http_code}' \
  -X POST \
  -H "Authorization: Bearer ${ADMIN_TOKEN}" \
  -H "Content-Type: application/json" \
  --max-time 30 \
  -d "${PROVISION_PAYLOAD}" \
  "${CP_URL}/cp/admin/orgs" || true)
PROVISION_HTTP_CODE="${PROVISION_HTTP_CODE:-000}"
echo "HTTP ${PROVISION_HTTP_CODE}"

if [ "${PROVISION_HTTP_CODE}" != "202" ] && [ "${PROVISION_HTTP_CODE}" != "200" ]; then
  echo "::error::Provision failed (HTTP ${PROVISION_HTTP_CODE})"
  cat /tmp/provision-resp.json 2>/dev/null || true
  EXIT_CODE=1
  exit "${EXIT_CODE}"
fi
echo "::endgroup::"
|
||||
|
||||
# Wait for org to reach running + retrieve per-tenant token. Bounded
# at PROVISION_TIMEOUT_SECS. We poll the admin-token endpoint every 5s;
# once the org is up it returns 200 with the token. The workspace_id is
# read from that same response, or from a follow-up GET /orgs/:slug when
# the token response does not carry it. Exits 3 if either value is still
# missing at the deadline.
echo "::group::Wait for org to be ready (max ${PROVISION_TIMEOUT_SECS}s)"
WAIT_START=$(date +%s)
WAIT_DEADLINE=$(( WAIT_START + PROVISION_TIMEOUT_SECS ))
TENANT_TOKEN=""
while [ "$(date +%s)" -lt "${WAIT_DEADLINE}" ]; do
  # `|| true`, not `|| echo "000"`: curl's -w already prints 000 on a
  # failed transfer, so echoing again would yield "000000".
  TOKEN_HTTP_CODE=$(curl -sS -o /tmp/token-resp.json -w '%{http_code}' \
    -H "Authorization: Bearer ${ADMIN_TOKEN}" \
    --max-time 10 \
    "${CP_URL}/cp/admin/orgs/${SLUG}/admin-token" || true)
  TOKEN_HTTP_CODE="${TOKEN_HTTP_CODE:-000}"

  if [ "${TOKEN_HTTP_CODE}" = "200" ]; then
    TENANT_TOKEN=$(jq -r '.admin_token // .token // empty' /tmp/token-resp.json 2>/dev/null || echo "")
    if [ -n "${TENANT_TOKEN}" ]; then
      WORKSPACE_ID=$(jq -r '.workspace_id // .default_workspace_id // empty' /tmp/token-resp.json 2>/dev/null || echo "")
      if [ -z "${WORKSPACE_ID}" ]; then
        # Fallback: fetch the org by slug and read the workspace id there.
        # --max-time keeps a stalled connection from outliving the deadline
        # (the request previously had no timeout at all).
        WORKSPACE_ID=$(curl -sS -H "Authorization: Bearer ${ADMIN_TOKEN}" \
          --max-time 10 \
          "${CP_URL}/cp/admin/orgs/${SLUG}" | jq -r '.workspace_id // .default_workspace_id // empty' 2>/dev/null || echo "")
      fi
      if [ -n "${WORKSPACE_ID}" ]; then
        PROVISION_END_EPOCH=$(date +%s)
        echo "Org ready in $(( PROVISION_END_EPOCH - WAIT_START ))s — workspace_id=${WORKSPACE_ID}"
        break
      fi
    fi
  fi
  sleep 5
done

if [ -z "${TENANT_TOKEN}" ] || [ -z "${WORKSPACE_ID}" ]; then
  echo "::error::Provision timed out (org never reached running within ${PROVISION_TIMEOUT_SECS}s)"
  EXIT_CODE=3
  exit "${EXIT_CODE}"
fi
echo "::endgroup::"
|
||||
|
||||
# Assertion 2: Controlplane receives /registry/register for that
# workspace_id. The harness doesn't POST to /registry/register
# directly — that's the workspace-server's own job on boot. We
# verify the registration was received by polling the registry
# endpoint every 3s for up to 60s. Exits 4 if it never confirms.
echo "::group::Assertion 2: /registry/register for workspace_id=${WORKSPACE_ID}"
REGISTER_DEADLINE=$(( $(date +%s) + 60 ))
while [ "$(date +%s)" -lt "${REGISTER_DEADLINE}" ]; do
  # `|| true`, not `|| echo "000"`: curl's -w already prints 000 on a
  # failed transfer, so echoing again would yield "000000".
  REG_HTTP_CODE=$(curl -sS -o /tmp/reg-resp.json -w '%{http_code}' \
    -H "Authorization: Bearer ${TENANT_TOKEN}" \
    --max-time 10 \
    "${CP_URL}/cp/registry/workspaces/${WORKSPACE_ID}" || true)
  REG_HTTP_CODE="${REG_HTTP_CODE:-000}"

  if [ "${REG_HTTP_CODE}" = "200" ]; then
    # jq's `//` treats false like null, so a plain
    # `.registered // .workspace_id` would accept an explicit
    # "registered": false whenever workspace_id is present. Reject that
    # case first; every other response is evaluated as before.
    REGISTERED=$(jq -r 'if .registered == false then empty else (.registered // .workspace_id // empty) end' /tmp/reg-resp.json 2>/dev/null || echo "")
    if [ -n "${REGISTERED}" ]; then
      REGISTER_STATUS="ok"
      echo "Registry confirms workspace_id=${WORKSPACE_ID} registered"
      break
    fi
  fi
  sleep 3
done

if [ "${REGISTER_STATUS}" != "ok" ]; then
  echo "::error::Registry did not confirm registration within 60s"
  cat /tmp/reg-resp.json 2>/dev/null || true
  EXIT_CODE=4
  exit "${EXIT_CODE}"
fi
echo "::endgroup::"
|
||||
|
||||
# Assertion 3: JSON-RPC/completion route returns successful minimal
# response. One minimal completion call — keep payload small.
#
# Passes only when the HTTP status is 200, the envelope has no `error`
# member, and `result` is present. Any other outcome exits 5.
echo "::group::Assertion 3: JSON-RPC completion"

COMPLETION_PAYLOAD=$(cat <<JSON
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "completion",
  "params": {
    "workspace_id": "${WORKSPACE_ID}",
    "model": "${MODEL}",
    "messages": [{"role": "user", "content": "ping"}],
    "max_tokens": 1
  }
}
JSON
)

# curl's -w already prints "000" when the transfer fails, so the failure
# fallback must not echo a second "000" (that logged "HTTP 000000").
COMPLETION_HTTP_CODE=$(curl -sS -o /tmp/completion-resp.json -w '%{http_code}' \
  -X POST \
  -H "Authorization: Bearer ${TENANT_TOKEN}" \
  -H "Content-Type: application/json" \
  --max-time 30 \
  -d "${COMPLETION_PAYLOAD}" \
  "${CP_URL}/cp/rpc" || true)
COMPLETION_HTTP_CODE="${COMPLETION_HTTP_CODE:-000}"
echo "HTTP ${COMPLETION_HTTP_CODE}"

if [ "${COMPLETION_HTTP_CODE}" != "200" ]; then
  echo "::error::Completion failed (HTTP ${COMPLETION_HTTP_CODE})"
  cat /tmp/completion-resp.json 2>/dev/null || true
  EXIT_CODE=5
  exit "${EXIT_CODE}"
fi

# Verify JSON-RPC 2.0 success envelope: no `error`, and a `result`.
RPC_ERROR=$(jq -r '.error // empty' /tmp/completion-resp.json 2>/dev/null || echo "")
if [ -n "${RPC_ERROR}" ]; then
  echo "::error::Completion returned JSON-RPC error: ${RPC_ERROR}"
  cat /tmp/completion-resp.json 2>/dev/null || true
  EXIT_CODE=5
  exit "${EXIT_CODE}"
fi

RPC_RESULT=$(jq -r '.result // empty' /tmp/completion-resp.json 2>/dev/null || echo "")
if [ -z "${RPC_RESULT}" ] || [ "${RPC_RESULT}" = "null" ]; then
  echo "::error::Completion response missing result field"
  cat /tmp/completion-resp.json 2>/dev/null || true
  EXIT_CODE=5
  exit "${EXIT_CODE}"
fi

COMPLETION_STATUS="ok"
echo "Completion OK"
echo "::endgroup::"
|
||||
|
||||
echo "All 4 assertions passed for ${SLUG} (workspace_id=${WORKSPACE_ID})"
|
||||
@@ -53,9 +53,7 @@
|
||||
# PV_RUNTIMES space list; default "hermes openclaw claude-code"
|
||||
# E2E_PROVISION_TIMEOUT_SECS default 1800 (hermes/openclaw cold EC2 budget)
|
||||
# E2E_MINIMAX_API_KEY / E2E_ANTHROPIC_API_KEY / E2E_OPENAI_API_KEY
|
||||
# DEPRECATED for this script — platform-managed models
|
||||
# use the CP LLM proxy; direct vendor keys are blocked
|
||||
# by PR #2291. Kept in workflow env for other E2Es.
|
||||
# LLM provider key injected so the runtime can boot
|
||||
# PV_TOKEN_DIAGNOSTIC_ONLY
|
||||
# 1 -> stop after create/token acquisition. Useful
|
||||
# to classify Hermes-only vs shared auth-route issues.
|
||||
@@ -224,14 +222,17 @@ else
|
||||
fi
|
||||
|
||||
# ─── 4. Provision the parent + one sibling per runtime under test ──────
|
||||
# Platform-managed models: Molecule owns billing via the CP LLM proxy, so
|
||||
# the workspace needs NO tenant key. PR #2291 blocks direct vendor key writes
|
||||
# (ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN, MINIMAX_API_KEY, etc.) for
|
||||
# platform-managed workspaces. We intentionally keep SECRETS_JSON empty so a
|
||||
# stray E2E_*_API_KEY in the runner env cannot silently convert this into a
|
||||
# BYOK run and mask the platform-managed path (mirrors
|
||||
# test_staging_full_saas.sh's E2E_LLM_PATH=platform branch).
|
||||
# Inject the LLM provider key so each runtime can authenticate at boot.
|
||||
# Priority: MiniMax → direct-Anthropic → OpenAI (mirrors
|
||||
# test_staging_full_saas.sh's secrets-injection chain).
|
||||
SECRETS_JSON='{}'
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_MINIMAX_API_KEY'];print(json.dumps({'ANTHROPIC_BASE_URL':'https://api.minimax.io/anthropic','ANTHROPIC_AUTH_TOKEN':k,'MINIMAX_API_KEY':k}))")
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_ANTHROPIC_API_KEY'];print(json.dumps({'ANTHROPIC_API_KEY':k}))")
|
||||
elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_OPENAI_API_KEY'];print(json.dumps({'OPENAI_API_KEY':k,'OPENAI_BASE_URL':'https://api.openai.com/v1','MODEL_PROVIDER':'openai:gpt-4o','HERMES_INFERENCE_PROVIDER':'custom','HERMES_CUSTOM_BASE_URL':'https://api.openai.com/v1','HERMES_CUSTOM_API_KEY':k,'HERMES_CUSTOM_API_MODE':'chat_completions'}))")
|
||||
fi
|
||||
|
||||
# Workspace-create now enforces the MODEL_REQUIRED contract: there is NO
|
||||
# platform-side default model for a runtime (feedback_workspace_model_required_
|
||||
|
||||
@@ -55,7 +55,7 @@ def drift_module():
|
||||
"SENTINEL_JOB": "all-required",
|
||||
"AUDIT_WORKFLOW_PATH": ".gitea/workflows/audit-force-merge.yml",
|
||||
"CI_WORKFLOW_PATH": ".gitea/workflows/ci.yml",
|
||||
"DRIFT_LABEL": "ci-bp-drift",
|
||||
"DRIFT_LABEL": "tier:high",
|
||||
}
|
||||
with mock.patch.dict(os.environ, env, clear=False):
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
@@ -584,54 +584,6 @@ def test_find_open_issue_raises_on_transient_error(drift_module, monkeypatch):
|
||||
drift_module.find_open_issue("[ci-drift] foo")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Pagination: search beyond page 1 so an existing issue on any page is found
|
||||
# --------------------------------------------------------------------------
|
||||
def test_find_open_issue_paginates_to_page_2(drift_module, monkeypatch):
|
||||
"""Issue exists on page 2 → paginate and find it."""
|
||||
target = {"number": 99, "title": "[ci-drift] foo"}
|
||||
filler = [{"number": i, "title": f"other-{i}"} for i in range(1, 51)]
|
||||
|
||||
class PaginatedStub:
|
||||
def __init__(self):
|
||||
self.calls = []
|
||||
|
||||
def __call__(self, method, path, *, body=None, query=None, expect_json=True):
|
||||
self.calls.append((method, path, body, query))
|
||||
page = int((query or {}).get("page", "1"))
|
||||
if page == 1:
|
||||
return 200, filler
|
||||
if page == 2:
|
||||
return 200, [target]
|
||||
return 200, []
|
||||
|
||||
stub = PaginatedStub()
|
||||
monkeypatch.setattr(drift_module, "api", stub)
|
||||
assert drift_module.find_open_issue("[ci-drift] foo") == target
|
||||
assert len(stub.calls) == 2
|
||||
|
||||
|
||||
def test_find_open_issue_stops_at_last_page(drift_module, monkeypatch):
|
||||
"""No match across pages → stop when a page has <50 results."""
|
||||
filler = [{"number": i, "title": f"other-{i}"} for i in range(1, 51)]
|
||||
|
||||
class PaginatedStub:
|
||||
def __init__(self):
|
||||
self.calls = []
|
||||
|
||||
def __call__(self, method, path, *, body=None, query=None, expect_json=True):
|
||||
self.calls.append((method, path, body, query))
|
||||
page = int((query or {}).get("page", "1"))
|
||||
if page == 1:
|
||||
return 200, filler
|
||||
return 200, []
|
||||
|
||||
stub = PaginatedStub()
|
||||
monkeypatch.setattr(drift_module, "api", stub)
|
||||
assert drift_module.find_open_issue("[ci-drift] foo") is None
|
||||
assert len(stub.calls) == 2
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Idempotent path: existing issue is PATCHed, NOT duplicated
|
||||
# --------------------------------------------------------------------------
|
||||
@@ -665,7 +617,7 @@ def test_file_or_update_posts_new_issue_when_none_exists(drift_module, monkeypat
|
||||
stub = _make_stub_api({
|
||||
("GET", "/repos/owner/repo/issues"): (200, []),
|
||||
("POST", "/repos/owner/repo/issues"): (201, {"number": 99}),
|
||||
("GET", "/repos/owner/repo/labels"): (200, [{"id": 10, "name": "ci-bp-drift"}]),
|
||||
("GET", "/repos/owner/repo/labels"): (200, [{"id": 10, "name": "tier:high"}]),
|
||||
("POST", "/repos/owner/repo/issues/99/labels"): (200, []),
|
||||
})
|
||||
monkeypatch.setattr(drift_module, "api", stub)
|
||||
|
||||
@@ -127,7 +127,7 @@ def _stub_api(monkeypatch, lint_mod, bp_response, issue_search_response=None, po
|
||||
posted_record.setdefault("patches", []).append({"path": path, "body": body})
|
||||
return ("ok", {"number": 9001})
|
||||
if "/labels" in path:
|
||||
return ("ok", [{"id": 10, "name": "ci-bp-drift"}, {"id": 9, "name": "ci-bp-drift"}])
|
||||
return ("ok", [{"id": 10, "name": "ci-bp-drift"}, {"id": 9, "name": "tier:high"}])
|
||||
return ("ok", {})
|
||||
|
||||
monkeypatch.setattr(lint_mod, "api", fake_api)
|
||||
|
||||
@@ -427,13 +427,13 @@ def test_required_workflow_with_paths_ignore_fails(
|
||||
"""Same defect class for `paths-ignore` — exit 1, named."""
|
||||
_write_workflow(
|
||||
lint_module.WORKFLOWS_DIR,
|
||||
"sop-checklist.yml",
|
||||
"name: sop-checklist\n"
|
||||
"sop-tier-check.yml",
|
||||
"name: sop-tier-check\n"
|
||||
"on:\n"
|
||||
" pull_request_target:\n"
|
||||
" paths-ignore: ['docs/**']\n"
|
||||
"jobs:\n"
|
||||
" all-items-acked:\n"
|
||||
" tier-check:\n"
|
||||
" runs-on: ubuntu-latest\n",
|
||||
)
|
||||
stub = _make_stub_api({
|
||||
@@ -441,7 +441,7 @@ def test_required_workflow_with_paths_ignore_fails(
|
||||
200,
|
||||
{
|
||||
"status_check_contexts": [
|
||||
"sop-checklist / all-items-acked (pull_request_target)"
|
||||
"sop-tier-check / tier-check (pull_request_target)"
|
||||
]
|
||||
},
|
||||
),
|
||||
@@ -450,7 +450,7 @@ def test_required_workflow_with_paths_ignore_fails(
|
||||
rc = lint_module.run()
|
||||
assert rc == 1
|
||||
out = capsys.readouterr().out
|
||||
assert "sop-checklist.yml" in out
|
||||
assert "sop-tier-check.yml" in out
|
||||
assert "paths-ignore" in out
|
||||
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ def wd_module():
|
||||
"GITEA_HOST": "git.example.test",
|
||||
"REPO": "owner/repo",
|
||||
"WATCH_BRANCH": "main",
|
||||
"RED_LABEL": "ci-bp-drift",
|
||||
"RED_LABEL": "tier:high",
|
||||
}
|
||||
with mock.patch.dict(os.environ, env, clear=False):
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
@@ -463,7 +463,7 @@ def test_red_detected_opens_issue(wd_module, monkeypatch):
|
||||
("GET", "/repos/owner/repo/issues"): (200, []), # no existing issue
|
||||
("POST", "/repos/owner/repo/issues"): (201, {"number": 555}),
|
||||
("GET", "/repos/owner/repo/labels"): (
|
||||
200, [{"id": 9, "name": "ci-bp-drift"}],
|
||||
200, [{"id": 9, "name": "tier:high"}],
|
||||
),
|
||||
("POST", "/repos/owner/repo/issues/555/labels"): (200, []),
|
||||
})
|
||||
@@ -1063,7 +1063,7 @@ def test_head_recheck_files_when_still_red_after_settling(
|
||||
if method == "GET" and path == "/repos/owner/repo/issues":
|
||||
return (200, [])
|
||||
if method == "GET" and path == "/repos/owner/repo/labels":
|
||||
return (200, [{"id": 9, "name": "ci-bp-drift"}])
|
||||
return (200, [{"id": 9, "name": "tier:high"}])
|
||||
if method == "POST" and path == "/repos/owner/repo/issues":
|
||||
post_filed["value"] = True
|
||||
return (201, {"number": 999})
|
||||
|
||||
@@ -35,7 +35,7 @@ GITEA_TOKEN = os.environ.get("GITEA_TOKEN", os.environ.get("GITHUB_TOKEN", ""))
|
||||
API_BASE = f"https://{GITEA_HOST}/api/v1"
|
||||
|
||||
# Timeout in seconds for all HTTP calls. Defence-in-depth: ensures a missing or
|
||||
# invalid GITEA_TOKEN causes a fast (~15 s) failure rather than an
|
||||
# invalid SOP_TIER_CHECK_TOKEN causes a fast (~15 s) failure rather than an
|
||||
# indefinite hang. The real fix is provisioning the token; this caps worst-case
|
||||
# wall-clock on a broken/unreachable Gitea host.
|
||||
DEFAULT_TIMEOUT = 15
|
||||
@@ -116,27 +116,45 @@ LOGIN_ALIASES = {
|
||||
"infra-sre": "core-devops",
|
||||
}
|
||||
|
||||
# SOP-6 tier → required agent groups
|
||||
# tier:low → engineers,managers,ceo (OR: any one suffices)
|
||||
# tier:medium → managers AND engineers AND qa,security (AND)
|
||||
# tier:high → ceo (OR, but single)
|
||||
# "?" = teams not yet created; treated as optional for MVP
|
||||
TIER_AGENTS = {
|
||||
"tier:low": {"managers": "core-lead", "engineers": "core-devops", "ceo": "ceo"},
|
||||
"tier:medium": {"managers": "core-lead", "engineers": "core-devops", "qa": "core-qa", "security": "core-security"},
|
||||
"tier:high": {"ceo": "ceo"},
|
||||
}
|
||||
|
||||
POSITIVE_VERDICTS = {"APPROVED", "N/A", "ACK"}
|
||||
|
||||
# Uniform required-agent set (SOP-6 tier removal, CTO 2026-06-07).
|
||||
# ALL of the following must APPROVE (AND gate, strict).
|
||||
REQUIRED_AGENTS = {
|
||||
"managers": "core-lead",
|
||||
"engineers": "core-devops",
|
||||
"qa": "core-qa",
|
||||
"security": "core-security",
|
||||
}
|
||||
|
||||
def _get_pr_tier(pr_number: int, repo: str) -> str:
|
||||
"""Get the PR's tier label."""
|
||||
owner, name = repo.split("/", 1)
|
||||
try:
|
||||
pr = api_get(f"/repos/{owner}/{name}/pulls/{pr_number}")
|
||||
for label in pr.get("labels", []):
|
||||
name_l = label.get("name", "")
|
||||
if name_l in TIER_AGENTS:
|
||||
return name_l
|
||||
except GiteaError:
|
||||
pass
|
||||
return "tier:low" # Default for untagged PRs
|
||||
|
||||
|
||||
def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
|
||||
"""
|
||||
Scan issue + PR comments AND reviews for agent-tag policy gates.
|
||||
Matches tag AND author. All REQUIRED_AGENTS must positively ACK.
|
||||
Matches tag AND author. Filters to tier-relevant agents.
|
||||
Returns: {signal, results, verdict}
|
||||
"""
|
||||
owner, name = repo.split("/", 1)
|
||||
|
||||
relevant_roles = REQUIRED_AGENTS
|
||||
# Get tier label to determine relevant agents
|
||||
tier = _get_pr_tier(pr_number, repo)
|
||||
relevant_roles = TIER_AGENTS.get(tier, TIER_AGENTS["tier:low"])
|
||||
|
||||
# Build reverse map: login -> (group, agent_key)
|
||||
login_to_group = {}
|
||||
@@ -203,22 +221,35 @@ def signal_1_comment_scan(pr_number: int, repo: str) -> dict:
|
||||
latest = max(matches, key=lambda x: x["created_at"], default=None) if matches else None
|
||||
findings[agent_key] = {
|
||||
"group": group,
|
||||
"tier": tier,
|
||||
"found": latest,
|
||||
"verdict": latest["verdict"] if latest else "MISSING",
|
||||
}
|
||||
|
||||
# Uniform AND gate: ALL required agents must be positive.
|
||||
# Compute gate verdict using tier-specific logic:
|
||||
# - tier:low / tier:high (OR gate): ANY positive = CLEAR, ANY negative = BLOCKED
|
||||
# - tier:medium (AND gate): ALL must be positive = CLEAR, ANY negative = BLOCKED
|
||||
verdicts = [f["verdict"] for f in findings.values()]
|
||||
if not verdicts:
|
||||
gate_verdict = "N/A"
|
||||
elif all(v in POSITIVE_VERDICTS for v in verdicts):
|
||||
gate_verdict = "CLEAR"
|
||||
elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
|
||||
gate_verdict = "BLOCKED"
|
||||
elif tier in ("tier:low", "tier:high"):
|
||||
# OR gate: one positive is enough
|
||||
if any(v in POSITIVE_VERDICTS for v in verdicts):
|
||||
gate_verdict = "CLEAR"
|
||||
elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
|
||||
gate_verdict = "BLOCKED"
|
||||
else:
|
||||
gate_verdict = "INCOMPLETE"
|
||||
else:
|
||||
gate_verdict = "INCOMPLETE"
|
||||
# AND gate (tier:medium): all must be positive
|
||||
if all(v in POSITIVE_VERDICTS for v in verdicts):
|
||||
gate_verdict = "CLEAR"
|
||||
elif any(v in ("BLOCKED", "CHANGES_REQUESTED", "COMMENT") for v in verdicts):
|
||||
gate_verdict = "BLOCKED"
|
||||
else:
|
||||
gate_verdict = "INCOMPLETE"
|
||||
|
||||
return {"signal": "agent_tag_comments", "results": findings, "verdict": gate_verdict}
|
||||
return {"signal": "agent_tag_comments", "results": findings, "verdict": gate_verdict, "tier": tier}
|
||||
|
||||
|
||||
# ── Signal 2: REQUEST_CHANGES reviews state machine ────────────────────────────
|
||||
@@ -473,7 +504,6 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
|
||||
failing_required = []
|
||||
passing_required = []
|
||||
pending_required = []
|
||||
for ctx in required_checks:
|
||||
state = check_statuses.get(ctx, "null")
|
||||
if state == "failure":
|
||||
@@ -481,7 +511,7 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
elif state in ("success", "neutral"):
|
||||
passing_required.append(ctx)
|
||||
else:
|
||||
pending_required.append(ctx)
|
||||
passing_required.append(f"{ctx} (pending)")
|
||||
|
||||
# NOTE: do NOT use ci_state (combined_state) as a fallback verdict driver.
|
||||
# The combined_state is computed over ALL statuses including this
|
||||
@@ -489,14 +519,12 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
# self-referential loop: gate-check posts failure → combined_state
|
||||
# becomes failure → script re-blocks → posts failure again.
|
||||
# The check_statuses dict already excludes gate-check (Bug-1 fix from
|
||||
# PR #547).
|
||||
#
|
||||
# Fail-closed: any required check that is missing, pending, or failing
|
||||
# blocks the gate. Only return CLEAR when every required check is
|
||||
# explicitly success/neutral.
|
||||
# PR #547). Use failing_required as the sole CI gate; if no required
|
||||
# checks are defined on the branch, return CLEAR rather than re-using
|
||||
# the combined_state which includes our own status.
|
||||
if failing_required:
|
||||
verdict = "CI_FAIL"
|
||||
elif pending_required:
|
||||
elif ci_state == "pending":
|
||||
verdict = "CI_PENDING"
|
||||
else:
|
||||
verdict = "CLEAR"
|
||||
@@ -507,7 +535,6 @@ def signal_6_ci(pr_number: int, repo: str, branch: str | None = None, pr_data: d
|
||||
"required_checks": required_checks,
|
||||
"failing_required": failing_required,
|
||||
"passing_required": passing_required,
|
||||
"pending_required": pending_required,
|
||||
"all_check_statuses": check_statuses,
|
||||
"verdict": verdict,
|
||||
}
|
||||
|
||||
@@ -39,11 +39,11 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch):
|
||||
mod = load_gate_check()
|
||||
|
||||
def fake_api_get(path):
|
||||
# PR 900 has area:ci label
|
||||
# PR 900 has tier:low label
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/900":
|
||||
return {
|
||||
"number": 900,
|
||||
"labels": [{"name": "area:ci"}],
|
||||
"labels": [{"name": "tier:low"}],
|
||||
}
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
@@ -59,25 +59,7 @@ def test_signal_1_infra_sre_login_alias_resolved_to_core_devops(monkeypatch):
|
||||
"user": {"login": "infra-sre"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:00Z",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"user": {"login": "core-lead"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:01Z",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"user": {"login": "core-qa"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:02Z",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"user": {"login": "core-security"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:00:03Z",
|
||||
},
|
||||
}
|
||||
]
|
||||
raise AssertionError(f"unexpected api_list: {path}")
|
||||
|
||||
@@ -103,7 +85,7 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
|
||||
if path == "/repos/molecule-ai/molecule-core/pulls/901":
|
||||
return {
|
||||
"number": 901,
|
||||
"labels": [{"name": "area:ci"}],
|
||||
"labels": [{"name": "tier:low"}],
|
||||
}
|
||||
raise AssertionError(f"unexpected api_get: {path}")
|
||||
|
||||
@@ -126,24 +108,6 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:00Z",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"user": {"login": "core-lead"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:01Z",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"user": {"login": "core-qa"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:02Z",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"user": {"login": "core-security"},
|
||||
"state": "APPROVED",
|
||||
"submitted_at": "2026-05-13T10:01:03Z",
|
||||
},
|
||||
]
|
||||
raise AssertionError(f"unexpected api_list: {path}")
|
||||
|
||||
@@ -152,7 +116,7 @@ def test_signal_1_null_user_in_review_does_not_crash(monkeypatch):
|
||||
|
||||
result = mod.signal_1_comment_scan(901, "molecule-ai/molecule-core")
|
||||
|
||||
# Should not crash; all required gates clear
|
||||
# Should not crash; the valid review from core-devops still satisfies engineers gate
|
||||
assert result["verdict"] == "CLEAR"
|
||||
assert result["results"]["core-devops"]["verdict"] == "APPROVED"
|
||||
|
||||
|
||||
@@ -351,17 +351,8 @@ func main() {
|
||||
// (true, err) on any transient error, so a CP blip never flips a healthy
|
||||
// workspace.
|
||||
if cpProv != nil {
|
||||
// Guard against double-reprovision thrash (internal#544): the restart
|
||||
// debounce window must cover the reconciler interval so a workspace
|
||||
// flipped offline by one reconcile tick isn't immediately reprovisioned
|
||||
// again by the next tick before the debounce drops it. If the interval
|
||||
// ever shrinks below the debounce window, the coupling silently breaks.
|
||||
reconcileInterval := 60 * time.Second
|
||||
if handlers.RestartDebounceWindow < reconcileInterval {
|
||||
log.Fatalf("RestartDebounceWindow (%s) must be >= CP instance reconciler interval (%s) to prevent double-reprovision thrash (internal#544)", handlers.RestartDebounceWindow, reconcileInterval)
|
||||
}
|
||||
go supervised.RunWithRecover(ctx, "cp-instance-reconciler", func(c context.Context) {
|
||||
registry.StartCPInstanceReconciler(c, cpProv, onWorkspaceOffline, reconcileInterval)
|
||||
registry.StartCPInstanceReconciler(c, cpProv, onWorkspaceOffline, 60*time.Second)
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -271,11 +271,6 @@ func (m *Manager) Reload(ctx context.Context) {
|
||||
ch.Config["_channel_id"] = ch.ID
|
||||
|
||||
go func(a ChannelAdapter, c ChannelRow, pCtx context.Context) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Printf("PANIC recovered in channel polling goroutine: %v", r)
|
||||
}
|
||||
}()
|
||||
if err := a.StartPolling(pCtx, c.Config, m.onInboundMessage); err != nil {
|
||||
log.Printf("Channels: polling error for %s/%s: %v", c.ChannelType, truncID(c.ID), err)
|
||||
}
|
||||
@@ -359,11 +354,6 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound
|
||||
typingCtx, typingCancel := context.WithCancel(fireCtx)
|
||||
defer typingCancel()
|
||||
go func() {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Printf("PANIC recovered in typing indicator goroutine: %v", r)
|
||||
}
|
||||
}()
|
||||
typer.SendTyping(ch.Config, msg.ChatID)
|
||||
ticker := time.NewTicker(4 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
@@ -142,7 +142,7 @@ func ghcrAuthHeader() string {
|
||||
log.Printf("workspace-images: failed to marshal GHCR auth: %v", err)
|
||||
return ""
|
||||
}
|
||||
return base64.StdEncoding.EncodeToString(js)
|
||||
return base64.URLEncoding.EncodeToString(js)
|
||||
}
|
||||
|
||||
// Refresh pulls the requested runtimes' template images from GHCR and (if
|
||||
|
||||
@@ -47,9 +47,9 @@ func TestGHCRAuthHeader_EncodesDockerEnginePayload(t *testing.T) {
|
||||
if got == "" {
|
||||
t.Fatal("expected non-empty auth header")
|
||||
}
|
||||
raw, err := base64.StdEncoding.DecodeString(got)
|
||||
raw, err := base64.URLEncoding.DecodeString(got)
|
||||
if err != nil {
|
||||
t.Fatalf("auth header is not valid base64: %v", err)
|
||||
t.Fatalf("auth header is not valid base64-url: %v", err)
|
||||
}
|
||||
var payload map[string]string
|
||||
if err := json.Unmarshal(raw, &payload); err != nil {
|
||||
@@ -80,9 +80,9 @@ func TestGHCRAuthHeader_RespectsRegistryEnv(t *testing.T) {
|
||||
if got == "" {
|
||||
t.Fatal("expected non-empty auth header")
|
||||
}
|
||||
raw, err := base64.StdEncoding.DecodeString(got)
|
||||
raw, err := base64.URLEncoding.DecodeString(got)
|
||||
if err != nil {
|
||||
t.Fatalf("auth header is not valid base64: %v", err)
|
||||
t.Fatalf("auth header is not valid base64-url: %v", err)
|
||||
}
|
||||
var payload map[string]string
|
||||
if err := json.Unmarshal(raw, &payload); err != nil {
|
||||
@@ -220,7 +220,7 @@ func TestGHCRAuthHeader_TrimsWhitespace(t *testing.T) {
|
||||
t.Setenv("GHCR_USER", " alice ")
|
||||
t.Setenv("GHCR_TOKEN", "\tfake-tok-value\n")
|
||||
got := ghcrAuthHeader()
|
||||
raw, _ := base64.StdEncoding.DecodeString(got)
|
||||
raw, _ := base64.URLEncoding.DecodeString(got)
|
||||
var payload map[string]string
|
||||
_ = json.Unmarshal(raw, &payload)
|
||||
if payload["username"] != "alice" {
|
||||
|
||||
@@ -29,7 +29,6 @@ import (
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
@@ -51,7 +50,7 @@ func approvalRequestHash(workspaceID, action string, contextMap map[string]inter
|
||||
// requireApproval returns (approved=true, consumedID) when a matching approval
|
||||
// exists and was just consumed; otherwise it creates/reuses a pending approval
|
||||
// and returns (false, pendingID). A non-nil error is a server error.
|
||||
func requireApproval(ctx context.Context, b events.EventEmitter, workspaceID string, action approvals.Action, reason string, contextMap map[string]interface{}) (bool, string, error) {
|
||||
func requireApproval(ctx context.Context, b *events.Broadcaster, workspaceID string, action approvals.Action, reason string, contextMap map[string]interface{}) (bool, string, error) {
|
||||
hash := approvalRequestHash(workspaceID, string(action), contextMap)
|
||||
|
||||
// 1. Atomically consume an approved + unconsumed request, if one exists.
|
||||
@@ -104,25 +103,18 @@ func requireApproval(ctx context.Context, b events.EventEmitter, workspaceID str
|
||||
// Broadcast to the canvas (the user-facing signal). For a platform agent the
|
||||
// parent_id is NULL, so the requested-event on its own workspace IS the user
|
||||
// prompt; ordinary workspaces also escalate to their parent.
|
||||
//
|
||||
// b may be nil: stateless handlers (e.g. org-token mint — OrgTokenHandler is
|
||||
// an empty struct with no broadcaster) still gate; they just can't push a
|
||||
// live canvas event. The pending approval row is persisted regardless, so
|
||||
// the request is never lost — only the notification is skipped.
|
||||
if b != nil {
|
||||
if bErr := b.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
|
||||
"approval_id": approvalID,
|
||||
"action": string(action),
|
||||
"reason": reason,
|
||||
}); bErr != nil {
|
||||
log.Printf("approval_gate: broadcast requested failed (ws=%s): %v", workspaceID, bErr)
|
||||
}
|
||||
if bErr := b.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
|
||||
"approval_id": approvalID,
|
||||
"action": string(action),
|
||||
"reason": reason,
|
||||
}); bErr != nil {
|
||||
log.Printf("approval_gate: broadcast requested failed (ws=%s): %v", workspaceID, bErr)
|
||||
}
|
||||
var parentID *string
|
||||
if pErr := db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID); pErr != nil {
|
||||
log.Printf("approval_gate: parent lookup failed (ws=%s): %v", workspaceID, pErr)
|
||||
}
|
||||
if parentID != nil && b != nil {
|
||||
if parentID != nil {
|
||||
if bErr := b.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
|
||||
"approval_id": approvalID,
|
||||
"from_workspace_id": workspaceID,
|
||||
@@ -138,26 +130,10 @@ func requireApproval(ctx context.Context, b events.EventEmitter, workspaceID str
|
||||
// gateDestructive runs requireApproval for a gated action and, when approval is
|
||||
// still pending, writes the 202 response and returns false (caller must stop).
|
||||
// Returns true when the caller may proceed (action consumed an approval).
|
||||
func gateDestructive(c *gin.Context, b events.EventEmitter, workspaceID string, action approvals.Action, reason string, contextMap map[string]interface{}) bool {
|
||||
func gateDestructive(c *gin.Context, b *events.Broadcaster, workspaceID string, action approvals.Action, reason string, contextMap map[string]interface{}) bool {
|
||||
if !approvals.IsGated(action) {
|
||||
return true
|
||||
}
|
||||
// Scope (RFC platform-agent Phase 4b). Wiring is a one-liner in each
|
||||
// destructive handler; the activation policy lives here, centrally, so it is
|
||||
// uniform and testable:
|
||||
// - default-OFF rollout flag, so the wiring is inert until an operator
|
||||
// enables it (mirrors the 3a/3c default-off design and protects existing
|
||||
// org-token automation from a surprise async-approval behaviour change);
|
||||
// - only callers holding an ORG token are gated. The platform agent runs
|
||||
// with MOLECULE_API_KEY=<org-admin token>, so the auth middleware sets
|
||||
// org_token_id. Ordinary workspace-token agents and human CP-session
|
||||
// operators (cp_session_actor — the approvers themselves) are NOT gated,
|
||||
// so normal operation is byte-identical. This realises the file-header
|
||||
// trust boundary ("anything holding an org-admin token still goes
|
||||
// through the gate") without gating everyone.
|
||||
if !destructiveGateEnabled() || !callerHoldsOrgToken(c) {
|
||||
return true
|
||||
}
|
||||
approved, approvalID, err := requireApproval(c.Request.Context(), b, workspaceID, action, reason, contextMap)
|
||||
if err != nil {
|
||||
log.Printf("gateDestructive: %v (ws=%s action=%s)", err, workspaceID, action)
|
||||
@@ -175,22 +151,3 @@ func gateDestructive(c *gin.Context, b events.EventEmitter, workspaceID string,
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// destructiveGateEnabled is the default-off rollout flag for the org-level
|
||||
// destructive-op approval gate. Inert until an operator sets
|
||||
// MOLECULE_PLATFORM_APPROVAL_GATE=1 (or "true") — typically when the platform
|
||||
// agent is deployed to the org. Keeps 4b's wiring shipped-but-dormant, matching
|
||||
// the platform-agent feature's default-off posture (3a/3c).
|
||||
func destructiveGateEnabled() bool {
|
||||
v := os.Getenv("MOLECULE_PLATFORM_APPROVAL_GATE")
|
||||
return v == "1" || v == "true"
|
||||
}
|
||||
|
||||
// callerHoldsOrgToken reports whether the request authenticated with an org
|
||||
// token (the auth middleware sets org_token_id, see middleware/wsauth_middleware.go).
|
||||
// The platform agent uses an org-admin token; ordinary workspace-token agents
|
||||
// and human CP sessions do not, so they bypass the gate entirely.
|
||||
func callerHoldsOrgToken(c *gin.Context) bool {
|
||||
_, ok := c.Get("org_token_id")
|
||||
return ok
|
||||
}
|
||||
|
||||
@@ -1,76 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// Phase 4b — unit coverage for the gate's activation SCOPE: the default-off
|
||||
// rollout flag + org-token-only targeting. These exercise the short-circuit
|
||||
// paths that return "proceed" BEFORE requireApproval, so they need no DB. The
|
||||
// full flag-on + org-token + gated → 202 path is covered by the real-Postgres
|
||||
// approval_gate_integration_test.go.
|
||||
|
||||
import (
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func TestDestructiveGateEnabled_DefaultOff(t *testing.T) {
|
||||
os.Unsetenv("MOLECULE_PLATFORM_APPROVAL_GATE")
|
||||
if destructiveGateEnabled() {
|
||||
t.Fatal("gate must be OFF by default (no env)")
|
||||
}
|
||||
for _, v := range []string{"1", "true"} {
|
||||
t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", v)
|
||||
if !destructiveGateEnabled() {
|
||||
t.Errorf("%q must enable the gate", v)
|
||||
}
|
||||
}
|
||||
t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", "0")
|
||||
if destructiveGateEnabled() {
|
||||
t.Error(`"0" must keep the gate off`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCallerHoldsOrgToken(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
c, _ := gin.CreateTestContext(httptest.NewRecorder())
|
||||
if callerHoldsOrgToken(c) {
|
||||
t.Error("no org_token_id in context → must be false (workspace/CP caller)")
|
||||
}
|
||||
c.Set("org_token_id", "tok-abc")
|
||||
if !callerHoldsOrgToken(c) {
|
||||
t.Error("org_token_id set → must be true (platform-agent / org-admin caller)")
|
||||
}
|
||||
}
|
||||
|
||||
// gateDestructive must return true (proceed, no 202, no DB touch) whenever the
|
||||
// scope excludes the call: non-gated action, flag off, or non-org-token caller.
|
||||
func TestGateDestructive_ScopeShortCircuits(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
newCtx := func(orgToken bool) *gin.Context {
|
||||
c, _ := gin.CreateTestContext(httptest.NewRecorder())
|
||||
c.Request = httptest.NewRequest("DELETE", "/x", nil)
|
||||
if orgToken {
|
||||
c.Set("org_token_id", "tok")
|
||||
}
|
||||
return c
|
||||
}
|
||||
|
||||
// flag OFF (default) + org-token + gated action → proceed.
|
||||
os.Unsetenv("MOLECULE_PLATFORM_APPROVAL_GATE")
|
||||
if !gateDestructive(newCtx(true), nil, "ws", approvals.ActionDeleteWorkspace, "r", nil) {
|
||||
t.Error("flag off must proceed (gate dormant)")
|
||||
}
|
||||
|
||||
// flag ON + NO org token (workspace agent / human CP session) → proceed.
|
||||
t.Setenv("MOLECULE_PLATFORM_APPROVAL_GATE", "1")
|
||||
if !gateDestructive(newCtx(false), nil, "ws", approvals.ActionDeleteWorkspace, "r", nil) {
|
||||
t.Error("non-org-token caller must proceed (normal operation unchanged)")
|
||||
}
|
||||
|
||||
// flag ON + org token + NON-gated action → proceed (IsGated short-circuit).
|
||||
if !gateDestructive(newCtx(true), nil, "ws", approvals.Action("not_a_gated_action"), "r", nil) {
|
||||
t.Error("non-gated action must proceed")
|
||||
}
|
||||
}
|
||||
@@ -173,8 +173,20 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
|
||||
// check_task_status returned status='queued' forever even after a
|
||||
// real reply landed). messageId mirrors delegation_id so the
|
||||
// platform's idempotency-key extraction also keys off the same id.
|
||||
// Build A2A payload via helper so contract tests can assert the envelope shape.
|
||||
a2aBody, marshalErr := buildDelegateA2ABody(delegationID, body.Task)
|
||||
a2aBody, marshalErr := json.Marshal(map[string]interface{}{
|
||||
"method": "message/send",
|
||||
"params": map[string]interface{}{
|
||||
"message": map[string]interface{}{
|
||||
"role": "user",
|
||||
"messageId": delegationID,
|
||||
// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251) —
|
||||
// a `type`-keyed Part is dropped by the receiver's v0.3
|
||||
// validator, silently losing the delegated task.
|
||||
"parts": []map[string]interface{}{{"kind": "text", "text": body.Task}},
|
||||
"metadata": map[string]interface{}{"delegation_id": delegationID},
|
||||
},
|
||||
},
|
||||
})
|
||||
if marshalErr != nil {
|
||||
log.Printf("Delegation %s: json.Marshal a2aBody failed: %v", delegationID, marshalErr)
|
||||
}
|
||||
@@ -362,27 +374,6 @@ func insertDelegationRow(ctx context.Context, c *gin.Context, sourceID string, b
|
||||
return insertTrackingUnavailable
|
||||
}
|
||||
|
||||
// buildDelegateA2ABody constructs the A2A JSON-RPC envelope for a delegation.
|
||||
// The returned shape is a schema-valid SendMessageRequest with role="user",
|
||||
// messageId, parts, and delegation metadata. Extracted to a pure function so
|
||||
// unit tests can assert the envelope contract without standing up HTTP or DB.
|
||||
func buildDelegateA2ABody(delegationID, task string) ([]byte, error) {
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"method": "message/send",
|
||||
"params": map[string]interface{}{
|
||||
"message": map[string]interface{}{
|
||||
"role": "user",
|
||||
"messageId": delegationID,
|
||||
// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251) —
|
||||
// a `type`-keyed Part is dropped by the receiver's v0.3
|
||||
// validator, silently losing the delegated task.
|
||||
"parts": []map[string]interface{}{{"kind": "text", "text": task}},
|
||||
"metadata": map[string]interface{}{"delegation_id": delegationID},
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// executeDelegation runs in a goroutine — sends A2A and stores the result.
|
||||
// Updates delegation status through: pending → dispatched → received → completed/failed
|
||||
// delegationRetryDelay is the pause between the first failed proxy attempt
|
||||
|
||||
@@ -1762,74 +1762,3 @@ func TestListDelegations_LedgerFailedIncludesErrorDetail(t *testing.T) {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- buildDelegateA2ABody: schema-valid SendMessageRequest ----------
|
||||
|
||||
// TestBuildDelegateA2ABody_SchemaValidSendMessageRequest pins the contract
|
||||
// requested by issue #2251: delegate_task must produce a schema-valid A2A
|
||||
// SendMessageRequest with role="user", messageId, parts, and metadata.
|
||||
func TestBuildDelegateA2ABody_SchemaValidSendMessageRequest(t *testing.T) {
|
||||
delegationID := "del-2251-test"
|
||||
task := "write a contract test"
|
||||
|
||||
body, err := buildDelegateA2ABody(delegationID, task)
|
||||
if err != nil {
|
||||
t.Fatalf("buildDelegateA2ABody failed: %v", err)
|
||||
}
|
||||
|
||||
var envelope map[string]interface{}
|
||||
if err := json.Unmarshal(body, &envelope); err != nil {
|
||||
t.Fatalf("body is not valid JSON: %v", err)
|
||||
}
|
||||
|
||||
// Top-level envelope shape
|
||||
if envelope["method"] != "message/send" {
|
||||
t.Errorf("method = %v, want message/send", envelope["method"])
|
||||
}
|
||||
|
||||
params, ok := envelope["params"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("params missing or not a map: %T", envelope["params"])
|
||||
}
|
||||
|
||||
msg, ok := params["message"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("message missing or not a map: %T", params["message"])
|
||||
}
|
||||
|
||||
// Issue #2251: role is required
|
||||
if msg["role"] != "user" {
|
||||
t.Errorf("message.role = %v, want \"user\"", msg["role"])
|
||||
}
|
||||
|
||||
// messageId must be present and match delegationID
|
||||
if msg["messageId"] != delegationID {
|
||||
t.Errorf("message.messageId = %v, want %s", msg["messageId"], delegationID)
|
||||
}
|
||||
|
||||
// parts must be a non-empty list with a text part
|
||||
parts, ok := msg["parts"].([]interface{})
|
||||
if !ok || len(parts) == 0 {
|
||||
t.Fatalf("message.parts missing or empty: %T", msg["parts"])
|
||||
}
|
||||
firstPart, ok := parts[0].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("first part is not a map: %T", parts[0])
|
||||
}
|
||||
// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251)
|
||||
if firstPart["kind"] != "text" {
|
||||
t.Errorf("first part kind = %v, want text", firstPart["kind"])
|
||||
}
|
||||
if firstPart["text"] != task {
|
||||
t.Errorf("first part text = %v, want %q", firstPart["text"], task)
|
||||
}
|
||||
|
||||
// metadata.delegation_id must match
|
||||
meta, ok := msg["metadata"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("metadata missing or not a map: %T", msg["metadata"])
|
||||
}
|
||||
if meta["delegation_id"] != delegationID {
|
||||
t.Errorf("metadata.delegation_id = %v, want %s", meta["delegation_id"], delegationID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -450,98 +450,6 @@ func TestHeartbeat_DegradedRecovery(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeartbeat_ErrorRateDegrade_Guarded verifies the error_rate degrade path
|
||||
// carries the `AND status = 'online'` guard, preventing a racing heartbeat
|
||||
// from flipping a concurrently-removed workspace back to degraded.
|
||||
func TestHeartbeat_ErrorRateDegrade_Guarded(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-degrade-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-degrade-guard", 0.6, "", 1, 100, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Stale read: heartbeat started before CascadeDelete set status='removed'
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-degrade-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("online"))
|
||||
|
||||
// Guarded UPDATE returns 0 rows because row is actually 'removed'
|
||||
mock.ExpectExec("UPDATE workspaces SET status =.*AND status = 'online'").
|
||||
WithArgs(models.StatusDegraded, "ws-degrade-guard").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
|
||||
// Broadcast still fires (existing behaviour) — mock it so sqlmock passes
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-degrade-guard","error_rate":0.6,"sample_error":"","active_tasks":1,"uptime_seconds":100}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHeartbeat_DegradedRecovery_Guarded verifies the degraded→online recovery
|
||||
// path carries the `AND status = 'degraded'` guard, preventing a racing
|
||||
// heartbeat from flipping a concurrently-removed workspace back to online.
|
||||
func TestHeartbeat_DegradedRecovery_Guarded(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-recover-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-recover-guard", 0.05, "", 1, 100, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Stale read: heartbeat started before CascadeDelete set status='removed'
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-recover-guard").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("degraded"))
|
||||
|
||||
// Guarded UPDATE returns 0 rows because row is actually 'removed'
|
||||
mock.ExpectExec("UPDATE workspaces SET status =.*AND status = 'degraded'").
|
||||
WithArgs(models.StatusOnline, "ws-recover-guard").
|
||||
WillReturnResult(sqlmock.NewResult(0, 0))
|
||||
|
||||
// Broadcast still fires (existing behaviour) — mock it so sqlmock passes
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-recover-guard","error_rate":0.05,"sample_error":"","active_tasks":1,"uptime_seconds":100}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- a2a_proxy.go: Workspace has no URL (503 with status) ----------
|
||||
|
||||
func TestProxyA2A_WorkspaceNoURL(t *testing.T) {
|
||||
|
||||
@@ -63,11 +63,7 @@ var (
|
||||
providerRegistryErr error
|
||||
)
|
||||
|
||||
// providerRegistry loads the embedded providers manifest once and caches it.
|
||||
// Defined as a variable (not a named function) so tests can swap in a mock
|
||||
// without restarting the process — required for fail-closed coverage of the
|
||||
// registry-unavailable path (workspace_provision_derive_test.go).
|
||||
var providerRegistry = func() (*providers.Manifest, error) {
|
||||
func providerRegistry() (*providers.Manifest, error) {
|
||||
providerRegistryOnce.Do(func() {
|
||||
providerRegistryManifest, providerRegistryErr = providers.LoadManifest()
|
||||
if providerRegistryErr != nil {
|
||||
|
||||
@@ -54,55 +54,6 @@ func mcpPost(t *testing.T, h *MCPHandler, workspaceID string, body interface{})
|
||||
return w
|
||||
}
|
||||
|
||||
// assertA2ASendMessageSchema validates that body is a schema-valid A2A
// SendMessageRequest: an envelope with jsonrpc="2.0" and method="message/send"
// whose params.message has role="user", a non-empty string messageId, and a
// non-empty parts list whose first part is a text part carrying wantTask.
// Issue #2251 contract test: delegate_task must always produce this shape.
//
// Structural failures (invalid JSON, missing params/message/parts) abort the
// calling test via t.Fatalf; field-value mismatches are reported with
// t.Errorf/t.Error so several can surface in a single run.
func assertA2ASendMessageSchema(t *testing.T, body []byte, wantTask string) {
	t.Helper()

	var envelope map[string]interface{}
	if err := json.Unmarshal(body, &envelope); err != nil {
		t.Fatalf("A2A body is not valid JSON: %v", err)
	}
	if envelope["jsonrpc"] != "2.0" {
		t.Errorf("jsonrpc = %v, want 2.0", envelope["jsonrpc"])
	}
	if envelope["method"] != "message/send" {
		t.Errorf("method = %v, want message/send", envelope["method"])
	}

	params, ok := envelope["params"].(map[string]interface{})
	if !ok {
		t.Fatalf("params missing or not a map: %T", envelope["params"])
	}
	msg, ok := params["message"].(map[string]interface{})
	if !ok {
		t.Fatalf("message missing or not a map: %T", params["message"])
	}

	if msg["role"] != "user" {
		t.Errorf("message.role = %v, want \"user\"", msg["role"])
	}
	// An absent messageId decodes to nil, and nil != "" — so comparing the
	// raw interface value against "" would let a missing (or non-string)
	// field pass. Require a non-empty string explicitly.
	if id, isString := msg["messageId"].(string); !isString || id == "" {
		t.Error("message.messageId is missing or empty")
	}

	parts, ok := msg["parts"].([]interface{})
	if !ok || len(parts) == 0 {
		t.Fatalf("message.parts missing or empty: %T", msg["parts"])
	}
	firstPart, ok := parts[0].(map[string]interface{})
	if !ok {
		t.Fatalf("first part is not a map: %T", parts[0])
	}
	// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251)
	if firstPart["kind"] != "text" {
		t.Errorf("first part kind = %v, want text", firstPart["kind"])
	}
	if firstPart["text"] != wantTask {
		t.Errorf("first part text = %v, want %q", firstPart["text"], wantTask)
	}
}
|
||||
|
||||
func expectCanCommunicateSiblings(mock sqlmock.Sqlmock, callerID, targetID, parentID string) {
|
||||
mock.ExpectQuery(`SELECT id, parent_id FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(callerID).
|
||||
@@ -258,7 +209,9 @@ func TestMCPHandler_DelegateTask_RoutesThroughPlatformA2AProxy(t *testing.T) {
|
||||
if !logActivity {
|
||||
t.Fatal("delegate_task should log through platform A2A proxy")
|
||||
}
|
||||
assertA2ASendMessageSchema(t, body, "do work")
|
||||
if !strings.Contains(string(body), "do work") {
|
||||
t.Fatalf("A2A body missing task text: %s", string(body))
|
||||
}
|
||||
return 200, []byte(`{"result":{"message":{"parts":[{"text":"done"}]}}}`), nil
|
||||
}
|
||||
|
||||
@@ -291,10 +244,7 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("queued", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("delivered", "", callerID, sqlmock.AnyArg()).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
called := make(chan struct{}, 1)
|
||||
@@ -302,7 +252,9 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
|
||||
if workspaceID != targetID || proxyCallerID != callerID {
|
||||
t.Fatalf("unexpected proxy route target=%q caller=%q", workspaceID, proxyCallerID)
|
||||
}
|
||||
assertA2ASendMessageSchema(t, body, "async work")
|
||||
if !strings.Contains(string(body), "async work") {
|
||||
t.Fatalf("A2A body missing task text: %s", string(body))
|
||||
}
|
||||
called <- struct{}{}
|
||||
return 200, []byte(`{"result":{"message":{"parts":[{"text":"accepted"}]}}}`), nil
|
||||
}
|
||||
@@ -314,7 +266,7 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"queued"`) {
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
waitGlobalAsyncForTest()
|
||||
@@ -352,8 +304,10 @@ func TestMCPHandler_DelegateTask_WithAttachments(t *testing.T) {
|
||||
if workspaceID != targetID || proxyCallerID != callerID {
|
||||
t.Fatalf("unexpected proxy route target=%q caller=%q", workspaceID, proxyCallerID)
|
||||
}
|
||||
assertA2ASendMessageSchema(t, body, "review this video")
|
||||
bodyStr := string(body)
|
||||
if !strings.Contains(bodyStr, `"text":"review this video"`) {
|
||||
t.Fatalf("A2A body missing task text: %s", bodyStr)
|
||||
}
|
||||
if !strings.Contains(bodyStr, `"kind":"video"`) {
|
||||
t.Fatalf("A2A body missing video attachment kind: %s", bodyStr)
|
||||
}
|
||||
@@ -400,10 +354,7 @@ func TestMCPHandler_DelegateTaskAsync_WithAttachments(t *testing.T) {
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("queued", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("delivered", "", callerID, sqlmock.AnyArg()).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
called := make(chan []byte, 1)
|
||||
@@ -429,13 +380,12 @@ func TestMCPHandler_DelegateTaskAsync_WithAttachments(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"queued"`) {
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
waitGlobalAsyncForTest()
|
||||
select {
|
||||
case body := <-called:
|
||||
assertA2ASendMessageSchema(t, body, "async work with image")
|
||||
bodyStr := string(body)
|
||||
if !strings.Contains(bodyStr, `"kind":"image"`) {
|
||||
t.Fatalf("A2A body missing image attachment kind: %s", bodyStr)
|
||||
@@ -461,10 +411,7 @@ func TestMCPHandler_DelegateTaskAsync_MarshalFailureDoesNotCallProxy(t *testing.
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("queued", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("failed", sqlmock.AnyArg(), callerID, sqlmock.AnyArg()).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// Force the (otherwise near-impossible) marshal failure for the A2A body.
|
||||
@@ -487,7 +434,7 @@ func TestMCPHandler_DelegateTaskAsync_MarshalFailureDoesNotCallProxy(t *testing.
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"queued"`) {
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
|
||||
|
||||
@@ -286,12 +286,12 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
delegationID := uuid.New().String()
|
||||
|
||||
// Issue #158: write delegation row so canvas Agent Comms tab shows the task text.
|
||||
// Insert with 'queued' status; goroutine updates to delivered or failed.
|
||||
// Insert with 'dispatched' status since the goroutine won't update it.
|
||||
if err := insertMCPDelegationRow(ctx, h.database, callerID, targetID, delegationID, task); err != nil {
|
||||
log.Printf("MCP delegate_task_async: failed to record delegation row: %v", err)
|
||||
// Non-fatal: still fire the A2A call.
|
||||
} else {
|
||||
updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "queued", "")
|
||||
updateMCPDelegationStatus(ctx, h.database, callerID, delegationID, "dispatched", "")
|
||||
}
|
||||
|
||||
// Fire and forget in a detached goroutine. Use a background context so
|
||||
@@ -321,28 +321,21 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
log.Printf("toolDelegateTask %s: json.Marshal a2aBody failed: %v", delegationID, marshalErr)
|
||||
// Bail out: proceeding would call proxyA2ARequest with a
|
||||
// nil/empty body, dispatching a malformed A2A request.
|
||||
updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "failed", fmt.Sprintf("marshal_error: %v", marshalErr))
|
||||
return
|
||||
}
|
||||
|
||||
status, _, err := h.proxyA2ARequest(bgCtx, targetID, a2aBody, callerID, true)
|
||||
if err != nil || status < 200 || status >= 300 {
|
||||
var errorDetail string
|
||||
if err != nil {
|
||||
log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s: %v", targetID, err)
|
||||
errorDetail = fmt.Sprintf("target_offline: %v", err)
|
||||
} else {
|
||||
log.Printf("MCPHandler.delegate_task_async: A2A proxy to %s returned status %d", targetID, status)
|
||||
errorDetail = fmt.Sprintf("http_status: %d", status)
|
||||
}
|
||||
updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "failed", errorDetail)
|
||||
return
|
||||
}
|
||||
|
||||
updateMCPDelegationStatus(bgCtx, h.database, callerID, delegationID, "delivered", "")
|
||||
})
|
||||
|
||||
return fmt.Sprintf(`{"task_id":%q,"status":"queued","target_id":%q}`, delegationID, targetID), nil
|
||||
return fmt.Sprintf(`{"task_id":%q,"status":"dispatched","target_id":%q}`, delegationID, targetID), nil
|
||||
}
|
||||
|
||||
func (h *MCPHandler) toolCheckTaskStatus(ctx context.Context, callerID string, args map[string]interface{}) (string, error) {
|
||||
|
||||
@@ -226,27 +226,17 @@ func (h *MemoriesHandler) Commit(c *gin.Context) {
|
||||
Source: contract.MemorySourceUser,
|
||||
})
|
||||
if err != nil {
|
||||
// Server-side log ONLY. The client response below is the generic
|
||||
// 500 — the underlying plugin error must NOT leak to the HTTP
|
||||
// response body (clients have no business seeing the memory
|
||||
// plugin's internal error class, message, or stack; the same
|
||||
// discipline as the #2392 leak fix). We include enough context
|
||||
// here for an operator to diagnose the failure from the server
|
||||
// log: workspace id, requested scope, the resolved v2 namespace,
|
||||
// the concrete Go error type (for log-aggregator filtering via
|
||||
// `err_class=...`), and the quoted error message (preserves
|
||||
// trailing whitespace / special chars that %v would munge).
|
||||
log.Printf(
|
||||
"Commit memory plugin error: workspace=%s scope=%s namespace=%s err_class=%T err=%q",
|
||||
workspaceID, body.Scope, nsName, err, err,
|
||||
)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
|
||||
return
|
||||
}
|
||||
memoryID := resp.ID
|
||||
log.Printf("Commit memory error (plugin): %v", err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to store memory"})
|
||||
return
|
||||
}
|
||||
memoryID := resp.ID
|
||||
|
||||
// #767 Audit: write a GLOBAL memory audit log entry for forensic replay.
|
||||
// Records a SHA-256 hash of the content — never plaintext — so the audit
|
||||
// #767 Audit: write a GLOBAL memory audit log entry for forensic replay.
|
||||
// Records a SHA-256 hash of the content — never plaintext — so the audit
|
||||
// trail can prove what was written without leaking sensitive values.
|
||||
// Failure is non-fatal: a logging error must not roll back a successful write.
|
||||
if body.Scope == "GLOBAL" {
|
||||
// Hash the sanitised content so the audit trail reflects what was
|
||||
// actually persisted (not the raw, potentially secret-bearing input).
|
||||
sum := sha256.Sum256([]byte(content))
|
||||
|
||||
@@ -177,7 +177,7 @@ func isEnvIdentPart(c byte) bool {
|
||||
return isEnvIdentStart(c) || (c >= '0' && c <= '9')
|
||||
}
|
||||
|
||||
// loadWorkspaceEnv reads the org root .env and the workspace-specific .env files.
|
||||
// loadWorkspaceEnv reads the org root .env and the workspace-specific .env
|
||||
// (workspace overrides org root). Used by both secret injection and channel
|
||||
// config expansion.
|
||||
//
|
||||
|
||||
@@ -321,17 +321,7 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
}
|
||||
|
||||
// Always generate default config.yaml (runtime, model, tier, etc.)
|
||||
configFiles, cfgErr := h.workspace.ensureDefaultConfig(id, payload)
|
||||
if cfgErr != nil {
|
||||
log.Printf("Org import: default config generation failed for %s: %v — marking workspace failed", ws.Name, cfgErr)
|
||||
// Fail-closed: the workspace row + layout + broadcast are already
|
||||
// persisted above (status='provisioning'). If we fall through,
|
||||
// the workspace stays stuck in provisioning silently. Mark it
|
||||
// failed so the canvas surfaces the failure card and the operator
|
||||
// sees the signal immediately, then skip the provisioning block.
|
||||
h.workspace.markProvisionFailed(ctx, id, fmt.Sprintf("default config generation failed: %v", cfgErr), nil)
|
||||
goto skipProvision
|
||||
}
|
||||
configFiles := h.workspace.ensureDefaultConfig(id, payload)
|
||||
|
||||
// Copy files_dir contents on top (system-prompt.md, CLAUDE.md, skills/, etc.)
|
||||
// Uses templatePath for CopyTemplateToContainer — runs AFTER configFiles are written
|
||||
@@ -558,7 +548,6 @@ func (h *OrgHandler) createWorkspaceTree(ws OrgWorkspace, parentID *string, absX
|
||||
})
|
||||
}
|
||||
|
||||
skipProvision:
|
||||
// internal#2006: migrate runtime-created schedules from a removed
|
||||
// predecessor of the same agent (role+parent) onto this freshly-created
|
||||
// workspace. Reconcile re-derives template-sourced state below, but
|
||||
|
||||
@@ -1,135 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// platform_agent.go — installs the org-level platform agent as the org root.
|
||||
// (RFC docs/design/rfc-platform-agent.md)
|
||||
//
|
||||
// The platform agent IS the org root: an org is the subtree under the single
|
||||
// parent_id IS NULL row (org_scope.go), so making the concierge the user's
|
||||
// universal A2A peer means making it that root. Installing it therefore:
|
||||
//
|
||||
// 1. upserts the platform-agent row (kind='platform', parent_id NULL);
|
||||
// 2. re-parents the org's existing root(s) under it;
|
||||
// 3. moves the org-anchor references — org_api_tokens.org_id and
|
||||
// org_plugin_allowlist.org_id, both of which key off the root workspace id
|
||||
// (see migrations 035/036 + 026) — from each old root to the platform agent.
|
||||
//
|
||||
// All of that happens in ONE transaction so a tenant's auth tokens and plugin
|
||||
// allowlist never point at a stale anchor. The operation is idempotent: a second
|
||||
// call finds the platform agent already the sole root and does nothing.
|
||||
//
|
||||
// Routing (CanCommunicate/sameOrg in registry/access.go + org_scope.go) needs NO
|
||||
// change — once the platform agent is the root, the existing ancestor/descendant
|
||||
// rules already give it universal in-org reach and keep tenant isolation intact.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// installPlatformAgentPayload is the JSON request body accepted by
// InstallPlatformAgent.
type installPlatformAgentPayload struct {
	// ID is the workspace id to install as the platform agent — a
	// deterministic uuidv5 derived per org by the control plane. Required
	// (enforced by the `binding:"required"` tag).
	ID string `json:"id" binding:"required"`
	// Name is the optional display name; the handler substitutes
	// "Org Concierge" when it is empty.
	Name string `json:"name"`
}
|
||||
|
||||
// InstallPlatformAgent handles POST /admin/org/platform-agent (AdminAuth).
|
||||
//
|
||||
// Idempotently installs the platform agent as the org root for THIS tenant. The
|
||||
// control plane calls it at org-provision time (new orgs) and during the
|
||||
// existing-org backfill rollout. Safe to call repeatedly.
|
||||
func InstallPlatformAgent(c *gin.Context) {
|
||||
var p installPlatformAgentPayload
|
||||
if err := c.ShouldBindJSON(&p); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid request body"})
|
||||
return
|
||||
}
|
||||
name := p.Name
|
||||
if name == "" {
|
||||
name = "Org Concierge"
|
||||
}
|
||||
if err := installPlatformAgent(c.Request.Context(), db.DB, p.ID, name); err != nil {
|
||||
log.Printf("InstallPlatformAgent: %v (id=%s)", err, p.ID)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "install failed"})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, gin.H{
|
||||
"status": "installed",
|
||||
"platform_agent_id": p.ID,
|
||||
"kind": models.KindPlatform,
|
||||
})
|
||||
}
|
||||
|
||||
// installPlatformAgent runs the idempotent install inside a single
// transaction: upsert the platform-agent row as a kind='platform' root,
// re-parent every other current root under it, and repoint the
// org_api_tokens / org_plugin_allowlist anchors from each old root to the
// platform agent. It is kept separate from the gin handler so integration
// tests can drive it against a real Postgres (the org-anchor migration cannot
// be proven with sqlmock).
//
// Any failure returns a wrapped error and the deferred Rollback discards the
// partial work; on success the result of Commit is returned.
func installPlatformAgent(ctx context.Context, database *sql.DB, platformID, name string) error {
	const (
		upsertPlatformSQL = `
		INSERT INTO workspaces (id, name, kind, tier, status, runtime, parent_id)
		VALUES ($1, $2, 'platform', 0, 'online', 'claude-code', NULL)
		ON CONFLICT (id) DO UPDATE SET kind = 'platform', parent_id = NULL
	`
		selectOtherRootsSQL = `SELECT id FROM workspaces WHERE parent_id IS NULL AND id <> $1`
		reparentSQL         = `UPDATE workspaces SET parent_id = $1, updated_at = now() WHERE id = $2`
		moveTokensSQL       = `UPDATE org_api_tokens SET org_id = $1 WHERE org_id = $2`
		moveAllowlistSQL    = `UPDATE org_plugin_allowlist SET org_id = $1 WHERE org_id = $2`
	)

	tx, err := database.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("begin: %w", err)
	}
	// Rollback is a no-op once Commit has succeeded, so deferring it
	// unconditionally covers every early-return path.
	defer func() { _ = tx.Rollback() }()

	// 1. Ensure the platform-agent row exists as a kind='platform' root.
	//    ON CONFLICT keeps it a platform root if it was pre-seeded; the row is
	//    tier 0 and never billed/provisioned as an ordinary workspace EC2.
	if _, err := tx.ExecContext(ctx, upsertPlatformSQL, platformID, name); err != nil {
		return fmt.Errorf("upsert platform agent: %w", err)
	}

	// 2. Collect the org's other current roots (parent_id IS NULL, excluding
	//    the platform agent). Zero rows means the install already happened;
	//    N rows are all handled below.
	rootRows, err := tx.QueryContext(ctx, selectOtherRootsSQL, platformID)
	if err != nil {
		return fmt.Errorf("select old roots: %w", err)
	}
	var previousRoots []string
	for rootRows.Next() {
		var rootID string
		if scanErr := rootRows.Scan(&rootID); scanErr != nil {
			rootRows.Close()
			return fmt.Errorf("scan old root: %w", scanErr)
		}
		previousRoots = append(previousRoots, rootID)
	}
	// The result set is closed before any further statement runs on tx.
	rootRows.Close()
	if iterErr := rootRows.Err(); iterErr != nil {
		return fmt.Errorf("iterate old roots: %w", iterErr)
	}

	// 3. For each old root: re-parent it under the platform agent, then move
	//    its org-anchor references, all in the same transaction. A non-root
	//    old root is kind='workspace', so it does not trip
	//    workspaces_platform_root_check.
	for _, oldRoot := range previousRoots {
		if _, err := tx.ExecContext(ctx, reparentSQL, platformID, oldRoot); err != nil {
			return fmt.Errorf("re-parent %s: %w", oldRoot, err)
		}
		if _, err := tx.ExecContext(ctx, moveTokensSQL, platformID, oldRoot); err != nil {
			return fmt.Errorf("migrate org_api_tokens for %s: %w", oldRoot, err)
		}
		if _, err := tx.ExecContext(ctx, moveAllowlistSQL, platformID, oldRoot); err != nil {
			return fmt.Errorf("migrate org_plugin_allowlist for %s: %w", oldRoot, err)
		}
	}

	return tx.Commit()
}
|
||||
@@ -1,188 +0,0 @@
|
||||
//go:build integration
|
||||
// +build integration
|
||||
|
||||
// platform_agent_integration_test.go — REAL Postgres gate for installPlatformAgent.
|
||||
//
|
||||
// Run with:
|
||||
//
|
||||
// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
|
||||
// go test -tags=integration ./internal/handlers/ -run Integration_PlatformAgentInstall -v
|
||||
//
|
||||
// CI: handlers-postgres-integration workflow (handlers + migrations path filter).
|
||||
//
|
||||
// Why this is NOT a sqlmock test
|
||||
// ------------------------------
|
||||
// The install re-parents the org's existing root under the platform agent AND
|
||||
// moves the org-anchor references (org_api_tokens.org_id, org_plugin_allowlist.
|
||||
// org_id) from old root to platform agent, atomically. The whole point is the
|
||||
// post-transaction row state: orgRootID() must resolve every node to the platform
|
||||
// agent, sameOrg() must still hold, and the auth/allowlist anchors must point at
|
||||
// the new root. Only a real Postgres can prove that; sqlmock cannot.
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/google/uuid"
|
||||
_ "github.com/lib/pq"
|
||||
)
|
||||
|
||||
func integrationDB_PlatformAgentInstall(t *testing.T) *sql.DB {
|
||||
t.Helper()
|
||||
url := requireIntegrationDBURL(t)
|
||||
conn, err := sql.Open("postgres", url)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
if err := conn.Ping(); err != nil {
|
||||
t.Fatalf("ping: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { conn.Close() })
|
||||
return conn
|
||||
}
|
||||
|
||||
// TestIntegration_PlatformAgentInstall_ReparentsRootAndMovesAnchors builds a
|
||||
// real org in Postgres:
|
||||
//
|
||||
// root (parent_id NULL, kind=workspace)
|
||||
// └── child
|
||||
// + an org_api_token anchored to root
|
||||
// + an org_plugin_allowlist entry anchored to root
|
||||
//
|
||||
// then installs the platform agent and asserts:
|
||||
// - the platform agent is the new sole root (kind=platform, parent_id NULL);
|
||||
// - the old root is re-parented under it; the child is untouched;
|
||||
// - both org-anchor references now point at the platform agent;
|
||||
// - a second install is a no-op (idempotent).
|
||||
func TestIntegration_PlatformAgentInstall_ReparentsRootAndMovesAnchors(t *testing.T) {
|
||||
conn := integrationDB_PlatformAgentInstall(t)
|
||||
ctx := context.Background()
|
||||
|
||||
tag := uuid.New().String()[:8]
|
||||
prefix := fmt.Sprintf("itest-pinstall-%s", tag)
|
||||
rootID := uuid.New().String()
|
||||
childID := uuid.New().String()
|
||||
platformID := uuid.New().String()
|
||||
|
||||
cleanup := func() {
|
||||
_, _ = conn.ExecContext(ctx, `DELETE FROM org_plugin_allowlist WHERE plugin_name = $1`, prefix+"-plugin")
|
||||
_, _ = conn.ExecContext(ctx, `DELETE FROM org_api_tokens WHERE prefix = $1`, tag)
|
||||
// child + old root (prefixed names) first, then the platform agent by id
|
||||
// (root.parent_id references it, so it must go last).
|
||||
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%")
|
||||
_, _ = conn.ExecContext(ctx, `DELETE FROM workspaces WHERE id = $1`, platformID)
|
||||
}
|
||||
t.Cleanup(cleanup)
|
||||
cleanup()
|
||||
|
||||
// Seed org tree.
|
||||
if _, err := conn.ExecContext(ctx, `
|
||||
INSERT INTO workspaces (id, name, tier, runtime, status, parent_id)
|
||||
VALUES ($1, $2, 2, 'claude-code', 'online', NULL)`, rootID, prefix+"-root"); err != nil {
|
||||
t.Fatalf("seed root: %v", err)
|
||||
}
|
||||
if _, err := conn.ExecContext(ctx, `
|
||||
INSERT INTO workspaces (id, name, tier, runtime, status, parent_id)
|
||||
VALUES ($1, $2, 2, 'claude-code', 'online', $3)`, childID, prefix+"-child", rootID); err != nil {
|
||||
t.Fatalf("seed child: %v", err)
|
||||
}
|
||||
// Org-anchor rows keyed to the OLD root.
|
||||
if _, err := conn.ExecContext(ctx, `
|
||||
INSERT INTO org_api_tokens (token_hash, prefix, name, org_id)
|
||||
VALUES ($1, $2, $3, $4)`,
|
||||
[]byte("hash-"+tag), tag, prefix+"-tok", rootID); err != nil {
|
||||
t.Fatalf("seed org_api_token: %v", err)
|
||||
}
|
||||
if _, err := conn.ExecContext(ctx, `
|
||||
INSERT INTO org_plugin_allowlist (org_id, plugin_name, enabled_by)
|
||||
VALUES ($1, $2, $3)`, rootID, prefix+"-plugin", childID); err != nil {
|
||||
t.Fatalf("seed allowlist: %v", err)
|
||||
}
|
||||
|
||||
// Install.
|
||||
if err := installPlatformAgent(ctx, conn, platformID, "Org Concierge"); err != nil {
|
||||
t.Fatalf("install: %v", err)
|
||||
}
|
||||
|
||||
assertState := func(stage string) {
|
||||
// platform agent is a kind=platform root.
|
||||
var kind string
|
||||
var parent sql.NullString
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
`SELECT kind, parent_id FROM workspaces WHERE id = $1`, platformID).Scan(&kind, &parent); err != nil {
|
||||
t.Fatalf("[%s] read platform agent: %v", stage, err)
|
||||
}
|
||||
if kind != "platform" || parent.Valid {
|
||||
t.Fatalf("[%s] platform agent kind=%q parent=%v, want platform/NULL", stage, kind, parent)
|
||||
}
|
||||
// old root re-parented under the platform agent.
|
||||
var rootParent sql.NullString
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
`SELECT parent_id FROM workspaces WHERE id = $1`, rootID).Scan(&rootParent); err != nil {
|
||||
t.Fatalf("[%s] read old root: %v", stage, err)
|
||||
}
|
||||
if !rootParent.Valid || rootParent.String != platformID {
|
||||
t.Fatalf("[%s] old root parent=%v, want %s", stage, rootParent, platformID)
|
||||
}
|
||||
// child untouched.
|
||||
var childParent sql.NullString
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
`SELECT parent_id FROM workspaces WHERE id = $1`, childID).Scan(&childParent); err != nil {
|
||||
t.Fatalf("[%s] read child: %v", stage, err)
|
||||
}
|
||||
if !childParent.Valid || childParent.String != rootID {
|
||||
t.Fatalf("[%s] child parent=%v, want %s (unchanged)", stage, childParent, rootID)
|
||||
}
|
||||
// org-anchor references moved to the platform agent.
|
||||
var tokOrg, alOrg string
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
`SELECT org_id FROM org_api_tokens WHERE prefix = $1`, tag).Scan(&tokOrg); err != nil {
|
||||
t.Fatalf("[%s] read token org_id: %v", stage, err)
|
||||
}
|
||||
if tokOrg != platformID {
|
||||
t.Fatalf("[%s] org_api_tokens.org_id=%s, want %s", stage, tokOrg, platformID)
|
||||
}
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
`SELECT org_id FROM org_plugin_allowlist WHERE plugin_name = $1`, prefix+"-plugin").Scan(&alOrg); err != nil {
|
||||
t.Fatalf("[%s] read allowlist org_id: %v", stage, err)
|
||||
}
|
||||
if alOrg != platformID {
|
||||
t.Fatalf("[%s] org_plugin_allowlist.org_id=%s, want %s", stage, alOrg, platformID)
|
||||
}
|
||||
// orgRootID + sameOrg now resolve everything to the platform agent.
|
||||
got, err := orgRootID(ctx, conn, childID)
|
||||
if err != nil {
|
||||
t.Fatalf("[%s] orgRootID(child): %v", stage, err)
|
||||
}
|
||||
if got != platformID {
|
||||
t.Fatalf("[%s] orgRootID(child)=%s, want %s", stage, got, platformID)
|
||||
}
|
||||
same, err := sameOrg(ctx, conn, childID, platformID)
|
||||
if err != nil || !same {
|
||||
t.Fatalf("[%s] sameOrg(child, platform)=%v err=%v, want true", stage, same, err)
|
||||
}
|
||||
}
|
||||
|
||||
assertState("first install")
|
||||
|
||||
// Idempotent: second install must not error or change state.
|
||||
if err := installPlatformAgent(ctx, conn, platformID, "Org Concierge"); err != nil {
|
||||
t.Fatalf("second install (idempotent): %v", err)
|
||||
}
|
||||
assertState("second install")
|
||||
|
||||
// Neither seeded team node is a root any more — the platform agent is.
|
||||
var nRoots int
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
`SELECT count(*) FROM workspaces WHERE parent_id IS NULL AND id IN ($1, $2)`,
|
||||
rootID, childID).Scan(&nRoots); err != nil {
|
||||
t.Fatalf("count roots: %v", err)
|
||||
}
|
||||
if nRoots != 0 {
|
||||
t.Fatalf("team roots after install = %d, want 0 (old root re-parented under platform agent)", nRoots)
|
||||
}
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// TestInstallPlatformAgent_BadJSON rejects a payload missing the required id
|
||||
// before touching the DB (binding:"required" on ID).
|
||||
func TestInstallPlatformAgent_BadJSON(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest("POST", "/admin/org/platform-agent",
|
||||
bytes.NewBufferString(`{"name":"Org Concierge"}`)) // no id
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
InstallPlatformAgent(c)
|
||||
|
||||
if w.Code != http.StatusBadRequest {
|
||||
t.Errorf("missing id: expected 400, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
@@ -161,7 +161,7 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
|
||||
// 1. Strip plugin's rule/fragment markers from CLAUDE.md (mirrors
|
||||
// AgentskillsAdaptor.uninstall lines 184-188). Best-effort: if
|
||||
// the user edited CLAUDE.md, our marker stays untouched.
|
||||
h.stripPluginMarkersFromMemory(ctx, workspaceID, containerName, pluginName)
|
||||
h.stripPluginMarkersFromMemory(ctx, containerName, pluginName)
|
||||
|
||||
// 2. Remove copied skill dirs declared in the plugin's plugin.yaml.
|
||||
for _, skill := range skillNames {
|
||||
@@ -171,11 +171,9 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
|
||||
log.Printf("Plugin uninstall: skipping invalid skill name %q in %s: %v", skill, pluginName, err)
|
||||
continue
|
||||
}
|
||||
if _, rmErr := h.execAsRoot(ctx, containerName, []string{
|
||||
_, _ = h.execAsRoot(ctx, containerName, []string{
|
||||
"rm", "-rf", "/configs/skills/" + skill,
|
||||
}); rmErr != nil {
|
||||
log.Printf("Plugin uninstall: failed to remove skill %s from %s: %v", skill, workspaceID, rmErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// 3. Delete the plugin directory itself (as root to handle file ownership).
|
||||
|
||||
@@ -393,7 +393,7 @@ func (h *PluginsHandler) readPluginSkillsFromContainer(ctx context.Context, cont
|
||||
// `# Plugin: <name> /` — mirrors AgentskillsAdaptor.uninstall's stripping
|
||||
// logic so install/uninstall are symmetric. Best-effort: silent on read or
|
||||
// write failure, since the rest of uninstall must still succeed.
|
||||
func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, workspaceID, containerName, pluginName string) {
|
||||
func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, containerName, pluginName string) {
|
||||
// Use sed via bash -c for atomic in-place delete: drop the marker line
|
||||
// and the blank line that follows it (install adds a leading blank line
|
||||
// before the marker via append_to_memory). Three sed passes mirror the
|
||||
@@ -417,9 +417,7 @@ func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, works
|
||||
`awk 'BEGIN{skip=0; blanks=0} /^%s/{skip=1; blanks=0; next} skip==1 && /^[[:space:]]*$/{blanks++; if(blanks>=2){skip=0; print; next} next} /^# Plugin: /{if(skip==1)skip=0} skip==1{next} {print}' /configs/CLAUDE.md > /tmp/claude.new && mv /tmp/claude.new /configs/CLAUDE.md`,
|
||||
regexpEscapeForAwk(marker),
|
||||
)
|
||||
if _, awkErr := h.execAsRoot(ctx, containerName, []string{"bash", "-c", script}); awkErr != nil {
|
||||
log.Printf("Plugin uninstall: failed to strip markers from CLAUDE.md for %s in %s: %v", pluginName, workspaceID, awkErr)
|
||||
}
|
||||
_, _ = h.execAsRoot(ctx, containerName, []string{"bash", "-c", script})
|
||||
}
|
||||
|
||||
// regexpEscapeForAwk escapes characters that have special meaning inside an
|
||||
|
||||
@@ -787,8 +787,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
nativeStatus := runtimeOverrides.HasCapability(payload.WorkspaceID, "status_mgmt")
|
||||
|
||||
if !nativeStatus && currentStatus == "online" && payload.ErrorRate >= 0.5 {
|
||||
// #73 guard: heartbeat degrade must not resurrect a removed workspace.
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status = 'online'`, models.StatusDegraded, payload.WorkspaceID); err != nil {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusDegraded, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to mark %s degraded: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceDegraded), payload.WorkspaceID, map[string]interface{}{
|
||||
@@ -807,8 +806,7 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
// Skipped under native_status_mgmt for the same reason as the
|
||||
// degrade branch above: the adapter owns the transition.
|
||||
if !nativeStatus && currentStatus == "degraded" && payload.ErrorRate < 0.1 && payload.RuntimeState == "" {
|
||||
// #73 guard: heartbeat recovery must not resurrect a removed workspace.
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status = 'degraded'`, models.StatusOnline, payload.WorkspaceID); err != nil {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2`, models.StatusOnline, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to recover %s to online: %v", payload.WorkspaceID, err)
|
||||
}
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{})
|
||||
@@ -869,29 +867,6 @@ func (h *RegistryHandler) evaluateStatus(c *gin.Context, payload models.Heartbea
|
||||
})
|
||||
}
|
||||
|
||||
// Auto-recovery from failed: the provision-timeout sweeper
|
||||
// (registry/provisiontimeout.go) flips a workspace to 'failed' when it sits
|
||||
// in 'provisioning' past DefaultProvisioningTimeout (10m for claude-code).
|
||||
// But a slow cold-boot (EC2 image pull + LLM preflight) can still finish and
|
||||
// start heartbeating AFTER the flip — agent_card is written unconditionally
|
||||
// on register, so the box is genuinely serving while its status is stuck
|
||||
// 'failed'. A live heartbeat is authoritative: recover to online. Without
|
||||
// this, a healthy-but-slow workspace (e.g. a model that preflights slower
|
||||
// than the 10m budget) shows 'failed' forever despite working — the
|
||||
// mechanism behind the intermittent multi-provider e2e "boot failures". The
|
||||
// `AND status = 'failed'` guard keeps the flip conditional (won't override
|
||||
// 'removed').
|
||||
if currentStatus == "failed" {
|
||||
if _, err := db.DB.ExecContext(ctx, `UPDATE workspaces SET status = $1, updated_at = now() WHERE id = $2 AND status = 'failed'`, models.StatusOnline, payload.WorkspaceID); err != nil {
|
||||
log.Printf("Heartbeat: failed to recover %s from failed: %v", payload.WorkspaceID, err)
|
||||
} else {
|
||||
log.Printf("Heartbeat: transitioned %s from failed to online (late heartbeat after provision-timeout)", payload.WorkspaceID)
|
||||
}
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceOnline), payload.WorkspaceID, map[string]interface{}{
|
||||
"recovered_from": currentStatus,
|
||||
})
|
||||
}
|
||||
|
||||
// #1870 Phase 1: drain one queued A2A request if the target reports
|
||||
// spare capacity. The heartbeat's active_tasks field reflects what the
|
||||
// workspace runtime is ACTUALLY running right now, independent of
|
||||
|
||||
@@ -193,54 +193,6 @@ func TestHeartbeatHandler_ProvisioningToOnline(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Heartbeat — failed → online recovery (#616 follow-up) ====================
|
||||
|
||||
// A workspace flipped to 'failed' by the provision-timeout sweeper must recover
|
||||
// to 'online' once it starts heartbeating: a live heartbeat proves the agent
|
||||
// booted (just slowly, past the 10m budget), so the timeout flip was premature.
|
||||
func TestHeartbeatHandler_FailedToOnline(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewRegistryHandler(broadcaster)
|
||||
|
||||
mock.ExpectQuery("SELECT COALESCE\\(current_task").
|
||||
WithArgs("ws-failed").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"current_task"}).AddRow(""))
|
||||
|
||||
mock.ExpectExec("UPDATE workspaces SET").
|
||||
WithArgs("ws-failed", 0.0, "", 1, 3000, "").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
// evaluateStatus SELECT — currently failed (provision-timeout sweeper flip)
|
||||
mock.ExpectQuery("SELECT status FROM workspaces WHERE id =").
|
||||
WithArgs("ws-failed").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status"}).AddRow("failed"))
|
||||
|
||||
// the new failed → online recovery transition
|
||||
mock.ExpectExec("UPDATE workspaces SET status =").
|
||||
WithArgs(models.StatusOnline, "ws-failed").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
mock.ExpectExec("INSERT INTO structure_events").
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
body := `{"workspace_id":"ws-failed","error_rate":0.0,"sample_error":"","active_tasks":1,"uptime_seconds":3000}`
|
||||
c.Request = httptest.NewRequest("POST", "/registry/heartbeat", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.Heartbeat(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("expected status 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Heartbeat — awaiting_agent → online recovery ====================
|
||||
// External workspaces flip to 'awaiting_agent' via healthsweep when their
|
||||
// heartbeat goes stale. When the operator's poller comes back, heartbeat
|
||||
|
||||
@@ -9,7 +9,6 @@ import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/approvals"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/audit"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/crypto"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
@@ -321,18 +320,6 @@ func (h *SecretsHandler) Set(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// RFC platform-agent Phase 4b: gate org-token (platform-agent) secret writes
|
||||
// behind human approval. The context includes the key so an approval for one
|
||||
// secret cannot authorise writing another. No-op for ordinary callers and
|
||||
// when the rollout flag is off (scoping lives in gateDestructive).
|
||||
// SecretsHandler has no broadcaster, so pass nil — requireApproval persists
|
||||
// the pending row regardless; only the live canvas push is skipped.
|
||||
if !gateDestructive(c, nil, workspaceID, approvals.ActionSecretWrite,
|
||||
"write secret "+body.Key,
|
||||
map[string]interface{}{"workspace_id": workspaceID, "key": body.Key}) {
|
||||
return
|
||||
}
|
||||
|
||||
// Encrypt the value (AES-256-GCM if SECRETS_ENCRYPTION_KEY is set, plaintext otherwise)
|
||||
encrypted, err := crypto.Encrypt([]byte(body.Value))
|
||||
if err != nil {
|
||||
|
||||
@@ -14,7 +14,6 @@ import (
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime/debug"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -114,11 +113,6 @@ func (h *WorkspaceHandler) goAsync(fn func()) {
|
||||
h.asyncWG.Add(1)
|
||||
go func() {
|
||||
defer h.asyncWG.Done()
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Printf("PANIC recovered in goAsync goroutine: %v\n%s", r, debug.Stack())
|
||||
}
|
||||
}()
|
||||
fn()
|
||||
}()
|
||||
}
|
||||
@@ -157,11 +151,6 @@ func globalGoAsync(fn func()) {
|
||||
globalAsync.Add(1)
|
||||
go func() {
|
||||
defer globalAsync.Done()
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
log.Printf("PANIC recovered in globalGoAsync goroutine: %v\n%s", r, debug.Stack())
|
||||
}
|
||||
}()
|
||||
fn()
|
||||
}()
|
||||
}
|
||||
@@ -840,23 +829,11 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
if _, err := os.Stat(runtimeDefault); err == nil {
|
||||
templatePath = runtimeDefault
|
||||
} else {
|
||||
var cfgErr error
|
||||
configFiles, cfgErr = h.ensureDefaultConfig(id, payload)
|
||||
if cfgErr != nil {
|
||||
log.Printf("Create workspace %s: default config generation failed: %v", id, cfgErr)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate workspace configuration"})
|
||||
return
|
||||
}
|
||||
configFiles = h.ensureDefaultConfig(id, payload)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
var cfgErr error
|
||||
configFiles, cfgErr = h.ensureDefaultConfig(id, payload)
|
||||
if cfgErr != nil {
|
||||
log.Printf("Create workspace %s: default config generation failed: %v", id, cfgErr)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate workspace configuration"})
|
||||
return
|
||||
}
|
||||
configFiles = h.ensureDefaultConfig(id, payload)
|
||||
}
|
||||
|
||||
// Auto-provision — pick backend: control plane (SaaS) or Docker (self-hosted).
|
||||
|
||||
@@ -112,12 +112,7 @@ func workspaceComputeIsZero(compute models.WorkspaceCompute) bool {
|
||||
compute.Display.Mode == "" &&
|
||||
compute.Display.Width == 0 &&
|
||||
compute.Display.Height == 0 &&
|
||||
compute.Display.Protocol == "" &&
|
||||
// A provider- or persistence-only compute is NOT zero — it must
|
||||
// round-trip so GET returns those fields (canvas provider badge +
|
||||
// data-persistence selector both read them back).
|
||||
compute.Provider == "" &&
|
||||
compute.DataPersistence == ""
|
||||
compute.Display.Protocol == ""
|
||||
}
|
||||
|
||||
func workspaceComputeJSON(compute models.WorkspaceCompute) (string, error) {
|
||||
@@ -147,17 +142,6 @@ func workspaceComputeJSON(compute models.WorkspaceCompute) (string, error) {
|
||||
if len(display) > 0 {
|
||||
out["display"] = display
|
||||
}
|
||||
// Cloud/compute provider + durable-data choice. These were FORWARDED to CP
|
||||
// at provision time but never serialized back here, so GET /workspaces
|
||||
// dropped them — the canvas provider badge always showed the default AWS and
|
||||
// the data-persistence selector always showed "auto". Round-trip them (still
|
||||
// omit-when-empty, so existing AWS/default rows serialize unchanged).
|
||||
if compute.Provider != "" {
|
||||
out["provider"] = compute.Provider
|
||||
}
|
||||
if compute.DataPersistence != "" {
|
||||
out["data_persistence"] = compute.DataPersistence
|
||||
}
|
||||
b, err := json.Marshal(out)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user