Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 331f502636 | |||
| d357f17be5 | |||
| a76386efc2 | |||
| 26ea3f8322 | |||
| 95631c0bb2 |
@@ -1,247 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
SSOT fail-closed approval validator (SEV-1 internal#812).
|
||||
|
||||
This module is the SINGLE source of truth for whether a Gitea review counts
|
||||
as a "genuine" approval. Both consumers must call into it — they MUST NOT
|
||||
duplicate the predicate:
|
||||
|
||||
- .gitea/scripts/gitea-merge-queue.py (Python) — imports directly.
|
||||
- .gitea/scripts/review-check.sh (bash, jq) — calls the Python helper
|
||||
at .gitea/scripts/_review_check_filter.py, which in turn calls this
|
||||
module. There is no separate jq / bash copy of the predicate; a
|
||||
reviewer who wants to weaken the gate has to weaken this one file.
|
||||
|
||||
# The fail-closed contract
|
||||
|
||||
A review counts as a GENUINE APPROVED on the current head ONLY IF ALL hold:
|
||||
|
||||
1. state == "APPROVED"
|
||||
2. official == true
|
||||
3. dismissed != true
|
||||
4. stale != true
|
||||
5. commit_id is present and equals the PR's current head SHA
|
||||
|
||||
ANY failure of any of the above → REJECT.
|
||||
|
||||
# The bug this fixes
|
||||
|
||||
The previous gitea-merge-queue.py predicate had a `if isinstance(commit_id,
|
||||
str) and commit_id and headsha:` guard that *skipped* the commit_id check
|
||||
when the review carried no commit_id. The previous review-check.sh jq
|
||||
filter required `commit_id == $head`, which is also implicitly fail-closed
|
||||
on missing commit_id (null != head), but only one of the two consumers
|
||||
behaved correctly — a code-drift trap.
|
||||
|
||||
Both behaviors are now defined here, as a single fail-closed predicate.
|
||||
A MISSING commit_id is the Gitea row signature of a spoofed or pre-commit
|
||||
review: a real reviewer cannot have submitted against a commit that
|
||||
doesn't exist. Accepting these is exactly the fail-open that SEV-1
|
||||
internal#812 describes and the re-opened path that closed #843 (with CR2
|
||||
+ Researcher both flagging it) addresses.
|
||||
|
||||
# Mutation-resistance
|
||||
|
||||
The unit tests in tests/test_approval_validator.py assert rejection
|
||||
explicitly for each fail-closed case (missing commit_id, stale head,
|
||||
non-official, dismissed, etc.). A reviewer who tries to weaken the
|
||||
predicate by removing the commit_id check, by re-introducing the
|
||||
"no commit_id is accepted" escape hatch, or by changing `!=` to `==`
|
||||
in the head comparison will trip those tests in CI.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable, Optional, Tuple
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Canonical Gitea review-state enum (EXACT match -- no case coercion).
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# Gitea's reviews API emits review.state as one of a fixed set of
|
||||
# UPPERCASE string constants: "APPROVED", "REQUEST_CHANGES",
|
||||
# "REQUEST_REVIEW", "COMMENT", "PENDING", "DISMISSED" (verified
|
||||
# against the live API across real molecule-core PRs). They are ALWAYS
|
||||
# uppercase on the wire.
|
||||
#
|
||||
# FAIL-CLOSED: we compare review.state to these constants with EXACT
|
||||
# equality. The previous code used str(state or "").upper(), which
|
||||
# coerced a lowercase/mixed-case "approved" or "request_changes" into
|
||||
# the canonical value and ACCEPTED it. A real Gitea row never carries a
|
||||
# lowercase state, so a case-variant value is the signature of a
|
||||
# hand-forged / spoofed row, not a legitimate review. Coercing it was a
|
||||
# residual fail-open (SEV-1 internal#812, RCs 9849/9851/9852). We reject
|
||||
# anything that is not byte-for-byte the canonical constant.
|
||||
# Canonical review-state constants. Callers compare review["state"] against
# these with exact equality -- no .upper()/.strip() normalisation -- so a
# case-variant or padded value never matches.
STATE_APPROVED = "APPROVED"
STATE_REQUEST_CHANGES = "REQUEST_CHANGES"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shared predicate — fail-closed on every condition
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_official_current_head(review: object, headsha: object) -> bool:
    """Return True iff ``review`` is official, live, and bound to ``headsha``.

    Every condition is mandatory and fail-closed; any failure returns False:

    * ``review`` must be a dict.
    * ``review["official"]`` must be the literal ``True``. A truthiness test
      would also accept the string ``"false"`` or the integer ``1``, so the
      check is an identity check against ``True``.
    * ``review["dismissed"]`` and ``review["stale"]`` must be falsy (a
      missing key counts as falsy).
    * ``review["commit_id"]`` must be a non-empty string.
    * ``headsha`` must be a non-empty string equal to ``commit_id``.

    Args:
        review: One review row; anything that is not a dict is rejected.
        headsha: The pull request's current head SHA.

    Returns:
        True only when all conditions above hold.
    """
    if not isinstance(review, dict):
        return False
    if review.get("official") is not True:
        return False
    if review.get("dismissed") or review.get("stale"):
        return False

    # FAIL-CLOSED: a missing / empty / non-string commit_id is rejected
    # outright rather than causing the head comparison to be skipped.
    commit_id = review.get("commit_id")
    if not isinstance(commit_id, str) or not commit_id:
        return False

    # FAIL-CLOSED: an unusable head SHA can never match, and a review bound
    # to any other commit (e.g. a previous head) does not count.
    if not isinstance(headsha, str) or not headsha:
        return False
    return commit_id == headsha
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Per-verdict predicates
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def is_genuine_approval(
    review: object,
    *,
    headsha: str,
    reviewer_set: Optional[Iterable[str]] = None,
) -> bool:
    """Return True iff ``review`` is a genuine APPROVED on the current head.

    The review must be a dict whose ``state`` is exactly ``STATE_APPROVED``
    and which passes ``is_official_current_head(review, headsha)``.

    When ``reviewer_set`` is provided, the review's ``user.login`` must also
    be a string contained in that set (the merge-queue uses this to count
    only "recognised" reviewers for the 2-genuine floor; review-check.sh
    applies its own team-membership probe separately and so does not pass a
    set).

    Args:
        review: One review row; anything that is not a dict is rejected.
        headsha: The pull request's current head SHA.
        reviewer_set: Optional iterable of accepted reviewer logins.

    Returns:
        True only when every condition holds; False otherwise.
    """
    if not isinstance(review, dict):
        return False
    # EXACT-ENUM (fail-closed): no .upper()/.strip() coercion. A case-variant
    # or whitespace-padded state is rejected, not normalised into APPROVED.
    if review.get("state") != STATE_APPROVED:
        return False
    if not is_official_current_head(review, headsha):
        return False
    if reviewer_set is None:
        return True

    # A `user` value that is missing or not a dict cannot name a recognised
    # reviewer; reject it instead of raising AttributeError on `.get`.
    author = review.get("user")
    if not isinstance(author, dict):
        return False
    login = author.get("login")
    return isinstance(login, str) and login in set(reviewer_set)
|
||||
|
||||
|
||||
def is_open_request_changes(review: object, *, headsha: str) -> bool:
    """Return True iff ``review`` is an open official REQUEST_CHANGES on the
    current head.

    Same fail-closed contract as the approval predicate: the review must be
    a dict, its ``state`` must be exactly ``STATE_REQUEST_CHANGES``, and it
    must pass ``is_official_current_head(review, headsha)``. A review with a
    missing commit_id is therefore rejected, not treated as still blocking
    from an old head.

    Args:
        review: One review row; anything that is not a dict is rejected.
        headsha: The pull request's current head SHA.

    Returns:
        True only when every condition holds; False otherwise.
    """
    if not isinstance(review, dict):
        return False
    # EXACT-ENUM (fail-closed): a lowercase / mixed-case "request_changes"
    # is never coerced into a match; the exact constant is required.
    if review.get("state") != STATE_REQUEST_CHANGES:
        return False
    return is_official_current_head(review, headsha)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Consumer-facing reducer (returns what the two call sites need)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def classify_reviews(
    reviews: Iterable[object],
    *,
    headsha: str,
    reviewer_set: Optional[Iterable[str]] = None,
) -> Tuple[set[str], list[str]]:
    """Reduce a PR's reviews to (approvers, request_changes) on the CURRENT head.

    approvers: distinct logins whose LATEST valid review on the current head
        is APPROVED.
    request_changes: distinct logins whose LATEST valid review on the
        current head is REQUEST_CHANGES.

    The input is consumed in order (the caller is expected to pass reviews
    oldest-first), and a later VALID row for a login overwrites an earlier
    one. An invalid later row -- a COMMENT, or a review with a null/old
    commit_id -- is dropped before the reduce, so it can neither overwrite
    nor erase a genuine review.

    Args:
        reviews: Iterable of review rows; rows that are not dicts are skipped.
        headsha: The pull request's current head SHA.
        reviewer_set: Optional iterable of accepted logins; when given, rows
            from any other login are skipped.

    Returns:
        A ``(approvers, request_changes)`` tuple: a set of logins and a list
        of logins respectively.
    """
    allowed = set(reviewer_set) if reviewer_set is not None else None

    # VALIDATE-BEFORE-REDUCE (SEV-1 internal#812 follow-up).
    #
    # Reducing first (latest row per user) and validating the survivor
    # afterwards is exploitable: a user posts a genuine current-head APPROVED
    # or REQUEST_CHANGES, then a LATER row that fails the predicate. The
    # invalid row would replace the genuine one, masking an approval or --
    # worse -- erasing a real block. So every row is validated here, and only
    # valid rows are eligible to become a user's "latest".
    latest_valid_by_user: dict[str, dict] = {}
    for review in reviews:
        if not isinstance(review, dict):
            continue
        # A `user` that is missing or not a dict cannot identify a reviewer;
        # skip the row instead of raising AttributeError on `.get`.
        author = review.get("user")
        if not isinstance(author, dict):
            continue
        login = author.get("login")
        # An empty login identifies nobody and must not count as a reviewer.
        if not isinstance(login, str) or not login:
            continue
        if allowed is not None and login not in allowed:
            continue
        # EXACT-ENUM (fail-closed): exact constants only, no coercion.
        if review.get("state") not in (STATE_APPROVED, STATE_REQUEST_CHANGES):
            continue
        # Official, not dismissed, not stale, commit_id present AND == head.
        if not is_official_current_head(review, headsha):
            continue
        latest_valid_by_user[login] = review

    approvers: set[str] = set()
    request_changes: list[str] = []
    for login, review in latest_valid_by_user.items():
        # Every surviving row already passed the shared predicate, so its
        # state decides the verdict; the per-verdict SSOT predicates are
        # still used so the rule cannot drift.
        if is_genuine_approval(review, headsha=headsha, reviewer_set=None):
            approvers.add(login)
        elif is_open_request_changes(review, headsha=headsha):
            request_changes.append(login)
    return approvers, request_changes
|
||||
@@ -1,74 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Helper for review-check.sh: applies the SSOT approval predicate to a
|
||||
PR's reviews and prints the candidate approver logins on stdout (one per
|
||||
line, de-duplicated, author excluded).
|
||||
|
||||
review-check.sh uses this in place of its previous inline jq filter so the
|
||||
predicate is single-sourced. The jq filter is gone; if you want to change
|
||||
the predicate, edit .gitea/scripts/_approval_validator.py, not this file.
|
||||
|
||||
Usage:
|
||||
python3 _review_check_filter.py <reviews.json> <head-sha> <author-login>
|
||||
|
||||
Output:
|
||||
- Candidate approver logins, one per line, de-duplicated, sorted.
|
||||
- Excludes `author-login` (the PR author cannot approve their own PR).
|
||||
- Empty output → review-check.sh interprets as "no candidates" and exits 1
|
||||
after the team-membership probe.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Same-dir import — script lives next to _approval_validator.py
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent))
|
||||
from _approval_validator import is_genuine_approval # noqa: E402
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Print candidate approver logins for review-check.sh.

    Expects ``argv`` to be ``[prog, <reviews.json>, <head-sha>,
    <author-login>]``. Each review in the JSON file is passed through
    ``is_genuine_approval`` (with ``reviewer_set=None``); the logins of the
    survivors, excluding ``author-login``, are printed to stdout one per
    line, de-duplicated and sorted.

    Args:
        argv: Full argument vector, including the program name.

    Returns:
        0 on success (possibly with empty output); 2 on a usage error, an
        unreadable / undecodable / unparsable reviews file, or a JSON body
        that is not a list.
    """
    if len(argv) != 4:
        print(
            f"usage: {argv[0] if argv else '_review_check_filter.py'} "
            "<reviews.json> <head-sha> <author-login>",
            file=sys.stderr,
        )
        return 2
    reviews_path = Path(argv[1])
    headsha = argv[2]
    author = argv[3]

    try:
        reviews = json.loads(reviews_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # that read_text raises on bytes that are not valid UTF-8.
        print(f"::error::could not read reviews JSON: {exc}", file=sys.stderr)
        return 2
    if not isinstance(reviews, list):
        print("::error::reviews JSON was not a list", file=sys.stderr)
        return 2

    candidates: set[str] = set()
    for review in reviews:
        # reviewer_set=None: review-check.sh applies its own team-membership
        # probe separately. Only the fail-closed commit_id / state /
        # official / dismissed / stale contract is enforced here.
        if not is_genuine_approval(review, headsha=headsha, reviewer_set=None):
            continue
        # A `user` that is missing or not a dict names no one; skip it
        # instead of raising AttributeError on `.get`.
        reviewer = review.get("user")
        if not isinstance(reviewer, dict):
            continue
        login = reviewer.get("login")
        if not isinstance(login, str) or not login:
            continue
        # The PR author cannot approve their own PR.
        if login == author:
            continue
        candidates.add(login)

    for login in sorted(candidates):
        print(login)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||
@@ -31,7 +31,7 @@
|
||||
#
|
||||
# REQUIRED_CHECKS (legacy) is a newline-separated list used when the
|
||||
# JSON variable is not set. Declared in the workflow YAML rather than
|
||||
# fetched from /branch_protections (which needs admin scope —
|
||||
# fetched from /branch_protections (which needs admin scope — sop-tier-bot
|
||||
# has read-only). Trade dynamism for simplicity: when the required-check
|
||||
# set changes, update both branch protection AND this env. Keeping them
|
||||
# in sync is less complexity than granting the audit bot admin perms on
|
||||
@@ -54,57 +54,32 @@ API="https://${GITEA_HOST}/api/v1"
|
||||
AUTH="Authorization: token ${GITEA_TOKEN}"
|
||||
|
||||
# 1. Fetch the PR. If not merged, no-op.
|
||||
# Fail-closed: verify HTTP 200 before parsing. A 401/403/404 means the token
|
||||
# is invalid or the PR is inaccessible — we must NOT silently treat that as
|
||||
# "not merged" and skip the audit.
|
||||
PR_TMP=$(mktemp)
|
||||
PR_HTTP=$(curl -sS -o "$PR_TMP" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
|
||||
PR=$(cat "$PR_TMP")
|
||||
rm -f "$PR_TMP"
|
||||
if [ "$PR_HTTP" != "200" ]; then
|
||||
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${PR_HTTP} — cannot evaluate merge state."
|
||||
exit 1
|
||||
fi
|
||||
# FAIL-CLOSED: a 200 response with a missing/malformed `merged` field must
|
||||
# NOT be treated as "not merged" (that would silently skip the audit).
|
||||
# We verify both presence AND correct type for every field we consume.
|
||||
PR_SCHEMA_OK=$(echo "$PR" | jq -r '
|
||||
(.merged | type == "boolean") and
|
||||
(.merge_commit_sha | type == "string") and
|
||||
(.merged_by | type == "object") and (.merged_by.login | type == "string") and
|
||||
(.base | type == "object") and (.base.ref | type == "string") and
|
||||
(.head | type == "object") and (.head.sha | type == "string")
|
||||
')
|
||||
if [ "$PR_SCHEMA_OK" != "true" ]; then
|
||||
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP 200 but one or more required fields are missing, null, or of wrong type — cannot evaluate force-merge."
|
||||
exit 1
|
||||
fi
|
||||
MERGED=$(echo "$PR" | jq -r '.merged')
|
||||
PR=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
|
||||
MERGED=$(echo "$PR" | jq -r '.merged // false')
|
||||
if [ "$MERGED" != "true" ]; then
|
||||
echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha')
|
||||
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login')
|
||||
# NOTE: no || true — with set -euo pipefail, jq parse failures (e.g. field
|
||||
# missing from API response) propagate as hard errors. Use jq's // operator
|
||||
# for graceful defaults instead of bash || true guards. This was re-added by
|
||||
# 8c343e3a ("fix(gitea): add || true guards to jq pipelines") — reverted
|
||||
# here because the guards mask silent failures that hide malformed API responses.
|
||||
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
|
||||
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
|
||||
TITLE=$(echo "$PR" | jq -r '.title // ""')
|
||||
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref')
|
||||
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha')
|
||||
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
|
||||
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')
|
||||
|
||||
if [ -z "$MERGE_SHA" ]; then
|
||||
echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 2. Required status checks — branch-aware JSON dict takes precedence.
|
||||
if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then
|
||||
# FAIL-CLOSED: if REQUIRED_CHECKS_JSON is set, the branch entry must exist
|
||||
# and be an array. A missing branch or non-array value means the config is
|
||||
# malformed or drifted — we must NOT silently treat it as "no checks".
|
||||
_RC_JSON_OK=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '
|
||||
has($branch) and (.[$branch] | type == "array")
|
||||
')
|
||||
if [ "$_RC_JSON_OK" != "true" ]; then
|
||||
echo "::error::REQUIRED_CHECKS_JSON missing or non-array entry for branch '$BASE_BRANCH' — cannot evaluate required checks."
|
||||
exit 1
|
||||
fi
|
||||
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] | .[]')
|
||||
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] // [] | .[]')
|
||||
else
|
||||
REQUIRED="$REQUIRED_CHECKS"
|
||||
fi
|
||||
@@ -116,65 +91,12 @@ fi
|
||||
# 3. Status-check state at the PR HEAD (where checks ran). The merge
|
||||
# commit doesn't get its own checks; we evaluate the PR's last
|
||||
# commit, which is what branch protection compared against.
|
||||
#
|
||||
# Pagination (status-pagination RCA, #2440-family): the combined
|
||||
# /commits/{sha}/status endpoint caps its embedded `statuses` array at the
|
||||
# Gitea default page size (~30). On a high-churn PR an older-but-still-current
|
||||
# required-context SUCCESS row is pushed PAST that cap, so reading the combined
|
||||
# view would record that context as `missing` and emit a FALSE-POSITIVE
|
||||
# force-merge. We instead page through the dedicated /commits/{sha}/statuses
|
||||
# list to EXHAUSTION (until a short/empty page), accumulating every row.
|
||||
#
|
||||
# Fail-closed is preserved end to end: any non-200 page, or a page whose body
|
||||
# is not a JSON array, aborts with exit 1 (we never treat an unreadable/partial
|
||||
# page as "no checks"). A genuinely-absent required context appears on NO page,
|
||||
# so CHECK_STATE has no entry for it → `${...:-missing}` below keeps it
|
||||
# `missing` → it is still counted as not-green. No fail-open path is added.
|
||||
PER_PAGE=100
|
||||
page=1
|
||||
ALL_STATUSES_TMP=$(mktemp)
|
||||
printf '[]' > "$ALL_STATUSES_TMP" # accumulator: a single JSON array of rows
|
||||
while :; do
|
||||
STATUS_TMP=$(mktemp)
|
||||
STATUS_HTTP=$(curl -sS -o "$STATUS_TMP" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/statuses?page=${page}&limit=${PER_PAGE}")
|
||||
PAGE_BODY=$(cat "$STATUS_TMP")
|
||||
rm -f "$STATUS_TMP"
|
||||
if [ "$STATUS_HTTP" != "200" ]; then
|
||||
rm -f "$ALL_STATUSES_TMP"
|
||||
echo "::error::GET /commits/${HEAD_SHA}/statuses?page=${page} returned HTTP ${STATUS_HTTP} — cannot evaluate required checks."
|
||||
exit 1
|
||||
fi
|
||||
# FAIL-CLOSED: the /statuses endpoint returns a bare JSON array. A non-array
|
||||
# body (null/object/string) means the response is malformed — we must NOT
|
||||
# treat that as "no checks", which would silently declare all checks green.
|
||||
if ! echo "$PAGE_BODY" | jq -e 'type == "array"' >/dev/null 2>&1; then
|
||||
rm -f "$ALL_STATUSES_TMP"
|
||||
echo "::error::GET /commits/${HEAD_SHA}/statuses?page=${page} returned HTTP 200 but body is not a JSON array — cannot evaluate required checks."
|
||||
exit 1
|
||||
fi
|
||||
PAGE_COUNT=$(echo "$PAGE_BODY" | jq 'length')
|
||||
# Append this page's rows to the accumulator (insertion order is preserved
|
||||
# but NOT relied upon — the collapse below selects max-by-id per context).
|
||||
COMBINED=$(jq -s '.[0] + .[1]' "$ALL_STATUSES_TMP" <(echo "$PAGE_BODY"))
|
||||
printf '%s' "$COMBINED" > "$ALL_STATUSES_TMP"
|
||||
# Short page (fewer than PER_PAGE rows) ⇒ last page ⇒ stop.
|
||||
if [ "$PAGE_COUNT" -lt "$PER_PAGE" ]; then
|
||||
break
|
||||
fi
|
||||
page=$((page + 1))
|
||||
done
|
||||
STATUS=$(cat "$ALL_STATUSES_TMP")
|
||||
rm -f "$ALL_STATUSES_TMP"
|
||||
STATUS=$(curl -sS -H "$AUTH" \
|
||||
"${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
|
||||
declare -A CHECK_STATE
|
||||
# Gitea's /commits/{sha}/statuses is roughly newest-first but NOT strictly
|
||||
# monotonic by id (observed first ids 157,155,156,… — local inversions from
|
||||
# re-runs and page boundaries), so neither first- nor last-occurrence reliably
|
||||
# yields the current row. Select the MAX-id row per context explicitly
|
||||
# (order-independent), matching prod-auto-deploy.py's latest_status_for_context.
|
||||
while IFS=$'\t' read -r ctx state; do
|
||||
[ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
|
||||
done < <(echo "$STATUS" | jq -r 'group_by(.context) | map(max_by(.id)) | .[] | "\(.context)\t\(.status)"')
|
||||
done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')
|
||||
|
||||
# 4. For each required check, was it green at merge? YAML block scalars
|
||||
# (`|`) leave a trailing newline; skip blank/whitespace-only lines.
|
||||
|
||||
@@ -317,33 +317,7 @@ def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
# Fail-closed validation: every entry must be a non-empty string.
|
||||
# Reject null, int, dict, or empty/whitespace strings loudly (exit 3) —
|
||||
# they indicate a malformed manifest that drift-detect must not
|
||||
# normalize away (that would hide config errors).
|
||||
validated: set[str] = set()
|
||||
for idx, item in enumerate(branch_checks):
|
||||
if not isinstance(item, str):
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
|
||||
f"{type(item).__name__} (value={item!r}), expected str\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
stripped = item.strip()
|
||||
if not stripped:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
|
||||
f"empty/whitespace string\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
if stripped in validated:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'] contains "
|
||||
f"duplicate context '{stripped}' at index {idx}\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
validated.add(stripped)
|
||||
return validated
|
||||
return {str(item).strip() for item in branch_checks if str(item).strip()}
|
||||
|
||||
# Legacy variant fallback.
|
||||
if found_legacy:
|
||||
@@ -578,34 +552,23 @@ def find_open_issue(title: str) -> dict | None:
|
||||
hourly; failing one cycle loudly is strictly better than silently
|
||||
duplicating.
|
||||
|
||||
Paginates through all open issues (limit=50 per page) until the
|
||||
title is found or the result set is exhausted. Previously only one
|
||||
page was fetched, causing duplicate [ci-drift] issues when the
|
||||
existing tracking issue fell beyond page 1.
|
||||
Gitea issue search returns at most page=50 per page; one page is
|
||||
enough as long as `[ci-drift]` issues are a tiny minority. (See
|
||||
follow-up issue for Link-header pagination.)
|
||||
"""
|
||||
page = 1
|
||||
while True:
|
||||
_, results = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/issues",
|
||||
query={
|
||||
"state": "open",
|
||||
"type": "issues",
|
||||
"limit": "50",
|
||||
"page": str(page),
|
||||
},
|
||||
_, results = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/issues",
|
||||
query={"state": "open", "type": "issues", "limit": "50"},
|
||||
)
|
||||
if not isinstance(results, list):
|
||||
raise ApiError(
|
||||
f"issue search returned non-list body (got {type(results).__name__})"
|
||||
)
|
||||
if not isinstance(results, list):
|
||||
raise ApiError(
|
||||
f"issue search returned non-list body (got {type(results).__name__})"
|
||||
)
|
||||
for issue in results:
|
||||
if issue.get("title") == title:
|
||||
return issue
|
||||
# Fewer than limit results means last page reached.
|
||||
if len(results) < 50:
|
||||
return None
|
||||
page += 1
|
||||
for issue in results:
|
||||
if issue.get("title") == title:
|
||||
return issue
|
||||
return None
|
||||
|
||||
|
||||
def render_body(branch: str, findings: list[str], debug: dict) -> str:
|
||||
|
||||
@@ -26,21 +26,10 @@ PROFILES: dict[str, dict[str, str]] = {
|
||||
"handlers": (
|
||||
r"^workspace-server/internal/handlers/"
|
||||
r"|^workspace-server/internal/wsauth/"
|
||||
# #2148: registry-auth real-PG integration tests (CanCommunicate
|
||||
# parent_id hierarchy lives in internal/registry; org-admin token
|
||||
# revoke/validate lives in internal/orgtoken) run in this same
|
||||
# workflow, so a regression in either package MUST trigger the job.
|
||||
r"|^workspace-server/internal/registry/"
|
||||
r"|^workspace-server/internal/orgtoken/"
|
||||
# #2149: the scheduler real-PG integration tests run in this same
|
||||
# workflow (they reuse its migrated Postgres), so changes to the
|
||||
# scheduler package must trigger the job too.
|
||||
r"|^workspace-server/internal/scheduler/"
|
||||
# #2150: the db package's real-PG migration-replay-from-scratch
|
||||
# + InitPostgres ping tests also run in this same workflow (they
|
||||
# reuse its sibling Postgres, against a separate `molecule_replay`
|
||||
# database). Changes to db must trigger the job too.
|
||||
r"|^workspace-server/internal/db/"
|
||||
r"|^workspace-server/migrations/"
|
||||
r"|^\.gitea/workflows/handlers-postgres-integration\.yml$"
|
||||
),
|
||||
|
||||
+147
-308
@@ -9,43 +9,27 @@ queue. This script provides the missing serialized policy in user space:
|
||||
candidate (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals,
|
||||
or red required CI) is SKIPPED so it cannot head-of-line block newer ready
|
||||
PRs; the scan continues to the next candidate.
|
||||
2. Refuse to act unless main's BP-required contexts are green. This is also
|
||||
the serialized backstop for direct-merge (see below): after a direct merge,
|
||||
main re-runs push CI and this gate PAUSES the queue if main goes red, so no
|
||||
merge piles onto an unverified/red main (issue #2358).
|
||||
2. Refuse to act unless main's BP-required contexts are green.
|
||||
3. Refuse fork PRs; the queue may only mutate same-repo branches.
|
||||
4. DIRECT-MERGE when conflict-free (issue #2358). When Gitea reports the PR
|
||||
conflict-free (mergeable is True) and the merge bar below is met, MERGE IT
|
||||
DIRECTLY — even if its head does not contain current main. We do NOT call
|
||||
/pulls/{n}/update first: branch protection does not require strict
|
||||
up-to-date, so behind-main conflict-free PRs merge cleanly, and calling
|
||||
/update would trigger Gitea dismiss_stale_approvals (dismissing the genuine
|
||||
approvals and forcing a re-review every tick — the rebase-churn bottleneck).
|
||||
The /update path is used ONLY when the PR is DEFINITIVELY not mergeable
|
||||
(mergeable is literal False) AND its head lacks current main — refreshing the
|
||||
branch may resolve a behind-main non-conflict; a real conflict returns HTTP
|
||||
409 and the PR is HELD per #2352. mergeable=None/missing (Gitea STILL
|
||||
COMPUTING conflict state) is a distinct fail-closed WAIT: never merged AND
|
||||
never /update'd — calling /update during the compute window would dismiss the
|
||||
PR's genuine approvals (dismiss_stale_approvals) and re-introduce the exact
|
||||
rebase-churn this queue eliminates. None is re-checked next tick.
|
||||
4. If the PR branch does not contain current main, call Gitea's
|
||||
/pulls/{n}/update endpoint and stop. CI must rerun on the updated head.
|
||||
5. Merge ONLY when, on the PR's CURRENT head sha:
|
||||
- >= REQUIRED_APPROVALS distinct GENUINE official APPROVED reviews from
|
||||
the recognised reviewer set (not stale, not dismissed, commit_id ==
|
||||
current head), AND
|
||||
- no open official REQUEST_CHANGES on the current head, AND
|
||||
- every BP-required status context is green, AND
|
||||
- the PR is mergeable (Gitea reports it conflict-free).
|
||||
- the PR is mergeable.
|
||||
|
||||
Authoritative gates (fail-closed):
|
||||
- The REQUIRED status contexts come from BRANCH PROTECTION
|
||||
(`status_check_contexts`) PLUS the hardcoded governance checks
|
||||
(qa-review, security-review, sop-checklist). If branch protection
|
||||
cannot be enumerated, the queue HOLDS (does not merge blindly).
|
||||
- NON-required reds (E2E Chat, Staging SaaS, ci-arm64-advisory, any
|
||||
(`status_check_contexts`), not a hand-maintained env list. If branch
|
||||
protection cannot be enumerated, the queue HOLDS (does not merge blindly).
|
||||
- NON-required reds (qa-review, security-review, sop-tier, sop-checklist
|
||||
when not branch-required, E2E Chat, Staging SaaS, ci-arm64-advisory, any
|
||||
continue-on-error job) MUST NOT block. They are reported, never gating.
|
||||
- `force_merge=true` is used ONLY when the merge is blocked *solely* by
|
||||
missing-but-non-required advisory contexts (required are green + genuine
|
||||
missing-but-non-required governance contexts (required are green + genuine
|
||||
approvals present). It is NEVER used to bypass a failing REQUIRED context
|
||||
or missing approvals.
|
||||
|
||||
@@ -105,12 +89,6 @@ import urllib.parse
|
||||
import urllib.request
|
||||
from typing import Any
|
||||
|
||||
# SSOT fail-closed approval predicate (SEV-1 internal#812). review-check.sh
|
||||
# consumes the same module via _review_check_filter.py — do NOT duplicate
|
||||
# the predicate here. See _approval_validator.py for the fail-closed contract.
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from _approval_validator import classify_reviews as _classify_reviews_ssot # noqa: E402
|
||||
|
||||
|
||||
def _env(key: str, *, default: str = "") -> str:
|
||||
return os.environ.get(key, default)
|
||||
@@ -150,28 +128,6 @@ OPT_OUT_LABELS = {
|
||||
).split(",")
|
||||
if name.strip()
|
||||
} | ({HOLD_LABEL} if HOLD_LABEL else set())
|
||||
# Governance checks that are ALWAYS required for every PR, regardless of
|
||||
# branch-protection configuration. These are the uniform-gate checks that
|
||||
# must pass before any PR can merge (SOP tier removal makes them mandatory
|
||||
# for all PRs, not just tier:medium/tier:high).
|
||||
#
|
||||
# Context names use the (pull_request_target) suffix (not pull_request)
|
||||
# to match the workflow event_type that actually emits them — verified
|
||||
# live against PR#2419/#2331/etc.: the qa-review/security-review
|
||||
# workflows run on pull_request_target (their `on:` block uses
|
||||
# pull_request_target, not pull_request), and sop-checklist's
|
||||
# all-items-acked job also uses pull_request_target. The previous
|
||||
# (pull_request) suffix never matched the live emitted contexts,
|
||||
# which is what was painting ~16 ready PRs red (gate appeared
|
||||
# "missing" qa-review/security-review even after both passed).
|
||||
# Verified against the lint-bp-context-emit-match test which already
|
||||
# asserts (pull_request_target) for these names. No requirement
|
||||
# dropped; just a name correction.
|
||||
GOVERNANCE_REQUIRED_CONTEXTS = [
|
||||
"qa-review / approved (pull_request_target)",
|
||||
"security-review / approved (pull_request_target)",
|
||||
"sop-checklist / all-items-acked (pull_request_target)",
|
||||
]
|
||||
REQUIRED_CONTEXTS_RAW = _env(
|
||||
"REQUIRED_CONTEXTS",
|
||||
default=(
|
||||
@@ -312,34 +268,6 @@ def api(
|
||||
return status, {"_raw": raw.decode("utf-8", errors="replace")}
|
||||
|
||||
|
||||
def api_paginated(
|
||||
method: str,
|
||||
path: str,
|
||||
*,
|
||||
query: dict[str, str] | None = None,
|
||||
page_size: int = 50,
|
||||
) -> list[dict]:
|
||||
"""Fetch all pages of a paginated Gitea list endpoint.
|
||||
|
||||
Gitea paginates with `page` (1-indexed) and `limit`. We loop until a
|
||||
page returns fewer than `page_size` items, indicating the end.
|
||||
"""
|
||||
results: list[dict] = []
|
||||
page = 1
|
||||
while True:
|
||||
page_query = dict(query or {})
|
||||
page_query["page"] = str(page)
|
||||
page_query["limit"] = str(page_size)
|
||||
_, body = api(method, path, query=page_query)
|
||||
if not isinstance(body, list):
|
||||
raise ApiError(f"{path} paginated response not list")
|
||||
results.extend(body)
|
||||
if len(body) < page_size:
|
||||
break
|
||||
page += 1
|
||||
return results
|
||||
|
||||
|
||||
def required_contexts(raw: str) -> list[str]:
|
||||
return [part.strip() for part in raw.split(",") if part.strip()]
|
||||
|
||||
@@ -365,15 +293,40 @@ def latest_statuses_by_context(statuses: list[dict]) -> dict[str, dict]:
|
||||
return latest
|
||||
|
||||
|
||||
def _is_tier_low_pending_ok(
|
||||
latest_statuses: dict[str, dict],
|
||||
context: str,
|
||||
pr_labels: set[str],
|
||||
) -> bool:
|
||||
"""Return True if tier:low PR can tolerate sop-checklist pending state.
|
||||
|
||||
Per sop-checklist-config.yaml tier_failure_mode, tier:low uses soft-fail:
|
||||
sop-checklist posts state=pending when acks are satisfied (missing
|
||||
manager/ceo acks are informational only). The queue should accept
|
||||
pending instead of waiting for success.
|
||||
"""
|
||||
if "tier:low" not in pr_labels:
|
||||
return False
|
||||
if "sop-checklist" not in context:
|
||||
return False
|
||||
status = latest_statuses.get(context) or {}
|
||||
return status_state(status) == "pending"
|
||||
|
||||
|
||||
def required_contexts_green(
|
||||
latest_statuses: dict[str, dict],
|
||||
contexts: list[str],
|
||||
pr_labels: set[str] | None = None,
|
||||
) -> tuple[bool, list[str]]:
|
||||
missing_or_bad: list[str] = []
|
||||
for context in contexts:
|
||||
status = latest_statuses.get(context)
|
||||
state = status_state(status or {})
|
||||
if state != "success":
|
||||
if pr_labels and _is_tier_low_pending_ok(
|
||||
latest_statuses, context, pr_labels
|
||||
):
|
||||
continue # tier:low soft-fail: accept pending sop-checklist
|
||||
missing_or_bad.append(f"{context}={state or 'missing'}")
|
||||
return not missing_or_bad, missing_or_bad
|
||||
|
||||
@@ -430,26 +383,57 @@ def get_branch_protection(branch: str) -> BranchProtection:
|
||||
def genuine_approvals(
|
||||
reviews: list[dict],
|
||||
*,
|
||||
headsha: str,
|
||||
head_sha: str,
|
||||
reviewer_set: set[str],
|
||||
) -> tuple[set[str], list[str]]:
|
||||
"""Thin wrapper over the SSOT predicate in _approval_validator.py.
|
||||
"""Reduce a PR's reviews to genuine official approvals on the CURRENT head.
|
||||
|
||||
All logic — the per-review commit_id / state / official / dismissed /
|
||||
stale contract — lives in _approval_validator.classify_reviews. This
|
||||
wrapper exists only to keep the call site (and external readers of
|
||||
the symbol) stable. Do NOT add any per-review logic here; if you need
|
||||
to change the predicate, edit _approval_validator.py.
|
||||
Returns (approvers, request_changes) where:
|
||||
- approvers is the set of distinct logins (in reviewer_set) whose LATEST
|
||||
review on the current head is an official, non-stale, non-dismissed
|
||||
APPROVED, and
|
||||
- request_changes is the list of logins (in reviewer_set) whose latest
|
||||
official review on the current head is REQUEST_CHANGES.
|
||||
|
||||
See _approval_validator.py for the full fail-closed contract
|
||||
(SEV-1 internal#812). The previous inline implementation had a
|
||||
`if isinstance(commit_id, str) and commit_id and headsha:` guard that
|
||||
silently accepted reviews with no commit_id; that fail-open surface is
|
||||
now closed at the SSOT.
|
||||
"Current head" is enforced two ways, because Gitea exposes both signals:
|
||||
a review must be `official` and NOT `stale`/`dismissed`, AND when the
|
||||
review carries a commit_id it must equal head_sha. A review with no
|
||||
commit_id but stale=False/dismissed=False is accepted (older Gitea rows).
|
||||
We take each reviewer's LATEST submission (reviews arrive oldest-first), so
|
||||
a later REQUEST_CHANGES correctly supersedes an earlier APPROVED and vice
|
||||
versa.
|
||||
"""
|
||||
return _classify_reviews_ssot(
|
||||
reviews, headsha=headsha, reviewer_set=reviewer_set
|
||||
)
|
||||
latest_by_user: dict[str, dict] = {}
|
||||
for review in reviews:
|
||||
if not isinstance(review, dict):
|
||||
continue
|
||||
user = (review.get("user") or {}).get("login")
|
||||
if not isinstance(user, str) or user not in reviewer_set:
|
||||
continue
|
||||
state = str(review.get("state") or "").upper()
|
||||
if state not in {"APPROVED", "REQUEST_CHANGES"}:
|
||||
continue # ignore COMMENT/PENDING/DISMISSED-state rows
|
||||
# reviews are returned oldest-first; later entries overwrite → latest wins
|
||||
latest_by_user[user] = review
|
||||
|
||||
approvers: set[str] = set()
|
||||
request_changes: list[str] = []
|
||||
for user, review in latest_by_user.items():
|
||||
if not review.get("official"):
|
||||
continue
|
||||
if review.get("stale") or review.get("dismissed"):
|
||||
continue
|
||||
commit_id = review.get("commit_id")
|
||||
if isinstance(commit_id, str) and commit_id and head_sha:
|
||||
if commit_id != head_sha:
|
||||
continue # review was on a previous head
|
||||
state = str(review.get("state") or "").upper()
|
||||
if state == "APPROVED":
|
||||
approvers.add(user)
|
||||
elif state == "REQUEST_CHANGES":
|
||||
request_changes.append(user)
|
||||
return approvers, request_changes
|
||||
|
||||
|
||||
def get_pull_reviews(pr_number: int) -> list[dict]:
|
||||
_, body = api("GET", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/reviews")
|
||||
@@ -609,32 +593,29 @@ def evaluate_merge_readiness(
|
||||
approvers: set[str],
|
||||
request_changes: list[str],
|
||||
pr_has_current_base: bool,
|
||||
mergeable: bool | None,
|
||||
mergeable: bool,
|
||||
pr_labels: set[str] | None = None,
|
||||
) -> MergeDecision:
|
||||
# 1) Main's push-required contexts must be green. Combined state can be
|
||||
# "failure" due to non-blocking jobs (continue-on-error: true) that do
|
||||
# not gate merges, so check the explicit required set, not combined.
|
||||
#
|
||||
# This main-green gate is ALSO the serialized backstop that makes the
|
||||
# direct-merge (no update) path safe (issue #2358): after a direct merge
|
||||
# of a behind-main PR, main re-runs its push CI; if a semantic main-break
|
||||
# slips through (PR green standalone but broken when combined with newer
|
||||
# main), main's required contexts go red and this gate PAUSES the queue —
|
||||
# no further merge piles onto an unverified/red main until it is green.
|
||||
main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
|
||||
main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
|
||||
if not main_ok:
|
||||
return MergeDecision(False, "pause", "main required contexts not green: " + ", ".join(main_bad))
|
||||
|
||||
# 2) No open official REQUEST_CHANGES on the current head.
|
||||
# 2) PR head must contain current main.
|
||||
if not pr_has_current_base:
|
||||
return MergeDecision(False, "update", "PR head does not contain current main")
|
||||
|
||||
# 3) No open official REQUEST_CHANGES on the current head.
|
||||
if request_changes:
|
||||
return MergeDecision(
|
||||
False, "wait",
|
||||
"open REQUEST_CHANGES on current head from: " + ", ".join(sorted(request_changes)),
|
||||
)
|
||||
|
||||
# 3) Enough distinct genuine official approvals on the current head.
|
||||
# 4) Enough distinct genuine official approvals on the current head.
|
||||
if len(approvers) < required_approvals:
|
||||
return MergeDecision(
|
||||
False, "wait",
|
||||
@@ -643,63 +624,26 @@ def evaluate_merge_readiness(
|
||||
f"need {required_approvals}",
|
||||
)
|
||||
|
||||
# 4) Every REQUIRED status context must be green. This includes both
|
||||
# branch-protection-required contexts AND the hardcoded governance checks
|
||||
# (qa-review, security-review, sop-checklist). NON-required reds (E2E
|
||||
# Chat, Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
|
||||
# 5) Every BRANCH-PROTECTION-REQUIRED status context must be green. This is
|
||||
# the authoritative status gate — NON-required reds (qa-review,
|
||||
# security-review, sop-tier/sop-checklist when not BP-required, E2E Chat,
|
||||
# Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
|
||||
# consulted here and must not block.
|
||||
latest = latest_statuses_by_context(pr_status.get("statuses") or [])
|
||||
ok, missing_or_bad = required_contexts_green(latest, required_contexts)
|
||||
ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
|
||||
if not ok:
|
||||
return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))
|
||||
|
||||
# 5) DIRECT-MERGE when conflict-free (issue #2358 — throughput fix).
|
||||
# If Gitea reports the PR conflict-free (mergeable is True), MERGE IT
|
||||
# DIRECTLY even if its head does not yet contain current main. Branch
|
||||
# protection does NOT require strict up-to-date, so a behind-main but
|
||||
# conflict-free PR merges cleanly. We deliberately do NOT call
|
||||
# /pulls/{n}/update first: update triggers Gitea dismiss_stale_approvals,
|
||||
# which would dismiss the PR's genuine approvals and force a full
|
||||
# re-review every tick — the rebase-churn bottleneck that collapsed
|
||||
# throughput to ~0/hr with dozens of mergeable PRs open.
|
||||
#
|
||||
# The merge bar is UNCHANGED: we only reach here with main green +
|
||||
# >= required genuine approvals on the current head + no open
|
||||
# REQUEST_CHANGES + every BP-required context green. The trade-off is
|
||||
# that the PR's CI ran on a possibly-behind base, so a SEMANTIC main-break
|
||||
# is caught by POST-merge main CI (step 1's pause backstop) rather than
|
||||
# pre-merge. force_merge is used ONLY for missing-but-non-required
|
||||
# governance reds (required are green + approvals genuine), never to
|
||||
# bypass a failing required context or an approval shortfall.
|
||||
if mergeable is True:
|
||||
force = _non_required_red_present(latest, required_contexts)
|
||||
return MergeDecision(True, "merge", "ready", force=force)
|
||||
# 6) Gitea must consider the PR mergeable (no conflicts).
|
||||
if not mergeable:
|
||||
return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
|
||||
|
||||
# 6) NOT (yet) mergeable. TRI-STATE, fail-closed — never merge on an unknown.
|
||||
# We MUST distinguish "still computing" (None/missing) from a "definitive
|
||||
# conflict" (False); collapsing them would route a behind-main but
|
||||
# STILL-COMPUTING PR into the /update path, whose dismiss_stale_approvals
|
||||
# is the rebase-churn this change eliminates.
|
||||
#
|
||||
# mergeable is None → Gitea has NOT finished computing conflict state.
|
||||
# WAIT: do nothing this tick — never /update (would dismiss genuine
|
||||
# approvals during the compute window → churn), never merge. Re-check next
|
||||
# tick once Gitea reports a decisive True/False.
|
||||
if mergeable is None:
|
||||
return MergeDecision(
|
||||
False, "wait",
|
||||
"PR mergeability is still being computed (mergeable=None) — waiting",
|
||||
)
|
||||
|
||||
# mergeable is False → DEFINITIVE not-mergeable. If the head also does not
|
||||
# contain current main, try the /update path to refresh the branch (this
|
||||
# may resolve a behind-main non-conflict; a real conflict returns HTTP 409
|
||||
# and process_once HOLDs the PR per #2352). If the head already contains
|
||||
# current main yet Gitea still reports not-mergeable, there is nothing the
|
||||
# queue can do (genuine conflict against current main) — WAIT.
|
||||
if not pr_has_current_base:
|
||||
return MergeDecision(False, "update", "PR not mergeable and head does not contain current main")
|
||||
return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
|
||||
# Ready. Use force_merge ONLY if the merge would otherwise be blocked by
|
||||
# missing-but-non-required governance contexts. Required are green and
|
||||
# approvals are genuine, so force only bypasses non-required reds — never a
|
||||
# failing required context or missing approval.
|
||||
force = _non_required_red_present(latest, required_contexts)
|
||||
return MergeDecision(True, "merge", "ready", force=force)
|
||||
|
||||
|
||||
def get_branch_head(branch: str) -> str:
|
||||
@@ -715,23 +659,32 @@ def get_combined_status(sha: str) -> dict:
|
||||
"""Combined status + all individual statuses for `sha`.
|
||||
|
||||
The /status endpoint caps the `statuses` array at 30 entries (Gitea
|
||||
default page size), so we fetch the full list via /statuses. The combined
|
||||
`state` still comes from /status.
|
||||
default page size), so we fetch the full list via /statuses with a
|
||||
higher limit. The combined `state` still comes from /status.
|
||||
|
||||
Fail-closed: BOTH the PRIMARY /status fetch AND the SECONDARY /statuses
|
||||
enrichment must succeed. If either raises, the error propagates so the
|
||||
caller skips this PR this tick (we never treat a failed status fetch as
|
||||
green — dev-sop "no fail-open"). A paginated /statuses error must NOT
|
||||
silently degrade to an incomplete status set.
|
||||
Fail-closed: the PRIMARY /status fetch must succeed. If it raises, the
|
||||
error propagates so the caller skips this PR this tick (we never treat a
|
||||
failed status fetch as green — dev-sop "no fail-open"). Only the SECONDARY
|
||||
/statuses enrichment (which merely extends the per-context list beyond the
|
||||
30-entry cap) is best-effort; if it fails we still have the combined set.
|
||||
"""
|
||||
_, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
|
||||
if not isinstance(combined, dict):
|
||||
raise ApiError(f"status for {sha} response not object")
|
||||
combined_statuses: list[dict] = combined.get("statuses") or []
|
||||
all_statuses = api_paginated(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
|
||||
)
|
||||
try:
|
||||
_, all_statuses_raw = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/commits/{sha}/statuses",
|
||||
query={"limit": "50"},
|
||||
)
|
||||
if isinstance(all_statuses_raw, list):
|
||||
all_statuses: list[dict] = list(all_statuses_raw)
|
||||
else:
|
||||
all_statuses = []
|
||||
except (ApiError, urllib.error.URLError, TimeoutError, OSError) as exc:
|
||||
sys.stderr.write(f"::warning::could not fetch full statuses list for {sha[:8]}: {exc}\n")
|
||||
all_statuses = []
|
||||
# Build latest per context: process combined (ascending→reverse=newest
|
||||
# first), then fill gaps from all_statuses (already newest-first).
|
||||
latest: dict[str, dict] = {}
|
||||
@@ -748,15 +701,19 @@ def get_combined_status(sha: str) -> dict:
|
||||
|
||||
|
||||
def list_queued_issues() -> list[dict]:
|
||||
return api_paginated(
|
||||
_, body = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/issues",
|
||||
query={
|
||||
"state": "open",
|
||||
"type": "pulls",
|
||||
"label": QUEUE_LABEL,
|
||||
"labels": QUEUE_LABEL,
|
||||
"limit": "50",
|
||||
},
|
||||
)
|
||||
if not isinstance(body, list):
|
||||
raise ApiError("queued issues response not list")
|
||||
return body
|
||||
|
||||
|
||||
def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
|
||||
@@ -770,14 +727,18 @@ def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
|
||||
"""
|
||||
if not auto_discover:
|
||||
return list_queued_issues()
|
||||
return api_paginated(
|
||||
_, body = api(
|
||||
"GET",
|
||||
f"/repos/{OWNER}/{NAME}/issues",
|
||||
query={
|
||||
"state": "open",
|
||||
"type": "pulls",
|
||||
"limit": "50",
|
||||
},
|
||||
)
|
||||
if not isinstance(body, list):
|
||||
raise ApiError("candidate issues response not list")
|
||||
return body
|
||||
|
||||
|
||||
def get_pull(pr_number: int) -> dict:
|
||||
@@ -916,9 +877,7 @@ def process_once(*, dry_run: bool = False) -> int:
|
||||
f"unavailable (fail-closed): {exc}\n"
|
||||
)
|
||||
return 0
|
||||
# Uniform gate: governance checks are ALWAYS required, even if branch
|
||||
# protection does not enumerate them. Deduplicate against BP list.
|
||||
contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
|
||||
contexts = bp.required_contexts
|
||||
required_approvals = bp.required_approvals
|
||||
print(
|
||||
f"::notice::queue policy from branch protection: "
|
||||
@@ -1105,24 +1064,16 @@ def _evaluate_candidate(
|
||||
# never treated as green).
|
||||
pr_status = get_combined_status(head_sha)
|
||||
pr_labels = label_names(pr)
|
||||
# FAIL-CLOSED, TRI-STATE: Gitea returns mergeable=None (or omits the field)
|
||||
# while it is still COMPUTING conflict state, mergeable=False for a definitive
|
||||
# conflict, and mergeable=True only when it has proven the PR conflict-free.
|
||||
# We preserve all THREE states (do NOT collapse None/missing into False):
|
||||
# - True → direct-merge eligible (step 5).
|
||||
# - None / missing → still computing → WAIT (never merge, never update,
|
||||
# never dismiss approvals); re-check next tick.
|
||||
# - False → definitive conflict → the update/hold path (step 6).
|
||||
# Collapsing None→False would route a behind-main but STILL-COMPUTING PR into
|
||||
# the /update path, which triggers dismiss_stale_approvals — the exact
|
||||
# rebase-churn this change eliminates. Normalize only to the literal True /
|
||||
# False / None set (some Gitea versions omit the key entirely → None).
|
||||
raw_mergeable = pr.get("mergeable")
|
||||
mergeable: bool | None = raw_mergeable if isinstance(raw_mergeable, bool) else None
|
||||
# FAIL-CLOSED: Gitea returns mergeable=None (or omits the field) while it is
|
||||
# still COMPUTING conflict state. Only the literal True is decisive proof the
|
||||
# PR is conflict-free; None and False both mean "not (yet) mergeable". We must
|
||||
# NOT autonomously merge on an unknown — treat anything but True as not-yet-
|
||||
# mergeable so evaluate_merge_readiness returns a "wait" decision.
|
||||
mergeable = pr.get("mergeable") is True
|
||||
|
||||
reviews = get_pull_reviews(pr_number)
|
||||
approvers, request_changes = genuine_approvals(
|
||||
reviews, headsha=head_sha, reviewer_set=REVIEWER_SET
|
||||
reviews, head_sha=head_sha, reviewer_set=REVIEWER_SET
|
||||
)
|
||||
|
||||
decision = evaluate_merge_readiness(
|
||||
@@ -1139,138 +1090,26 @@ def _evaluate_candidate(
|
||||
return decision, ctx
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class ReadinessEntry:
|
||||
"""One candidate's readiness state."""
|
||||
|
||||
pr_number: int
|
||||
decision: MergeDecision | None
|
||||
reason: str
|
||||
|
||||
|
||||
def enumerate_readiness(*, dry_run: bool = False) -> list[ReadinessEntry]:
|
||||
"""Evaluate ALL candidates and return their readiness states.
|
||||
|
||||
Fail-closed: if branch protection cannot be fetched, raise
|
||||
BranchProtectionUnavailable (caller must handle). Unlike
|
||||
process_once, this does NOT stop at the first actionable candidate;
|
||||
it evaluates every eligible PR and returns the full list so a
|
||||
post-batch summary can be printed.
|
||||
"""
|
||||
bp = get_branch_protection(WATCH_BRANCH)
|
||||
# Uniform gate: governance checks are ALWAYS required, even if branch
|
||||
# protection does not enumerate them. Deduplicate against BP list.
|
||||
contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
|
||||
required_approvals = bp.required_approvals
|
||||
|
||||
main_sha = get_branch_head(WATCH_BRANCH)
|
||||
main_status = get_combined_status(main_sha)
|
||||
main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
|
||||
main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
|
||||
|
||||
candidates = choose_candidate_issues(
|
||||
list_candidate_issues(auto_discover=AUTO_DISCOVER),
|
||||
queue_label=QUEUE_LABEL,
|
||||
opt_out_labels=OPT_OUT_LABELS,
|
||||
auto_discover=AUTO_DISCOVER,
|
||||
)
|
||||
|
||||
entries: list[ReadinessEntry] = []
|
||||
for issue in candidates:
|
||||
pr_number = int(issue["number"])
|
||||
try:
|
||||
decision, ctx = _evaluate_candidate(
|
||||
issue,
|
||||
main_sha=main_sha,
|
||||
main_status=main_status,
|
||||
required_contexts=contexts,
|
||||
required_approvals=required_approvals,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
except ApiError as exc:
|
||||
# Fail-closed per candidate: an unreadable PR is recorded as
|
||||
# unverifiable, not skipped silently.
|
||||
entries.append(
|
||||
ReadinessEntry(
|
||||
pr_number=pr_number,
|
||||
decision=None,
|
||||
reason=f"unverifiable (API error: {exc})",
|
||||
)
|
||||
)
|
||||
continue
|
||||
if decision is None:
|
||||
entries.append(
|
||||
ReadinessEntry(
|
||||
pr_number=pr_number,
|
||||
decision=None,
|
||||
reason="not merge-eligible (opt-out/draft/fork/wrong-base)",
|
||||
)
|
||||
)
|
||||
continue
|
||||
entries.append(
|
||||
ReadinessEntry(
|
||||
pr_number=pr_number,
|
||||
decision=decision,
|
||||
reason=decision.reason,
|
||||
)
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def print_post_batch_summary(entries: list[ReadinessEntry]) -> None:
|
||||
"""Print a structured summary of all candidates' readiness.
|
||||
|
||||
Emits ::notice:: lines for machine parsing and a human-readable
|
||||
block for operator visibility.
|
||||
"""
|
||||
ready = [e for e in entries if e.decision and e.decision.ready]
|
||||
waiting = [e for e in entries if e.decision and not e.decision.ready]
|
||||
ineligible = [e for e in entries if e.decision is None]
|
||||
|
||||
print("::group::merge-queue readiness summary")
|
||||
print(f"total_candidates={len(entries)}")
|
||||
print(f"ready={len(ready)}")
|
||||
print(f"waiting={len(waiting)}")
|
||||
print(f"ineligible/unverifiable={len(ineligible)}")
|
||||
print("")
|
||||
for e in entries:
|
||||
state = "ready" if e.decision and e.decision.ready else (
|
||||
"waiting" if e.decision else "ineligible"
|
||||
)
|
||||
action = e.decision.action if e.decision else "n/a"
|
||||
print(f"PR #{e.pr_number}: state={state} action={action} reason={e.reason}")
|
||||
print("::endgroup::")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--dry-run", action="store_true")
|
||||
parser.add_argument(
|
||||
"--enumerate",
|
||||
action="store_true",
|
||||
help="Evaluate all candidates and print a readiness summary without merging.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
_require_runtime_env()
|
||||
try:
|
||||
if args.enumerate:
|
||||
entries = enumerate_readiness(dry_run=args.dry_run)
|
||||
print_post_batch_summary(entries)
|
||||
return 0
|
||||
return process_once(dry_run=args.dry_run)
|
||||
except ApiError as exc:
|
||||
# FAIL-CLOSED: API errors are not "transient success" — they mean
|
||||
# the queue could not evaluate merge state. Returning 0 hides
|
||||
# persistent infra issues (auth drift, endpoint outages) from
|
||||
# operators. Return 1 so the cron job surfaces red and paging fires.
|
||||
# API errors (401/403/404/500) are transient for a queue tick —
|
||||
# log and exit 0 so the workflow is not marked failed and the next
|
||||
# tick can retry. Returning non-zero would permanently fail the
|
||||
# workflow run, blocking future ticks.
|
||||
sys.stderr.write(f"::error::queue API error: {exc}\n")
|
||||
return 1
|
||||
return 0
|
||||
except urllib.error.URLError as exc:
|
||||
sys.stderr.write(f"::error::queue network error: {exc}\n")
|
||||
return 1
|
||||
return 0
|
||||
except TimeoutError as exc:
|
||||
sys.stderr.write(f"::error::queue timeout: {exc}\n")
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -165,7 +165,7 @@ def api(
|
||||
# Format: "<workflow_name> / <job_name_or_key> (<event>)"
|
||||
# Examples observed on molecule-core/main:
|
||||
# "Secret scan / Scan diff for credential-shaped strings (pull_request)"
|
||||
# "sop-checklist / all-items-acked (pull_request)"
|
||||
# "sop-tier-check / tier-check (pull_request)"
|
||||
#
|
||||
# Split strategy: peel off the trailing ` (<event>)` first, then split
|
||||
# the leading `<workflow> / <rest>` on the FIRST ` / ` (workflow names
|
||||
|
||||
@@ -17,7 +17,7 @@ Rules (4 fatal + 1 fatal cross-file + 1 heuristic-warn):
|
||||
enumeration; task #81). Workflow registers, fires for 0 events.
|
||||
3. `name:` containing `/` — breaks the
|
||||
`<workflow> / <job> (<event>)` commit-status context convention;
|
||||
downstream parsers (sop-checklist, status-reaper) tokenize on `/`.
|
||||
downstream parsers (sop-tier-check, status-reaper) tokenize on `/`.
|
||||
4. `name:` collision across files — Gitea routes commit-status updates
|
||||
by `name` and behavior on collision is undefined (status-reaper
|
||||
rev1 fail-loud).
|
||||
@@ -150,7 +150,7 @@ def check_name_with_slash(filename: str, doc: Any) -> list[str]:
|
||||
f"::error file={filename}::Rule 3 (FATAL): workflow `name: "
|
||||
f"{name!r}` contains `/`. The commit-status context convention "
|
||||
f"is `<workflow> / <job> (<event>)`; embedding `/` in the "
|
||||
f"workflow name makes downstream parsers (sop-checklist, "
|
||||
f"workflow name makes downstream parsers (sop-tier-check, "
|
||||
f"status-reaper) tokenize ambiguously. Rename to use `-` or "
|
||||
f"` ` instead."
|
||||
)
|
||||
|
||||
@@ -49,7 +49,8 @@ Daily scheduled run + workflow_dispatch:
|
||||
4. If orphans exist:
|
||||
- File or PATCH a `[ci-bp-drift]` issue (idempotency contract:
|
||||
search for exact title prefix, edit existing if open).
|
||||
- Apply label `ci-bp-drift` (lookup ID per repo).
|
||||
- Apply labels `tier:high` + `ci-bp-drift` (lookup IDs per
|
||||
repo; per `feedback_tier_label_ids_are_per_repo`).
|
||||
- Exit 1.
|
||||
|
||||
5. If no orphans:
|
||||
@@ -81,7 +82,7 @@ Memory cross-links
|
||||
------------------
|
||||
- internal#350 (the RFC that specs this lint)
|
||||
- feedback_phantom_required_check_after_gitea_migration
|
||||
- feedback_label_ids_are_per_repo
|
||||
- feedback_tier_label_ids_are_per_repo
|
||||
- reference_post_suspension_pipeline
|
||||
"""
|
||||
from __future__ import annotations
|
||||
@@ -358,7 +359,7 @@ def file_or_update_issue(
|
||||
existing = h
|
||||
break
|
||||
|
||||
label_ids = _ensure_labels(repo, ["ci-bp-drift"])
|
||||
label_ids = _ensure_labels(repo, ["ci-bp-drift", "tier:high"])
|
||||
|
||||
if existing:
|
||||
api(
|
||||
|
||||
@@ -305,9 +305,9 @@ def validate_tracker(
|
||||
if status == "error":
|
||||
sys.stderr.write(
|
||||
f"::error::issue {slug}#{num} fetch errored — treating as "
|
||||
f"unverified, FAILING CLOSED (do not skip on outage).\n"
|
||||
f"unverified, skipping this check.\n"
|
||||
)
|
||||
return (False, f"{slug}#{num} fetch errored — cannot verify tracker")
|
||||
return (True, "fetch-error — skipped")
|
||||
|
||||
assert payload is not None
|
||||
state = payload.get("state", "")
|
||||
|
||||
@@ -546,24 +546,16 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
|
||||
|
||||
shas = recent_commits_on_branch(branch, n)
|
||||
if not shas:
|
||||
result["masked_runs"].append({
|
||||
"sha": "",
|
||||
"status": "unverified",
|
||||
"target_url": "",
|
||||
"samples": [f"no recent commits on {branch} — cannot verify flip"],
|
||||
})
|
||||
result["warnings"].append(
|
||||
f"no recent commits on {branch} (cannot verify flip)"
|
||||
)
|
||||
return result
|
||||
|
||||
for sha in shas:
|
||||
try:
|
||||
status_doc = combined_status(sha)
|
||||
except ApiError as e:
|
||||
result["masked_runs"].append({
|
||||
"sha": sha,
|
||||
"status": "error",
|
||||
"target_url": "",
|
||||
"samples": [f"combined-status API error: {e}"],
|
||||
})
|
||||
result["warnings"].append(f"combined-status for {sha}: {e}")
|
||||
continue
|
||||
statuses = status_doc.get("statuses") or []
|
||||
# First entry matching the context name. Newest SHAs come
|
||||
@@ -590,17 +582,6 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
|
||||
"target_url": target_url,
|
||||
"samples": ["[log unavailable; status itself is " + state + "]"],
|
||||
})
|
||||
elif state == "success":
|
||||
# Fail-closed: unreadable log on a success status is a
|
||||
# potential Quirk #10 mask (continue-on-error hiding real
|
||||
# failures). We cannot verify it's clean, so treat as
|
||||
# masked rather than allowing the flip.
|
||||
result["masked_runs"].append({
|
||||
"sha": sha,
|
||||
"status": state,
|
||||
"target_url": target_url,
|
||||
"samples": ["[log unavailable; cannot verify status is genuine — treat as masked]"],
|
||||
})
|
||||
break
|
||||
samples = grep_fail_markers(log_text)
|
||||
if state in ("failure", "error"):
|
||||
@@ -624,12 +605,10 @@ def verify_flip(flip: dict, branch: str, n: int) -> dict:
|
||||
break
|
||||
|
||||
if result["checked_commits"] == 0:
|
||||
result["masked_runs"].append({
|
||||
"sha": "",
|
||||
"status": "unverified",
|
||||
"target_url": "",
|
||||
"samples": [f"no runs of {target_context!r} found in the last {n} commits on {branch} — cannot verify flip"],
|
||||
})
|
||||
result["warnings"].append(
|
||||
f"no runs of {target_context!r} found in the last {n} commits on "
|
||||
f"{branch} — cannot verify; allowing flip with warning"
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ runtime contract enforcement lives in `_require_runtime_env()`.
|
||||
|
||||
Run locally (dry-run, no API mutation):
|
||||
GITEA_TOKEN=... GITEA_HOST=git.moleculesai.app REPO=owner/repo \\
|
||||
WATCH_BRANCH=main RED_LABEL=ci-bp-drift \\
|
||||
WATCH_BRANCH=main RED_LABEL=tier:high \\
|
||||
python3 .gitea/scripts/main-red-watchdog.py --dry-run
|
||||
"""
|
||||
from __future__ import annotations
|
||||
@@ -81,7 +81,7 @@ GITEA_TOKEN = _env("GITEA_TOKEN")
|
||||
GITEA_HOST = _env("GITEA_HOST")
|
||||
REPO = _env("REPO")
|
||||
WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
|
||||
RED_LABEL = _env("RED_LABEL", default="ci-bp-drift")
|
||||
RED_LABEL = _env("RED_LABEL", default="tier:high")
|
||||
|
||||
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
|
||||
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
|
||||
|
||||
@@ -95,27 +95,17 @@ def build_plan(env: dict[str, str]) -> dict:
|
||||
|
||||
|
||||
def latest_status_for_context(statuses: list[dict], context: str) -> dict | None:
    """Return the newest status row for ``context`` (highest ``id``), or None.

    The selector must work for both orderings Gitea exposes: the combined
    ``/status`` view is newest-first, while an exhaustively paginated
    ``/statuses`` list is in ascending id order (oldest-first). Picking the
    row with the maximum ``id`` collapses duplicate context rows to the
    current one regardless of input order, so a stale earlier run can never
    shadow the latest result.

    Args:
        statuses: Commit-status rows (dicts with ``context`` and, normally,
            an integer ``id``).
        context: Exact context name to look up.

    Returns:
        The matching row with the highest ``id``; ``None`` when no row has
        that context. Rows whose ``id`` is missing or not an int rank as
        oldest (-1), so a well-formed row always wins over them. Among rows
        with equal rank, the one appearing later in ``statuses`` is returned.
    """
    newest: dict | None = None
    newest_id = -1
    for status in statuses:
        if status.get("context") != context:
            continue
        raw_id = status.get("id")
        # Malformed / absent ids sort below every real id.
        sid = raw_id if isinstance(raw_id, int) else -1
        if newest is None or sid >= newest_id:
            newest = status
            newest_id = sid
    return newest
|
||||
|
||||
|
||||
def ci_context_state(statuses: list[dict], context: str) -> str:
|
||||
@@ -361,55 +351,6 @@ def _api_json(url: str, token: str) -> dict:
|
||||
raise RuntimeError(f"GET {url} -> HTTP {exc.code}: {body}") from exc
|
||||
|
||||
|
||||
def _api_json_list(url: str, token: str) -> list:
    """Fetch a Gitea list endpoint and return the decoded JSON array.

    Sends a GET with a ``token`` Authorization header (20 s timeout) and
    decodes the response body as JSON.

    Raises:
        RuntimeError: on an HTTP error status (message carries the status
            code and up to 500 characters of the error body), or when the
            decoded body is not a JSON array. Raising here keeps callers
            fail-closed: an unreadable page is never mistaken for "no more
            rows".
    """
    request = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
    try:
        with urllib.request.urlopen(request, timeout=20) as response:
            raw = response.read()
            payload = json.loads(raw)
    except urllib.error.HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")[:500]
        raise RuntimeError(f"GET {url} -> HTTP {exc.code}: {detail}") from exc
    if isinstance(payload, list):
        return payload
    raise RuntimeError(f"GET {url} -> expected JSON array, got {type(payload).__name__}")
|
||||
|
||||
|
||||
def fetch_all_statuses(host: str, repo: str, sha: str, token: str, page_size: int = 100) -> list[dict]:
    """Return EVERY commit-status row for ``sha``, paginating to exhaustion.

    Walks ``/repos/{repo}/commits/{sha}/statuses`` page by page and
    accumulates all dict rows, instead of relying on the combined
    ``/commits/{sha}/status`` view whose embedded ``statuses`` array is
    capped at one page.

    Termination is on the first EMPTY page, not on a "short" page. The
    server may clamp ``limit`` below the requested ``page_size`` (Gitea caps
    it at its configured API maximum), in which case a completely full page
    is still shorter than ``page_size``; stopping on ``len(rows) < page_size``
    would then silently drop every later page. One extra request per scan is
    the price of never truncating.

    Args:
        host: Gitea hostname (no scheme).
        repo: ``owner/name`` repository path.
        sha: Commit SHA whose statuses are listed.
        token: API token passed through to ``_api_json_list``.
        page_size: ``limit`` requested per page (a hint; the server may
            return fewer rows per page).

    Returns:
        All status rows, in the order the pages returned them. Non-dict
        entries are dropped.

    Raises:
        RuntimeError: propagated from ``_api_json_list`` when any page
            errors or is not a JSON array — the scan never degrades to a
            partial list.
    """
    base = f"https://{host}/api/v1/repos/{repo}/commits/{sha}/statuses"
    results: list[dict] = []
    page = 1
    while True:
        rows = _api_json_list(f"{base}?page={page}&limit={page_size}", token)
        if not rows:
            # Only an empty page proves there is nothing further to read.
            break
        results.extend(r for r in rows if isinstance(r, dict))
        page += 1
    return results
|
||||
|
||||
|
||||
def _api_json_optional(url: str, token: str) -> tuple[int, dict | None]:
|
||||
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
|
||||
try:
|
||||
@@ -531,19 +472,12 @@ def wait_for_ci_context(env: dict[str, str]) -> str:
|
||||
if not token:
|
||||
raise ValueError("GITEA_TOKEN is required to wait for CI status")
|
||||
|
||||
url = f"https://{host}/api/v1/repos/{repo}/commits/{sha}/status"
|
||||
deadline = time.time() + timeout
|
||||
last_states: dict[str, str] = {}
|
||||
while time.time() <= deadline:
|
||||
# Read the FULL, exhaustively-paginated /statuses list — NOT the
|
||||
# combined /status view, whose embedded `statuses` array is capped at
|
||||
# the Gitea page size (~30). On a high-churn commit a required-context
|
||||
# SUCCESS row lands past that cap and the combined view would report
|
||||
# it `missing`, so this gate would wait until timeout and refuse a
|
||||
# legitimate prod deploy. Fetching every page closes that hole.
|
||||
# Fail-closed is preserved: a genuinely-absent required context is on
|
||||
# NO page, so ci_context_state() still returns "missing" → never
|
||||
# satisfied → the deploy stays blocked.
|
||||
statuses = fetch_all_statuses(host, repo, sha, token)
|
||||
body = _api_json(url, token)
|
||||
statuses = body.get("statuses") or []
|
||||
states = {context: ci_context_state(statuses, context) for context in contexts}
|
||||
for context, state in states.items():
|
||||
if state != last_states.get(context):
|
||||
|
||||
@@ -197,13 +197,17 @@ if [ "$HTTP_CODE" != "200" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Filter via the SSOT fail-closed predicate in _approval_validator.py
|
||||
# (same module gitea-merge-queue.py imports). The jq filter is gone
|
||||
# entirely — any change to the predicate must be made in
|
||||
# _approval_validator.py. See SEV-1 internal#812 for the fail-closed
|
||||
# contract this closes.
|
||||
SCRIPT_DIR_HERE="$(cd "$(dirname "$0")" && pwd)"
|
||||
REVIEW_CANDIDATES=$(python3 "$SCRIPT_DIR_HERE/_review_check_filter.py" "$REVIEWS_JSON" "$PR_HEAD_SHA" "$PR_AUTHOR")
|
||||
# Filter: state=APPROVED, official=true, not-dismissed, non-author,
|
||||
# commit_id matches current PR head. All conditions are mandatory.
|
||||
JQ_FILTER='.[]
|
||||
| select(.state == "APPROVED")
|
||||
| select(.official == true)
|
||||
| select(.dismissed != true)
|
||||
| select(.user.login != $author)
|
||||
| select(.commit_id == $head)
|
||||
| .user.login'
|
||||
|
||||
REVIEW_CANDIDATES=$(jq -r --arg author "$PR_AUTHOR" --arg head "$PR_HEAD_SHA" "$JQ_FILTER" "$REVIEWS_JSON" | sort -u)
|
||||
debug "candidate non-author approvers: $(echo "$REVIEW_CANDIDATES" | tr '\n' ' ')"
|
||||
|
||||
if [ -z "$REVIEW_CANDIDATES" ]; then
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#
|
||||
# Flow:
|
||||
# 1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
|
||||
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, labels
|
||||
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, tier label
|
||||
# 3. GET /repos/{R}/issues/{N}/comments — extract /sop-ack and /sop-revoke
|
||||
# 4. For each checklist item:
|
||||
# a. Is the section marker present in PR body? (author answered)
|
||||
@@ -665,8 +665,8 @@ def load_config(path: str) -> dict[str, Any]:
|
||||
def _load_config_minimal(path: str) -> dict[str, Any]:
|
||||
"""Minimal YAML subset parser for our config shape.
|
||||
|
||||
Supports: top-level scalar:value, top-level map-of-map,
|
||||
top-level list of maps (items:), and within an
|
||||
Supports: top-level scalar:value, top-level map-of-map (e.g.
|
||||
tier_failure_mode), top-level list of maps (items:), and within an
|
||||
item map: scalars + lists of scalars. Does NOT support nested lists,
|
||||
YAML anchors, multi-doc, or flow style.
|
||||
"""
|
||||
@@ -835,7 +835,8 @@ def render_status(
|
||||
|
||||
state is "success" if every item has at least one valid ack
|
||||
(body section presence is informational only — peer-ack is the
|
||||
real gate).
|
||||
real gate). tier:low PRs receive state="success" (soft-fail — no
|
||||
acks required); the description carries "[info tier:low]" prefix.
|
||||
"""
|
||||
n = len(items)
|
||||
fully_acked = [
|
||||
@@ -862,16 +863,35 @@ def render_status(
|
||||
return state, " — ".join(desc_parts)
|
||||
|
||||
|
||||
def get_tier_mode(pr: dict[str, Any], cfg: dict[str, Any]) -> str:
    """Resolve the PR's failure mode from its ``tier:*`` labels.

    Each ``tier:*`` label on the PR is looked up in
    ``cfg["tier_failure_mode"]``. When several tier labels are mapped, the
    strictest mode wins: any label mapped to ``"hard"`` makes the result
    ``"hard"`` regardless of label order, so an extra ``tier:low`` label can
    never soften a ``tier:high`` PR. With a single mapped tier label the
    result is simply that label's mode.

    Args:
        pr: Pull-request payload; ``labels`` is a list of dicts with a
            ``name`` key (missing/None names are ignored).
        cfg: Checklist config; reads ``tier_failure_mode`` (label -> mode)
            and ``default_mode``.

    Returns:
        The resolved mode, or ``cfg["default_mode"]`` (default ``"hard"``)
        when no tier label is present in the mode map.
    """
    mode_map = cfg.get("tier_failure_mode") or {}
    label_names = ((label.get("name") or "") for label in (pr.get("labels") or []))
    mapped_modes = [
        mode_map[name]
        for name in label_names
        if name.startswith("tier:") and name in mode_map
    ]
    if not mapped_modes:
        return cfg.get("default_mode", "hard")
    if "hard" in mapped_modes:
        # Fail-closed: conflicting tier labels resolve to the strict mode.
        return "hard"
    return mapped_modes[0]
|
||||
|
||||
|
||||
def is_high_risk(pr: dict[str, Any], cfg: dict[str, Any]) -> bool:
    """Return True when the PR is high-risk per RFC#450 Option C.

    A PR is high-risk when ANY of:
      - it carries the `tier:high` label (mechanically strictest tier), or
      - it carries any label listed in cfg.high_risk_labels.

    High-risk PRs use `required_teams_high_risk` (when set on an item)
    instead of the default `required_teams`. Items without
    `required_teams_high_risk` are unaffected (the default applies).

    Governance fix for internal#442 — closes the inconsistency between
    sop-tier-check (tier-aware) and sop-checklist (was tier-blind).
    """
    names = set()
    for label in pr.get("labels") or []:
        # A missing or None name is recorded as the empty string.
        names.add(label.get("name") or "")
    if "tier:high" in names:
        return True
    flagged = set(cfg.get("high_risk_labels") or [])
    return not names.isdisjoint(flagged)
|
||||
|
||||
@@ -1149,6 +1169,13 @@ def main(argv: list[str] | None = None) -> int:
|
||||
body_state = {it["slug"]: section_marker_present(body, it["pr_section_marker"]) for it in items}
|
||||
|
||||
state, description = render_status(items, ack_state, body_state)
|
||||
mode = get_tier_mode(pr, cfg)
|
||||
if mode == "soft":
|
||||
# tier:low: acks are informational only — post success so BP gate passes.
|
||||
# Description carries "[info tier:low]" prefix so reviewers know acks
|
||||
# were not required (vs a tier:medium+ PR that truly passed all acks).
|
||||
state = "success"
|
||||
description = f"[info tier:low] {description}"
|
||||
if volume_skipped:
|
||||
# Above the comment-cap — we may have a partial view. Soft-pend
|
||||
# so neither BP nor the author gets stuck; surface the cap so
|
||||
@@ -1162,7 +1189,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||
# Diagnostics to job log.
|
||||
print(
|
||||
f"::notice::PR #{args.pr} author={author} head={head_sha[:7]} "
|
||||
f"risk_class={'high' if high_risk else 'default'}"
|
||||
f"mode={mode} risk_class={'high' if high_risk else 'default'}"
|
||||
)
|
||||
for it in items:
|
||||
slug = it["slug"]
|
||||
|
||||
Executable
+427
@@ -0,0 +1,427 @@
|
||||
#!/usr/bin/env bash
|
||||
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
|
||||
#
|
||||
# Reads the PR's tier label, walks approving reviewers, and checks team
|
||||
# membership against the tier's approval expression. Passes only when
|
||||
# ALL clauses in the expression are satisfied by the set of approving
|
||||
# reviewers (AND-composition; internal#189).
|
||||
#
|
||||
# Expression syntax:
|
||||
# "team-a" — OR-set: any ONE of the comma-separated teams
|
||||
# "team-a AND team-b" — AND: BOTH must each have ≥1 approver
|
||||
# "(a,b,c)" — OR-set wrapped in parens; same as "a,b,c"
|
||||
#
|
||||
# Example: "qa AND security AND (managers,ceo)" means:
|
||||
# ≥1 approver in team "qa" AND
|
||||
# ≥1 approver in team "security" AND
|
||||
# ≥1 approver in team "managers" OR "ceo"
|
||||
#
|
||||
# Per the spec (internal#189), the hard gate here pairs with the
|
||||
# advisory gate of sop-conformance LLM-judge (internal#188): each
|
||||
# required-team click must reflect real verification (visible in review
|
||||
# body or A2A messages), not rubber-stamp APPROVE. Both gates together
|
||||
# close the "teammate clicks APPROVE without verifying" gap.
|
||||
#
|
||||
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
|
||||
# the env vars below; this script does no IO outside of stdout/stderr +
|
||||
# the Gitea API.
|
||||
#
|
||||
# Required env:
|
||||
# GITEA_TOKEN — bot PAT with read:organization,read:user,
|
||||
# read:issue,read:repository scopes
|
||||
# GITEA_HOST — e.g. git.moleculesai.app
|
||||
# REPO — owner/name (from github.repository)
|
||||
# PR_NUMBER — int (from github.event.pull_request.number)
|
||||
# PR_AUTHOR — login (from github.event.pull_request.user.login)
|
||||
#
|
||||
# Optional:
|
||||
# SOP_DEBUG=1 — print per-API-call diagnostic lines. Default: off.
|
||||
# SOP_LEGACY_CHECK=1 — revert to OR-gate (≥1 approver from any eligible
|
||||
# team). Grace window for PRs in-flight when the
|
||||
# new AND-composition was deployed. Expires 2026-05-17
|
||||
# (7-day burn-in window; internal#189 Phase 1).
|
||||
# Set by workflow for PRs merged before the deploy.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Ensure jq is available. Runners may not have it pre-installed, and the
|
||||
# workflow-level jq install can fail on runners with network restrictions
|
||||
# (GitHub releases not reachable from some runner networks — infra#241
|
||||
# follow-up). This fallback is idempotent — no-op when jq is already on PATH.
|
||||
# Install jq when it is missing from PATH; no-op when jq is already present.
# Order: apt-get first, then the pinned GitHub release binary as a fallback.
if ! command -v jq >/dev/null 2>&1; then
  echo "::notice::jq not found on PATH — attempting install..."
  # apt-get first (primary) — Ubuntu package mirrors are reliably reachable.
  if apt-get update -qq && apt-get install -y -qq jq 2>/dev/null; then
    echo "::notice::jq installed via apt-get: $(jq --version)"
  # GitHub binary as secondary fallback — may fail on restricted networks.
  # -f makes curl exit non-zero on an HTTP error instead of saving the
  # error page as the "binary" and reporting success.
  elif timeout 120 curl -fsSL \
      "https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
      -o /usr/local/bin/jq \
    && chmod +x /usr/local/bin/jq; then
    echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
  fi
  # Require a jq that actually runs, not merely an executable file on PATH.
  if ! command -v jq >/dev/null 2>&1 || ! jq --version >/dev/null 2>&1; then
    echo "::error::jq installation failed — apt-get and GitHub binary both failed."
    echo "::error::sop-tier-check requires jq for all JSON API parsing."
    exit 1
  fi
fi
|
||||
|
||||
# debug MESSAGE... — write a diagnostic line to stderr, only when SOP_DEBUG=1.
# Always returns 0 so it is safe inside `&&` chains under `set -e`.
debug() {
  [ "${SOP_DEBUG:-}" = "1" ] || return 0
  printf ' [debug] %s\n' "$*" >&2
}
|
||||
|
||||
# Validate env
|
||||
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
|
||||
: "${GITEA_HOST:?GITEA_HOST required}"
|
||||
: "${REPO:?REPO required (owner/name)}"
|
||||
: "${PR_NUMBER:?PR_NUMBER required}"
|
||||
: "${PR_AUTHOR:?PR_AUTHOR required}"
|
||||
|
||||
OWNER="${REPO%%/*}"
|
||||
NAME="${REPO##*/}"
|
||||
API="https://${GITEA_HOST}/api/v1"
|
||||
AUTH="Authorization: token ${GITEA_TOKEN}"
|
||||
echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"
|
||||
|
||||
# Sanity: token resolves to a user.
|
||||
# Use || true on the jq pipeline so that set -euo pipefail (line 45) does not
|
||||
# cause the script to exit prematurely when the token is empty/invalid — the
|
||||
# if check below handles that case gracefully. Without || true, a 401 from an
|
||||
# empty/invalid token causes jq to exit 1, triggering set -e and exiting the
|
||||
# entire script before the error can be logged.
|
||||
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""') || true
|
||||
if [ -z "$WHOAMI" ]; then
|
||||
echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
|
||||
exit 1
|
||||
fi
|
||||
echo "::notice::token resolves to user: $WHOAMI"
|
||||
|
||||
# 0.5 Read PR head SHA so we can reject stale approvals after head moves
|
||||
# (internal#816). Reviews carry the commit_id they were submitted against.
|
||||
HEAD_SHA=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}" | jq -r '.head.sha // ""') || true
|
||||
if [ -z "$HEAD_SHA" ]; then
|
||||
echo "::error::Failed to fetch PR head SHA — token may be invalid."
|
||||
exit 1
|
||||
fi
|
||||
debug "pr-head-sha=$HEAD_SHA"
|
||||
|
||||
# 1. Read tier label. || true ensures set -euo pipefail does not abort the
|
||||
# script if curl or jq fails (e.g. 401 from empty token).
|
||||
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || true
|
||||
TIER=""
|
||||
for L in $LABELS; do
|
||||
case "$L" in
|
||||
tier:low|tier:medium|tier:high)
|
||||
if [ -n "$TIER" ]; then
|
||||
echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
|
||||
exit 1
|
||||
fi
|
||||
TIER="$L"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
if [ -z "$TIER" ]; then
|
||||
echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
|
||||
exit 1
|
||||
fi
|
||||
debug "tier=$TIER"
|
||||
|
||||
# 2. Tier → required team expression (AND-composition; internal#189)
|
||||
#
|
||||
# Expression syntax:
|
||||
# clause-a AND clause-b AND ... — ALL clauses must pass
|
||||
# team-a,team-b,team-c — OR-set: ≥1 approver in ANY of these teams
|
||||
# (team-a,team-b) — same as team-a,team-b (parens optional)
|
||||
#
|
||||
# This map is the single source of truth. Update it when the team structure
|
||||
# or policy changes. Teams referenced here but absent in Gitea are treated
|
||||
# as unachievable (would always fail) — operators notice the clear error
|
||||
# and create the missing team.
|
||||
#
|
||||
# Current Gitea teams: ceo, engineers, managers
|
||||
# Future teams (create before removing "???" fallback): qa, security, security-audit
|
||||
declare -A TIER_EXPR=(
|
||||
# tier:low — same as previous OR gate: any engineer, manager, or ceo.
|
||||
["tier:low"]="engineers,managers,ceo"
|
||||
|
||||
# tier:medium — AND of (managers) AND (engineers) AND (qa???,security???)
|
||||
# The qa+security clause requires both teams to exist; when not yet
|
||||
# created, the PR author is responsible for adding them before requesting
|
||||
# approval on a tier:medium PR. Ops: create qa + security Gitea teams
|
||||
# and update this map to remove the "???" markers (internal#189 follow-up).
|
||||
["tier:medium"]="managers AND engineers AND qa???,security???"
|
||||
|
||||
# tier:high — ceo only. The AND-composition adds no value for a
|
||||
# single-team gate, but the framework is wired for consistency.
|
||||
["tier:high"]="ceo"
|
||||
)
|
||||
|
||||
EXPR="${TIER_EXPR[$TIER]-}"
|
||||
if [ -z "$EXPR" ]; then
|
||||
echo "::error::No expression defined for tier $TIER in TIER_EXPR map."
|
||||
exit 1
|
||||
fi
|
||||
debug "expression=$EXPR"
|
||||
|
||||
# 3. Legacy OR-gate override (7-day burn-in grace window; internal#189 Phase 1)
|
||||
if [ "${SOP_LEGACY_CHECK:-}" = "1" ]; then
|
||||
LEGACY_ELIGIBLE=""
|
||||
case "$TIER" in
|
||||
tier:low) LEGACY_ELIGIBLE="engineers managers ceo" ;;
|
||||
tier:medium) LEGACY_ELIGIBLE="managers ceo" ;;
|
||||
tier:high) LEGACY_ELIGIBLE="ceo" ;;
|
||||
esac
|
||||
echo "::notice::SOP_LEGACY_CHECK=1 — using OR-gate ({$LEGACY_ELIGIBLE}) for this PR."
|
||||
ELIGIBLE="$LEGACY_ELIGIBLE"
|
||||
fi
|
||||
|
||||
# 4. Resolve all team names → IDs
|
||||
# /orgs/{org}/teams/{slug}/... endpoints don't exist on Gitea 1.22;
|
||||
# we use /teams/{id}.
|
||||
# set +e prevents set -e from aborting the script if curl fails (e.g. empty token).
|
||||
ORG_TEAMS_FILE=$(mktemp)
|
||||
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
|
||||
set +e
|
||||
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/orgs/${OWNER}/teams")
|
||||
_HTTP_EXIT=$?
|
||||
set -e
|
||||
debug "teams-list HTTP=$HTTP_CODE (curl exit=$_HTTP_EXIT) size=$(wc -c <"$ORG_TEAMS_FILE")"
|
||||
if [ "${SOP_DEBUG:-}" = "1" ]; then
|
||||
echo " [debug] teams-list body (first 300 chars):" >&2
|
||||
head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
|
||||
fi
|
||||
if [ "$_HTTP_EXIT" -ne 0 ] || [ "$HTTP_CODE" != "200" ]; then
|
||||
echo "::error::GET /orgs/${OWNER}/teams failed (curl exit=$_HTTP_EXIT HTTP=$HTTP_CODE) — token may lack read:org scope or be invalid."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Collect every team name that appears in the expression.
|
||||
# Bash word-splitting on $EXPR splits on spaces, so "AND" appears as a
|
||||
# token. We skip it explicitly.
|
||||
declare -A TEAM_ID
|
||||
_all_teams=""
|
||||
for _raw_clause in $EXPR; do
|
||||
# Strip parens and split on comma.
|
||||
_clause=${_raw_clause//[()]/}
|
||||
for _t in $(echo "$_clause" | tr ',' '\n'); do
|
||||
_t=$(echo "$_t" | tr -d '[:space:]')
|
||||
[ -z "$_t" ] && continue
|
||||
# Skip AND / OR operator tokens (bash word-split produced them from
|
||||
# spaces in the expression string).
|
||||
[ "$_t" = "AND" ] || [ "$_t" = "OR" ] && continue
|
||||
# Skip if already in set.
|
||||
case " $_all_teams " in
|
||||
*" $_t "*) ;; # already present
|
||||
*) _all_teams="${_all_teams} $_t " ;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
for _t in $_all_teams; do
|
||||
_t=$(echo "$_t" | tr -d ' ')
|
||||
[ -z "$_t" ] && continue
|
||||
_id=$(jq -r --arg t "$_t" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
|
||||
if [ -z "$_id" ] || [ "$_id" = "null" ]; then
|
||||
# "???" suffix marks teams that don't exist yet (tier:medium qa/security).
|
||||
# Treat as permanently failing clause; clear error message guides ops.
|
||||
if [[ "$_t" == *"???" ]]; then
|
||||
debug "team \"$_t\" not found (expected — pending team creation per internal#189)"
|
||||
continue
|
||||
fi
|
||||
_visible=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
|
||||
echo "::error::Team \"$_t\" referenced in tier $TIER expression but not found in org $OWNER. Teams visible: $_visible"
|
||||
exit 1
|
||||
fi
|
||||
TEAM_ID[$_t]="$_id"
|
||||
debug "team-id: $_t → $_id"
|
||||
done
|
||||
|
||||
# 5. Read approving reviewers. set +e disables set -e temporarily so that curl
# failures (e.g. empty/invalid token → HTTP 401) do not abort the script before
# the explicit error below can be logged; set -e is restored right after.
set +e
REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
_REVIEWS_EXIT=$?
set -e
if [ "$_REVIEWS_EXIT" -ne 0 ] || [ -z "$REVIEWS" ]; then
  echo "::error::Failed to fetch reviews (curl exit=$_REVIEWS_EXIT) — token may be invalid or unreachable."
  exit 1
fi
# A review counts as an approval only when ALL of these hold:
#   - state is APPROVED
#   - it has not been dismissed
#   - it is not stale
#   - it was submitted against the current PR head (commit_id == HEAD_SHA;
#     a missing commit_id is null and therefore never matches)
# A dismissed or stale review keeps state=APPROVED in the API payload, so
# filtering on state alone would still credit it.
# NOTE(review): `.official == true` is deliberately not required here because
# this gate verifies team membership itself — confirm against policy.
# `|| true`: a jq failure (e.g. an error object instead of an array) leaves
# APPROVERS empty, which the check below reports as a failure.
APPROVERS=$(echo "$REVIEWS" | jq -r --arg head_sha "$HEAD_SHA" '
  [ .[]
    | select(.state == "APPROVED")
    | select(.dismissed != true)
    | select(.stale != true)
    | select(.commit_id == $head_sha)
    | .user.login ]
  | unique
  | .[]') || true
if [ -z "$APPROVERS" ]; then
  echo "::error::No approving reviews on this PR. Set SOP_DEBUG=1 and re-run for diagnostics."
  exit 1
fi
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
|
||||
|
||||
# 6. For each approver: skip self-review; probe team membership by id.
|
||||
# Build $APPROVER_TEAMS[<user>]=space-surrounded team names (e.g. " managers ").
|
||||
# Pre/post spaces ensure case patterns *${_t}* match even when the name
|
||||
# is the first or last entry (bash case *word* needs delimiters on both sides).
|
||||
#
|
||||
# FAIL-CLOSED AUTHORIZATION (security: SOP tier gate is an AUTHORIZATION gate).
|
||||
#
|
||||
# This used to fall back to /orgs/{org}/members/{user} whenever every team
|
||||
# probe failed and credit any org member as a member of EVERY queried team.
|
||||
# That was a privilege-escalation: org membership is NOT team membership, so
|
||||
# a 403/visibility/token-scope gap on the team probes silently promoted a
|
||||
# plain org member to satisfy tier:high (ceo). An inability-to-verify became
|
||||
# an authorization GRANT. The fallback is REMOVED — org membership must never
|
||||
# satisfy a team-gated tier.
|
||||
#
|
||||
# A team-membership probe has exactly three meaningful outcomes:
|
||||
# 200 / 204 → the user IS a member of that team (credit it)
|
||||
# 404 → the user is definitively NOT a member (no credit, verified)
|
||||
# anything else (403 / 401 / 5xx / curl failure / non-numeric)
|
||||
# → membership CANNOT be read (cannot-verify)
|
||||
#
|
||||
# Per the dev-sop fail-closed rule (inability-to-verify = failure, never a
|
||||
# pass — and here, never an authorization grant), a cannot-verify outcome on
|
||||
# ANY probe is a HARD infra failure: we publish a loud cannot-verify error and
|
||||
# exit non-zero. We do NOT proceed to evaluate the tier expression on a partial
|
||||
# / unverifiable membership picture, because doing so could let an unverifiable
|
||||
# approver's clause silently fail-or-pass on incomplete data. Fix the token
|
||||
# scope (read:organization) or the runner network — not the gate.
|
||||
declare -A APPROVER_TEAMS
|
||||
_verify_failed="" # accumulates "<user>:<team>(HTTP <code>)" for probes we could not read
|
||||
for U in $APPROVERS; do
|
||||
[ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
|
||||
for T in "${!TEAM_ID[@]}"; do
|
||||
ID="${TEAM_ID[$T]}"
|
||||
set +e
|
||||
CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/teams/${ID}/members/${U}")
|
||||
_curl_exit=$?
|
||||
set -e
|
||||
debug "probe: $U in team $T (id=$ID) → HTTP $CODE (curl exit=$_curl_exit)"
|
||||
if [ "$_curl_exit" -ne 0 ]; then
|
||||
# curl itself failed (DNS, connection refused, timeout) — unreachable.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(curl exit ${_curl_exit})"
|
||||
continue
|
||||
fi
|
||||
case "$CODE" in
|
||||
200|204)
|
||||
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
|
||||
debug "$U qualifies for team $T"
|
||||
;;
|
||||
404)
|
||||
# Definitively not a member of this team — a verified negative.
|
||||
debug "$U is NOT a member of team $T (verified 404)"
|
||||
;;
|
||||
*)
|
||||
# 403/401/5xx/etc — membership is unreadable. Do NOT treat as "not a
|
||||
# member" and do NOT fall back to org membership. This is cannot-verify.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(HTTP ${CODE})"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
# Fail-closed: if ANY membership probe could not be read, we cannot make an
|
||||
# authorization decision. Publish a loud cannot-verify / infra-failed status
|
||||
# and exit non-zero. Never grant the tier on unverifiable membership.
|
||||
if [ -n "$_verify_failed" ]; then
|
||||
echo "::error::sop-tier-check CANNOT VERIFY team membership — gate FAILS CLOSED."
|
||||
echo "::error::Unreadable membership probe(s): ${_verify_failed}"
|
||||
echo "::error::A team-membership probe returned 403/401/5xx (or curl failed). The SOP tier gate is an authorization gate; an inability to verify team membership is treated as a FAILURE, never a pass. Org membership is NOT team membership and is never credited as a fallback."
|
||||
echo "::error::Fix: ensure GITEA_TOKEN (SOP_TIER_CHECK_TOKEN) has read:organization scope and the Gitea API is reachable from the runner, then re-run. Do NOT relax this gate."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 7. Evaluate the tier expression.
|
||||
#
|
||||
# legacy OR-gate: use the simplified loop from before internal#189.
|
||||
# Legacy OR-gate (SOP_LEGACY_CHECK=1): pass when at least one non-author
# approver belongs to any team in $LEGACY_ELIGIBLE, then exit.
if [ -n "${LEGACY_ELIGIBLE:-}" ]; then
  OK=""
  for _u in "${!APPROVER_TEAMS[@]}"; do
    for _t2 in $LEGACY_ELIGIBLE; do
      # Match the team as a whole space-delimited token. A bare *${_t2}*
      # pattern is a substring match, so e.g. membership in "security-audit"
      # would wrongly satisfy "security".
      case " ${APPROVER_TEAMS[$_u]} " in
        *" ${_t2} "*)
          echo "::notice::approver $_u is in team $_t2 (eligible for $TIER)"
          OK="yes"
          break
          ;;
      esac
    done
    [ -n "$OK" ] && break
  done
  if [ -z "$OK" ]; then
    echo "::error::Tier $TIER requires approval from a non-author member of {$LEGACY_ELIGIBLE}. Set SOP_DEBUG=1 to see per-probe HTTP codes."
    exit 1
  fi
  echo "::notice::sop-tier-check passed: $TIER (legacy OR-gate)"
  exit 0
fi
|
||||
|
||||
# AND-gate: evaluate the expression clause by clause.
# $EXPR is word-split on spaces: each word is either the literal operator
# "AND" or one clause (a comma-separated OR-set, optionally in parens).
# Every clause must be satisfied by at least one approver.
# _passed_clauses and _failed_clauses accumulate for the status description.
_passed_clauses=""
_failed_clauses=""

for _raw_clause in $EXPR; do
  # "AND" is the separator between clauses, not a team. Without this skip it
  # is evaluated as a clause naming a team called "AND", which can never be
  # satisfied, so every expression containing AND fails unconditionally.
  # Any other operator word is not part of the grammar and intentionally
  # still falls through to fail as an unresolvable clause.
  if [ "$_raw_clause" = "AND" ]; then
    continue
  fi

  # Normalise: strip parens, replace commas with spaces so bash word-split
  # can iterate the OR-set members as separate tokens.
  _no_parens=${_raw_clause//[()]/}
  _clause=${_no_parens//,/ }
  _clause_passed="no"
  _clause_names=""
  for _t in $_clause; do
    # Append (never overwrite) so the FAIL message lists every team.
    _clause_names="${_clause_names}${_clause_names:+, }${_t}"
    # Skip teams not yet in Gitea (qa??? / security??? placeholders).
    [[ "$_t" == *"???" ]] && debug "clause \"$_t\": skipped (team pending creation)" && continue
    [ -z "${TEAM_ID[$_t]:-}" ] && debug "clause \"$_t\": no ID resolved, skipping" && continue
    for _u in "${!APPROVER_TEAMS[@]}"; do
      # Match the team as a whole space-delimited token. A bare *${_t}*
      # pattern is a substring match, so e.g. membership in "security-audit"
      # would wrongly satisfy a "security" clause.
      case " ${APPROVER_TEAMS[$_u]} " in
        *" ${_t} "*)
          _clause_passed="yes"
          debug "clause \"$_t\": satisfied by $_u"
          break
          ;;
      esac
    done
  done

  # Label for display: strip "???" from pending teams.
  _label=$(echo "$_raw_clause" | tr -d '()' | tr ',' '/' | tr -d '[:space:]' | sed 's/???//g')

  if [ "$_clause_passed" = "yes" ]; then
    _passed_clauses="${_passed_clauses}${_passed_clauses:+, }$_label"
    echo "::notice::clause [$_label]: PASS — satisfied by approving reviewer(s)"
  else
    _failed_clauses="${_failed_clauses}${_failed_clauses:+, }$_label"
    echo "::error::clause [$_label]: FAIL — no approving reviewer belongs to any of these teams (${_clause_names}). Set SOP_DEBUG=1 to see per-team probe results."
  fi
done

if [ -n "$_failed_clauses" ]; then
  echo ""
  echo "::error::sop-tier-check FAILED for $TIER."
  echo " Passed :${_passed_clauses}"
  echo " Missing:${_failed_clauses}"
  echo " All clauses must be satisfied. Each missing team needs an APPROVED review from one of its members."
  exit 1
fi

echo "::notice::sop-tier-check PASSED: $TIER — all required clauses satisfied [${_passed_clauses}]"
|
||||
Executable
+199
@@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env bash
|
||||
# sop-tier-refire — re-evaluate sop-tier-check and POST status to PR head SHA.
|
||||
#
|
||||
# Invoked from `.gitea/workflows/sop-tier-refire.yml` when a repo
|
||||
# MEMBER/OWNER/COLLABORATOR comments `/refire-tier-check` on a PR.
|
||||
#
|
||||
# Behavior:
|
||||
#
|
||||
# 1. Resolve PR head SHA + author from PR_NUMBER.
|
||||
# 2. Rate-limit: if the sop-tier-check context has been POSTed in the
|
||||
# last 30 seconds, skip (prevents comment-spam status thrash).
|
||||
# 3. Invoke `.gitea/scripts/sop-tier-check.sh` with the same env the
|
||||
# canonical workflow provides. This is DRY: we re-use the exact AND-
|
||||
# composition gate logic, not a watered-down approving-count check.
|
||||
# 4. POST the resulting status (success on exit 0, failure on non-zero)
|
||||
# to `/repos/.../statuses/{HEAD_SHA}` with context
|
||||
# "sop-tier-check / tier-check (pull_request)" — the same context name
|
||||
# branch protection requires.
|
||||
#
|
||||
# Required env (set by sop-tier-refire.yml):
|
||||
# GITEA_TOKEN — org-level SOP_TIER_CHECK_TOKEN (read:org/user/issue/repo)
|
||||
# GITEA_HOST — e.g. git.moleculesai.app
|
||||
# REPO — owner/name
|
||||
# PR_NUMBER — PR number from issue_comment payload
|
||||
# COMMENT_AUTHOR — login of the commenter (logged for audit)
|
||||
#
|
||||
# Optional:
|
||||
# SOP_DEBUG=1 — verbose per-API-call diagnostics
|
||||
# SOP_REFIRE_RATE_LIMIT_SEC — override the 30s rate-limit (default 30)
|
||||
# SOP_REFIRE_DISABLE_RATE_LIMIT=1 — for tests; skips the rate-limit check
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# debug MSG... — write a diagnostic line to stderr, only when SOP_DEBUG=1.
debug() {
  [ "${SOP_DEBUG:-}" = "1" ] || return 0
  echo " [debug] $*" >&2
}
|
||||
|
||||
# Required inputs — abort with a clear message when any is unset or empty.
: "${GITEA_TOKEN:?GITEA_TOKEN required}" \
  "${GITEA_HOST:?GITEA_HOST required}" \
  "${REPO:?REPO required (owner/name)}" \
  "${PR_NUMBER:?PR_NUMBER required}"
# The commenter is only logged, so a placeholder is acceptable.
COMMENT_AUTHOR="${COMMENT_AUTHOR:-unknown}"

# Values derived from the inputs above.
RATE_LIMIT_SEC="${SOP_REFIRE_RATE_LIMIT_SEC:-30}"
CONTEXT="sop-tier-check / tier-check (pull_request)"
AUTH="Authorization: token ${GITEA_TOKEN}"
API="https://${GITEA_HOST}/api/v1"
# REPO is owner/name: OWNER is everything before the first slash, NAME is
# everything after the last one.
OWNER="${REPO%%/*}"
NAME="${REPO##*/}"

echo "::notice::sop-tier-refire start: repo=$OWNER/$NAME pr=$PR_NUMBER commenter=$COMMENT_AUTHOR"
|
||||
|
||||
# 1. Fetch the PR — head.sha, user.login and state are read from it.
PR_FILE=$(mktemp)
trap 'rm -f "$PR_FILE"' EXIT
PR_URL="${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}"
PR_HTTP=$(curl -sS -o "$PR_FILE" -w '%{http_code}' -H "$AUTH" "$PR_URL")
case "$PR_HTTP" in
  200) ;;
  *)
    # Include the start of the response body to make the failure diagnosable.
    echo "::error::GET /pulls/$PR_NUMBER returned HTTP $PR_HTTP (body $(head -c 200 "$PR_FILE"))"
    exit 1
    ;;
esac

HEAD_SHA=$(jq -r '.head.sha' "$PR_FILE")
PR_AUTHOR=$(jq -r '.user.login' "$PR_FILE")
PR_STATE=$(jq -r '.state' "$PR_FILE")

# jq -r prints the literal string "null" for a missing field.
case "$HEAD_SHA" in
  "" | null)
    echo "::error::Could not resolve head.sha from PR #$PR_NUMBER response"
    exit 1
    ;;
esac
debug "head_sha=$HEAD_SHA pr_author=$PR_AUTHOR state=$PR_STATE"

# Only open PRs are re-evaluated; anything else ends the job successfully
# without posting a status.
if [ "$PR_STATE" != "open" ]; then
  echo "::notice::PR #$PR_NUMBER state is $PR_STATE; refire is a no-op on closed PRs."
  exit 0
fi
|
||||
|
||||
# 2. Rate-limit: skip if our context was updated in the last $RATE_LIMIT_SEC.
# The newest entry for our context is found by taking the maximum
# updated_at among the matching statuses, so the check does not depend on
# the order in which the server lists them.
# NOTE(review): `sort=newest` does not appear to be a documented value of
# the statuses endpoint's `sort` parameter — confirm against the Gitea API
# reference. The query string is left unchanged here.
# NOTE(review): the maximum is a string comparison, which assumes every
# updated_at in one response uses the same UTC offset — confirm.
if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
  STATUSES_FILE=$(mktemp)
  trap 'rm -f "$PR_FILE" "$STATUSES_FILE"' EXIT
  ST_HTTP=$(curl -sS -o "$STATUSES_FILE" -w '%{http_code}' -H "$AUTH" \
    "${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}?limit=50&sort=newest")
  debug "statuses-list HTTP=$ST_HTTP"
  # A non-200 listing is not fatal: the rate-limit is simply not applied.
  if [ "$ST_HTTP" = "200" ]; then
    # Entries without updated_at are ignored; no match at all yields "".
    LAST_UPDATED=$(jq -r --arg c "$CONTEXT" \
      '[.[] | select(.context == $c) | .updated_at // empty] | max // ""' \
      <"$STATUSES_FILE")
    if [ -n "$LAST_UPDATED" ] && [ "$LAST_UPDATED" != "null" ]; then
      # Parse RFC3339 → epoch. Use python -c for portability (date(1) -d
      # differs between BSD/GNU; the Gitea runner is Ubuntu so GNU date
      # works, but we keep python for future container variance).
      # An unparseable timestamp becomes epoch 0, i.e. "very old", so it
      # never triggers the rate-limit.
      LAST_EPOCH=$(python3 -c "import sys,datetime;print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$LAST_UPDATED" 2>/dev/null || echo "0")
      NOW_EPOCH=$(date -u +%s)
      AGE=$((NOW_EPOCH - LAST_EPOCH))
      debug "last status update: $LAST_UPDATED ($AGE seconds ago)"
      # AGE >= 0 guards against a timestamp that lies in the future.
      if [ "$AGE" -lt "$RATE_LIMIT_SEC" ] && [ "$AGE" -ge 0 ]; then
        echo "::notice::sop-tier-refire rate-limited — last status update was ${AGE}s ago (<${RATE_LIMIT_SEC}s window). Try again shortly."
        exit 0
      fi
    fi
  fi
fi
|
||||
|
||||
# 3. Run sop-tier-check.sh with the environment it expects.
#
# FAIL-CLOSED contract (fixed 2026-06-05, fix/core-ci-fail-closed). The
# earlier, fail-open shape was:
#     bash "$SCRIPT" || true
#     TIER_EXIT=0   # <-- hardcoded success
# That threw the real verdict away and ALWAYS POSTed `state=success` for
# the REQUIRED context `sop-tier-check / tier-check (pull_request)`, so ANY
# collaborator could comment `/refire-tier-check` and force the SOP-6
# approval gate green on the PR head SHA — a fail-open AND a privilege
# bypass of branch protection. The canonical pull_request_target workflow
# publishes the same context honestly (red on a real violation); the
# refire MUST mirror that honesty.
#
# The script's real exit code is therefore recorded, and success is POSTed
# ONLY for exit 0. sop-tier-check.sh itself fails closed on infra faults
# (no SOP_FAIL_OPEN in this refire env), so a bad token / unreachable API /
# missing jq yields non-zero and a `state=failure` POST, never a false
# green.
#
# SOP_REFIRE_TIER_CHECK_SCRIPT lets tests substitute a mock:
# sop-tier-check.sh uses bash 4+ associative arrays, which trigger a known
# bash 3.2 parser bug (`tier: unbound variable` from declare -A with
# `set -u`). Linux Gitea runners ship bash 4/5, so production is fine; the
# override exists so the bash 3.2 dev box can still exercise the refire
# glue logic end-to-end.
SCRIPT="${SOP_REFIRE_TIER_CHECK_SCRIPT:-}"
if [ -z "$SCRIPT" ]; then
  SCRIPT="$(dirname "$0")/sop-tier-check.sh"
fi
if [ ! -f "$SCRIPT" ]; then
  echo "::error::sop-tier-check.sh not found at $SCRIPT — refire requires the canonical script"
  exit 1
fi

# Re-invoke with stdout/stderr passed through, so the runner log shows the
# tier-check decision inline. `|| TIER_EXIT=$?` records a non-zero verdict
# without letting `set -e` abort this script; the POST below keys off it.
TIER_EXIT=0
GITEA_TOKEN="$GITEA_TOKEN" \
  GITEA_HOST="$GITEA_HOST" \
  REPO="$REPO" \
  PR_NUMBER="$PR_NUMBER" \
  PR_AUTHOR="$PR_AUTHOR" \
  SOP_DEBUG="${SOP_DEBUG:-0}" \
  SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
  bash "$SCRIPT" || TIER_EXIT=$?
debug "sop-tier-check.sh exit=$TIER_EXIT"
|
||||
|
||||
# 4. POST the resulting status. Failure is the starting value; it is
# replaced by success only when the tier-check really exited 0.
STATE="failure"
DESCRIPTION="Refired via /refire-tier-check; tier-check failed (see workflow log)"
if [ "$TIER_EXIT" -eq 0 ]; then
  STATE="success"
  DESCRIPTION="Refired via /refire-tier-check by $COMMENT_AUTHOR"
fi

# Status target_url points at the PR itself: SERVER_URL + RUN_ID + JOB_ID
# isn't trivially constructible from the bash env on Gitea 1.22.6, so the
# runner log cannot be linked directly.
TARGET_URL="https://${GITEA_HOST}/${OWNER}/${NAME}/pulls/${PR_NUMBER}"

# jq builds the JSON so every value is escaped correctly.
POST_BODY=$(jq -nc \
  --arg state "$STATE" \
  --arg context "$CONTEXT" \
  --arg description "$DESCRIPTION" \
  --arg target_url "$TARGET_URL" \
  '{state:$state, context:$context, description:$description, target_url:$target_url}')

POST_FILE=$(mktemp)
# STATUSES_FILE exists only when the rate-limit step ran, hence the default.
trap 'rm -f "$PR_FILE" "${STATUSES_FILE:-}" "$POST_FILE"' EXIT
POST_HTTP=$(curl -sS -o "$POST_FILE" -w '%{http_code}' \
  -X POST -H "$AUTH" -H "Content-Type: application/json" \
  -d "$POST_BODY" \
  "${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}")
case "$POST_HTTP" in
  200 | 201) ;;
  *)
    echo "::error::POST /statuses/$HEAD_SHA returned HTTP $POST_HTTP (body $(head -c 200 "$POST_FILE"))"
    exit 1
    ;;
esac

echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
# Exit 0: the refire JOB succeeded — it re-evaluated the gate and posted an
# HONEST status. The gate VERDICT travels in the POSTed status ($STATE),
# which is what branch protection reads; a failing tier-check posts
# state=failure (red on the PR), so there is no fail-open. Exiting non-zero
# on a failing verdict as well would double-signal the same failure as both
# a red status AND a red refire job. The fail-open that mattered (TIER_EXIT
# hardcoded to 0 → always state=success) is fixed where the exit code is
# captured.
exit 0
|
||||
@@ -689,8 +689,8 @@ def reap_branch(
|
||||
shas = list_recent_commit_shas(branch, limit)
|
||||
except ApiError as e:
|
||||
print(
|
||||
"::error::status-reaper cannot run: commit-list API failed "
|
||||
f"after retries: {e}"
|
||||
"::warning::status-reaper skipped this tick because the "
|
||||
f"commit list could not be read after retries: {e}"
|
||||
)
|
||||
return {
|
||||
"scanned_shas": 0,
|
||||
@@ -704,7 +704,6 @@ def reap_branch(
|
||||
"compensated_cancelled_push": 0,
|
||||
"preserved_pr_without_push_success": 0,
|
||||
"compensated_per_sha": {},
|
||||
"sha_api_errors": 0,
|
||||
"skipped": True,
|
||||
"skip_reason": "commit-list-api-error",
|
||||
}
|
||||
@@ -721,7 +720,6 @@ def reap_branch(
|
||||
"compensated_cancelled_push": 0,
|
||||
"preserved_pr_without_push_success": 0,
|
||||
"compensated_per_sha": {},
|
||||
"sha_api_errors": 0,
|
||||
}
|
||||
|
||||
for sha in shas:
|
||||
@@ -733,9 +731,8 @@ def reap_branch(
|
||||
try:
|
||||
combined = get_combined_status(sha)
|
||||
except ApiError as e:
|
||||
aggregate["sha_api_errors"] += 1
|
||||
print(
|
||||
f"::error::get_combined_status({sha[:10]}) failed; "
|
||||
f"::warning::get_combined_status({sha[:10]}) failed; "
|
||||
f"skipping this SHA: {e}"
|
||||
)
|
||||
continue
|
||||
@@ -822,14 +819,6 @@ def main() -> int:
|
||||
sort_keys=True,
|
||||
)
|
||||
)
|
||||
# Observability: infra-failure → red. If the commit list could not be
|
||||
# read or any per-SHA status fetch failed, the tick is incomplete and
|
||||
# must be observable as a failure (non-zero exit) so the cron bot or
|
||||
# runner surface alerts.
|
||||
if counters.get("skipped"):
|
||||
return 1
|
||||
if counters.get("sha_api_errors", 0) > 0:
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
Executable
+28
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env bash
|
||||
# Mock sop-tier-check.sh for sop-tier-refire tests.
|
||||
#
|
||||
# Exits 0 ("PASS") if $MOCK_TIER_RESULT == "pass", else exits 1.
|
||||
# This lets the refire tests cover the success + failure status-POST
|
||||
# paths without invoking the real sop-tier-check.sh (which uses bash 4+
|
||||
# associative arrays — known parser bug on macOS bash 3.2 dev box).
|
||||
|
||||
set -euo pipefail

# Map MOCK_TIER_RESULT (default "pass") to one log line and one exit code.
_mock_result="${MOCK_TIER_RESULT:-pass}"
if [ "$_mock_result" = "pass" ]; then
  _mock_msg="::notice::mock tier-check: PASS"
  _mock_code=0
elif [ "$_mock_result" = "fail_no_label" ]; then
  _mock_msg="::error::mock tier-check: no tier label"
  _mock_code=1
elif [ "$_mock_result" = "fail_no_approvals" ]; then
  _mock_msg="::error::mock tier-check: no approving reviews"
  _mock_code=1
else
  # Unknown values get their own exit code so a typo in a test is visible.
  _mock_msg="::error::mock tier-check: unknown MOCK_TIER_RESULT=${MOCK_TIER_RESULT:-}"
  _mock_code=2
fi
echo "$_mock_msg"
exit "$_mock_code"
|
||||
Executable
+208
@@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Stub Gitea API for sop-tier-refire test scenarios.
|
||||
|
||||
Reads $FIXTURE_STATE_DIR/scenario to decide what to return for each
|
||||
endpoint the sop-tier-refire.sh + sop-tier-check.sh scripts call.
|
||||
Captures every POST to /statuses/{sha} into posted_statuses.jsonl so
|
||||
the test can assert what the script tried to write.
|
||||
|
||||
Scenarios:
|
||||
T1_success — tier:low + APPROVED by engineer → tier-check passes
|
||||
T2_no_tier_label — no tier label → tier-check exits 1 before POST
|
||||
T3_no_approvals — tier:low but zero approving reviews → exits 1
|
||||
T4_closed — PR state=closed → refire is a no-op
|
||||
T5_rate_limited — last status update 5 seconds ago → skip
|
||||
|
||||
Usage:
|
||||
FIXTURE_STATE_DIR=/tmp/x python3 _refire_fixture.py 8080
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import http.server
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.parse
|
||||
|
||||
|
||||
STATE_DIR = os.environ["FIXTURE_STATE_DIR"]
|
||||
|
||||
|
||||
def scenario() -> str:
    """Return the active scenario name read from ``STATE_DIR/scenario``.

    Falls back to ``"T1_success"`` when that path is not a regular file.
    """
    marker = os.path.join(STATE_DIR, "scenario")
    if os.path.isfile(marker):
        with open(marker, encoding="utf-8") as handle:
            return handle.read().strip()
    return "T1_success"
|
||||
|
||||
|
||||
def now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO 8601 string."""
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return utc_now.isoformat()
|
||||
|
||||
|
||||
def append_post(body: dict) -> None:
    """Append *body* as one JSON line to ``posted_statuses.jsonl``."""
    log_path = os.path.join(STATE_DIR, "posted_statuses.jsonl")
    record = json.dumps(body) + "\n"
    with open(log_path, "a") as log:
        log.write(record)
|
||||
|
||||
|
||||
def pr_payload() -> dict:
    """Return the pull-request object for the active scenario.

    Only the ``T4_closed`` scenario reports a closed PR; every other
    scenario reports it as open.
    """
    is_closed = scenario() == "T4_closed"
    return {
        "number": 999,
        "state": "closed" if is_closed else "open",
        "head": {"sha": "deadbeef0000111122223333444455556666"},
        "user": {"login": "feature-author"},
    }
|
||||
|
||||
|
||||
def labels_payload() -> list:
    """Return the PR's label objects for the active scenario.

    ``T2_no_tier_label`` carries no tier label; all other scenarios use
    ``tier:low``.
    """
    if scenario() == "T2_no_tier_label":
        names = ["bug"]
    else:
        names = ["tier:low", "ci"]
    return [{"name": label} for label in names]
|
||||
|
||||
|
||||
def reviews_payload() -> list:
    """Return the PR's review list for the active scenario.

    ``T3_no_approvals`` has no reviews; every other scenario has a single
    APPROVED review by ``reviewer-engineer``.
    """
    if scenario() == "T3_no_approvals":
        return []
    approval = {
        "state": "APPROVED",
        "user": {"login": "reviewer-engineer"},
    }
    return [approval]
|
||||
|
||||
|
||||
def teams_payload() -> list:
    """Return the org's team list (id + name), identical in every scenario."""
    # Mirror the real molecule-ai org teams referenced in TIER_EXPR.
    teams = ((5, "ceo"), (2, "engineers"), (6, "managers"))
    return [{"id": team_id, "name": name} for team_id, name in teams]
|
||||
|
||||
|
||||
def statuses_payload() -> list:
    """Return the commit-status list for the active scenario.

    Only ``T5_rate_limited`` has a status: a failure for the tier-check
    context whose ``updated_at`` is 5 seconds in the past. All other
    scenarios return an empty list.
    """
    if scenario() != "T5_rate_limited":
        return []
    five_seconds_ago = datetime.datetime.now(
        datetime.timezone.utc
    ) - datetime.timedelta(seconds=5)
    entry = {
        "context": "sop-tier-check / tier-check (pull_request)",
        "state": "failure",
        "updated_at": five_seconds_ago.isoformat(),
    }
    return [entry]
|
||||
|
||||
|
||||
def user_payload() -> dict:
    """Return the identity served at ``/api/v1/user``."""
    # Mirrors the WHOAMI probe in sop-tier-check.sh.
    identity = {"login": "sop-tier-bot-fixture"}
    return identity
|
||||
|
||||
|
||||
class Handler(http.server.BaseHTTPRequestHandler):
    """Serve canned Gitea API responses for the active fixture scenario.

    GET requests are answered from the module's ``*_payload()`` helpers.
    POSTs to a ``/statuses/{sha}`` route are recorded through
    ``append_post`` so a test can assert on what the script tried to write.
    Any other path gets a 404 JSON body naming the unmatched path.
    """

    # Quiet — keep stdout for explicit logs only.
    def log_message(self, *args, **kwargs):  # noqa: D401
        pass

    def _json(self, code: int, body) -> None:
        """Send *body* serialized as JSON with HTTP status *code*."""
        payload = json.dumps(body).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _empty(self, code: int) -> None:
        """Send a body-less response with HTTP status *code*."""
        self.send_response(code)
        self.send_header("Content-Length", "0")
        self.end_headers()

    def do_GET(self):  # noqa: N802
        """Route a GET to the matching canned payload; 404 if none matches."""
        u = urllib.parse.urlparse(self.path)
        path = u.path  # the query string is ignored for routing

        if path == "/_ping":
            return self._json(200, {"ok": True})
        if path == "/api/v1/user":
            return self._json(200, user_payload())

        # /api/v1/repos/{owner}/{name}/pulls/{n}
        m = re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/(\d+)$", path)
        if m:
            return self._json(200, pr_payload())

        # /api/v1/repos/{owner}/{name}/issues/{n}/labels
        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/issues/\d+/labels$", path):
            return self._json(200, labels_payload())

        # /api/v1/repos/{owner}/{name}/pulls/{n}/reviews
        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/pulls/\d+/reviews$", path):
            return self._json(200, reviews_payload())

        # /api/v1/orgs/{owner}/teams
        if re.match(r"^/api/v1/orgs/[^/]+/teams$", path):
            return self._json(200, teams_payload())

        # /api/v1/teams/{id}/members/{login} → 204 if user is an engineer
        m = re.match(r"^/api/v1/teams/(\d+)/members/([^/]+)$", path)
        if m:
            team_id, login = m.group(1), m.group(2)
            # In our fixture reviewer-engineer ∈ engineers (id=2)
            if team_id == "2" and login == "reviewer-engineer":
                return self._empty(204)
            return self._empty(404)

        # /api/v1/orgs/{owner}/members/{login} — fallback path used when
        # team-member probes all 403. We don't need it for these tests.
        if re.match(r"^/api/v1/orgs/[^/]+/members/[^/]+$", path):
            return self._empty(404)

        # /api/v1/repos/{owner}/{name}/statuses/{sha}
        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
            return self._json(200, statuses_payload())

        return self._json(404, {"path": path, "msg": "fixture: no route"})

    def do_POST(self):  # noqa: N802
        """Record a status POST and acknowledge it with HTTP 201.

        The request body is parsed as JSON. Anything that is not a JSON
        object (invalid JSON, or a valid list/string/number) is recorded as
        ``{"_raw": <decoded text>}`` so the capture file always holds
        objects and the echo below can read keys from it.
        """
        u = urllib.parse.urlparse(self.path)
        path = u.path
        length = int(self.headers.get("Content-Length") or 0)
        raw = self.rfile.read(length) if length else b""
        try:
            body = json.loads(raw) if raw else {}
        except (ValueError, RecursionError):
            # ValueError covers both undecodable bytes and malformed JSON.
            body = None
        if not isinstance(body, dict):
            body = {"_raw": raw.decode(errors="replace")}

        if re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
            append_post(body)
            # Echo back something status-shaped — script only checks HTTP code.
            return self._json(
                201,
                {
                    "context": body.get("context"),
                    "state": body.get("state"),
                    "created_at": now_iso(),
                },
            )

        return self._json(404, {"path": path, "msg": "fixture: no route"})
|
||||
|
||||
|
||||
def main(argv: "list[str] | None" = None) -> None:
    """Run the stub API on 127.0.0.1 until interrupted.

    ``argv`` holds the command-line arguments without the program name and
    defaults to ``sys.argv[1:]``; its first element is the TCP port.
    Exits with a usage message when no port is given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        # Without this guard a missing port surfaced as a bare IndexError.
        sys.exit("usage: _refire_fixture.py PORT")
    port = int(args[0])
    # The context manager closes the listening socket once serve_forever()
    # stops, instead of leaving it open until interpreter exit.
    with http.server.ThreadingHTTPServer(("127.0.0.1", port), Handler) as srv:
        srv.serve_forever()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -134,14 +134,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
return self._json(200, [
|
||||
{"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "deadbeef0000111122223333444455556666"},
|
||||
])
|
||||
if sc == "T23_missing_commit_id":
|
||||
# APPROVED review with NO commit_id field — the SEV-1
|
||||
# internal#812 / closed-#843 spoof-bug signature. The
|
||||
# fail-closed SSOT must REJECT (not silently accept as
|
||||
# "older Gitea row" the way the old pre-fix code did).
|
||||
return self._json(200, [
|
||||
{"state": "APPROVED", "official": True, "dismissed": False, "user": {"login": "core-devops"}},
|
||||
])
|
||||
# Default: one non-author APPROVED (current head, official)
|
||||
return self._json(200, [
|
||||
{"state": "APPROVED", "dismissed": False, "official": True, "user": {"login": "core-devops"}, "commit_id": "deadbeef0000111122223333444455556666"},
|
||||
|
||||
@@ -1,610 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Mutation-verified unit tests for the SSOT fail-closed approval predicate
|
||||
in _approval_validator.py (SEV-1 internal#812).
|
||||
|
||||
Each test asserts REJECTION explicitly. A reviewer who weakens the
|
||||
predicate — e.g., by removing the commit_id check, by reintroducing the
|
||||
"no commit_id is accepted" escape hatch, by changing `!=` to `==` in the
|
||||
head comparison, or by allowing official == false — will trip these
|
||||
tests in CI.
|
||||
|
||||
Run:
|
||||
cd .gitea/scripts
|
||||
python3 -m unittest tests.test_approval_validator -v
|
||||
# or
|
||||
python3 tests/test_approval_validator.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
# Same-dir import — test lives next to _approval_validator.py
|
||||
sys.path.insert(
|
||||
0,
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
|
||||
)
|
||||
from _approval_validator import ( # noqa: E402
|
||||
classify_reviews,
|
||||
is_genuine_approval,
|
||||
is_official_current_head,
|
||||
is_open_request_changes,
|
||||
)
|
||||
|
||||
HEAD = "0123456789abcdef0123456789abcdef01234567"
|
||||
OTHER_HEAD = "fedcba9876543210fedcba9876543210fedcba98"
|
||||
|
||||
|
||||
def _review(
    *,
    state: str = "APPROVED",
    official: bool = True,
    dismissed: bool = False,
    stale: bool = False,
    commit_id: object = HEAD,
    user: str = "reviewer-1",
    body: str = "",
) -> dict:
    """Return a review row with the fields these tests exercise.

    The defaults describe an official, current-head APPROVED review; each
    test overrides only the field it wants to break.
    """
    row = {"id": 1, "user": {"login": user, "id": 1}}
    row.update(
        body=body,
        state=state,
        official=official,
        dismissed=dismissed,
        stale=stale,
        commit_id=commit_id,
    )
    return row
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hard contract: every fail-closed branch must reject
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class IsOfficialCurrentHeadFailClosed(unittest.TestCase):
    """Fail-closed contract of the shared is_official_current_head predicate.

    Every condition of the predicate is mandatory, so each test below pins
    REJECTION for one way a condition can fail.
    """

    def test_accepts_canonical_review(self):
        verdict = is_official_current_head(_review(), HEAD)
        self.assertTrue(verdict)

    def test_rejects_non_dict(self):
        non_dicts = (None, "string", 42, [], (), object())
        for bad in non_dicts:
            with self.subTest(bad=bad):
                self.assertFalse(is_official_current_head(bad, HEAD))

    def test_rejects_when_official_is_false(self):
        not_official = (False, None, 0, "false")
        for v in not_official:
            with self.subTest(v=v):
                row = _review(official=v)
                self.assertFalse(is_official_current_head(row, HEAD))

    def test_rejects_when_dismissed(self):
        dismissed_values = (True, "true", 1)
        for v in dismissed_values:
            with self.subTest(v=v):
                row = _review(dismissed=v)
                self.assertFalse(is_official_current_head(row, HEAD))

    def test_rejects_when_stale(self):
        stale_values = (True, "true", 1)
        for v in stale_values:
            with self.subTest(v=v):
                row = _review(stale=v)
                self.assertFalse(is_official_current_head(row, HEAD))

    def test_rejects_when_commit_id_missing(self):
        """FAIL-CLOSED #1: a missing commit_id is REJECTED.

        This is the spoof signature that closed #843 (with CR2 + Researcher
        both flagging it).
        """
        empty_values = (None, "", 0, False, [], {}, ())
        for bad in empty_values:
            with self.subTest(commit_id=bad):
                row = _review(commit_id=bad)
                self.assertFalse(
                    is_official_current_head(row, HEAD),
                    f"commit_id={bad!r} must reject (fail-closed)",
                )

    def test_rejects_when_commit_id_wrong_type(self):
        non_strings = (123, 1.5, True, ["abc"], {"sha": HEAD}, ("tuple",))
        for bad in non_strings:
            with self.subTest(commit_id=bad):
                row = _review(commit_id=bad)
                self.assertFalse(is_official_current_head(row, HEAD))

    def test_rejects_when_commit_id_stale(self):
        """FAIL-CLOSED #2: a present-but-wrong commit_id is REJECTED.

        A review made on a previous head cannot count.
        """
        old_head_review = _review(commit_id=OTHER_HEAD)
        self.assertFalse(is_official_current_head(old_head_review, HEAD))

    def test_rejects_when_head_missing(self):
        missing_heads = (None, "", 0, False)
        for bad in missing_heads:
            with self.subTest(head=bad):
                self.assertFalse(is_official_current_head(_review(), bad))

    def test_rejects_when_head_wrong_type(self):
        int_head_verdict = is_official_current_head(_review(), 123)
        self.assertFalse(int_head_verdict)
        list_head_verdict = is_official_current_head(_review(), ["x"])
        self.assertFalse(list_head_verdict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# is_genuine_approval
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class IsGenuineApprovalContract(unittest.TestCase):
    """Contract of is_genuine_approval: exact APPROVED state, plus the
    official / not-dismissed / current-head conditions, plus the optional
    reviewer_set filter."""

    def test_accepts_canonical_approval(self):
        approval = _review(state="APPROVED")
        self.assertTrue(is_genuine_approval(approval, headsha=HEAD))

    def test_rejects_non_approved_states(self):
        other_states = (
            "REQUEST_CHANGES",
            "COMMENT",
            "PENDING",
            "DISMISSED",
            "approve",
            "",
            "bogus",
        )
        for state in other_states:
            with self.subTest(state=state):
                row = _review(state=state)
                self.assertFalse(is_genuine_approval(row, headsha=HEAD))

    def test_rejects_case_coerced_approved_states(self):
        """EXACT-ENUM fail-closed (RCs 9849/9851/9852).

        Gitea always emits the canonical UPPERCASE "APPROVED". A
        lowercase/mixed-case/padded value is the signature of a forged row
        and MUST be rejected, not coerced via .upper() into an accepted
        APPROVED. Each of these was ACCEPTED before the exact-enum fix.
        """
        forged_states = (
            "approved",
            "Approved",
            "ApProVeD",
            "APPROVED ",
            " APPROVED",
            "approved\n",
            "\tAPPROVED",
        )
        for state in forged_states:
            with self.subTest(state=state):
                row = _review(state=state)
                self.assertFalse(
                    is_genuine_approval(row, headsha=HEAD),
                    f"case-coerced/padded state {state!r} must NOT count as "
                    "a genuine approval",
                )

    def test_rejects_non_official_approval(self):
        """A comment-based / non-official 'APPROVED' is REJECTED.

        PM: 'reject comment-based / non-official reviews'.
        """
        unofficial = _review(state="APPROVED", official=False)
        self.assertFalse(is_genuine_approval(unofficial, headsha=HEAD))

    def test_rejects_dismissed_approval(self):
        dismissed = _review(state="APPROVED", dismissed=True)
        self.assertFalse(is_genuine_approval(dismissed, headsha=HEAD))

    def test_rejects_stale_head_approval(self):
        """commit_id != head is REJECTED.

        An approval on an old head cannot count, even if it was official
        and not dismissed.
        """
        old_head = _review(state="APPROVED", commit_id=OTHER_HEAD)
        self.assertFalse(is_genuine_approval(old_head, headsha=HEAD))

    def test_rejects_missing_commit_id_approval(self):
        """FAIL-CLOSED #3: the SEV-1 case.

        An APPROVED review with NO commit_id is the spoof-bug signature.
        Reject.
        """
        empty_values = (None, "", 0, False)
        for bad in empty_values:
            with self.subTest(commit_id=bad):
                row = _review(state="APPROVED", commit_id=bad)
                self.assertFalse(
                    is_genuine_approval(row, headsha=HEAD),
                    f"missing commit_id={bad!r} must reject",
                )

    def test_reviewer_set_filters_users(self):
        allowed = {"alice", "bob"}
        member_verdict = is_genuine_approval(
            _review(user="alice"), headsha=HEAD, reviewer_set=allowed
        )
        self.assertTrue(member_verdict)
        outsider_verdict = is_genuine_approval(
            _review(user="carol"), headsha=HEAD, reviewer_set={"alice", "bob"}
        )
        self.assertFalse(outsider_verdict)

    def test_reviewer_set_none_skips_check(self):
        # None means "no team filter at this layer" (e.g., review-check.sh
        # applies its own team-membership probe separately).
        verdict = is_genuine_approval(
            _review(user="anyone"), headsha=HEAD, reviewer_set=None
        )
        self.assertTrue(verdict)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# is_open_request_changes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class IsOpenRequestChangesContract(unittest.TestCase):
    """Contract of is_open_request_changes: exact REQUEST_CHANGES state on
    a non-dismissed review whose commit_id is the current head."""

    def test_accepts_canonical_request_changes(self):
        blocker = _review(state="REQUEST_CHANGES")
        self.assertTrue(is_open_request_changes(blocker, headsha=HEAD))

    def test_rejects_non_request_changes_states(self):
        other_states = ("APPROVED", "COMMENT", "PENDING", "DISMISSED")
        for state in other_states:
            with self.subTest(state=state):
                row = _review(state=state)
                self.assertFalse(is_open_request_changes(row, headsha=HEAD))

    def test_rejects_case_coerced_request_changes_states(self):
        """EXACT-ENUM fail-closed.

        A lowercase/mixed-case "request_changes" must NOT be coerced into
        an open-block match. Before the exact-enum fix, .upper() accepted
        these as REQUEST_CHANGES.
        """
        forged_states = (
            "request_changes",
            "Request_Changes",
            "REQUEST_CHANGES ",
            " REQUEST_CHANGES",
            "request_changes\n",
        )
        for state in forged_states:
            with self.subTest(state=state):
                row = _review(state=state)
                self.assertFalse(
                    is_open_request_changes(row, headsha=HEAD),
                    f"case-coerced/padded state {state!r} must NOT count as "
                    "an open REQUEST_CHANGES",
                )

    def test_rejects_when_dismissed(self):
        dismissed = _review(state="REQUEST_CHANGES", dismissed=True)
        self.assertFalse(is_open_request_changes(dismissed, headsha=HEAD))

    def test_rejects_when_stale_head(self):
        old_head = _review(state="REQUEST_CHANGES", commit_id=OTHER_HEAD)
        self.assertFalse(is_open_request_changes(old_head, headsha=HEAD))

    def test_rejects_when_missing_commit_id(self):
        empty_values = (None, "", 0)
        for bad in empty_values:
            with self.subTest(commit_id=bad):
                row = _review(state="REQUEST_CHANGES", commit_id=bad)
                self.assertFalse(is_open_request_changes(row, headsha=HEAD))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# classify_reviews — the merge-queue consumer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class ClassifyReviewsContract(unittest.TestCase):
    """Contract tests for ``classify_reviews(reviews, headsha=..., ...)``.

    Every test unpacks the result as ``(approvers, request_changes)`` and
    builds its input rows with the ``_review`` helper.
    """

    def test_basic_approvers_and_request_changes(self):
        # One valid approval and one valid block from two different users.
        approved, blocking = classify_reviews(
            [
                _review(user="alice", state="APPROVED", commit_id=HEAD),
                _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
            ],
            headsha=HEAD,
        )
        self.assertEqual(approved, {"alice"})
        self.assertEqual(blocking, ["bob"])

    def test_reviewer_set_filters_early(self):
        # carol is not in reviewer_set, so only alice may be counted.
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="carol", state="APPROVED", commit_id=HEAD),
        ]
        approved, _ = classify_reviews(
            rows, headsha=HEAD, reviewer_set={"alice"}
        )
        self.assertEqual(approved, {"alice"})

    def test_latest_review_per_user_wins(self):
        # The later REQUEST_CHANGES from alice replaces her earlier APPROVED.
        approved, blocking = classify_reviews(
            [
                _review(user="alice", state="APPROVED", commit_id=HEAD),
                _review(user="alice", state="REQUEST_CHANGES", commit_id=HEAD),
            ],
            headsha=HEAD,
        )
        self.assertNotIn("alice", approved)
        self.assertIn("alice", blocking)

    def test_stale_head_approval_excluded(self):
        # An approval pinned to OTHER_HEAD does not count on HEAD.
        stale_rows = [
            _review(user="alice", state="APPROVED", commit_id=OTHER_HEAD),
        ]
        approved, _ = classify_reviews(stale_rows, headsha=HEAD)
        self.assertEqual(approved, set())

    def test_missing_commit_id_approval_excluded(self):
        """SEV-1 fail-open surface: APPROVED rows with no commit_id.

        Neither a ``None`` nor an empty-string commit_id may add its user to
        the approvers.
        """
        approved, _ = classify_reviews(
            [
                _review(user="alice", state="APPROVED", commit_id=None),
                _review(user="bob", state="APPROVED", commit_id=""),
            ],
            headsha=HEAD,
        )
        self.assertEqual(approved, set())

    def test_dismissed_approval_excluded(self):
        # A dismissed approval on the current head is not counted.
        dismissed_rows = [
            _review(user="alice", state="APPROVED", dismissed=True, commit_id=HEAD),
        ]
        approved, _ = classify_reviews(dismissed_rows, headsha=HEAD)
        self.assertEqual(approved, set())

    def test_non_official_approval_excluded(self):
        # official=False disqualifies an otherwise valid approval.
        unofficial_rows = [
            _review(user="alice", state="APPROVED", official=False, commit_id=HEAD),
        ]
        approved, _ = classify_reviews(unofficial_rows, headsha=HEAD)
        self.assertEqual(approved, set())

    def test_comment_state_excluded(self):
        # A COMMENT row is never an approval.
        comment_rows = [
            _review(user="alice", state="COMMENT", commit_id=HEAD),
        ]
        approved, _ = classify_reviews(comment_rows, headsha=HEAD)
        self.assertEqual(approved, set())

    def test_case_coerced_approved_not_counted(self):
        """Exact-enum matching: case or padding variants of APPROVED are ignored.

        'approved', 'Approved' and 'APPROVED ' on an otherwise valid
        current-head row must yield neither an approver nor a block. The
        original note records that classify_reviews used to coerce these via
        ``.upper()``.
        """
        for state in ("approved", "Approved", "APPROVED "):
            with self.subTest(state=state):
                approved, blocking = classify_reviews(
                    [_review(user="alice", state=state, commit_id=HEAD)],
                    headsha=HEAD,
                )
                self.assertEqual(approved, set())
                self.assertEqual(blocking, [])

    def test_case_coerced_request_changes_not_silently_dropped(self):
        """Exact-enum matching: a lowercase 'request_changes' row is invalid.

        When it follows a genuine current-head REQUEST_CHANGES from the same
        user, the invalid row is ignored and the genuine block still stands.
        """
        approved, blocking = classify_reviews(
            [
                _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
                _review(user="bob", state="request_changes", commit_id=HEAD),
            ],
            headsha=HEAD,
        )
        self.assertIn("bob", blocking)
        self.assertNotIn("bob", approved)

    def test_stale_head_request_changes_excluded(self):
        # A block left on a previous head must not block the current head.
        stale_rows = [
            _review(user="bob", state="REQUEST_CHANGES", commit_id=OTHER_HEAD),
        ]
        _, blocking = classify_reviews(stale_rows, headsha=HEAD)
        self.assertEqual(blocking, [])

    # -----------------------------------------------------------------
    # VALIDATE-BEFORE-REDUCE regression tests (SEV-1 internal#812 follow-up).
    #
    # Bug being locked out: classify_reviews used to pick each user's LATEST
    # row first and validate it afterwards. A later INVALID row from the same
    # user (a COMMENT, or an APPROVED/REQUEST_CHANGES with a null or old
    # commit_id) could then replace a genuine current-head review, hiding an
    # approval or ERASING a REQUEST_CHANGES block. With validation done
    # before the reduce, an invalid later row can never become a user's
    # "latest".
    # -----------------------------------------------------------------

    def test_genuine_approval_not_masked_by_later_comment(self):
        """A later COMMENT from the same user must not hide a genuine approval.

        This is the reduce-before-validate masking bug: the COMMENT row is
        neither an approval nor a block and must not replace the earlier
        current-head APPROVED.
        """
        approved, blocking = classify_reviews(
            [
                _review(user="alice", state="APPROVED", commit_id=HEAD),
                _review(user="alice", state="COMMENT", commit_id=HEAD),
            ],
            headsha=HEAD,
        )
        self.assertIn("alice", approved)
        self.assertEqual(blocking, [])

    def test_genuine_approval_not_masked_by_later_null_commit_id(self):
        """A later APPROVED without a commit_id must not hide a genuine one.

        The later row (commit_id ``None`` or ``""``) is invalid and is
        ignored; the earlier current-head approval still counts.
        """
        for invalid_sha in (None, ""):
            with self.subTest(commit_id=invalid_sha):
                rows = [
                    _review(user="alice", state="APPROVED", commit_id=HEAD),
                    _review(user="alice", state="APPROVED", commit_id=invalid_sha),
                ]
                approved, _ = classify_reviews(rows, headsha=HEAD)
                self.assertIn(
                    "alice", approved,
                    f"later invalid commit_id={invalid_sha!r} must not mask the "
                    "genuine current-head approval",
                )

    def test_genuine_approval_not_masked_by_later_stale_commit_id(self):
        """A later APPROVED on a stale head must not hide a genuine approval.

        The OTHER_HEAD row is invalid for HEAD and is ignored.
        """
        approved, _ = classify_reviews(
            [
                _review(user="alice", state="APPROVED", commit_id=HEAD),
                _review(user="alice", state="APPROVED", commit_id=OTHER_HEAD),
            ],
            headsha=HEAD,
        )
        self.assertIn("alice", approved)

    def test_request_changes_not_erased_by_later_comment(self):
        """A later COMMENT from the same user must not erase a genuine block.

        This is the worse variant of the bug, in which a block silently
        evaporates.
        """
        approved, blocking = classify_reviews(
            [
                _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
                _review(user="bob", state="COMMENT", commit_id=HEAD),
            ],
            headsha=HEAD,
        )
        self.assertIn("bob", blocking)
        self.assertNotIn("bob", approved)

    def test_request_changes_not_erased_by_later_null_commit_id(self):
        """A later REQUEST_CHANGES with a null/old commit_id must not erase a block.

        The invalid later row (``None``, ``""`` or OTHER_HEAD) is ignored, so
        the user's verdict stays on the current head.
        """
        for invalid_sha in (None, "", OTHER_HEAD):
            with self.subTest(commit_id=invalid_sha):
                rows = [
                    _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
                    _review(user="bob", state="REQUEST_CHANGES", commit_id=invalid_sha),
                ]
                _, blocking = classify_reviews(rows, headsha=HEAD)
                self.assertIn(
                    "bob", blocking,
                    f"later invalid commit_id={invalid_sha!r} must not erase the "
                    "genuine current-head REQUEST_CHANGES block",
                )

    def test_request_changes_not_erased_by_later_approved_invalid(self):
        """A later INVALID APPROVED must not turn a genuine block into a pass.

        The user stays in request_changes and is not added to approvers.
        """
        approved, blocking = classify_reviews(
            [
                _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
                _review(user="bob", state="APPROVED", commit_id=None),
            ],
            headsha=HEAD,
        )
        self.assertIn("bob", blocking)
        self.assertNotIn("bob", approved)

    def test_genuine_request_changes_still_supersedes_genuine_approval(self):
        """Sanity: a VALID later REQUEST_CHANGES still replaces an earlier approval.

        Only invalid later rows are ignored; this guards against an
        over-correction that would ignore every later row.
        """
        approved, blocking = classify_reviews(
            [
                _review(user="alice", state="APPROVED", commit_id=HEAD),
                _review(user="alice", state="REQUEST_CHANGES", commit_id=HEAD),
            ],
            headsha=HEAD,
        )
        self.assertNotIn("alice", approved)
        self.assertIn("alice", blocking)

    def test_genuine_approval_still_supersedes_genuine_request_changes(self):
        """Sanity: a VALID later APPROVED replaces an earlier genuine block."""
        approved, blocking = classify_reviews(
            [
                _review(user="alice", state="REQUEST_CHANGES", commit_id=HEAD),
                _review(user="alice", state="APPROVED", commit_id=HEAD),
            ],
            headsha=HEAD,
        )
        self.assertIn("alice", approved)
        self.assertEqual(blocking, [])

    def test_two_valid_approvers_plus_one_invalid_later_row(self):
        """Three genuine approvers, one of whom later adds an invalid row.

        carol's trailing COMMENT must not drop her: all three users are
        counted and nobody is blocking.
        """
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="bob", state="APPROVED", commit_id=HEAD),
            _review(user="carol", state="APPROVED", commit_id=HEAD),
            _review(user="carol", state="COMMENT", commit_id=HEAD),
        ]
        approved, blocking = classify_reviews(rows, headsha=HEAD)
        self.assertEqual(approved, {"alice", "bob", "carol"})
        self.assertEqual(blocking, [])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Mutation-resistance smoke checks
|
||||
#
|
||||
# These tests document the mutations a reviewer would have to apply to
|
||||
# weaken the gate. They are not synthetic; they verify that the
|
||||
# predicate is structured so each known-softening mutation would also
|
||||
# fail at least one other test in this file. We can't actually mutate
|
||||
# the source in CI, but these tests are explicit about the mutations
|
||||
# that must not slip through, and the suite is dense enough that any
|
||||
# loosening of the predicate will fail multiple cases.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class MutationResistance(unittest.TestCase):
    """Smoke checks pinning the results that known gate-weakening edits flip.

    Each test names one mutation of the predicate in its docstring and
    asserts the outcome that mutation would change.
    """

    def test_documented_mutation_remove_commit_id_check_fails(self):
        """Mutation: the commit_id check is removed or bypassed.

        Examples are reverting to the pre-fix
        ``if isinstance(commit_id, str) and commit_id and headsha:`` guard,
        or replacing ``commit_id != headsha`` with a constant. The
        missing-commit_id tests in this file would then fail, so a reviewer
        would have to weaken several test categories to reintroduce the
        SEV-1 surface.
        """
        # Sanity: every falsy "missing" commit_id value is rejected today.
        for bad in [None, "", 0, False]:
            with self.subTest(commit_id=bad):
                self.assertFalse(
                    is_official_current_head(_review(commit_id=bad), HEAD)
                )
                # state is passed explicitly so the rejection is attributable
                # to the commit_id and not to whatever state _review defaults
                # to (the helper is not visible here).
                self.assertFalse(
                    is_genuine_approval(
                        _review(state="APPROVED", commit_id=bad), headsha=HEAD
                    )
                )

    def test_documented_mutation_change_neq_to_eq_fails(self):
        """Mutation: ``commit_id != headsha`` is inverted to ``==``.

        A review pinned to a different head would then match, so the
        stale-head tests would fail.
        """
        self.assertFalse(
            is_official_current_head(_review(commit_id=OTHER_HEAD), HEAD)
        )

    def test_documented_mutation_drop_official_check_fails(self):
        """Mutation: the ``if not review.get('official')`` check is dropped.

        A non-official APPROVED would then count, so the non-official tests
        would fail.
        """
        self.assertFalse(
            is_genuine_approval(
                _review(state="APPROVED", official=False), headsha=HEAD
            )
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -1,193 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# test_audit_force_merge.sh — regression lock for audit-force-merge fail-closed
|
||||
# behavior. Verifies every schema validation path via direct jq filter tests.
|
||||
#
|
||||
# Usage: bash test_audit_force_merge.sh
|
||||
|
||||
set -euo pipefail

# Report a failed check on stderr and abort the run with status 1.
fail() {
  echo "FAIL: $*" >&2
  exit 1
}

# Report a passed check on stdout.
pass() {
  echo "PASS: $*"
}

# The checks shell out to jq; when it is not available, skip (exit 0)
# instead of failing.
if ! [ -x "$(command -v jq)" ]; then
  echo "SKIP: jq not on PATH"
  exit 0
fi

# Fixed 40-character SHA; not referenced by the checks in this section.
HEAD_SHA="deadbeef00000000000000000000000000000000"
|
||||
|
||||
# The schema validation jq expression from audit-force-merge.sh.
|
||||
validate_pr_schema() {
  # Reads one PR JSON document on stdin and prints "true" when .merged is a
  # boolean, .merge_commit_sha is a string, and .merged_by.login, .base.ref
  # and .head.sha are strings inside object parents; prints "false" otherwise.
  local schema_filter='
    (.merged | type == "boolean") and
    (.merge_commit_sha | type == "string") and
    (.merged_by | type == "object") and (.merged_by.login | type == "string") and
    (.base | type == "object") and (.base.ref | type == "string") and
    (.head | type == "object") and (.head.sha | type == "string")
  '
  jq -r "$schema_filter"
}
|
||||
|
||||
validate_statuses_type() {
  # Reads a JSON document on stdin; prints "true" only when its top-level
  # .statuses value is an array, "false" otherwise.
  local type_filter='(.statuses | type) == "array"'
  jq -r "$type_filter"
}
|
||||
|
||||
# T1 — every field present with the expected type: schema accepts.
T1=$(validate_pr_schema <<<'{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}')
if [ "$T1" != "true" ]; then fail "T1: valid payload should pass schema"; fi
pass "T1: valid payload passes schema"

# T2 — merged=false is still a boolean: the schema checks types, not values.
T2=$(validate_pr_schema <<<'{"merged":false,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}')
if [ "$T2" != "true" ]; then fail "T2: merged=false with valid types should pass schema"; fi
pass "T2: merged=false with valid types passes schema"

# T3 — .merged absent: schema rejects.
T3=$(validate_pr_schema <<<'{"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}')
if [ "$T3" != "false" ]; then fail "T3: missing merged should fail schema"; fi
pass "T3: missing merged fails schema"

# T4 — .merged is the string "true" rather than a boolean: schema rejects.
T4=$(validate_pr_schema <<<'{"merged":"true","merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}')
if [ "$T4" != "false" ]; then fail "T4: merged as string should fail schema"; fi
pass "T4: merged as string fails schema"

# T5 — .merge_commit_sha is null: schema rejects.
T5=$(validate_pr_schema <<<'{"merged":true,"merge_commit_sha":null,"merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}')
if [ "$T5" != "false" ]; then fail "T5: null merge_commit_sha should fail schema"; fi
pass "T5: null merge_commit_sha fails schema"

# T6 — .merged_by is null: schema rejects.
T6=$(validate_pr_schema <<<'{"merged":true,"merge_commit_sha":"abc","merged_by":null,"base":{"ref":"main"},"head":{"sha":"def"}}')
if [ "$T6" != "false" ]; then fail "T6: null merged_by should fail schema"; fi
pass "T6: null merged_by fails schema"

# T7 — .base.ref is a number: schema rejects.
T7=$(validate_pr_schema <<<'{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":123},"head":{"sha":"def"}}')
if [ "$T7" != "false" ]; then fail "T7: numeric base.ref should fail schema"; fi
pass "T7: numeric base.ref fails schema"

# T8 — .head absent: schema rejects.
T8=$(validate_pr_schema <<<'{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"}}')
if [ "$T8" != "false" ]; then fail "T8: missing head should fail schema"; fi
pass "T8: missing head fails schema"

# T9 — .statuses absent: type check rejects.
T9=$(validate_statuses_type <<<'{}')
if [ "$T9" != "false" ]; then fail "T9: missing statuses should fail type check"; fi
pass "T9: missing statuses fails type check"

# T10 — .statuses is a string: type check rejects.
T10=$(validate_statuses_type <<<'{"statuses":"unexpected"}')
if [ "$T10" != "false" ]; then fail "T10: string statuses should fail type check"; fi
pass "T10: string statuses fails type check"

# T11 — .statuses is null: type check rejects.
T11=$(validate_statuses_type <<<'{"statuses":null}')
if [ "$T11" != "false" ]; then fail "T11: null statuses should fail type check"; fi
pass "T11: null statuses fails type check"

# T12 — .statuses is a non-empty array: type check accepts.
T12=$(validate_statuses_type <<<'{"statuses":[{"context":"c1","status":"success"}]}')
if [ "$T12" != "true" ]; then fail "T12: array statuses should pass type check"; fi
pass "T12: array statuses passes type check"

# T13 — .statuses is an empty array: type check accepts.
T13=$(validate_statuses_type <<<'{"statuses":[]}')
if [ "$T13" != "true" ]; then fail "T13: empty array statuses should pass type check"; fi
pass "T13: empty array statuses passes type check"
|
||||
|
||||
# T14-T16: REQUIRED_CHECKS_JSON branch entry validation
|
||||
validate_required_checks_json() {
  # $1: branch name to look up.
  # $2: JSON object text keyed by branch name.
  # Prints "true" when the object has an entry for the branch and that entry
  # is an array; prints "false" otherwise.
  local branch="$1" json="$2"
  local entry_filter='has($branch) and (.[$branch] | type == "array")'
  echo "$json" | jq -r --arg branch "$branch" "$entry_filter"
}
|
||||
|
||||
# T14 — the branch key exists and maps to an array: accepted.
T14=$(validate_required_checks_json "main" '{"main":["CI / all-required"]}')
if [ "$T14" != "true" ]; then fail "T14: existing array branch should pass"; fi
pass "T14: existing array branch passes"

# T15 — the branch key is absent: rejected.
T15=$(validate_required_checks_json "staging" '{"main":["CI / all-required"]}')
if [ "$T15" != "false" ]; then fail "T15: missing branch should fail"; fi
pass "T15: missing branch fails"

# T16 — the branch key maps to a string, not an array: rejected.
T16=$(validate_required_checks_json "main" '{"main":"CI / all-required"}')
if [ "$T16" != "false" ]; then fail "T16: string branch entry should fail"; fi
pass "T16: string branch entry fails"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# T17+ — /statuses pagination (status-pagination RCA, #2440-family).
|
||||
# The reader now pages /commits/{sha}/statuses to exhaustion instead of reading
|
||||
# the capped combined /status view. These lock the page-accumulation,
|
||||
# newest-wins collapse, short-page stop, and fail-closed contracts.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Page-body type validator used per page (bare array, not an object).
|
||||
validate_page_is_array() {
  # Reads a page body on stdin and prints "true" when jq accepts it as a JSON
  # array, "false" for anything else (including input jq cannot parse). jq's
  # own output and errors are discarded.
  if jq -e 'type == "array"' >/dev/null 2>&1; then
    echo true
  else
    echo false
  fi
}
|
||||
|
||||
# newest-wins collapse: mirror the script's max-by-id jq (order-independent).
|
||||
collapse_newest_per_context() {
  # Reads a JSON array of status rows on stdin, keeps for each .context the
  # row with the highest .id (so the result does not depend on row order),
  # and prints the .status recorded for one context. Prints "missing" when
  # that context has no row or its recorded status is empty.
  #
  # $1 (optional): context to look up; defaults to "CI / all-required (push)".
  local wanted="${1:-CI / all-required (push)}"
  # Keep the loop variables local so they do not leak into the caller.
  local ctx state
  local -A newest=()
  while IFS=$'\t' read -r ctx state; do
    # Skip lines without a context so an empty key is never used as a subscript.
    if [ -n "$ctx" ]; then
      newest[$ctx]="$state"
    fi
  done < <(jq -r 'group_by(.context) | map(max_by(.id)) | .[] | "\(.context)\t\(.status)"')
  echo "${newest[$wanted]:-missing}"
}
|
||||
|
||||
# T17 — a page whose body is a bare JSON array passes the per-page check.
T17=$(validate_page_is_array <<<'[{"context":"c1","status":"success"}]')
if [ "$T17" != "true" ]; then fail "T17: bare array page should pass array check"; fi
pass "T17: bare array page passes array check"

# T18 — a page whose body is an object is rejected (fail-closed).
T18=$(validate_page_is_array <<<'{"statuses":[]}')
if [ "$T18" != "false" ]; then fail "T18: object page should fail array check (fail-closed)"; fi
pass "T18: object page fails array check (fail-closed)"

# T19 — the required context only appears on page 2 and must still be found
# once both pages are accumulated.
# PAGE1: 100 noise rows with ids 0..99; PAGE2: the required-context success.
PAGE1=$(jq -nc '[range(0;100) | {id:., context:("noise-\(.) (push)"), status:"pending"}]')
PAGE2='[{"id":200,"context":"CI / all-required (push)","status":"success"}]'
# Same accumulation shape as the script: slurp the running accumulator and
# the new page, then concatenate the two arrays.
ACCUM=$(jq -s '.[0] + .[1]' <(echo "$PAGE1") <(echo "$PAGE2"))
LEN=$(jq 'length' <<<"$ACCUM")
if [ "$LEN" != "101" ]; then fail "T19: accumulated length should be 101, got $LEN"; fi
RESULT=$(collapse_newest_per_context <<<"$ACCUM")
if [ "$RESULT" != "success" ]; then fail "T19: required success on page2 must be FOUND, got '$RESULT'"; fi
pass "T19: required success on page2 is found after pagination"

# T20 — the required context is absent from every row, so the lookup stays
# 'missing' (fail-closed: counted as not-green, flags the force-merge).
ABSENT=$(jq -nc '[range(0;100) | {id:., context:("noise-\(.) (push)"), status:"success"}]')
RESULT2=$(collapse_newest_per_context <<<"$ABSENT")
if [ "$RESULT2" != "missing" ]; then fail "T20: absent required context must stay 'missing', got '$RESULT2'"; fi
pass "T20: genuinely-absent required context stays missing (fail-closed)"

# T21 — non-monotonic order: the newest row (id 157) sits in the middle of
# the list, neither first nor last, and must still win because the collapse
# picks the highest id per context.
DUP='[{"id":155,"context":"CI / all-required (push)","status":"pending"},
      {"id":157,"context":"CI / all-required (push)","status":"success"},
      {"id":125,"context":"CI / all-required (push)","status":"failure"}]'
RESULT3=$(collapse_newest_per_context <<<"$DUP")
if [ "$RESULT3" != "success" ]; then fail "T21: newest (success) must win over older (failure), got '$RESULT3'"; fi
pass "T21: newest row per context wins after pagination collapse"

# T22 — short-page stop condition: a page with fewer than PER_PAGE rows ends
# the loop. This emulates the numeric comparison the script uses.
PER_PAGE=100
PAGE_COUNT=$(jq 'length' <<<"$PAGE2")  # 1 row
if (( PAGE_COUNT < PER_PAGE )); then
  SHORT=stop
else
  SHORT=continue
fi
if [ "$SHORT" != "stop" ]; then fail "T22: short page should stop pagination"; fi
pass "T22: short page stops pagination loop"

# T23 — a page with exactly PER_PAGE rows keeps the loop going.
FULL=$(jq -nc '[range(0;100) | {id:., context:"x", status:"success"}]')
FULL_COUNT=$(jq 'length' <<<"$FULL")
if (( FULL_COUNT < PER_PAGE )); then
  CONT=stop
else
  CONT=continue
fi
if [ "$CONT" != "continue" ]; then fail "T23: full page should continue pagination"; fi
pass "T23: full page continues pagination loop"

echo
echo "ALL AUDIT-FORCE-MERGE CHECKS PASSED"
|
||||
@@ -107,36 +107,6 @@ def test_required_checks_env_json_malformed_fails():
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_non_string_item_fails():
    # A non-string entry (123) in the branch's context list must make
    # required_checks_env exit with code 3.
    audit_doc = _make_audit_doc_json({"main": ["ctx-a", 123, "ctx-b"]})
    caught = None
    try:
        drift.required_checks_env(audit_doc, "main")
    except SystemExit as exc:
        caught = exc
    if caught is None:
        raise AssertionError("expected SystemExit(3)")
    assert caught.code == 3
|
||||
|
||||
|
||||
def test_required_checks_env_json_empty_string_item_fails():
    # A whitespace-only entry in the branch's context list must make
    # required_checks_env exit with code 3.
    audit_doc = _make_audit_doc_json({"main": ["ctx-a", "  ", "ctx-b"]})
    caught = None
    try:
        drift.required_checks_env(audit_doc, "main")
    except SystemExit as exc:
        caught = exc
    if caught is None:
        raise AssertionError("expected SystemExit(3)")
    assert caught.code == 3
|
||||
|
||||
|
||||
def test_required_checks_env_json_duplicate_context_fails():
    # A context listed twice ("ctx-a") must make required_checks_env exit
    # with code 3.
    audit_doc = _make_audit_doc_json({"main": ["ctx-a", "ctx-b", "ctx-a"]})
    caught = None
    try:
        drift.required_checks_env(audit_doc, "main")
    except SystemExit as exc:
        caught = exc
    if caught is None:
        raise AssertionError("expected SystemExit(3)")
    assert caught.code == 3
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# sentinel_needs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -50,15 +50,15 @@ class TestQaReviewDirectTrigger:
|
||||
"pull_request_review must include 'submitted' type"
|
||||
)
|
||||
|
||||
def test_job_guard_has_no_review_state_check(self):
|
||||
def test_job_guard_requires_approved_state(self):
|
||||
wf = load_workflow("qa-review.yml")
|
||||
guard = _job_guard_string(wf)
|
||||
assert "github.event.review.state" not in guard, (
|
||||
"job guard must NOT check review.state (#2159: Gitea 1.22.6 payload unreliable); "
|
||||
"evaluator (review-check.sh) verifies actual APPROVE via API"
|
||||
assert "github.event.review.state == 'APPROVED'" in guard, (
|
||||
"job guard must check review.state for 'APPROVED'"
|
||||
)
|
||||
assert "github.event.review.state == 'approved'" in guard, (
|
||||
"job guard must check review.state for 'approved' (case fallback per #2135)"
|
||||
)
|
||||
assert "github.event_name == 'pull_request_target'" in guard
|
||||
assert "github.event_name == 'pull_request_review'" in guard
|
||||
|
||||
def test_post_step_uses_status_post_token(self):
|
||||
wf = load_workflow("qa-review.yml")
|
||||
@@ -91,15 +91,15 @@ class TestSecurityReviewDirectTrigger:
|
||||
"pull_request_review must include 'submitted' type"
|
||||
)
|
||||
|
||||
def test_job_guard_has_no_review_state_check(self):
|
||||
def test_job_guard_requires_approved_state(self):
|
||||
wf = load_workflow("security-review.yml")
|
||||
guard = _job_guard_string(wf)
|
||||
assert "github.event.review.state" not in guard, (
|
||||
"job guard must NOT check review.state (#2159: Gitea 1.22.6 payload unreliable); "
|
||||
"evaluator (review-check.sh) verifies actual APPROVE via API"
|
||||
assert "github.event.review.state == 'APPROVED'" in guard, (
|
||||
"job guard must check review.state for 'APPROVED'"
|
||||
)
|
||||
assert "github.event.review.state == 'approved'" in guard, (
|
||||
"job guard must check review.state for 'approved' (case fallback per #2135)"
|
||||
)
|
||||
assert "github.event_name == 'pull_request_target'" in guard
|
||||
assert "github.event_name == 'pull_request_review'" in guard
|
||||
|
||||
def test_post_step_uses_status_post_token(self):
|
||||
wf = load_workflow("security-review.yml")
|
||||
@@ -153,7 +153,7 @@ class TestRefireTokenSeparation:
|
||||
"qa refire must receive STATUS_POST_TOKEN env var"
|
||||
)
|
||||
# Evaluator stays on read token
|
||||
assert "SOP_CHECKLIST_GATE_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
|
||||
assert "SOP_TIER_CHECK_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
|
||||
"qa refire evaluator must stay on read-scoped token"
|
||||
)
|
||||
|
||||
@@ -163,6 +163,6 @@ class TestRefireTokenSeparation:
|
||||
assert env.get("STATUS_POST_TOKEN") == "${{ secrets.STATUS_POST_TOKEN }}", (
|
||||
"security refire must receive STATUS_POST_TOKEN env var"
|
||||
)
|
||||
assert "SOP_CHECKLIST_GATE_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
|
||||
assert "SOP_TIER_CHECK_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
|
||||
"security refire evaluator must stay on read-scoped token"
|
||||
)
|
||||
|
||||
@@ -2,8 +2,6 @@ import importlib.util
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
SCRIPT = Path(__file__).resolve().parents[1] / "gitea-merge-queue.py"
|
||||
spec = importlib.util.spec_from_file_location("gitea_merge_queue", SCRIPT)
|
||||
mq = importlib.util.module_from_spec(spec)
|
||||
@@ -14,66 +12,38 @@ spec.loader.exec_module(mq)
|
||||
def test_latest_statuses_dedupes_by_context_newest_first():
|
||||
statuses = [
|
||||
{"context": "CI / all-required (pull_request)", "status": "failure"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "state": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "state": "success"},
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
]
|
||||
|
||||
latest = mq.latest_statuses_by_context(statuses)
|
||||
|
||||
assert latest["CI / all-required (pull_request)"]["status"] == "failure"
|
||||
assert latest["sop-checklist / all-items-acked (pull_request_target)"]["state"] == "success"
|
||||
assert latest["sop-checklist / all-items-acked (pull_request)"]["state"] == "success"
|
||||
|
||||
|
||||
def test_required_contexts_green_rejects_missing_and_pending():
|
||||
latest = mq.latest_statuses_by_context([
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "pending"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "pending"},
|
||||
])
|
||||
|
||||
ok, missing_or_bad = mq.required_contexts_green(
|
||||
latest,
|
||||
[
|
||||
"CI / all-required (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request_target)",
|
||||
"qa-review / approved (pull_request_target)",
|
||||
"sop-checklist / all-items-acked (pull_request)",
|
||||
"qa-review / approved (pull_request)",
|
||||
],
|
||||
)
|
||||
|
||||
assert ok is False
|
||||
assert missing_or_bad == [
|
||||
"sop-checklist / all-items-acked (pull_request_target)=pending",
|
||||
"qa-review / approved (pull_request_target)=missing",
|
||||
"sop-checklist / all-items-acked (pull_request)=pending",
|
||||
"qa-review / approved (pull_request)=missing",
|
||||
]
|
||||
|
||||
|
||||
def test_required_contexts_green_rejects_volume_skipped():
    """A '[volume-skipped]' pending status must not be accepted as green.

    The original note cites sop-checklist.py:1179-1187 as the place where
    volume_skipped posts ``pending`` with a '[volume-skipped]' description
    prefix. That status means the gate did not finish evaluating, so the
    merge queue must report the context as pending, not as a soft-fail.
    """
    ci_context = "CI / all-required (pull_request)"
    sop_context = "sop-checklist / all-items-acked (pull_request_target)"
    raw_statuses = [
        {"context": ci_context, "status": "success"},
        {
            "context": sop_context,
            "status": "pending",
            "description": "[volume-skipped] comment-cap=1000 hit; please file ...",
        },
    ]
    latest = mq.latest_statuses_by_context(raw_statuses)

    ok, missing_or_bad = mq.required_contexts_green(
        latest, [ci_context, sop_context]
    )

    assert ok is False
    assert "sop-checklist / all-items-acked (pull_request_target)=pending" in missing_or_bad
|
||||
|
||||
|
||||
def test_choose_next_pr_sorts_by_queue_label_timestamp_then_number():
|
||||
issues = [
|
||||
{
|
||||
@@ -113,13 +83,7 @@ def test_pr_needs_update_when_base_sha_absent_from_commits():
|
||||
|
||||
|
||||
def _ready_kwargs(**overrides):
|
||||
"""Default kwargs for a fully-ready merge; override per test.
|
||||
|
||||
Includes the uniform governance checks (qa-review, security-review,
|
||||
sop-checklist) as required contexts and green statuses, matching the
|
||||
behaviour of process_once which merges GOVERNANCE_REQUIRED_CONTEXTS
|
||||
with branch-protection contexts.
|
||||
"""
|
||||
"""Default kwargs for a fully-ready merge; override per test."""
|
||||
base = dict(
|
||||
main_status={
|
||||
"state": "success",
|
||||
@@ -127,19 +91,9 @@ def _ready_kwargs(**overrides):
|
||||
},
|
||||
pr_status={
|
||||
"state": "success",
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
],
|
||||
"statuses": [{"context": "CI / all-required (pull_request)", "status": "success"}],
|
||||
},
|
||||
required_contexts=[
|
||||
"CI / all-required (pull_request)",
|
||||
"qa-review / approved (pull_request_target)",
|
||||
"security-review / approved (pull_request_target)",
|
||||
"sop-checklist / all-items-acked (pull_request_target)",
|
||||
],
|
||||
required_contexts=["CI / all-required (pull_request)"],
|
||||
required_approvals=2,
|
||||
approvers={"agent-reviewer-cr2", "agent-researcher"},
|
||||
request_changes=[],
|
||||
@@ -158,72 +112,13 @@ def test_merge_decision_requires_main_green_pr_green_and_current_base():
|
||||
assert decision.force is False # no non-required reds present
|
||||
|
||||
|
||||
def test_behind_main_but_mergeable_pr_merges_directly():
    """§SOP-22 (#2358): behind-main + conflict-free (mergeable True) → merge.

    No update step is expected: branch protection does not require strict
    up-to-date, and calling /update would dismiss the genuine approvals
    (dismiss_stale_approvals) and force re-review every tick. This replaces
    the old update-before-merge behavior.
    """
    kwargs = _ready_kwargs(pr_has_current_base=False, mergeable=True)
    outcome = mq.evaluate_merge_readiness(**kwargs)

    assert outcome.ready is True
    assert outcome.action == "merge"
|
||||
|
||||
|
||||
def test_behind_main_and_not_mergeable_pr_updates():
|
||||
"""The /update path is reached ONLY when the PR is NOT mergeable AND its head
|
||||
lacks current main — refreshing the branch may resolve a behind-main
|
||||
non-conflict; a real conflict 409s and is held (#2352)."""
|
||||
decision = mq.evaluate_merge_readiness(
|
||||
**_ready_kwargs(pr_has_current_base=False, mergeable=False)
|
||||
)
|
||||
def test_merge_decision_updates_stale_pr_before_merge():
|
||||
decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_has_current_base=False))
|
||||
|
||||
assert decision.ready is False
|
||||
assert decision.action == "update"
|
||||
|
||||
|
||||
def test_current_base_but_not_mergeable_pr_waits():
    """Current base + mergeable False → the queue waits.

    The PR already contains current main yet is reported not-mergeable
    (a genuine conflict, or mergeability still computing). An update would
    not help and a merge would be fail-open, so the decision must be a
    not-ready "wait" whose reason mentions "not mergeable".
    """
    overrides = {"pr_has_current_base": True, "mergeable": False}
    verdict = mq.evaluate_merge_readiness(**_ready_kwargs(**overrides))

    assert verdict.ready is False
    assert verdict.action == "wait"
    assert "not mergeable" in verdict.reason
|
||||
|
||||
|
||||
def test_behind_main_and_mergeable_none_waits_not_update():
    """§SOP-22 (CR2 #2374): behind-main + mergeable None → wait, not update.

    ``mergeable is None`` means Gitea is still computing mergeability. The
    old code collapsed None into False, so this case returned
    action="update" → /pulls/{n}/update → dismiss_stale_approvals, i.e. the
    rebase churn this change removes. None and False are distinct: None
    waits, False updates.
    """
    kwargs = _ready_kwargs(pr_has_current_base=False, mergeable=None)
    verdict = mq.evaluate_merge_readiness(**kwargs)

    assert verdict.ready is False
    # Must be "wait", never "update": no churn during the compute window.
    assert verdict.action == "wait"
    assert "computed" in verdict.reason
|
||||
|
||||
|
||||
def test_current_base_and_mergeable_none_waits():
    """Current base + mergeable None (still computing) → wait.

    Confirms the fail-closed rule that an unknown mergeability is never
    merged, regardless of whether the PR contains current main.
    """
    kwargs = _ready_kwargs(pr_has_current_base=True, mergeable=None)
    verdict = mq.evaluate_merge_readiness(**kwargs)

    assert verdict.ready is False
    assert verdict.action == "wait"
|
||||
|
||||
|
||||
def test_MergePermissionError_inherits_from_ApiError():
    """MergePermissionError must be a subclass of ApiError."""
    child, parent = mq.MergePermissionError, mq.ApiError
    assert issubclass(child, parent)
|
||||
|
||||
@@ -248,7 +143,7 @@ def test_genuine_approvals_counts_two_distinct_on_current_head():
|
||||
{"state": "APPROVED", "user": {"login": "agent-reviewer-cr2"},
|
||||
"official": True, "stale": False, "dismissed": False, "commit_id": "HEAD"},
|
||||
]
|
||||
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
|
||||
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
|
||||
assert approvers == {"agent-researcher", "agent-reviewer-cr2"}
|
||||
assert rc == []
|
||||
|
||||
@@ -265,7 +160,7 @@ def test_genuine_approvals_ignores_stale_dismissed_and_wrong_head():
|
||||
{"state": "APPROVED", "user": {"login": "agent-reviewer"},
|
||||
"official": True, "stale": False, "dismissed": False, "commit_id": "OLD"},
|
||||
]
|
||||
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
|
||||
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
|
||||
assert approvers == set()
|
||||
assert rc == []
|
||||
|
||||
@@ -279,7 +174,7 @@ def test_genuine_approvals_ignores_unofficial_and_outsiders():
|
||||
{"state": "APPROVED", "user": {"login": "hongming-codex-laptop"},
|
||||
"official": True, "stale": False, "dismissed": False, "commit_id": "HEAD"},
|
||||
]
|
||||
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
|
||||
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
|
||||
assert approvers == set()
|
||||
|
||||
|
||||
@@ -291,7 +186,7 @@ def test_genuine_approvals_latest_review_supersedes_earlier():
|
||||
{"state": "REQUEST_CHANGES", "user": {"login": "agent-reviewer-cr2"},
|
||||
"official": True, "stale": False, "dismissed": False, "commit_id": "HEAD"},
|
||||
]
|
||||
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
|
||||
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
|
||||
assert approvers == set()
|
||||
assert rc == ["agent-reviewer-cr2"]
|
||||
|
||||
@@ -314,56 +209,16 @@ def test_merge_blocked_when_insufficient_genuine_approvals():
|
||||
assert "insufficient genuine approvals" in decision.reason
|
||||
|
||||
|
||||
def test_governance_red_blocks_merge():
    """Failing or pending governance contexts block the merge.

    Uniform gate: qa-review, security-review and sop-checklist are always
    required, so a PR with any of them failing/pending must wait with a
    "required contexts not green" reason.
    """
    context_states = [
        ("CI / all-required (pull_request)", "success"),
        ("qa-review / approved (pull_request_target)", "failure"),
        ("security-review / approved (pull_request_target)", "pending"),
        ("sop-checklist / all-items-acked (pull_request_target)", "failure"),
        ("Staging SaaS / e2e (pull_request)", "failure"),
    ]
    pr_status = {
        "state": "failure",
        "statuses": [
            {"context": name, "status": state} for name, state in context_states
        ],
    }

    verdict = mq.evaluate_merge_readiness(**_ready_kwargs(pr_status=pr_status))

    assert verdict.ready is False
    assert verdict.action == "wait"
    assert "required contexts not green" in verdict.reason
|
||||
|
||||
|
||||
def test_non_required_red_does_not_block_merge():
|
||||
# Uniform gate flip (CTO #2407): qa-review, security-review, sop-checklist
|
||||
# are REQUIRED for ALL PRs. A PR with these failing/pending must NOT be
|
||||
# force-mergeable, even if BP-required CI is green and approvals are genuine.
|
||||
# Required (CI) green; non-required governance reds present → still merge,
|
||||
# and force is set so force_merge bypasses ONLY those non-required reds.
|
||||
pr_status = {
|
||||
"state": "failure",
|
||||
"state": "failure", # combined polluted by non-required reds
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request)", "status": "failure"},
|
||||
{"context": "security-review / approved (pull_request)", "status": "pending"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "failure"},
|
||||
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
|
||||
],
|
||||
}
|
||||
decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_status=pr_status))
|
||||
assert decision.ready is False
|
||||
assert decision.action == "wait"
|
||||
assert "required contexts not green" in decision.reason
|
||||
assert decision.force is False
|
||||
|
||||
|
||||
def test_non_required_advisory_red_does_not_block_merge():
|
||||
# Governance checks are green; only advisory non-required reds (Staging SaaS)
|
||||
# are present → PR is still mergeable with force_merge bypassing the advisory.
|
||||
pr_status = {
|
||||
"state": "failure", # combined polluted by advisory non-required reds
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-tier-check / tier-check (pull_request)", "status": "failure"},
|
||||
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
|
||||
],
|
||||
}
|
||||
@@ -467,14 +322,8 @@ def test_process_once_holds_pr_on_permanent_merge_error(monkeypatch):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
|
||||
@@ -540,14 +389,8 @@ def _fully_ready_process_once_monkeypatch(monkeypatch, mergeable, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
|
||||
@@ -632,131 +475,6 @@ def test_process_once_merges_when_mergeable_is_true(monkeypatch):
|
||||
assert calls["hold_label"] is None
|
||||
|
||||
|
||||
def test_process_once_behind_main_mergeable_none_waits_no_update(monkeypatch):
    """§SOP-22 (CR2 #2374) — end-to-end regression for the churn residual.

    A behind-main PR (its commits do not contain main_sha) whose
    mergeability is still computing (mergeable=None) must wait:
    ``process_once`` returns 0 and calls neither update_pull (which would
    dismiss genuine approvals via dismiss_stale_approvals), nor merge_pull,
    nor hold. The old None→False collapse sent this case down the /update
    path; here the PR is simply re-checked on the next tick.
    """
    tracker = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=None, calls=tracker)

    # Put the head behind main: the commit list does not contain main_sha.
    # The earlier None test had a current base, which masked this case.
    stale_head_sha = "a" * 40

    def commits_without_main(n):
        return [{"sha": stale_head_sha}]

    monkeypatch.setattr(mq, "get_pull_commits", commits_without_main)

    exit_code = mq.process_once(dry_run=False)

    assert exit_code == 0
    assert tracker["updated"] is False  # no /update → approvals not dismissed
    assert tracker["merge_attempts"] == 0  # never merge on an unknown
    assert tracker["hold_label"] is None  # transient → not held, retried next tick
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# §SOP-22: DIRECT-MERGE throughput fix (#2358). A conflict-free 2-genuine PR
|
||||
# merges WITHOUT a pre-merge /update call, so its approvals are NOT dismissed by
|
||||
# dismiss_stale_approvals. The merge bar (2-genuine-on-current-head +
|
||||
# BP-required green + mergeable + no RC + opt-out) is UNCHANGED; only the
|
||||
# unnecessary update-before-merge churn is removed. The /update path survives
|
||||
# for the genuine case it is needed (not-mergeable + behind-main), where a real
|
||||
# conflict 409s and is held per #2352. mergeable=None stays fail-closed.
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_process_once_merges_conflict_free_pr_without_update(monkeypatch):
    """§SOP-22(a) — core throughput fix: merge without a prior update.

    A conflict-free, fully-approved PR must be merged with update_pull never
    called. The old behavior called /update whenever the head lacked current
    main, dismissing the two genuine approvals (dismiss_stale_approvals) and
    forcing re-review every tick.
    """
    tracker = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=tracker)

    # Head is behind main (commit list lacks main_sha). Under the old logic
    # that alone forced an update_pull; with the fix it merges directly.
    pr_head_sha = "a" * 40

    def commits_without_main(n):
        return [{"sha": pr_head_sha}]

    monkeypatch.setattr(mq, "get_pull_commits", commits_without_main)

    exit_code = mq.process_once(dry_run=False)

    assert exit_code == 0
    assert tracker["merge_attempts"] == 1  # merged directly
    assert tracker["updated"] is False  # no update_pull → approvals not dismissed
    assert tracker["hold_label"] is None
|
||||
|
||||
|
||||
def test_process_once_behind_main_conflict_free_merges_directly(monkeypatch):
    """§SOP-22(b) — behind-main + conflict-free still merges directly.

    Branch protection does not require strict up-to-date, so exactly one
    merge attempt is expected and no update call.
    """
    tracker = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=tracker)

    stale_head_sha = "a" * 40

    def commits_without_main(n):
        return [{"sha": stale_head_sha}]

    monkeypatch.setattr(mq, "get_pull_commits", commits_without_main)

    exit_code = mq.process_once(dry_run=False)

    assert exit_code == 0
    assert tracker["merge_attempts"] == 1
    assert tracker["updated"] is False
|
||||
|
||||
|
||||
def test_process_once_pauses_when_main_not_green_no_direct_merge(monkeypatch):
    """§SOP-22 backstop — a red main pauses the queue.

    This is the serialized safety that makes direct-merge safe: when main's
    required push context is not green (e.g. a prior direct merge broke main
    and post-merge CI caught it), the next PR must not be merged onto it.
    """
    tracker = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=tracker)

    # Presumably the same main SHA the shared fixture wires up — verify
    # against _fully_ready_process_once_monkeypatch.
    red_main_sha = "b" * 40

    def combined_status_with_red_main(sha):
        on_main = sha == red_main_sha
        outcome = "failure" if on_main else "success"
        context = (
            "CI / all-required (push)" if on_main
            else "CI / all-required (pull_request)"
        )
        return {"state": outcome, "statuses": [{"context": context, "status": outcome}]}

    monkeypatch.setattr(mq, "get_combined_status", combined_status_with_red_main)

    exit_code = mq.process_once(dry_run=False)

    assert exit_code == 0
    assert tracker["merge_attempts"] == 0  # paused — no merge onto red main
    assert tracker["updated"] is False
|
||||
|
||||
|
||||
def test_direct_merge_bar_unchanged_behind_main(monkeypatch):
    """§SOP-22(d) — direct-merge does not weaken the merge bar.

    For a behind-main + conflict-free PR (``pr_has_current_base=False``,
    ``mergeable=True``), each failing gate exercised below must still yield
    ``action == "wait"`` and ``ready is False``:

    - fewer than the required genuine approvals,
    - a red required context,
    - an open REQUEST_CHANGES on the current head.

    Direct-merge only removes the update churn; every gate stays fail-closed.

    NOTE(review): the opt-out-label gate is not exercised by this test —
    confirm it is covered elsewhere. ``monkeypatch`` is accepted but unused;
    it is kept so the test signature is unchanged.
    """
    behind_main = dict(pr_has_current_base=False, mergeable=True)
    red_required = {
        "state": "failure",
        "statuses": [
            {"context": "CI / all-required (pull_request)", "status": "failure"},
        ],
    }
    # One override set per gate; the label is used as the assertion message
    # so a failure names the gate that let the PR through.
    failing_gates = {
        "<2 genuine approvals": dict(approvers={"agent-researcher"}),
        "red required context": dict(pr_status=red_required),
        "open REQUEST_CHANGES": dict(request_changes=["agent-reviewer-cr2"]),
    }

    for gate, overrides in failing_gates.items():
        d = mq.evaluate_merge_readiness(**_ready_kwargs(**overrides, **behind_main))
        assert d.action == "wait" and d.ready is False, gate
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Fix 3: status fetch is fail-closed (failed fetch != green)
|
||||
# --------------------------------------------------------------------------
|
||||
@@ -814,61 +532,6 @@ def test_status_fetch_failure_is_fail_closed(monkeypatch):
|
||||
assert merged["called"] is False
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Pagination: api_paginated loops pages and is fail-closed on page errors
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
def test_api_paginated_loops_pages_until_partial(monkeypatch):
    """api_paginated collects every page and stops after a short page.

    With page_size=2 the fake serves a full page (2 items) then a partial
    page (1 item); exactly two requests are expected.
    """
    served_pages = {
        1: [{"number": 1}, {"number": 2}],
        2: [{"number": 3}],
    }
    requests_seen = []

    def fake_api(method, path, *, query=None, **kw):
        params = query or {}
        page = int(params.get("page", "1"))
        limit = int(params.get("limit", "50"))
        requests_seen.append((page, limit))
        # Hand back a fresh list per call; unknown pages are empty.
        return 200, list(served_pages.get(page, ()))

    monkeypatch.setattr(mq, "api", fake_api)

    results = mq.api_paginated("GET", "/repos/o/r/issues", page_size=2)

    assert len(results) == 3
    assert [item["number"] for item in results] == [1, 2, 3]
    assert requests_seen == [(1, 2), (2, 2)]
|
||||
|
||||
|
||||
def test_api_paginated_raises_on_non_list(monkeypatch):
    """api_paginated raises ApiError when a page body is a dict, not a list."""

    def dict_page_api(method, path, *, query=None, **kw):
        body = {"message": "not found"}
        return 200, body

    monkeypatch.setattr(mq, "api", dict_page_api)

    with pytest.raises(mq.ApiError):
        mq.api_paginated("GET", "/repos/o/r/issues")
|
||||
|
||||
|
||||
def test_get_combined_status_propagates_paginated_statuses_error(monkeypatch):
    """An error from the paginated /statuses fetch must propagate.

    Fail-closed: get_combined_status must not silently fall back to the
    incomplete status set returned by /status.
    """
    for attr, value in (("OWNER", "o"), ("NAME", "r")):
        monkeypatch.setattr(mq, attr, value)

    def fake_api(method, path, *, query=None, **kw):
        # The two suffixes are mutually exclusive, so branch order is free.
        if path.endswith("/statuses"):
            raise mq.ApiError("GET /statuses -> HTTP 502")
        if path.endswith("/status"):
            combined = {
                "state": "success",
                "statuses": [{"context": "c1", "status": "success", "id": 1}],
            }
            return 200, combined
        raise mq.ApiError(f"unexpected {path}")

    monkeypatch.setattr(mq, "api", fake_api)

    with pytest.raises(mq.ApiError, match="GET /statuses"):
        mq.get_combined_status("a" * 40)
|
||||
|
||||
|
||||
def test_process_once_holds_tick_when_branch_protection_unavailable(monkeypatch):
|
||||
"""BP enumeration failure → HOLD the whole tick (no merge, rc 0)."""
|
||||
merged = {"called": False}
|
||||
@@ -951,30 +614,20 @@ def _stale_pr_update_409_monkeypatch(monkeypatch, queued_issues, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
# Scan-loop process_once enumerates candidates via list_candidate_issues.
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: queued_issues)
|
||||
monkeypatch.setattr(mq, "get_pull", lambda n: {
|
||||
"state": "open", "number": n, "mergeable": False,
|
||||
"state": "open", "number": n, "mergeable": True,
|
||||
"base": {"ref": "main", "repo_id": 1},
|
||||
"head": {"sha": head_sha, "repo_id": 1},
|
||||
"labels": [{"name": "merge-queue"}],
|
||||
})
|
||||
# NOTE: mergeable is False (real conflict) AND commits do NOT contain
|
||||
# main_sha → pr_has_current_base is False → decision.action == "update".
|
||||
# Under the #2358 direct-merge fix the update path is reached ONLY when the
|
||||
# PR is NOT mergeable; a mergeable=True behind-main PR would merge directly,
|
||||
# so this fixture sets mergeable=False to exercise the #2352 409-on-update
|
||||
# hold path.
|
||||
# NOTE: commits do NOT contain main_sha → pr_has_current_base is False →
|
||||
# decision.action == "update".
|
||||
monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": head_sha}])
|
||||
monkeypatch.setattr(mq, "get_pull_reviews", lambda n: [
|
||||
{"state": "APPROVED", "user": {"login": "agent-researcher"},
|
||||
@@ -1203,7 +856,7 @@ def test_list_candidate_issues_omits_label_filter_when_auto_discover(monkeypatch
|
||||
assert captured["query"].get("type") == "pulls"
|
||||
|
||||
mq.list_candidate_issues(auto_discover=False)
|
||||
assert captured["query"].get("label") == "merge-queue"
|
||||
assert captured["query"].get("labels") == "merge-queue"
|
||||
|
||||
|
||||
def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
|
||||
@@ -1226,16 +879,8 @@ def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == main_sha:
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (push)", "status": "success"},
|
||||
]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
|
||||
monkeypatch.setattr(mq, "get_pull", lambda n: dict(pr_payload, number=n))
|
||||
@@ -1416,14 +1061,8 @@ def _wire_multi_candidate_process_once(monkeypatch, *, issues, pulls, reviews, c
|
||||
monkeypatch.setattr(mq, "get_branch_head", lambda branch: MAIN_SHA)
|
||||
|
||||
def fake_combined(sha):
|
||||
if sha == MAIN_SHA:
|
||||
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
return {"state": "success", "statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
]}
|
||||
ctx = "CI / all-required (push)" if sha == MAIN_SHA else "CI / all-required (pull_request)"
|
||||
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
|
||||
@@ -1555,12 +1194,7 @@ def test_hol_unready_red_required_ci_is_skipped_for_ready_pr(monkeypatch):
|
||||
"statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
|
||||
state = "failure" if sha == red_head else "success"
|
||||
return {"state": state,
|
||||
"statuses": [
|
||||
{"context": "CI / all-required (pull_request)", "status": state},
|
||||
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "security-review / approved (pull_request_target)", "status": "success"},
|
||||
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
|
||||
]}
|
||||
"statuses": [{"context": "CI / all-required (pull_request)", "status": state}]}
|
||||
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
|
||||
|
||||
rc = mq.process_once(dry_run=False)
|
||||
@@ -1655,126 +1289,3 @@ def test_process_once_defensive_skip_when_pull_payload_opted_out(monkeypatch):
|
||||
|
||||
assert rc == 0
|
||||
assert calls["merged"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# readiness-enumeration + post-batch summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_enumerate_readiness_evaluates_all_candidates(monkeypatch):
    """enumerate_readiness reports every candidate, not just the first hit.

    Two candidates are wired: #500 (mergeable False) and #501 (mergeable
    True). Both must appear in the result, with #500 not ready and #501
    ready.
    """
    old_head, new_head = "a" * 40, "c" * 40

    def open_pull(sha, mergeable):
        # Same-repo PR against main with no labels.
        return {
            "state": "open", "mergeable": mergeable, "draft": False,
            "base": {"ref": "main", "repo_id": 1},
            "head": {"sha": sha, "repo_id": 1}, "labels": [],
        }

    candidate_issues = [
        _issue(500, labels=[], created="2026-06-01T01:00:00Z"),
        _issue(501, labels=[], created="2026-06-01T02:00:00Z"),
    ]
    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=candidate_issues,
        pulls={500: open_pull(old_head, False), 501: open_pull(new_head, True)},
        reviews={500: _two_approvals(old_head), 501: _two_approvals(new_head)},
        calls={},
    )

    entries = mq.enumerate_readiness(dry_run=False)

    assert len(entries) == 2
    by_number = {entry.pr_number: entry for entry in entries}
    blocked, ready = by_number[500], by_number[501]
    assert blocked.decision is not None
    assert blocked.decision.ready is False
    assert ready.decision is not None
    assert ready.decision.ready is True
|
||||
|
||||
|
||||
def test_enumerate_readiness_includes_ineligible_pr(monkeypatch):
    """An ineligible PR is recorded with decision=None; the rest still run.

    #600 has a head repo_id different from its base (a fork) and must be
    reported as "not merge-eligible"; #601 is same-repo and must be ready.
    """
    head = "a" * 40

    def open_pull(head_repo_id):
        return {
            "state": "open", "mergeable": True, "draft": False,
            "base": {"ref": "main", "repo_id": 1},
            "head": {"sha": head, "repo_id": head_repo_id}, "labels": [],
        }

    candidate_issues = [
        _issue(600, labels=[], created="2026-06-01T01:00:00Z"),
        _issue(601, labels=[], created="2026-06-01T02:00:00Z"),
    ]
    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=candidate_issues,
        pulls={600: open_pull(2), 601: open_pull(1)},  # 600 is the fork
        reviews={600: _two_approvals(head), 601: _two_approvals(head)},
        calls={},
    )

    entries = mq.enumerate_readiness(dry_run=False)

    by_number = {entry.pr_number: entry for entry in entries}
    fork_entry, local_entry = by_number[600], by_number[601]
    assert fork_entry.decision is None
    assert "not merge-eligible" in fork_entry.reason
    assert local_entry.decision is not None
    assert local_entry.decision.ready is True
|
||||
|
||||
|
||||
def test_enumerate_readiness_fail_closed_on_api_error(monkeypatch):
    """A get_pull failure marks only that candidate unverifiable.

    get_pull is patched to raise ApiError for PR 700; PR 701 must still be
    evaluated and come out ready.
    """
    sha = "a" * 40

    def open_pull():
        # Identical same-repo payload for both candidates.
        return {
            "state": "open",
            "mergeable": True,
            "draft": False,
            "base": {"ref": "main", "repo_id": 1},
            "head": {"sha": sha, "repo_id": 1},
            "labels": [],
        }

    candidate_issues = [
        _issue(700, labels=[], created="2026-06-01T01:00:00Z"),
        _issue(701, labels=[], created="2026-06-01T02:00:00Z"),
    ]
    pull_payloads = {700: open_pull(), 701: open_pull()}
    review_payloads = {700: _two_approvals(sha), 701: _two_approvals(sha)}
    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=candidate_issues,
        pulls=pull_payloads,
        reviews=review_payloads,
        calls={},
    )

    # Wrap the get_pull installed by the wiring above so only PR 700 fails.
    wired_get_pull = mq.get_pull

    def get_pull_failing_for_700(number):
        if number != 700:
            return wired_get_pull(number)
        raise mq.ApiError("simulated API failure")

    monkeypatch.setattr(mq, "get_pull", get_pull_failing_for_700)

    readiness = {
        entry.pr_number: entry
        for entry in mq.enumerate_readiness(dry_run=False)
    }

    broken = readiness[700]
    assert broken.decision is None
    assert "unverifiable" in broken.reason

    healthy = readiness[701]
    assert healthy.decision is not None
    assert healthy.decision.ready is True
|
||||
|
||||
|
||||
def test_print_post_batch_summary_counts_correctly(capsys):
    """The printed summary reports totals and a per-PR state line.

    One entry per bucket is supplied: a ready decision, a not-ready decision,
    and an entry with no decision at all.
    """
    ready_entry = mq.ReadinessEntry(
        pr_number=1,
        decision=mq.MergeDecision(True, "merge", "ready"),
        reason="ready",
    )
    waiting_entry = mq.ReadinessEntry(
        pr_number=2,
        decision=mq.MergeDecision(False, "wait", "CI red"),
        reason="CI red",
    )
    undecided_entry = mq.ReadinessEntry(pr_number=3, decision=None, reason="draft")

    mq.print_post_batch_summary([ready_entry, waiting_entry, undecided_entry])

    out = capsys.readouterr().out
    expected_fragments = (
        "total_candidates=3",
        "ready=1",
        "waiting=1",
        "ineligible/unverifiable=1",
        "PR #1: state=ready",
        "PR #2: state=waiting",
        "PR #3: state=ineligible",
    )
    for fragment in expected_fragments:
        assert fragment in out
|
||||
|
||||
@@ -320,10 +320,10 @@ class TestVerifyFlip(unittest.TestCase):
|
||||
self.assertEqual(len(verdict["fail_runs"]), 1)
|
||||
self.assertEqual(verdict["fail_runs"][0]["status"], "failure")
|
||||
|
||||
def test_unreadable_log_on_success_blocks(self):
|
||||
# Fail-closed: log fetch 404 (None) on a success status is a
|
||||
# potential Quirk #10 mask — we cannot verify it's genuine, so
|
||||
# we block the flip rather than allowing it.
|
||||
def test_unreadable_log_warns_not_blocks(self):
|
||||
# Acceptance test #5: log fetch 404 (None) → warn, not block.
|
||||
# Status is `success`, log is None — we can't tell, so we warn
|
||||
# and allow.
|
||||
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1"]):
|
||||
with mock.patch.object(
|
||||
lpfc, "combined_status",
|
||||
@@ -332,8 +332,7 @@ class TestVerifyFlip(unittest.TestCase):
|
||||
with mock.patch.object(lpfc, "fetch_log", return_value=None):
|
||||
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
|
||||
self.assertEqual(verdict["fail_runs"], [])
|
||||
self.assertEqual(len(verdict["masked_runs"]), 1)
|
||||
self.assertIn("log unavailable", verdict["masked_runs"][0]["samples"][0])
|
||||
self.assertEqual(verdict["masked_runs"], [])
|
||||
self.assertTrue(any("log unavailable" in w for w in verdict["warnings"]))
|
||||
|
||||
def test_unreadable_log_with_failure_status_still_blocks(self):
|
||||
@@ -350,9 +349,9 @@ class TestVerifyFlip(unittest.TestCase):
|
||||
self.assertEqual(len(verdict["fail_runs"]), 1)
|
||||
self.assertIn("log unavailable", verdict["fail_runs"][0]["samples"][0])
|
||||
|
||||
def test_zero_runs_history_blocks(self):
|
||||
# No commits with a matching context — cannot verify the flip.
|
||||
# Fail-closed: treat as masked rather than allowing.
|
||||
def test_zero_runs_history_warns_allows(self):
|
||||
# No commits with a matching context — newly added workflow.
|
||||
# Allow with warning.
|
||||
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1", "sha2"]):
|
||||
with mock.patch.object(
|
||||
lpfc, "combined_status",
|
||||
@@ -361,32 +360,17 @@ class TestVerifyFlip(unittest.TestCase):
|
||||
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
|
||||
self.assertEqual(verdict["checked_commits"], 0)
|
||||
self.assertEqual(verdict["fail_runs"], [])
|
||||
self.assertEqual(len(verdict["masked_runs"]), 1)
|
||||
self.assertIn("cannot verify flip", verdict["masked_runs"][0]["samples"][0])
|
||||
self.assertEqual(verdict["masked_runs"], [])
|
||||
self.assertTrue(any("no runs of" in w for w in verdict["warnings"]))
|
||||
|
||||
def test_zero_commits_blocks(self):
|
||||
# Empty branch (newly created repo, e.g.). Fail-closed: block.
|
||||
def test_zero_commits_warns_allows(self):
|
||||
# Empty branch (newly created repo, e.g.). Allow with warning.
|
||||
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=[]):
|
||||
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
|
||||
self.assertEqual(verdict["checked_commits"], 0)
|
||||
self.assertEqual(verdict["fail_runs"], [])
|
||||
self.assertEqual(len(verdict["masked_runs"]), 1)
|
||||
self.assertIn("cannot verify flip", verdict["masked_runs"][0]["samples"][0])
|
||||
|
||||
def test_combined_status_api_error_blocks(self):
|
||||
# Fail-closed: combined_status ApiError means the check history is
|
||||
# unreadable — we cannot verify the flip, so block as masked.
|
||||
with mock.patch.object(lpfc, "recent_commits_on_branch", return_value=["sha1"]):
|
||||
with mock.patch.object(
|
||||
lpfc, "combined_status",
|
||||
side_effect=lpfc.ApiError("GET /statuses/sha → HTTP 500"),
|
||||
):
|
||||
verdict = lpfc.verify_flip(FLIP_FIXTURE, "main", 5)
|
||||
self.assertEqual(verdict["checked_commits"], 0)
|
||||
self.assertEqual(verdict["fail_runs"], [])
|
||||
# One masked_run from the ApiError, one from zero checked_commits.
|
||||
self.assertEqual(len(verdict["masked_runs"]), 2)
|
||||
self.assertIn("API error", verdict["masked_runs"][0]["samples"][0])
|
||||
self.assertEqual(verdict["masked_runs"], [])
|
||||
self.assertTrue(any("no recent commits" in w for w in verdict["warnings"]))
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
@@ -17,7 +17,7 @@ wd.REPO = "molecule-ai/molecule-core"
|
||||
wd.OWNER = "molecule-ai"
|
||||
wd.NAME = "molecule-core"
|
||||
wd.WATCH_BRANCH = "main"
|
||||
wd.RED_LABEL = "ci-bp-drift"
|
||||
wd.RED_LABEL = "tier:high"
|
||||
wd.API = "https://git.example.com/api/v1"
|
||||
|
||||
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
#!/usr/bin/env bash
# Anti-regression gate for #2403: fail if any SOP tier artifact reappears.
set -euo pipefail

# Work from three directories above this script; the relative .gitea/ and
# tools/ paths below are resolved from there.
cd "$(dirname "$0")/../../.."

fail=0

# Report one regression on stderr and mark the whole run as failed.
flag() {
  echo "FAIL: $1" >&2
  fail=1
}

# 1 + 2. Deleted workflow files and script files must stay deleted.
for f in \
  .gitea/workflows/sop-tier-check.yml \
  .gitea/workflows/sop-tier-refire.yml \
  .gitea/scripts/sop-tier-check.sh \
  .gitea/scripts/sop-tier-refire.sh; do
  if [ -e "$f" ]; then
    flag "$f was re-added (must stay deleted per #2403)"
  fi
done

# 3. No tier branching logic in gate_check.py
if grep -qE '_get_pr_tier|TIER_AGENTS' tools/gate-check-v3/gate_check.py; then
  flag "tier branching reappeared in gate_check.py"
fi

# 4. No _is_tier_low_pending_ok in merge queue
if grep -q '_is_tier_low_pending_ok' .gitea/scripts/gitea-merge-queue.py; then
  flag "tier soft-fail reappeared in gitea-merge-queue.py"
fi

# 5. No sop-tier-check context references in workflow YAML
#    (grep is deliberately not quiet: matching lines are printed.)
if grep -rI --exclude-dir='__pycache__' 'sop-tier-check' .gitea/workflows/; then
  flag "sop-tier-check context reappeared in workflows"
fi

# 6. No SOP_TIER_CHECK_TOKEN references in workflow YAML or scripts
if grep -rI --exclude-dir='__pycache__' --exclude='test_no_tier_regression.sh' 'SOP_TIER_CHECK_TOKEN' .gitea/workflows/ .gitea/scripts/; then
  flag "SOP_TIER_CHECK_TOKEN reference reappeared (use SOP_CHECKLIST_GATE_TOKEN)"
fi

review_workflows=(.gitea/workflows/qa-review.yml .gitea/workflows/security-review.yml)

# 7. qa-review and security-review must have labeled/unlabeled triggers (#2139)
for f in "${review_workflows[@]}"; do
  if ! grep -q 'labeled, unlabeled' "$f"; then
    flag "$f missing labeled/unlabeled triggers (#2139)"
  fi
done

# 8. qa-review and security-review must NOT have review.state guard (#2159)
for f in "${review_workflows[@]}"; do
  if grep -q 'github.event.review.state' "$f"; then
    flag "$f has review.state guard reappeared (#2159)"
  fi
done

if [ "$fail" -ne 0 ]; then
  echo "TIER_REGRESSION_DETECTED" >&2
  exit 1
fi

echo "PASS: no tier regression detected"
|
||||
@@ -105,25 +105,16 @@ def test_build_plan_disable_flag_short_circuits_before_credentials():
|
||||
assert plan["disabled_reason"] == "PROD_AUTO_DEPLOY_DISABLED=true"
|
||||
|
||||
|
||||
def test_latest_status_for_context_picks_newest_by_id_regardless_of_order():
|
||||
# The exhaustively-paginated /statuses list is ascending id order
|
||||
# (oldest-first), the opposite of the combined /status view. The selector
|
||||
# must collapse duplicate context rows to the NEWEST (max id) so a stale
|
||||
# earlier run never shadows the current result, whichever way they arrive.
|
||||
def test_latest_status_for_context_uses_first_matching_status():
|
||||
statuses = [
|
||||
{"id": 10, "context": "CI / all-required (push)", "status": "pending"},
|
||||
{"id": 11, "context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"id": 12, "context": "CI / all-required (push)", "status": "success"},
|
||||
{"context": "CI / all-required (push)", "status": "pending"},
|
||||
{"context": "CI / all-required (pull_request)", "status": "success"},
|
||||
{"context": "CI / all-required (push)", "status": "success"},
|
||||
]
|
||||
|
||||
latest = prod.latest_status_for_context(statuses, "CI / all-required (push)")
|
||||
|
||||
assert latest == {"id": 12, "context": "CI / all-required (push)", "status": "success"}
|
||||
|
||||
# Same rows shuffled (newest-first, as the combined view would deliver)
|
||||
# must still resolve to the same newest row.
|
||||
latest_rev = prod.latest_status_for_context(list(reversed(statuses)), "CI / all-required (push)")
|
||||
assert latest_rev == {"id": 12, "context": "CI / all-required (push)", "status": "success"}
|
||||
assert latest == {"context": "CI / all-required (push)", "status": "pending"}
|
||||
|
||||
|
||||
def test_ci_context_state_handles_missing_and_gitea_status_key():
|
||||
@@ -621,123 +612,3 @@ def test_superseded_by_none_for_latest_job_so_it_still_rolls(monkeypatch):
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /statuses pagination — required-context SUCCESS on page 2+ must be FOUND,
|
||||
# genuinely-absent context must STILL fail-closed (no fail-open).
|
||||
# Regression for the single-page-status bug (#2440-family, pagination RCA):
|
||||
# the combined /status view caps `statuses` at ~30, so on a high-churn commit
|
||||
# the still-current required-context row is pushed past page 1 and the reader
|
||||
# falsely reports it `missing`.
|
||||
# ---------------------------------------------------------------------------
|
||||
def _paged_statuses_stub(pages):
|
||||
"""Return a fake _api_json_list that serves `pages` keyed by ?page=N."""
|
||||
def fake(url, _token):
|
||||
# url looks like .../statuses?page=N&limit=100
|
||||
page = 1
|
||||
for part in url.split("?", 1)[-1].split("&"):
|
||||
if part.startswith("page="):
|
||||
page = int(part.split("=", 1)[1])
|
||||
return pages.get(page, [])
|
||||
return fake
|
||||
|
||||
|
||||
def test_fetch_all_statuses_finds_required_success_on_page_two(monkeypatch):
    """Required-context SUCCESS rows that only exist on page 2 must be found."""
    required_contexts = (
        "CI / all-required (push)",
        "Secret scan / Scan diff for credential-shaped strings (push)",
    )
    # Page 1 is a full 100 rows of unrelated/older churn; the required-context
    # SUCCESS only appears on page 2. A single-page reader would miss it.
    noise_page = []
    for i in range(100):
        noise_page.append(
            {"id": i, "context": f"noise-{i} (push)", "status": "pending"}
        )
    required_page = [
        {"id": 200, "context": required_contexts[0], "status": "success"},
        {"id": 201, "context": required_contexts[1], "status": "success"},
    ]
    stub = _paged_statuses_stub({1: noise_page, 2: required_page})
    monkeypatch.setattr(prod, "_api_json_list", stub)

    rows = prod.fetch_all_statuses(
        "git.moleculesai.app", "molecule-ai/molecule-core", "a" * 40, "tok"
    )

    # Must have walked to page 2 and accumulated every row.
    assert len(rows) == 102
    for context in required_contexts:
        assert prod.ci_context_state(rows, context) == "success"
|
||||
|
||||
|
||||
def test_fetch_all_statuses_genuinely_absent_context_stays_missing(monkeypatch):
    """A context present on no page resolves to "missing" and is not satisfied."""
    # The required context is on NO page → fail-closed: ci_context_state must
    # report "missing", which context_is_satisfied() rejects → gate stays shut.
    first_page = []
    for i in range(100):
        first_page.append(
            {"id": i, "context": f"noise-{i} (push)", "status": "success"}
        )
    second_page = [{"id": 200, "context": "some-other (push)", "status": "success"}]
    monkeypatch.setattr(
        prod,
        "_api_json_list",
        _paged_statuses_stub({1: first_page, 2: second_page}),
    )

    all_rows = prod.fetch_all_statuses(
        "git.moleculesai.app", "molecule-ai/molecule-core", "b" * 40, "tok"
    )
    resolved = prod.ci_context_state(all_rows, "CI / all-required (push)")

    assert resolved == "missing"
    assert prod.context_is_satisfied(resolved) is False
|
||||
|
||||
|
||||
def test_fetch_all_statuses_fail_closed_on_page_error(monkeypatch):
    """An error while reading page 2 must reach the caller of fetch_all_statuses."""
    # A page that raises (unreadable) must propagate, never silently truncate
    # the scan and let the caller treat a partial list as complete.

    def page_two_explodes(url, _token):
        if "page=2" not in url:
            # Any other page is a full 100-row page of successes.
            return [
                {"id": i, "context": f"n-{i}", "status": "success"}
                for i in range(100)
            ]
        raise RuntimeError("GET .../statuses?page=2 -> HTTP 502: bad gateway")

    monkeypatch.setattr(prod, "_api_json_list", page_two_explodes)

    propagated = None
    try:
        prod.fetch_all_statuses("h", "r", "c" * 40, "tok")
    except RuntimeError as exc:
        propagated = exc

    if propagated is None:
        raise AssertionError("expected page-2 error to propagate (fail-closed)")
    assert "502" in str(propagated)
|
||||
|
||||
|
||||
def test_wait_for_ci_context_succeeds_when_required_status_is_past_page_one(monkeypatch):
    """wait_for_ci_context returns "success" when required rows sit after 100 noise rows."""
    # End-to-end: the gate reads the EXHAUSTIVE list, so a required SUCCESS that
    # only exists past page 1 lets the deploy proceed instead of timing out.
    noise_rows = [
        {"id": i, "context": f"noise-{i} (push)", "status": "success"}
        for i in range(100)
    ]
    required_rows = [
        {"id": 500, "context": "CI / all-required (push)", "status": "success"},
        {
            "id": 501,
            "context": "Secret scan / Scan diff for credential-shaped strings (push)",
            "status": "success",
        },
    ]
    exhaustive = noise_rows + required_rows

    def fetch_everything(*_args, **_kwargs):
        # Stand-in for fetch_all_statuses: always hands back the full list.
        return exhaustive

    monkeypatch.setattr(prod, "fetch_all_statuses", fetch_everything)

    env = {
        "GITHUB_SHA": "d" * 40,
        "GITEA_TOKEN": "tok",
        "CI_STATUS_TIMEOUT_SECONDS": "30",
    }
    assert prod.wait_for_ci_context(env) == "success"
|
||||
|
||||
|
||||
def test_wait_for_ci_context_times_out_fail_closed_when_required_absent(monkeypatch):
    """wait_for_ci_context must raise TimeoutError when the required context never appears.

    The raised message is expected to mention "missing".
    """
    # Genuinely-absent required context across all pages → never satisfied →
    # the gate times out rather than green-lighting the deploy (no fail-open).
    present_but_irrelevant = [
        {"id": 500, "context": "some-other (push)", "status": "success"},
    ]
    monkeypatch.setattr(
        prod, "fetch_all_statuses", lambda *a, **k: present_but_irrelevant
    )
    # 1-second timeout with a 1-second poll interval: a short budget that the
    # always-absent context can never satisfy, so the wait must end in
    # TimeoutError.
    try:
        prod.wait_for_ci_context(
            {
                "GITHUB_SHA": "e" * 40,
                "GITEA_TOKEN": "tok",
                "CI_STATUS_TIMEOUT_SECONDS": "1",
                "CI_STATUS_POLL_INTERVAL_SECONDS": "1",
            }
        )
    except TimeoutError as exc:
        assert "missing" in str(exc)
    else:
        raise AssertionError("expected fail-closed TimeoutError, not a satisfied gate")
|
||||
|
||||
@@ -25,11 +25,6 @@
|
||||
# T20 — ai-sop-ack APPROVED review excluded from security-review gate
|
||||
# T21 — stale-head APPROVED review → exit 1 (commit_id mismatch)
|
||||
# T22 — missing/non-official APPROVED review → exit 1 (official != true)
|
||||
# T23 — missing-commit_id APPROVED review → exit 1 (SEV-1 internal#812
|
||||
# fail-closed contract: a missing/empty commit_id is REJECTED, not
|
||||
# silently accepted as "older Gitea row" the way the pre-fix
|
||||
# gitea-merge-queue.py did. Closes the spoof-bug surface that
|
||||
# #843 had.)
|
||||
#
|
||||
# Hostile-self-review (per feedback_assert_exact_not_substring):
|
||||
# this test MUST FAIL if the script is absent. Verified by running
|
||||
@@ -432,22 +427,6 @@ T22_RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
assert_eq "T22 exit code 1 (missing official rejected)" "1" "$T22_RC"
|
||||
assert_contains "T22 no candidates error" "no candidates from reviews API or issue comments" "$T22_OUT"
|
||||
|
||||
# T23 — missing-commit_id APPROVED review must be rejected.
|
||||
# SEV-1 internal#812 (supersedes closed internal#843). A review with NO
|
||||
# commit_id field is the spoof-bug signature: a real reviewer cannot
|
||||
# have submitted against a commit that doesn't exist. The fail-closed
|
||||
# SSOT must REJECT — the pre-fix gitea-merge-queue.py silently accepted
|
||||
# these (the "older Gitea row" escape hatch), which is the exact surface
|
||||
# that closed #843 had. The Python unit tests in
|
||||
# test_approval_validator.py cover the predicate at the unit level;
|
||||
# this T23 covers the bash + jq pipeline end-to-end.
|
||||
echo
|
||||
echo "== T23 missing commit_id APPROVED review rejected (SEV-1 fail-closed) =="
|
||||
T23_OUT=$(run_review_check "T23_missing_commit_id")
|
||||
T23_RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
assert_eq "T23 exit code 1 (missing commit_id rejected)" "1" "$T23_RC"
|
||||
assert_contains "T23 no candidates error" "no candidates from reviews API or issue comments" "$T23_OUT"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
# - compute_ack_state (self-ack rejected, team probe applied, revoke
|
||||
# invalidates own prior ack, peer's ack survives unrevoked)
|
||||
# - render_status (state + description format)
|
||||
# - is_high_risk (label-driven, default fallback)
|
||||
# - get_tier_mode (label-driven, default fallback)
|
||||
# - load_config (default config parses cleanly with both PyYAML and
|
||||
# the bundled minimal parser)
|
||||
#
|
||||
@@ -432,6 +432,37 @@ class TestRenderStatus(unittest.TestCase):
|
||||
self.assertIn("body-unfilled", desc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_tier_mode
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetTierMode(unittest.TestCase):
    """Pin the mode ``sop.get_tier_mode`` returns for each tier label."""

    def setUp(self):
        # Every case resolves modes against the config loaded from CONFIG_PATH.
        self.cfg = sop.load_config(CONFIG_PATH)

    def _mode_for(self, *label_names):
        """Return the tier mode for a PR payload carrying ``label_names``."""
        labels = [{"name": name} for name in label_names]
        return sop.get_tier_mode({"labels": labels}, self.cfg)

    def test_tier_high_is_hard(self):
        self.assertEqual(self._mode_for("tier:high", "area:ci"), "hard")

    def test_tier_medium_is_hard(self):
        self.assertEqual(self._mode_for("tier:medium"), "hard")

    def test_tier_low_is_soft(self):
        self.assertEqual(self._mode_for("tier:low"), "soft")

    def test_no_tier_label_defaults_to_hard(self):
        # Per feedback_fix_root_not_symptom — never silently lower the bar.
        self.assertEqual(self._mode_for("area:ci"), "hard")

    def test_no_labels_defaults_to_hard(self):
        # Both an empty label list and a payload with no "labels" key at all.
        for pr in ({"labels": []}, {}):
            self.assertEqual(sop.get_tier_mode(pr, self.cfg), "hard")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# load_config
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -456,6 +487,13 @@ class TestLoadConfig(unittest.TestCase):
|
||||
},
|
||||
)
|
||||
|
||||
def test_default_config_tier_mode_shape(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
self.assertEqual(cfg["tier_failure_mode"]["tier:high"], "hard")
|
||||
self.assertEqual(cfg["tier_failure_mode"]["tier:medium"], "hard")
|
||||
self.assertEqual(cfg["tier_failure_mode"]["tier:low"], "soft")
|
||||
self.assertEqual(cfg["default_mode"], "hard")
|
||||
|
||||
def test_each_item_has_required_fields(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
for it in cfg["items"]:
|
||||
@@ -589,7 +627,7 @@ class TestComputeNaState(unittest.TestCase):
|
||||
class TestIsHighRisk(unittest.TestCase):
|
||||
"""The high-risk predicate decides which required_teams list applies.
|
||||
|
||||
Predicate: any label in cfg.high_risk_labels.
|
||||
Predicate: tier:high label OR any label in cfg.high_risk_labels.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
@@ -599,8 +637,23 @@ class TestIsHighRisk(unittest.TestCase):
|
||||
pr = {"labels": []}
|
||||
self.assertFalse(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_tier_high_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "tier:high"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_tier_low_is_default_class(self):
|
||||
pr = {"labels": [{"name": "tier:low"}]}
|
||||
self.assertFalse(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_tier_medium_is_default_class(self):
|
||||
# tier:medium alone is NOT high-risk (Option C — medium routes
|
||||
# to the wider engineers OR-set).
|
||||
pr = {"labels": [{"name": "tier:medium"}]}
|
||||
self.assertFalse(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_area_security_label_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "area:security"}]}
|
||||
pr = {"labels": [{"name": "tier:medium"}, {"name": "area:security"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_area_schema_label_is_high_risk(self):
|
||||
pr = {"labels": [{"name": "area:schema"}]}
|
||||
@@ -615,7 +668,7 @@ class TestIsHighRisk(unittest.TestCase):
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
def test_area_gate_meta_label_is_high_risk(self):
|
||||
# Gate-meta = changes to sop-checklist/sop-checklist itself.
|
||||
# Gate-meta = changes to sop-checklist/sop-tier-check itself.
|
||||
pr = {"labels": [{"name": "area:gate-meta"}]}
|
||||
self.assertTrue(sop.is_high_risk(pr, self.cfg))
|
||||
|
||||
@@ -669,7 +722,7 @@ class TestRootCauseAckEligibilityWidened(unittest.TestCase):
|
||||
root-cause / no-backwards-compat for the default class.
|
||||
|
||||
The dead-managers/ceo-persona-token gridlock is the symptom; the
|
||||
root cause is that sop-checklist ignored high-risk class. These tests
|
||||
root cause is that sop-checklist ignored tier-class. These tests
|
||||
pin the new wider-default behavior so it can't regress silently.
|
||||
"""
|
||||
|
||||
@@ -740,7 +793,7 @@ class TestHighRiskClassUsesElevatedListInConfig(unittest.TestCase):
|
||||
|
||||
def test_root_cause_high_risk_elevated_to_ceo_only(self):
|
||||
items = _items_by_slug()
|
||||
# area:schema alone makes the PR high-risk → root-cause needs ceo.
|
||||
# tier:high alone makes the PR high-risk → root-cause needs ceo.
|
||||
self.assertEqual(
|
||||
sop.resolve_required_teams(items["root-cause"], high_risk=True),
|
||||
["ceo"],
|
||||
|
||||
+272
@@ -0,0 +1,272 @@
|
||||
#!/usr/bin/env bash
|
||||
# Security regression test for the SOP tier-gate AUTHORIZATION bypass.
|
||||
#
|
||||
# Bug (fixed in fix/sop-tier-authz-no-org-fallback):
|
||||
# sop-tier-check.sh probed team membership at /teams/{id}/members/{user}.
|
||||
# If EVERY team probe failed (e.g. 403 — token lacks read:organization, or
|
||||
# any visibility/flakiness gap), it FELL BACK to /orgs/{org}/members/{user}
|
||||
# and credited that org member as a member of EVERY queried team. The
|
||||
# evaluator then treated those synthetic memberships as real, so a plain
|
||||
# NON-CEO org member satisfied tier:high (ceo). A visibility/auth gap became
|
||||
# a real highest-tier authorization PASS — privilege escalation.
|
||||
#
|
||||
# Fix (fail-closed authorization):
|
||||
# - The org-member ⇒ "member of all teams" fallback is REMOVED. Org
|
||||
# membership is never credited as team membership.
|
||||
# - A team probe that returns anything other than 200/204 (member) or 404
|
||||
# (verified non-member) is a CANNOT-VERIFY condition: the gate fails loud
|
||||
# (exit 1) with a cannot-verify status and never grants the tier.
|
||||
#
|
||||
# Method: this is a true end-to-end test. It prepends a fake `curl` to PATH
|
||||
# that serves canned Gitea API responses keyed by URL, then runs the REAL
|
||||
# sop-tier-check.sh. The fake exercises the genuine probe→credit→evaluate
|
||||
# path — no logic is re-implemented in the test.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
|
||||
SCRIPT="$SCRIPT_DIR/sop-tier-check.sh"
|
||||
|
||||
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
|
||||
[ -f "$SCRIPT" ] || { echo "::error::sop-tier-check.sh not found at $SCRIPT — test must fail loudly if the script is absent"; exit 1; }
|
||||
|
||||
# sop-tier-check.sh uses `declare -A` (associative arrays), which require
|
||||
# bash >= 4. CI runners (Ubuntu) ship bash 5; macOS ships 3.2. Resolve a
|
||||
# bash >= 4 to run the script under.
|
||||
# Print the path of the first candidate bash whose major version is >= 4.
# Returns 0 after printing a path, 1 when no candidate qualifies.
pick_bash() {
  local candidate resolved major
  for candidate in bash /opt/homebrew/bin/bash /usr/local/bin/bash /bin/bash; do
    # Resolve the candidate; an unresolvable one yields an empty string.
    resolved="$(command -v "$candidate" 2>/dev/null || true)"
    if [ -z "$resolved" ]; then
      continue
    fi
    # Ask the candidate itself for its major version; treat failure as 0.
    major="$("$resolved" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null || echo 0)"
    if [ "${major:-0}" -ge 4 ]; then
      echo "$resolved"
      return 0
    fi
  done
  return 1
}
|
||||
BASH4="$(pick_bash)" || { echo "::error::need bash >= 4 to run sop-tier-check.sh (associative arrays); none found"; exit 1; }
|
||||
echo "using bash: $BASH4 ($("$BASH4" -c 'echo $BASH_VERSION'))"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# assert_eq LABEL EXPECTED GOT
# String-compares EXPECTED with GOT, prints a PASS/FAIL line for LABEL and
# bumps the global PASS or FAIL counter.
assert_eq() {
  local label="$1" expected="$2" got="$3"
  if [ "$expected" != "$got" ]; then
    echo " FAIL $label"
    echo " expected: <$expected>"
    echo " got: <$got>"
    FAIL=$((FAIL + 1))
    return
  fi
  echo " PASS $label"
  PASS=$((PASS + 1))
}
|
||||
|
||||
# assert_contains LABEL HAYSTACK NEEDLE
# Passes when NEEDLE occurs in HAYSTACK as a literal substring; prints a
# PASS/FAIL line for LABEL and bumps the global PASS or FAIL counter.
#
# The match is done in-shell rather than with `printf ... | grep -qF`: this
# script runs under `set -o pipefail`, and grep -q exits at the first match,
# so a large haystack could kill printf with SIGPIPE and make the pipeline
# report failure even though the needle was found.
assert_contains() {
  local label="$1" haystack="$2" needle="$3"
  case "$haystack" in
    *"$needle"*)
      # Quoted "$needle" is matched literally, not as a glob pattern.
      echo " PASS $label"
      PASS=$((PASS + 1))
      ;;
    *)
      echo " FAIL $label (missing substring: <$needle>)"
      FAIL=$((FAIL + 1))
      ;;
  esac
}
|
||||
|
||||
# assert_not_contains LABEL HAYSTACK NEEDLE
# Passes when NEEDLE does NOT occur in HAYSTACK as a literal substring;
# prints a PASS/FAIL line for LABEL and bumps the global PASS or FAIL counter.
#
# The match is done in-shell rather than with `printf ... | grep -qF`: this
# script runs under `set -o pipefail`, and grep -q exits at the first match,
# so a large haystack could kill printf with SIGPIPE. The pipeline would then
# report failure and this negative assertion would PASS despite a real match.
assert_not_contains() {
  local label="$1" haystack="$2" needle="$3"
  case "$haystack" in
    *"$needle"*)
      # Quoted "$needle" is matched literally, not as a glob pattern.
      echo " FAIL $label (unexpected substring present: <$needle>)"
      FAIL=$((FAIL + 1))
      ;;
    *)
      echo " PASS $label"
      PASS=$((PASS + 1))
      ;;
  esac
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake-curl harness.
|
||||
#
|
||||
# The real script calls curl in three shapes:
|
||||
# (a) body capture: curl -sS -H AUTH URL -> prints JSON body
|
||||
# (b) http-code: curl -sS -o FILE -w '%{http_code}' -H AUTH URL
|
||||
# (c) http-code only: curl -sS -o /dev/null -w '%{http_code}' -H AUTH URL
|
||||
#
|
||||
# Our fake reads the URL (last non-flag arg), looks up a response in fixture
|
||||
# files under $FIXDIR, and emits body and/or http-code accordingly.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# make_harness FIXDIR
# Writes an executable fake `curl` into FIXDIR/bin and prints that bin
# directory, so the caller can prepend it to PATH.
make_harness() {
  # $1 = scenario dir to populate with fixtures
  local fixture_root="$1"
  local bin_dir="$fixture_root/bin"
  mkdir -p "$bin_dir"
  cat > "$bin_dir/curl" <<'FAKE'
#!/usr/bin/env bash
# Fake curl for sop-tier-check authz tests. Looks up canned responses by URL.
set -u
FIXDIR="${SOP_TEST_FIXDIR:?SOP_TEST_FIXDIR unset}"

url=""
out=""
want_code="no"
prev=""
for a in "$@"; do
  # The argument following -o is the output file.
  if [ "$prev" = "-o" ]; then out="$a"; fi
  case "$a" in
    http*://*) url="$a" ;;
    # -w '%{http_code}' arrives as the value of the -w flag
    '%{http_code}') want_code="yes" ;;
  esac
  prev="$a"
done

# Map URL -> fixture key (a filename-safe slug).
# We only need the path after /api/v1.
path="${url#*/api/v1}"
slug="$(printf '%s' "$path" | tr '/?=&' '____')"

body_file="$FIXDIR/body${slug}"
code_file="$FIXDIR/code${slug}"

# Body comes from the fixture when one exists, otherwise it is empty.
# It goes to the -o target when given, else to stdout.
body=""
if [ -f "$body_file" ]; then body="$(cat "$body_file")"; fi
if [ -z "$out" ]; then
  printf '%s' "$body"
else
  printf '%s' "$body" > "$out"
fi

# Emit http code when requested; default to 200 without a code fixture.
if [ "$want_code" = "yes" ]; then
  code="200"
  if [ -f "$code_file" ]; then code="$(cat "$code_file")"; fi
  printf '%s' "$code"
fi
exit 0
FAKE
  chmod +x "$bin_dir/curl"
  echo "$bin_dir"
}
|
||||
|
||||
# Common fixtures shared by scenarios. $1 = FIXDIR, $2 = approver login,
|
||||
# $3 = tier label name (e.g. tier:high), $4 = teams JSON.
|
||||
# seed_common FIXDIR APPROVER TIER TEAMS_JSON
# Writes the fixture files every scenario shares into FIXDIR: the token
# identity, PR 42's head sha, its single tier label, the org team list, and
# one APPROVED review on the current head by APPROVER.
seed_common() {
  local fixture_root="$1" approver="$2" tier="$3" teams_json="$4"
  # Slug prefix shared by every repo-scoped fixture file.
  local repo_slug="repos_molecule-ai_molecule-core"
  mkdir -p "$fixture_root"

  # /user -> whoami
  printf '%s' '{"login":"sop-bot"}' > "$fixture_root/body_user"

  # PR head sha
  printf '%s' '{"head":{"sha":"headsha1"}}' \
    > "$fixture_root/body_${repo_slug}_pulls_42"

  # labels
  printf '[{"name":"%s"}]' "$tier" \
    > "$fixture_root/body_${repo_slug}_issues_42_labels"

  # org teams list
  printf '%s' "$teams_json" > "$fixture_root/body_orgs_molecule-ai_teams"
  printf '%s' '200' > "$fixture_root/code_orgs_molecule-ai_teams"

  # reviews: one APPROVED on current head by $approver
  printf '[{"state":"APPROVED","commit_id":"headsha1","user":{"login":"%s"}}]' "$approver" \
    > "$fixture_root/body_${repo_slug}_pulls_42_reviews"
}
|
||||
|
||||
# run_script FIXDIR
# Runs $SCRIPT under $BASH4 with FIXDIR/bin (the fake curl) first on PATH and
# a fixed fake environment. Prints the combined stdout+stderr and returns the
# script's exit status. OUT and RC are assigned without `local`.
run_script() {
  # $1 = FIXDIR (must contain bin/curl).
  local fixture_root="$1"
  local fake_bin="$fixture_root/bin"

  # errexit is suspended so a non-zero exit from the script can be captured.
  set +e
  OUT=$(
    SOP_TEST_FIXDIR="$fixture_root" \
    PATH="$fake_bin:$PATH" \
    GITEA_TOKEN="faketoken" \
    GITEA_HOST="git.moleculesai.app" \
    REPO="molecule-ai/molecule-core" \
    PR_NUMBER="42" \
    PR_AUTHOR="pr-author" \
    SOP_DEBUG="0" \
    SOP_LEGACY_CHECK="0" \
    "$BASH4" "$SCRIPT" 2>&1
  )
  RC=$?
  set -e

  printf '%s' "$OUT"
  return $RC
}
|
||||
|
||||
# Org teams served to every scenario; the ceo team has id=10, which is the
# id the per-scenario team-membership probe fixtures below refer to.
TEAMS_JSON='[{"name":"ceo","id":10},{"name":"engineers","id":11},{"name":"managers","id":12}]'

echo "=============================================================="
echo "Scenario 1: tier:high, team probe 403 (cannot read), approver"
echo " is a plain org member but NOT in ceo team."
echo " EXPECT: tier NOT granted (fail-closed cannot-verify)."
echo "=============================================================="
S1="$(mktemp -d)"
make_harness "$S1" >/dev/null
seed_common "$S1" "org-only-bob" "tier:high" "$TEAMS_JSON"
# Team membership probe for ceo (id=10) returns 403 — cannot read.
printf '%s' '403' > "$S1/code_teams_10_members_org-only-bob"
# The OLD bug path: org membership probe would 204 and synthetic-credit.
printf '%s' '204' > "$S1/code_orgs_molecule-ai_members_org-only-bob"
set +e
OUT1="$(run_script "$S1")"; RC1=$?
set -e
echo "$OUT1" | sed 's/^/ /'
echo " (exit=$RC1)"
# Any non-zero status counts; normalise it to 1/0 so assert_eq can compare.
assert_eq "S1 exit non-zero (tier NOT granted)" "1" "$([ "$RC1" -ne 0 ] && echo 1 || echo 0)"
assert_not_contains "S1 did NOT print PASSED" "$OUT1" "sop-tier-check PASSED"
assert_contains "S1 cannot-verify error surfaced" "$OUT1" "CANNOT VERIFY"
assert_contains "S1 names the unreadable probe (403)" "$OUT1" "HTTP 403"
rm -rf "$S1"

echo
echo "=============================================================="
echo "Scenario 2: tier:high, genuine ceo team member (probe 204)."
echo " EXPECT: tier GRANTED."
echo "=============================================================="
S2="$(mktemp -d)"
make_harness "$S2" >/dev/null
seed_common "$S2" "real-ceo" "tier:high" "$TEAMS_JSON"
printf '%s' '204' > "$S2/code_teams_10_members_real-ceo" # ceo team: member
set +e
OUT2="$(run_script "$S2")"; RC2=$?
set -e
echo "$OUT2" | sed 's/^/ /'
echo " (exit=$RC2)"
assert_eq "S2 exit zero (granted)" "0" "$RC2"
assert_contains "S2 printed PASSED" "$OUT2" "sop-tier-check PASSED"
rm -rf "$S2"

echo
echo "=============================================================="
echo "Scenario 3: tier:high, approver is an org member but a VERIFIED"
echo " non-member of ceo (team probe 404). Org probe would"
echo " 204 — must NEVER be synthetic-credited."
echo " EXPECT: tier NOT granted (clause FAIL), no fallback."
echo "=============================================================="
S3="$(mktemp -d)"
make_harness "$S3" >/dev/null
seed_common "$S3" "org-member-carol" "tier:high" "$TEAMS_JSON"
printf '%s' '404' > "$S3/code_teams_10_members_org-member-carol" # verified NOT in ceo
printf '%s' '204' > "$S3/code_orgs_molecule-ai_members_org-member-carol" # org member (must be ignored)
set +e
OUT3="$(run_script "$S3")"; RC3=$?
set -e
echo "$OUT3" | sed 's/^/ /'
echo " (exit=$RC3)"
assert_eq "S3 exit non-zero (tier NOT granted)" "1" "$([ "$RC3" -ne 0 ] && echo 1 || echo 0)"
assert_not_contains "S3 did NOT print PASSED" "$OUT3" "sop-tier-check PASSED"
assert_contains "S3 reported a real clause FAIL (not cannot-verify)" "$OUT3" "FAILED for tier:high"
assert_not_contains "S3 did NOT cannot-verify (404 is a verified negative)" "$OUT3" "CANNOT VERIFY"
rm -rf "$S3"

echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
# Exit status of the whole test file: zero only when nothing failed.
[ "$FAIL" -eq 0 ]
|
||||
+101
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for #229 — sop-tier-check tier:low OR-clause splitter.
|
||||
#
|
||||
# Bug (PR #225 → still broken after PR #231):
|
||||
# Line ~289 of sop-tier-check.sh used:
|
||||
# _clause=$(echo "$_raw_clause" | tr -d '()' | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
|
||||
# `tr -d '[:space:]'` strips the newlines that `tr ',' '\n'` just
|
||||
# inserted, collapsing "engineers,managers,ceo" into a single token
|
||||
# "engineersmanagersceo". The for-loop then iterates ONCE on a name
|
||||
# that matches no team, so every tier:low PR fails:
|
||||
# ::error::clause [engineers/managers/ceo]: FAIL — no approving
|
||||
# reviewer belongs to any of these teamsengineersmanagersceo
|
||||
# (note also: missing separators in the error string is bug #2 —
|
||||
# `_clause_names` used "${var:+, }$x" which OVERWRITES per iteration).
|
||||
#
|
||||
# Fix shape (this PR):
|
||||
# _no_parens=${_raw_clause//[()]/}
|
||||
# _clause=${_no_parens//,/ } # comma -> space, bash word-split iterates
|
||||
# _clause_names="${_clause_names}${_clause_names:+, }${_t}" # APPEND, not overwrite
|
||||
#
|
||||
# This test extracts the splitter logic and asserts it produces the right
|
||||
# token list for each of the three tier expressions live in the script.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# Compare two strings and record the outcome.
#   $1 = label shown in the report
#   $2 = expected value
#   $3 = actual value
# Increments the global PASS or FAIL counter; on mismatch also prints both
# values. Always returns 0 so a failed assertion does not trip `set -e`.
assert_eq() {
  local label="$1"
  local expected="$2"
  local got="$3"
  if [ "$expected" = "$got" ]; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label"
    echo " expected: <$expected>"
    echo " got: <$got>"
    FAIL=$((FAIL + 1))
  fi
}
|
||||
|
||||
# ----- Splitter under test (mirrors the fixed sop-tier-check.sh block) -----
# Split one raw clause into its team tokens.
#   $1 = raw clause, e.g. "engineers,managers,ceo" or "(managers,ceo)"
# Prints the tokens joined with '|' (parentheses removed, commas treated as
# separators). The splitting logic is intentionally kept identical to the
# script it mirrors; only the loop variable is now function-local.
#
# NOTE(review): `$clause` is expanded unquoted on purpose (word-splitting is
# the mechanism being tested), which also subjects tokens containing glob
# characters to pathname expansion — a token such as "qa???" would expand if
# a matching file exists in the working directory. Confirm whether the real
# script should run this under `set -f`.
split_clause() {
  local raw="$1"
  local no_parens=${raw//[()]/}
  local clause=${no_parens//,/ }
  local out=""
  local _t
  for _t in $clause; do
    out="${out}${out:+|}$_t"
  done
  echo "$out"
}
|
||||
|
||||
echo "test: tier:low OR-clause splits to 3 tokens"
assert_eq "tier:low" "engineers|managers|ceo" "$(split_clause "engineers,managers,ceo")"

echo "test: tier:medium AND-expression — bash word-split on \$EXPR yields 5 tokens"
EXPR="managers AND engineers AND qa???,security???"
out=""
# $EXPR is deliberately unquoted: whitespace word-splitting produces the
# raw clauses, each of which is then split on commas by split_clause.
for _raw in $EXPR; do
  out="${out}${out:+ ; }$(split_clause "$_raw")"
done
assert_eq "tier:medium" "managers ; AND ; engineers ; AND ; qa???|security???" "$out"

echo "test: tier:high single-team OR-clause"
assert_eq "tier:high" "ceo" "$(split_clause "ceo")"

echo "test: paren-wrapped OR-set unwraps + splits"
assert_eq "paren OR" "managers|ceo" "$(split_clause "(managers,ceo)")"

# ----- _clause_names accumulator (was overwriting per iteration) -----
acc=""
for t in engineers managers ceo; do
  # ${acc:+, } expands to ", " only once acc is non-empty, so the separator
  # appears between items but never in front of the first one.
  acc="${acc}${acc:+, }${t}"
done
assert_eq "_clause_names append" "engineers, managers, ceo" "$acc"

# ----- _failed_clauses / _passed_clauses accumulator across raw clauses -----
acc=""
for c in clauseA clauseB clauseC; do
  acc="${acc}${acc:+, }${c}"
done
assert_eq "_failed_clauses append" "clauseA, clauseB, clauseC" "$acc"

# ----- End-to-end OR-gate: simulate APPROVER_TEAMS[core-lead]=' managers ' -----
# The script's case pattern is *${_t}* with a space-padded value.
APPROVER_TEAMS_VAL=" managers "
matched=""
for _t in $(split_clause "engineers,managers,ceo" | tr '|' ' '); do
  case "$APPROVER_TEAMS_VAL" in
    *${_t}*) matched="$_t"; break ;;
  esac
done
assert_eq "OR-gate matches managers" "managers" "$matched"

echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
# Exit status of the whole test file: zero only when nothing failed.
[ "$FAIL" -eq 0 ]
|
||||
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for internal#816 — sop-tier-check must ignore APPROVED
|
||||
# reviews that were submitted against an old PR head SHA.
|
||||
#
|
||||
# Bug: the script collected approvers with
|
||||
# jq '[.[] | select(.state=="APPROVED") | .user.login]'
|
||||
# without filtering on .commit_id == HEAD_SHA. After a PR head moved,
|
||||
# stale approvals looked valid to the tier gate.
|
||||
#
|
||||
# Fix: the jq filter now includes
|
||||
# select(.state=="APPROVED" and .commit_id == $head_sha)
|
||||
# where $head_sha is the current PR head fetched from the API.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# jq may not be on PATH in all environments (e.g. dev containers), so a
# copy dropped in /tmp/bin is accepted as a fallback. The directory is
# APPENDED rather than prepended: /tmp is world-writable, and an entry ahead
# of the system directories would let any local user shadow jq (or any other
# command this script runs).
PATH="$PATH:/tmp/bin"
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# Compare two strings and record the outcome.
#   $1 = label shown in the report
#   $2 = expected value
#   $3 = actual value
# Increments the global PASS or FAIL counter; on mismatch also prints both
# values. Always returns 0 so a failed assertion does not trip `set -e`.
assert_eq() {
  local label="$1"
  local expected="$2"
  local got="$3"
  if [ "$expected" = "$got" ]; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label"
    echo " expected: <$expected>"
    echo " got: <$got>"
    FAIL=$((FAIL + 1))
  fi
}
|
||||
|
||||
# Sample reviews matching the shape from Gitea API
REVIEWS_JSON='[
  {"state":"APPROVED","commit_id":"abc123","user":{"login":"bob"}},
  {"state":"APPROVED","commit_id":"old456","user":{"login":"alice"}},
  {"state":"COMMENT","commit_id":"abc123","user":{"login":"carol"}},
  {"state":"APPROVED","commit_id":"abc123","user":{"login":"dave"}},
  {"state":"REQUEST_CHANGES","commit_id":"abc123","user":{"login":"eve"}}
]'

# The filter under test, written once so all three cases exercise the exact
# same expression. $head_sha is a jq variable supplied with --arg; the
# single quotes keep the shell from touching it.
APPROVERS_FILTER='[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]'

echo "test: jq filter keeps only APPROVED on current head"
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "abc123" "$APPROVERS_FILTER")
# jq prints one login per line; join them with single spaces for comparison.
assert_eq "current-head approvers" "bob dave" "$(echo "$GOT" | tr '\n' ' ' | sed 's/ $//')"

echo "test: jq filter with all-stale reviews yields empty"
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "new789" "$APPROVERS_FILTER")
assert_eq "all-stale yields empty" "" "$GOT"

echo "test: jq filter handles null commit_id gracefully"
NULL_JSON='[{"state":"APPROVED","commit_id":null,"user":{"login":"mallory"}}]'
GOT=$(echo "$NULL_JSON" | jq -r --arg head_sha "abc123" "$APPROVERS_FILTER")
assert_eq "null commit_id excluded" "" "$GOT"

echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
# Exit status of the whole test file: zero only when nothing failed.
[ "$FAIL" -eq 0 ]
|
||||
Executable
+304
@@ -0,0 +1,304 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tests for sop-tier-refire.{yml,sh} — internal#292.
|
||||
#
|
||||
# Behavior matrix:
|
||||
#
|
||||
# T1: PR open + APPROVED via tier:low → script invokes sop-tier-check
|
||||
# and POSTs status=success.
|
||||
# T2: PR open + missing tier label → sop-tier-check exits non-zero;
|
||||
# refire POSTs status=failure (fail-closed: it never forges a green
|
||||
# on the required context) while the refire job itself exits 0.
|
||||
# T3: PR open + tier:low but NO approving reviews → sop-tier-check
|
||||
# exits non-zero; refire POSTs status=failure for the same reason.
|
||||
# T4: PR CLOSED → refire exits 0 with no status POST (no-op on closed).
|
||||
# T5: Rate-limit — recent status update within 30s → refire skips,
|
||||
# no new POST.
|
||||
# T6 (yaml-lint): refire workflow does NOT listen on issue_comment, exposes
|
||||
# workflow_dispatch and never checks out the PR head; the SSOT dispatcher (sop-checklist.yml) listens on issue_comment and handles the slash commands.
|
||||
# T7 (yaml-lint): workflow file is parseable YAML.
|
||||
#
|
||||
# Tests T1-T5 run the real script against a local-fixture HTTP server
|
||||
# (python http.server with a stub handler — `tests/_refire_fixture.py`)
|
||||
# so the script's Gitea API calls hit the fixture, not the real Gitea.
|
||||
#
|
||||
# Tests T6/T7 are pure YAML checks against the workflow file.
|
||||
#
|
||||
# Hostile-self-review (per feedback_assert_exact_not_substring):
|
||||
# this test MUST FAIL if the workflow or script is absent. Verified by
|
||||
# running the test before the files exist (covered in the PR body).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
|
||||
WORKFLOW_DIR="$(cd "$THIS_DIR/../../workflows" && pwd)"
|
||||
WORKFLOW="$WORKFLOW_DIR/sop-tier-refire.yml"
|
||||
DISPATCH_WORKFLOW="$WORKFLOW_DIR/sop-checklist.yml"
|
||||
SCRIPT="$SCRIPT_DIR/sop-tier-refire.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
FAILED_TESTS=""
|
||||
|
||||
# Compare two strings and record the outcome.
#   $1 = label shown in the report
#   $2 = expected value
#   $3 = actual value
# Increments the global PASS or FAIL counter; on mismatch also prints both
# values and appends the label to FAILED_TESTS. Always returns 0 so a failed
# assertion does not trip `set -e`.
assert_eq() {
  local label="$1"
  local expected="$2"
  local got="$3"
  if [ "$expected" = "$got" ]; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label"
    echo " expected: <$expected>"
    echo " got: <$got>"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${label}"
  fi
}
|
||||
|
||||
# Assert that a fixed string occurs somewhere in a larger text.
#   $1 = label shown in the report
#   $2 = needle   (matched literally — grep -F, no regex interpretation)
#   $3 = haystack
# Increments PASS or FAIL; on failure prints the needle and the first 400
# bytes of the haystack, and appends the label to FAILED_TESTS.
assert_contains() {
  local label="$1"
  local needle="$2"
  local haystack="$3"
  if printf '%s' "$haystack" | grep -qF "$needle"; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label"
    echo " needle: <$needle>"
    echo " haystack: <$(printf '%s' "$haystack" | head -c 400)>"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${label}"
  fi
}
|
||||
|
||||
# Assert that a path exists and is a regular file.
#   $1 = label shown in the report
#   $2 = path to check
# Increments PASS or FAIL; on failure appends the label to FAILED_TESTS.
assert_file_exists() {
  local label="$1"
  local path="$2"
  if [ -f "$path" ]; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label (not found: $path)"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${label}"
  fi
}
|
||||
|
||||
# Existence (foundation — every other test depends on these)
echo
echo "== existence =="
assert_file_exists "workflow file exists" "$WORKFLOW"
assert_file_exists "SSOT dispatcher workflow file exists" "$DISPATCH_WORKFLOW"
assert_file_exists "script file exists" "$SCRIPT"
if [ "$FAIL" -gt 0 ]; then
  echo
  echo "------"
  echo "PASS=$PASS FAIL=$FAIL (existence)"
  echo "Cannot proceed without these files."
  exit 1
fi

# T6 / T7 — workflow YAML structure
echo
echo "== T6/T7 workflow yaml =="

# YAML parseability. `|| true` keeps a parse failure from tripping `set -e`;
# the failure then shows up as a PARSE_OUT that is not exactly "ok".
PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$WORKFLOW" 2>&1 || true)
assert_eq "T7 workflow parses as YAML" "ok" "$PARSE_OUT"

# The old per-workflow issue_comment listener caused queue storms because
# Gitea queues jobs before evaluating job-level `if:`. The script remains,
# but comment-triggered refires route through the single dispatcher.
WORKFLOW_CONTENT=$(cat "$WORKFLOW")
# Match an `issue_comment:` key at any indentation depth, so the check does
# not depend on how many spaces the workflow uses under `on:`. Commented-out
# lines start with '#' and therefore do not match.
if printf '%s' "$WORKFLOW_CONTENT" | grep -q '^[[:space:]]*issue_comment:'; then
  echo " FAIL T6a manual fallback workflow must not listen on issue_comment"
  FAIL=$((FAIL + 1))
  FAILED_TESTS="${FAILED_TESTS} T6a"
else
  echo " PASS T6a manual fallback workflow does not listen on issue_comment"
  PASS=$((PASS + 1))
fi
assert_contains "T6b workflow exposes workflow_dispatch" \
  "workflow_dispatch" "$WORKFLOW_CONTENT"
assert_contains "T6c workflow documents unsupported manual inputs" \
  "workflow_dispatch inputs" "$WORKFLOW_CONTENT"
# Does NOT check out PR HEAD (security)
if grep -q 'ref: \${{ github.event.pull_request.head' "$WORKFLOW"; then
  echo " FAIL T6d workflow MUST NOT check out PR head (security)"
  FAIL=$((FAIL + 1))
  FAILED_TESTS="${FAILED_TESTS} T6d"
else
  echo " PASS T6d workflow does not check out PR head"
  PASS=$((PASS + 1))
fi

DISPATCH_PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$DISPATCH_WORKFLOW" 2>&1 || true)
assert_eq "T6e SSOT dispatcher workflow parses as YAML" "ok" "$DISPATCH_PARSE_OUT"
DISPATCH_CONTENT=$(cat "$DISPATCH_WORKFLOW")
assert_contains "T6f SSOT dispatcher listens on issue_comment" \
  "issue_comment" "$DISPATCH_CONTENT"
assert_contains "T6g SSOT dispatcher handles /qa-recheck" \
  "/qa-recheck" "$DISPATCH_CONTENT"
assert_contains "T6h SSOT dispatcher handles /security-recheck" \
  "/security-recheck" "$DISPATCH_CONTENT"
assert_contains "T6i SSOT dispatcher handles /refire-tier-check" \
  "/refire-tier-check" "$DISPATCH_CONTENT"
|
||||
|
||||
# T1-T5 — script behavior against a local Gitea-fixture
echo
echo "== T1-T5 script behavior (vs local fixture) =="

# Spin up the fixture HTTP server.
FIXTURE_DIR=$(mktemp -d)
# Registered before the server is started, hence the ${FIX_PID:-} guard;
# the trailing `|| true` keeps the trap from changing the exit status.
trap 'rm -rf "$FIXTURE_DIR"; [ -n "${FIX_PID:-}" ] && kill "$FIX_PID" 2>/dev/null || true' EXIT
FIXTURE_PY="$THIS_DIR/_refire_fixture.py"
if [ ! -f "$FIXTURE_PY" ]; then
  echo "::error::fixture server $FIXTURE_PY missing"
  exit 1
fi

FIX_LOG="$FIXTURE_DIR/fixture.log"
FIX_STATE_DIR="$FIXTURE_DIR/state"
mkdir -p "$FIX_STATE_DIR"

# Find an unused port: bind to port 0, read back the port the OS picked,
# then release it for the fixture server to claim.
FIX_PORT=$(python3 -c 'import socket;s=socket.socket();s.bind(("127.0.0.1",0));print(s.getsockname()[1]);s.close()')

FIXTURE_STATE_DIR="$FIX_STATE_DIR" python3 "$FIXTURE_PY" "$FIX_PORT" \
  >"$FIX_LOG" 2>&1 &
FIX_PID=$!

# Wait for fixture readiness (up to 50 polls, 0.1s apart).
for _ in $(seq 1 50); do
  if curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
    break
  fi
  sleep 0.1
done
# One final probe decides success; the loop above only waits.
if ! curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
  echo "::error::fixture server failed to start. Log:"
  cat "$FIX_LOG"
  exit 1
fi
|
||||
|
||||
# Helper: set fixture state for a scenario, then run the script.
#   $1 = scenario name, written to $FIX_STATE_DIR/scenario for the fixture
#   $2 = tier_result, one of: pass | fail_no_label | fail_no_approvals
#        (default: pass); handed to the mock via MOCK_TIER_RESULT.
# Results are left in $FIX_STATE_DIR/last_run.log (combined stdout+stderr)
# and $FIX_STATE_DIR/last_rc (exit status) for the caller to read back.
#
# The refire script's tier-check invocation is mocked because the real
# sop-tier-check.sh uses bash 4+ associative arrays — incompatible with
# the macOS bash 3.2 dev shell. Linux Gitea runners use bash 4/5 so
# production runs the real script. The mock exercises the success +
# failure branches of refire's status-POST glue.
run_scenario() {
  local scenario="$1"
  local tier_result="${2:-pass}"
  echo "$scenario" >"$FIX_STATE_DIR/scenario"
  : >"$FIX_STATE_DIR/posted_statuses.jsonl" # clear status log

  # `out` is declared on its own line so that `$?` below reflects the
  # command substitution rather than the `local` builtin.
  local out
  set +e
  out=$(
    PATH="$FIXTURE_DIR/bin:$PATH" \
    GITEA_TOKEN="fixture-token" \
    GITEA_HOST="fixture.local" \
    REPO="molecule-ai/molecule-core" \
    PR_NUMBER="999" \
    COMMENT_AUTHOR="test-runner" \
    SOP_REFIRE_DISABLE_RATE_LIMIT="1" \
    SOP_REFIRE_TIER_CHECK_SCRIPT="$THIS_DIR/_mock_tier_check.sh" \
    MOCK_TIER_RESULT="$tier_result" \
    FIXTURE_PORT="$FIX_PORT" \
    bash "$SCRIPT" 2>&1
  )
  local rc=$?
  set -e
  echo "$out" >"$FIX_STATE_DIR/last_run.log"
  echo "$rc" >"$FIX_STATE_DIR/last_rc"
}
|
||||
|
||||
# Install a curl shim that rewrites https://fixture.local → http://127.0.0.1:$PORT
# Use bash prefix-strip (${var#prefix}) — it sidesteps the `/` delimiter
# confusion of ${var/pattern/replacement}.
mkdir -p "$FIXTURE_DIR/bin"
# Resolve the real curl now, while the shim directory is not yet on PATH,
# and bake its absolute path into the shim. This avoids assuming curl lives
# in /usr/bin and keeps the shim from ever exec'ing itself.
REAL_CURL="$(command -v curl)"
# The heredoc is unquoted on purpose: ${FIX_PORT} and ${REAL_CURL} are
# expanded now, while every \$ is written out literally for the shim to
# evaluate when it runs.
cat >"$FIXTURE_DIR/bin/curl" <<SHIM
#!/usr/bin/env bash
# Test shim: rewrite https://fixture.local/* -> http://127.0.0.1:${FIX_PORT}/*
# The fixture doesn't authenticate; -H Authorization passes through harmlessly.
new_args=()
for a in "\$@"; do
  if [[ "\$a" == https://fixture.local/* ]]; then
    rest="\${a#https://fixture.local}"
    a="http://127.0.0.1:${FIX_PORT}\${rest}"
  fi
  new_args+=("\$a")
done
exec "${REAL_CURL}" "\${new_args[@]}"
SHIM
chmod +x "$FIXTURE_DIR/bin/curl"
|
||||
|
||||
# T1: tier:low + 1 APPROVED + author is in engineers team → success
run_scenario "T1_success" "pass"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T1 exit code 0 (success)" "0" "$RC"
assert_contains "T1 POSTed state=success" '"state": "success"' "$POSTED"
assert_contains "T1 POST context is sop-tier-check / tier-check" \
  '"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
assert_contains "T1 description names commenter" "test-runner" "$POSTED"

# T2: missing tier label → tier-check fails internally (mock exits 1).
# FAIL-CLOSED contract (fix/core-ci-fail-closed): refire now captures the
# REAL exit code and POSTs state=failure — it does NOT forge a green on
# the required context. The refire job itself still exits 0 (it succeeded
# at posting an honest failure status).
run_scenario "T2_no_tier_label" "fail_no_label"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T2 exit code 0 (posted an honest status)" "0" "$RC"
assert_contains "T2 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"

# T3: tier:low present but ZERO approving reviews → internal tier check
# fails (mock exits 1). Refire POSTs state=failure, never a false green.
run_scenario "T3_no_approvals" "fail_no_approvals"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T3 exit code 0 (posted an honest status)" "0" "$RC"
assert_contains "T3 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"

# T4: closed PR — refire is a no-op (no POST, exit 0)
run_scenario "T4_closed" "pass"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T4 closed PR exits 0" "0" "$RC"
assert_eq "T4 closed PR posts no status" "" "$POSTED"

# T5: rate-limit — disable the env override and let scenario set a
# recent statuses entry. Re-enable rate-limit for this scenario by NOT
# passing SOP_REFIRE_DISABLE_RATE_LIMIT. run_scenario always sets that
# variable, so the script is invoked directly here instead.
echo "T5_rate_limited" >"$FIX_STATE_DIR/scenario"
: >"$FIX_STATE_DIR/posted_statuses.jsonl"
set +e
T5_OUT=$(
  PATH="$FIXTURE_DIR/bin:$PATH" \
  GITEA_TOKEN="fixture-token" \
  GITEA_HOST="fixture.local" \
  REPO="molecule-ai/molecule-core" \
  PR_NUMBER="999" \
  COMMENT_AUTHOR="test-runner" \
  FIXTURE_PORT="$FIX_PORT" \
  bash "$SCRIPT" 2>&1
)
T5_RC=$?
set -e
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T5 rate-limited exits 0" "0" "$T5_RC"
assert_contains "T5 rate-limited log says skipped" "rate-limited" "$T5_OUT"
assert_eq "T5 rate-limited posts no status" "" "$POSTED"

echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
if [ "$FAIL" -gt 0 ]; then
  echo "Failed:$FAILED_TESTS"
fi
# Exit status of the whole test file: zero only when nothing failed.
[ "$FAIL" -eq 0 ]
|
||||
@@ -1,474 +0,0 @@
|
||||
import importlib.util
|
||||
import json
|
||||
import pathlib
|
||||
import urllib.error
|
||||
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parents[1]
|
||||
SCRIPT = ROOT / "umbrella-reaper.py"
|
||||
|
||||
|
||||
def load_reaper():
    """Load ``umbrella-reaper.py`` by path and point it at fixture settings.

    The script is loaded from ``SCRIPT`` with ``importlib`` rather than a
    normal ``import`` (its filename contains a hyphen). Every call executes
    the script again and returns a new module object, so tests never share
    module state. After execution the module-level ``API``, ``GITEA_TOKEN``,
    ``GITEA_HOST`` and ``REPO`` attributes are overwritten with test values.

    Returns:
        The freshly executed module.
    """
    spec = importlib.util.spec_from_file_location("umbrella_reaper", SCRIPT)
    # Check before first use: spec_from_file_location may return None, and
    # module_from_spec would then fail with a far less helpful error.
    assert spec is not None and spec.loader is not None
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    mod.API = "https://git.example.test/api/v1"
    mod.GITEA_TOKEN = "fixture-token"
    mod.GITEA_HOST = "git.example.test"
    mod.REPO = "owner/repo"
    return mod
|
||||
|
||||
|
||||
class FakeResponse:
    """Context-manager object that serves a canned JSON payload.

    Exposes ``status`` (always 200) and ``read()``.
    NOTE(review): presumably a stand-in for the object returned by
    ``urllib.request.urlopen`` — no user of this class is visible in this
    part of the file; confirm against the tests that use it.
    """

    status = 200

    def __init__(self, payload):
        # Any JSON-serialisable object; encoded lazily on each read().
        self.payload = payload

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # False: never suppress an exception raised inside the with-block.
        return False

    def read(self):
        """Return the payload as UTF-8 encoded JSON bytes."""
        return json.dumps(self.payload).encode("utf-8")
|
||||
|
||||
|
||||
def _pr_fixture(number: int, sha: str) -> dict:
    """Build a minimal pull-request dict with a number and a head SHA."""
    return {"number": number, "head": {"sha": sha}}
|
||||
|
||||
|
||||
def _status_entry(context: str, state: str) -> dict:
    """Build one commit-status entry; the state is stored under ``status``."""
    return {"context": context, "status": state}
|
||||
|
||||
|
||||
def test_process_pr_compensates_when_all_sub_jobs_success(monkeypatch):
    """A failed umbrella with all required sub-jobs green gets one compensating status."""
    mod = load_reaper()
    posted = []

    def fake_post_status(sha, context, description):
        posted.append((sha, context, description))

    monkeypatch.setattr(mod, "post_status", fake_post_status)
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )

    pr = _pr_fixture(1, "abc123")

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)

    ok = mod.process_pr(pr)
    assert ok is True
    # Exactly one status, on the PR head, for the umbrella context.
    assert len(posted) == 1
    assert posted[0][0] == "abc123"
    assert posted[0][1] == "CI / all-required (pull_request)"
    assert "Compensating status" in posted[0][2]
|
||||
|
||||
|
||||
def test_process_pr_skips_when_umbrella_missing(monkeypatch):
    """Nothing is posted when the umbrella context is absent from the statuses."""
    mod = load_reaper()
    posted = []
    monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
    monkeypatch.setattr(mod, "REQUIRED_SUB_JOBS", ["CI / Platform (Go) (pull_request)"])

    pr = _pr_fixture(2, "def456")

    def fake_combined_status(sha):
        # Only the sub-job is reported; there is no all-required entry.
        return {
            "statuses": [
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)

    ok = mod.process_pr(pr)
    assert ok is True
    assert posted == []
|
||||
|
||||
|
||||
def test_process_pr_skips_when_sub_job_pending(monkeypatch):
    """Nothing is posted while a required sub-job is still pending."""
    mod = load_reaper()
    posted = []
    monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )

    pr = _pr_fixture(3, "ghi789")

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "pending"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)

    ok = mod.process_pr(pr)
    assert ok is True
    assert posted == []
|
||||
|
||||
|
||||
def test_process_pr_skips_when_sub_job_failure(monkeypatch):
    """Nothing is posted when a required sub-job genuinely failed."""
    mod = load_reaper()
    posted = []
    monkeypatch.setattr(mod, "post_status", lambda *a, **k: posted.append(a))
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )

    pr = _pr_fixture(4, "jkl012")

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "failure"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)

    ok = mod.process_pr(pr)
    assert ok is True
    assert posted == []
|
||||
|
||||
|
||||
def test_process_pr_returns_false_on_post_failure(monkeypatch):
    """process_pr reports False when posting the compensating status raises ApiError."""
    mod = load_reaper()

    def fake_post_status(sha, context, description):
        raise mod.ApiError("POST /statuses/abc123 -> HTTP 500: simulated failure")

    monkeypatch.setattr(mod, "post_status", fake_post_status)
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )

    pr = _pr_fixture(5, "abc123")

    def fake_combined_status(sha):
        # Same shape as the compensating case, so post_status is reached.
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)

    ok = mod.process_pr(pr)
    assert ok is False
|
||||
|
||||
|
||||
def test_main_exits_nonzero_when_any_post_fails(monkeypatch):
    """main() returns 1 when the status POST fails for one of several PRs."""
    mod = load_reaper()

    monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
    monkeypatch.setenv("GITEA_HOST", "git.example.test")
    monkeypatch.setenv("REPO", "owner/repo")

    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(
        mod,
        "list_open_prs",
        lambda limit: [
            _pr_fixture(1, "abc123"),
            _pr_fixture(2, "def456"),
        ],
    )

    # Mutable counter shared with fake_post_status below.
    calls = {"n": 0}

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)

    def fake_post_status(sha, context, description):
        # First POST succeeds, second one fails.
        calls["n"] += 1
        if calls["n"] == 2:
            raise mod.ApiError("simulated failure")

    monkeypatch.setattr(mod, "post_status", fake_post_status)

    exit_code = mod.main()
    assert exit_code == 1
    # Both PRs must have been attempted, otherwise the failing second POST
    # was never reached and the exit code came from somewhere else.
    assert calls["n"] == 2
|
||||
|
||||
|
||||
def test_main_exits_zero_when_all_posts_succeed(monkeypatch):
    """main() returns 0 when every compensating status is posted without error."""
    mod = load_reaper()

    monkeypatch.setenv("GITEA_TOKEN", "fixture-token")
    monkeypatch.setenv("GITEA_HOST", "git.example.test")
    monkeypatch.setenv("REPO", "owner/repo")

    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(
        mod,
        "list_open_prs",
        lambda limit: [_pr_fixture(1, "abc123")],
    )

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
    monkeypatch.setattr(mod, "post_status", lambda *a, **k: None)

    exit_code = mod.main()
    assert exit_code == 0
|
||||
|
||||
|
||||
def test_dry_run_does_not_post(monkeypatch):
    """With DRY_RUN enabled, process_pr succeeds without POSTing a status."""
    mod = load_reaper()
    recorded = []

    def recording_api(method, path, *, body=None, query=None, expect_json=True):
        # Capture every low-level API call so the test can inspect them.
        recorded.append((method, path, body))
        return 200, {"ok": True}

    def stale_umbrella_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(mod, "api", recording_api)
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )

    pr = _pr_fixture(6, "mno345")

    monkeypatch.setattr(mod, "get_combined_status", stale_umbrella_status)
    monkeypatch.setattr(mod, "DRY_RUN", True)

    assert mod.process_pr(pr) is True

    # DRY_RUN should prevent the POST /statuses call
    status_posts = [
        (method, path)
        for method, path, _ in recorded
        if method == "POST" and "/statuses/" in path
    ]
    assert not status_posts
|
||||
|
||||
|
||||
def test_duplicate_contexts_use_latest_state(monkeypatch):
    """A context listed twice is judged by its last entry in the array."""
    mod = load_reaper()
    posted = []

    def record_post(*args, **kwargs):
        posted.append(args)

    monkeypatch.setattr(mod, "post_status", record_post)
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        ["CI / Detect changes (pull_request)"],
    )

    pr = _pr_fixture(7, "pqr678")

    # duplicate: first pending, then success — the loop overwrites
    duplicated_context = "CI / Detect changes (pull_request)"

    def fake_combined_status(sha):
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry(duplicated_context, "pending"),
                _status_entry(duplicated_context, "success"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)

    assert mod.process_pr(pr) is True
    assert len(posted) == 1
|
||||
|
||||
|
||||
def test_load_required_sub_jobs_from_ci_yml_pull_request_event():
    """Default UMBRELLA_CONTEXT yields sub-job contexts with the pull_request suffix."""
    mod = load_reaper()
    # UMBRELLA_CONTEXT defaults to pull_request, so derivation should yield
    # the pull_request suffix.
    derived = mod._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    assert all(ctx.endswith(" (pull_request)") for ctx in derived)
    for expected in (
        "CI / Detect changes (pull_request)",
        "CI / Python Lint & Test (pull_request)",
    ):
        assert expected in derived
|
||||
|
||||
|
||||
def test_load_required_sub_jobs_from_ci_yml_push_event(monkeypatch):
    """A push umbrella context yields sub-job contexts with the push suffix."""
    mod = load_reaper()
    monkeypatch.setattr(mod, "UMBRELLA_CONTEXT", "CI / all-required (push)")
    derived = mod._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    non_push = [ctx for ctx in derived if not ctx.endswith(" (push)")]
    assert not non_push
    assert "CI / Detect changes (push)" in derived
|
||||
|
||||
|
||||
def test_list_open_prs_paginates(monkeypatch):
    """list_open_prs walks pages until it receives a short page.

    With limit=2, page 1 is full (2 items) and page 2 is short (1 item), so
    exactly two requests are expected and all three PRs are returned in order.
    """
    mod = load_reaper()
    calls = []

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        calls.append(query)
        page = int(query.get("page", 1))
        if page == 1:
            return 200, [{"number": 1}, {"number": 2}]
        if page == 2:
            return 200, [{"number": 3}]
        return 200, []

    monkeypatch.setattr(mod, "api", fake_api)
    prs = mod.list_open_prs(limit=2)

    assert len(prs) == 3
    assert prs[0]["number"] == 1
    assert prs[2]["number"] == 3
    assert calls[0]["page"] == "1"
    assert calls[1]["page"] == "2"
    # The short second page ends pagination; no third request is issued.
    assert len(calls) == 2
|
||||
|
||||
|
||||
def test_process_pr_returns_false_on_status_fetch_failure(monkeypatch):
    """process_pr reports failure when the combined-status read raises ApiError."""
    mod = load_reaper()

    def failing_status_read(sha):
        raise mod.ApiError("GET /statuses/abc123 -> HTTP 500: simulated outage")

    monkeypatch.setattr(mod, "get_combined_status", failing_status_read)
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        ["CI / Detect changes (pull_request)"],
    )

    result = mod.process_pr(_pr_fixture(8, "abc123"))
    assert result is False
|
||||
|
||||
|
||||
def test_process_pr_returns_false_on_missing_statuses_array(monkeypatch):
    """process_pr reports failure when the payload has no 'statuses' list."""
    mod = load_reaper()

    # missing 'statuses' array
    malformed_payload = {"state": "success"}

    monkeypatch.setattr(mod, "get_combined_status", lambda sha: dict(malformed_payload))
    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        ["CI / Detect changes (pull_request)"],
    )

    result = mod.process_pr(_pr_fixture(9, "def456"))
    assert result is False
|
||||
|
||||
|
||||
def test_main_exits_nonzero_when_any_status_read_fails(monkeypatch):
    """main() returns 1 when one PR's status read fails, even if another succeeds."""
    mod = load_reaper()

    for env_key, env_value in (
        ("GITEA_TOKEN", "fixture-token"),
        ("GITEA_HOST", "git.example.test"),
        ("REPO", "owner/repo"),
    ):
        monkeypatch.setenv(env_key, env_value)

    monkeypatch.setattr(
        mod,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )

    def two_open_prs(limit):
        return [_pr_fixture(1, "abc123"), _pr_fixture(2, "def456")]

    monkeypatch.setattr(mod, "list_open_prs", two_open_prs)

    def fake_combined_status(sha):
        # Only the first PR's head SHA can be read; any other SHA errors out.
        if sha != "abc123":
            raise mod.ApiError("simulated status fetch failure")
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(mod, "get_combined_status", fake_combined_status)
    monkeypatch.setattr(mod, "post_status", lambda *a, **k: None)

    assert mod.main() == 1
|
||||
@@ -1,360 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""umbrella-reaper — auto-recovery for stale CI umbrella statuses on PRs.
|
||||
|
||||
Tracking: molecule-core#1780.
|
||||
|
||||
Sibling to status-reaper.py (default-branch push-suffix compensation),
|
||||
but scoped to pull_request umbrellas instead of main-branch contexts.
|
||||
|
||||
What this script does, per `.gitea/workflows/umbrella-reaper.yml` invocation:
|
||||
|
||||
1. List open PRs via GET /repos/{o}/{r}/pulls?state=open&limit={N}.
|
||||
2. For EACH PR:
|
||||
- GET combined commit status for PR head SHA.
|
||||
- Look for the umbrella context (default: "CI / all-required (pull_request)").
|
||||
- If umbrella state is "failure":
|
||||
- Verify ALL required sub-job contexts are "success".
|
||||
- If yes → POST compensating success to /statuses/{sha} with the
|
||||
same umbrella context and an honest description.
|
||||
- If any required sub-job is NOT success → skip (umbrella correctly
|
||||
reflects reality; do NOT lie).
|
||||
- If umbrella state is "success" or "pending" → skip.
|
||||
3. Exit 0 when every PR was processed cleanly; exit 1 if any status read or compensating POST failed. Re-running is idempotent — Gitea de-dups by context.
|
||||
|
||||
What it does NOT do:
|
||||
- Touch non-umbrella contexts.
|
||||
- Compensate when ANY required sub-job is missing, pending, failure, or
|
||||
cancelled. Only the "all sub-jobs green, umbrella stale" race.
|
||||
- Merge PRs. It only posts a status; branch protection still requires
|
||||
human approval.
|
||||
- Run on closed PRs.
|
||||
|
||||
Halt conditions:
|
||||
- Missing required env vars → exit 1 with ::error:: message.
|
||||
- API 5xx on PR list → fail-loud (can't assess state).
|
||||
- API 5xx on an individual PR's status → ::error:: + continue to next PR; the run then exits 1.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _load_required_sub_jobs_from_ci_yml(workflows_dir: str) -> list[str]:
    """Parse ci.yml and extract the all-required sentinel's sub-job contexts.

    Supports two shapes of the all-required job run block:
      1. Legacy Python f-string list (pre-2026-06-01):
         f"CI / Detect changes ({event})"
      2. Current shell-script shape (post-2026-06-01 scheduler fix):
         check "Detect changes" "$CHANGES_RESULT"

    The event suffix appended to each context is taken from the trailing
    "(...)" group of UMBRELLA_CONTEXT and always keeps its parentheses;
    "(pull_request)" is used when UMBRELLA_CONTEXT has no such group.

    Args:
        workflows_dir: Directory that contains ci.yml.

    Returns:
        The required sub-job context strings, in the order they appear in
        the run block.

    Raises:
        RuntimeError: if ci.yml is missing, has no all-required job, or the
            run block cannot be parsed.
    """
    ci_path = Path(workflows_dir) / "ci.yml"
    if not ci_path.exists():
        raise RuntimeError(f"ci.yml not found at {ci_path}")

    # PyYAML is installed by the workflow (same as status-reaper.py).
    import yaml

    with ci_path.open(encoding="utf-8") as f:
        doc = yaml.safe_load(f)

    # An empty or non-mapping document parses to None / list / scalar; report
    # that through the documented RuntimeError rather than an AttributeError.
    jobs = doc.get("jobs") if isinstance(doc, dict) else None
    all_required = jobs.get("all-required") if isinstance(jobs, dict) else None
    if not isinstance(all_required, dict):
        raise RuntimeError("ci.yml missing 'all-required' job")

    # Use the first step that carries a non-empty run block.
    run_block = ""
    for step in all_required.get("steps") or []:
        if isinstance(step, dict):
            run_text = step.get("run", "")
            if run_text:
                run_block = run_text
                break

    if not run_block:
        raise RuntimeError("all-required job missing run block")

    # Determine event suffix from the umbrella context we are watching.
    # The parentheses are part of the suffix for every event name, so the
    # derived contexts look like "CI / <name> (<event>)".
    event_match = re.search(r" \(([^)]+)\)$", UMBRELLA_CONTEXT)
    suffix = f"({event_match.group(1)})" if event_match else "(pull_request)"

    # Try legacy f-string format first.
    if "({event})" in run_block:
        legacy = re.findall(r'f["\'](.*?\(\{event\}\))["\']', run_block)
        if legacy:
            return [template.replace("({event})", suffix) for template in legacy]

    # Try current shell-script format: check "Name" "$RESULT"
    names = re.findall(r'check\s+"([^"]+)"', run_block)
    if names:
        return [f"CI / {name} {suffix}" for name in names]

    raise RuntimeError("unable to derive required sub-jobs from all-required run block")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Environment
|
||||
# --------------------------------------------------------------------------
|
||||
def _env(key: str, *, default: str = "") -> str:
    """Return environment variable *key*, or *default* when it is not set."""
    value = os.environ.get(key)
    if value is None:
        return default
    return value
|
||||
|
||||
|
||||
# Connection settings; checked for presence later by _require_runtime_env().
GITEA_TOKEN = _env("GITEA_TOKEN")
GITEA_HOST = _env("GITEA_HOST")
REPO = _env("REPO")

# DRY_RUN is on for "1", "true" or "yes" (case-insensitive); anything else is off.
DRY_RUN = _env("DRY_RUN", default="").lower() in {"1", "true", "yes"}

# The umbrella context to watch. Must match the branch-protection name
# exactly (Gitea de-dups by context string).
UMBRELLA_CONTEXT = _env("UMBRELLA_CONTEXT", default="CI / all-required (pull_request)")

# Required sub-job contexts. The umbrella is only compensated when ALL of
# these are "success" on the same SHA. Order does not matter.
#
# Derive from ci.yml at runtime to prevent drift (CR2 blocker #1).
# The env var REQUIRED_SUB_JOBS overrides derivation for emergency
# tuning or local testing; it is a ";"-separated list, blank items dropped.
_REQUIRED_SUB_JOBS_OVERRIDE = _env("REQUIRED_SUB_JOBS")
if not _REQUIRED_SUB_JOBS_OVERRIDE:
    try:
        REQUIRED_SUB_JOBS = _load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    except Exception as exc:
        # Derivation failure aborts at import time with a CI-visible error.
        sys.stderr.write(
            f"::error::Failed to derive REQUIRED_SUB_JOBS from ci.yml: {exc}\n"
        )
        sys.exit(1)
else:
    REQUIRED_SUB_JOBS = [
        item
        for item in (piece.strip() for piece in _REQUIRED_SUB_JOBS_OVERRIDE.split(";"))
        if item
    ]

# "owner/name" -> ("owner", "name"); a value without "/" leaves NAME empty.
OWNER, NAME = REPO.partition("/")[::2]
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
PR_LIMIT = int(_env("PR_LIMIT", default="50"))
|
||||
|
||||
|
||||
def _require_runtime_env() -> None:
    """Exit with status 1 if a required environment variable is unset or empty.

    Only the first missing variable (in the order GITEA_TOKEN, GITEA_HOST,
    REPO) is reported, as an ``::error::`` line on stderr.
    """
    required = ("GITEA_TOKEN", "GITEA_HOST", "REPO")
    first_missing = next((key for key in required if not os.environ.get(key)), None)
    if first_missing is None:
        return
    sys.stderr.write(f"::error::missing required env var: {first_missing}\n")
    sys.exit(1)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Tiny HTTP helper
|
||||
# --------------------------------------------------------------------------
|
||||
class ApiError(RuntimeError):
    """Raised when a Gitea API call fails or returns an unexpected payload."""
|
||||
|
||||
|
||||
def api(
    method: str,
    path: str,
    *,
    body: dict | None = None,
    query: dict[str, str] | None = None,
    expect_json: bool = True,
) -> tuple[int, Any]:
    """Send one authenticated request to the Gitea API and decode the reply.

    Args:
        method: HTTP method, e.g. "GET" or "POST".
        path: Path appended to the module-level API base URL.
        body: Optional JSON-serialisable payload; sent as application/json.
        query: Optional query parameters, URL-encoded onto the path.
        expect_json: When True, a non-JSON success body is an error; when
            False it is returned as ``{"_raw": <text>}``.

    Returns:
        ``(status, payload)`` where payload is the decoded JSON, or None for
        an empty body.

    Raises:
        ApiError: on any non-2xx status, on a transport-level failure
            (connection error, DNS failure, timeout), or on a non-JSON body
            while ``expect_json`` is True.
    """
    url = f"{API}{path}"
    if query:
        url = f"{url}?{urllib.parse.urlencode(query)}"
    data = None
    headers = {
        "Authorization": f"token {GITEA_TOKEN}",
        "Accept": "application/json",
    }
    if body is not None:
        data = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, method=method, data=data, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
            status = resp.status
    except urllib.error.HTTPError as e:
        # HTTP-level failure: keep status and body for the error message below.
        raw = e.read()
        status = e.code
    except OSError as e:
        # URLError and socket timeouts are OSError subclasses. Surface them as
        # ApiError so callers that handle ApiError per request are not
        # bypassed by a transient network failure.
        raise ApiError(f"{method} {path} -> transport error: {e}") from e

    if not (200 <= status < 300):
        snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
        raise ApiError(f"{method} {path} -> HTTP {status}: {snippet}")

    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except json.JSONDecodeError as e:
        if expect_json:
            raise ApiError(
                f"{method} {path} -> HTTP {status} but body is not JSON: {e}"
            ) from e
        return status, {"_raw": raw.decode("utf-8", errors="replace")}
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Gitea reads / writes
|
||||
# --------------------------------------------------------------------------
|
||||
def list_open_prs(limit: int = 50) -> list[dict]:
    """Collect every open PR by requesting pages of *limit* until one is short.

    Raises ApiError when any page is not a JSON array (fail closed).
    """
    collected: list[dict] = []
    page = 0
    while True:
        page += 1
        params = {"state": "open", "limit": str(limit), "page": str(page)}
        _, batch = api("GET", f"/repos/{OWNER}/{NAME}/pulls", query=params)
        if not isinstance(batch, list):
            raise ApiError(f"PR list page {page} response is not a JSON array")
        collected.extend(batch)
        # An empty or short page means there is nothing further to fetch.
        if not batch or len(batch) < limit:
            return collected
|
||||
|
||||
|
||||
def get_combined_status(sha: str) -> dict:
    """Fetch the combined commit status for *sha*.

    Raises ApiError when the response body is not a JSON object.
    """
    payload = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")[1]
    if isinstance(payload, dict):
        return payload
    raise ApiError(f"status for {sha} response is not a JSON object")
|
||||
|
||||
|
||||
def post_status(sha: str, context: str, description: str) -> None:
    """POST a "success" status for *context* on *sha*.

    When DRY_RUN is set, the payload is printed and no request is made.
    """
    payload = {"context": context, "state": "success", "description": description}
    if not DRY_RUN:
        api("POST", f"/repos/{OWNER}/{NAME}/statuses/{sha}", body=payload)
        return
    print(f"[DRY-RUN] Would POST /statuses/{sha}: {json.dumps(payload)}")
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# Core logic
|
||||
# --------------------------------------------------------------------------
|
||||
def _entry_state(s: dict) -> str:
    """Return the entry's non-empty "status", else its "state", else ""."""
    for field in ("status", "state"):
        value = s.get(field)
        if value:
            return value
    return ""
|
||||
|
||||
|
||||
def process_pr(pr: dict) -> bool:
    """Reconcile the umbrella status for a single open PR.

    A compensating "success" is posted for UMBRELLA_CONTEXT only when the
    umbrella entry is "failure" and every context in REQUIRED_SUB_JOBS is
    present and "success" on the PR's head SHA. When a context appears more
    than once in the statuses array, the last entry wins.

    Args:
        pr: One PR object from the pull-request list; its "number" and
            "head.sha" fields are read.

    Returns:
        True if the tick succeeded for this PR, including every no-op skip
        (missing head SHA, no umbrella entry, umbrella not "failure",
        sub-jobs missing or not all "success"). False if the combined status
        could not be fetched, lacked a "statuses" list, or the compensating
        POST failed.
    """
    num = pr.get("number")
    # "head" can be present but null; treat that as a missing SHA rather
    # than raising AttributeError and aborting the whole run.
    head = pr.get("head")
    sha = head.get("sha") if isinstance(head, dict) else None
    if not sha:
        print(f"::warning::PR #{num}: missing head.sha; skipping")
        return True

    try:
        status = get_combined_status(sha)
    except ApiError as e:
        print(f"::error::PR #{num}: status fetch failed: {e}")
        return False

    statuses = status.get("statuses")
    if not isinstance(statuses, list):
        print(f"::error::PR #{num}: combined status missing 'statuses' array")
        return False

    umbrella_entry = None
    subjob_states: dict[str, str] = {}

    # Later entries overwrite earlier ones for the same context.
    for s in statuses:
        if not isinstance(s, dict):
            continue
        ctx = s.get("context", "")
        state = _entry_state(s)
        if ctx == UMBRELLA_CONTEXT:
            umbrella_entry = s
        if ctx in REQUIRED_SUB_JOBS:
            subjob_states[ctx] = state

    if umbrella_entry is None:
        print(f"::notice::PR #{num}: no umbrella context '{UMBRELLA_CONTEXT}'; skipping")
        return True

    umbrella_state = _entry_state(umbrella_entry)
    if umbrella_state != "failure":
        print(f"::notice::PR #{num}: umbrella is '{umbrella_state}'; skipping")
        return True

    # Verify ALL required sub-jobs are present and success
    missing = [ctx for ctx in REQUIRED_SUB_JOBS if ctx not in subjob_states]
    if missing:
        print(
            f"::notice::PR #{num}: umbrella=failure, but missing sub-jobs: {missing}; "
            "skipping (sub-jobs may still be running)"
        )
        return True

    not_success = [ctx for ctx in REQUIRED_SUB_JOBS if subjob_states[ctx] != "success"]
    if not_success:
        print(
            f"::notice::PR #{num}: umbrella=failure, but sub-jobs not all success: "
            f"{[(ctx, subjob_states[ctx]) for ctx in not_success]}; skipping"
        )
        return True

    # All checks pass — post compensating status
    desc = (
        "Compensating status: all required sub-jobs verified success; "
        "umbrella stale due to commit-status propagation race. "
        f"Auto-posted by umbrella-reaper for PR #{num}."
    )
    try:
        post_status(sha, UMBRELLA_CONTEXT, desc)
    except ApiError as e:
        print(f"::error::PR #{num}: failed to post compensating status: {e}")
        return False
    print(f"::notice::PR #{num}: posted compensating success for {UMBRELLA_CONTEXT}")
    return True
|
||||
|
||||
|
||||
def main() -> int:
    """Scan all open PRs and compensate stale umbrella statuses.

    Returns:
        0 when every PR was processed without error; 1 when
        REQUIRED_SUB_JOBS is empty or when process_pr reported a failure
        (status read failure or failed compensating POST) for any PR.
        A failure while listing PRs is not caught here and propagates.
    """
    _require_runtime_env()

    # Drift guard: ci.yml derivation already happened at module load, but
    # we sanity-check it is non-empty so the loop below doesn't trivially
    # no-op because of a parse bug.
    if not REQUIRED_SUB_JOBS:
        sys.stderr.write("::error::REQUIRED_SUB_JOBS is empty; bailing out\n")
        return 1

    prs = list_open_prs(limit=PR_LIMIT)
    print(f"::notice::Scanning {len(prs)} open PRs for stale umbrella statuses")

    # Every PR is processed even after an earlier one fails; failures are
    # only tallied so the exit code reflects them.
    failed = 0
    for pr in prs:
        if not process_pr(pr):
            failed += 1

    # The tally covers status-read failures as well as failed POSTs.
    print(f"::notice::umbrella-reaper complete (failed PRs={failed})")
    return 1 if failed else 0
|
||||
|
||||
|
||||
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -55,22 +55,38 @@
|
||||
|
||||
version: 1
|
||||
|
||||
# Uniform hard-fail mode (CTO 2026-06-07):
|
||||
# Every PR uses the same gate — no tier branching.
|
||||
# Missing acks → status `failure`, blocks merge via branch protection.
|
||||
# Tier-aware failure mode (RFC#351 open question 2):
|
||||
# For tier:high — hard-fail (status `failure`, blocks merge via BP).
|
||||
# For tier:medium — hard-fail (same as high; medium is non-trivial).
|
||||
# For tier:low — soft-fail (status `pending` with `acked: N/M` in the
|
||||
# description). BP can choose to require the context
|
||||
# or not for low-tier PRs.
|
||||
# If no tier label is present, default to medium (hard-fail) — every PR
|
||||
# should have a tier label per sop-tier-check, and absence indicates
|
||||
# a missing-tier defect we should surface, not silently lower the bar.
|
||||
tier_failure_mode:
|
||||
"tier:high": hard
|
||||
"tier:medium": hard
|
||||
"tier:low": soft
|
||||
default_mode: hard # used when no tier:* label is present
|
||||
|
||||
# High-risk class (RFC#450 Option C, governance-fix for internal#442).
|
||||
#
|
||||
# A PR is "high-risk" when ANY of the listed labels are applied.
|
||||
# A PR is "high-risk" when ANY of the listed labels are applied OR when
|
||||
# the PR has `tier:high` (mechanically the strictest existing tier).
|
||||
# High-risk items use `required_teams_high_risk` (when present on the
|
||||
# item); non-high-risk items use the default `required_teams`.
|
||||
#
|
||||
# Risk-classed two-eyes shape:
|
||||
# - Default class (not high-risk): a non-author engineers/managers/ceo
|
||||
# ack satisfies the item — 25+ live identities, no dependency on a
|
||||
# dead/inactive senior persona token.
|
||||
# - High-risk class (any high_risk_label): still requires a non-author
|
||||
# ceo ack (durable human team).
|
||||
# This closes the inconsistency that the SOP charter already mandates
|
||||
# `tier:high → ceo only` for the sibling `sop-tier-check` gate; the
|
||||
# sop-checklist's `root-cause` and `no-backwards-compat` items now
|
||||
# follow the same risk-classed two-eyes shape:
|
||||
# - Default class (tier:low/medium, not high-risk): a non-author
|
||||
# engineers/managers/ceo ack satisfies the item — 25+ live
|
||||
# identities, no dependency on a dead/inactive senior persona
|
||||
# token.
|
||||
# - High-risk class (tier:high OR any high_risk_label): still
|
||||
# requires a non-author ceo ack (durable human team).
|
||||
#
|
||||
# Tightening: add labels to high_risk_labels.
|
||||
# Loosening: remove labels.
|
||||
@@ -149,11 +165,7 @@ items:
|
||||
|
||||
- slug: memory-consulted
|
||||
numeric_alias: 7
|
||||
# #1973: normalize marker so it matches the slug. Previously the
|
||||
# slash produced a checklist status that never resolved because
|
||||
# normalize_slug() collapses / to - and the Gitea PR body parser
|
||||
# would not find the expected heading.
|
||||
pr_section_marker: "Memory consulted"
|
||||
pr_section_marker: "Memory/saved-feedback consulted"
|
||||
required_teams: [engineers]
|
||||
ai_ack_eligible: true
|
||||
description: >-
|
||||
|
||||
@@ -13,14 +13,14 @@
|
||||
# the structured JSON shape is forward-compatible.
|
||||
#
|
||||
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
|
||||
# extract pattern as sop-checklist.
|
||||
# extract pattern as sop-tier-check.
|
||||
|
||||
name: audit-force-merge
|
||||
|
||||
# pull_request_target loads from the base branch — same security model
|
||||
# as sop-checklist. Without this, an attacker could rewrite the
|
||||
# as sop-tier-check. Without this, an attacker could rewrite the
|
||||
# workflow on a PR and skip the audit emission for their own
|
||||
# force-merge. See `.gitea/workflows/sop-checklist.yml` for the full
|
||||
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
|
||||
# rationale.
|
||||
on:
|
||||
pull_request_target:
|
||||
@@ -41,8 +41,8 @@ jobs:
|
||||
ref: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Detect force-merge + emit audit event
|
||||
env:
|
||||
# Same org-level secret the sop-checklist workflow uses.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Same org-level secret the sop-tier-check workflow uses.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
# required checks) for each branch listed here.
|
||||
#
|
||||
# Declared here rather than fetched from /branch_protections
|
||||
# because that endpoint requires admin write — sop-checklist-bot is
|
||||
# because that endpoint requires admin write — sop-tier-bot is
|
||||
# read-only by design (least-privilege).
|
||||
REQUIRED_CHECKS_JSON: |
|
||||
{
|
||||
|
||||
@@ -34,8 +34,6 @@ jobs:
|
||||
check:
|
||||
name: Block forbidden paths
|
||||
runs-on: ubuntu-latest
|
||||
# Hard gate — detected internal-path leaks fail the workflow.
|
||||
# continue-on-error removed per directive (fail-open → fail-closed).
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
|
||||
@@ -1,165 +0,0 @@
|
||||
name: boot-to-registration-e2e (advisory)
|
||||
|
||||
# cp#455 — Minimal-cell boot-to-registration e2e.
|
||||
# CTO directive 14eb4f07: "build the minimal claude-code+kimi cell,
|
||||
# it should now go GREEN since the fix is live."
|
||||
#
|
||||
# Stage 1 of 5-stage rollout. Reuses the dispatch-only EC2
|
||||
# provisioning path from test_staging_full_saas.sh but reduced to
|
||||
# the minimum boot-to-registration surface:
|
||||
#
|
||||
# 1. Provision request accepted; workspace transitions to booting/running
|
||||
# 2. Controlplane receives /registry/register for that workspace_id
|
||||
# 3. JSON-RPC/completion route returns successful minimal response
|
||||
# 4. Teardown terminates workspace even on failure (trap)
|
||||
#
|
||||
# Advisory (non-blocking) per Researcher Stage 2 design — RED on
|
||||
# current main is expected pre-cp#469-cluster. After cp#477 deploy
|
||||
# (888efceb) + PR #2167 merge, cell should turn GREEN. THAT green
|
||||
# is the cluster-proof signal.
|
||||
#
|
||||
# Cost controls (mandatory):
|
||||
# - SPOT instances (tagged run_id/workspace_id for cost attribution)
|
||||
# - Fast teardown (~3-5 min wall-clock) even on assertion failure
|
||||
# - Structured per-cell results JSON (runtime/provider/model/
|
||||
# billing_mode/workspace_id/register_status/completion_status/
|
||||
# teardown_status/elapsed_seconds)
|
||||
#
|
||||
# Inputs:
|
||||
# runtime : default claude-code
|
||||
# billing_mode : default platform_managed (the cp#469-cluster path)
|
||||
# provider : default platform (vs direct-to-provider)
|
||||
# model : default moonshot/kimi-k2.6 (CTO-specified)
|
||||
#
|
||||
# PR target: molecule-core (this file). Companion harness extension
|
||||
# (test_minimal_boot_cell.sh) lives in tests/e2e/ alongside
|
||||
# test_staging_full_saas.sh — same repo, same branch.
|
||||
#
|
||||
# Note: cp#455 was originally spec'd to live in molecule-controlplane
|
||||
# (`.gitea/workflows/` path), but molecule-core's CI is the home for
|
||||
# tenant-boot e2e tests in this stage. Stage 2 may move the path.
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
# Note: Gitea 1.22.6 does not support workflow_dispatch.inputs
|
||||
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Defaults
|
||||
# are hardcoded in the job env below. Stage 2 can add matrix/
|
||||
# param support once the Gitea version supports it.
|
||||
|
||||
# Advisory: no cron schedule, manual dispatch only. Branch protection
|
||||
# doesn't require this — RED on main is expected pre-cp#469-cluster
|
||||
# deploy, GREEN signals the cluster is live.
|
||||
permissions:
|
||||
contents: read
|
||||
# No issue-write; failures surface as red runs in workflow history.
|
||||
|
||||
concurrency:
|
||||
group: boot-to-registration-e2e
|
||||
cancel-in-progress: false
|
||||
|
||||
jobs:
|
||||
# bp-exempt: advisory e2e — non-gating, manual dispatch only (cp#455 Stage 1)
|
||||
minimal-cell:
|
||||
name: Minimal cell (claude-code + platform + moonshot/kimi-k2.6)
|
||||
runs-on: ubuntu-latest
|
||||
# Bounded at 12 min. Wall-clock budget breakdown:
|
||||
# - cold EC2 provision: ~3-4 min (SPOT)
|
||||
# - /registry/register wait: ~30s
|
||||
# - completion call: ~10s
|
||||
# - teardown: ~30-60s
|
||||
# - tail headroom: ~6-7 min
|
||||
timeout-minutes: 12
|
||||
env:
|
||||
# Hardcoded defaults — Gitea 1.22.6 does not support workflow_dispatch.inputs
|
||||
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Stage 2 can add
|
||||
# matrix/param support once the Gitea version supports it.
|
||||
E2E_RUNTIME: claude-code
|
||||
E2E_BILLING_MODE: platform_managed
|
||||
E2E_PROVIDER: platform
|
||||
E2E_MODEL: moonshot/kimi-k2.6
|
||||
E2E_RUN_ID: cp455-${{ github.run_id }}
|
||||
E2E_PROVISION_TIMEOUT_SECS: '300' # 5 min — fast teardown budget
|
||||
MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Verify required secrets present
|
||||
run: |
|
||||
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — minimal-cell e2e cannot run"
|
||||
echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install required tools
|
||||
run: |
|
||||
for cmd in jq curl python3; do
|
||||
command -v "$cmd" >/dev/null 2>&1 || {
|
||||
echo "::error::required tool '$cmd' not on PATH — runner image regression?"
|
||||
exit 1
|
||||
}
|
||||
done
|
||||
|
||||
- name: Run minimal-cell boot-to-registration harness
|
||||
# The harness script handles its own teardown via EXIT trap;
|
||||
# even on assertion failure (provision timeout, register
|
||||
# timeout, completion failure), the workspace is deprovisioned
|
||||
# and a leak is reported. Exit code propagates from the script.
|
||||
# Structured per-cell results are emitted to ${GITHUB_STEP_SUMMARY}
|
||||
# so operators see pass/fail per assertion without scrolling.
|
||||
run: |
|
||||
bash tests/e2e/test_minimal_boot_cell.sh
|
||||
|
||||
- name: Emit structured per-cell results
|
||||
if: always()
|
||||
# Always run (even on failure) so the structured results are
|
||||
# visible in the workflow summary. The script writes a JSON
|
||||
# file at /tmp/cell-result.json; this step renders it as a
|
||||
# job summary.
|
||||
run: |
|
||||
if [ -f /tmp/cell-result.json ]; then
|
||||
echo "## Minimal-cell results" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo '```json' >> "$GITHUB_STEP_SUMMARY"
|
||||
cat /tmp/cell-result.json >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo '```' >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "## Minimal-cell results: NO_RESULT_FILE" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "" >> "$GITHUB_STEP_SUMMARY"
|
||||
echo "Harness did not produce /tmp/cell-result.json — likely crashed before trap fired." >> "$GITHUB_STEP_SUMMARY"
|
||||
fi
|
||||
|
||||
- name: Failure summary
|
||||
if: failure()
|
||||
run: |
|
||||
{
|
||||
echo "## cp#455 minimal-cell FAILED"
|
||||
echo ""
|
||||
echo "**Run ID:** ${{ github.run_id }}"
|
||||
echo "**Runtime:** ${E2E_RUNTIME}"
|
||||
echo "**Billing mode:** ${E2E_BILLING_MODE}"
|
||||
echo "**Provider:** ${E2E_PROVIDER}"
|
||||
echo "**Model:** ${E2E_MODEL}"
|
||||
echo "**Slug:** ${E2E_RUN_ID}"
|
||||
echo ""
|
||||
echo "### What this means"
|
||||
echo ""
|
||||
echo "The minimal claude-code+kimi cell did not pass all 4 assertions:"
|
||||
echo "1. Provision request accepted; workspace transitions to booting/running"
|
||||
echo "2. Controlplane receives /registry/register for that workspace_id"
|
||||
echo "3. JSON-RPC/completion route returns successful minimal response"
|
||||
echo "4. Teardown terminates workspace even on failure (trap)"
|
||||
echo ""
|
||||
echo "RED is expected pre-cp#469-cluster. After cp#477 deploy (888efceb) + PR #2167 merge,"
|
||||
echo "this should turn GREEN. Persistent RED after both merge = cluster bug, not e2e bug."
|
||||
echo ""
|
||||
echo "### Next steps"
|
||||
echo ""
|
||||
echo "1. Check the harness output above for the assertion that failed"
|
||||
echo "2. If assertion 1 fails: provision path broken — check CP admin API + EC2 quota"
|
||||
echo "3. If assertion 2 fails: /registry/register path broken — check workspace-server boot"
|
||||
echo "4. If assertion 3 fails: LLM proxy / completion path broken — check cp#469 cluster"
|
||||
echo "5. If assertion 4 fails: teardown trap broken — leak risk, fix immediately"
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
@@ -12,7 +12,7 @@
|
||||
# (SHA 0adf2098) per RFC internal#219 Phase 2b+c — replicate repo-by-repo.
|
||||
#
|
||||
# When any pair diverges, a `[ci-drift]` issue is opened or updated
|
||||
# (idempotent by title) and labelled `ci-bp-drift`. This is the
|
||||
# (idempotent by title) and labelled `tier:high`. This is the
|
||||
# auto-detection that closes the regression class identified in
|
||||
# RFC §1 finding 3 (protection only listed 2 of 6 real jobs for
|
||||
# ~weeks, undetected) and §6 (audit env drifts silently from
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
# Gitea persona whose ONLY job is reading branch_protections
|
||||
# and posting the [ci-drift] tracking issue. The endpoint
|
||||
# `GET /repos/.../branch_protections/{branch}` requires
|
||||
# repo-ADMIN role (Gitea 1.22.6) — the default GITHUB_TOKEN and the
|
||||
# repo-ADMIN role (Gitea 1.22.6) — SOP_TIER_CHECK_TOKEN and the
|
||||
# auto-injected GITHUB_TOKEN do NOT have it (read-only / write
|
||||
# without admin), so the previous fallback chain 403'd.
|
||||
# Mirrors the controlplane fix landed in CP PR#134.
|
||||
@@ -106,7 +106,7 @@ jobs:
|
||||
AUDIT_WORKFLOW_PATH: '.gitea/workflows/audit-force-merge.yml'
|
||||
# Path to the CI workflow with the sentinel + the jobs.
|
||||
CI_WORKFLOW_PATH: '.gitea/workflows/ci.yml'
|
||||
# Issue label applied on file/update. `ci-bp-drift` exists in
|
||||
# Issue label applied on file/update. `tier:high` exists in
|
||||
# the molecule-core label set (verified 2026-05-11, label id 9).
|
||||
DRIFT_LABEL: 'ci-bp-drift'
|
||||
DRIFT_LABEL: 'tier:high'
|
||||
run: python3 .gitea/scripts/ci-required-drift.py
|
||||
|
||||
+7
-18
@@ -148,11 +148,6 @@ jobs:
|
||||
run: $(go env GOPATH)/bin/golangci-lint run --timeout 3m ./...
|
||||
- if: ${{ needs.changes.outputs.platform == 'true' }}
|
||||
name: Diagnostic — per-package verbose 60s
|
||||
# DIAGNOSTIC ONLY (continue-on-error below): this step exists to dump
|
||||
# verbose per-package output for triage, NOT to gate. The blocking gate
|
||||
# is "Run tests with coverage (blocking gate)" immediately below. The
|
||||
# `set +e` / swallowed exits here are intentional — do not "fix" them
|
||||
# like a gate; the real gate is the next step.
|
||||
run: |
|
||||
set +e
|
||||
go test -race -v -timeout 60s ./internal/handlers/... 2>&1 | tee /tmp/test-handlers.log
|
||||
@@ -314,11 +309,6 @@ jobs:
|
||||
# #1815 — wires coverage into CI so we get a baseline visible on
|
||||
# every PR. No threshold gate yet; thresholds dial in (Step 3, also
|
||||
# tracked in #1815) after the team sees what current coverage is.
|
||||
# Memory: the full vitest+v8-coverage process tree peaks at ~1.33 GB
|
||||
# (measured 2026-06-08), comfortably within the runner — so this single
|
||||
# run is BOTH the pass/fail gate and the coverage artifact (one SSOT, no
|
||||
# split). The earlier intermittent red here was a DisplayTab paste-race
|
||||
# (fixed in this PR), NOT a coverage OOM.
|
||||
run: npx vitest run --coverage
|
||||
- name: Upload coverage summary as artifact
|
||||
if: ${{ needs.changes.outputs.canvas == 'true' }}
|
||||
@@ -428,9 +418,10 @@ jobs:
|
||||
# a manual action that determinism made obsolete.
|
||||
name: Canvas Deploy Status
|
||||
runs-on: docker-host
|
||||
# Per-step no-op (not job-level `if:`) so the job reaches SUCCESS on PRs
|
||||
# instead of skipped — skipped poisons the PR combined status (internal#817).
|
||||
# Job-level `if:` so ci-required-drift.py's ci_job_names() detects this as
|
||||
# github.ref-gated and skips it from the required-context F1 set (mc#1982).
|
||||
# Step-level exit 0 handles the "not a canvas main push" case.
|
||||
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
|
||||
needs: [changes, canvas-build]
|
||||
steps:
|
||||
- name: Record canvas ordered-deploy status
|
||||
@@ -509,7 +500,7 @@ jobs:
|
||||
# `CI / all-required (pull_request)` per issue #1473.
|
||||
#
|
||||
# Closes the failure mode where status_check_contexts on molecule-core/main
|
||||
# only listed `Secret scan` + `sop-checklist` (the 2 meta-gates), so real
|
||||
# only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
|
||||
# `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
|
||||
# red silently merged through. See internal#286 for the three concrete
|
||||
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
|
||||
@@ -542,8 +533,9 @@ jobs:
|
||||
# The `needs:` list MUST stay in lockstep with ci-required-drift.py's
|
||||
# F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
|
||||
# whose `if:` gates on github.event_name/github.ref). canvas-deploy-
|
||||
# status is per-step-gated (not job-level `if:`) so it reaches SUCCESS
|
||||
# on PRs and is included here — internal#817. If a new always-running
|
||||
# reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
|
||||
# so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
|
||||
# skipped job would never let the sentinel run. If a new always-running
|
||||
# CI job is added, add it here too or ci-required-drift F1 will flag it.
|
||||
#
|
||||
# Stays on the dedicated `ci-meta` lane (no docker work, so the
|
||||
@@ -557,7 +549,6 @@ jobs:
|
||||
- canvas-build
|
||||
- shellcheck
|
||||
- python-lint
|
||||
- canvas-deploy-status
|
||||
continue-on-error: false
|
||||
runs-on: ci-meta
|
||||
timeout-minutes: 5
|
||||
@@ -576,7 +567,6 @@ jobs:
|
||||
CANVAS_RESULT: ${{ needs.canvas-build.result }}
|
||||
SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
|
||||
PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
|
||||
CANVAS_DEPLOY_RESULT: ${{ needs.canvas-deploy-status.result }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
fail=0
|
||||
@@ -598,7 +588,6 @@ jobs:
|
||||
check "Canvas (Next.js)" "$CANVAS_RESULT"
|
||||
check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
|
||||
check "Python Lint & Test" "$PYTHON_LINT_RESULT"
|
||||
check "Canvas Deploy Status" "$CANVAS_DEPLOY_RESULT"
|
||||
if [ "$fail" -ne 0 ]; then
|
||||
echo "::error::all-required: one or more aggregated CI jobs did not succeed"
|
||||
exit 1
|
||||
|
||||
@@ -131,9 +131,9 @@ jobs:
|
||||
# on the per-runtime default ("sonnet" → routes to direct
|
||||
# Anthropic, defeats the cost saving). Operators can override
|
||||
# via workflow_dispatch by setting a different E2E_MODEL_SLUG
|
||||
# input if they need to exercise a specific model. MiniMax-M2.7 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7' }}
|
||||
# input if they need to exercise a specific model. MiniMax-M2 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke.
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2' }}
|
||||
# Bound to 10 min so a stuck provision fails the run instead of
|
||||
# holding up the next cron firing. 15-min default in the script
|
||||
# is for the on-PR full lifecycle where we have more headroom.
|
||||
|
||||
@@ -429,9 +429,6 @@ jobs:
|
||||
# round-trip is covered by the priority-runtimes `mock` arm, not here.
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_keyless_feature_contracts_e2e.sh
|
||||
- name: Run user_tasks E2E (REST + MCP — agent→user action requests)
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_user_tasks_e2e.sh
|
||||
- name: Run secrets-dispatch contract test (keyless SECRETS_JSON branch order)
|
||||
# Previously orphaned (no workflow referenced it). Hermetic unit-style
|
||||
# contract over test_staging_full_saas.sh's LLM-key branch precedence —
|
||||
|
||||
@@ -250,20 +250,6 @@ jobs:
|
||||
echo "CANVAS_PORT=${CANVAS_PORT}" >> "$GITHUB_ENV"
|
||||
echo "Canvas host port: ${CANVAS_PORT}"
|
||||
|
||||
- name: Set deterministic admin token
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
run: |
|
||||
# PR #2291 made auth fail-closed everywhere (no dev-mode escape).
|
||||
# The platform server requires ADMIN_TOKEN; the canvas requires the
|
||||
# matching NEXT_PUBLIC_ADMIN_TOKEN or every API call 401s.
|
||||
# We set a deterministic per-run value so the ephemeral platform
|
||||
# and canvas are paired correctly.
|
||||
E2E_ADMIN_TOKEN="e2e-chat-admin-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "NEXT_PUBLIC_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "Admin token configured for e2e-chat platform + canvas."
|
||||
|
||||
- name: Start platform (background)
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
working-directory: workspace-server
|
||||
|
||||
@@ -54,13 +54,6 @@ on:
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- 'tests/e2e/test_staging_concierge_e2e.sh'
|
||||
- 'tests/e2e/test_staging_concierge_creates_workspace_e2e.sh'
|
||||
- 'workspace-server/internal/staginge2e/**'
|
||||
- 'workspace-server/internal/handlers/platform_agent.go'
|
||||
- 'workspace-server/internal/handlers/user_tasks.go'
|
||||
- 'workspace-server/internal/handlers/llm_billing_mode_handler.go'
|
||||
- 'workspace-server/internal/handlers/discovery.go'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
@@ -76,13 +69,6 @@ on:
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- 'tests/e2e/test_staging_concierge_e2e.sh'
|
||||
- 'tests/e2e/test_staging_concierge_creates_workspace_e2e.sh'
|
||||
- 'workspace-server/internal/staginge2e/**'
|
||||
- 'workspace-server/internal/handlers/platform_agent.go'
|
||||
- 'workspace-server/internal/handlers/user_tasks.go'
|
||||
- 'workspace-server/internal/handlers/llm_billing_mode_handler.go'
|
||||
- 'workspace-server/internal/handlers/discovery.go'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
@@ -510,341 +496,3 @@ jobs:
|
||||
echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
# ── CONCIERGE user_tasks PRIMITIVE (Feature 3) — real-staging REST+MCP+authz ──
|
||||
#
|
||||
# Drives tests/e2e/test_staging_concierge_e2e.sh against a fresh throwaway
|
||||
# tenant: the full agent→user "ask" contract over BOTH surfaces (REST +
|
||||
# the MCP tools/call envelope a canvas concierge agent uses) PLUS the
|
||||
# cross-workspace authz scoping (ws-B can't touch ws-A's task). Reuses the
|
||||
# same CP-admin org-provision/teardown scaffolding + _lib.sh + AWS-leak-check
|
||||
# lib as the full-SaaS harness (the script SOURCEs them — no duplication).
|
||||
#
|
||||
# GATING (no continue-on-error): user_tasks is a pure DB/handler primitive
|
||||
# with NO LLM container dependency (workspaces are created 'external' — row
|
||||
# only, no EC2), so this is fast (~provision + TLS, no 10-min cold boot) and
|
||||
# NOT subject to the cp#245 boot-timeout flake the full-SaaS job carries. It
|
||||
# therefore has no honest reason to be masked. Runs on push-to-main /
|
||||
# workflow_dispatch / cron only (needs live staging infra — never on PR, where
|
||||
# the pr-validate job above already posts the workflow's PR status).
|
||||
# bp-required: pending #2430
|
||||
e2e-staging-concierge-user-tasks:
|
||||
name: E2E Staging Concierge user_tasks
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
||||
timeout-minutes: 30
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
E2E_AWS_LEAK_CHECK: required
|
||||
E2E_AWS_TERMINATE_LEAKS: '1'
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Verify admin token + AWS creds present
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
|
||||
exit 2
|
||||
fi
|
||||
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
echo "::error::$var not set — EC2 leak verification cannot run"
|
||||
exit 2
|
||||
fi
|
||||
done
|
||||
echo "Admin token + AWS creds present ✓"
|
||||
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
|
||||
exit 1
|
||||
fi
|
||||
echo "Staging CP healthy ✓"
|
||||
|
||||
- name: Run concierge user_tasks E2E
|
||||
run: bash tests/e2e/test_staging_concierge_e2e.sh
|
||||
|
||||
- name: Teardown safety net (runs on cancel/failure)
|
||||
if: always()
|
||||
env:
|
||||
ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
run: |
|
||||
# Sweep any e2e-cncrg-YYYYMMDD-<run_id>-* org this run created if the
|
||||
# script died before its EXIT trap fired. Run-id scoped so it never
|
||||
# stomps a concurrent run's fresh tenant (see the saas job's note).
|
||||
set +e
|
||||
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
|
||||
| python3 -c "
|
||||
import json, sys, os, datetime
|
||||
run_id = os.environ.get('GITHUB_RUN_ID', '')
|
||||
d = json.load(sys.stdin)
|
||||
today = datetime.date.today()
|
||||
yesterday = today - datetime.timedelta(days=1)
|
||||
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
|
||||
if run_id:
|
||||
prefixes = tuple(f'e2e-cncrg-{d}-{run_id}-' for d in dates)
|
||||
else:
|
||||
prefixes = tuple(f'e2e-cncrg-{d}-' for d in dates)
|
||||
candidates = [o['slug'] for o in d.get('orgs', [])
|
||||
if any(o.get('slug','').startswith(p) for p in prefixes)
|
||||
and o.get('instance_status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
echo "Safety-net teardown: $slug"
|
||||
set +e
|
||||
curl -sS -o /tmp/cncrg-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/cncrg-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/cncrg-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::concierge teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/cncrg-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::concierge teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
# ── CONCIERGE FUNCTIONAL: it ACTUALLY CREATES A WORKSPACE (real-LLM) ─────────
|
||||
#
|
||||
# Drives tests/e2e/test_staging_concierge_creates_workspace_e2e.sh — the
|
||||
# RFC docs/design/rfc-platform-agent.md §11.4 "Reach" check turned into a gate:
|
||||
# send the org concierge a natural-language A2A message ("create a workspace
|
||||
# named e2e-cncrg-worker-<runid> with role engineer") and assert the
|
||||
# DETERMINISTIC SIDE EFFECT — that named workspace now EXISTS in GET /workspaces
|
||||
# — which can only happen if the concierge's LLM really invoked the
|
||||
# create_workspace platform-MCP tool (a real org mutation), NOT just that a REST
|
||||
# API returned 200.
|
||||
#
|
||||
# GATING (no continue-on-error), but FALSE-GREEN-PROOF via E2E_REQUIRE_LIVE=1:
|
||||
# this is a REAL-LLM, REAL-tool test, so it depends on the concierge being
|
||||
# provisioned on the DEDICATED platform-agent image (Dockerfile.platform-agent,
|
||||
# ships /opt/molecule-mcp-server — the ONLY image where create_workspace lights
|
||||
# up; see platform_agent.go's SELF-HOST CAVEAT). A parallel agent is wiring that
|
||||
# image into the staging provision path. The script SKIPs LOUD when the
|
||||
# concierge is absent / not online / not on the platform-agent image — but with
|
||||
# E2E_REQUIRE_LIVE=1 the harness converts that skip into a HARD FAIL (exit 5) so
|
||||
# a silently-missing platform-agent image can NEVER false-green this gate. Runs
|
||||
# on push-to-main / workflow_dispatch / cron only (needs live staging infra +
|
||||
# a model — never on PR, where pr-validate posts the workflow's PR status).
|
||||
# bp-required: pending #2430
|
||||
e2e-staging-concierge-creates-workspace:
|
||||
name: E2E Staging Concierge Creates Workspace
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
||||
timeout-minutes: 45
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
E2E_AWS_LEAK_CHECK: required
|
||||
E2E_AWS_TERMINATE_LEAKS: '1'
|
||||
# The concierge is platform_managed on SaaS (the CP-exported LLM proxy
|
||||
# supplies its model — no BYOK key needed for the concierge itself). The
|
||||
# MiniMax key is wired anyway so a staging image that boots the concierge
|
||||
# BYOK-MiniMax (parallel-agent image work) still has a model; harmless when
|
||||
# the concierge is platform-managed.
|
||||
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
# False-green guard: a concierge that is absent / not on the platform-agent
|
||||
# image / never online must FAIL this gate (exit 5), not silently skip.
|
||||
E2E_REQUIRE_LIVE: '1'
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
- name: Verify admin token + AWS creds present
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
|
||||
exit 2
|
||||
fi
|
||||
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
echo "::error::$var not set — EC2 leak verification cannot run"
|
||||
exit 2
|
||||
fi
|
||||
done
|
||||
echo "Admin token + AWS creds present ✓"
|
||||
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
|
||||
exit 1
|
||||
fi
|
||||
echo "Staging CP healthy ✓"
|
||||
|
||||
- name: Run concierge-creates-workspace functional E2E
|
||||
run: bash tests/e2e/test_staging_concierge_creates_workspace_e2e.sh
|
||||
|
||||
- name: Teardown safety net (runs on cancel/failure)
|
||||
if: always()
|
||||
env:
|
||||
ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
run: |
|
||||
# Sweep any e2e-cncrg-mk-YYYYMMDD-<run_id>-* org this run created if the
|
||||
# script died before its EXIT trap fired. Run-id scoped so it never
|
||||
# stomps a concurrent run's fresh tenant.
|
||||
set +e
|
||||
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
|
||||
| python3 -c "
|
||||
import json, sys, os, datetime
|
||||
run_id = os.environ.get('GITHUB_RUN_ID', '')
|
||||
d = json.load(sys.stdin)
|
||||
today = datetime.date.today()
|
||||
yesterday = today - datetime.timedelta(days=1)
|
||||
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
|
||||
if run_id:
|
||||
prefixes = tuple(f'e2e-cncrg-mk-{d}-{run_id}-' for d in dates)
|
||||
else:
|
||||
prefixes = tuple(f'e2e-cncrg-mk-{d}-' for d in dates)
|
||||
candidates = [o['slug'] for o in d.get('orgs', [])
|
||||
if any(o.get('slug','').startswith(p) for p in prefixes)
|
||||
and o.get('instance_status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
echo "Safety-net teardown: $slug"
|
||||
set +e
|
||||
curl -sS -o /tmp/cncrg-mk-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/cncrg-mk-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/cncrg-mk-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::concierge-mk teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/cncrg-mk-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::concierge-mk teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
# ── CONCIERGE / PLATFORM-AGENT Go staginge2e (Features 1,2,4,5,6) ────────────
|
||||
#
|
||||
# Drives TestConciergePlatformAgent_Staging (workspace-server/internal/
|
||||
# staginge2e/concierge_platform_test.go), which REUSES the lifecycle suite's
|
||||
# harness (requireStagingEnv / adminCreateOrg / tenantAdminToken /
|
||||
# tenantCreateWorkspace / doTenantJSON / jsonField) to assert, against a real
|
||||
# tenant: platform-agent install + /org/identity (1), kind on the workspace
|
||||
# API (2), discovery peers admin-auth regression guard (4), BYOK billing-mode
|
||||
# round-trip (5), and the concierge config-tab auth sweep (6). It asserts
|
||||
# OBSERVABLE state (sole root re-parenting, kind discriminator, resolved_mode,
|
||||
# non-401 tabs) — not just HTTP 200.
|
||||
#
|
||||
# Two jobs, mirroring e2e-workspace-lifecycle.yml's honest pattern:
|
||||
# • concierge-compile-skip (every push/PR/dispatch): proves the staginge2e
|
||||
# suite still COMPILES under -tags=staging_e2e and SKIPs LOUD without
|
||||
# creds. GATING (no mask) — a broken test file fails at PR time.
|
||||
# • concierge-staging (push-to-main/dispatch/cron): the real live run with
|
||||
# staging creds + t.Cleanup teardown.
|
||||
# bp-exempt: PR-time compile-only check (build the concierge e2e test, then
|
||||
# skip execution — no staging creds on PR). pr-validate posts the workflow's
|
||||
# PR status; this job is not itself a branch-protection gate.
|
||||
e2e-staging-concierge-compile-skip:
|
||||
name: E2E Staging Concierge (compile+skip)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
- name: go vet (staging_e2e tag)
|
||||
working-directory: workspace-server
|
||||
run: go vet -tags staging_e2e ./internal/staginge2e/...
|
||||
- name: Compile + skip-run (must SKIP LOUD without STAGING_E2E)
|
||||
working-directory: workspace-server
|
||||
run: |
|
||||
# No STAGING_E2E / creds → the suite MUST skip (not pass-with-zero-
|
||||
# assertions). go test exit 0 with a SKIP line is the contract.
|
||||
out=$(go test -tags staging_e2e ./internal/staginge2e/ -run TestConciergePlatformAgent -count=1 -v 2>&1)
|
||||
echo "$out"
|
||||
echo "$out" | grep -q "SKIP: TestConciergePlatformAgent_Staging" \
|
||||
|| { echo "::error::expected a LOUD skip of TestConciergePlatformAgent_Staging without creds"; exit 1; }
|
||||
|
||||
# bp-required: pending #2430
|
||||
e2e-staging-concierge-platform:
|
||||
name: E2E Staging Concierge Platform Agent
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
||||
timeout-minutes: 40
|
||||
permissions:
|
||||
contents: read
|
||||
env:
|
||||
CP_BASE_URL: https://staging-api.moleculesai.app
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
STAGING_E2E: '1'
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
if [ -z "$CP_ADMIN_API_TOKEN" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
|
||||
exit 2
|
||||
fi
|
||||
echo "Admin token present"
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$CP_BASE_URL/health")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::Staging CP unhealthy (HTTP $code) — infra, not a concierge bug."
|
||||
exit 1
|
||||
fi
|
||||
echo "Staging CP healthy"
|
||||
- name: Run concierge/platform-agent staginge2e
|
||||
working-directory: workspace-server
|
||||
run: go test -tags staging_e2e ./internal/staginge2e/ -run TestConciergePlatformAgent_Staging -count=1 -v -timeout 35m
|
||||
# Teardown: the test installs a t.Cleanup admin-DELETE of its own tenant
|
||||
# (e2e-cncrg-* slug), running even on a t.Fatal. The age-guarded
|
||||
# sweep-stale-e2e-orgs workflow (30-min floor, e2e- prefix) is the final
|
||||
# net for a tenant orphaned by a hard runner cancel.
|
||||
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
- name: Run gate-check-v3 (single PR mode)
|
||||
if: github.event_name == 'pull_request_target' || github.event.inputs.pr_number != ''
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number || github.event.inputs.pr_number }}
|
||||
POST_COMMENT: ${{ github.event.inputs.post_comment || 'true' }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
- name: Run gate-check-v3 (all open PRs — cron mode)
|
||||
if: github.event_name == 'schedule'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
REPO: ${{ github.repository }}
|
||||
run: |
|
||||
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
# NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
|
||||
# queue now reads the required status contexts from BRANCH PROTECTION
|
||||
# (status_check_contexts) so non-required governance reds (qa-review,
|
||||
# security-review, sop-checklist when not branch-required,
|
||||
# security-review, sop-tier, sop-checklist when not branch-required,
|
||||
# E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
|
||||
# If branch protection cannot be enumerated the queue HOLDS
|
||||
# (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
|
||||
|
||||
@@ -244,12 +244,7 @@ jobs:
|
||||
# fail if any didn't land — that would be a real regression we
|
||||
# want loud.
|
||||
# workspace_schedules added for the #2149 scheduler integration tests.
|
||||
# workspace_auth_tokens + org_api_tokens added for the #2156
|
||||
# registry-auth TestIntegration_ suite (#2148). Without this
|
||||
# guard, a silently-skipped migration 020 (workspace_auth_tokens)
|
||||
# or 035 (org_api_tokens) would let the auth tests run against
|
||||
# missing tables and falsely green.
|
||||
for tbl in delegations workspaces activity_logs pending_uploads workspace_schedules workspace_auth_tokens org_api_tokens; do
|
||||
for tbl in delegations workspaces activity_logs pending_uploads workspace_schedules; do
|
||||
if ! psql -h "${PG_HOST}" -U postgres -d molecule -tA \
|
||||
-c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
|
||||
| grep -q 1; then
|
||||
@@ -290,33 +285,6 @@ jobs:
|
||||
# / workspaces all landed by the migration replay step above).
|
||||
go test -tags=integration -timeout 5m -v ./internal/scheduler/ -run "^TestIntegration_"
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Migration replay-from-scratch gate (#2150)
|
||||
env:
|
||||
PGPASSWORD: test
|
||||
run: |
|
||||
# Issue #2150 (SOP internal#765): prove the FULL forward migration
|
||||
# chain (.up + legacy .sql) replays from a blank schema via the
|
||||
# PRODUCTION db.RunMigrations entrypoint — hard-fail on any error.
|
||||
#
|
||||
# This is the gap the psql apply loop above does NOT cover: that
|
||||
# loop deliberately SKIPS failing migrations (`⊘ skipped`), so it
|
||||
# stays green even if the chain stops replaying. The Go test below
|
||||
# uses the real boot-time runner with hard-fail semantics, catching
|
||||
# the #211 .down-wipe class and the 045 non-idempotent crash-loop
|
||||
# class (it runs the chain twice).
|
||||
#
|
||||
# Run against a SEPARATE database so the destructive
|
||||
# `DROP SCHEMA public CASCADE` inside the test never touches the
|
||||
# `molecule` DB the handlers integration tests above migrated. No
|
||||
# ordering coupling with the handlers step.
|
||||
createdb -h "${PG_HOST}" -U postgres molecule_replay 2>/dev/null || \
|
||||
psql -h "${PG_HOST}" -U postgres -d molecule \
|
||||
-c "CREATE DATABASE molecule_replay" >/dev/null 2>&1 || true
|
||||
INTEGRATION_DB_URL="postgres://postgres:test@${PG_HOST}:5432/molecule_replay?sslmode=disable" \
|
||||
go test -tags=integration -timeout 5m -v ./internal/db/ \
|
||||
-run '^TestIntegration_Migration|^TestIntegration_InitPostgres'
|
||||
|
||||
- if: failure() && needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Diagnostic dump on failure
|
||||
env:
|
||||
|
||||
@@ -61,9 +61,11 @@ name: Lint pre-flip continue-on-error
|
||||
# feedback_no_shared_persona_token_use.
|
||||
#
|
||||
# Phase contract (RFC internal#219 §1 ladder):
|
||||
# - Flipped to `continue-on-error: false` after Researcher live-verified
|
||||
# clean runs. The script's own 35 pytest tests pass and recent PR
|
||||
# history shows no masked regressions — the gate is now enforcing.
|
||||
# - This workflow lands at `continue-on-error: true` (Phase 3 —
|
||||
# surface defects without blocking). Follow-up PR flips it to
|
||||
# `false` ONLY after this workflow's own recent runs on `main`
|
||||
# are confirmed clean — exactly the discipline the workflow
|
||||
# itself enforces. Eat your own dogfood.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -95,9 +97,10 @@ jobs:
|
||||
name: Verify continue-on-error flips have run-log proof
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 8
|
||||
# Fail-closed: the lint script is verified clean (35/35 tests pass,
|
||||
# Researcher live-check confirmed). Masking removed per mc#1982 close-out.
|
||||
continue-on-error: false
|
||||
# Phase 3 (RFC internal#219 §1): surface broken flips without blocking
|
||||
# the PR yet. Follow-up flips this to `false` once the workflow itself
|
||||
# has clean recent runs on main. mc#1982 interim — remove when CoE→false.
|
||||
continue-on-error: true # mc#1982
|
||||
steps:
|
||||
- name: Check out PR head (full history for base-SHA access)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
# Forward-compat scope:
|
||||
# Today (2026-05-11) molecule-core/main protects 3 contexts:
|
||||
# - "Secret scan / Scan diff for credential-shaped strings (pull_request)"
|
||||
# - "sop-checklist / all-items-acked (pull_request)"
|
||||
# - "sop-tier-check / tier-check (pull_request)"
|
||||
# - "CI / all-required (pull_request)"
|
||||
# Per RFC#324 Step 2 the required-list expands to ~5 contexts
|
||||
# (qa-review, security-review added). Each new required context's
|
||||
|
||||
@@ -40,7 +40,6 @@ env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: informational lint enforcing docker-host/publish pin convention (internal#512), not a merge gate
|
||||
lint-docker-host-pin:
|
||||
name: Lint docker-host pin on docker-touching workflows
|
||||
runs-on: docker-host
|
||||
|
||||
@@ -16,7 +16,7 @@ name: Lint workflow YAML (Gitea-1.22.6-hostile shapes)
|
||||
#
|
||||
# Empirical history this hardens against:
|
||||
# - status-reaper rev1 caught rule-4 (name-collision) class
|
||||
# - sop-checklist DOA'd on rule-2 (workflow_run partial)
|
||||
# - sop-tier-refire DOA'd on rule-2 (workflow_run partial)
|
||||
# - #319 bootstrap-paradox (chained-defect class, related)
|
||||
# - internal#329 dispatcher race (adjacent)
|
||||
# - 2026-05-11 publish-runtime: rule-1, 24h PyPI freeze
|
||||
|
||||
@@ -1,387 +0,0 @@
|
||||
name: Local Provision Lifecycle E2E
|
||||
|
||||
# MANDATORY coverage for the LOCAL Docker provisioner (MOLECULE_ENV=development,
|
||||
# docker.sock) — the path self-hosters + dev runs use. Every OTHER e2e exercises
|
||||
# the SaaS/EC2 (control-plane) provisioner; nothing mandatory drove the local
|
||||
# Docker path, which is why a config-volume restart-survival bug went undetected.
|
||||
# This workflow provisions a REAL workspace via the local Docker provisioner and
|
||||
# asserts the full lifecycle, INCLUDING the restart-survival assertion.
|
||||
#
|
||||
# Two jobs:
|
||||
# * lifecycle-stub (REQUIRED gate) — builds the tiny stub runtime image, tags
|
||||
# it to the provisioner's RegistryModeLocal cache tag, and runs the full
|
||||
# lifecycle e2e (provision -> online -> restart-survive -> proxy-reach). Fast
|
||||
# (seconds of agent boot, no LLM, no 2.5GB image).
|
||||
# * lifecycle-real (ADVISORY, continue-on-error) — runs the SAME script against
|
||||
# the real claude-code template image with a REAL MiniMax BYOK credential
|
||||
# (LIFECYCLE_LLM=minimax). The proxy-reach step asserts an ACTUAL model reply
|
||||
# (real round-trip through the ws-<id>:8000 proxy), not just reachability.
|
||||
# MiniMax is the cheapest LLM the platform offers, and its `minimax` provider
|
||||
# dials api.minimax.io directly (no CP proxy needed on this local stack).
|
||||
# Heavy + network-dependent (pulls/builds the template + a real LLM call), so
|
||||
# it is non-blocking. Needs the MOLECULE_STAGING_MINIMAX_API_KEY CI secret:
|
||||
# when ABSENT the script SKIPS loud (exit 0) — it never reds on a missing
|
||||
# secret (serving-e2e skip-if-absent pattern).
|
||||
#
|
||||
# SUBSTRATE REQUIREMENT (read before wiring into branch protection)
|
||||
# -----------------------------------------------------------------
|
||||
# This workflow provisions SIBLING docker containers from a HOST Go binary via
|
||||
# the runner's docker.sock — exactly like e2e-api.yml, which already provisions
|
||||
# the `mock` + `priority-runtimes` arms on `docker-host`. So the docker-in-runner
|
||||
# capability IS available on the molecule-runner-* (docker-host) lane. If the
|
||||
# operator ever moves these to a runner WITHOUT docker.sock access for the
|
||||
# platform binary, this lane will red — keep it on `docker-host`.
|
||||
#
|
||||
# Both jobs pin `runs-on: docker-host` (Linux operator-host runners with the
|
||||
# molecule-core-net bridge + a working docker.sock). The bare `ubuntu-latest`
|
||||
# label is also advertised by the Windows act_runner, where docker.sock-bound
|
||||
# steps fail non-deterministically — see lint-required-workflows-docker-host-
|
||||
# pinned.yml + internal#512.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main, staging]
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
|
||||
concurrency:
|
||||
# Per-SHA grouping (mirrors e2e-api.yml). cancel-in-progress:false so a queued
|
||||
# run for an older SHA isn't cancelled by a newer push (auto-promote brittleness).
|
||||
group: local-provision-e2e-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# ===========================================================================
|
||||
# REQUIRED gate — stub runtime, fast. This IS meant to be a required merge gate
|
||||
# (the only mandatory coverage for the LOCAL Docker provisioner), but the new
|
||||
# context is not yet in branch_protections/main — wire it in once the operator
|
||||
# confirms the docker-host runners reliably provision sibling containers from
|
||||
# the host platform binary for this lane (see SUBSTRATE REQUIREMENT above), then
|
||||
# flip the directive below to `# bp-required: yes`. Until then it runs gating
|
||||
# locally (continue-on-error: false) but un-wired in BP, an acknowledged
|
||||
# asymmetry tracked for follow-up. (Earlier this block read `# bp-exempt`, which
|
||||
# contradicted "REQUIRED gate" and tripped lint-required-context-exists-in-bp.)
|
||||
# bp-required: pending #2409
|
||||
# ===========================================================================
|
||||
lifecycle-stub:
|
||||
name: Local Provision Lifecycle E2E (stub)
|
||||
runs-on: docker-host
|
||||
continue-on-error: false
|
||||
timeout-minutes: 15
|
||||
env:
|
||||
PG_CONTAINER: pg-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
REDIS_CONTAINER: redis-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
|
||||
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
|
||||
MOLECULE_ENV: development
|
||||
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
|
||||
- name: Ensure provisioner network + pre-pull alpine
|
||||
run: |
|
||||
# The local provisioner attaches workspace containers to
|
||||
# molecule-core-net and seeds /configs via an alpine helper; the
|
||||
# lifecycle script also uses alpine to seed config.yaml into the
|
||||
# named config volume. Pre-pull + ensure the bridge (idempotent).
|
||||
docker pull alpine:3 >/dev/null
|
||||
docker network create molecule-core-net >/dev/null 2>&1 || true
|
||||
echo "alpine:3 pre-pulled; molecule-core-net ensured."
|
||||
|
||||
- name: Start Postgres (docker, ephemeral host port)
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker run -d --name "$PG_CONTAINER" \
|
||||
-e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule \
|
||||
-p 0:5432 postgres:16 >/dev/null
|
||||
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
|
||||
[ -z "$PG_PORT" ] && PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | head -1 | awk -F: '{print $NF}')
|
||||
if [ -z "$PG_PORT" ]; then echo "::error::no host port for $PG_CONTAINER"; docker logs "$PG_CONTAINER" || true; exit 1; fi
|
||||
echo "DATABASE_URL=postgres://dev:dev@127.0.0.1:${PG_PORT}/molecule?sslmode=disable" >> "$GITHUB_ENV"
|
||||
for i in $(seq 1 30); do
|
||||
docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1 && { echo "pg ready ${i}s"; exit 0; }
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Postgres not ready in 30s"; docker logs "$PG_CONTAINER" || true; exit 1
|
||||
|
||||
- name: Start Redis (docker, ephemeral host port)
|
||||
run: |
|
||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||
docker run -d --name "$REDIS_CONTAINER" -p 0:6379 redis:7 >/dev/null
|
||||
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
|
||||
[ -z "$REDIS_PORT" ] && REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | head -1 | awk -F: '{print $NF}')
|
||||
if [ -z "$REDIS_PORT" ]; then echo "::error::no host port for $REDIS_CONTAINER"; docker logs "$REDIS_CONTAINER" || true; exit 1; fi
|
||||
echo "REDIS_URL=redis://127.0.0.1:${REDIS_PORT}" >> "$GITHUB_ENV"
|
||||
for i in $(seq 1 15); do
|
||||
docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG && { echo "redis ready ${i}s"; exit 0; }
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Redis not ready in 15s"; docker logs "$REDIS_CONTAINER" || true; exit 1
|
||||
|
||||
- name: Configure platform env (admin token + local Docker provisioner)
|
||||
run: |
|
||||
# Allocate an unused ephemeral port to avoid collision with concurrent
|
||||
# jobs or stale processes from prior cancelled runs (see #2450).
|
||||
PORT=$(python3 -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
|
||||
echo "PORT=${PORT}" >> "$GITHUB_ENV"
|
||||
echo "BASE=http://localhost:${PORT}" >> "$GITHUB_ENV"
|
||||
# Deterministic admin token: the script sends MOLECULE_ADMIN_TOKEN as the
|
||||
# bearer; the platform checks ADMIN_TOKEN. Set both to the same value.
|
||||
T="lpe2e-admin-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
echo "ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
|
||||
# MOLECULE_ENV=development: dev posture. MOLECULE_ORG_ID is left UNSET so
|
||||
# main.go wires the LOCAL Docker provisioner (not the CP provisioner), and
|
||||
# MOLECULE_IMAGE_REGISTRY is left UNSET so image resolution uses
|
||||
# RegistryModeLocal (the dockerHasTag cache-check the stub pre-tags into).
|
||||
echo "MOLECULE_ENV=development" >> "$GITHUB_ENV"
|
||||
echo "SECRETS_ENCRYPTION_KEY=lpe2e-test-encryption-key-32bytes!!" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Build platform
|
||||
working-directory: workspace-server
|
||||
run: go build -o platform-server ./cmd/server
|
||||
|
||||
- name: Kill stale platform-server before start (issue #1046)
|
||||
run: |
|
||||
# Dynamic port allocation (see #2450) eliminates the fixed-port race
|
||||
# that caused this gate to red when a prior run left a zombie process.
|
||||
# We still sweep by process name to avoid leaking platform-server
|
||||
# processes on the shared runner.
|
||||
killed=0
|
||||
for pid in $(grep -l "platform-serve" /proc/[0-9]*/comm 2>/dev/null); do
|
||||
kpid="${pid%/comm}"; kpid="${kpid##*/}"
|
||||
cmdline=$(cat "/proc/${kpid}/cmdline" 2>/dev/null | tr '\0' ' ')
|
||||
if echo "$cmdline" | grep -q "platform-server"; then
|
||||
echo "Killing stale platform-server pid ${kpid}: ${cmdline}"
|
||||
kill "$kpid" 2>/dev/null || true
|
||||
killed=$((killed + 1))
|
||||
fi
|
||||
done
|
||||
if [ "$killed" -gt 0 ]; then echo "Killed $killed stale platform-server process(es)."; else echo "No platform-server-named process found."; fi
|
||||
sleep 1
|
||||
|
||||
- name: Start platform (background)
|
||||
working-directory: workspace-server
|
||||
run: |
|
||||
# Bind to the dynamically allocated port (see #2450).
|
||||
# DATABASE_URL/REDIS_URL/ADMIN_TOKEN/MOLECULE_ENV are inherited from
|
||||
# $GITHUB_ENV.
|
||||
PORT=$PORT ./platform-server > platform.log 2>&1 &
|
||||
echo $! > platform.pid
|
||||
|
||||
- name: Wait for /health (+ migrations applied)
|
||||
run: |
|
||||
DEADLINE=300; PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"; start=$(date +%s)
|
||||
while :; do
|
||||
# Verify OUR server is still alive before trusting /health. Our server
|
||||
# binds the allocated port or exits FATAL, so "our PID alive" <=>
|
||||
# "we own the port"; checking it first stops a squatter that answers
|
||||
# /health on the same port (our bind having failed) from false-positiving
|
||||
# the gate (no-flakes RCA).
|
||||
if [ -n "$PID" ] && ! kill -0 "$PID" 2>/dev/null; then
|
||||
echo "::error::platform-server exited early (failed to bind or crashed)"; cat workspace-server/platform.log || true; exit 1
|
||||
fi
|
||||
if curl -sf "$BASE/health" >/dev/null; then
|
||||
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc \
|
||||
"SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'" 2>/dev/null || echo 0)
|
||||
[ "$tables" = "1" ] && { echo "healthy + migrated after $(( $(date +%s) - start ))s"; exit 0; }
|
||||
fi
|
||||
[ "$(( $(date +%s) - start ))" -ge "$DEADLINE" ] && { echo "::error::platform not healthy in ${DEADLINE}s"; cat workspace-server/platform.log || true; exit 1; }
|
||||
sleep 1
|
||||
done
|
||||
|
||||
- name: Run local-provision lifecycle E2E (stub — REQUIRED)
|
||||
run: bash tests/e2e/test_local_provision_lifecycle_e2e.sh
|
||||
|
||||
- name: Dump platform log on failure
|
||||
if: failure()
|
||||
run: cat workspace-server/platform.log || true
|
||||
|
||||
- name: Stop platform
|
||||
if: always()
|
||||
run: |
|
||||
[ -f workspace-server/platform.pid ] && kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
|
||||
|
||||
- name: Stop service containers
|
||||
if: always()
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||
|
||||
# ===========================================================================
|
||||
# ADVISORY — real claude-code image, lifecycle-only. Non-blocking. It pulls/
|
||||
# builds the 2.5GB template image, makes a real (cheap) MiniMax LLM call, and is
|
||||
# network-dependent, so a miss must not block. It proves the REAL runtime
|
||||
# survives a restart AND serves a genuine LLM round-trip on the local
|
||||
# provisioner (proxy-reach asserts a real MiniMax reply, not just reachability).
|
||||
# ===========================================================================
|
||||
# bp-exempt: advisory lane (continue-on-error: true) — informational, never a merge gate.
|
||||
lifecycle-real:
|
||||
name: Local Provision Lifecycle E2E (real image + MiniMax LLM, advisory)
|
||||
runs-on: docker-host
|
||||
# Serialise behind the gating stub job: both jobs share the same docker-host
|
||||
# runner and provision sibling containers. `needs:` forces this advisory job
|
||||
# to start only AFTER lifecycle-stub finishes, avoiding resource contention.
|
||||
# (Dynamic ports eliminated the fixed-port race; serialisation remains for
|
||||
# docker-host capacity hygiene.) continue-on-error keeps a real-job miss
|
||||
# non-blocking; paired with `if: always()` below, `needs:` does NOT gate on the stub's success (a failed
|
||||
# required gate still lets this advisory dependent run).
|
||||
needs: lifecycle-stub
|
||||
if: ${{ always() }}
|
||||
# Tracker for lint-continue-on-error-tracking (Tier 2e / internal#350): this
|
||||
# mask has a forced 14-day renewal cycle. mc#2408 tracks promoting this
|
||||
# advisory MiniMax round-trip to a gating job (then flip to false).
|
||||
continue-on-error: true # mc#2408 — promote advisory MiniMax e2e to gating
|
||||
timeout-minutes: 30
|
||||
env:
|
||||
PG_CONTAINER: pg-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
REDIS_CONTAINER: redis-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
|
||||
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
|
||||
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
|
||||
MOLECULE_ENV: development
|
||||
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
|
||||
- name: Ensure provisioner network + pre-pull alpine
|
||||
run: |
|
||||
docker pull alpine:3 >/dev/null
|
||||
docker network create molecule-core-net >/dev/null 2>&1 || true
|
||||
|
||||
- name: Start Postgres (docker, ephemeral host port)
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker run -d --name "$PG_CONTAINER" \
|
||||
-e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule \
|
||||
-p 0:5432 postgres:16 >/dev/null
|
||||
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
|
||||
[ -z "$PG_PORT" ] && PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | head -1 | awk -F: '{print $NF}')
|
||||
if [ -z "$PG_PORT" ]; then echo "::error::no host port"; docker logs "$PG_CONTAINER" || true; exit 1; fi
|
||||
echo "DATABASE_URL=postgres://dev:dev@127.0.0.1:${PG_PORT}/molecule?sslmode=disable" >> "$GITHUB_ENV"
|
||||
for i in $(seq 1 30); do
|
||||
docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1 && { echo "pg ready ${i}s"; exit 0; }
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Postgres not ready"; docker logs "$PG_CONTAINER" || true; exit 1
|
||||
|
||||
- name: Start Redis (docker, ephemeral host port)
|
||||
run: |
|
||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||
docker run -d --name "$REDIS_CONTAINER" -p 0:6379 redis:7 >/dev/null
|
||||
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
|
||||
[ -z "$REDIS_PORT" ] && REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | head -1 | awk -F: '{print $NF}')
|
||||
if [ -z "$REDIS_PORT" ]; then echo "::error::no host port"; docker logs "$REDIS_CONTAINER" || true; exit 1; fi
|
||||
echo "REDIS_URL=redis://127.0.0.1:${REDIS_PORT}" >> "$GITHUB_ENV"
|
||||
for i in $(seq 1 15); do
|
||||
docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG && { echo "redis ready ${i}s"; exit 0; }
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Redis not ready"; docker logs "$REDIS_CONTAINER" || true; exit 1
|
||||
|
||||
- name: Configure platform env
|
||||
run: |
|
||||
# Allocate an unused ephemeral port to avoid collision with concurrent
|
||||
# jobs or stale processes from prior cancelled runs (see #2450).
|
||||
PORT=$(python3 -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
|
||||
echo "PORT=${PORT}" >> "$GITHUB_ENV"
|
||||
echo "BASE=http://localhost:${PORT}" >> "$GITHUB_ENV"
|
||||
T="lpe2e-real-admin-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
echo "ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_ENV=development" >> "$GITHUB_ENV"
|
||||
echo "SECRETS_ENCRYPTION_KEY=lpe2e-test-encryption-key-32bytes!!" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Build platform
|
||||
working-directory: workspace-server
|
||||
run: go build -o platform-server ./cmd/server
|
||||
|
||||
- name: Kill stale platform-server before start (issue #1046)
|
||||
run: |
|
||||
# Dynamic port allocation (see #2450) eliminates the fixed-port race.
|
||||
# We still sweep by process name to avoid leaking platform-server
|
||||
# processes on the shared runner.
|
||||
killed=0
|
||||
for pid in $(grep -l "platform-serve" /proc/[0-9]*/comm 2>/dev/null); do
|
||||
kpid="${pid%/comm}"; kpid="${kpid##*/}"
|
||||
cmdline=$(cat "/proc/${kpid}/cmdline" 2>/dev/null | tr '\0' ' ')
|
||||
if echo "$cmdline" | grep -q "platform-server"; then
|
||||
echo "Killing stale platform-server pid ${kpid}: ${cmdline}"
|
||||
kill "$kpid" 2>/dev/null || true
|
||||
killed=$((killed + 1))
|
||||
fi
|
||||
done
|
||||
if [ "$killed" -gt 0 ]; then echo "Killed $killed stale platform-server process(es)."; else echo "No platform-server-named process found."; fi
|
||||
sleep 1
|
||||
|
||||
- name: Start platform (background)
|
||||
working-directory: workspace-server
|
||||
run: |
|
||||
PORT=$PORT ./platform-server > platform.log 2>&1 &
|
||||
echo $! > platform.pid
|
||||
|
||||
- name: Wait for /health (+ migrations applied)
|
||||
run: |
|
||||
DEADLINE=300; PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"; start=$(date +%s)
|
||||
while :; do
|
||||
# Verify OUR server is still alive before trusting /health. Our server
|
||||
# binds the allocated port or exits FATAL, so checking our PID first
|
||||
# stops a squatter from false-positiving the gate (no-flakes RCA).
|
||||
if [ -n "$PID" ] && ! kill -0 "$PID" 2>/dev/null; then
|
||||
echo "::error::platform-server exited early (failed to bind or crashed)"; cat workspace-server/platform.log || true; exit 1
|
||||
fi
|
||||
if curl -sf "$BASE/health" >/dev/null; then
|
||||
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc \
|
||||
"SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'" 2>/dev/null || echo 0)
|
||||
[ "$tables" = "1" ] && { echo "healthy after $(( $(date +%s) - start ))s"; exit 0; }
|
||||
fi
|
||||
[ "$(( $(date +%s) - start ))" -ge "$DEADLINE" ] && { echo "::error::platform not healthy in ${DEADLINE}s"; cat workspace-server/platform.log || true; exit 1; }
|
||||
sleep 1
|
||||
done
|
||||
|
||||
- name: Run local-provision lifecycle E2E (real image + MiniMax LLM — ADVISORY)
|
||||
env:
|
||||
# LIFECYCLE_LLM=minimax: provision the REAL claude-code template image
|
||||
# (the mode forces LIFECYCLE_PROVISIONER_BUILDS=1 — the provisioner
|
||||
# clones + docker-builds the template from Gitea via RegistryModeLocal)
|
||||
# with a real MiniMax BYOK credential, and assert an ACTUAL model reply
|
||||
# at the proxy-reach step (a genuine round-trip through ws-<id>:8000).
|
||||
# MiniMax is the cheapest LLM the platform offers; its `minimax`
|
||||
# provider dials api.minimax.io directly, so no CP proxy env is needed.
|
||||
#
|
||||
# Key wiring (DO NOT hardcode): the script reads MINIMAX_API_KEY from
|
||||
# the env; we feed it from the MOLECULE_STAGING_MINIMAX_API_KEY CI
|
||||
# secret (the same secret the staging-smoke + e2e-api MiniMax arms use).
|
||||
# When that secret is ABSENT, MINIMAX_API_KEY is empty and the script
|
||||
# SKIPS loud (exit 0) — it never reds on a missing secret (serving-e2e
|
||||
# skip-if-absent pattern). The advisory job stays green either way.
|
||||
LIFECYCLE_LLM: minimax
|
||||
MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
run: bash tests/e2e/test_local_provision_lifecycle_e2e.sh
|
||||
|
||||
- name: Dump platform log on failure
|
||||
if: failure()
|
||||
run: cat workspace-server/platform.log || true
|
||||
|
||||
- name: Stop platform
|
||||
if: always()
|
||||
run: |
|
||||
[ -f workspace-server/platform.pid ] && kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
|
||||
|
||||
- name: Stop service containers
|
||||
if: always()
|
||||
run: |
|
||||
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
|
||||
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
|
||||
@@ -95,10 +95,10 @@ jobs:
|
||||
# included here — staging green is a separate gate
|
||||
# (`feedback_staging_e2e_merge_gate`).
|
||||
WATCH_BRANCH: 'main'
|
||||
# Issue label applied on file/open. `ci-bp-drift` exists in the
|
||||
# Issue label applied on file/open. `tier:high` exists in the
|
||||
# molecule-core label set (verified 2026-05-11, label id 9).
|
||||
# Rationale for high: main red blocks the promotion train and
|
||||
# poisons every PR's auto-rebase base; treat as a fire even
|
||||
# if intermittent.
|
||||
RED_LABEL: 'ci-bp-drift'
|
||||
RED_LABEL: 'tier:high'
|
||||
run: python3 .gitea/scripts/main-red-watchdog.py
|
||||
|
||||
@@ -248,36 +248,16 @@ jobs:
|
||||
--tag "${STAGING_TENANT_IMAGE_NAME}:${TAG_LATEST}"
|
||||
)
|
||||
|
||||
# Retry loop: buildkit EOF (internal#2468) is often transient on the
|
||||
# publish runner under memory pressure. Up to 3 attempts with a fresh
|
||||
# builder each time so a crashed buildkit doesn't poison the next try.
|
||||
for attempt in 1 2 3; do
|
||||
echo "::notice::Tenant image build attempt ${attempt}/3 ..."
|
||||
builder="tenant-builder-${GITHUB_RUN_ID}-${attempt}"
|
||||
docker buildx create --name "${builder}" --use >/dev/null 2>&1 || true
|
||||
if docker buildx build \
|
||||
--builder "${builder}" \
|
||||
--file ./workspace-server/Dockerfile.tenant \
|
||||
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
|
||||
--build-arg GIT_SHA="${GIT_SHA}" \
|
||||
--label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
|
||||
--label "org.opencontainers.image.revision=${GIT_SHA}" \
|
||||
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
||||
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
|
||||
"${build_tags[@]}" \
|
||||
--push .; then
|
||||
docker buildx rm "${builder}" >/dev/null 2>&1 || true
|
||||
echo "::notice::Tenant image build succeeded on attempt ${attempt}"
|
||||
break
|
||||
fi
|
||||
echo "::warning::Tenant image build attempt ${attempt} failed — cleaning builder and retrying"
|
||||
docker buildx rm "${builder}" >/dev/null 2>&1 || true
|
||||
sleep 10
|
||||
if [ "$attempt" -eq 3 ]; then
|
||||
echo "::error::Tenant image build failed after 3 attempts"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
docker buildx build \
|
||||
--file ./workspace-server/Dockerfile.tenant \
|
||||
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
|
||||
--build-arg GIT_SHA="${GIT_SHA}" \
|
||||
--label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
|
||||
--label "org.opencontainers.image.revision=${GIT_SHA}" \
|
||||
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
|
||||
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
|
||||
"${build_tags[@]}" \
|
||||
--push .
|
||||
|
||||
# bp-exempt: production deploy side-effect; merge is gated by CI / all-required and this job waits for push CI before acting.
|
||||
deploy-production:
|
||||
|
||||
@@ -7,25 +7,18 @@
|
||||
#
|
||||
# A1-α (refire mechanism):
|
||||
# Triggers on:
|
||||
# - `pull_request_target`: opened, synchronize, reopened, labeled, unlabeled
|
||||
# → initial status posts when PR opens / re-pushes, and re-evaluates
|
||||
# when labels change (e.g. risk-indicator labels).
|
||||
# - `pull_request_target`: opened, synchronize, reopened
|
||||
# → initial status posts when PR opens / re-pushes
|
||||
# - `pull_request_review` types: [submitted]
|
||||
# → re-evaluate when a team member submits an APPROVE review so
|
||||
# the gate flips immediately (no wait for the next push or
|
||||
# slash-command). Verified live: sop-checklist.yml uses this
|
||||
# slash-command). Verified live: sop-tier-check.yml uses this
|
||||
# same event and provably fires (produces
|
||||
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
|
||||
# The job-level `if:` does NOT guard on review.state (issue
|
||||
# #2159): Gitea 1.22.6's payload shape for this event does not
|
||||
# reliably expose the state field that the GitHub-style guard
|
||||
# expects. The evaluator (review-check.sh) reads actual reviews
|
||||
# from the API and checks for a real APPROVE, so running on
|
||||
# COMMENT or REQUEST_CHANGES is harmless (read-only,
|
||||
# idempotent). Branch-protection requires the
|
||||
# `(pull_request_target)` context variant, so the review-event
|
||||
# path EXPLICITLY POSTS the required context via the API. Trust
|
||||
# boundary preserved (BASE ref, no PR-head).
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# that `github.event.review.state` is 'APPROVED' or 'approved', so
|
||||
# only APPROVE reviews run the evaluator; COMMENT and
|
||||
# REQUEST_CHANGES are skipped at the job level.
|
||||
# Branch-protection requires the `(pull_request_target)`
|
||||
# context variant, so the review-event path EXPLICITLY POSTS
|
||||
# the required context via the API. Trust boundary preserved
|
||||
@@ -60,7 +53,7 @@
|
||||
#
|
||||
# We MUST NOT use `github.event.comment.author_association` (the
|
||||
# field doesn't exist on Gitea 1.22.6 webhook payload — this was
|
||||
# 's defect #1).
|
||||
# sop-tier-refire's defect #1).
|
||||
#
|
||||
# A4 (no PR-head checkout under pull_request_target):
|
||||
# We check out the BASE ref explicitly so the review-check.sh script is
|
||||
@@ -80,7 +73,7 @@
|
||||
# also not in qa/security teams → also 403.
|
||||
#
|
||||
# Resolution: a dedicated `RFC_324_TEAM_READ_TOKEN` secret, owned by an
|
||||
# identity that IS in both `qa` and `security` teams (Owners-level
|
||||
# identity that IS in both `qa` and `security` teams (Owners-tier
|
||||
# claude-ceo-assistant, or a new service-bot added to both teams).
|
||||
# Provisioning of this secret is tracked as a follow-up issue (filed by
|
||||
# core-devops at PR open).
|
||||
@@ -103,7 +96,7 @@ name: qa-review
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened, labeled, unlabeled]
|
||||
types: [opened, synchronize, reopened]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
@@ -117,19 +110,13 @@ jobs:
|
||||
approved:
|
||||
# Gate the job:
|
||||
# - On pull_request_target events: always run.
|
||||
# - On pull_request_review events: always run. We do NOT guard on
|
||||
# review.state here because Gitea 1.22.6's payload shape for this
|
||||
# event does not reliably expose the state field (issue #2159).
|
||||
# The evaluator (review-check.sh) reads actual reviews from the
|
||||
# API and checks for a real APPROVE, so running on COMMENT or
|
||||
# REQUEST_CHANGES is harmless (read-only, idempotent).
|
||||
# - On labeled/unlabeled events: re-evaluate when labels change.
|
||||
# This ensures qa-review flips when risk-indicator labels are
|
||||
# added or removed.
|
||||
# - On pull_request_review events with an APPROVED review: run so the gate flips
|
||||
# immediately when a team member submits an APPROVE review.
|
||||
# Comment-triggered refires live in sop-checklist.yml review-refire job.
|
||||
if: |
|
||||
github.event_name == 'pull_request_target' ||
|
||||
github.event_name == 'pull_request_review'
|
||||
(github.event_name == 'pull_request_review' &&
|
||||
(github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
|
||||
@@ -143,7 +130,7 @@ jobs:
|
||||
# no comment.user.login so the step is a no-op skip there.
|
||||
if: github.event_name == 'issue_comment'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
login="${{ github.event.comment.user.login }}"
|
||||
@@ -175,7 +162,7 @@ jobs:
|
||||
- name: Evaluate qa-review
|
||||
id: eval
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
# PR number lives in different places per event:
|
||||
@@ -198,7 +185,7 @@ jobs:
|
||||
# TOKEN FIX (RC 8326): uses STATUS_POST_TOKEN (CTO-granted,
|
||||
# msg d52cc72a). Dedicated narrow-scoped write:repository token
|
||||
# for the explicit status POST. Evaluator step stays on
|
||||
# SOP_CHECKLIST_GATE_TOKEN (read-only) per deliberate security
|
||||
# SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
|
||||
# separation: eval computes, POST writes, never the same cred.
|
||||
if: github.event_name == 'pull_request_review' && always()
|
||||
env:
|
||||
|
||||
@@ -21,21 +21,15 @@ on:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- '.gitea/scripts/review-check.sh'
|
||||
- '.gitea/scripts/_approval_validator.py'
|
||||
- '.gitea/scripts/_review_check_filter.py'
|
||||
- '.gitea/scripts/tests/test_review_check.sh'
|
||||
- '.gitea/scripts/tests/_review_check_fixture.py'
|
||||
- '.gitea/scripts/tests/test_approval_validator.py'
|
||||
- '.gitea/workflows/review-check-tests.yml'
|
||||
pull_request:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- '.gitea/scripts/review-check.sh'
|
||||
- '.gitea/scripts/_approval_validator.py'
|
||||
- '.gitea/scripts/_review_check_filter.py'
|
||||
- '.gitea/scripts/tests/test_review_check.sh'
|
||||
- '.gitea/scripts/tests/_review_check_fixture.py'
|
||||
- '.gitea/scripts/tests/test_approval_validator.py'
|
||||
- '.gitea/workflows/review-check-tests.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
@@ -76,16 +70,3 @@ jobs:
|
||||
|
||||
- name: Run review-check.sh regression suite
|
||||
run: bash .gitea/scripts/tests/test_review_check.sh
|
||||
|
||||
- name: SSOT approval-validator unit tests (SEV-1 internal#812)
|
||||
# The Python unit tests for _approval_validator.py are
|
||||
# mutation-verified — every fail-closed branch has an explicit
|
||||
# REJECT assertion. A reviewer who weakens the predicate trips
|
||||
# these in CI.
|
||||
run: |
|
||||
# The test file lives in .gitea/scripts/tests/ with no __init__.py,
|
||||
# so `unittest discover -s .gitea/scripts` finds 0 tests (the SEV-1
|
||||
# suite silently never ran — a CI gap fixed alongside internal#812).
|
||||
# Run the file directly; it self-inserts its sys.path and calls
|
||||
# unittest.main(), so a failing assertion exits non-zero and fails CI.
|
||||
python3 .gitea/scripts/tests/test_approval_validator.py -v
|
||||
|
||||
@@ -10,23 +10,20 @@
|
||||
# A1-α addendum (internal#760): review-event trigger added so the security
|
||||
# gate flips immediately when a team member submits an APPROVE review.
|
||||
# Uses `pull_request_review` types: [submitted] — verified live via
|
||||
# sop-checklist.yml which provably fires this event (produces
|
||||
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
|
||||
# The job-level `if:` does NOT guard on review.state (issue #2159):
|
||||
# Gitea 1.22.6's payload shape for this event does not reliably expose
|
||||
# the state field that the GitHub-style guard expects. The evaluator
|
||||
# (review-check.sh) reads actual reviews from the API and checks for a
|
||||
# real APPROVE, so running on COMMENT or REQUEST_CHANGES is harmless
|
||||
# (read-only, idempotent). Branch-protection requires the
|
||||
# `(pull_request_target)` context variant, so the review-event path
|
||||
# EXPLICITLY POSTS the required context via the API. Trust boundary
|
||||
# preserved (BASE ref, no PR-head).
|
||||
# sop-tier-check.yml which provably fires this event (produces
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# that `github.event.review.state` is 'APPROVED' or 'approved', so only APPROVE
|
||||
# reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
|
||||
# the job level. Branch-protection requires the `(pull_request_target)`
|
||||
# context variant, so the review-event path EXPLICITLY POSTS the required
|
||||
# context via the API. Trust boundary preserved (BASE ref, no PR-head).
|
||||
|
||||
name: security-review
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened, labeled, unlabeled]
|
||||
types: [opened, synchronize, reopened]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
@@ -40,19 +37,13 @@ jobs:
|
||||
approved:
|
||||
# Gate the job:
|
||||
# - On pull_request_target events: always run.
|
||||
# - On pull_request_review events: always run. We do NOT guard on
|
||||
# review.state here because Gitea 1.22.6's payload shape for this
|
||||
# event does not reliably expose the state field (issue #2159).
|
||||
# The evaluator (review-check.sh) reads actual reviews from the
|
||||
# API and checks for a real APPROVE, so running on COMMENT or
|
||||
# REQUEST_CHANGES is harmless (read-only, idempotent).
|
||||
# - On labeled/unlabeled events: re-evaluate when labels change.
|
||||
# This ensures security-review flips when risk-indicator labels
|
||||
# are added or removed.
|
||||
# - On pull_request_review_approved events: run so the gate flips
|
||||
# immediately when a team member submits an APPROVE review.
|
||||
# Comment-triggered refires live in sop-checklist.yml review-refire job.
|
||||
if: |
|
||||
github.event_name == 'pull_request_target' ||
|
||||
github.event_name == 'pull_request_review'
|
||||
(github.event_name == 'pull_request_review' &&
|
||||
(github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
|
||||
@@ -61,7 +52,7 @@ jobs:
|
||||
# so re-running on a non-collaborator comment is harmless.
|
||||
if: github.event_name == 'issue_comment'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
login="${{ github.event.comment.user.login }}"
|
||||
@@ -87,7 +78,7 @@ jobs:
|
||||
- name: Evaluate security-review
|
||||
id: eval
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
|
||||
@@ -107,7 +98,7 @@ jobs:
|
||||
# TOKEN FIX (RC 8326): uses STATUS_POST_TOKEN (CTO-granted,
|
||||
# msg d52cc72a). Dedicated narrow-scoped write:repository token
|
||||
# for the explicit status POST. Evaluator step stays on
|
||||
# SOP_CHECKLIST_GATE_TOKEN (read-only) per deliberate security
|
||||
# SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
|
||||
# separation: eval computes, POST writes, never the same cred.
|
||||
if: github.event_name == 'pull_request_review' && always()
|
||||
env:
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
# Fix (PR #1345 / issue #1280):
|
||||
# - ONE workflow, ONE issue_comment:[created] subscription (no edited/deleted)
|
||||
# - all-items-acked job: pull_request_target OR sop slash-command comments
|
||||
# - review-refire job: qa/security refire slash commands
|
||||
# - review-refire job: qa/security/tier refire slash commands
|
||||
# → ~50% reduction in comment-triggered runner occupancy vs pre-fix.
|
||||
#
|
||||
# Trust boundary (mirrors RFC#324 §A4 + sop-checklist security note):
|
||||
# Trust boundary (mirrors RFC#324 §A4 + sop-tier-check security note):
|
||||
# `pull_request_target` (not `pull_request`) — workflow def is loaded
|
||||
# from BASE branch, so a PR cannot rewrite this workflow to exfiltrate
|
||||
# the token. The `actions/checkout` step pins `ref: base.sha` so the
|
||||
@@ -34,6 +34,14 @@
|
||||
# via a repo secret `SOP_CHECKLIST_GATE_TOKEN`. Provisioning of that
|
||||
# secret is a follow-up authorization step (separate from this PR).
|
||||
#
|
||||
# Failure mode: tier-aware (RFC#351 open question 2):
|
||||
# - tier:high → state=failure (hard-fail; BP blocks merge)
|
||||
# - tier:medium → state=failure (hard-fail; same)
|
||||
# - tier:low → state=pending (soft-fail; BP can choose to require
|
||||
# this context or skip for low-tier PRs)
|
||||
# - missing/no-tier → state=failure (default-mode: hard — never lower
|
||||
# the bar per feedback_fix_root_not_symptom)
|
||||
#
|
||||
# Slash-command contract (RFC#351 v1 + §A1.1-style notes from RFC#324):
|
||||
#
|
||||
# /sop-ack <slug-or-numeric-alias> [optional note]
|
||||
@@ -53,7 +61,7 @@
|
||||
# — declare a gate (qa-review, security-review) N/A.
|
||||
# — see sop-checklist-config.yaml n/a_gates section.
|
||||
#
|
||||
# /qa-recheck /security-recheck
|
||||
# /qa-recheck /security-recheck /refire-tier-check
|
||||
# — refire the corresponding status check on the PR head.
|
||||
#
|
||||
# The eval is read-only + idempotent (read PR + comments + team
|
||||
@@ -141,6 +149,7 @@ jobs:
|
||||
{
|
||||
echo "run_qa=false"
|
||||
echo "run_security=false"
|
||||
echo "run_tier=false"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
|
||||
case "$first_line" in
|
||||
@@ -150,6 +159,9 @@ jobs:
|
||||
/security-recheck*)
|
||||
echo "run_security=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
/refire-tier-check*)
|
||||
echo "run_tier=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
*)
|
||||
echo "::notice::no supported review refire slash command; no-op"
|
||||
;;
|
||||
@@ -158,7 +170,8 @@ jobs:
|
||||
- name: Check out BASE ref for trusted scripts
|
||||
if: |
|
||||
steps.classify.outputs.run_qa == 'true' ||
|
||||
steps.classify.outputs.run_security == 'true'
|
||||
steps.classify.outputs.run_security == 'true' ||
|
||||
steps.classify.outputs.run_tier == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
@@ -167,7 +180,7 @@ jobs:
|
||||
if: steps.classify.outputs.run_qa == 'true'
|
||||
env:
|
||||
# Evaluator (review-check.sh + GET /pulls) stays on read-scoped token.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Explicit POST /statuses uses narrow-scoped write:repository token.
|
||||
STATUS_POST_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
@@ -186,7 +199,7 @@ jobs:
|
||||
if: steps.classify.outputs.run_security == 'true'
|
||||
env:
|
||||
# Evaluator (review-check.sh + GET /pulls) stays on read-scoped token.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Explicit POST /statuses uses narrow-scoped write:repository token.
|
||||
STATUS_POST_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
@@ -200,3 +213,13 @@ jobs:
|
||||
run: |
|
||||
set -euo pipefail
|
||||
.gitea/scripts/review-refire-status.sh
|
||||
|
||||
- name: Refire sop-tier-check status
|
||||
if: steps.classify.outputs.run_tier == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
SOP_DEBUG: '0'
|
||||
run: bash .gitea/scripts/sop-tier-refire.sh
|
||||
|
||||
@@ -0,0 +1,162 @@
|
||||
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
|
||||
#
|
||||
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
|
||||
# from the previous inline-bash version). The script is the single source
|
||||
# of truth; this workflow file just sets env + invokes it.
|
||||
#
|
||||
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
|
||||
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
|
||||
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
|
||||
# branch:
|
||||
# required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
|
||||
# required_approving_reviews: 1
|
||||
# approving_review_teams: ["ceo", "managers", "engineers"]
|
||||
#
|
||||
# Tier → required-team expression (internal#189 AND-composition):
|
||||
# tier:low → engineers,managers,ceo (OR: any one suffices)
|
||||
# tier:medium → managers AND engineers AND qa???,security??? (AND: all required)
|
||||
# tier:high → ceo (OR: single team, wired for AND)
|
||||
#
|
||||
# "???" = teams not yet created in Gitea. When qa + security teams are
|
||||
# added, update TIER_EXPR["tier:medium"] in the script to remove the
|
||||
# markers. PRs already in-flight when qa/security are created continue
|
||||
# to work because their authors explicitly requested those reviews.
|
||||
#
|
||||
# Force-merge: Owners-team override remains available out-of-band via
|
||||
# the Gitea merge API; force-merge writes `incident.force_merge` to
|
||||
# `structure_events` per §Persistent structured logging gate (Phase 3).
|
||||
#
|
||||
# Environment variables:
|
||||
# SOP_DEBUG=1 — per-API-call diagnostic lines. Default: off.
|
||||
# SOP_LEGACY_CHECK=1 — revert to OR-gate for this run. Intended for
|
||||
# emergency use only; burn-in window closed
|
||||
# 2026-05-17 (internal#189 Phase 1).
|
||||
#
|
||||
# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
|
||||
# window closed. As of 2026-06-04 the residual masks left behind by the
|
||||
# burn-in are removed for real (the comment previously claimed this while
|
||||
# the masks still persisted — that was stale):
|
||||
# - continue-on-error: true on the jq-install step (redundant; the step
|
||||
# already exits 0) and on the tier-check step (the burn-in mask).
|
||||
# - the `|| true` after the sop-tier-check.sh invocation, which masked
|
||||
# real tier-gate verdicts.
|
||||
# AND-composition is now fully enforced and the tier-check step can
|
||||
# honestly red CI on a real SOP-6 violation.
|
||||
#
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed): this is a
|
||||
# REQUIRED branch-protected gate on `pull_request_target` (always
|
||||
# same-repo, secrets always present — no fork/advisory split). Failing
|
||||
# open on a token/network/jq fault greened the SOP-6 approval gate
|
||||
# WITHOUT verifying approvals — a fail-open on a required context. The
|
||||
# gate now FAILS CLOSED on infra faults too: fix the token/runner, not
|
||||
# the gate. If you ever need to temporarily re-introduce a mask, file a
|
||||
# tracker and follow the mc#1982 protocol.
|
||||
|
||||
name: sop-tier-check
|
||||
|
||||
# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
|
||||
# `pull_request_target` loads the workflow definition from the BASE
|
||||
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
|
||||
# with write access to a feature branch could rewrite this file in
|
||||
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
|
||||
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
|
||||
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
|
||||
# is the documented mitigation.
|
||||
#
|
||||
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
|
||||
# untrusted code is ever executed in the runner — we only HTTP-call the
|
||||
# Gitea API. If a future change adds a checkout step, it MUST pin to
|
||||
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
|
||||
# the trust boundary.
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, edited, synchronize, reopened, labeled, unlabeled]
|
||||
pull_request_review:
|
||||
types: [submitted, dismissed, edited]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
tier-check:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
secrets: read
|
||||
steps:
|
||||
- name: Check out base branch (for the script)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
# Pin to base.sha — pull_request_target's protection only
|
||||
# works if we never check out PR HEAD. Same SHA the workflow
|
||||
# itself was loaded from.
|
||||
ref: ${{ github.event.pull_request.base.sha }}
|
||||
- name: Install jq
|
||||
# Gitea Actions runners (ubuntu-latest label) do not bundle jq.
|
||||
# The sop-tier-check script uses jq for all JSON API parsing.
|
||||
# Install jq before the script runs so sop-tier-check can pass.
|
||||
#
|
||||
# Method: apt-get first (reliable for Ubuntu runners with internet
|
||||
# access to package mirrors). Falls back to GitHub binary download.
|
||||
# GitHub releases may be unreachable from some runner networks
|
||||
# (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
|
||||
# runners). The sop-tier-check script has its own fallback as a
|
||||
# third line of defense, and this step's final command
|
||||
# (`jq --version ... || echo`) already exits 0 unconditionally — so
|
||||
# the step cannot fail the job on its own.
|
||||
# continue-on-error REMOVED 2026-06-04 (mc#1982 directive: root-fix
|
||||
# and remove, do not renew). It was redundant masking, not a gate.
|
||||
run: |
|
||||
# apt-get is the primary method — Ubuntu package mirrors are reliably
|
||||
# reachable from runner containers. GitHub releases may be blocked
|
||||
# or slow on some networks (infra#241 follow-up).
|
||||
if apt-get update -qq && apt-get install -y -qq jq; then
|
||||
echo "::notice::jq installed via apt-get: $(jq --version)"
|
||||
elif timeout 120 curl -sSL \
|
||||
"https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
|
||||
-o /usr/local/bin/jq && chmod +x /usr/local/bin/jq; then
|
||||
echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
|
||||
else
|
||||
echo "::warning::jq install failed — apt-get and GitHub download both failed."
|
||||
fi
|
||||
jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
|
||||
|
||||
- name: Verify tier label + reviewer team membership
|
||||
# continue-on-error REMOVED 2026-06-04 (expired internal#189 Phase 1
|
||||
# burn-in, window closed 2026-05-17; mc#1982 directive: root-fix and
|
||||
# remove, do not renew). SOP_FAIL_OPEN REMOVED 2026-06-05
|
||||
# (fix/core-ci-fail-closed): the gate now fails CLOSED on infra
|
||||
# faults too (see the env block below), not just on a real verdict.
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
SOP_DEBUG: '0'
|
||||
SOP_LEGACY_CHECK: '0'
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed).
|
||||
#
|
||||
# This is the REQUIRED branch-protected gate
|
||||
# `sop-tier-check / tier-check (pull_request)`. It runs on
|
||||
# `pull_request_target`, which ALWAYS executes from the base
|
||||
# branch WITH secrets present — there is NO fork/advisory split
|
||||
# and no legitimate "secrets genuinely absent" degradation here.
|
||||
#
|
||||
# SOP_FAIL_OPEN=1 made the script `exit 0` on an empty/invalid
|
||||
# token, an unreachable Gitea API, or missing jq — i.e. an AUTH
|
||||
# FAILURE or unreachable-dependency would green the SOP-6
|
||||
# approval gate WITHOUT verifying that the required teams
|
||||
# actually approved. That is a fail-open on a required gate: a
|
||||
# mis-wired or under-scoped SOP_TIER_CHECK_TOKEN would let any PR
|
||||
# merge past the approval requirement.
|
||||
#
|
||||
# Removing the env unsets it → `${SOP_FAIL_OPEN:-}` is empty in
|
||||
# sop-tier-check.sh → every guarded `exit 0` branch instead falls
|
||||
# through to `exit 1`. Infra faults (bad token / API down / no
|
||||
# jq) now FAIL CLOSED with a loud `::error::`, exactly like a real
|
||||
# SOP-6 violation. Fix the token/runner, not the gate.
|
||||
run: |
|
||||
bash .gitea/scripts/sop-tier-check.sh
|
||||
@@ -0,0 +1,52 @@
|
||||
# sop-tier-refire — manual fallback for sop-tier-check refire.
|
||||
#
|
||||
# Closes internal#292. Gitea 1.22.6 doesn't refire workflows on the
|
||||
# `pull_request_review` event (go-gitea/gitea#33700); the `sop-tier-check`
|
||||
# workflow's review-event subscription is silently dead. The result:
|
||||
# PRs that get their approving review AFTER the tier-check ran on open/
|
||||
# synchronize keep their failing status check forever, and the only way
|
||||
# to merge is the admin force-merge path (audited via `audit-force-merge`
|
||||
# but the audit trail keeps growing; see `feedback_never_admin_merge_bypass`).
|
||||
#
|
||||
# Comment-triggered refires now live in `review-refire-comments.yml`. Gitea
|
||||
# queues issue_comment workflows before evaluating job-level `if:`, so having
|
||||
# qa-review, security-review, sop-checklist, and sop-tier-refire all subscribe
|
||||
# to every comment caused queue storms on SOP-heavy PRs. This workflow is a
|
||||
# non-automatic breadcrumb only; Gitea 1.22.6 does not support
|
||||
# workflow_dispatch inputs, so real refires must use `/refire-tier-check`.
|
||||
#
|
||||
# SECURITY MODEL:
|
||||
#
|
||||
# 1. `pull_request` exists on the issue (issue_comment fires on issues
|
||||
# AND PRs; we only want PRs).
|
||||
# 2. `comment.author_association` must be MEMBER/OWNER/COLLABORATOR.
|
||||
# Per the internal#292 core-security review (review#1066 ask): anyone
|
||||
# can comment, but only repo collaborators+ can flip the status.
|
||||
# Without this gate, a drive-by commenter on a public-issue-tracker
|
||||
# surface could trigger a status flip.
|
||||
# 3. Comment body must contain `/refire-tier-check` — a slash-command-
|
||||
# shaped trigger (not just any comment word). Prevents accidental
|
||||
# triggering from prose like "we should refire tests" in a review.
|
||||
# 4. This workflow does NOT check out PR HEAD code. Like sop-tier-check,
|
||||
# it only HTTP-calls the Gitea API. Trust boundary preserved.
|
||||
#
|
||||
# Note: `issue_comment` fires from the BASE branch's workflow file. There
|
||||
# is no `pull_request_target` equivalent to set; the trigger inherently
|
||||
# loads the workflow from the default branch.
|
||||
#
|
||||
# Rate-limit: a 1s pre-sleep + a "skip if status posted in last 30s"
|
||||
# guard prevents comment-spam from thrashing the status. See the script.
|
||||
|
||||
name: sop-tier-check refire (manual)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
refire:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Explain supported refire path
|
||||
run: |
|
||||
echo "::error::Gitea 1.22.6 does not support workflow_dispatch inputs here; comment /refire-tier-check on the PR instead."
|
||||
exit 1
|
||||
@@ -112,9 +112,9 @@ jobs:
|
||||
E2E_RUNTIME: claude-code
|
||||
# Pin the smoke to a specific MiniMax model rather than relying
|
||||
# on the per-runtime default (which could resolve to "sonnet" →
|
||||
# direct Anthropic and defeat the cost saving). MiniMax-M2.7 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
|
||||
E2E_MODEL_SLUG: MiniMax-M2.7
|
||||
# direct Anthropic and defeat the cost saving). MiniMax-M2 is the
|
||||
# stable staging MiniMax path used by the full-SaaS smoke.
|
||||
E2E_MODEL_SLUG: MiniMax-M2
|
||||
E2E_RUN_ID: "smoke-${{ github.run_id }}"
|
||||
# Debug-only: when an operator dispatches with keep_on_failure=true,
|
||||
# the smoke script's E2E_KEEP_ORG=1 path skips teardown so the
|
||||
|
||||
@@ -34,10 +34,8 @@ name: Sweep stale Cloudflare DNS records
|
||||
# scripts/ops/test_sweep_cf_decide.py (#2027) cover the rule
|
||||
# classifier.
|
||||
#
|
||||
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
|
||||
# (operator-host canonical name) are accepted — the workflow falls back
|
||||
# automatically. Same for CF_ZONE_ID / CLOUDFLARE_ZONE_ID. Confirmed
|
||||
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
|
||||
# Secrets: CF_API_TOKEN, CF_ZONE_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
|
||||
# are confirmed existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
|
||||
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
|
||||
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
|
||||
|
||||
@@ -81,8 +79,8 @@ jobs:
|
||||
# each individually capped at 10s by the script's curl -m flag.
|
||||
timeout-minutes: 3
|
||||
env:
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
|
||||
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID || secrets.CLOUDFLARE_ZONE_ID }}
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
|
||||
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
@@ -131,7 +129,6 @@ jobs:
|
||||
fi
|
||||
echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
|
||||
echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
|
||||
echo "::error::Cloudflare secrets accept either the CI-scoped name (CF_API_TOKEN / CF_ZONE_ID) or the operator-host canonical name (CLOUDFLARE_API_TOKEN / CLOUDFLARE_ZONE_ID)."
|
||||
echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -29,12 +29,10 @@ name: Sweep stale Cloudflare Tunnels
|
||||
# the DNS sweep's 50% because tenant-shaped tunnels are mostly
|
||||
# orphans by design) refuses to nuke past the threshold.
|
||||
#
|
||||
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
|
||||
# (operator-host canonical name) are accepted — the workflow falls back
|
||||
# automatically. Same for CF_ACCOUNT_ID / CLOUDFLARE_ACCOUNT_ID. Confirmed
|
||||
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
|
||||
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
|
||||
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
|
||||
# Secrets: CF_API_TOKEN, CF_ACCOUNT_ID are confirmed existing per
|
||||
# issue #425 §425 audit. CP_ADMIN_API_TOKEN and CP_STAGING_ADMIN_API_TOKEN
|
||||
# are unconfirmed — if missing, the verify step (schedule → hard-fail,
|
||||
# dispatch → soft-skip) surfaces it clearly.
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -76,8 +74,8 @@ jobs:
|
||||
# the sweep-cf-orphans companion job).
|
||||
timeout-minutes: 30
|
||||
env:
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
|
||||
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID || secrets.CLOUDFLARE_ACCOUNT_ID }}
|
||||
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
|
||||
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
|
||||
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
|
||||
|
||||
@@ -58,51 +58,22 @@ jobs:
|
||||
python-version: '3.11'
|
||||
- name: Install .gitea script test dependencies
|
||||
run: python -m pip install --quiet 'pytest==9.0.2' 'PyYAML==6.0.2'
|
||||
- name: Run scripts/ unittests (fail-closed on 0 collected)
|
||||
- name: Run scripts/ unittests, if any
|
||||
# Top-level scripts/ tests live alongside their target file. The
|
||||
# runtime packaging tests moved to molecule-ai-workspace-runtime, so
|
||||
# this pass may legitimately find NO test files today.
|
||||
#
|
||||
# Gate-integrity fix: the previous guard keyed off `rc==5` to detect
|
||||
# "no tests collected", but unittest only exits 5 ("NO TESTS RAN") on
|
||||
# Python >= 3.12; the 3.11 pinned above exits 0 on 0 tests. The guard therefore
|
||||
# never fired, so any test_*.py added here would silently run 0 tests
|
||||
# while this step stayed GREEN. A green step that runs 0 tests is
|
||||
# worse than a red one. We now fail-closed:
|
||||
# - genuinely NO test_*.py present -> loud SKIP (legitimate no-op)
|
||||
# - test_*.py present but 0 collected -> FAIL (broken import/empty)
|
||||
# this pass may legitimately find no tests.
|
||||
working-directory: scripts
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Non-recursive count: scripts/ has no __init__.py, so unittest
|
||||
# discover does not recurse into subdirs (ops/ is run separately
|
||||
# below) — top-level files are the entire discovery scope here.
|
||||
nfiles=$(find . -maxdepth 1 -name 'test_*.py' | wc -l | tr -d ' ')
|
||||
if [ "$nfiles" -eq 0 ]; then
|
||||
echo "SKIP: no top-level scripts/ test_*.py files present (genuine no-op)."
|
||||
set +e
|
||||
python -m unittest discover -t . -p 'test_*.py' -v
|
||||
rc=$?
|
||||
if [ "$rc" -eq 5 ]; then
|
||||
echo "No top-level scripts/ unittest files found; skipping."
|
||||
exit 0
|
||||
fi
|
||||
echo "Found $nfiles top-level scripts/ test_*.py file(s); asserting they collect >0 tests."
|
||||
ncollected=$(python -c "import unittest; print(unittest.TestLoader().discover('.', pattern='test_*.py', top_level_dir='.').countTestCases())")
|
||||
echo "Collected $ncollected test case(s)."
|
||||
if [ "$ncollected" -eq 0 ]; then
|
||||
echo "FAIL: test_*.py file(s) present but 0 tests collected (broken import / empty file / discovery error)."
|
||||
exit 1
|
||||
fi
|
||||
python -m unittest discover -t . -p 'test_*.py' -v
|
||||
exit "$rc"
|
||||
- name: Run scripts/ops/ unittests (sweep_cf_decide, ...)
|
||||
# Real gate: scripts/ops/ must always run tests. Assert >0 collected so
|
||||
# deleting all test files (or breaking an import) can't pass GREEN by
|
||||
# running 0 tests — same gate-integrity class as the scripts/ step.
|
||||
working-directory: scripts/ops
|
||||
run: |
|
||||
set -euo pipefail
|
||||
ncollected=$(python -c "import unittest; print(unittest.TestLoader().discover('.', pattern='test_*.py').countTestCases())")
|
||||
echo "scripts/ops/ collected $ncollected test case(s)."
|
||||
if [ "$ncollected" -eq 0 ]; then
|
||||
echo "FAIL: scripts/ops/ collected 0 tests — this gate must run real tests (deleted/broken import?)."
|
||||
exit 1
|
||||
fi
|
||||
python -m unittest discover -p 'test_*.py' -v
|
||||
run: python -m unittest discover -p 'test_*.py' -v
|
||||
- name: Run .gitea/scripts pytest suite
|
||||
run: python -m pytest .gitea/scripts/tests -q
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
# umbrella-reaper — auto-recovery for stale CI umbrella statuses on open PRs.
|
||||
#
|
||||
# Tracking: molecule-core#1780.
|
||||
#
|
||||
# Problem: when `CI / all-required (pull_request)` reports failure due to
|
||||
# a propagation/timing race despite all required sub-jobs being success,
|
||||
# branch protection blocks the merge. Operators currently recover manually
|
||||
# per docs/runbooks/ci-umbrella-stale-compensating-status.md.
|
||||
#
|
||||
# This workflow automates that recovery: it scans open PRs and posts a
|
||||
# compensating success status when the umbrella is stale but all sub-jobs
|
||||
# are verified green.
|
||||
#
|
||||
# Trust boundary: the script only reads PR lists + statuses and POSTs to
|
||||
# /statuses/{sha}. It never checks out PR HEAD code. The Gitea token has
|
||||
# write:repository scope for statuses only.
|
||||
#
|
||||
# Sibling: .gitea/workflows/status-reaper.yml (default-branch push-suffix
|
||||
# compensation). Same persona provisioning model.
|
||||
|
||||
name: umbrella-reaper
|
||||
|
||||
# IMPORTANT — Schedule moved to operator-config:
|
||||
# /etc/cron.d/molecule-core-umbrella-reaper ->
|
||||
# /usr/local/bin/molecule-core-cron-bot.sh umbrella-reaper
|
||||
#
|
||||
# This keeps the compensation cadence but stops a maintenance bot from
|
||||
# consuming Gitea Actions runner slots during PR merge waves.
|
||||
# Gitea 1.22.6 parser quirk per
|
||||
# `feedback_gitea_workflow_dispatch_inputs_unsupported`: do NOT add an
|
||||
# `inputs:` block here. Gitea 1.22.6 rejects the whole workflow as
|
||||
# "unknown on type" when `workflow_dispatch.inputs.X` is present.
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# NOTE: NO `concurrency:` block is intentional — same reasoning as
|
||||
# status-reaper.yml. Gitea 1.22.6 doesn't honor cancel-in-progress for
|
||||
# queued ticks; the POST is idempotent so concurrent ticks are safe.
|
||||
|
||||
jobs:
|
||||
reap:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 8
|
||||
steps:
|
||||
- name: Check out repo at default-branch HEAD
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install PyYAML
|
||||
run: python -m pip install --quiet 'PyYAML==6.0.2'
|
||||
|
||||
- name: Compensate stale PR umbrella statuses
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.UMBRELLA_REAPER_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_LIMIT: "50"
|
||||
run: python3 .gitea/scripts/umbrella-reaper.py
|
||||
@@ -26,7 +26,7 @@ name: verify-providers-gen
|
||||
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
|
||||
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
|
||||
# from branch protection (turning it into a hard merge gate has blast radius
|
||||
# — operator GO required, same pattern as sop-checklist / verify-providers-gen
|
||||
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
|
||||
# on controlplane). Promote it into branch protection in a follow-up once
|
||||
# P2 has soaked.
|
||||
# Until then it behaves like secret-scan / block-internal-paths: a standalone
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
# use this Makefile; CI calls docker compose / go test directly so the
|
||||
# Makefile can evolve without breaking the build.
|
||||
|
||||
.PHONY: help dev up down logs build test e2e-peer-visibility e2e-concierge-creates-workspace openapi-spec openapi-spec-check gen gen-docker gen-check gen-check-docker
|
||||
.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check gen gen-docker gen-check gen-check-docker
|
||||
|
||||
# ─── Provider-registry SSOT codegen (internal#718) ─────────────────────
|
||||
# The Go module lives in workspace-server/. The checked-in artifact
|
||||
@@ -57,16 +57,6 @@ test: ## Run Go unit tests in workspace-server/.
|
||||
e2e-peer-visibility: ## Run the LOCAL peer-visibility MCP gate vs the running stack (needs `make up` first).
|
||||
bash tests/e2e/test_peer_visibility_mcp_local.sh
|
||||
|
||||
# FUNCTIONAL local proof that the org concierge actually DOES org-management:
|
||||
# send it a natural-language A2A request and assert it really CREATES a workspace
|
||||
# via its platform MCP (create_workspace) — the deterministic side effect, not a
|
||||
# REST 200. SKIPs LOUD (exit 0) unless the local concierge is seeded, online, and
|
||||
# running on the platform-agent image (so create_workspace exists). To run it
|
||||
# green locally: seed the concierge (MOLECULE_SEED_PLATFORM_AGENT=1) on the
|
||||
# platform-agent image WITH a model key. See the script header for the contract.
|
||||
e2e-concierge-creates-workspace: ## Prove the concierge actually creates a workspace via its platform MCP (skips loud if not runnable).
|
||||
bash tests/e2e/test_concierge_creates_workspace_local.sh
|
||||
|
||||
# ─── OpenAPI spec generation (RFC #1706, Phase 1) ─────────────────────
|
||||
# Regenerate workspace-server/docs/openapi/swagger.{yaml,json} from
|
||||
# swaggo annotations on the gin handlers. Commit the output. CI runs
|
||||
|
||||
@@ -1,14 +1,7 @@
|
||||
import { test, expect } from "@playwright/test";
|
||||
import type { Page } from "@playwright/test";
|
||||
import { startEchoRuntime } from "./fixtures/echo-runtime";
|
||||
import { seedWorkspace, startHeartbeat, cleanupWorkspace } from "./fixtures/chat-seed";
|
||||
|
||||
/**
 * Open the Org-map view by clicking the `nav-map` rail button.
 *
 * Fails with "rail button nav-map missing" if the button is not visible
 * within 10 seconds. Per the original note, entering this view is what makes
 * the Canvas (React Flow graph) mount.
 */
async function enterMapView(page: Page): Promise<void> {
  const mapRailButton = page.getByTestId("nav-map");
  await expect(mapRailButton, "rail button nav-map missing").toBeVisible({
    timeout: 10_000,
  });
  await mapRailButton.click();
}
|
||||
|
||||
test.describe("Desktop ChatTab", () => {
|
||||
let cleanup: () => Promise<void> = async () => {};
|
||||
@@ -36,7 +29,6 @@ test.describe("Desktop ChatTab", () => {
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await page.setViewportSize({ width: 1280, height: 800 });
|
||||
await page.goto("/");
|
||||
await enterMapView(page);
|
||||
await page.waitForSelector(".react-flow__node", { timeout: 10_000 });
|
||||
// Dismiss onboarding guide if present.
|
||||
const skipGuide = page.getByText("Skip guide");
|
||||
@@ -75,7 +67,6 @@ test.describe("Desktop ChatTab", () => {
|
||||
await expect(page.getByText("Echo: Persistence test")).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
await page.reload();
|
||||
await enterMapView(page);
|
||||
await page.waitForSelector(".react-flow__node", { timeout: 10_000 });
|
||||
await page.getByText(workspaceName, { exact: true }).first().click();
|
||||
await page.locator('#tab-chat').click();
|
||||
@@ -152,7 +143,6 @@ test.describe("Desktop ChatTab — Markdown rendering", () => {
|
||||
test.beforeEach(async ({ page }) => {
|
||||
await page.setViewportSize({ width: 1280, height: 800 });
|
||||
await page.goto("/");
|
||||
await enterMapView(page);
|
||||
await page.waitForSelector(".react-flow__node", { timeout: 10_000 });
|
||||
const skipGuide2 = page.getByText("Skip guide");
|
||||
if (await skipGuide2.isVisible().catch(() => false)) {
|
||||
|
||||
@@ -27,13 +27,9 @@ export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
|
||||
// 1. Create external workspace pointing at the in-process echo runtime.
|
||||
const runId = Math.random().toString(36).slice(2, 8);
|
||||
const wsName = `Chat E2E Agent ${runId}`;
|
||||
const adminToken = process.env.E2E_ADMIN_TOKEN ?? process.env.ADMIN_TOKEN;
|
||||
const createRes = await fetch(`${PLATFORM_URL}/workspaces`, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
...(adminToken ? { Authorization: `Bearer ${adminToken}` } : {}),
|
||||
},
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify({
|
||||
name: wsName,
|
||||
tier: 1,
|
||||
|
||||
@@ -1,648 +0,0 @@
|
||||
/**
|
||||
* Staging concierge canvas E2E — exercises the platform-agent CONCIERGE shell
|
||||
* (canvas/src/components/concierge/ConciergeShell.tsx and the Settings split)
|
||||
* against a fresh staging org provisioned by the shared global setup
|
||||
* (e2e/staging-setup.ts). Each `test.describe` covers ONE concierge function
|
||||
* and asserts the behaviour works — not merely that an element exists.
|
||||
*
|
||||
* Why this is a SEPARATE spec from staging-tabs.spec.ts (which drives the
|
||||
* Org-map SidePanel tab UI): the two assert different surfaces of the same
|
||||
* tenant. Both reuse the EXACT shared harness — same global setup (one
|
||||
* provisioned org/workspace), same Playwright staging config (matched by the
|
||||
* `staging-*.spec.ts` testMatch), same gated `Canvas tabs E2E` workflow check.
|
||||
* No new harness, no new seeding mechanism.
|
||||
*
|
||||
* One extra precondition this spec needs that staging-tabs does NOT: a
|
||||
* kind='platform' concierge ROW. The CI/SaaS tenant does not self-seed one
|
||||
* (MOLECULE_SEED_PLATFORM_AGENT is unset on CI — workspace-server
|
||||
* cmd/server/main.go), so without it the concierge shell falls back to
|
||||
* roots[0] as a *pseudo*-platform surface and the platform-specific
|
||||
* behaviours (root tag, hidden-from-map) can't be asserted. So this spec
|
||||
* installs one via the SAME admin endpoint the control plane uses at
|
||||
* org-provision time — POST /admin/org/platform-agent (AdminAuth, accepts the
|
||||
* per-tenant admin bearer that global setup already exports). Installing it
|
||||
* re-parents the provisioned hermes workspace UNDER the platform agent
|
||||
* (handlers/platform_agent.go installPlatformAgent), giving us a real
|
||||
* platform ROOT + a real child workspace — exactly the topology the concierge
|
||||
* Home tree and Org-map filter are built to handle.
|
||||
*
|
||||
* This install mutates the shared tenant (re-parents the workspace). It is the
|
||||
* LAST staging spec alphabetically among the topology-touching ones, and
|
||||
* staging-tabs / staging-display read the workspace by id (not by root-ness),
|
||||
* so the re-parent does not break them; Playwright runs workers=1 in file
|
||||
* order, and the install is idempotent.
|
||||
*
|
||||
* Auth model is identical to staging-tabs.spec.ts: feed the per-tenant admin
|
||||
* token as an Authorization: Bearer header on every browser request, mock
|
||||
* /cp/auth/me so AuthGate resolves, and fall any non-auth 401 back to an
|
||||
* empty 200 so a workspace-scoped 401 can't yank us to AuthKit.
|
||||
*/
|
||||
|
||||
import { test, expect, type Page, type BrowserContext } from "@playwright/test";
|
||||
|
||||
const STAGING = process.env.CANVAS_E2E_STAGING === "1";
|
||||
|
||||
// Fail-closed, not skip-green (mirrors staging-tabs.spec.ts): a staging run
|
||||
// that was REQUESTED (CANVAS_E2E_STAGING=1) but has no tenant state is a
|
||||
// provisioning failure, asserted loudly inside the test body — not a skip.
|
||||
// CANVAS_E2E_STAGING unset = operator did not request staging = clean skip.
|
||||
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — staging-only suite, not requested");
|
||||
|
||||
/**
 * Read the tenant handoff from the environment and validate it.
 *
 * @returns tenantURL, tenantToken and workspaceId (all guaranteed non-empty)
 *   plus orgID, which is passed through as-is and may be undefined.
 * @throws Error when STAGING_TENANT_URL, STAGING_TENANT_TOKEN or
 *   STAGING_WORKSPACE_ID is unset or empty. The message treats this as a
 *   provisioning failure rather than a reason to skip.
 */
function tenantEnv() {
  const {
    STAGING_TENANT_URL: tenantURL,
    STAGING_TENANT_TOKEN: tenantToken,
    STAGING_WORKSPACE_ID: workspaceId,
    STAGING_ORG_ID: orgID,
  } = process.env;

  // Keep the truthiness test directly in the `if` so TypeScript narrows the
  // three required values to `string` for the return below.
  if (!(tenantURL && tenantToken && workspaceId)) {
    const message = [
      "staging-setup.ts did not export STAGING_TENANT_URL / ",
      "STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID. CANVAS_E2E_STAGING=1 was ",
      "set (staging WAS requested) but global setup produced no tenant — a ",
      "provisioning failure, NOT a reason to skip. See the [staging-setup] ",
      "log above.",
    ].join("");
    throw new Error(message);
  }

  return { tenantURL, tenantToken, workspaceId, orgID };
}
|
||||
|
||||
// A fixed, valid uuid for the installed platform agent. Any valid uuid works
|
||||
// (the install upserts on this id); reusing one constant keeps re-runs
|
||||
// idempotent on the same row. Chosen out of the e2e namespace so it can't
|
||||
// collide with a CP-derived org id.
|
||||
const PLATFORM_AGENT_ID = "e2e0c1e2-0000-4000-a000-000000c0ce0e";
|
||||
const PLATFORM_AGENT_NAME = "E2E Concierge";
|
||||
|
||||
/**
 * POST the fixed platform-agent (concierge) row to the tenant's
 * `/admin/org/platform-agent` endpoint.
 *
 * The request carries the tenant admin bearer and, when `orgID` is provided,
 * an `X-Molecule-Org-Id` header. The body is always
 * `{ id: PLATFORM_AGENT_ID, name: PLATFORM_AGENT_NAME }`; the file's notes
 * describe the endpoint as an idempotent upsert on that id.
 *
 * @returns `{ installed: true }` on any 2xx response. Returns
 *   `{ installed: false }` on 404/405, which is treated as a backend that
 *   predates the endpoint; callers then loosen the platform-specific
 *   assertions.
 * @throws Error carrying the status and response text for any other status.
 */
async function installPlatformAgent(
  page: Page,
  tenantURL: string,
  tenantToken: string,
  orgID: string | undefined,
): Promise<{ installed: boolean }> {
  const headers: Record<string, string> = {
    Authorization: `Bearer ${tenantToken}`,
    "Content-Type": "application/json",
    // Only sent when global setup exported an org id.
    ...(orgID ? { "X-Molecule-Org-Id": orgID } : {}),
  };

  const resp = await page.request.post(`${tenantURL}/admin/org/platform-agent`, {
    headers,
    data: { id: PLATFORM_AGENT_ID, name: PLATFORM_AGENT_NAME },
  });
  const status = resp.status();

  // Endpoint absent on an older backend — proceed against the fallback root.
  const endpointMissing = status === 404 || status === 405;
  if (endpointMissing) {
    console.warn(
      `[staging-concierge] POST /admin/org/platform-agent returned ${status} — ` +
        `backend predates the platform-agent endpoint. Proceeding against the ` +
        `roots[0] concierge fallback; the platform-root / map-hidden assertions ` +
        `are loosened accordingly.`,
    );
    return { installed: false };
  }

  // Anything else outside 2xx is a real failure: surface status + body.
  const succeeded = status >= 200 && status < 300;
  if (!succeeded) {
    const detail = await resp.text().catch(() => "");
    throw new Error(`POST /admin/org/platform-agent ${status}: ${detail}`);
  }

  console.log(`[staging-concierge] platform agent installed (HTTP ${status})`);
  return { installed: true };
}
|
||||
|
||||
/**
 * Prepare a browser context so the canvas loads without an interactive login.
 *
 * Three things are installed on `context`:
 *  1. An `Authorization: Bearer <tenantToken>` header on every request.
 *  2. A mock for any URL matching `**&#47;cp/auth/me` that answers 200 with a
 *     fixed test identity (user id derived from `workspaceId`).
 *  3. A catch-all route for `fetch` requests: the real response is passed
 *     through unless it is a 401, in which case an empty 200 JSON body is
 *     returned instead — `{}` when the last path segment looks like an id
 *     (8+ chars of hex digits / dashes), `[]` otherwise.
 *
 * The original note states this mirrors staging-tabs.spec.ts; that file is
 * not visible here, so keep the two in sync manually.
 */
async function authenticate(
  context: BrowserContext,
  tenantToken: string,
  workspaceId: string,
): Promise<void> {
  await context.setExtraHTTPHeaders({ Authorization: `Bearer ${tenantToken}` });

  // The identity payload never changes, so serialise it once up front.
  const mockedIdentity = JSON.stringify({
    user_id: `e2e-test-user-${workspaceId}`,
    org_id: "e2e-test-org",
    email: "e2e@test.local",
  });
  await context.route("**/cp/auth/me", (route) =>
    route.fulfill({
      status: 200,
      contentType: "application/json",
      body: mockedIdentity,
    }),
  );

  await context.route("**", async (route, request) => {
    // Leave non-fetch traffic and the auth mock to the other handlers.
    const isFetch = request.resourceType() === "fetch";
    if (!isFetch || request.url().includes("/cp/auth/me")) {
      return route.fallback();
    }

    let upstream;
    try {
      upstream = await route.fetch();
    } catch {
      // Could not reach upstream — let the default handling take over.
      return route.fallback();
    }
    if (upstream.status() !== 401) {
      return route.fulfill({ response: upstream });
    }

    // 401: substitute an empty payload shaped like the endpoint's result.
    const segments = new URL(request.url()).pathname.split("/").filter(Boolean);
    const tail = segments.pop() || "";
    const tailIsIdLike = /^[0-9a-f-]{8,}$/.test(tail);
    await route.fulfill({
      status: 200,
      contentType: "application/json",
      body: tailIsIdLike ? "{}" : "[]",
    });
  });
}
|
||||
|
||||
/**
 * Navigate to the tenant and wait until the concierge shell is usable.
 *
 * Steps:
 *  - mirror browser console errors into the test log,
 *  - `goto(tenantURL)` waiting only for DOMContentLoaded,
 *  - wait up to 45 s for either the `nav-home` rail button or the
 *    `hydration-error` banner,
 *  - fail if the hydration-error banner is present,
 *  - fail if the text "Something went wrong" appears anywhere on the page.
 *
 * networkidle is deliberately not awaited; the original note says the shell
 * keeps a WebSocket and polling open.
 */
async function loadConcierge(page: Page, tenantURL: string): Promise<void> {
  page.on("console", (msg) => {
    if (msg.type() !== "error") return;
    console.log(`[e2e/console-error] ${msg.text()}`);
  });

  await page.goto(tenantURL, { waitUntil: "domcontentloaded" });

  // Whichever of the two landmarks shows up first ends the wait.
  await page.waitForSelector(
    '[data-testid="nav-home"], [data-testid="hydration-error"]',
    { timeout: 45_000 },
  );

  // One-shot count (not a retrying assertion): the banner must be absent now.
  const hydrationBanner = page.locator('[data-testid="hydration-error"]');
  const bannerCount = await hydrationBanner.count();
  expect(
    bannerCount,
    "canvas hydration failed — check staging CP + tenant reachability",
  ).toBe(0);

  const errorBoundaryText = page.getByText("Something went wrong", { exact: false });
  await expect(
    errorBoundaryText,
    "app-level ErrorBoundary tripped during concierge hydration",
  ).toHaveCount(0);
}
|
||||
|
||||
/**
 * Click the left-rail button for `view` (`nav-home`, `nav-map` or
 * `nav-settings`), after first asserting it is visible within 10 seconds.
 */
async function navTo(page: Page, view: "home" | "map" | "settings"): Promise<void> {
  const testId = `nav-${view}`;
  const railButton = page.getByTestId(testId);
  await expect(railButton, `rail button ${testId} missing`).toBeVisible({
    timeout: 10_000,
  });
  await railButton.click();
}
|
||||
|
||||
// ── shared per-spec setup ──────────────────────────────────────────────────
// Runs before every test in this file: authenticate the browser context, then
// (re)install the platform agent. Doing the install here rather than once per
// file lets a single test be run on its own (`--grep`).
//
// `platformInstalled` records the outcome of the most recent install so tests
// can gate their platform-specific assertions on it.
let platformInstalled = false;

test.beforeEach(async ({ page, context }) => {
  const env = tenantEnv();
  await authenticate(context, env.tenantToken, env.workspaceId);
  const outcome = await installPlatformAgent(
    page,
    env.tenantURL,
    env.tenantToken,
    env.orgID,
  );
  platformInstalled = outcome.installed;
});
|
||||
|
||||
/* ───────────────────────── 1. Concierge shell / nav ──────────────────────── */
test.describe("concierge shell + nav", () => {
  test("left rail switches Home / Org map / Settings; topbar shows the org name", async ({
    page,
  }) => {
    const { tenantURL } = tenantEnv();
    await loadConcierge(page, tenantURL);

    // Every rail destination must be rendered.
    const railViews = ["home", "map", "settings"] as const;
    for (const view of railViews) {
      await expect(page.getByTestId(`nav-${view}`)).toBeVisible();
    }

    // The topbar org name must resolve to some non-empty text. Per the
    // original note the value comes from GET /org/identity with a
    // "Molecule AI" fallback; this test checks only that it is non-empty.
    const orgName = page.getByTestId("topbar-org-name");
    await expect(orgName).toBeVisible();
    const renderedNameLength = async () =>
      ((await orgName.innerText()) || "").trim().length;
    await expect
      .poll(renderedNameLength, {
        message: "topbar org name never resolved to non-empty text",
        timeout: 10_000,
      })
      .toBeGreaterThan(0);

    // Hop Settings → Map → Home and check that each destination's content
    // renders. Only the view content is asserted; the rail button's own
    // active state is NOT checked here.
    await navTo(page, "settings");
    const settingsHeading = page.getByRole("heading", { name: "Settings" });
    await expect(settingsHeading).toBeVisible({ timeout: 10_000 });

    // NOTE(review): the Org-map test in this file waits on
    // aria-label="Molecule AI workspace canvas" instead — confirm both labels
    // exist in the map view.
    await navTo(page, "map");
    const mapCanvas = page.locator('[aria-label="Agent canvas"]');
    await expect(mapCanvas).toBeVisible({ timeout: 15_000 });

    // Home is identified by its Agents sub-tab.
    await navTo(page, "home");
    const agentsSubtab = page.getByTestId("home-subtab-agents");
    await expect(agentsSubtab).toBeVisible({ timeout: 10_000 });
  });
});
|
||||
|
||||
/* ─────────────────────────────── 2. Home ─────────────────────────────────── */
test.describe("concierge Home", () => {
  test("renders the canonical ChatTab, Agents/Tasks/Approvals sub-tabs, and the platform agent as ROOT", async ({
    page,
  }) => {
    const { tenantURL } = tenantEnv();
    await loadConcierge(page, tenantURL);
    await navTo(page, "home");

    // (a) Chat panel. The `chat-panel` test id is described by the original
    // author as ChatTab's own root marker, so finding it is taken as proof
    // that the shared ChatTab component is mounted on Home.
    const chatPanel = page.getByTestId("chat-panel");
    await expect(chatPanel, "Home did not mount the canonical ChatTab").toBeVisible({
      timeout: 15_000,
    });

    const myChatTab = chatPanel.locator("#chat-tab-my-chat");
    const agentCommsTab = chatPanel.locator("#chat-tab-agent-comms");
    await expect(myChatTab).toHaveText(/My Chat/);
    await expect(agentCommsTab).toHaveText(/Agent Comms/);

    // Switch to Agent Comms and back; each click must select the clicked tab.
    for (const chatSubtab of [agentCommsTab, myChatTab]) {
      await chatSubtab.click();
      await expect(chatSubtab).toHaveAttribute("aria-selected", "true");
    }

    // Message input and attach button: presence only, not enabled-state (the
    // original note says both are disabled when the agent is unreachable).
    const messageInput = chatPanel.locator('textarea[aria-label="Message to agent"]');
    await expect(messageInput, "ChatTab message input missing").toHaveCount(1);
    const attachButton = chatPanel.locator('button[aria-label="Attach file"]');
    await expect(attachButton, "ChatTab attachment affordance missing").toHaveCount(1);

    // (b) Tasks → Approvals → Agents: each Home sub-tab gains the `active`
    // class after it is clicked.
    for (const pane of ["tasks", "approvals", "agents"]) {
      const subtab = page.getByTestId(`home-subtab-${pane}`);
      await subtab.click();
      await expect(subtab).toHaveClass(/active/);
    }

    // (c) Agent tree. At least one node must render in either mode.
    const anyTreeNode = page.getByTestId("agent-tree-node").first();
    await expect(anyTreeNode, "agent tree rendered no nodes").toBeVisible({
      timeout: 10_000,
    });
    const depthZeroNodes = page.locator(
      '[data-testid="agent-tree-node"][data-depth="0"]',
    );

    if (!platformInstalled) {
      // Degraded backend (install endpoint missing): only require that some
      // root-level node exists.
      await expect(depthZeroNodes).not.toHaveCount(0);
      return;
    }

    // Installed: the first depth-0 node is flagged as the platform agent and
    // shows the root tag.
    const rootNode = depthZeroNodes.first();
    await expect(rootNode).toHaveAttribute("data-platform", "true");
    await expect(
      rootNode.locator('[data-testid="agent-tree-root-tag"]'),
      "platform root is missing the ROOT tag",
    ).toBeVisible();

    // Exactly one depth-1 node: the provisioned workspace, re-parented under
    // the platform root.
    const depthOneNodes = page.locator(
      '[data-testid="agent-tree-node"][data-depth="1"]',
    );
    await expect(
      depthOneNodes,
      "the provisioned workspace did not re-parent under the platform root",
    ).toHaveCount(1, { timeout: 10_000 });
  });
});
|
||||
|
||||
/* ─────────────────────────────── 3. Org map ──────────────────────────────── */
test.describe("concierge Org map", () => {
  test("hides the platform agent from the node graph; normal workspaces render", async ({
    page,
  }) => {
    const { tenantURL } = tenantEnv();
    await loadConcierge(page, tenantURL);
    await navTo(page, "map");

    // The map canvas itself must render.
    const canvas = page.locator('[aria-label="Molecule AI workspace canvas"]');
    await expect(canvas).toBeVisible({ timeout: 15_000 });

    // At least one workspace node card must appear. Polling gives the graph
    // time to lay out before the count is read.
    const mapNodes = page.locator('[data-testid="workspace-node"]');
    await expect
      .poll(async () => mapNodes.count(), {
        message: "no workspace nodes rendered on the org map",
        timeout: 15_000,
      })
      .toBeGreaterThan(0);

    // The hidden-from-map check is only meaningful when a real platform
    // agent was installed by the per-test setup.
    if (!platformInstalled) return;

    // No node card's aria-label may start with "<platform agent name>
    // workspace" — i.e. the concierge must not appear in the graph.
    const platformNode = page.locator(
      `[data-testid="workspace-node"][aria-label^="${PLATFORM_AGENT_NAME} workspace"]`,
    );
    await expect(
      platformNode,
      "the platform agent (concierge) leaked into the org-map node graph — " +
        "stripPlatformRootForMap should exclude it",
    ).toHaveCount(0);
  });
});
|
||||
|
||||
/* ─────────────────────── 4. Settings — two tabs ──────────────────────────── */
test.describe("concierge Settings — two tabs", () => {
  test("Platform-agent config and Org & canvas settings are separate panes; platform tab shows the full WorkspacePanelTabs defaulting to Config", async ({
    page,
  }) => {
    const { tenantURL } = tenantEnv();
    await loadConcierge(page, tenantURL);
    await navTo(page, "settings");

    const platformTab = page.getByTestId("settings-tab-platform");
    const orgTab = page.getByTestId("settings-tab-org");
    const platformPane = page.getByTestId("settings-pane-platform");
    const orgPane = page.getByTestId("settings-pane-org");

    await expect(platformTab).toBeVisible({ timeout: 10_000 });
    await expect(orgTab).toBeVisible();

    // Default state: platform tab selected, its pane shown, org pane not in
    // the DOM at all.
    await expect(platformTab).toHaveAttribute("aria-selected", "true");
    await expect(platformPane).toBeVisible();
    await expect(orgPane).toHaveCount(0);

    // The platform pane must contain the "Workspace panel tabs" tablist with
    // Config selected, and exactly one element for each expected tab id.
    const workspaceTablist = page.getByRole("tablist", {
      name: "Workspace panel tabs",
    });
    await expect(
      workspaceTablist,
      "platform-agent Settings tab did not embed WorkspacePanelTabs",
    ).toBeVisible({ timeout: 15_000 });
    await expect(page.locator("#tab-config")).toHaveAttribute("aria-selected", "true");

    const expectedTabIds = [
      "config",
      "skills",
      "container-config",
      "display",
      "details",
      "activity",
      "terminal",
      "channels",
      "schedule",
    ];
    for (const tabId of expectedTabIds) {
      await expect(
        page.locator(`#tab-${tabId}`),
        `WorkspacePanelTabs is missing #tab-${tabId}`,
      ).toHaveCount(1);
    }

    // Selecting the org tab swaps panes: org pane mounts, platform pane is
    // removed from the DOM (not merely hidden).
    await orgTab.click();
    await expect(orgTab).toHaveAttribute("aria-selected", "true");
    await expect(orgPane).toBeVisible();
    await expect(platformPane).toHaveCount(0);

    // Selecting the platform tab again swaps them back.
    await platformTab.click();
    await expect(platformPane).toBeVisible();
    await expect(orgPane).toHaveCount(0);
  });
});
|
||||
|
||||
/* ─────────────────────── 5. Settings — Config tab ────────────────────────── */
test.describe("concierge Settings — Config tab dropdowns", () => {
  test("runtime dropdown is SSOT-driven; provider hides Platform on self-host but lists BYOK; model follows provider", async ({
    page,
  }) => {
    const { tenantURL } = tenantEnv();
    await loadConcierge(page, tenantURL);
    await navTo(page, "settings");

    // Wait for the workspace tablist, then locate the runtime control through
    // its "Runtime" label inside the Config panel.
    const workspaceTablist = page.getByRole("tablist", {
      name: "Workspace panel tabs",
    });
    await expect(workspaceTablist).toBeVisible({ timeout: 15_000 });

    const runtimeSelect = page
      .locator("#panel-config")
      .getByLabel("Runtime", { exact: true });
    await expect(runtimeSelect, "ConfigTab runtime dropdown never rendered").toBeVisible({
      timeout: 15_000,
    });

    // (a) Runtime options: at least one, and "hermes" must be among the
    // option values. The original note says the list is fed by GET /templates
    // and that the provisioned workspace runs hermes.
    const runtimeValues = await runtimeSelect
      .locator("option")
      .evaluateAll((options) =>
        options.map((option) => (option as HTMLOptionElement).value),
      );
    expect(
      runtimeValues.length,
      "runtime dropdown rendered no options — SSOT /templates feed is empty",
    ).toBeGreaterThan(0);
    expect(
      runtimeValues,
      "runtime dropdown does not list the provisioned 'hermes' runtime — the " +
        "SSOT /templates list has drifted",
    ).toContain("hermes");

    // (b) Provider options: collect trimmed labels, dropping blanks and
    // placeholder entries that start with an em dash. At least one provider
    // must remain and none may be the "Platform" option.
    const providerSelect = page.getByTestId("provider-select");
    await expect(
      providerSelect,
      "ConfigTab provider dropdown (ProviderModelSelector) never rendered",
    ).toBeVisible({ timeout: 15_000 });

    const providerLabels = await providerSelect
      .locator("option")
      .evaluateAll((options) =>
        options
          .map((option) => (option.textContent || "").trim())
          .filter((label) => label && !label.startsWith("—")),
      );
    expect(
      providerLabels.length,
      "provider dropdown lists no BYOK providers",
    ).toBeGreaterThan(0);

    // NOTE(review): this only rejects an option whose entire label is
    // "platform" (case-insensitive); a label such as "Platform (managed)"
    // would pass — confirm the exact label the selector renders.
    const loweredLabels = providerLabels.map((label) => label.toLowerCase());
    expect(
      loweredLabels,
      'provider dropdown offered the "Platform" proxy option on a self-host / ' +
        "no-proxy tenant (platform_managed_available should hide it)",
    ).not.toContain("platform");

    // (c) Model control: either the dropdown (`model-select`) or the
    // free-text field (`model-input`) must be visible.
    const modelControl = page
      .locator('[data-testid="model-select"], [data-testid="model-input"]')
      .first();
    await expect(
      modelControl,
      "model control did not follow the provider selection",
    ).toBeVisible({ timeout: 10_000 });
  });
});
|
||||
|
||||
/* ────────────────── 6. Settings — Org & canvas settings ──────────────────── */
|
||||
test.describe("concierge Settings — Org & canvas", () => {
|
||||
test("Secrets / Workspace Tokens / Org API Keys / Organization sub-tabs render; Organization shows the org (no 404)", async ({
|
||||
page,
|
||||
}) => {
|
||||
const { tenantURL } = tenantEnv();
|
||||
await loadConcierge(page, tenantURL);
|
||||
await navTo(page, "settings");
|
||||
|
||||
await page.getByTestId("settings-tab-org").click();
|
||||
const orgPane = page.getByTestId("settings-pane-org");
|
||||
await expect(orgPane).toBeVisible({ timeout: 10_000 });
|
||||
|
||||
// The four SettingsTabs (canvas/src/components/settings/SettingsTabs.tsx)
|
||||
// render as a radix tablist labelled "Settings sections". Assert all four
|
||||
// triggers are present.
|
||||
const settingsTablist = orgPane.getByRole("tablist", {
|
||||
name: "Settings sections",
|
||||
});
|
||||
await expect(settingsTablist).toBeVisible({ timeout: 10_000 });
|
||||
for (const label of [
|
||||
"Secrets",
|
||||
"Workspace Tokens",
|
||||
"Org API Keys",
|
||||
"Organization",
|
||||
]) {
|
||||
await expect(
|
||||
settingsTablist.getByRole("tab", { name: label }),
|
||||
`Org & canvas settings is missing the "${label}" sub-tab`,
|
||||
).toBeVisible();
|
||||
}
|
||||
|
||||
// Click the Organization sub-tab — on self-host the canvas reads
|
||||
// /org/identity (NOT the CP /cp/orgs endpoint), so it must render the org
|
||||
// identity card and NOT a 404 / error state. Assert the pane settles to
|
||||
// real, non-error content.
|
||||
await settingsTablist.getByRole("tab", { name: "Organization" }).click();
|
||||
const orgInfoPanel = orgPane.locator(
|
||||
'[role="tabpanel"]:not([hidden])',
|
||||
);
|
||||
await expect(orgInfoPanel).toBeVisible({ timeout: 10_000 });
|
||||
await expect
|
||||
.poll(
|
||||
async () => {
|
||||
const text = ((await orgInfoPanel.innerText()) || "").trim();
|
||||
return text.length > 0 && !/404|not found/i.test(text);
|
||||
},
|
||||
{
|
||||
message:
|
||||
"Organization sub-tab rendered empty or a 404/not-found — the " +
|
||||
"self-host /org/identity path is broken",
|
||||
timeout: 15_000,
|
||||
},
|
||||
)
|
||||
.toBe(true);
|
||||
// And no visible error alert inside the org settings pane.
|
||||
await expect(orgPane.locator('[role="alert"]:visible')).toHaveCount(0);
|
||||
});
|
||||
});
|
||||
|
||||
/* ───────────────────────────── 7. Map toolbar ────────────────────────────── */
|
||||
test.describe("concierge Org map toolbar", () => {
|
||||
test("settings gear, theme toggle and legend are NOT on the map toolbar (moved to Settings/topbar)", async ({
|
||||
page,
|
||||
}) => {
|
||||
const { tenantURL } = tenantEnv();
|
||||
await loadConcierge(page, tenantURL);
|
||||
await navTo(page, "map");
|
||||
await expect(page.locator('[aria-label="Molecule AI workspace canvas"]')).toBeVisible({
|
||||
timeout: 15_000,
|
||||
});
|
||||
|
||||
// The map toolbar no longer carries a settings gear, a theme toggle, or a
|
||||
// legend — those moved to the concierge Settings (left rail) + topbar
|
||||
// (Toolbar.tsx: "Theme picker + settings gear removed from the map
|
||||
// toolbar"). Assert the map view contains none of them.
|
||||
//
|
||||
// Scope to the map mount (<main aria-label="Agent canvas">, ConciergeShell)
|
||||
// so the legitimate left-rail Settings button + the topbar theme toggle
|
||||
// (which live OUTSIDE the map) are not counted.
|
||||
const mapRegion = page.locator('[aria-label="Agent canvas"]');
|
||||
await expect(mapRegion).toBeVisible({ timeout: 10_000 });
|
||||
|
||||
// No settings-gear control inside the map. The old gear used
|
||||
// title="Settings" / aria-label "Settings".
|
||||
await expect(
|
||||
mapRegion.locator('button[title="Settings"], button[aria-label="Settings"]'),
|
||||
"a settings gear is still on the map toolbar (should be moved to Settings)",
|
||||
).toHaveCount(0);
|
||||
|
||||
// No theme toggle inside the map. The toggle's accessible name is
|
||||
// "Toggle theme" — it now lives only in the topbar.
|
||||
await expect(
|
||||
mapRegion.locator('button[title="Toggle theme"], button[aria-label*="theme" i]'),
|
||||
"a theme toggle is still on the map toolbar (should be in the topbar)",
|
||||
).toHaveCount(0);
|
||||
|
||||
// No legend inside the map. The Legend component's controls have accessible
|
||||
// names "Show legend" / "Hide legend" and the panel carries
|
||||
// data-testid="legend-panel" (canvas/src/components/Legend.tsx). It is no
|
||||
// longer mounted in Canvas/Toolbar at all — assert none of its surfaces.
|
||||
await expect(
|
||||
mapRegion.locator(
|
||||
'[data-testid="legend-panel"], button[aria-label="Show legend"], button[aria-label="Hide legend"]',
|
||||
),
|
||||
"a legend is still on the map toolbar (should be removed)",
|
||||
).toHaveCount(0);
|
||||
});
|
||||
});
|
||||
+32
-56
@@ -234,44 +234,30 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
"Authorization": `Bearer ${tenantToken}`,
|
||||
"X-Molecule-Org-Id": orgID,
|
||||
};
|
||||
// Retry workspace creation on transient 5xx / timeout — staging CP can
|
||||
// return 502/503/504 under load and a single-shot failure kills the
|
||||
// entire E2E run. 3 attempts with 3s exponential backoff (3s, 6s, 12s)
|
||||
// gives ~21s total budget, well inside the 20-min provision envelope.
|
||||
let workspaceId = "";
|
||||
for (let attempt = 1; attempt <= 3; attempt++) {
|
||||
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
|
||||
method: "POST",
|
||||
headers: tenantAuth,
|
||||
body: JSON.stringify({
|
||||
name: "E2E Canvas Test",
|
||||
runtime: "hermes",
|
||||
tier: 2,
|
||||
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
|
||||
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
|
||||
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
|
||||
// platform). The old `gpt-4o` was never a registered hermes model and
|
||||
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
|
||||
// defaults closed to platform_managed (see the boot-shape note below),
|
||||
// so a platform-namespaced model id is the registry-correct choice.
|
||||
model: "moonshot/kimi-k2.6",
|
||||
}),
|
||||
});
|
||||
if (ws.status >= 200 && ws.status < 300 && ws.body?.id) {
|
||||
workspaceId = ws.body.id as string;
|
||||
break;
|
||||
}
|
||||
const isTransient = ws.status >= 500 || ws.status === 0;
|
||||
if (!isTransient || attempt === 3) {
|
||||
throw new Error(`Workspace create ${ws.status} (attempt ${attempt}): ${JSON.stringify(ws.body)}`);
|
||||
}
|
||||
const backoff = 3000 * Math.pow(2, attempt - 1);
|
||||
console.log(`[staging-setup] Workspace create transient ${ws.status}, retrying in ${backoff}ms...`);
|
||||
await new Promise((r) => setTimeout(r, backoff));
|
||||
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
|
||||
method: "POST",
|
||||
headers: tenantAuth,
|
||||
body: JSON.stringify({
|
||||
name: "E2E Canvas Test",
|
||||
runtime: "hermes",
|
||||
tier: 2,
|
||||
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
|
||||
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
|
||||
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
|
||||
// platform). The old `gpt-4o` was never a registered hermes model and
|
||||
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
|
||||
// defaults closed to platform_managed (see the boot-shape note below),
|
||||
// so a platform-namespaced model id is the registry-correct choice.
|
||||
model: "moonshot/kimi-k2.6",
|
||||
}),
|
||||
});
|
||||
if (ws.status >= 400 || !ws.body?.id) {
|
||||
throw new Error(`Workspace create ${ws.status}: ${JSON.stringify(ws.body)}`);
|
||||
}
|
||||
const workspaceId = ws.body.id as string;
|
||||
console.log(`[staging-setup] Workspace created: ${workspaceId}`);
|
||||
|
||||
// 6. Wait for workspace online
|
||||
// 6. Wait for workspace RENDERABLE.
|
||||
//
|
||||
// This harness exists to verify the canvas *tab UI* renders (staging-
|
||||
// tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard
|
||||
@@ -280,16 +266,6 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
// it needs is a workspace ROW that the canvas lists so the node renders
|
||||
// and the side-panel tabs open. A fully-`online` agent is NOT required.
|
||||
//
|
||||
// Hermes cold-boot takes 10-13 min on slow apt days (apt + uv + hermes
|
||||
// install + npm browser-tools). The controlplane bootstrap-watcher
|
||||
// deadline fires at 5 min and sets status=failed prematurely; heartbeat
|
||||
// then transitions failed → online after install.sh finishes. The ONLY
|
||||
// failed shape we tolerate is the pre-start credential-abort
|
||||
// (uptime_seconds=0, no last_sample_error) — the agent never ran. Real
|
||||
// boot regressions (image pull error, panic, PYTHONPATH, etc.) still
|
||||
// hard-throw immediately so triage gets detail without waiting for a
|
||||
// polling timeout. See test_staging_full_saas.sh step 7/11 and issue #2632.
|
||||
//
|
||||
// That distinction became load-bearing on 2026-06-03: workspace-server
|
||||
// #2162 (fix(provision): platform-managed workspace must fail-closed when
|
||||
// CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
|
||||
@@ -311,10 +287,8 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
// the node + tabs render, proceed. We do NOT mask a real boot regression:
|
||||
// any `failed` carrying a last_sample_error, OR a non-zero uptime (the
|
||||
// agent started then crashed — image pull, panic, PYTHONPATH, etc.),
|
||||
// still hard-throws immediately so triage gets boot_stage / last_error /
|
||||
// image fields without waiting for a polling timeout.
|
||||
// Genuine *infra* provision failure is already caught loud one step
|
||||
// earlier at the org level (instance_status === "failed").
|
||||
// still hard-throws. Genuine *infra* provision failure is already caught
|
||||
// loud one step earlier at the org level (instance_status === "failed").
|
||||
await waitFor<boolean>(
|
||||
async () => {
|
||||
const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
|
||||
@@ -341,15 +315,17 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
);
|
||||
return true;
|
||||
}
|
||||
// #2032: tolerate transient 'failed' during boot — some runtimes
|
||||
// briefly report failed before recovering to online (e.g. agent
|
||||
// restart during init). Retry instead of hard-throwing; genuine
|
||||
// terminal failures will still surface via waitFor timeout.
|
||||
// last_sample_error is often empty when the failure happens before
|
||||
// the agent emits a sample (e.g. boot crash, image pull error,
|
||||
// missing PYTHONPATH, OpenAI quota at startup). Dumping the full
|
||||
// body gives triage the boot_stage / last_error / image fields it
|
||||
// needs without a second probe. Otherwise this propagates as a
|
||||
// bare "Workspace failed: " — the exact useless message that
|
||||
// sent #2632 to the issue tracker.
|
||||
const detail = sampleErr
|
||||
? sampleErr
|
||||
: `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
|
||||
console.warn(`[staging-setup] transient failed (retrying): ${detail}`);
|
||||
return null;
|
||||
throw new Error(`Workspace failed: ${detail}`);
|
||||
}
|
||||
return null;
|
||||
},
|
||||
@@ -357,7 +333,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
10_000,
|
||||
"workspace online",
|
||||
);
|
||||
console.log(`[staging-setup] Workspace online`);
|
||||
console.log(`[staging-setup] Workspace renderable`);
|
||||
|
||||
// 7. Hand state off to tests + teardown — overwrite the slug-only
|
||||
// bootstrap state with the full state spec tests need.
|
||||
|
||||
@@ -370,7 +370,7 @@ test.describe("staging canvas tabs", () => {
|
||||
|
||||
// The tablist appears once the side panel mounts. Condition-based
|
||||
// wait — no fixed delay.
|
||||
const tablist = page.getByRole("tablist", { name: "Workspace panel tabs" });
|
||||
const tablist = page.locator('[role="tablist"]');
|
||||
await expect(
|
||||
tablist,
|
||||
"side panel tablist never appeared after clicking the workspace node",
|
||||
|
||||
@@ -52,10 +52,8 @@ describe("prefers-reduced-motion compliance", () => {
|
||||
expect(src).toContain("motion-safe:animate-pulse");
|
||||
});
|
||||
|
||||
it("WorkspacePanelTabs.tsx uses motion-safe:animate-pulse", () => {
|
||||
// The connection-status dot moved out of SidePanel.tsx into the extracted
|
||||
// WorkspacePanelTabs.tsx; verify the reduced-motion guard followed it.
|
||||
const src = readSrc("components/WorkspacePanelTabs.tsx");
|
||||
it("SidePanel.tsx uses motion-safe:animate-pulse", () => {
|
||||
const src = readSrc("components/SidePanel.tsx");
|
||||
expect(src.includes("animate-pulse") && !src.includes("motion-safe:animate-pulse")).toBe(false);
|
||||
expect(src).toContain("motion-safe:animate-pulse");
|
||||
});
|
||||
|
||||
@@ -10,7 +10,7 @@ import { describe, it, expect, vi } from "vitest";
|
||||
// transform). We import layout.tsx only for its exported `metadata`
|
||||
// constant — mock the font module to a constructor-returning stub.
|
||||
vi.mock("next/font/google", () => ({
|
||||
Hanken_Grotesk: () => ({ variable: "--font-hanken" }),
|
||||
Inter: () => ({ variable: "--font-inter" }),
|
||||
JetBrains_Mono: () => ({ variable: "--font-jetbrains" }),
|
||||
}));
|
||||
|
||||
|
||||
+38
-50
@@ -42,52 +42,48 @@
|
||||
* before paint to eliminate flash.
|
||||
*/
|
||||
@theme {
|
||||
/* Org Concierge palette (RFC platform-agent / canvas redesign). Warm-paper
|
||||
light theme + purple accent replacing the old blue brand. */
|
||||
/* Surface — page / elevated card / sunken input / deep card */
|
||||
--color-surface: #f1efe8;
|
||||
--color-surface: #fafaf7;
|
||||
--color-surface-elevated: #ffffff;
|
||||
--color-surface-sunken: #f6f4ee;
|
||||
--color-surface-card: #faf9f4;
|
||||
--color-surface-sunken: #f3f1ec;
|
||||
--color-surface-card: #efece4;
|
||||
|
||||
/* Borders */
|
||||
--color-line: #ddd9cf;
|
||||
--color-line-soft: #ebe8df;
|
||||
--color-line: #e6e2d8;
|
||||
--color-line-soft: #efece4;
|
||||
|
||||
/* Text */
|
||||
--color-ink: #21201b;
|
||||
--color-ink-mid: #5c5a52;
|
||||
--color-ink-soft: #6f6c62;
|
||||
--color-ink: #15181c;
|
||||
--color-ink-mid: #5a5e66;
|
||||
--color-ink-soft: #8b8e95;
|
||||
|
||||
/* Brand + state — purple accent (concept #7c3aed); light good/bad kept
|
||||
slightly darker than the raw concept hues for WCAG AA on the paper tints. */
|
||||
--color-accent: #7c3aed;
|
||||
--color-accent-strong: #6d28d9;
|
||||
--color-warm: #c47e12;
|
||||
--color-good: #0c8a52;
|
||||
--color-bad: #c2403c;
|
||||
/* Brand + state */
|
||||
--color-accent: #3b5bdb;
|
||||
--color-accent-strong: #1a2f99;
|
||||
--color-warm: #c0532b;
|
||||
--color-good: #2f7a4d;
|
||||
--color-bad: #b94e4a;
|
||||
}
|
||||
|
||||
[data-theme="dark"] {
|
||||
/* Org Concierge dark palette — near-black panels, bright purple accent. */
|
||||
--color-surface: #08080a;
|
||||
--color-surface-elevated: #16161d;
|
||||
--color-surface-sunken: #0d0d11;
|
||||
--color-surface-card: #1b1b23;
|
||||
--color-surface: #0e1014;
|
||||
--color-surface-elevated: #15181c;
|
||||
--color-surface-sunken: #0a0b0e;
|
||||
--color-surface-card: #1a1d23;
|
||||
|
||||
--color-line: #26262e;
|
||||
--color-line-soft: #1b1b22;
|
||||
--color-line: #2a2f3a;
|
||||
--color-line-soft: #1f2329;
|
||||
|
||||
--color-ink: #ececf1;
|
||||
--color-ink-mid: #9b9baa;
|
||||
--color-ink-soft: #65656f;
|
||||
--color-ink: #f4f1e9;
|
||||
--color-ink-mid: #c8c2b4;
|
||||
--color-ink-soft: #8d92a0;
|
||||
|
||||
/* Purple accent brightened for AA on the near-black surfaces. */
|
||||
--color-accent: #a78bfa;
|
||||
--color-accent-strong: #c4b5fd;
|
||||
--color-warm: #fbbf24;
|
||||
--color-good: #34d399;
|
||||
--color-bad: #f87171;
|
||||
/* Accents brighten slightly for AA contrast on dark backgrounds. */
|
||||
--color-accent: #6883e8;
|
||||
--color-accent-strong: #8aa1ee;
|
||||
--color-warm: #d96f48;
|
||||
--color-good: #4ca06e;
|
||||
--color-bad: #d27773;
|
||||
}
|
||||
|
||||
:root {
|
||||
@@ -111,22 +107,15 @@
|
||||
* component, not per theme.
|
||||
*/
|
||||
@theme {
|
||||
/* Org Concierge canvas palette (near-black + purple). */
|
||||
--color-bg: rgb(8 8 10); /* concept --bg #08080a */
|
||||
--color-bg-elev: rgb(22 22 29); /* concept --card #16161d */
|
||||
--color-bg-card: rgb(27 27 35); /* concept --card-2 #1b1b23 */
|
||||
--color-line-strong: rgb(54 54 64);
|
||||
--color-ink-mute: rgb(155 155 170); /* concept --tx-2 */
|
||||
--color-ink-dim: rgb(101 101 111); /* concept --tx-3 */
|
||||
--color-accent-dim: rgb(167 139 250);/* concept --accent-2 #a78bfa */
|
||||
--color-plasma: rgb(139 92 246); /* concept --accent #8b5cf6 */
|
||||
--color-bg: rgb(9 9 11); /* zinc-950 */
|
||||
--color-bg-elev: rgb(24 24 27); /* zinc-900 */
|
||||
--color-bg-card: rgb(39 39 42); /* zinc-800 */
|
||||
--color-line-strong: rgb(63 63 70); /* zinc-700 */
|
||||
--color-ink-mute: rgb(161 161 170); /* zinc-400 */
|
||||
--color-ink-dim: rgb(113 113 122); /* zinc-500 */
|
||||
--color-accent-dim: rgb(96 165 250);/* blue-400 */
|
||||
--color-plasma: rgb(59 130 246); /* blue-500 */
|
||||
--color-warn: rgb(251 191 36); /* amber-400 */
|
||||
|
||||
/* Typography — Org Concierge (Hanken Grotesk UI, JetBrains Mono code).
|
||||
next/font variables are set on <html> in the canvas layout. */
|
||||
--font-sans: var(--font-hanken), ui-sans-serif, system-ui, -apple-system,
|
||||
"Segoe UI", Roboto, sans-serif;
|
||||
--font-mono: var(--font-jetbrains), ui-monospace, "SF Mono", Menlo, monospace;
|
||||
}
|
||||
|
||||
body {
|
||||
@@ -135,8 +124,7 @@ body {
|
||||
overflow: hidden;
|
||||
background-color: var(--color-surface);
|
||||
color: var(--color-ink);
|
||||
font-family: var(--font-hanken), -apple-system, BlinkMacSystemFont, "Segoe UI",
|
||||
Roboto, "Helvetica Neue", sans-serif;
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", sans-serif;
|
||||
-webkit-font-smoothing: antialiased;
|
||||
-moz-osx-font-smoothing: grayscale;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import type { Metadata } from "next";
|
||||
import { Hanken_Grotesk, JetBrains_Mono } from "next/font/google";
|
||||
import { Inter, JetBrains_Mono } from "next/font/google";
|
||||
import { cookies, headers } from "next/headers";
|
||||
import "./globals.css";
|
||||
|
||||
@@ -7,13 +7,10 @@ import "./globals.css";
|
||||
// because Next.js serves the .woff2 from /_next/static). Exposed as
|
||||
// CSS variables so the mobile palette can reference them without
|
||||
// importing this module.
|
||||
// Org Concierge UI typeface (canvas redesign): Hanken Grotesk, exposed as
|
||||
// --font-hanken and consumed by the --font-sans theme token in globals.css.
|
||||
const interFont = Hanken_Grotesk({
|
||||
const interFont = Inter({
|
||||
subsets: ["latin"],
|
||||
weight: ["400", "500", "600", "700"],
|
||||
display: "swap",
|
||||
variable: "--font-hanken",
|
||||
variable: "--font-inter",
|
||||
});
|
||||
const monoFont = JetBrains_Mono({
|
||||
subsets: ["latin"],
|
||||
@@ -164,12 +161,6 @@ export default async function RootLayout({
|
||||
*/}
|
||||
<script
|
||||
nonce={nonce}
|
||||
// The browser strips the nonce attribute off <script> after applying
|
||||
// CSP, so the hydrated DOM shows nonce="" while React's tree carries
|
||||
// the real value — a benign, expected server/client diff. Suppress
|
||||
// the hydration warning for this element (same rationale as the
|
||||
// <html> suppressHydrationWarning above).
|
||||
suppressHydrationWarning
|
||||
dangerouslySetInnerHTML={{ __html: themeBootScript }}
|
||||
/>
|
||||
{/*
|
||||
@@ -195,7 +186,6 @@ export default async function RootLayout({
|
||||
<script
|
||||
type="application/ld+json"
|
||||
nonce={nonce}
|
||||
suppressHydrationWarning
|
||||
dangerouslySetInnerHTML={{
|
||||
__html: JSON.stringify({
|
||||
"@context": "https://schema.org",
|
||||
|
||||
@@ -179,6 +179,7 @@ function Shell({
|
||||
<p className="mt-2 text-ink-mid">
|
||||
Each org is an isolated Molecule workspace.
|
||||
</p>
|
||||
<DataResidencyNotice />
|
||||
<div className="mt-8">{children}</div>
|
||||
</div>
|
||||
</TermsGate>
|
||||
@@ -219,6 +220,25 @@ function AccountBar({ session }: { session: Session }) {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// DataResidencyNotice surfaces where workspace data lives so EU-based
|
||||
// signups can make an informed choice (GDPR Art. 13 disclosure
|
||||
// requirement). Plain text, no icon — the goal is clarity, not
|
||||
// decoration. A future EU region selector can replace this with a
|
||||
// region dropdown.
|
||||
function DataResidencyNotice() {
|
||||
return (
|
||||
<p className="mt-3 rounded border border-line bg-surface-sunken/60 px-3 py-2 text-xs text-ink-mid">
|
||||
Workspaces run in AWS us-east-2 (Ohio, United States). EU region support is on the roadmap — reach out to
|
||||
{" "}
|
||||
<a href="mailto:support@moleculesai.app" className="underline">
|
||||
support@moleculesai.app
|
||||
</a>
|
||||
{" "}if you need data residency in another region today.
|
||||
</p>
|
||||
);
|
||||
}
|
||||
|
||||
function OrgRow({ org }: { org: Org }) {
|
||||
return (
|
||||
<li className="rounded-lg border border-line bg-surface-sunken p-4">
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useState } from "react";
|
||||
import { ConciergeShell } from "@/components/concierge/ConciergeShell";
|
||||
import { Canvas } from "@/components/Canvas";
|
||||
import { Legend } from "@/components/Legend";
|
||||
import { CommunicationOverlay } from "@/components/CommunicationOverlay";
|
||||
import { MobileApp } from "@/components/mobile/MobileApp";
|
||||
import { Spinner } from "@/components/Spinner";
|
||||
import { connectSocket, disconnectSocket } from "@/store/socket";
|
||||
@@ -113,7 +115,11 @@ export default function Home() {
|
||||
|
||||
return (
|
||||
<>
|
||||
<ConciergeShell />
|
||||
<main aria-label="Agent canvas">
|
||||
<Canvas />
|
||||
</main>
|
||||
<Legend />
|
||||
<CommunicationOverlay />
|
||||
{hydrationError && (
|
||||
<div
|
||||
role="alert"
|
||||
|
||||
@@ -13,11 +13,8 @@ import {
|
||||
import "@xyflow/react/dist/style.css";
|
||||
|
||||
import { useCanvasStore } from "@/store/canvas";
|
||||
import { WORKSPACE_KIND } from "@/lib/workspace-kind";
|
||||
import { stripPlatformRootForMap } from "@/store/canvas-topology";
|
||||
import { useTheme } from "@/lib/theme-provider";
|
||||
import { A2ATopologyOverlay } from "./A2ATopologyOverlay";
|
||||
import { MessageFlightLayer } from "./MessageFlightLayer";
|
||||
import { WorkspaceNode } from "./WorkspaceNode";
|
||||
import { SidePanel } from "./SidePanel";
|
||||
import { CreateWorkspaceButton } from "./CreateWorkspaceDialog";
|
||||
@@ -81,38 +78,15 @@ function CanvasInner() {
|
||||
// half-themed page. Pull resolvedTheme so the canvas matches the user's
|
||||
// selected mode (and the system preference when they pick "system").
|
||||
const { resolvedTheme } = useTheme();
|
||||
const storeNodes = useCanvasStore((s) => s.nodes);
|
||||
const storeEdges = useCanvasStore((s) => s.edges);
|
||||
const rawNodes = useCanvasStore((s) => s.nodes);
|
||||
const edges = useCanvasStore((s) => s.edges);
|
||||
const a2aEdges = useCanvasStore((s) => s.a2aEdges);
|
||||
const showA2AEdges = useCanvasStore((s) => s.showA2AEdges);
|
||||
const deletingIds = useCanvasStore((s) => s.deletingIds);
|
||||
// Hide the org-level platform agent (the concierge) from the map graph: it is
|
||||
// the undeletable org ROOT surfaced in the shell (topbar + Home tree), not a
|
||||
// draggable/deletable map node. Its direct children are reparented to
|
||||
// top-level and tree edges touching it are dropped. The store keeps the full
|
||||
// node set, so the shell's Home agent tree still renders it as ROOT.
|
||||
const { nodes: rawNodes, edges } = useMemo(
|
||||
() => stripPlatformRootForMap(storeNodes, storeEdges),
|
||||
[storeNodes, storeEdges],
|
||||
const allEdges = useMemo(
|
||||
() => (showA2AEdges ? [...edges, ...a2aEdges] : edges),
|
||||
[edges, a2aEdges, showA2AEdges],
|
||||
);
|
||||
const platformIds = useMemo(
|
||||
() =>
|
||||
new Set(
|
||||
storeNodes
|
||||
.filter((n) => n.data.kind === WORKSPACE_KIND.Platform)
|
||||
.map((n) => n.id),
|
||||
),
|
||||
[storeNodes],
|
||||
);
|
||||
const allEdges = useMemo(() => {
|
||||
if (!showA2AEdges) return edges;
|
||||
// Drop A2A edges that touch the hidden platform root so React Flow doesn't
|
||||
// warn about an edge to a missing node.
|
||||
const a2a = a2aEdges.filter(
|
||||
(e) => !platformIds.has(e.source) && !platformIds.has(e.target),
|
||||
);
|
||||
return [...edges, ...a2a];
|
||||
}, [edges, a2aEdges, showA2AEdges, platformIds]);
|
||||
// Drag-lock during a system-owned operation (deploy OR delete).
|
||||
// React Flow respects Node.draggable, which stops the gesture
|
||||
// before it starts — preventDefault() on the drag-start callback
|
||||
@@ -303,7 +277,7 @@ function CanvasInner() {
|
||||
>
|
||||
Skip to canvas
|
||||
</a>
|
||||
<main id="canvas-main" className="w-full h-full bg-surface">
|
||||
<main id="canvas-main" className="w-screen h-screen bg-surface">
|
||||
<ReactFlow
|
||||
colorMode={resolvedTheme}
|
||||
nodes={nodes}
|
||||
@@ -372,10 +346,6 @@ function CanvasInner() {
|
||||
nodeBorderRadius={4}
|
||||
/>
|
||||
<DropTargetBadge />
|
||||
{/* Flies an envelope between agents on each delegate/message event.
|
||||
Inside <ReactFlow> so its ViewportPortal renders in flow coords
|
||||
and tracks pan/zoom. */}
|
||||
<MessageFlightLayer />
|
||||
</ReactFlow>
|
||||
|
||||
{/* Screen-reader live region — announces workspace count on initial load and
|
||||
|
||||
@@ -172,7 +172,7 @@ export function ContextMenu() {
|
||||
const nodeId = contextMenu.nodeId;
|
||||
closeContextMenu();
|
||||
try {
|
||||
await api.post(`/workspaces/${nodeId}/pause?cascade=true`, {});
|
||||
await api.post(`/workspaces/${nodeId}/pause`, {});
|
||||
updateNodeData(nodeId, { status: "paused" });
|
||||
} catch (e) {
|
||||
showToast("Pause failed", "error");
|
||||
@@ -184,7 +184,7 @@ export function ContextMenu() {
|
||||
const nodeId = contextMenu.nodeId;
|
||||
closeContextMenu();
|
||||
try {
|
||||
await api.post(`/workspaces/${nodeId}/resume?cascade=true`, {});
|
||||
await api.post(`/workspaces/${nodeId}/resume`, {});
|
||||
updateNodeData(nodeId, { status: "provisioning" });
|
||||
} catch (e) {
|
||||
showToast("Resume failed", "error");
|
||||
|
||||
@@ -60,16 +60,6 @@ const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "goog
|
||||
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
|
||||
const DEFAULT_HEADLESS_ROOT_GB = 30;
|
||||
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
|
||||
|
||||
// Per-workspace cloud/compute backend (multi-provider RFC). "aws" is the default
|
||||
// EC2 path; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner. A
|
||||
// workspace whose cloud differs from its tenant's is reached over a per-workspace
|
||||
// Cloudflare tunnel (runtime#95). Distinct from the LLM/model provider.
|
||||
const CLOUD_PROVIDER_OPTIONS = [
|
||||
{ value: "aws", label: "AWS (default)" },
|
||||
{ value: "gcp", label: "GCP" },
|
||||
{ value: "hetzner", label: "Hetzner" },
|
||||
];
|
||||
const DEFAULT_DISPLAY_ROOT_GB = 80;
|
||||
|
||||
export function CreateWorkspaceButton() {
|
||||
@@ -87,10 +77,6 @@ export function CreateWorkspaceButton() {
|
||||
const [displayInstanceType, setDisplayInstanceType] = useState(DEFAULT_DISPLAY_INSTANCE_TYPE);
|
||||
const [displayRootGB, setDisplayRootGB] = useState(String(DEFAULT_DISPLAY_ROOT_GB));
|
||||
const [displayResolution, setDisplayResolution] = useState("1920x1080");
|
||||
// Cloud/compute backend for the workspace box (multi-provider, per-workspace).
|
||||
// "aws" default; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner
|
||||
// (a non-tenant-cloud box is reached over a per-workspace tunnel, runtime#95).
|
||||
const [cloudProvider, setCloudProvider] = useState("aws");
|
||||
// Templates fetched from /api/templates — drives the dynamic provider
|
||||
// filter below. Same data source ConfigTab uses (PR #2454). When the
|
||||
// selected template declares `runtime_config.providers` in its
|
||||
@@ -280,7 +266,6 @@ export function CreateWorkspaceButton() {
|
||||
setDisplayInstanceType(DEFAULT_DISPLAY_INSTANCE_TYPE);
|
||||
setDisplayRootGB(String(DEFAULT_DISPLAY_ROOT_GB));
|
||||
setDisplayResolution("1920x1080");
|
||||
setCloudProvider("aws");
|
||||
setExternalRuntime("external");
|
||||
setLLMSelection({ providerId: "", model: "", envVars: [] });
|
||||
setLLMSecret("");
|
||||
@@ -370,16 +355,11 @@ export function CreateWorkspaceButton() {
|
||||
width: Number.isFinite(displayWidth) ? displayWidth : 1920,
|
||||
height: Number.isFinite(displayHeight) ? displayHeight : 1080,
|
||||
},
|
||||
// Only meaningful when CP provisions the box (SaaS), where
|
||||
// the picker is shown. Omit on self-hosted so the payload is
|
||||
// unchanged there.
|
||||
...(isSaaS ? { provider: cloudProvider } : {}),
|
||||
}
|
||||
: {
|
||||
instance_type: DEFAULT_HEADLESS_INSTANCE_TYPE,
|
||||
volume: { root_gb: DEFAULT_HEADLESS_ROOT_GB },
|
||||
display: { mode: "none" },
|
||||
...(isSaaS ? { provider: cloudProvider } : {}),
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
@@ -619,26 +599,6 @@ export function CreateWorkspaceButton() {
|
||||
<div className="mb-2 text-[11px] font-medium text-ink-mid">
|
||||
Container Config
|
||||
</div>
|
||||
{/* Cloud provider — only meaningful when CP provisions the box
|
||||
(SaaS). A non-tenant-cloud workspace is reached over a
|
||||
per-workspace Cloudflare tunnel (runtime#95). */}
|
||||
{isSaaS && (
|
||||
<label htmlFor="workspace-cloud-provider" className="mb-3 grid gap-1">
|
||||
<span className="text-xs font-medium text-ink">Cloud provider</span>
|
||||
<select
|
||||
id="workspace-cloud-provider"
|
||||
value={cloudProvider}
|
||||
onChange={(e) => setCloudProvider(e.target.value)}
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors"
|
||||
>
|
||||
{CLOUD_PROVIDER_OPTIONS.map((p) => (
|
||||
<option key={p.value} value={p.value}>
|
||||
{p.label}
|
||||
</option>
|
||||
))}
|
||||
</select>
|
||||
</label>
|
||||
)}
|
||||
<label className="flex items-center justify-between gap-3">
|
||||
<span className="text-xs font-medium text-ink">Display</span>
|
||||
<input
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
/** FlightEnvelope — a single envelope that animates from `from` to `to` and
|
||||
* fades out, used by both the canvas (flow coords inside a ViewportPortal) and
|
||||
* the concierge home (screen coords inside a fixed overlay). The parent owns
|
||||
* the coordinate space; this component only animates the translate delta.
|
||||
*
|
||||
* Uses the Web Animations API so the from/to delta can be dynamic per flight
|
||||
* (a static CSS @keyframes can't translate to a runtime-computed point). */
|
||||
import { useEffect, useRef } from "react";
|
||||
import { FLIGHT_DURATION_MS, type A2AFlightKind } from "@/hooks/useA2AFlights";
|
||||
|
||||
/**
 * Envelope stroke colour for each A2A flight kind.
 *
 * Kept in step with CommunicationOverlay's palette — send is cyan, receive is
 * violet (accent), task is warm — so both surfaces present the same event the
 * same way.
 */
const KIND_COLOR: Record<A2AFlightKind, string> = {
  send: "#22d3ee",
  receive: "#8b5cf6",
  task: "#f5a623",
};
|
||||
|
||||
/**
 * A 2-D coordinate. The coordinate space (flow vs. screen) is decided by the
 * caller that positions the envelope; this type only carries the numbers.
 */
export interface Point {
  /** Horizontal coordinate (applied as CSS `left`). */
  x: number;
  /** Vertical coordinate (applied as CSS `top`). */
  y: number;
}
|
||||
|
||||
// Centres the icon on its anchor point. Shared by the resting style and by
// both transform keyframes so the three can never drift apart.
const CENTER_ON_POINT = "translate(-50%,-50%)";

/**
 * Render one envelope icon that flies from `from` to `to` and fades out.
 *
 * The element is absolutely positioned at `from`; the Web Animations API then
 * translates it by the (to - from) delta over FLIGHT_DURATION_MS while fading
 * in, holding, and fading out again.
 *
 * @param from - Start point, in the coordinate space of the positioned parent.
 * @param to   - End point, in the same coordinate space as `from`.
 * @param kind - Flight kind; selects the stroke colour from KIND_COLOR.
 */
export function FlightEnvelope({
  from,
  to,
  kind,
}: {
  from: Point;
  to: Point;
  kind: A2AFlightKind;
}) {
  const ref = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const el = ref.current;
    // Element.animate is unavailable in some test/SSR environments — leave the
    // envelope static (centred on `from` via the base style) rather than throw.
    if (!el || typeof el.animate !== "function") return;

    const dx = to.x - from.x;
    const dy = to.y - from.y;
    const anim = el.animate(
      [
        { transform: `${CENTER_ON_POINT} translate(0px,0px) scale(0.45)`, opacity: 0 },
        { opacity: 1, offset: 0.16 },
        { opacity: 1, offset: 0.8 },
        { transform: `${CENTER_ON_POINT} translate(${dx}px,${dy}px) scale(1)`, opacity: 0 },
      ],
      { duration: FLIGHT_DURATION_MS, easing: "cubic-bezier(0.45, 0, 0.25, 1)", fill: "forwards" },
    );

    // Cancel on unmount, or before re-running when an endpoint coordinate
    // changes, so a stale animation does not keep driving the element.
    return () => anim.cancel();
  }, [from.x, from.y, to.x, to.y]);

  const color = KIND_COLOR[kind];

  return (
    <div
      ref={ref}
      data-testid="flight-envelope"
      aria-hidden="true"
      style={{
        position: "absolute",
        left: from.x,
        top: from.y,
        // Resting transform: without it the icon's top-left corner (not its
        // centre) sits on `from` whenever no animation is applied — i.e. in
        // the no-animate fallback and before the effect has started the
        // animation. The keyframes above override this while animating.
        transform: CENTER_ON_POINT,
        pointerEvents: "none",
        willChange: "transform, opacity",
        filter: "drop-shadow(0 1px 3px rgba(0,0,0,0.45))",
        zIndex: 6,
      }}
    >
      <svg width="22" height="22" viewBox="0 0 24 24" fill="none" aria-hidden="true">
        <rect x="2.5" y="5.5" width="19" height="13" rx="2.5" fill="#0b0b0f" stroke={color} strokeWidth="1.6" />
        <path
          d="M3.5 7.5l8.5 6 8.5-6"
          stroke={color}
          strokeWidth="1.6"
          fill="none"
          strokeLinecap="round"
          strokeLinejoin="round"
        />
      </svg>
    </div>
  );
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/** MessageFlightLayer — flies an envelope from the source agent to the target
|
||||
* agent on the spatial canvas whenever a delegate / message event fires.
|
||||
*
|
||||
* Mounted INSIDE <ReactFlow> so its ViewportPortal places the envelope in flow
|
||||
* coordinates; it therefore pans and zooms with the canvas for free. The
|
||||
* flight lifecycle (which events become envelopes, reduced-motion opt-out,
|
||||
* expiry) lives in useA2AFlights — this component only resolves node centres
|
||||
* and renders. */
|
||||
import { ViewportPortal, type Node } from "@xyflow/react";
|
||||
import { useCanvasStore } from "@/store/canvas";
|
||||
import { useA2AFlights } from "@/hooks/useA2AFlights";
|
||||
import { FlightEnvelope, type Point } from "./FlightEnvelope";
|
||||
import type { WorkspaceNodeData } from "@/store/canvas";
|
||||
|
||||
// Fallback node footprint when React Flow has not measured a node yet. Matches
// WorkspaceNode's leaf size (w-[300px] min-h-[176px]); a slightly-off centre
// for the first frame after mount is invisible at flight scale.
const DEFAULT_W = 300;
const DEFAULT_H = 176;

/**
 * Centre point of a node, for use as a flight endpoint.
 *
 * @param n    - The node whose centre is wanted.
 * @param byId - Optional id → node index. When supplied, the node's
 *               `parentId` chain is followed and every ancestor's `position`
 *               is added, turning a parent-relative position into an absolute
 *               flow position. When omitted, `n.position` is used as-is.
 * @returns The centre, using the measured size or the DEFAULT_W / DEFAULT_H
 *          fallback when the node has not been measured.
 *
 * NOTE(review): assumes nested workspaces are modelled with React Flow's
 * `parentId` (child `position` relative to its parent) and the default
 * top-left node origin — confirm against the canvas store. For nodes without
 * a `parentId` the result is identical to using `n.position` directly.
 */
function nodeCenter(
  n: Node<WorkspaceNodeData>,
  byId?: ReadonlyMap<string, Node<WorkspaceNodeData>>,
): Point {
  const w = n.measured?.width ?? DEFAULT_W;
  const h = n.measured?.height ?? DEFAULT_H;

  let x = n.position.x;
  let y = n.position.y;

  if (byId) {
    // `seen` guards against a malformed (cyclic) parent chain looping forever.
    const seen = new Set<string>([n.id]);
    let parentId = n.parentId;
    while (parentId && !seen.has(parentId)) {
      const parent = byId.get(parentId);
      if (!parent) break; // dangling parent reference: stop at what we know
      x += parent.position.x;
      y += parent.position.y;
      seen.add(parentId);
      parentId = parent.parentId;
    }
  }

  return { x: x + w / 2, y: y + h / 2 };
}

/**
 * Draws one FlightEnvelope per active flight whose source and target nodes
 * are both present in the canvas store. Renders nothing when there are no
 * flights.
 */
export function MessageFlightLayer() {
  const flights = useA2AFlights();
  const nodes = useCanvasStore((s) => s.nodes);

  if (flights.length === 0) return null;

  // Index nodes once per render instead of scanning the array twice per
  // flight. First occurrence wins, matching Array.prototype.find.
  const byId = new Map<string, Node<WorkspaceNodeData>>();
  for (const n of nodes) {
    if (!byId.has(n.id)) byId.set(n.id, n);
  }

  return (
    <ViewportPortal>
      {flights.map((f) => {
        const src = byId.get(f.sourceId);
        const dst = byId.get(f.targetId);
        // Both endpoints must be on-canvas to draw a path between them.
        if (!src || !dst) return null;
        return (
          <FlightEnvelope
            key={f.key}
            from={nodeCenter(src, byId)}
            to={nodeCenter(dst, byId)}
            kind={f.kind}
          />
        );
      })}
    </ViewportPortal>
  );
}
|
||||
@@ -12,7 +12,6 @@ import {
|
||||
ProviderModelSelector,
|
||||
buildProviderCatalog,
|
||||
findProviderForModel,
|
||||
isPlatformManagedProvider,
|
||||
type SelectorValue,
|
||||
} from "./ProviderModelSelector";
|
||||
|
||||
@@ -268,21 +267,10 @@ function ProviderPickerModal({
|
||||
setSelectorValue(initial);
|
||||
}, [open, initial]);
|
||||
|
||||
// #2248: filter out provisioner-injected internal tokens for platform-managed
|
||||
// providers so the user can't clobber them. Memoized so the array reference is
|
||||
// stable across renders and does not churn the entries useEffect.
|
||||
const userEditableEnvVars = useMemo(() => {
|
||||
const selectedProvider = catalog.find((p) => p.id === selectorValue.providerId);
|
||||
const isPlatformManaged = selectedProvider ? isPlatformManagedProvider(selectedProvider) : false;
|
||||
return isPlatformManaged
|
||||
? selectorValue.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
|
||||
: selectorValue.envVars;
|
||||
}, [catalog, selectorValue.providerId, selectorValue.envVars]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
setEntries(
|
||||
userEditableEnvVars.map((key) => ({
|
||||
selectorValue.envVars.map((key) => ({
|
||||
key,
|
||||
value: "",
|
||||
// Pre-mark as saved when the key is already in the configured
|
||||
@@ -295,7 +283,7 @@ function ProviderPickerModal({
|
||||
);
|
||||
setOptionalEntries(
|
||||
optionalKeys
|
||||
.filter((key) => !userEditableEnvVars.includes(key))
|
||||
.filter((key) => !selectorValue.envVars.includes(key))
|
||||
.map((key) => ({
|
||||
key,
|
||||
value: "",
|
||||
@@ -304,7 +292,7 @@ function ProviderPickerModal({
|
||||
error: null,
|
||||
})),
|
||||
);
|
||||
}, [open, userEditableEnvVars, configuredKeys, optionalKeys]);
|
||||
}, [open, selectorValue.envVars, configuredKeys, optionalKeys]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
|
||||
@@ -91,7 +91,6 @@ export interface RegistryModel {
|
||||
name?: string;
|
||||
provider?: string;
|
||||
billing_mode?: "platform_managed" | "byok";
|
||||
required_env?: string[];
|
||||
}
|
||||
|
||||
export interface SelectorValue {
|
||||
|
||||
@@ -1,9 +1,25 @@
|
||||
"use client";
|
||||
|
||||
import { useState, useCallback, useRef, useEffect } from "react";
|
||||
import { useCanvasStore } from "@/store/canvas";
|
||||
import { useCanvasStore, type PanelTab } from "@/store/canvas";
|
||||
import { showToast } from "@/components/Toaster";
|
||||
import { StatusDot } from "./StatusDot";
|
||||
import { WorkspacePanelTabs } from "./WorkspacePanelTabs";
|
||||
import { Tooltip } from "./Tooltip";
|
||||
import { DetailsTab } from "./tabs/DetailsTab";
|
||||
import { SkillsTab } from "./tabs/SkillsTab";
|
||||
import { ChatTab } from "./tabs/ChatTab";
|
||||
import { ConfigTab } from "./tabs/ConfigTab";
|
||||
import { ContainerConfigTab } from "./tabs/ContainerConfigTab";
|
||||
import { DisplayTab } from "./tabs/DisplayTab";
|
||||
import { TerminalTab } from "./tabs/TerminalTab";
|
||||
import { FilesTab } from "./tabs/FilesTab";
|
||||
import { MemoryInspectorPanel } from "./MemoryInspectorPanel";
|
||||
import { AuditTrailPanel } from "./AuditTrailPanel";
|
||||
import { TracesTab } from "./tabs/TracesTab";
|
||||
import { EventsTab } from "./tabs/EventsTab";
|
||||
import { ActivityTab } from "./tabs/ActivityTab";
|
||||
import { ScheduleTab } from "./tabs/ScheduleTab";
|
||||
import { ChannelsTab } from "./tabs/ChannelsTab";
|
||||
import { summarizeWorkspaceCapabilities } from "@/store/canvas";
|
||||
|
||||
const SIDEPANEL_WIDTH_KEY = "molecule:sidepanel-width";
|
||||
@@ -11,6 +27,24 @@ const SIDEPANEL_DEFAULT_WIDTH = 480;
|
||||
const SIDEPANEL_MIN_WIDTH = 320;
|
||||
const SIDEPANEL_MAX_WIDTH = 800;
|
||||
|
||||
/**
 * Side-panel tab descriptors, in display order. The order is also the
 * arrow-key / Home / End navigation order of the tab bar. `icon` is a
 * decorative glyph rendered next to `label`.
 */
const TABS: { id: PanelTab; label: string; icon: string }[] = [
  { id: "chat", label: "Chat", icon: "◈" },
  { id: "activity", label: "Activity", icon: "⊙" },
  { id: "details", label: "Details", icon: "◉" },
  { id: "skills", label: "Plugins", icon: "✦" },
  { id: "terminal", label: "Terminal", icon: "▸" },
  { id: "display", label: "Display", icon: "▣" },
  { id: "container-config", label: "Container", icon: "▤" },
  { id: "config", label: "Config", icon: "⚙" },
  { id: "schedule", label: "Schedule", icon: "⏲" },
  { id: "channels", label: "Channels", icon: "⇌" },
  { id: "files", label: "Files", icon: "⊞" },
  { id: "memory", label: "Memory", icon: "◇" },
  { id: "traces", label: "Traces", icon: "◎" },
  { id: "events", label: "Events", icon: "◊" },
  { id: "audit", label: "Audit", icon: "⊟" },
];
|
||||
|
||||
export function SidePanel() {
|
||||
const selectedNodeId = useCanvasStore((s) => s.selectedNodeId);
|
||||
const panelTab = useCanvasStore((s) => s.panelTab);
|
||||
@@ -185,12 +219,104 @@ export function SidePanel() {
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Tabs + tab content — extracted into WorkspacePanelTabs so the same
|
||||
tab bar/body is reused verbatim by the concierge Settings page. The
|
||||
map drawer stays store-driven: we thread the global panelTab /
|
||||
setPanelTab through as the controlled active-tab pair, preserving the
|
||||
existing selection + keyboard behaviour. */}
|
||||
<WorkspacePanelTabs node={node} activeTab={panelTab} onTabChange={setPanelTab} />
|
||||
{/* Tabs — relative wrapper lets the fade gradient position against the scroll container */}
|
||||
<div className="relative border-b border-line/40">
|
||||
{/* Right-edge fade: signals more tabs are hidden off-screen when the bar overflows */}
|
||||
<div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-surface to-transparent z-10" aria-hidden="true" />
|
||||
<div
|
||||
role="tablist"
|
||||
aria-label="Workspace panel tabs"
|
||||
className="flex overflow-x-auto bg-surface-sunken/20 px-1"
|
||||
onKeyDown={(e) => {
|
||||
const idx = TABS.findIndex((t) => t.id === panelTab);
|
||||
let next: number | null = null;
|
||||
if (e.key === "ArrowRight") { e.preventDefault(); next = (idx + 1) % TABS.length; }
|
||||
else if (e.key === "ArrowLeft") { e.preventDefault(); next = (idx - 1 + TABS.length) % TABS.length; }
|
||||
else if (e.key === "Home") { e.preventDefault(); next = 0; }
|
||||
else if (e.key === "End") { e.preventDefault(); next = TABS.length - 1; }
|
||||
if (next !== null) {
|
||||
setPanelTab(TABS[next].id);
|
||||
requestAnimationFrame(() => { const el = document.getElementById(`tab-${TABS[next!].id}`); el?.focus(); el?.scrollIntoView({ block: "nearest", inline: "nearest" }); });
|
||||
}
|
||||
}}
|
||||
>
|
||||
{TABS.map((tab) => (
|
||||
<button
|
||||
type="button"
|
||||
key={tab.id}
|
||||
id={`tab-${tab.id}`}
|
||||
role="tab"
|
||||
aria-selected={panelTab === tab.id}
|
||||
aria-controls={`panel-${tab.id}`}
|
||||
tabIndex={panelTab === tab.id ? 0 : -1}
|
||||
onClick={() => setPanelTab(tab.id)}
|
||||
className={`shrink-0 px-3 py-2.5 text-[10px] font-medium tracking-wide transition-all rounded-t-lg mx-0.5 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 ${
|
||||
panelTab === tab.id
|
||||
? "text-ink bg-surface-card border-b-2 border-accent"
|
||||
: "text-ink-mid hover:text-ink hover:bg-surface-card/60"
|
||||
}`}
|
||||
>
|
||||
<span className="mr-1 opacity-50" aria-hidden="true">{tab.icon}</span>
|
||||
{tab.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Needs Restart Banner */}
|
||||
{node.data.needsRestart && !node.data.currentTask && selectedNodeId && (
|
||||
<div className="px-4 py-2 bg-sky-950/20 border-b border-sky-800/20 flex items-center justify-between">
|
||||
<span className="text-[10px] text-sky-300/90">Config changed — restart to apply</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
useCanvasStore.getState().restartWorkspace(selectedNodeId).catch(() => showToast("Restart failed", "error"));
|
||||
}}
|
||||
className="text-[11px] px-2 py-1 bg-sky-800/40 hover:bg-sky-700/50 text-sky-200 rounded transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1"
|
||||
>
|
||||
Restart Now
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Current Task Banner */}
|
||||
{node.data.currentTask && (
|
||||
<Tooltip text={node.data.currentTask as string}>
|
||||
<div className="px-4 py-2 bg-amber-950/20 border-b border-amber-800/20 flex items-center gap-2 cursor-default">
|
||||
<div className="w-1.5 h-1.5 rounded-full bg-amber-400 motion-safe:animate-pulse shrink-0" />
|
||||
<span className="text-[10px] text-warm/90 truncate">
|
||||
{node.data.currentTask}
|
||||
</span>
|
||||
</div>
|
||||
</Tooltip>
|
||||
)}
|
||||
|
||||
{/* Tab Content */}
|
||||
<div
|
||||
role="tabpanel"
|
||||
id={`panel-${panelTab}`}
|
||||
aria-labelledby={`tab-${panelTab}`}
|
||||
tabIndex={0}
|
||||
className="flex-1 overflow-y-auto focus:outline-none"
|
||||
>
|
||||
{panelTab === "details" && <DetailsTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "skills" && <SkillsTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "activity" && <ActivityTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "chat" && <ChatTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "terminal" && <TerminalTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "display" && <DisplayTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "container-config" && selectedNodeId && (
|
||||
<ContainerConfigTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />
|
||||
)}
|
||||
{panelTab === "config" && <ConfigTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "schedule" && <ScheduleTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "channels" && <ChannelsTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "files" && <FilesTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
|
||||
{panelTab === "memory" && <MemoryInspectorPanel key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "traces" && <TracesTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "events" && <EventsTab key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
{panelTab === "audit" && <AuditTrailPanel key={selectedNodeId} workspaceId={selectedNodeId} />}
|
||||
</div>
|
||||
|
||||
{/* Footer — workspace ID */}
|
||||
<div className="px-4 sm:px-5 py-2 border-t border-line/40 bg-surface-sunken/20">
|
||||
|
||||
@@ -3,9 +3,11 @@
|
||||
import { useMemo, useState, useCallback, useEffect, useRef } from "react";
|
||||
import { api } from "@/lib/api";
|
||||
import { useCanvasStore } from "@/store/canvas";
|
||||
import { WORKSPACE_KIND } from "@/lib/workspace-kind";
|
||||
import { SettingsButton } from "@/components/settings/SettingsButton";
|
||||
import { settingsGearRef } from "@/components/settings/SettingsPanel";
|
||||
import { ConfirmDialog } from "@/components/ConfirmDialog";
|
||||
import { showToast } from "@/components/Toaster";
|
||||
import { ThemeToggle } from "@/components/ThemeToggle";
|
||||
import { statusDotClass } from "@/lib/design-tokens";
|
||||
import { KeyboardShortcutsDialog } from "@/components/KeyboardShortcutsDialog";
|
||||
|
||||
@@ -53,11 +55,8 @@ export function Toolbar() {
|
||||
}, [wsStatus]);
|
||||
|
||||
const counts = useMemo(() => {
|
||||
// Exclude the org-level platform agent (the concierge) — it's the
|
||||
// undeletable org root surfaced in the shell, not a counted map workspace.
|
||||
const mapNodes = nodes.filter((n) => n.data.kind !== WORKSPACE_KIND.Platform);
|
||||
const c = { total: mapNodes.length, roots: 0, children: 0, online: 0, offline: 0, failed: 0, provisioning: 0, activeTasks: 0 };
|
||||
for (const n of mapNodes) {
|
||||
const c = { total: nodes.length, roots: 0, children: 0, online: 0, offline: 0, failed: 0, provisioning: 0, activeTasks: 0 };
|
||||
for (const n of nodes) {
|
||||
if (n.data.parentId) c.children++; else c.roots++;
|
||||
const s = n.data.status;
|
||||
if (s === "online") c.online++;
|
||||
@@ -461,8 +460,11 @@ export function Toolbar() {
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Theme picker + settings gear removed from the map toolbar — both now
|
||||
live in the concierge global Settings (left rail) + topbar. */}
|
||||
{/* Theme picker — System / Light / Dark */}
|
||||
<ThemeToggle />
|
||||
|
||||
{/* Settings gear icon */}
|
||||
<SettingsButton ref={settingsGearRef} />
|
||||
|
||||
<ConfirmDialog
|
||||
open={restartConfirmOpen}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
"use client";
|
||||
|
||||
import { useMemo, type KeyboardEvent } from "react";
|
||||
import { Handle, Position, type NodeProps, type Node } from "@xyflow/react";
|
||||
import { useCallback, useMemo, type KeyboardEvent } from "react";
|
||||
import { Handle, NodeResizer, Position, type NodeProps, type Node } from "@xyflow/react";
|
||||
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
|
||||
import { getConfigurationError, getConfigurationStatus } from "@/store/canvas-topology";
|
||||
import { showToast } from "@/components/Toaster";
|
||||
@@ -21,8 +21,7 @@ function useDescendantCount(nodeId: string): number {
|
||||
return useMemo(() => countDescendants(nodeId, nodes), [nodeId, nodes]);
|
||||
}
|
||||
|
||||
/** Boolean flag used to drive the container's system-controlled size
|
||||
* (leaves render fixed-size; parents grow to fit children).
|
||||
/** Boolean flag used to drive min-size and NodeResizer dimensions.
|
||||
* Selecting `nodes` stably avoids re-render loops (same issue as
|
||||
* useDescendantCount). */
|
||||
function useHasChildren(nodeId: string): boolean {
|
||||
@@ -88,9 +87,16 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
|
||||
return (
|
||||
<>
|
||||
{/* Free-resize removed (was NodeResizer). Container size + shape are now
|
||||
* system-controlled: leaf workspaces render at a fixed width; parent
|
||||
* workspaces grow to fit their nested children (store grow logic). */}
|
||||
{/* NodeResizer — visible only on the selected card. Lets the user
|
||||
* drag any edge/corner to grow or shrink the workspace, which is
|
||||
* useful on cards that contain nested child workspaces. */}
|
||||
<NodeResizer
|
||||
isVisible={isSelected}
|
||||
minWidth={hasChildren ? 360 : 210}
|
||||
minHeight={hasChildren ? 200 : 110}
|
||||
lineClassName="!border-accent/40"
|
||||
handleClassName="!w-2 !h-2 !bg-accent !border !border-blue-300"
|
||||
/>
|
||||
<div
|
||||
role="button"
|
||||
tabIndex={0}
|
||||
@@ -155,22 +161,20 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
}
|
||||
}}
|
||||
className={`
|
||||
group relative rounded-xl
|
||||
${hasChildren && !data.collapsed
|
||||
? "h-full w-full min-w-[420px] min-h-[240px]"
|
||||
: "w-[300px] min-h-[176px]"}
|
||||
group relative rounded-xl h-full w-full
|
||||
${hasChildren && !data.collapsed ? "min-w-[360px] min-h-[200px]" : "min-w-[210px]"}
|
||||
cursor-pointer overflow-hidden
|
||||
transition-all duration-200 ease-out
|
||||
${isDragTarget
|
||||
? "bg-emerald-950/40 border-2 border-emerald-400/60 ring-2 ring-emerald-400/20 scale-[1.03]"
|
||||
: isBatchSelected
|
||||
? "bg-surface-sunken/95 border-2 border-accent/80 ring-2 ring-accent/30 shadow-lg shadow-accent/15"
|
||||
? "bg-surface-sunken/95 border-2 border-accent/80 ring-2 ring-accent/30 shadow-lg shadow-blue-500/15"
|
||||
: isSelected
|
||||
? "bg-surface-sunken/95 border border-accent/70 ring-1 ring-accent/30 shadow-lg shadow-accent/10"
|
||||
: "bg-surface-sunken/90 border border-line/80 hover:border-ink-soft/60 shadow-lg shadow-black/30 hover:shadow-xl hover:shadow-black/40"
|
||||
? "bg-surface-sunken/95 border border-accent/70 ring-1 ring-accent/30 shadow-lg shadow-blue-500/10"
|
||||
: "bg-surface-sunken/90 border border-line/80 hover:border-zinc-500/60 shadow-lg shadow-black/30 hover:shadow-xl hover:shadow-black/40"
|
||||
}
|
||||
backdrop-blur-sm
|
||||
focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:ring-offset-1 focus-visible:ring-offset-surface
|
||||
focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-950
|
||||
${deploy.isActivelyProvisioning ? "mol-deploy-shimmer" : ""}
|
||||
${deploy.isLockedChild ? "mol-deploy-locked" : ""}
|
||||
`}
|
||||
@@ -208,45 +212,27 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
}
|
||||
}
|
||||
}}
|
||||
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-top-0.5 hover:!bg-accent hover:!h-1.5 focus-visible:!bg-accent focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface transition-all"
|
||||
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-top-0.5 hover:!bg-blue-400 hover:!h-1.5 focus-visible:!bg-blue-400 focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-blue-400/60 focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-950 transition-all"
|
||||
/>
|
||||
|
||||
<div className="relative px-4 py-3.5">
|
||||
<div className="relative px-3.5 py-2.5">
|
||||
{/* Header row */}
|
||||
<div className="flex items-center justify-between gap-2 mb-2.5">
|
||||
<div className="flex items-center gap-2.5 min-w-0">
|
||||
<div className={`w-2.5 h-2.5 rounded-full shrink-0 ${statusCfg.dot} ${statusCfg.glow} shadow-sm`} />
|
||||
<span className="text-[15px] font-semibold text-ink truncate leading-tight">
|
||||
<div className="flex items-center justify-between gap-2 mb-1">
|
||||
<div className="flex items-center gap-2 min-w-0">
|
||||
<div className={`w-2 h-2 rounded-full shrink-0 ${statusCfg.dot} ${statusCfg.glow} shadow-sm`} />
|
||||
<span className="text-[13px] font-semibold text-ink truncate leading-tight">
|
||||
{data.name}
|
||||
</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-1.5 shrink-0">
|
||||
{/* Model pill (concept top-right). Shortens the agent_card model to
|
||||
a family label (Opus/Sonnet/Haiku/Kimi); falls back to the raw
|
||||
last segment, then to the tier badge when no model is known. */}
|
||||
{(() => {
|
||||
const m = (data.agentCard as Record<string, unknown> | null)?.model;
|
||||
const model = typeof m === "string" && m ? m : null;
|
||||
if (!model) {
|
||||
return (
|
||||
<span className={`text-[11px] font-mono px-2 py-1 rounded-md ${tierCfg.color}`}>
|
||||
{tierCfg.label}
|
||||
</span>
|
||||
);
|
||||
}
|
||||
const label = /opus/i.test(model) ? "Opus"
|
||||
: /sonnet/i.test(model) ? "Sonnet"
|
||||
: /haiku/i.test(model) ? "Haiku"
|
||||
: /kimi/i.test(model) ? "Kimi"
|
||||
: /gpt|openai/i.test(model) ? "GPT"
|
||||
: /gemini/i.test(model) ? "Gemini"
|
||||
: (model.split(/[/:]/).pop() || model);
|
||||
return (
|
||||
<span className="text-[11px] font-mono px-2 py-1 rounded-md text-white bg-accent" title={model}>
|
||||
{label}
|
||||
</span>
|
||||
);
|
||||
})()}
|
||||
{hasChildren && (
|
||||
<span className="text-[10px] font-mono text-accent bg-accent/15 border border-accent/40 px-1.5 py-0.5 rounded-md">
|
||||
{descendantCount} sub
|
||||
</span>
|
||||
)}
|
||||
<span className={`text-[10px] font-mono px-1.5 py-0.5 rounded-md ${tierCfg.color}`}>
|
||||
{tierCfg.label}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -256,9 +242,6 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
We treat empty-string DB values as "missing" so an unbackfilled
|
||||
row falls through to the agent-card value rather than rendering
|
||||
a blank pill. */}
|
||||
{/* Role pill (concept) — uppercase, accent-bordered. Platform root
|
||||
shows "PLATFORM · ROOT"; Phase 30 external-runtime agents get the
|
||||
REMOTE marker alongside. */}
|
||||
{(() => {
|
||||
const dbRuntime = typeof data.runtime === "string" && data.runtime !== ""
|
||||
? data.runtime : null;
|
||||
@@ -266,46 +249,32 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
? (data.agentCard as Record<string, string>).runtime
|
||||
: null;
|
||||
const runtime = dbRuntime ?? cardRuntime;
|
||||
const isRemote = !!runtime && isExternalLikeRuntime(runtime);
|
||||
const isPlatformRoot = !data.parentId && hasChildren;
|
||||
const roleLabel = isPlatformRoot ? "PLATFORM · ROOT" : (data.role || null);
|
||||
if (!roleLabel && !isRemote) return null;
|
||||
if (!runtime) return null;
|
||||
return (
|
||||
<div className="mb-2.5 flex items-center gap-1.5">
|
||||
{roleLabel && (
|
||||
<span className="max-w-[220px] truncate text-[10px] font-mono uppercase tracking-[0.04em] px-2 py-1 rounded-md text-accent bg-accent/12 border border-accent/35">
|
||||
{roleLabel}
|
||||
</span>
|
||||
)}
|
||||
{isRemote && (
|
||||
<div className="mb-1 flex items-center gap-1">
|
||||
{isExternalLikeRuntime(runtime) ? (
|
||||
<span
|
||||
className="text-[10px] font-mono uppercase px-2 py-1 rounded-md text-white bg-violet-800 border border-violet-900"
|
||||
className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-white bg-violet-800 border border-violet-900"
|
||||
title="Phase 30 remote agent — runs outside this platform's Docker network. Lifecycle managed via heartbeat-based polling, not Docker exec."
|
||||
>
|
||||
★ REMOTE
|
||||
</span>
|
||||
) : (
|
||||
<span className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-ink-mid bg-surface-card border border-line">
|
||||
{runtime}
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})()}
|
||||
|
||||
{/* Status line (concept) — uppercase status, "· N AGENTS" for parents,
|
||||
with a queued pill on the right. */}
|
||||
<div className="mb-2 flex items-center justify-between gap-2">
|
||||
<span className={`text-[11px] font-mono uppercase tracking-[0.04em] ${
|
||||
isOnline ? "text-good"
|
||||
: effectiveStatus === "failed" ? "text-bad"
|
||||
: (effectiveStatus === "provisioning" || effectiveStatus === "degraded") ? "text-warm"
|
||||
: "text-ink-soft"
|
||||
}`}>
|
||||
{statusCfg.label}{hasChildren ? ` · ${descendantCount} agents` : ""}
|
||||
</span>
|
||||
{data.activeTasks > 0 && (
|
||||
<span className="shrink-0 text-[11px] font-mono px-2 py-1 rounded-md text-ink-mid bg-surface-card border border-line">
|
||||
≡ {data.activeTasks} queued
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
{/* Role — clamp to 2 lines. Without this, a verbose role
|
||||
* description (common on org-template imports) lets the card
|
||||
* grow arbitrarily tall, which wrecks the grid-slot layout
|
||||
* because siblings all plan for the same CHILD_DEFAULT_HEIGHT. */}
|
||||
{data.role && (
|
||||
<div className="text-[10px] text-ink-mid mb-1.5 leading-tight line-clamp-2">{data.role}</div>
|
||||
)}
|
||||
|
||||
{/* Skills */}
|
||||
{skills.length > 0 && (
|
||||
@@ -359,7 +328,29 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
</button>
|
||||
)}
|
||||
|
||||
{/* (status + queued now rendered above, concept-style) */}
|
||||
{/* Bottom row: status / active tasks */}
|
||||
<div className="flex items-center justify-between mt-0.5">
|
||||
{effectiveStatus !== "online" ? (
|
||||
<div className={`text-[10px] uppercase tracking-widest font-medium ${
|
||||
effectiveStatus === "failed" ? "text-bad" :
|
||||
effectiveStatus === "degraded" ? "text-warm" :
|
||||
effectiveStatus === "not_configured" ? "text-warm" :
|
||||
effectiveStatus === "provisioning" ? "text-accent" :
|
||||
"text-ink-mid"
|
||||
}`}>
|
||||
{statusCfg.label}
|
||||
</div>
|
||||
) : <div />}
|
||||
|
||||
{data.activeTasks > 0 && (
|
||||
<div className="flex items-center gap-1">
|
||||
<div className="w-1 h-1 rounded-full bg-warm motion-safe:animate-pulse" />
|
||||
<span className="text-[10px] text-warm tabular-nums">
|
||||
{data.activeTasks} task{data.activeTasks > 1 ? "s" : ""}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Degraded error preview */}
|
||||
{data.status === "degraded" && data.lastSampleError && (
|
||||
@@ -404,7 +395,7 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
|
||||
}
|
||||
}
|
||||
}}
|
||||
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-bottom-0.5 hover:!bg-accent hover:!h-1.5 focus-visible:!bg-accent focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface transition-all"
|
||||
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-bottom-0.5 hover:!bg-blue-400 hover:!h-1.5 focus-visible:!bg-blue-400 focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-blue-400/60 focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-950 transition-all"
|
||||
/>
|
||||
</div>
|
||||
</>
|
||||
|
||||
@@ -1,195 +0,0 @@
|
||||
"use client";
|
||||
|
||||
import { useState } from "react";
|
||||
import type { Node } from "@xyflow/react";
|
||||
import {
|
||||
useCanvasStore,
|
||||
type PanelTab,
|
||||
type WorkspaceNodeData,
|
||||
} from "@/store/canvas";
|
||||
import { showToast } from "@/components/Toaster";
|
||||
import { Tooltip } from "./Tooltip";
|
||||
import { DetailsTab } from "./tabs/DetailsTab";
|
||||
import { SkillsTab } from "./tabs/SkillsTab";
|
||||
import { ChatTab } from "./tabs/ChatTab";
|
||||
import { ConfigTab } from "./tabs/ConfigTab";
|
||||
import { ContainerConfigTab } from "./tabs/ContainerConfigTab";
|
||||
import { DisplayTab } from "./tabs/DisplayTab";
|
||||
import { TerminalTab } from "./tabs/TerminalTab";
|
||||
import { FilesTab } from "./tabs/FilesTab";
|
||||
import { MemoryInspectorPanel } from "./MemoryInspectorPanel";
|
||||
import { AuditTrailPanel } from "./AuditTrailPanel";
|
||||
import { TracesTab } from "./tabs/TracesTab";
|
||||
import { EventsTab } from "./tabs/EventsTab";
|
||||
import { ActivityTab } from "./tabs/ActivityTab";
|
||||
import { ScheduleTab } from "./tabs/ScheduleTab";
|
||||
import { ChannelsTab } from "./tabs/ChannelsTab";
|
||||
|
||||
/**
|
||||
* Canonical workspace tab set — the SAME ids/labels/icons the map's
|
||||
* SidePanel has always rendered. Single source of truth so the map drawer
|
||||
* and any other host (the concierge Settings page) can't drift.
|
||||
*/
|
||||
export const WORKSPACE_PANEL_TABS: { id: PanelTab; label: string; icon: string }[] = [
|
||||
{ id: "chat", label: "Chat", icon: "◈" },
|
||||
{ id: "activity", label: "Activity", icon: "⊙" },
|
||||
{ id: "details", label: "Details", icon: "◉" },
|
||||
{ id: "skills", label: "Plugins", icon: "✦" },
|
||||
{ id: "terminal", label: "Terminal", icon: "▸" },
|
||||
{ id: "display", label: "Display", icon: "▣" },
|
||||
{ id: "container-config", label: "Container", icon: "▤" },
|
||||
{ id: "config", label: "Config", icon: "⚙" },
|
||||
{ id: "schedule", label: "Schedule", icon: "⏲" },
|
||||
{ id: "channels", label: "Channels", icon: "⇌" },
|
||||
{ id: "files", label: "Files", icon: "⊞" },
|
||||
{ id: "memory", label: "Memory", icon: "◇" },
|
||||
{ id: "traces", label: "Traces", icon: "◎" },
|
||||
{ id: "events", label: "Events", icon: "◊" },
|
||||
{ id: "audit", label: "Audit", icon: "⊟" },
|
||||
];
|
||||
|
||||
interface Props {
|
||||
/** The workspace node whose tabs to render (id + data blob). */
|
||||
node: Node<WorkspaceNodeData>;
|
||||
/**
|
||||
* Controlled active tab. When provided together with `onTabChange`, the
|
||||
* caller owns the active-tab state (the map's SidePanel threads the global
|
||||
* `panelTab`/`setPanelTab` here so the store stays the source of truth and
|
||||
* the existing keyboard/selection behaviour is preserved verbatim).
|
||||
* When omitted, the component manages its OWN local active-tab state —
|
||||
* which is what the concierge Settings page uses so the embedded tabs
|
||||
* don't fight the map's selection.
|
||||
*/
|
||||
activeTab?: PanelTab;
|
||||
onTabChange?: (tab: PanelTab) => void;
|
||||
/** Initial tab for the uncontrolled (local-state) mode. Defaults to "chat". */
|
||||
defaultTab?: PanelTab;
|
||||
}
|
||||
|
||||
/**
|
||||
* The workspace tab bar + tab body, extracted from SidePanel so it can be
|
||||
* reused verbatim outside the map (e.g. the concierge Settings "Platform
|
||||
* agent configuration" section). Renders the canonical ARIA tablist and the
|
||||
* exact same tab content components keyed on the active tab.
|
||||
*
|
||||
* Does NOT render the workspace header / meta pills / resize handle / footer —
|
||||
* those are host chrome and stay in the host (SidePanel for the map).
|
||||
*/
|
||||
export function WorkspacePanelTabs({ node, activeTab, onTabChange, defaultTab = "chat" }: Props) {
|
||||
const restartWorkspace = useCanvasStore((s) => s.restartWorkspace);
|
||||
|
||||
// Controlled when both props are present; otherwise own the state locally.
|
||||
const controlled = activeTab !== undefined && onTabChange !== undefined;
|
||||
const [localTab, setLocalTab] = useState<PanelTab>(defaultTab);
|
||||
const tab = controlled ? (activeTab as PanelTab) : localTab;
|
||||
const setTab = (next: PanelTab) => {
|
||||
if (controlled) onTabChange!(next);
|
||||
else setLocalTab(next);
|
||||
};
|
||||
|
||||
const workspaceId = node.id;
|
||||
const data = node.data;
|
||||
|
||||
return (
|
||||
<>
|
||||
{/* Tabs — relative wrapper lets the fade gradient position against the scroll container */}
|
||||
<div className="relative border-b border-line/40">
|
||||
{/* Right-edge fade: signals more tabs are hidden off-screen when the bar overflows */}
|
||||
<div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-surface to-transparent z-10" aria-hidden="true" />
|
||||
<div
|
||||
role="tablist"
|
||||
aria-label="Workspace panel tabs"
|
||||
className="flex overflow-x-auto bg-surface-sunken/20 px-1"
|
||||
onKeyDown={(e) => {
|
||||
const idx = WORKSPACE_PANEL_TABS.findIndex((t) => t.id === tab);
|
||||
let next: number | null = null;
|
||||
if (e.key === "ArrowRight") { e.preventDefault(); next = (idx + 1) % WORKSPACE_PANEL_TABS.length; }
|
||||
else if (e.key === "ArrowLeft") { e.preventDefault(); next = (idx - 1 + WORKSPACE_PANEL_TABS.length) % WORKSPACE_PANEL_TABS.length; }
|
||||
else if (e.key === "Home") { e.preventDefault(); next = 0; }
|
||||
else if (e.key === "End") { e.preventDefault(); next = WORKSPACE_PANEL_TABS.length - 1; }
|
||||
if (next !== null) {
|
||||
setTab(WORKSPACE_PANEL_TABS[next].id);
|
||||
requestAnimationFrame(() => { const el = document.getElementById(`tab-${WORKSPACE_PANEL_TABS[next!].id}`); el?.focus(); el?.scrollIntoView({ block: "nearest", inline: "nearest" }); });
|
||||
}
|
||||
}}
|
||||
>
|
||||
{WORKSPACE_PANEL_TABS.map((t) => (
|
||||
<button
|
||||
type="button"
|
||||
key={t.id}
|
||||
id={`tab-${t.id}`}
|
||||
role="tab"
|
||||
aria-selected={tab === t.id}
|
||||
aria-controls={`panel-${t.id}`}
|
||||
tabIndex={tab === t.id ? 0 : -1}
|
||||
onClick={() => setTab(t.id)}
|
||||
className={`shrink-0 px-3 py-2.5 text-[10px] font-medium tracking-wide transition-all rounded-t-lg mx-0.5 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 ${
|
||||
tab === t.id
|
||||
? "text-ink bg-surface-card border-b-2 border-accent"
|
||||
: "text-ink-mid hover:text-ink hover:bg-surface-card/60"
|
||||
}`}
|
||||
>
|
||||
<span className="mr-1 opacity-50" aria-hidden="true">{t.icon}</span>
|
||||
{t.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Needs Restart Banner */}
|
||||
{data.needsRestart && !data.currentTask && (
|
||||
<div className="px-4 py-2 bg-sky-950/20 border-b border-sky-800/20 flex items-center justify-between">
|
||||
<span className="text-[10px] text-sky-300/90">Config changed — restart to apply</span>
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => {
|
||||
restartWorkspace(workspaceId).catch(() => showToast("Restart failed", "error"));
|
||||
}}
|
||||
className="text-[11px] px-2 py-1 bg-sky-800/40 hover:bg-sky-700/50 text-sky-200 rounded transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1"
|
||||
>
|
||||
Restart Now
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Current Task Banner */}
|
||||
{data.currentTask && (
|
||||
<Tooltip text={data.currentTask as string}>
|
||||
<div className="px-4 py-2 bg-amber-950/20 border-b border-amber-800/20 flex items-center gap-2 cursor-default">
|
||||
<div className="w-1.5 h-1.5 rounded-full bg-amber-400 motion-safe:animate-pulse shrink-0" />
|
||||
<span className="text-[10px] text-warm/90 truncate">
|
||||
{data.currentTask}
|
||||
</span>
|
||||
</div>
|
||||
</Tooltip>
|
||||
)}
|
||||
|
||||
{/* Tab Content */}
|
||||
<div
|
||||
role="tabpanel"
|
||||
id={`panel-${tab}`}
|
||||
aria-labelledby={`tab-${tab}`}
|
||||
tabIndex={0}
|
||||
className="flex-1 overflow-y-auto focus:outline-none"
|
||||
>
|
||||
{tab === "details" && <DetailsTab key={workspaceId} workspaceId={workspaceId} data={data} />}
|
||||
{tab === "skills" && <SkillsTab key={workspaceId} workspaceId={workspaceId} data={data} />}
|
||||
{tab === "activity" && <ActivityTab key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "chat" && <ChatTab key={workspaceId} workspaceId={workspaceId} data={data} />}
|
||||
{tab === "terminal" && <TerminalTab key={workspaceId} workspaceId={workspaceId} data={data} />}
|
||||
{tab === "display" && <DisplayTab key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "container-config" && (
|
||||
<ContainerConfigTab key={workspaceId} workspaceId={workspaceId} data={data} />
|
||||
)}
|
||||
{tab === "config" && <ConfigTab key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "schedule" && <ScheduleTab key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "channels" && <ChannelsTab key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "files" && <FilesTab key={workspaceId} workspaceId={workspaceId} data={data} />}
|
||||
{tab === "memory" && <MemoryInspectorPanel key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "traces" && <TracesTab key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "events" && <EventsTab key={workspaceId} workspaceId={workspaceId} />}
|
||||
{tab === "audit" && <AuditTrailPanel key={workspaceId} workspaceId={workspaceId} />}
|
||||
</div>
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -385,7 +385,7 @@ describe("ContextMenu — item actions", () => {
|
||||
render(<ContextMenu />);
|
||||
fireEvent.click(screen.getByRole("menuitem", { name: /pause/i }));
|
||||
await act(async () => { /* flush */ });
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause?cascade=true", {});
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause", {});
|
||||
expect(mockStoreState.updateNodeData).toHaveBeenCalledWith("n1", { status: "paused" });
|
||||
});
|
||||
|
||||
@@ -395,7 +395,7 @@ describe("ContextMenu — item actions", () => {
|
||||
render(<ContextMenu />);
|
||||
fireEvent.click(screen.getByRole("menuitem", { name: /resume/i }));
|
||||
await act(async () => { /* flush */ });
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume?cascade=true", {});
|
||||
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume", {});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -1,84 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// SaaS-mode coverage for the per-workspace cloud-provider picker. The main
|
||||
// CreateWorkspaceDialog.test.tsx runs non-SaaS (the picker is hidden and the
|
||||
// payload omits `provider`); this file forces SaaS by mocking isSaaSTenant so
|
||||
// the picker renders and the selected provider flows into compute.provider.
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
|
||||
import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), post: vi.fn() },
|
||||
}));
|
||||
|
||||
// Force SaaS so the Cloud provider picker is shown and the payload carries it.
|
||||
vi.mock("@/lib/tenant", async (importOriginal) => ({
|
||||
...(await importOriginal<typeof import("@/lib/tenant")>()),
|
||||
isSaaSTenant: () => true,
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
const mockPost = vi.mocked(api.post);
|
||||
|
||||
const SAMPLE_TEMPLATES = [
|
||||
{
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code Agent",
|
||||
runtime: "claude-code",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
providers: ["platform", "minimax"],
|
||||
models: [{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] }],
|
||||
},
|
||||
];
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
if (url === "/templates") return SAMPLE_TEMPLATES as any;
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return [] as any;
|
||||
});
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPost.mockResolvedValue({} as any);
|
||||
});
|
||||
|
||||
afterEach(() => cleanup());
|
||||
|
||||
async function openDialog() {
|
||||
render(<CreateWorkspaceButton />);
|
||||
const btn = screen.getAllByRole("button").find((b) => b.textContent?.includes("New Workspace"));
|
||||
fireEvent.click(btn!);
|
||||
await waitFor(() => expect(screen.getByText("Create Workspace")).toBeTruthy());
|
||||
}
|
||||
|
||||
describe("CreateWorkspaceDialog — cloud provider (SaaS)", () => {
|
||||
it("shows the Cloud provider picker, defaulting to AWS", async () => {
|
||||
await openDialog();
|
||||
const select = screen.getByLabelText("Cloud provider") as HTMLSelectElement;
|
||||
expect(select).toBeTruthy();
|
||||
expect(select.value).toBe("aws");
|
||||
});
|
||||
|
||||
it("defaults compute.provider to aws when the picker is untouched", async () => {
|
||||
await openDialog();
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "AWS Agent" } });
|
||||
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.compute).toMatchObject({ provider: "aws" });
|
||||
});
|
||||
|
||||
it("threads the selected cloud provider into compute.provider", async () => {
|
||||
await openDialog();
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "GCP Agent" } });
|
||||
fireEvent.change(screen.getByLabelText("Cloud provider"), { target: { value: "gcp" } });
|
||||
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.compute).toMatchObject({ provider: "gcp" });
|
||||
});
|
||||
});
|
||||
@@ -1,175 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* Regression tests for #2248 — platform-managed provider credential suppression.
|
||||
*
|
||||
* Covers:
|
||||
* - MOLECULE_LLM_USAGE_TOKEN is hidden when the selected provider is platform-managed
|
||||
* - MOLECULE_LLM_USAGE_TOKEN is still shown for BYOK providers
|
||||
* - No render churn from unstable array references (useMemo guard)
|
||||
*/
|
||||
import { describe, it, expect, vi, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, cleanup, waitFor, act } from "@testing-library/react";
|
||||
import { MissingKeysModal } from "../MissingKeysModal";
|
||||
import type { ModelSpec, ProviderChoice } from "@/lib/deploy-preflight";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), put: vi.fn() },
|
||||
}));
|
||||
|
||||
vi.mock("@/lib/deploy-preflight", async () => {
|
||||
const actual = await vi.importActual<typeof import("@/lib/deploy-preflight")>(
|
||||
"@/lib/deploy-preflight",
|
||||
);
|
||||
return actual;
|
||||
});
|
||||
|
||||
const PLATFORM_MANAGED_MODELS: ModelSpec[] = [
|
||||
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
const BYOK_MODELS: ModelSpec[] = [
|
||||
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
function makeProviders(billingMode: "platform_managed" | "byok"): ProviderChoice[] {
|
||||
const main = {
|
||||
id: billingMode === "platform_managed" ? "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" : "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
|
||||
label: billingMode === "platform_managed" ? "Platform Anthropic" : "BYOK Anthropic",
|
||||
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billingMode,
|
||||
};
|
||||
// Need ≥2 providers so MissingKeysModal enters picker mode (pickerMode = providers.length > 1).
|
||||
const dummy = {
|
||||
id: "openai|OPENAI_API_KEY",
|
||||
label: "OpenAI",
|
||||
envVars: ["OPENAI_API_KEY"],
|
||||
};
|
||||
return [main, dummy];
|
||||
}
|
||||
|
||||
describe("ProviderPickerModal — platform-managed suppression (#2248)", () => {
|
||||
afterEach(() => cleanup());
|
||||
|
||||
it("hides MOLECULE_LLM_USAGE_TOKEN when provider is platform-managed", () => {
|
||||
render(
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={makeProviders("platform_managed")}
|
||||
models={PLATFORM_MANAGED_MODELS}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
// Only ANTHROPIC_API_KEY should be rendered; MOLECULE_LLM_USAGE_TOKEN suppressed
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
|
||||
});
|
||||
|
||||
it("shows MOLECULE_LLM_USAGE_TOKEN when provider is BYOK", () => {
|
||||
render(
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={makeProviders("byok")}
|
||||
models={BYOK_MODELS}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
// Both keys visible for BYOK
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("does not churn renders when the modal is open and platform-managed", () => {
|
||||
let renderCount = 0;
|
||||
|
||||
function RenderSpy({ children }: { children: React.ReactNode }) {
|
||||
renderCount++;
|
||||
return <>{children}</>;
|
||||
}
|
||||
|
||||
render(
|
||||
<RenderSpy>
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={makeProviders("platform_managed")}
|
||||
models={PLATFORM_MANAGED_MODELS}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>
|
||||
</RenderSpy>,
|
||||
);
|
||||
|
||||
const countAfterInitial = renderCount;
|
||||
|
||||
// Wait a tick — if useEffect were looping, renderCount would climb.
|
||||
// In jsdom without real timers there's no automatic re-render, so we
|
||||
// just assert the count is stable immediately after the single
|
||||
// commit required by the initial open state.
|
||||
expect(renderCount).toBe(countAfterInitial);
|
||||
expect(renderCount).toBeLessThanOrEqual(2); // StrictMode double-render ceiling
|
||||
});
|
||||
|
||||
it("updates suppression correctly when switching from BYOK to platform-managed", async () => {
|
||||
const providers: ProviderChoice[] = [
|
||||
{
|
||||
id: "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
|
||||
label: "BYOK Anthropic",
|
||||
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billingMode: "byok",
|
||||
},
|
||||
{
|
||||
id: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
|
||||
label: "Platform Anthropic",
|
||||
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billingMode: "platform_managed",
|
||||
},
|
||||
{
|
||||
id: "openai|OPENAI_API_KEY",
|
||||
label: "OpenAI",
|
||||
envVars: ["OPENAI_API_KEY"],
|
||||
},
|
||||
];
|
||||
|
||||
const models: ModelSpec[] = [
|
||||
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
];
|
||||
|
||||
render(
|
||||
<MissingKeysModal
|
||||
open
|
||||
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
|
||||
providers={providers}
|
||||
models={models}
|
||||
runtime="claude-code"
|
||||
onKeysAdded={vi.fn()}
|
||||
onCancel={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
|
||||
// Default selection is providers[0] (BYOK) — both keys visible
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
|
||||
|
||||
// Switch to platform-managed provider
|
||||
const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
|
||||
act(() => {
|
||||
fireEvent.change(providerSelect, {
|
||||
target: { value: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" },
|
||||
});
|
||||
});
|
||||
|
||||
// MOLECULE_LLM_USAGE_TOKEN should now be suppressed
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
|
||||
});
|
||||
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -275,9 +275,9 @@ describe("WorkspaceNode — status states", () => {
|
||||
expect(screen.getByText("STARTING")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("shows status label for online node (concept: status always visible)", () => {
|
||||
it("suppresses status label for online node", () => {
|
||||
renderNode({ status: "online" });
|
||||
expect(screen.getByText("ONLINE")).toBeTruthy();
|
||||
expect(screen.queryByText("ONLINE")).toBeNull();
|
||||
});
|
||||
|
||||
it("shows degraded error preview when status is degraded and lastSampleError is set", () => {
|
||||
@@ -404,18 +404,14 @@ describe("WorkspaceNode — double-click interactions", () => {
|
||||
});
|
||||
|
||||
describe("WorkspaceNode — active tasks", () => {
|
||||
it("shows the queued count when activeTasks > 0", () => {
|
||||
it("shows active tasks badge when activeTasks > 0", () => {
|
||||
renderNode({ activeTasks: 3 });
|
||||
expect(
|
||||
screen.getByText((_, el) => el?.tagName === "SPAN" && (el.textContent ?? "").includes("3 queued")),
|
||||
).toBeTruthy();
|
||||
expect(screen.getByText("3 tasks")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("shows the queued count for a single task", () => {
|
||||
it("shows singular 'task' when activeTasks is 1", () => {
|
||||
renderNode({ activeTasks: 1 });
|
||||
expect(
|
||||
screen.getByText((_, el) => el?.tagName === "SPAN" && (el.textContent ?? "").includes("1 queued")),
|
||||
).toBeTruthy();
|
||||
expect(screen.getByText("1 task")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("suppresses badge when no active tasks", () => {
|
||||
@@ -475,15 +471,13 @@ describe("WorkspaceNode — needs restart", () => {
|
||||
});
|
||||
|
||||
describe("WorkspaceNode — descendant badge", () => {
|
||||
it("shows the agent count in the status line when node has children", () => {
|
||||
it("shows descendant count badge when node has children in store", () => {
|
||||
store().nodes = [
|
||||
makeNode({ id: "ws-1" }),
|
||||
{ id: "child-1", data: { ...makeNode({ id: "ws-1" }).data, parentId: "ws-1" } },
|
||||
];
|
||||
renderNode();
|
||||
expect(
|
||||
screen.getByText((_, el) => el?.tagName === "SPAN" && (el.textContent ?? "").includes("1 agents")),
|
||||
).toBeTruthy();
|
||||
expect(screen.getByText("1 sub")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("suppresses badge when node has no children", () => {
|
||||
@@ -533,9 +527,9 @@ describe("WorkspaceNode — skills pills", () => {
|
||||
});
|
||||
|
||||
describe("WorkspaceNode — runtime badge", () => {
|
||||
it("shows the role pill (runtime pill replaced by role pill in the concept redesign)", () => {
|
||||
renderNode({ role: "researcher" });
|
||||
expect(screen.getByText("researcher")).toBeTruthy();
|
||||
it("shows runtime badge when runtime is set", () => {
|
||||
renderNode({ runtime: "hermes" });
|
||||
expect(screen.getByText("hermes")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("shows REMOTE badge for external runtime", () => {
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
import { describe, it, expect, vi, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, cleanup } from "@testing-library/react";
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
});
|
||||
|
||||
// ── Mock every tab content component to a sentinel so we can assert which
|
||||
// body renders without dragging in API calls / heavy children. ───────────
|
||||
vi.mock("../tabs/DetailsTab", () => ({ DetailsTab: () => <div data-testid="body-details" /> }));
|
||||
vi.mock("../tabs/SkillsTab", () => ({ SkillsTab: () => <div data-testid="body-skills" /> }));
|
||||
vi.mock("../tabs/ChatTab", () => ({ ChatTab: () => <div data-testid="body-chat" /> }));
|
||||
vi.mock("../tabs/ConfigTab", () => ({ ConfigTab: () => <div data-testid="body-config" /> }));
|
||||
vi.mock("../tabs/ContainerConfigTab", () => ({ ContainerConfigTab: () => <div data-testid="body-container" /> }));
|
||||
vi.mock("../tabs/DisplayTab", () => ({ DisplayTab: () => <div data-testid="body-display" /> }));
|
||||
vi.mock("../tabs/TerminalTab", () => ({ TerminalTab: () => <div data-testid="body-terminal" /> }));
|
||||
vi.mock("../tabs/FilesTab", () => ({ FilesTab: () => <div data-testid="body-files" /> }));
|
||||
vi.mock("../MemoryInspectorPanel", () => ({ MemoryInspectorPanel: () => <div data-testid="body-memory" /> }));
|
||||
vi.mock("../tabs/TracesTab", () => ({ TracesTab: () => <div data-testid="body-traces" /> }));
|
||||
vi.mock("../tabs/EventsTab", () => ({ EventsTab: () => <div data-testid="body-events" /> }));
|
||||
vi.mock("../tabs/ActivityTab", () => ({ ActivityTab: () => <div data-testid="body-activity" /> }));
|
||||
vi.mock("../tabs/ScheduleTab", () => ({ ScheduleTab: () => <div data-testid="body-schedule" /> }));
|
||||
vi.mock("../tabs/ChannelsTab", () => ({ ChannelsTab: () => <div data-testid="body-channels" /> }));
|
||||
vi.mock("../AuditTrailPanel", () => ({ AuditTrailPanel: () => <div data-testid="body-audit" /> }));
|
||||
|
||||
vi.mock("../Tooltip", () => ({
|
||||
Tooltip: ({ children }: { children: React.ReactNode }) => <>{children}</>,
|
||||
}));
|
||||
vi.mock("@/components/Toaster", () => ({ showToast: vi.fn() }));
|
||||
|
||||
// The store is only consulted for restartWorkspace.
|
||||
const mockRestart = vi.fn(() => Promise.resolve());
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: vi.fn((selector: (s: { restartWorkspace: typeof mockRestart }) => unknown) =>
|
||||
selector({ restartWorkspace: mockRestart })
|
||||
),
|
||||
}));
|
||||
|
||||
import { WorkspacePanelTabs, WORKSPACE_PANEL_TABS } from "../WorkspacePanelTabs";
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const node: any = {
|
||||
id: "platform-1",
|
||||
data: {
|
||||
name: "Org Concierge",
|
||||
status: "online",
|
||||
tier: 0,
|
||||
role: "platform",
|
||||
parentId: null,
|
||||
needsRestart: false,
|
||||
currentTask: null,
|
||||
agentCard: null,
|
||||
},
|
||||
};
|
||||
|
||||
describe("WorkspacePanelTabs — uncontrolled (Settings usage)", () => {
|
||||
it("renders the canonical 15-tab tablist for an explicit node", () => {
|
||||
render(<WorkspacePanelTabs node={node} />);
|
||||
const tablist = screen.getByRole("tablist");
|
||||
expect(tablist.getAttribute("aria-label")).toBe("Workspace panel tabs");
|
||||
expect(screen.getAllByRole("tab").length).toBe(WORKSPACE_PANEL_TABS.length);
|
||||
expect(WORKSPACE_PANEL_TABS.length).toBe(15);
|
||||
});
|
||||
|
||||
it("defaults to the chat tab when no defaultTab is given", () => {
|
||||
render(<WorkspacePanelTabs node={node} />);
|
||||
expect(screen.getByTestId("body-chat")).toBeTruthy();
|
||||
expect(document.getElementById("tab-chat")?.getAttribute("aria-selected")).toBe("true");
|
||||
});
|
||||
|
||||
it("honours defaultTab='config' (the concierge Settings entry point)", () => {
|
||||
render(<WorkspacePanelTabs node={node} defaultTab="config" />);
|
||||
expect(screen.getByTestId("body-config")).toBeTruthy();
|
||||
expect(document.getElementById("tab-config")?.getAttribute("aria-selected")).toBe("true");
|
||||
});
|
||||
|
||||
it("clicking a tab swaps the body using local state (no store panelTab)", () => {
|
||||
render(<WorkspacePanelTabs node={node} />);
|
||||
fireEvent.click(document.getElementById("tab-channels")!);
|
||||
expect(screen.getByTestId("body-channels")).toBeTruthy();
|
||||
expect(document.getElementById("tab-channels")?.getAttribute("aria-selected")).toBe("true");
|
||||
});
|
||||
});
|
||||
|
||||
describe("WorkspacePanelTabs — controlled (SidePanel usage)", () => {
|
||||
it("renders activeTab and calls onTabChange instead of local state", () => {
|
||||
const onTabChange = vi.fn();
|
||||
render(<WorkspacePanelTabs node={node} activeTab="details" onTabChange={onTabChange} />);
|
||||
expect(screen.getByTestId("body-details")).toBeTruthy();
|
||||
fireEvent.click(document.getElementById("tab-config")!);
|
||||
expect(onTabChange).toHaveBeenCalledWith("config");
|
||||
// Controlled: body does NOT change on its own (parent owns the state).
|
||||
expect(screen.getByTestId("body-details")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("ArrowRight from chat calls onTabChange with the next tab", () => {
|
||||
const onTabChange = vi.fn();
|
||||
render(<WorkspacePanelTabs node={node} activeTab="chat" onTabChange={onTabChange} />);
|
||||
fireEvent.keyDown(screen.getByRole("tablist"), { key: "ArrowRight" });
|
||||
expect(onTabChange).toHaveBeenCalledWith("activity");
|
||||
});
|
||||
});
|
||||
@@ -188,13 +188,11 @@ describe("DropTargetBadge — renders ghost slot + badge for valid drag target",
|
||||
});
|
||||
render(<DropTargetBadge />);
|
||||
expect(screen.getByTestId("ghost-slot")).toBeTruthy();
|
||||
// Ghost spans one default child slot at zoom 2: width = CHILD_DEFAULT_WIDTH
|
||||
// (300) × 2 = 600; height = CHILD_DEFAULT_HEIGHT (176) × 2 = 352. left/top
|
||||
// are the column-0/row-0 slot origin (unchanged by the card-size bump).
|
||||
// Ghost uses slotBR from 3rd call: slotBR - slotTL = (712-232, 920-660)
|
||||
expect(screen.getByTestId("ghost-slot").style.left).toBe("232px");
|
||||
expect(screen.getByTestId("ghost-slot").style.top).toBe("660px");
|
||||
expect(screen.getByTestId("ghost-slot").style.width).toBe("600px");
|
||||
expect(screen.getByTestId("ghost-slot").style.height).toBe("352px");
|
||||
expect(screen.getByTestId("ghost-slot").style.width).toBe("480px");
|
||||
expect(screen.getByTestId("ghost-slot").style.height).toBe("260px");
|
||||
});
|
||||
|
||||
it("ghost is hidden when slot falls entirely outside parent bounds", () => {
|
||||
|
||||
@@ -325,7 +325,7 @@ describe("all shortcuts respect inInput guard", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("Cmd/Ctrl+Arrow — free-resize removed (system-controlled sizing)", () => {
|
||||
describe("Cmd/Ctrl+Arrow — keyboard node resize", () => {
|
||||
beforeEach(() => {
|
||||
mockStoreState.nodes = [
|
||||
{
|
||||
@@ -340,15 +340,81 @@ describe("Cmd/Ctrl+Arrow — free-resize removed (system-controlled sizing)", ()
|
||||
renderWithProvider();
|
||||
});
|
||||
|
||||
it("no longer resizes the node on Cmd/Ctrl+Arrow (free-resize removed)", () => {
|
||||
// Sizing is system-controlled now: leaves render fixed-size and parents
|
||||
// grow to fit their children, so Cmd/Ctrl+Arrow must not emit a
|
||||
// `dimensions` change anymore.
|
||||
it("resizes height down (smaller) on Cmd/Ctrl+ArrowUp", () => {
|
||||
// Node starts at minHeight=110 (no children). Shrinking clamps to min —
|
||||
// height stays 110. Width is unchanged.
|
||||
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true });
|
||||
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
|
||||
expect.objectContaining({
|
||||
type: "dimensions",
|
||||
id: "n1",
|
||||
dimensions: { width: 210, height: 110 },
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
it("resizes height up (larger) on Cmd/Ctrl+ArrowDown", () => {
|
||||
fireEvent.keyDown(window, { key: "ArrowDown", ctrlKey: true });
|
||||
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
|
||||
expect.objectContaining({
|
||||
type: "dimensions",
|
||||
id: "n1",
|
||||
dimensions: { width: 210, height: 120 },
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
it("resizes width down (smaller) on Cmd/Ctrl+ArrowLeft", () => {
|
||||
// Node starts at minWidth=210 (no children). Shrinking clamps to min —
|
||||
// width stays 210. Height is unchanged.
|
||||
fireEvent.keyDown(window, { key: "ArrowLeft", metaKey: true });
|
||||
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
|
||||
expect.objectContaining({
|
||||
type: "dimensions",
|
||||
id: "n1",
|
||||
dimensions: { width: 210, height: 110 },
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
it("resizes width up (larger) on Cmd/Ctrl+ArrowRight", () => {
|
||||
fireEvent.keyDown(window, { key: "ArrowRight", ctrlKey: true });
|
||||
expect(mockStoreState.onNodesChange).not.toHaveBeenCalled();
|
||||
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
|
||||
expect.objectContaining({
|
||||
type: "dimensions",
|
||||
id: "n1",
|
||||
dimensions: { width: 220, height: 110 },
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
it("uses 2px step with Shift held", () => {
|
||||
// Step is 2px with Shift, but minHeight=110 clamps the result.
|
||||
// 110 - 2 = 108, Math.max(110, 108) = 110. Width is unchanged.
|
||||
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true, shiftKey: true });
|
||||
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
|
||||
expect.objectContaining({
|
||||
dimensions: { width: 210, height: 110 },
|
||||
}),
|
||||
]);
|
||||
});
|
||||
|
||||
it("respects min-height constraint (no children)", () => {
|
||||
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true });
|
||||
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true });
|
||||
// After shrinking from 110 to 100, another ArrowUp hits min-height of 110
|
||||
// (110 - 10 = 100, but 100 < 110 so it should stay at 110)
|
||||
// Actually: 110 -> 100 -> 110 (resets to min)
|
||||
// Let me check: the hook does Math.max(minHeight, currentHeight - step)
|
||||
// minHeight=110, step=10, so 110 - 10 = 100, but Math.max(110, 100) = 110
|
||||
// So two ArrowUp calls should both result in height=100 then height=110?
|
||||
// Wait: 110 - 10 = 100, Math.max(110, 100) = 110 (not 100)
|
||||
// So the height never goes below 110. After first: 110 -> 100, but clamped to 110.
|
||||
// Actually Math.max(110, 100) = 110, so the height never changes.
|
||||
// The min constraint is respected — height stays at 110.
|
||||
expect(mockStoreState.onNodesChange).toHaveBeenLastCalledWith([
|
||||
expect.objectContaining({ dimensions: { width: 210, height: 110 } }),
|
||||
]);
|
||||
});
|
||||
|
||||
it("does NOT fire when no node is selected", () => {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user