Compare commits

..

2 Commits

Author SHA1 Message Date
devops-engineer 81630a36f8 Merge branch 'main' into test/delegate-record-db-errors
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 6s
CI / Python Lint & Test (pull_request) Successful in 5s
CI / Detect changes (pull_request) Successful in 13s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 9s
CI / Canvas (Next.js) (pull_request) Successful in 4s
E2E Chat / detect-changes (pull_request) Successful in 16s
E2E API Smoke Test / detect-changes (pull_request) Successful in 21s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 8s
Harness Replays / detect-changes (pull_request) Successful in 13s
CI / Canvas Deploy Status (pull_request) Has been skipped
Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 8s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 17s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 24s
E2E Chat / E2E Chat (pull_request) Successful in 5s
Harness Replays / Harness Replays (pull_request) Successful in 6s
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 18s
security-review / approved (pull_request_target) Failing after 11s
gate-check-v3 / gate-check (pull_request_target) Failing after 16s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 19s
qa-review / approved (pull_request_target) Failing after 14s
sop-checklist / review-refire (pull_request_target) Has been skipped
sop-checklist / all-items-acked (pull_request) [info tier:low] acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, l
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 8s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request_target) Successful in 6s
sop-tier-check / tier-check (pull_request_target) Failing after 7s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m8s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m17s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 2m8s
CI / Platform (Go) (pull_request) Successful in 4m10s
CI / all-required (pull_request) Successful in 4s
2026-06-06 18:50:49 +00:00
fullstack-engineer a60033dc16 test(handlers): add missing DB-error tests for Record and SessionSearch
E2E API Smoke Test / E2E API Smoke Test (pull_request) Blocked by required conditions
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Blocked by required conditions
Harness Replays / Harness Replays (pull_request) Blocked by required conditions
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Waiting to run
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Waiting to run
lint-mask-pr-atomicity / lint-mask-pr-atomicity (pull_request) Waiting to run
publish-runtime-autobump / bump-and-tag (pull_request) Waiting to run
MCP Stdio Transport Regression / MCP stdio with regular-file stdout (pull_request) Successful in 3m8s
publish-runtime-autobump / pr-validate (pull_request) Successful in 1m22s
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 2m11s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 2m41s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 3m6s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Failing after 1m59s
audit-force-merge / audit (pull_request) Waiting to run
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Blocked by required conditions
Runtime PR-Built Compatibility / PR-built wheel + import smoke (pull_request) Blocked by required conditions
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 34s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 43s
CI / Detect changes (pull_request) Successful in 1m13s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 24s
E2E API Smoke Test / detect-changes (pull_request) Successful in 1m16s
Harness Replays / detect-changes (pull_request) Successful in 53s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 1m57s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 38s
Runtime PR-Built Compatibility / detect-changes (pull_request) Successful in 58s
qa-review / approved (pull_request) Failing after 48s
gate-check-v3 / gate-check (pull_request) Failing after 55s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m51s
security-review / approved (pull_request) Failing after 41s
sop-tier-check / tier-check (pull_request) Successful in 40s
CI / Python Lint & Test (pull_request) Successful in 8m14s
CI / Canvas (Next.js) (pull_request) Successful in 19m47s
CI / Platform (Go) (pull_request) Successful in 21m16s
CI / all-required (pull_request) Successful in 21m27s
CI / Canvas Deploy Reminder (pull_request) Successful in 8s
sop-checklist / all-items-acked (pull_request) [info tier:low] acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, l
- TestDelegationRecord_DBInsertFails: verifies 500 on activity_logs insert failure
- TestSessionSearch_DBError: verifies 500 on WITH query failure

Both are regression coverage for error paths that lacked test coverage.

🤖 Generated with [Claude Code](https://claude.ai/claude-code)
2026-05-15 07:38:15 +00:00
232 changed files with 3356 additions and 20104 deletions
-247
View File
@@ -1,247 +0,0 @@
#!/usr/bin/env python3
"""
SSOT fail-closed approval validator (SEV-1 internal#812).
This module is the SINGLE source of truth for whether a Gitea review counts
as a "genuine" approval. Both consumers must call into it — they MUST NOT
duplicate the predicate:
- .gitea/scripts/gitea-merge-queue.py (Python) — imports directly.
- .gitea/scripts/review-check.sh (bash, jq) — calls the Python helper
at .gitea/scripts/_review_check_filter.py, which in turn calls this
module. There is no separate jq / bash copy of the predicate; a
reviewer who wants to weaken the gate has to weaken this one file.
# The fail-closed contract
A review counts as a GENUINE APPROVED on the current head ONLY IF ALL hold:
1. state == "APPROVED"
2. official == true
3. dismissed != true
4. stale != true
5. commit_id is present and equals the PR's current head SHA
ANY failure of any of the above → REJECT.
# The bug this fixes
The previous gitea-merge-queue.py predicate had a `if isinstance(commit_id,
str) and commit_id and headsha:` guard that *skipped* the commit_id check
when the review carried no commit_id. The previous review-check.sh jq
filter required `commit_id == $head`, which is also implicitly fail-closed
on missing commit_id (null != head), but only one of the two consumers
behaved correctly — a code-drift trap.
Both behaviors are now defined here, as a single fail-closed predicate.
A MISSING commit_id is the Gitea row signature of a spoofed or pre-commit
review: a real reviewer cannot have submitted against a commit that
doesn't exist. Accepting these is exactly the fail-open that SEV-1
internal#812 describes and the re-opened path that closed #843 (with CR2
+ Researcher both flagging it) addresses.
# Mutation-resistance
The unit tests in tests/test_approval_validator.py assert rejection
explicitly for each fail-closed case (missing commit_id, stale head,
non-official, dismissed, etc.). A reviewer who tries to weaken the
predicate by removing the commit_id check, by re-introducing the
"no commit_id is accepted" escape hatch, or by changing `!=` to `==`
in the head comparison will trip those tests in CI.
"""
from __future__ import annotations
from typing import Iterable, Optional, Tuple
# ---------------------------------------------------------------------------
# Canonical Gitea review-state enum (EXACT match -- no case coercion).
# ---------------------------------------------------------------------------
#
# Gitea's reviews API emits review.state as one of a fixed set of
# UPPERCASE string constants: "APPROVED", "REQUEST_CHANGES",
# "REQUEST_REVIEW", "COMMENT", "PENDING", "DISMISSED" (verified
# against the live API across real molecule-core PRs). They are ALWAYS
# uppercase on the wire.
#
# FAIL-CLOSED: we compare review.state to these constants with EXACT
# equality. The previous code used str(state or "").upper(), which
# coerced a lowercase/mixed-case "approved" or "request_changes" into
# the canonical value and ACCEPTED it. A real Gitea row never carries a
# lowercase state, so a case-variant value is the signature of a
# hand-forged / spoofed row, not a legitimate review. Coercing it was a
# residual fail-open (SEV-1 internal#812, RCs 9849/9851/9852). We reject
# anything that is not byte-for-byte the canonical constant.
STATE_APPROVED = "APPROVED"
STATE_REQUEST_CHANGES = "REQUEST_CHANGES"
# ---------------------------------------------------------------------------
# Shared predicate — fail-closed on every condition
# ---------------------------------------------------------------------------
def is_official_current_head(review: object, headsha: object) -> bool:
    """Shared fail-closed gate underlying both per-verdict predicates.

    Returns True only when ALL of the following hold; anything else is
    rejected with False:

    - ``review`` is a dict;
    - ``review["official"]`` is the literal boolean ``True`` (identity
      check, so truthy stand-ins such as the string "false" or the
      integer 1 do not pass);
    - ``review["dismissed"]`` and ``review["stale"]`` are both falsy;
    - ``review["commit_id"]`` is a non-empty string;
    - ``headsha`` is a non-empty string equal to that commit_id.

    Keeping every condition here means is_genuine_approval and
    is_open_request_changes cannot drift apart on what "official, current
    head" means.
    """
    if not isinstance(review, dict):
        return False

    # Evaluated in contract order with short-circuiting: official first,
    # then dismissed, then stale.
    if (
        review.get("official") is not True
        or review.get("dismissed")
        or review.get("stale")
    ):
        return False

    # FAIL-CLOSED: a missing, empty, or non-string commit_id never counts.
    # Skipping the comparison in that case was the original spoof surface.
    reviewed_sha = review.get("commit_id")
    if not (isinstance(reviewed_sha, str) and reviewed_sha):
        return False

    # FAIL-CLOSED: without a usable head SHA there is nothing to bind to.
    if not (isinstance(headsha, str) and headsha):
        return False

    # A review bound to any other commit (e.g. a previous head) is rejected.
    return reviewed_sha == headsha
# ---------------------------------------------------------------------------
# Per-verdict predicates
# ---------------------------------------------------------------------------
def is_genuine_approval(
    review: object,
    *,
    headsha: str,
    reviewer_set: Optional[Iterable[str]] = None,
) -> bool:
    """Return True iff `review` is a genuine APPROVED on the current head.

    Args:
        review: One row from the Gitea reviews API. Anything that is not a
            dict is rejected.
        headsha: The PR's current head SHA; the review's commit_id must
            equal it (enforced by is_official_current_head).
        reviewer_set: Optional collection of recognised reviewer logins.
            When provided, the review's `user.login` must be a string in
            the set. When None, no login check is applied (review-check.sh
            runs its own team-membership probe separately).

    Returns:
        True only when the state is exactly STATE_APPROVED, the shared
        fail-closed predicate passes, and (if requested) the login is
        recognised. Every other input yields False; this function does not
        raise on malformed rows.
    """
    if not isinstance(review, dict):
        return False
    # EXACT-ENUM (fail-closed): no .upper()/.strip() coercion. A
    # case-variant or whitespace-padded state is a forged row and is
    # rejected, not normalised into APPROVED.
    if review.get("state") != STATE_APPROVED:
        return False
    if not is_official_current_head(review, headsha):
        return False
    if reviewer_set is None:
        return True
    # FAIL-CLOSED: `user` must be an object. A truthy non-dict value (a
    # string, list or number) previously raised AttributeError on `.get`,
    # crashing the validator instead of rejecting the malformed row.
    author = review.get("user")
    if not isinstance(author, dict):
        return False
    login = author.get("login")
    return isinstance(login, str) and login in set(reviewer_set)
def is_open_request_changes(review: object, *, headsha: str) -> bool:
    """Return True iff `review` is an open official REQUEST_CHANGES bound
    to the current head.

    The contract mirrors is_genuine_approval: the row must be a dict, its
    state must be exactly STATE_REQUEST_CHANGES (no case or whitespace
    coercion), and it must pass is_official_current_head. A review with a
    missing commit_id is therefore REJECTED rather than being treated as
    still blocking from an old head.
    """
    return (
        isinstance(review, dict)
        # EXACT-ENUM (fail-closed): a lowercase/mixed-case variant is not
        # a match; only the canonical constant counts.
        and review.get("state") == STATE_REQUEST_CHANGES
        and is_official_current_head(review, headsha)
    )
# ---------------------------------------------------------------------------
# Consumer-facing reducer (returns what the two call sites need)
# ---------------------------------------------------------------------------
def classify_reviews(
    reviews: Iterable[object],
    *,
    headsha: str,
    reviewer_set: Optional[Iterable[str]] = None,
) -> Tuple[set[str], list[str]]:
    """Reduce a PR's reviews to (approvers, request_changes) on the CURRENT head.

    Args:
        reviews: Review rows in submission order (the reduce below assumes
            oldest-first, so later valid rows overwrite earlier ones).
        headsha: The PR's current head SHA.
        reviewer_set: Optional collection of recognised logins; rows from
            any other login are ignored. None disables the filter.

    Returns:
        A tuple ``(approvers, request_changes)``:
        approvers is the set of distinct logins whose LATEST VALID review
        on the current head is APPROVED; request_changes is the list of
        distinct logins whose LATEST VALID review on the current head is
        REQUEST_CHANGES.

    Malformed rows (non-dict row, non-dict `user`, missing/empty/non-string
    login) are skipped; they never raise and never count.
    """
    allowed_logins = set(reviewer_set) if reviewer_set is not None else None

    # VALIDATE-BEFORE-REDUCE (SEV-1 internal#812 follow-up).
    #
    # Reducing first (latest row per user) and validating the survivor
    # afterwards is exploitable: a user posts a genuine current-head
    # APPROVED or REQUEST_CHANGES, then a LATER row that fails the
    # fail-closed predicate (a COMMENT, or a review with a null/old
    # commit_id). The later invalid row would overwrite the genuine one,
    # masking a real approval or, worse, erasing a real REQUEST_CHANGES
    # block. So each row is validated FIRST and only valid rows are
    # eligible to become a user's "latest".
    latest_valid_by_user: dict = {}
    for review in reviews:
        if not isinstance(review, dict):
            continue
        # FAIL-CLOSED: `user` must be an object. A truthy non-dict value
        # previously raised AttributeError here, so one malformed row
        # aborted classification of every review on the PR.
        author = review.get("user")
        if not isinstance(author, dict):
            continue
        login = author.get("login")
        # An empty login identifies nobody and must not count as a
        # distinct reviewer (matches the check in _review_check_filter.py).
        if not isinstance(login, str) or not login:
            continue
        if allowed_logins is not None and login not in allowed_logins:
            continue
        # EXACT-ENUM (fail-closed): exact constants only, no coercion.
        if review.get("state") not in (STATE_APPROVED, STATE_REQUEST_CHANGES):
            continue
        # Official, not dismissed, not stale, commit_id present AND == head.
        if not is_official_current_head(review, headsha):
            continue
        latest_valid_by_user[login] = review

    approvers: set[str] = set()
    request_changes: list[str] = []
    for login, review in latest_valid_by_user.items():
        # Every surviving row already passed the shared predicate, so the
        # state alone decides the verdict. The per-verdict SSOT predicates
        # are still used so the rule cannot drift.
        if is_genuine_approval(review, headsha=headsha, reviewer_set=None):
            approvers.add(login)
        elif is_open_request_changes(review, headsha=headsha):
            request_changes.append(login)
    return approvers, request_changes
-74
View File
@@ -1,74 +0,0 @@
#!/usr/bin/env python3
"""
Helper for review-check.sh: applies the SSOT approval predicate to a
PR's reviews and prints the candidate approver logins on stdout (one per
line, de-duplicated, author excluded).
review-check.sh uses this in place of its previous inline jq filter so the
predicate is single-sourced. The jq filter is gone; if you want to change
the predicate, edit .gitea/scripts/_approval_validator.py, not this file.
Usage:
python3 _review_check_filter.py <reviews.json> <head-sha> <author-login>
Output:
- Candidate approver logins, one per line, de-duplicated, sorted.
- Excludes `author-login` (the PR author cannot approve their own PR).
- Empty output → review-check.sh interprets as "no candidates" and exits 1
after the team-membership probe.
"""
from __future__ import annotations
import json
import sys
from pathlib import Path
# Same-dir import — script lives next to _approval_validator.py
sys.path.insert(0, str(Path(__file__).resolve().parent))
from _approval_validator import is_genuine_approval # noqa: E402
def main(argv: list[str]) -> int:
    """Print the candidate approver logins for a PR, one per line.

    Args:
        argv: ``[prog, reviews.json, head-sha, author-login]``.

    Returns:
        0 on success (including when there are no candidates, in which case
        nothing is printed); 2 on a usage error, an unreadable or
        undecodable reviews file, invalid JSON, or a JSON document that is
        not a list. Errors are reported on stderr.

    Output is sorted, de-duplicated, and excludes ``author-login``.
    """
    if len(argv) != 4:
        print(
            f"usage: {argv[0] if argv else '_review_check_filter.py'} "
            "<reviews.json> <head-sha> <author-login>",
            file=sys.stderr,
        )
        return 2
    reviews_path = Path(argv[1])
    headsha = argv[2]
    author = argv[3]
    try:
        reviews = json.loads(reviews_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError AND UnicodeDecodeError: a
        # file that is not valid UTF-8 must produce the same clean error
        # and exit code as any other unreadable input, not a traceback.
        print(f"::error::could not read reviews JSON: {exc}", file=sys.stderr)
        return 2
    if not isinstance(reviews, list):
        print("::error::reviews JSON was not a list", file=sys.stderr)
        return 2
    candidates: set[str] = set()
    for review in reviews:
        # We pass reviewer_set=None here because review-check.sh applies its
        # own team-membership probe (CURL_AUTH_FILE + 200/204/403/404 logic)
        # separately. The SSOT predicate enforces only the fail-closed
        # commit_id / state / official / dismissed / stale contract here.
        if not is_genuine_approval(review, headsha=headsha, reviewer_set=None):
            continue
        # `user` must be an object; a truthy non-dict value would raise
        # AttributeError on `.get`, so such rows are skipped instead.
        reviewer = review.get("user")
        if not isinstance(reviewer, dict):
            continue
        login = reviewer.get("login")
        if not isinstance(login, str) or not login:
            continue
        # The PR author cannot approve their own PR.
        if login == author:
            continue
        candidates.add(login)
    for login in sorted(candidates):
        print(login)
    return 0
if __name__ == "__main__":
sys.exit(main(sys.argv))
+21 -99
View File
@@ -31,7 +31,7 @@
#
# REQUIRED_CHECKS (legacy) is a newline-separated list used when the
# JSON variable is not set. Declared in the workflow YAML rather than
# fetched from /branch_protections (which needs admin scope —
# fetched from /branch_protections (which needs admin scope — sop-tier-bot
# has read-only). Trade dynamism for simplicity: when the required-check
# set changes, update both branch protection AND this env. Keeping them
# in sync is less complexity than granting the audit bot admin perms on
@@ -54,57 +54,32 @@ API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
# 1. Fetch the PR. If not merged, no-op.
# Fail-closed: verify HTTP 200 before parsing. A 401/403/404 means the token
# is invalid or the PR is inaccessible — we must NOT silently treat that as
# "not merged" and skip the audit.
PR_TMP=$(mktemp)
PR_HTTP=$(curl -sS -o "$PR_TMP" -w '%{http_code}' -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
PR=$(cat "$PR_TMP")
rm -f "$PR_TMP"
if [ "$PR_HTTP" != "200" ]; then
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${PR_HTTP} — cannot evaluate merge state."
exit 1
fi
# FAIL-CLOSED: a 200 response with a missing/malformed `merged` field must
# NOT be treated as "not merged" (that would silently skip the audit).
# We verify both presence AND correct type for every field we consume.
PR_SCHEMA_OK=$(echo "$PR" | jq -r '
(.merged | type == "boolean") and
(.merge_commit_sha | type == "string") and
(.merged_by | type == "object") and (.merged_by.login | type == "string") and
(.base | type == "object") and (.base.ref | type == "string") and
(.head | type == "object") and (.head.sha | type == "string")
')
if [ "$PR_SCHEMA_OK" != "true" ]; then
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP 200 but one or more required fields are missing, null, or of wrong type — cannot evaluate force-merge."
exit 1
fi
MERGED=$(echo "$PR" | jq -r '.merged')
PR=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
MERGED=$(echo "$PR" | jq -r '.merged // false')
if [ "$MERGED" != "true" ]; then
echo "::notice::PR #${PR_NUMBER} closed without merge — no audit emission."
exit 0
fi
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha')
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login')
# NOTE: no || true — with set -euo pipefail, jq parse failures (e.g. field
# missing from API response) propagate as hard errors. Use jq's // operator
# for graceful defaults instead of bash || true guards. This was re-added by
# 8c343e3a ("fix(gitea): add || true guards to jq pipelines") — reverted
# here because the guards mask silent failures that hide malformed API responses.
MERGE_SHA=$(echo "$PR" | jq -r '.merge_commit_sha // empty')
MERGED_BY=$(echo "$PR" | jq -r '.merged_by.login // "unknown"')
TITLE=$(echo "$PR" | jq -r '.title // ""')
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref')
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha')
BASE_BRANCH=$(echo "$PR" | jq -r '.base.ref // "main"')
HEAD_SHA=$(echo "$PR" | jq -r '.head.sha // empty')
if [ -z "$MERGE_SHA" ]; then
echo "::warning::PR #${PR_NUMBER} merged=true but no merge_commit_sha — cannot evaluate force-merge."
exit 0
fi
# 2. Required status checks — branch-aware JSON dict takes precedence.
if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then
# FAIL-CLOSED: if REQUIRED_CHECKS_JSON is set, the branch entry must exist
# and be an array. A missing branch or non-array value means the config is
# malformed or drifted — we must NOT silently treat it as "no checks".
_RC_JSON_OK=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '
has($branch) and (.[$branch] | type == "array")
')
if [ "$_RC_JSON_OK" != "true" ]; then
echo "::error::REQUIRED_CHECKS_JSON missing or non-array entry for branch '$BASE_BRANCH' — cannot evaluate required checks."
exit 1
fi
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] | .[]')
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] // [] | .[]')
else
REQUIRED="$REQUIRED_CHECKS"
fi
@@ -116,65 +91,12 @@ fi
# 3. Status-check state at the PR HEAD (where checks ran). The merge
# commit doesn't get its own checks; we evaluate the PR's last
# commit, which is what branch protection compared against.
#
# Pagination (status-pagination RCA, #2440-family): the combined
# /commits/{sha}/status endpoint caps its embedded `statuses` array at the
# Gitea default page size (~30). On a high-churn PR an older-but-still-current
# required-context SUCCESS row is pushed PAST that cap, so reading the combined
# view would record that context as `missing` and emit a FALSE-POSITIVE
# force-merge. We instead page through the dedicated /commits/{sha}/statuses
# list to EXHAUSTION (until a short/empty page), accumulating every row.
#
# Fail-closed is preserved end to end: any non-200 page, or a page whose body
# is not a JSON array, aborts with exit 1 (we never treat an unreadable/partial
# page as "no checks"). A genuinely-absent required context appears on NO page,
# so CHECK_STATE has no entry for it → `${...:-missing}` below keeps it
# `missing` → it is still counted as not-green. No fail-open path is added.
PER_PAGE=100
page=1
ALL_STATUSES_TMP=$(mktemp)
printf '[]' > "$ALL_STATUSES_TMP" # accumulator: a single JSON array of rows
while :; do
STATUS_TMP=$(mktemp)
STATUS_HTTP=$(curl -sS -o "$STATUS_TMP" -w '%{http_code}' -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/statuses?page=${page}&limit=${PER_PAGE}")
PAGE_BODY=$(cat "$STATUS_TMP")
rm -f "$STATUS_TMP"
if [ "$STATUS_HTTP" != "200" ]; then
rm -f "$ALL_STATUSES_TMP"
echo "::error::GET /commits/${HEAD_SHA}/statuses?page=${page} returned HTTP ${STATUS_HTTP} — cannot evaluate required checks."
exit 1
fi
# FAIL-CLOSED: the /statuses endpoint returns a bare JSON array. A non-array
# body (null/object/string) means the response is malformed — we must NOT
# treat that as "no checks", which would silently declare all checks green.
if ! echo "$PAGE_BODY" | jq -e 'type == "array"' >/dev/null 2>&1; then
rm -f "$ALL_STATUSES_TMP"
echo "::error::GET /commits/${HEAD_SHA}/statuses?page=${page} returned HTTP 200 but body is not a JSON array — cannot evaluate required checks."
exit 1
fi
PAGE_COUNT=$(echo "$PAGE_BODY" | jq 'length')
# Append this page's rows to the accumulator (insertion order is preserved
# but NOT relied upon — the collapse below selects max-by-id per context).
COMBINED=$(jq -s '.[0] + .[1]' "$ALL_STATUSES_TMP" <(echo "$PAGE_BODY"))
printf '%s' "$COMBINED" > "$ALL_STATUSES_TMP"
# Short page (fewer than PER_PAGE rows) ⇒ last page ⇒ stop.
if [ "$PAGE_COUNT" -lt "$PER_PAGE" ]; then
break
fi
page=$((page + 1))
done
STATUS=$(cat "$ALL_STATUSES_TMP")
rm -f "$ALL_STATUSES_TMP"
STATUS=$(curl -sS -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/commits/${HEAD_SHA}/status")
declare -A CHECK_STATE
# Gitea's /commits/{sha}/statuses is roughly newest-first but NOT strictly
# monotonic by id (observed first ids 157,155,156,… — local inversions from
# re-runs and page boundaries), so neither first- nor last-occurrence reliably
# yields the current row. Select the MAX-id row per context explicitly
# (order-independent), matching prod-auto-deploy.py's latest_status_for_context.
while IFS=$'\t' read -r ctx state; do
[ -n "$ctx" ] && CHECK_STATE[$ctx]="$state"
done < <(echo "$STATUS" | jq -r 'group_by(.context) | map(max_by(.id)) | .[] | "\(.context)\t\(.status)"')
done < <(echo "$STATUS" | jq -r '.statuses // [] | .[] | "\(.context)\t\(.status)"')
# 4. For each required check, was it green at merge? YAML block scalars
# (`|`) leave a trailing newline; skip blank/whitespace-only lines.
+16 -53
View File
@@ -317,33 +317,7 @@ def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
)
sys.exit(3)
# Fail-closed validation: every entry must be a non-empty string.
# Reject null, int, dict, or empty/whitespace strings instead of silently dropping them —
# they indicate a malformed manifest that drift-detect must not
# normalize away (that would hide config errors).
validated: set[str] = set()
for idx, item in enumerate(branch_checks):
if not isinstance(item, str):
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
f"{type(item).__name__} (value={item!r}), expected str\n"
)
sys.exit(3)
stripped = item.strip()
if not stripped:
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON['{branch}'][{idx}] is "
f"empty/whitespace string\n"
)
sys.exit(3)
if stripped in validated:
sys.stderr.write(
f"::error::REQUIRED_CHECKS_JSON['{branch}'] contains "
f"duplicate context '{stripped}' at index {idx}\n"
)
sys.exit(3)
validated.add(stripped)
return validated
return {str(item).strip() for item in branch_checks if str(item).strip()}
# Legacy variant fallback.
if found_legacy:
@@ -578,34 +552,23 @@ def find_open_issue(title: str) -> dict | None:
hourly; failing one cycle loudly is strictly better than silently
duplicating.
Paginates through all open issues (limit=50 per page) until the
title is found or the result set is exhausted. Previously only one
page was fetched, causing duplicate [ci-drift] issues when the
existing tracking issue fell beyond page 1.
Gitea issue search returns at most 50 results per page (limit=50); one page is
enough as long as `[ci-drift]` issues are a tiny minority. (See
follow-up issue for Link-header pagination.)
"""
page = 1
while True:
_, results = api(
"GET",
f"/repos/{OWNER}/{NAME}/issues",
query={
"state": "open",
"type": "issues",
"limit": "50",
"page": str(page),
},
_, results = api(
"GET",
f"/repos/{OWNER}/{NAME}/issues",
query={"state": "open", "type": "issues", "limit": "50"},
)
if not isinstance(results, list):
raise ApiError(
f"issue search returned non-list body (got {type(results).__name__})"
)
if not isinstance(results, list):
raise ApiError(
f"issue search returned non-list body (got {type(results).__name__})"
)
for issue in results:
if issue.get("title") == title:
return issue
# Fewer than limit results means last page reached.
if len(results) < 50:
return None
page += 1
for issue in results:
if issue.get("title") == title:
return issue
return None
def render_body(branch: str, findings: list[str], debug: dict) -> str:
-11
View File
@@ -26,21 +26,10 @@ PROFILES: dict[str, dict[str, str]] = {
"handlers": (
r"^workspace-server/internal/handlers/"
r"|^workspace-server/internal/wsauth/"
# #2148: registry-auth real-PG integration tests (CanCommunicate
# parent_id hierarchy lives in internal/registry; org-admin token
# revoke/validate lives in internal/orgtoken) run in this same
# workflow, so a regression in either package MUST trigger the job.
r"|^workspace-server/internal/registry/"
r"|^workspace-server/internal/orgtoken/"
# #2149: the scheduler real-PG integration tests run in this same
# workflow (they reuse its migrated Postgres), so changes to the
# scheduler package must trigger the job too.
r"|^workspace-server/internal/scheduler/"
# #2150: the db package's real-PG migration-replay-from-scratch
# + InitPostgres ping tests also run in this same workflow (they
# reuse its sibling Postgres, against a separate `molecule_replay`
# database). Changes to db must trigger the job too.
r"|^workspace-server/internal/db/"
r"|^workspace-server/migrations/"
r"|^\.gitea/workflows/handlers-postgres-integration\.yml$"
),
+111 -260
View File
@@ -9,43 +9,27 @@ queue. This script provides the missing serialized policy in user space:
candidate (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals,
or red required CI) is SKIPPED so it cannot head-of-line block newer ready
PRs; the scan continues to the next candidate.
2. Refuse to act unless main's BP-required contexts are green. This is also
the serialized backstop for direct-merge (see below): after a direct merge,
main re-runs push CI and this gate PAUSES the queue if main goes red, so no
merge piles onto an unverified/red main (issue #2358).
2. Refuse to act unless main's BP-required contexts are green.
3. Refuse fork PRs; the queue may only mutate same-repo branches.
4. DIRECT-MERGE when conflict-free (issue #2358). When Gitea reports the PR
conflict-free (mergeable is True) and the merge bar below is met, MERGE IT
DIRECTLY — even if its head does not contain current main. We do NOT call
/pulls/{n}/update first: branch protection does not require strict
up-to-date, so behind-main conflict-free PRs merge cleanly, and calling
/update would trigger Gitea dismiss_stale_approvals (dismissing the genuine
approvals and forcing a re-review every tick — the rebase-churn bottleneck).
The /update path is used ONLY when the PR is DEFINITIVELY not mergeable
(mergeable is literal False) AND its head lacks current main — refreshing the
branch may resolve a behind-main non-conflict; a real conflict returns HTTP
409 and the PR is HELD per #2352. mergeable=None/missing (Gitea STILL
COMPUTING conflict state) is a distinct fail-closed WAIT: never merged AND
never /update'd — calling /update during the compute window would dismiss the
PR's genuine approvals (dismiss_stale_approvals) and re-introduce the exact
rebase-churn this queue eliminates. None is re-checked next tick.
4. If the PR branch does not contain current main, call Gitea's
/pulls/{n}/update endpoint and stop. CI must rerun on the updated head.
5. Merge ONLY when, on the PR's CURRENT head sha:
- >= REQUIRED_APPROVALS distinct GENUINE official APPROVED reviews from
the recognised reviewer set (not stale, not dismissed, commit_id ==
current head), AND
- no open official REQUEST_CHANGES on the current head, AND
- every BP-required status context is green, AND
- the PR is mergeable (Gitea reports it conflict-free).
- the PR is mergeable.
Authoritative gates (fail-closed):
- The REQUIRED status contexts come from BRANCH PROTECTION
(`status_check_contexts`) PLUS the hardcoded governance checks
(qa-review, security-review, sop-checklist). If branch protection
cannot be enumerated, the queue HOLDS (does not merge blindly).
- NON-required reds (E2E Chat, Staging SaaS, ci-arm64-advisory, any
(`status_check_contexts`), not a hand-maintained env list. If branch
protection cannot be enumerated, the queue HOLDS (does not merge blindly).
- NON-required reds (qa-review, security-review, sop-tier, sop-checklist
when not branch-required, E2E Chat, Staging SaaS, ci-arm64-advisory, any
continue-on-error job) MUST NOT block. They are reported, never gating.
- `force_merge=true` is used ONLY when the merge is blocked *solely* by
missing-but-non-required advisory contexts (required are green + genuine
missing-but-non-required governance contexts (required are green + genuine
approvals present). It is NEVER used to bypass a failing REQUIRED context
or missing approvals.
@@ -105,12 +89,6 @@ import urllib.parse
import urllib.request
from typing import Any
# SSOT fail-closed approval predicate (SEV-1 internal#812). review-check.sh
# consumes the same module via _review_check_filter.py — do NOT duplicate
# the predicate here. See _approval_validator.py for the fail-closed contract.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from _approval_validator import classify_reviews as _classify_reviews_ssot # noqa: E402
def _env(key: str, *, default: str = "") -> str:
return os.environ.get(key, default)
@@ -150,28 +128,6 @@ OPT_OUT_LABELS = {
).split(",")
if name.strip()
} | ({HOLD_LABEL} if HOLD_LABEL else set())
# Governance checks that are ALWAYS required for every PR, regardless of
# branch-protection configuration. These are the uniform-gate checks that
# must pass before any PR can merge (SOP tier removal makes them mandatory
# for all PRs, not just tier:medium/tier:high).
#
# Context names use the (pull_request_target) suffix (not pull_request)
# to match the workflow event_type that actually emits them — verified
# live against PR#2419/#2331/etc.: the qa-review/security-review
# workflows run on pull_request_target (their `on:` block uses
# pull_request_target, not pull_request), and sop-checklist's
# all-items-acked job also uses pull_request_target. The previous
# (pull_request) suffix never matched the live emitted contexts,
# which is what was painting ~16 ready PRs red (gate appeared
# "missing" qa-review/security-review even after both passed).
# Verified against the lint-bp-context-emit-match test which already
# asserts (pull_request_target) for these names. No requirement
# dropped; just a name correction.
GOVERNANCE_REQUIRED_CONTEXTS = [
"qa-review / approved (pull_request_target)",
"security-review / approved (pull_request_target)",
"sop-checklist / all-items-acked (pull_request_target)",
]
REQUIRED_CONTEXTS_RAW = _env(
"REQUIRED_CONTEXTS",
default=(
@@ -365,15 +321,41 @@ def latest_statuses_by_context(statuses: list[dict]) -> dict[str, dict]:
return latest
def _is_tier_low_pending_ok(
latest_statuses: dict[str, dict],
context: str,
pr_labels: set[str],
) -> bool:
"""Return True if tier:low PR can tolerate sop-checklist pending state.
GENERIC PENDING-AS-GREEN REMOVED (Researcher + CR2 RC on #2368):
The prior soft-fail accepted ANY pending sop-checklist for tier:low,
which allowed required checks to pass without genuine verification.
Pending required sop-checklist must now always HOLD and appear in
missing_or_bad. This function is retained as a policy hook but
currently always returns False so pending never counts green.
If a positively identifiable genuine soft-fail state is defined in
future (e.g., a specific check-run conclusion), implement it here
with strict positive identification — never default to pass.
"""
return False
def required_contexts_green(
    latest_statuses: dict[str, dict],
    contexts: list[str],
    pr_labels: set[str] | None = None,
) -> tuple[bool, list[str]]:
    """Check that every context in ``contexts`` has state ``"success"``.

    Args:
        latest_statuses: mapping of context name to its latest status row.
        contexts: the context names that must be green.
        pr_labels: optional PR label names; when non-empty they are passed
            to the ``_is_tier_low_pending_ok`` policy hook, which may
            excuse a non-green context.

    Returns:
        ``(ok, problems)`` where ``problems`` lists ``"<context>=<state>"``
        for every non-green, non-excused context (``missing`` when the
        state is empty) and ``ok`` is True only when that list is empty.
    """
    problems: list[str] = []
    for name in contexts:
        observed = status_state(latest_statuses.get(name) or {})
        if observed == "success":
            continue
        # The policy hook is consulted only when labels were supplied.
        if pr_labels and _is_tier_low_pending_ok(latest_statuses, name, pr_labels):
            continue  # tier:low soft-fail: accept pending sop-checklist
        problems.append(f"{name}={observed or 'missing'}")
    return len(problems) == 0, problems
@@ -430,26 +412,57 @@ def get_branch_protection(branch: str) -> BranchProtection:
def genuine_approvals(
reviews: list[dict],
*,
headsha: str,
head_sha: str,
reviewer_set: set[str],
) -> tuple[set[str], list[str]]:
"""Thin wrapper over the SSOT predicate in _approval_validator.py.
"""Reduce a PR's reviews to genuine official approvals on the CURRENT head.
All logic — the per-review commit_id / state / official / dismissed /
stale contract — lives in _approval_validator.classify_reviews. This
wrapper exists only to keep the call site (and external readers of
the symbol) stable. Do NOT add any per-review logic here; if you need
to change the predicate, edit _approval_validator.py.
Returns (approvers, request_changes) where:
- approvers is the set of distinct logins (in reviewer_set) whose LATEST
review on the current head is an official, non-stale, non-dismissed
APPROVED, and
- request_changes is the list of logins (in reviewer_set) whose latest
official review on the current head is REQUEST_CHANGES.
See _approval_validator.py for the full fail-closed contract
(SEV-1 internal#812). The previous inline implementation had a
`if isinstance(commit_id, str) and commit_id and headsha:` guard that
silently accepted reviews with no commit_id; that fail-open surface is
now closed at the SSOT.
"Current head" is enforced two ways, because Gitea exposes both signals:
a review must be `official` and NOT `stale`/`dismissed`, AND when the
review carries a commit_id it must equal head_sha. A review with no
commit_id but stale=False/dismissed=False is accepted (older Gitea rows).
We take each reviewer's LATEST submission (reviews arrive oldest-first), so
a later REQUEST_CHANGES correctly supersedes an earlier APPROVED and vice
versa.
"""
return _classify_reviews_ssot(
reviews, headsha=headsha, reviewer_set=reviewer_set
)
latest_by_user: dict[str, dict] = {}
for review in reviews:
if not isinstance(review, dict):
continue
user = (review.get("user") or {}).get("login")
if not isinstance(user, str) or user not in reviewer_set:
continue
state = str(review.get("state") or "").upper()
if state not in {"APPROVED", "REQUEST_CHANGES"}:
continue # ignore COMMENT/PENDING/DISMISSED-state rows
# reviews are returned oldest-first; later entries overwrite → latest wins
latest_by_user[user] = review
approvers: set[str] = set()
request_changes: list[str] = []
for user, review in latest_by_user.items():
if not review.get("official"):
continue
if review.get("stale") or review.get("dismissed"):
continue
commit_id = review.get("commit_id")
if isinstance(commit_id, str) and commit_id and head_sha:
if commit_id != head_sha:
continue # review was on a previous head
state = str(review.get("state") or "").upper()
if state == "APPROVED":
approvers.add(user)
elif state == "REQUEST_CHANGES":
request_changes.append(user)
return approvers, request_changes
def get_pull_reviews(pr_number: int) -> list[dict]:
_, body = api("GET", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/reviews")
@@ -609,32 +622,29 @@ def evaluate_merge_readiness(
approvers: set[str],
request_changes: list[str],
pr_has_current_base: bool,
mergeable: bool | None,
mergeable: bool,
pr_labels: set[str] | None = None,
) -> MergeDecision:
# 1) Main's push-required contexts must be green. Combined state can be
# "failure" due to non-blocking jobs (continue-on-error: true) that do
# not gate merges, so check the explicit required set, not combined.
#
# This main-green gate is ALSO the serialized backstop that makes the
# direct-merge (no update) path safe (issue #2358): after a direct merge
# of a behind-main PR, main re-runs its push CI; if a semantic main-break
# slips through (PR green standalone but broken when combined with newer
# main), main's required contexts go red and this gate PAUSES the queue —
# no further merge piles onto an unverified/red main until it is green.
main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
if not main_ok:
return MergeDecision(False, "pause", "main required contexts not green: " + ", ".join(main_bad))
# 2) No open official REQUEST_CHANGES on the current head.
# 2) PR head must contain current main.
if not pr_has_current_base:
return MergeDecision(False, "update", "PR head does not contain current main")
# 3) No open official REQUEST_CHANGES on the current head.
if request_changes:
return MergeDecision(
False, "wait",
"open REQUEST_CHANGES on current head from: " + ", ".join(sorted(request_changes)),
)
# 3) Enough distinct genuine official approvals on the current head.
# 4) Enough distinct genuine official approvals on the current head.
if len(approvers) < required_approvals:
return MergeDecision(
False, "wait",
@@ -643,63 +653,26 @@ def evaluate_merge_readiness(
f"need {required_approvals}",
)
# 4) Every REQUIRED status context must be green. This includes both
# branch-protection-required contexts AND the hardcoded governance checks
# (qa-review, security-review, sop-checklist). NON-required reds (E2E
# Chat, Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
# 5) Every BRANCH-PROTECTION-REQUIRED status context must be green. This is
# the authoritative status gate — NON-required reds (qa-review,
# security-review, sop-tier/sop-checklist when not BP-required, E2E Chat,
# Staging SaaS, ci-arm64-advisory, continue-on-error jobs) are NOT
# consulted here and must not block.
latest = latest_statuses_by_context(pr_status.get("statuses") or [])
ok, missing_or_bad = required_contexts_green(latest, required_contexts)
ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
if not ok:
return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))
# 5) DIRECT-MERGE when conflict-free (issue #2358 — throughput fix).
# If Gitea reports the PR conflict-free (mergeable is True), MERGE IT
# DIRECTLY even if its head does not yet contain current main. Branch
# protection does NOT require strict up-to-date, so a behind-main but
# conflict-free PR merges cleanly. We deliberately do NOT call
# /pulls/{n}/update first: update triggers Gitea dismiss_stale_approvals,
# which would dismiss the PR's genuine approvals and force a full
# re-review every tick — the rebase-churn bottleneck that collapsed
# throughput to ~0/hr with dozens of mergeable PRs open.
#
# The merge bar is UNCHANGED: we only reach here with main green +
# >= required genuine approvals on the current head + no open
# REQUEST_CHANGES + every BP-required context green. The trade-off is
# that the PR's CI ran on a possibly-behind base, so a SEMANTIC main-break
# is caught by POST-merge main CI (step 1's pause backstop) rather than
# pre-merge. force_merge is used ONLY for missing-but-non-required
# governance reds (required are green + approvals genuine), never to
# bypass a failing required context or an approval shortfall.
if mergeable is True:
force = _non_required_red_present(latest, required_contexts)
return MergeDecision(True, "merge", "ready", force=force)
# 6) Gitea must consider the PR mergeable (no conflicts).
if not mergeable:
return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
# 6) NOT (yet) mergeable. TRI-STATE, fail-closed — never merge on an unknown.
# We MUST distinguish "still computing" (None/missing) from a "definitive
# conflict" (False); collapsing them would route a behind-main but
# STILL-COMPUTING PR into the /update path, whose dismiss_stale_approvals
# is the rebase-churn this change eliminates.
#
# mergeable is None → Gitea has NOT finished computing conflict state.
# WAIT: do nothing this tick — never /update (would dismiss genuine
# approvals during the compute window → churn), never merge. Re-check next
# tick once Gitea reports a decisive True/False.
if mergeable is None:
return MergeDecision(
False, "wait",
"PR mergeability is still being computed (mergeable=None) — waiting",
)
# mergeable is False → DEFINITIVE not-mergeable. If the head also does not
# contain current main, try the /update path to refresh the branch (this
# may resolve a behind-main non-conflict; a real conflict returns HTTP 409
# and process_once HOLDs the PR per #2352). If the head already contains
# current main yet Gitea still reports not-mergeable, there is nothing the
# queue can do (genuine conflict against current main) — WAIT.
if not pr_has_current_base:
return MergeDecision(False, "update", "PR not mergeable and head does not contain current main")
return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")
# Ready. Use force_merge ONLY if the merge would otherwise be blocked by
# missing-but-non-required governance contexts. Required are green and
# approvals are genuine, so force only bypasses non-required reds — never a
# failing required context or missing approval.
force = _non_required_red_present(latest, required_contexts)
return MergeDecision(True, "merge", "ready", force=force)
def get_branch_head(branch: str) -> str:
@@ -754,7 +727,7 @@ def list_queued_issues() -> list[dict]:
query={
"state": "open",
"type": "pulls",
"label": QUEUE_LABEL,
"labels": QUEUE_LABEL,
},
)
@@ -916,9 +889,7 @@ def process_once(*, dry_run: bool = False) -> int:
f"unavailable (fail-closed): {exc}\n"
)
return 0
# Uniform gate: governance checks are ALWAYS required, even if branch
# protection does not enumerate them. Deduplicate against BP list.
contexts = list(dict.fromkeys(bp.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS))
contexts = bp.required_contexts
required_approvals = bp.required_approvals
print(
f"::notice::queue policy from branch protection: "
@@ -1105,24 +1076,16 @@ def _evaluate_candidate(
# never treated as green).
pr_status = get_combined_status(head_sha)
pr_labels = label_names(pr)
# FAIL-CLOSED, TRI-STATE: Gitea returns mergeable=None (or omits the field)
# while it is still COMPUTING conflict state, mergeable=False for a definitive
# conflict, and mergeable=True only when it has proven the PR conflict-free.
# We preserve all THREE states (do NOT collapse None/missing into False):
# - True → direct-merge eligible (step 5).
# - None / missing → still computing → WAIT (never merge, never update,
# never dismiss approvals); re-check next tick.
# - False → definitive conflict → the update/hold path (step 6).
# Collapsing None→False would route a behind-main but STILL-COMPUTING PR into
# the /update path, which triggers dismiss_stale_approvals — the exact
# rebase-churn this change eliminates. Normalize only to the literal True /
# False / None set (some Gitea versions omit the key entirely → None).
raw_mergeable = pr.get("mergeable")
mergeable: bool | None = raw_mergeable if isinstance(raw_mergeable, bool) else None
# FAIL-CLOSED: Gitea returns mergeable=None (or omits the field) while it is
# still COMPUTING conflict state. Only the literal True is decisive proof the
# PR is conflict-free; None and False both mean "not (yet) mergeable". We must
# NOT autonomously merge on an unknown — treat anything but True as not-yet-
# mergeable so evaluate_merge_readiness returns a "wait" decision.
mergeable = pr.get("mergeable") is True
reviews = get_pull_reviews(pr_number)
approvers, request_changes = genuine_approvals(
reviews, headsha=head_sha, reviewer_set=REVIEWER_SET
reviews, head_sha=head_sha, reviewer_set=REVIEWER_SET
)
decision = evaluate_merge_readiness(
@@ -1139,124 +1102,12 @@ def _evaluate_candidate(
return decision, ctx
@dataclasses.dataclass(frozen=True)
class ReadinessEntry:
    """Immutable record of a single candidate PR's readiness evaluation."""

    # Number of the pull request this entry describes.
    pr_number: int
    # The merge decision, or None when no decision could be produced
    # (candidate not merge-eligible, or evaluation hit an API error).
    decision: MergeDecision | None
    # Human-readable explanation accompanying the decision (or its absence).
    reason: str
def enumerate_readiness(*, dry_run: bool = False) -> list[ReadinessEntry]:
    """Evaluate every candidate PR and report each one's readiness.

    Every eligible candidate is evaluated and the full list is returned,
    so that a summary of the whole batch can be printed afterwards.

    Args:
        dry_run: forwarded unchanged to ``_evaluate_candidate``.

    Returns:
        One ``ReadinessEntry`` per candidate, in candidate order. An entry
        has ``decision=None`` when the candidate was not merge-eligible or
        when evaluating it raised ``ApiError``.

    Raises:
        Whatever the up-front lookups raise (per the original docstring,
        ``BranchProtectionUnavailable`` when branch protection cannot be
        fetched); only the per-candidate ``ApiError`` is caught here.
    """
    protection = get_branch_protection(WATCH_BRANCH)
    # Uniform gate: governance checks are ALWAYS required, even if branch
    # protection does not enumerate them. dict.fromkeys de-duplicates while
    # keeping first-seen order.
    gate_contexts = list(
        dict.fromkeys(protection.required_contexts + GOVERNANCE_REQUIRED_CONTEXTS)
    )
    approvals_needed = protection.required_approvals

    base_sha = get_branch_head(WATCH_BRANCH)
    base_status = get_combined_status(base_sha)
    base_latest = latest_statuses_by_context(base_status.get("statuses") or [])
    # The watch-branch gate is evaluated here, but its result is not
    # consulted anywhere else in this function.
    _base_ok, _base_bad = required_contexts_green(base_latest, push_required_contexts())

    queue = choose_candidate_issues(
        list_candidate_issues(auto_discover=AUTO_DISCOVER),
        queue_label=QUEUE_LABEL,
        opt_out_labels=OPT_OUT_LABELS,
        auto_discover=AUTO_DISCOVER,
    )

    report: list[ReadinessEntry] = []
    for issue in queue:
        number = int(issue["number"])
        try:
            verdict, _ctx = _evaluate_candidate(
                issue,
                main_sha=base_sha,
                main_status=base_status,
                required_contexts=gate_contexts,
                required_approvals=approvals_needed,
                dry_run=dry_run,
            )
        except ApiError as exc:
            # Fail-closed per candidate: an unreadable PR is recorded as
            # unverifiable, not skipped silently.
            verdict = None
            why = f"unverifiable (API error: {exc})"
        else:
            if verdict is None:
                why = "not merge-eligible (opt-out/draft/fork/wrong-base)"
            else:
                why = verdict.reason
        report.append(ReadinessEntry(pr_number=number, decision=verdict, reason=why))
    return report
def print_post_batch_summary(entries: list[ReadinessEntry]) -> None:
    """Write a readiness report for all candidates to stdout.

    The report is wrapped in ``::group::`` / ``::endgroup::`` markers and
    consists of four ``key=count`` lines, a blank line, and then one
    ``PR #<n>: state=... action=... reason=...`` line per entry.
    """

    def state_of(entry) -> str:
        # An entry without a decision is "ineligible"; otherwise the
        # decision's ``ready`` flag picks between "ready" and "waiting".
        if not entry.decision:
            return "ineligible"
        return "ready" if entry.decision.ready else "waiting"

    states = [state_of(entry) for entry in entries]
    ready_count = states.count("ready")
    waiting_count = states.count("waiting")
    # This bucket counts strictly the entries whose decision is None.
    no_decision_count = sum(1 for entry in entries if entry.decision is None)

    lines = [
        "::group::merge-queue readiness summary",
        f"total_candidates={len(entries)}",
        f"ready={ready_count}",
        f"waiting={waiting_count}",
        f"ineligible/unverifiable={no_decision_count}",
        "",
    ]
    for entry, state in zip(entries, states):
        action = entry.decision.action if entry.decision else "n/a"
        lines.append(
            f"PR #{entry.pr_number}: state={state} action={action} reason={entry.reason}"
        )
    lines.append("::endgroup::")
    for line in lines:
        print(line)
def main() -> int:
parser = argparse.ArgumentParser()
parser.add_argument("--dry-run", action="store_true")
parser.add_argument(
"--enumerate",
action="store_true",
help="Evaluate all candidates and print a readiness summary without merging.",
)
args = parser.parse_args()
_require_runtime_env()
try:
if args.enumerate:
entries = enumerate_readiness(dry_run=args.dry_run)
print_post_batch_summary(entries)
return 0
return process_once(dry_run=args.dry_run)
except ApiError as exc:
# FAIL-CLOSED: API errors are not "transient success" — they mean
+1 -1
View File
@@ -165,7 +165,7 @@ def api(
# Format: "<workflow_name> / <job_name_or_key> (<event>)"
# Examples observed on molecule-core/main:
# "Secret scan / Scan diff for credential-shaped strings (pull_request)"
# "sop-checklist / all-items-acked (pull_request)"
# "sop-tier-check / tier-check (pull_request)"
#
# Split strategy: peel off the trailing ` (<event>)` first, then split
# the leading `<workflow> / <rest>` on the FIRST ` / ` (workflow names
+2 -2
View File
@@ -17,7 +17,7 @@ Rules (4 fatal + 1 fatal cross-file + 1 heuristic-warn):
enumeration; task #81). Workflow registers, fires for 0 events.
3. `name:` containing `/` — breaks the
`<workflow> / <job> (<event>)` commit-status context convention;
downstream parsers (sop-checklist, status-reaper) tokenize on `/`.
downstream parsers (sop-tier-check, status-reaper) tokenize on `/`.
4. `name:` collision across files — Gitea routes commit-status updates
by `name` and behavior on collision is undefined (status-reaper
rev1 fail-loud).
@@ -150,7 +150,7 @@ def check_name_with_slash(filename: str, doc: Any) -> list[str]:
f"::error file={filename}::Rule 3 (FATAL): workflow `name: "
f"{name!r}` contains `/`. The commit-status context convention "
f"is `<workflow> / <job> (<event>)`; embedding `/` in the "
f"workflow name makes downstream parsers (sop-checklist, "
f"workflow name makes downstream parsers (sop-tier-check, "
f"status-reaper) tokenize ambiguously. Rename to use `-` or "
f"` ` instead."
)
+4 -3
View File
@@ -49,7 +49,8 @@ Daily scheduled run + workflow_dispatch:
4. If orphans exist:
- File or PATCH a `[ci-bp-drift]` issue (idempotency contract:
search for exact title prefix, edit existing if open).
- Apply label `ci-bp-drift` (lookup ID per repo).
- Apply labels `tier:high` + `ci-bp-drift` (lookup IDs per
repo; per `feedback_tier_label_ids_are_per_repo`).
- Exit 1.
5. If no orphans:
@@ -81,7 +82,7 @@ Memory cross-links
------------------
- internal#350 (the RFC that specs this lint)
- feedback_phantom_required_check_after_gitea_migration
- feedback_label_ids_are_per_repo
- feedback_tier_label_ids_are_per_repo
- reference_post_suspension_pipeline
"""
from __future__ import annotations
@@ -358,7 +359,7 @@ def file_or_update_issue(
existing = h
break
label_ids = _ensure_labels(repo, ["ci-bp-drift"])
label_ids = _ensure_labels(repo, ["ci-bp-drift", "tier:high"])
if existing:
api(
@@ -305,9 +305,9 @@ def validate_tracker(
if status == "error":
sys.stderr.write(
f"::error::issue {slug}#{num} fetch errored — treating as "
f"unverified, FAILING CLOSED (do not skip on outage).\n"
f"unverified, skipping this check.\n"
)
return (False, f"{slug}#{num} fetch erroredcannot verify tracker")
return (True, "fetch-error — skipped")
assert payload is not None
state = payload.get("state", "")
+2 -2
View File
@@ -50,7 +50,7 @@ runtime contract enforcement lives in `_require_runtime_env()`.
Run locally (dry-run, no API mutation):
GITEA_TOKEN=... GITEA_HOST=git.moleculesai.app REPO=owner/repo \\
WATCH_BRANCH=main RED_LABEL=ci-bp-drift \\
WATCH_BRANCH=main RED_LABEL=tier:high \\
python3 .gitea/scripts/main-red-watchdog.py --dry-run
"""
from __future__ import annotations
@@ -81,7 +81,7 @@ GITEA_TOKEN = _env("GITEA_TOKEN")
GITEA_HOST = _env("GITEA_HOST")
REPO = _env("REPO")
WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
RED_LABEL = _env("RED_LABEL", default="ci-bp-drift")
RED_LABEL = _env("RED_LABEL", default="tier:high")
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
+11 -77
View File
@@ -95,27 +95,17 @@ def build_plan(env: dict[str, str]) -> dict:
def latest_status_for_context(statuses: list[dict], context: str) -> dict | None:
"""Return the NEWEST status row for ``context`` (highest ``id``).
"""Return the first matching status.
This must work for BOTH orderings Gitea exposes: the combined
``/status`` view is newest-first, but the exhaustively-paginated
``/statuses`` list (see ``fetch_all_statuses``) is ascending id order
(oldest-first). Selecting by max ``id`` collapses duplicate context rows
to the current one regardless of input order, so a stale earlier run can
never shadow the latest result. Rows without an ``id`` are treated as
oldest (id -1) so a well-formed newer row always wins.
Gitea's combined-status response is newest-first in practice. The merge
queue relies on the same contract; keeping the selector explicit makes
stale duplicate contexts easy to test.
"""
newest: dict | None = None
newest_id = -1
for status in statuses:
if status.get("context") != context:
continue
raw_id = status.get("id")
sid = raw_id if isinstance(raw_id, int) else -1
if newest is None or sid >= newest_id:
newest = status
newest_id = sid
return newest
if status.get("context") == context:
return status
return None
def ci_context_state(statuses: list[dict], context: str) -> str:
@@ -361,55 +351,6 @@ def _api_json(url: str, token: str) -> dict:
raise RuntimeError(f"GET {url} -> HTTP {exc.code}: {body}") from exc
def _api_json_list(url: str, token: str) -> list:
"""GET a Gitea list endpoint and return the JSON array.
Like ``_api_json`` but asserts the body is a list. Fail-closed: a non-list
body (or HTTP error) raises so the caller never mistakes an unreadable page
for "no more statuses" and silently truncates the required-context scan.
"""
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
try:
with urllib.request.urlopen(req, timeout=20) as resp:
body = json.loads(resp.read())
except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="replace")[:500]
raise RuntimeError(f"GET {url} -> HTTP {exc.code}: {detail}") from exc
if not isinstance(body, list):
raise RuntimeError(f"GET {url} -> expected JSON array, got {type(body).__name__}")
return body
def fetch_all_statuses(host: str, repo: str, sha: str, token: str, page_size: int = 100) -> list[dict]:
    """Collect every commit-status row for ``sha`` by walking all pages.

    Requests ``/api/v1/repos/{repo}/commits/{sha}/statuses`` on ``host``
    page by page (``page`` starting at 1, ``limit=page_size``) and stops
    after the first page that holds fewer than ``page_size`` rows. Rows
    that are not dicts are dropped from the result.

    Per the original rationale, this exists because the combined
    ``/commits/{sha}/status`` view caps its embedded ``statuses`` array at
    the Gitea page size, so a required context's row can fall outside it
    on a high-churn commit.

    Fail-closed: each page is fetched with ``_api_json_list``, whose
    errors propagate, so a partial list is never returned as complete.

    Returns:
        All dict rows from every page, in the order received.
    """
    endpoint = f"https://{host}/api/v1/repos/{repo}/commits/{sha}/statuses"
    collected: list[dict] = []
    page_no = 0
    while True:
        page_no += 1
        batch = _api_json_list(f"{endpoint}?page={page_no}&limit={page_size}", token)
        collected += [row for row in batch if isinstance(row, dict)]
        # A short (or empty) page means there is nothing further to read.
        if len(batch) < page_size:
            return collected
def _api_json_optional(url: str, token: str) -> tuple[int, dict | None]:
req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
try:
@@ -531,19 +472,12 @@ def wait_for_ci_context(env: dict[str, str]) -> str:
if not token:
raise ValueError("GITEA_TOKEN is required to wait for CI status")
url = f"https://{host}/api/v1/repos/{repo}/commits/{sha}/status"
deadline = time.time() + timeout
last_states: dict[str, str] = {}
while time.time() <= deadline:
# Read the FULL, exhaustively-paginated /statuses list — NOT the
# combined /status view, whose embedded `statuses` array is capped at
# the Gitea page size (~30). On a high-churn commit a required-context
# SUCCESS row lands past that cap and the combined view would report
# it `missing`, so this gate would wait until timeout and refuse a
# legitimate prod deploy. Fetching every page closes that hole.
# Fail-closed is preserved: a genuinely-absent required context is on
# NO page, so ci_context_state() still returns "missing" → never
# satisfied → the deploy stays blocked.
statuses = fetch_all_statuses(host, repo, sha, token)
body = _api_json(url, token)
statuses = body.get("statuses") or []
states = {context: ci_context_state(statuses, context) for context in contexts}
for context, state in states.items():
if state != last_states.get(context):
+11 -7
View File
@@ -197,13 +197,17 @@ if [ "$HTTP_CODE" != "200" ]; then
exit 1
fi
# Filter via the SSOT fail-closed predicate in _approval_validator.py
# (same module gitea-merge-queue.py imports). The jq filter is gone
# entirely — any change to the predicate must be made in
# _approval_validator.py. See SEV-1 internal#812 for the fail-closed
# contract this closes.
SCRIPT_DIR_HERE="$(cd "$(dirname "$0")" && pwd)"
REVIEW_CANDIDATES=$(python3 "$SCRIPT_DIR_HERE/_review_check_filter.py" "$REVIEWS_JSON" "$PR_HEAD_SHA" "$PR_AUTHOR")
# Filter: state=APPROVED, official=true, not-dismissed, non-author,
# commit_id matches current PR head. All conditions are mandatory.
JQ_FILTER='.[]
| select(.state == "APPROVED")
| select(.official == true)
| select(.dismissed != true)
| select(.user.login != $author)
| select(.commit_id == $head)
| .user.login'
REVIEW_CANDIDATES=$(jq -r --arg author "$PR_AUTHOR" --arg head "$PR_HEAD_SHA" "$JQ_FILTER" "$REVIEWS_JSON" | sort -u)
debug "candidate non-author approvers: $(echo "$REVIEW_CANDIDATES" | tr '\n' ' ')"
if [ -z "$REVIEW_CANDIDATES" ]; then
+33 -6
View File
@@ -11,7 +11,7 @@
#
# Flow:
# 1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, labels
# 2. GET /repos/{R}/pulls/{N} — author, head.sha, tier label
# 3. GET /repos/{R}/issues/{N}/comments — extract /sop-ack and /sop-revoke
# 4. For each checklist item:
# a. Is the section marker present in PR body? (author answered)
@@ -665,8 +665,8 @@ def load_config(path: str) -> dict[str, Any]:
def _load_config_minimal(path: str) -> dict[str, Any]:
"""Minimal YAML subset parser for our config shape.
Supports: top-level scalar:value, top-level map-of-map,
top-level list of maps (items:), and within an
Supports: top-level scalar:value, top-level map-of-map (e.g.
tier_failure_mode), top-level list of maps (items:), and within an
item map: scalars + lists of scalars. Does NOT support nested lists,
YAML anchors, multi-doc, or flow style.
"""
@@ -835,7 +835,8 @@ def render_status(
state is "success" if every item has at least one valid ack
(body section presence is informational only — peer-ack is the
real gate).
real gate). tier:low PRs receive state="success" (soft-fail — no
acks required); the description carries "[info tier:low]" prefix.
"""
n = len(items)
fully_acked = [
@@ -862,16 +863,35 @@ def render_status(
return state, "".join(desc_parts)
def get_tier_mode(pr: dict[str, Any], cfg: dict[str, Any]) -> str:
    """Resolve the failure mode for a PR from its ``tier:`` labels.

    Args:
        pr: pull-request payload; ``pr["labels"]`` is read as a list of
            dicts carrying a ``name`` key.
        cfg: checklist config; ``tier_failure_mode`` maps a tier label to
            a mode and ``default_mode`` supplies the fallback.

    Returns:
        The mode mapped to the first ``tier:``-prefixed label (in label
        order) that appears in ``cfg["tier_failure_mode"]``; otherwise
        ``cfg["default_mode"]``, or ``"hard"`` when that key is absent.
    """
    tier_names = []
    for entry in pr.get("labels") or []:
        # A missing or None name is treated as an empty string here.
        if (entry.get("name", "") or "").startswith("tier:"):
            tier_names.append(entry.get("name", ""))
    modes = cfg.get("tier_failure_mode") or {}
    fallback = cfg.get("default_mode", "hard")
    return next((modes[name] for name in tier_names if name in modes), fallback)
def is_high_risk(pr: dict[str, Any], cfg: dict[str, Any]) -> bool:
    """Return True when the PR is high-risk per RFC#450 Option C.

    A PR is high-risk when ANY of:
      - it carries the `tier:high` label, or
      - it carries any label listed in cfg.high_risk_labels.

    High-risk PRs use `required_teams_high_risk` (when set on an item)
    instead of the default `required_teams`. Items without
    `required_teams_high_risk` are unaffected (the default applies).
    """
    names = set()
    for entry in pr.get("labels") or []:
        # A missing or None name is recorded as the empty string.
        names.add(entry.get("name") or "")
    if "tier:high" in names:
        return True
    risky = set(cfg.get("high_risk_labels") or [])
    return not names.isdisjoint(risky)
@@ -1149,6 +1169,13 @@ def main(argv: list[str] | None = None) -> int:
body_state = {it["slug"]: section_marker_present(body, it["pr_section_marker"]) for it in items}
state, description = render_status(items, ack_state, body_state)
mode = get_tier_mode(pr, cfg)
if mode == "soft":
# tier:low: acks are informational only — post success so BP gate passes.
# Description carries "[info tier:low]" prefix so reviewers know acks
# were not required (vs a tier:medium+ PR that truly passed all acks).
state = "success"
description = f"[info tier:low] {description}"
if volume_skipped:
# Above the comment-cap — we may have a partial view. Soft-pend
# so neither BP nor the author gets stuck; surface the cap so
@@ -1162,7 +1189,7 @@ def main(argv: list[str] | None = None) -> int:
# Diagnostics to job log.
print(
f"::notice::PR #{args.pr} author={author} head={head_sha[:7]} "
f"risk_class={'high' if high_risk else 'default'}"
f"mode={mode} risk_class={'high' if high_risk else 'default'}"
)
for it in items:
slug = it["slug"]
+427
View File
@@ -0,0 +1,427 @@
#!/usr/bin/env bash
# sop-tier-check — verify a Gitea PR satisfies the §SOP-6 approval gate.
#
# Reads the PR's tier label, walks approving reviewers, and checks team
# membership against the tier's approval expression. Passes only when
# ALL clauses in the expression are satisfied by the set of approving
# reviewers (AND-composition; internal#189).
#
# Expression syntax:
# "team-a,team-b" — OR-set: any ONE of the comma-separated teams
# "team-a AND team-b" — AND: BOTH must each have ≥1 approver
# "(a,b,c)" — OR-set wrapped in parens; same as "a,b,c"
#
# Example: "qa AND security AND (managers,ceo)" means:
# ≥1 approver in team "qa" AND
# ≥1 approver in team "security" AND
# ≥1 approver in team "managers" OR "ceo"
#
# Per the spec (internal#189), the hard gate here pairs with the
# advisory gate of sop-conformance LLM-judge (internal#188): each
# required-team click must reflect real verification (visible in review
# body or A2A messages), not rubber-stamp APPROVE. Both gates together
# close the "teammate clicks APPROVE without verifying" gap.
#
# Invoked from `.gitea/workflows/sop-tier-check.yml`. The workflow sets
# the env vars below; this script does no IO outside of stdout/stderr +
# the Gitea API.
#
# Required env:
# GITEA_TOKEN — bot PAT with read:organization,read:user,
# read:issue,read:repository scopes
# GITEA_HOST — e.g. git.moleculesai.app
# REPO — owner/name (from github.repository)
# PR_NUMBER — int (from github.event.pull_request.number)
# PR_AUTHOR — login (from github.event.pull_request.user.login)
#
# Optional:
# SOP_DEBUG=1 — print per-API-call diagnostic lines. Default: off.
# SOP_LEGACY_CHECK=1 — revert to OR-gate (≥1 approver from any eligible
# team). Grace window for PRs in-flight when the
# new AND-composition was deployed. Expires 2026-05-17
# (7-day burn-in window; internal#189 Phase 1).
# Set by workflow for PRs merged before the deploy.
set -euo pipefail
# Ensure jq is available. Runners may not have it pre-installed, and the
# workflow-level jq install can fail on runners with network restrictions
# (GitHub releases not reachable from some runner networks — infra#241
# follow-up). This fallback is idempotent — no-op when jq is already on PATH.
if ! command -v jq >/dev/null 2>&1; then
  echo "::notice::jq not found on PATH — attempting install..."
  # apt-get first (primary) — Ubuntu package mirrors are reliably reachable.
  if apt-get update -qq && apt-get install -y -qq jq 2>/dev/null; then
    echo "::notice::jq installed via apt-get: $(jq --version)"
  else
    # GitHub binary as secondary fallback — may fail on restricted networks.
    # Pick the release asset that matches the runner CPU; the amd64 binary
    # cannot execute on an arm64 runner.
    case "$(uname -m)" in
      aarch64|arm64) _jq_asset="jq-linux-arm64" ;;
      *)             _jq_asset="jq-linux-amd64" ;;
    esac
    # -f makes curl fail on an HTTP error instead of saving the error page as
    # the "binary" (which would then be chmod'ed and found by `command -v`).
    if timeout 120 curl -fsSL \
        "https://github.com/jqlang/jq/releases/download/jq-1.7.1/${_jq_asset}" \
        -o /usr/local/bin/jq \
        && chmod +x /usr/local/bin/jq; then
      echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version 2>&1 || true)"
    fi
  fi
  # Verify by actually running jq: a file that merely exists on PATH but
  # cannot execute (truncated download, wrong architecture) must not pass.
  if ! jq --version >/dev/null 2>&1; then
    echo "::error::jq installation failed — apt-get and GitHub binary both failed."
    echo "::error::sop-tier-check requires jq for all JSON API parsing."
    exit 1
  fi
fi
# Print a diagnostic line on stderr, but only when SOP_DEBUG=1.
# Returns 0 when debugging is off so callers can chain it with `&&`.
debug() {
  [ "${SOP_DEBUG:-}" = "1" ] || return 0
  echo " [debug] $*" >&2
}
# Validate env. `${VAR:?msg}` aborts the script with msg when VAR is unset or
# empty, so everything below can rely on these five values being present.
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${GITEA_HOST:?GITEA_HOST required}"
: "${REPO:?REPO required (owner/name)}"
: "${PR_NUMBER:?PR_NUMBER required}"
: "${PR_AUTHOR:?PR_AUTHOR required}"
# Split "owner/name": the text before the first "/" and after the last "/".
OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
# Base URL and auth header shared by every Gitea API call in this script.
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
echo "::notice::tier-check start: repo=$OWNER/$NAME pr=$PR_NUMBER author=$PR_AUTHOR"
# Sanity: token resolves to a user.
# Use || true on the jq pipeline so that set -euo pipefail (set near the top
# of this script) does not cause the script to exit prematurely when the
# token is empty/invalid — the if check below handles that case gracefully.
# Without || true, a 401 from an empty/invalid token causes jq to exit 1,
# triggering set -e and exiting the entire script before the error can be
# logged.
WHOAMI=$(curl -sS -H "$AUTH" "${API}/user" | jq -r '.login // ""') || true
if [ -z "$WHOAMI" ]; then
echo "::error::GITEA_TOKEN cannot resolve a user via /api/v1/user — check the token scope and that the secret is wired correctly."
exit 1
fi
echo "::notice::token resolves to user: $WHOAMI"
# 0.5 Read PR head SHA so we can reject stale approvals after head moves
# (internal#816). Reviews carry the commit_id they were submitted against.
# An empty result (request failed, or the response has no .head.sha) is fatal.
HEAD_SHA=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}" | jq -r '.head.sha // ""') || true
if [ -z "$HEAD_SHA" ]; then
echo "::error::Failed to fetch PR head SHA — token may be invalid."
exit 1
fi
debug "pr-head-sha=$HEAD_SHA"
# 1. Read tier label. || true ensures set -euo pipefail does not abort the
# script if curl or jq fails (e.g. 401 from empty token); in that case LABELS
# is empty and the "no tier label" error below fires.
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || true
TIER=""
# jq prints one label name per line. Read them line by line rather than
# word-splitting $LABELS: label names may contain spaces or glob characters,
# and a label such as "needs tier:high" must not be mistaken for the
# tier:high label itself.
while IFS= read -r L; do
  case "$L" in
    tier:low|tier:medium|tier:high)
      # Exactly one tier label is allowed; a second one is a hard error.
      if [ -n "$TIER" ]; then
        echo "::error::Multiple tier labels: $TIER + $L. Apply exactly one."
        exit 1
      fi
      TIER="$L"
      ;;
  esac
done <<< "$LABELS"
if [ -z "$TIER" ]; then
  echo "::error::PR has no tier:low|tier:medium|tier:high label. Apply one before merge."
  exit 1
fi
debug "tier=$TIER"
# 2. Tier → required team expression (AND-composition; internal#189)
#
# Expression syntax:
# clause-a AND clause-b AND ... — ALL clauses must pass
# team-a,team-b,team-c — OR-set: ≥1 approver in ANY of these teams
# (team-a,team-b) — same as team-a,team-b (parens optional)
#
# The expression is split on whitespace further down, so an OR-set must not
# contain spaces ("a,b", never "a, b").
#
# This map is the single source of truth. Update it when the team structure
# or policy changes. How missing teams are handled later in this script:
#   - a team WITHOUT the "???" suffix that is absent from the org's team list
#     aborts the run with an explicit "not found" error;
#   - a team WITH the "???" suffix is a placeholder for a team that has not
#     been created yet: it is skipped, so a clause made up only of
#     placeholders can never pass.
#
# Current Gitea teams: ceo, engineers, managers
# Future teams (create them, then drop the "???" suffix here): qa, security, security-audit
declare -A TIER_EXPR=(
# tier:low — same as previous OR gate: any engineer, manager, or ceo.
["tier:low"]="engineers,managers,ceo"
# tier:medium — (managers) AND (engineers) AND (qa??? OR security???).
# While qa and security are still "???" placeholders the third clause
# cannot be satisfied, so tier:medium cannot pass the AND-gate. Ops: create
# the qa + security Gitea teams and update this map to remove the "???"
# markers (internal#189 follow-up).
["tier:medium"]="managers AND engineers AND qa???,security???"
# tier:high — ceo only. The AND-composition adds no value for a
# single-team gate, but the framework is wired for consistency.
["tier:high"]="ceo"
)
# `-` (not `:-`) default: an unknown tier key yields the empty string instead
# of tripping `set -u`, and is reported by the check below.
EXPR="${TIER_EXPR[$TIER]-}"
if [ -z "$EXPR" ]; then
echo "::error::No expression defined for tier $TIER in TIER_EXPR map."
exit 1
fi
debug "expression=$EXPR"
# 3. Legacy OR-gate override (7-day burn-in grace window; internal#189 Phase 1)
#
# When SOP_LEGACY_CHECK=1 the gate is "≥1 non-author approver from any team in
# LEGACY_ELIGIBLE". LEGACY_ELIGIBLE being non-empty is what later selects the
# legacy evaluation path.
if [ "${SOP_LEGACY_CHECK:-}" = "1" ]; then
  LEGACY_ELIGIBLE=""
  case "$TIER" in
    tier:low) LEGACY_ELIGIBLE="engineers managers ceo" ;;
    tier:medium) LEGACY_ELIGIBLE="managers ceo" ;;
    tier:high) LEGACY_ELIGIBLE="ceo" ;;
  esac
  echo "::notice::SOP_LEGACY_CHECK=1 — using OR-gate ({$LEGACY_ELIGIBLE}) for this PR."
  # Team IDs and membership probes are derived from $EXPR. Point it at the
  # legacy OR-set so every legacy-eligible team is actually resolved and
  # probed; otherwise a team that is eligible here but absent from the AND
  # expression (ceo for tier:medium) is never probed and its members can
  # never satisfy the legacy gate.
  EXPR="${LEGACY_ELIGIBLE// /,}"
fi
# 4. Resolve all team names → IDs
# /orgs/{org}/teams/{slug}/... endpoints don't exist on Gitea 1.22;
# we use /teams/{id}.
# set +e prevents set -e from aborting the script if curl fails (e.g. empty token).
# The response body goes to a temp file (removed on exit by the trap) and the
# HTTP status code is captured separately via -w.
ORG_TEAMS_FILE=$(mktemp)
trap 'rm -f "$ORG_TEAMS_FILE"' EXIT
set +e
HTTP_CODE=$(curl -sS -o "$ORG_TEAMS_FILE" -w '%{http_code}' -H "$AUTH" \
"${API}/orgs/${OWNER}/teams")
_HTTP_EXIT=$?
set -e
debug "teams-list HTTP=$HTTP_CODE (curl exit=$_HTTP_EXIT) size=$(wc -c <"$ORG_TEAMS_FILE")"
if [ "${SOP_DEBUG:-}" = "1" ]; then
echo " [debug] teams-list body (first 300 chars):" >&2
head -c 300 "$ORG_TEAMS_FILE" >&2; echo >&2
fi
# Anything other than a clean curl exit AND HTTP 200 is fatal: without the
# team list no team name can be mapped to an id.
if [ "$_HTTP_EXIT" -ne 0 ] || [ "$HTTP_CODE" != "200" ]; then
echo "::error::GET /orgs/${OWNER}/teams failed (curl exit=$_HTTP_EXIT HTTP=$HTTP_CODE) — token may lack read:org scope or be invalid."
exit 1
fi
# Collect every team name that appears in the expression into $_all_teams
# (space-separated, de-duplicated). TEAM_ID is declared here and filled in by
# the resolution step that follows.
declare -A TEAM_ID
_all_teams=""
# $EXPR is deliberately word-split on spaces, but it can contain "?" (the
# "???" placeholder suffix), which is a glob metacharacter. Disable pathname
# expansion while splitting so a token can never be replaced by a matching
# file name from the working directory.
set -f
for _raw_clause in $EXPR; do
  # Strip parens, then split the OR-set on commas.
  _clause=${_raw_clause//[()]/}
  for _t in ${_clause//,/ }; do
    # Word-splitting turns the AND / OR operators into tokens of their own;
    # they are not team names.
    [ "$_t" = "AND" ] || [ "$_t" = "OR" ] && continue
    # Add the name only if it is not already in the set.
    case " $_all_teams " in
      *" $_t "*) ;; # already present
      *) _all_teams="${_all_teams} $_t " ;;
    esac
  done
done
set +f
# Map each collected team name to its numeric Gitea id using the team list
# downloaded into $ORG_TEAMS_FILE. Pathname expansion is disabled while
# $_all_teams is word-split because placeholder names end in "???".
set -f
for _t in $_all_teams; do
  _id=$(jq -r --arg t "$_t" '.[] | select(.name==$t) | .id' <"$ORG_TEAMS_FILE" | head -1)
  if [ -z "$_id" ] || [ "$_id" = "null" ]; then
    # A "???" suffix marks a team that does not exist yet (tier:medium
    # qa/security). It gets no id, so its clause can never be satisfied; the
    # debug line tells ops why.
    if [[ "$_t" == *"???" ]]; then
      debug "team \"$_t\" not found (expected — pending team creation per internal#189)"
      continue
    fi
    # Any other unknown team is a configuration error: list what the token
    # can see and stop.
    _visible=$(jq -r '.[]?.name? // empty' <"$ORG_TEAMS_FILE" 2>/dev/null | tr '\n' ' ')
    echo "::error::Team \"$_t\" referenced in tier $TIER expression but not found in org $OWNER. Teams visible: $_visible"
    exit 1
  fi
  TEAM_ID[$_t]="$_id"
  debug "team-id: $_t → $_id"
done
set +f
# 5. Read approving reviewers. set +e disables set -e temporarily so that curl
# failures (e.g. empty/invalid token → HTTP 401) do not abort the script before
# set -e is restored immediately after.
set +e
REVIEWS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}/reviews")
_REVIEWS_EXIT=$?
set -e
if [ $_REVIEWS_EXIT -ne 0 ] || [ -z "$REVIEWS" ]; then
echo "::error::Failed to fetch reviews (curl exit=$_REVIEWS_EXIT) — token may be invalid or unreachable."
exit 1
fi
# Keep only reviews that are APPROVED *and* were submitted against the current
# head commit (commit_id == HEAD_SHA); approvals of an older head — or reviews
# with no commit_id at all — are dropped. The result is the de-duplicated list
# of approver logins, one per line. `|| true`: if jq fails, APPROVERS stays
# empty and the check below fails the gate.
APPROVERS=$(echo "$REVIEWS" | jq -r --arg head_sha "$HEAD_SHA" '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]') || true
if [ -z "$APPROVERS" ]; then
echo "::error::No approving reviews on this PR. Set SOP_DEBUG=1 and re-run for diagnostics."
exit 1
fi
debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
# 6. For each approver: skip self-review; probe team membership by id.
# Build $APPROVER_TEAMS[<user>] = space-padded list of team names (e.g. " managers ").
# The leading and trailing spaces give every name — including the first and
# last entry — a delimiter on both sides when the list is matched later.
#
# FAIL-CLOSED AUTHORIZATION (security: SOP tier gate is an AUTHORIZATION gate).
#
# This used to fall back to /orgs/{org}/members/{user} whenever every team
# probe failed and credit any org member as a member of EVERY queried team.
# That was a privilege-escalation: org membership is NOT team membership, so
# a 403/visibility/token-scope gap on the team probes silently promoted a
# plain org member to satisfy tier:high (ceo). An inability-to-verify became
# an authorization GRANT. The fallback is REMOVED — org membership must never
# satisfy a team-gated tier.
#
# A team-membership probe has exactly three meaningful outcomes:
# 200 / 204 → the user IS a member of that team (credit it)
# 404 → the user is definitively NOT a member (no credit, verified)
# anything else (403 / 401 / 5xx / curl failure / non-numeric)
# → membership CANNOT be read (cannot-verify)
#
# Per the dev-sop fail-closed rule (inability-to-verify = failure, never a
# pass — and here, never an authorization grant), a cannot-verify outcome on
# ANY probe is a HARD infra failure: we publish a loud cannot-verify error and
# exit non-zero. We do NOT proceed to evaluate the tier expression on a partial
# / unverifiable membership picture, because doing so could let an unverifiable
# approver's clause silently fail-or-pass on incomplete data. Fix the token
# scope (read:organization) or the runner network — not the gate.
# APPROVER_TEAMS maps an approver login to the space-padded list of teams the
# probes below confirmed that user belongs to.
declare -A APPROVER_TEAMS
_verify_failed="" # accumulates "<user>:<team>(HTTP <code>)" for probes we could not read
for U in $APPROVERS; do
# The PR author's own approval never counts.
[ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
# Probe this approver against every team that was resolved to an id.
for T in "${!TEAM_ID[@]}"; do
ID="${TEAM_ID[$T]}"
# set +e: a curl failure must be recorded below, not abort the script.
set +e
CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
"${API}/teams/${ID}/members/${U}")
_curl_exit=$?
set -e
debug "probe: $U in team $T (id=$ID) → HTTP $CODE (curl exit=$_curl_exit)"
if [ "$_curl_exit" -ne 0 ]; then
# curl itself failed (DNS, connection refused, timeout) — unreachable.
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(curl exit ${_curl_exit})"
continue
fi
case "$CODE" in
200|204)
# Append the team, keeping the stored value padded with spaces on both
# sides (an unset entry starts out as a single space).
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
debug "$U qualifies for team $T"
;;
404)
# Definitively not a member of this team — a verified negative.
debug "$U is NOT a member of team $T (verified 404)"
;;
*)
# 403/401/5xx/etc — membership is unreadable. Do NOT treat as "not a
# member" and do NOT fall back to org membership. This is cannot-verify.
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(HTTP ${CODE})"
;;
esac
done
done
# Fail-closed: if ANY membership probe could not be read, we cannot make an
# authorization decision. Publish a loud cannot-verify / infra-failed status
# and exit non-zero. Never grant the tier on unverifiable membership.
if [ -n "$_verify_failed" ]; then
echo "::error::sop-tier-check CANNOT VERIFY team membership — gate FAILS CLOSED."
echo "::error::Unreadable membership probe(s): ${_verify_failed}"
echo "::error::A team-membership probe returned 403/401/5xx (or curl failed). The SOP tier gate is an authorization gate; an inability to verify team membership is treated as a FAILURE, never a pass. Org membership is NOT team membership and is never credited as a fallback."
echo "::error::Fix: ensure GITEA_TOKEN (SOP_TIER_CHECK_TOKEN) has read:organization scope and the Gitea API is reachable from the runner, then re-run. Do NOT relax this gate."
exit 1
fi
# 7. Evaluate the tier expression.
#
# legacy OR-gate: use the simplified loop from before internal#189. It passes
# as soon as one non-author approver is a verified member of any team listed
# in LEGACY_ELIGIBLE, and always terminates the script (exit 0 or exit 1).
if [ -n "${LEGACY_ELIGIBLE:-}" ]; then
  OK=""
  for _u in "${!APPROVER_TEAMS[@]}"; do
    for _t2 in $LEGACY_ELIGIBLE; do
      case "${APPROVER_TEAMS[$_u]}" in
        # Match the team as a whole, space-delimited word. A bare substring
        # match would let membership of e.g. "security-audit" satisfy
        # "security" — unacceptable for an authorization gate.
        *" ${_t2} "*)
          echo "::notice::approver $_u is in team $_t2 (eligible for $TIER)"
          OK="yes"
          break
          ;;
      esac
    done
    [ -n "$OK" ] && break
  done
  if [ -z "$OK" ]; then
    echo "::error::Tier $TIER requires approval from a non-author member of {$LEGACY_ELIGIBLE}. Set SOP_DEBUG=1 to see per-probe HTTP codes."
    exit 1
  fi
  echo "::notice::sop-tier-check passed: $TIER (legacy OR-gate)"
  exit 0
fi
# AND-gate: evaluate the expression clause by clause. Every clause must be
# satisfied by at least one verified team membership of a non-author approver.
# _passed_clauses and _failed_clauses accumulate for the status description.
_passed_clauses=""
_failed_clauses=""
# $EXPR is word-split on purpose; pathname expansion is disabled meanwhile
# because placeholder team names end in "???" (glob metacharacters).
set -f
for _raw_clause in $EXPR; do
  # Word-splitting turns the AND / OR operators into tokens of their own.
  # They are separators, not clauses: evaluating "AND" as a clause finds no
  # team of that name and fails it, which would make every multi-clause
  # expression fail regardless of the approvals. Skip them, exactly as the
  # team-collection step does.
  case "$_raw_clause" in
    AND|OR) continue ;;
  esac
  # Normalise: strip parens, replace commas with spaces so bash word-split
  # can iterate the OR-set members ("engineers,managers,ceo" ->
  # "engineers managers ceo"). Do not pipe through `tr -d '[:space:]'` here:
  # that collapses the members into one nonsense token (fixed in #229).
  _no_parens=${_raw_clause//[()]/}
  _clause=${_no_parens//,/ }
  _clause_passed="no"
  _clause_names=""
  for _t in $_clause; do
    # Append (don't overwrite) the team name to the human-readable list used
    # in the FAIL message.
    _clause_names="${_clause_names}${_clause_names:+, }${_t}"
    # Skip teams not yet in Gitea (qa??? / security??? placeholders).
    [[ "$_t" == *"???" ]] && debug "clause \"$_t\": skipped (team pending creation)" && continue
    [ -z "${TEAM_ID[$_t]:-}" ] && debug "clause \"$_t\": no ID resolved, skipping" && continue
    for _u in "${!APPROVER_TEAMS[@]}"; do
      # APPROVER_TEAMS values are space-padded (e.g. " managers "). Match the
      # team as a whole, space-delimited word: a bare substring match would
      # let membership of "security-audit" satisfy a "security" clause.
      case "${APPROVER_TEAMS[$_u]}" in
        *" ${_t} "*)
          _clause_passed="yes"
          debug "clause \"$_t\": satisfied by $_u"
          break
          ;;
      esac
    done
  done
  # Label for display: strip "???" from pending teams.
  _label=$(echo "$_raw_clause" | tr -d '()' | tr ',' '/' | tr -d '[:space:]' | sed 's/???//g')
  if [ "$_clause_passed" = "yes" ]; then
    _passed_clauses="${_passed_clauses}${_passed_clauses:+, }$_label"
    echo "::notice::clause [$_label]: PASS — satisfied by approving reviewer(s)"
  else
    _failed_clauses="${_failed_clauses}${_failed_clauses:+, }$_label"
    echo "::error::clause [$_label]: FAIL — no approving reviewer belongs to any of these teams (${_clause_names}). Set SOP_DEBUG=1 to see per-team probe results."
  fi
done
set +f
if [ -n "$_failed_clauses" ]; then
  echo ""
  echo "::error::sop-tier-check FAILED for $TIER."
  echo " Passed :${_passed_clauses}"
  echo " Missing:${_failed_clauses}"
  echo " All clauses must be satisfied. Each missing team needs an APPROVED review from one of its members."
  exit 1
fi
echo "::notice::sop-tier-check PASSED: $TIER — all required clauses satisfied [${_passed_clauses}]"
+199
View File
@@ -0,0 +1,199 @@
#!/usr/bin/env bash
# sop-tier-refire — re-evaluate sop-tier-check and POST status to PR head SHA.
#
# Invoked from `.gitea/workflows/sop-tier-refire.yml` when a repo
# MEMBER/OWNER/COLLABORATOR comments `/refire-tier-check` on a PR.
#
# Behavior:
#
# 1. Resolve PR head SHA + author from PR_NUMBER.
# 2. Rate-limit: if the sop-tier-check context has been POSTed in the
# last 30 seconds, skip (prevents comment-spam status thrash).
# 3. Invoke `.gitea/scripts/sop-tier-check.sh` with the same env the
# canonical workflow provides. This is DRY: we re-use the exact AND-
# composition gate logic, not a watered-down approving-count check.
# 4. POST the resulting status (success on exit 0, failure on non-zero)
# to `/repos/.../statuses/{HEAD_SHA}` with context
# "sop-tier-check / tier-check (pull_request)" — the same context name
# branch protection requires.
#
# Required env (set by sop-tier-refire.yml):
# GITEA_TOKEN — org-level SOP_TIER_CHECK_TOKEN (read:org/user/issue/repo)
# GITEA_HOST — e.g. git.moleculesai.app
# REPO — owner/name
# PR_NUMBER — PR number from issue_comment payload
# COMMENT_AUTHOR — login of the commenter (logged for audit)
#
# Optional:
# SOP_DEBUG=1 — verbose per-API-call diagnostics
# SOP_REFIRE_RATE_LIMIT_SEC — override the 30s rate-limit (default 30)
# SOP_REFIRE_DISABLE_RATE_LIMIT=1 — for tests; skips the rate-limit check
set -euo pipefail
# Print a diagnostic line on stderr, but only when SOP_DEBUG=1.
debug() {
  [ "${SOP_DEBUG:-}" = "1" ] || return 0
  echo " [debug] $*" >&2
}
# Required inputs: `${VAR:?msg}` aborts when VAR is unset or empty.
# COMMENT_AUTHOR is only logged, so it falls back to "unknown".
: "${GITEA_TOKEN:?GITEA_TOKEN required}"
: "${GITEA_HOST:?GITEA_HOST required}"
: "${REPO:?REPO required (owner/name)}"
: "${PR_NUMBER:?PR_NUMBER required}"
: "${COMMENT_AUTHOR:=unknown}"
OWNER="${REPO%%/*}"
NAME="${REPO##*/}"
API="https://${GITEA_HOST}/api/v1"
AUTH="Authorization: token ${GITEA_TOKEN}"
# Status context this script rate-limits on and posts to.
CONTEXT="sop-tier-check / tier-check (pull_request)"
RATE_LIMIT_SEC="${SOP_REFIRE_RATE_LIMIT_SEC:-30}"
echo "::notice::sop-tier-refire start: repo=$OWNER/$NAME pr=$PR_NUMBER commenter=$COMMENT_AUTHOR"
# 1. Fetch PR details — need head.sha and user.login.
PR_FILE=$(mktemp)
trap 'rm -f "$PR_FILE"' EXIT
PR_HTTP=$(curl -sS -o "$PR_FILE" -w '%{http_code}' -H "$AUTH" \
  "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
if [ "$PR_HTTP" != "200" ]; then
  echo "::error::GET /pulls/$PR_NUMBER returned HTTP $PR_HTTP (body $(head -c 200 "$PR_FILE"))"
  exit 1
fi
HEAD_SHA=$(jq -r '.head.sha' <"$PR_FILE")
PR_AUTHOR=$(jq -r '.user.login' <"$PR_FILE")
PR_STATE=$(jq -r '.state' <"$PR_FILE")
if [ -z "$HEAD_SHA" ] || [ "$HEAD_SHA" = "null" ]; then
  echo "::error::Could not resolve head.sha from PR #$PR_NUMBER response"
  exit 1
fi
debug "head_sha=$HEAD_SHA pr_author=$PR_AUTHOR state=$PR_STATE"
if [ "$PR_STATE" != "open" ]; then
  echo "::notice::PR #$PR_NUMBER state is $PR_STATE; refire is a no-op on closed PRs."
  exit 0
fi
# The tier check excludes the PR author's own approval by comparing approver
# logins with PR_AUTHOR. A missing login comes back from jq as the literal
# "null", which would match nobody and silently switch that exclusion off —
# refuse to evaluate the gate rather than run it without self-review
# protection.
if [ -z "$PR_AUTHOR" ] || [ "$PR_AUTHOR" = "null" ]; then
  echo "::error::Could not resolve user.login (PR author) from PR #$PR_NUMBER response"
  exit 1
fi
# 2. Rate-limit: skip if our context was updated in the last $RATE_LIMIT_SEC.
# The statuses list is requested newest-first (sort=newest); we look at the
# first entry whose context matches ours. SOP_REFIRE_DISABLE_RATE_LIMIT=1
# bypasses the whole check (used by tests).
if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
STATUSES_FILE=$(mktemp)
# Re-register the EXIT trap so both temp files are removed.
trap 'rm -f "$PR_FILE" "$STATUSES_FILE"' EXIT
ST_HTTP=$(curl -sS -o "$STATUSES_FILE" -w '%{http_code}' -H "$AUTH" \
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}?limit=50&sort=newest")
debug "statuses-list HTTP=$ST_HTTP"
# A non-200 answer skips rate limiting rather than failing the refire.
if [ "$ST_HTTP" = "200" ]; then
LAST_UPDATED=$(jq -r --arg c "$CONTEXT" \
'[.[] | select(.context == $c)] | first | .updated_at // ""' \
<"$STATUSES_FILE")
if [ -n "$LAST_UPDATED" ] && [ "$LAST_UPDATED" != "null" ]; then
# Parse RFC3339 → epoch. Use python -c for portability (date(1) -d
# differs between BSD/GNU; the Gitea runner is Ubuntu so GNU date
# works, but we keep python for future container variance).
# A timestamp that cannot be parsed yields epoch 0, i.e. "very old", so the
# refire proceeds.
LAST_EPOCH=$(python3 -c "import sys,datetime;print(int(datetime.datetime.fromisoformat(sys.argv[1].replace('Z','+00:00')).timestamp()))" "$LAST_UPDATED" 2>/dev/null || echo "0")
NOW_EPOCH=$(date -u +%s)
AGE=$((NOW_EPOCH - LAST_EPOCH))
debug "last status update: $LAST_UPDATED ($AGE seconds ago)"
# AGE >= 0 guards against a status timestamp that lies in the future.
if [ "$AGE" -lt "$RATE_LIMIT_SEC" ] && [ "$AGE" -ge 0 ]; then
echo "::notice::sop-tier-refire rate-limited — last status update was ${AGE}s ago (<${RATE_LIMIT_SEC}s window). Try again shortly."
exit 0
fi
fi
fi
fi
# 3. Invoke sop-tier-check.sh with the env it expects.
#
# FAIL-CLOSED contract (was fail-open — fixed 2026-06-05,
# fix/core-ci-fail-closed). The previous shape was:
# bash "$SCRIPT" || true
# TIER_EXIT=0 # <-- hardcoded success
# which discarded the real verdict and ALWAYS POSTed
# `state=success` for the REQUIRED context
# `sop-tier-check / tier-check (pull_request)`. That meant ANY
# collaborator could comment `/refire-tier-check` to forcibly green
# the SOP-6 approval gate on the PR head SHA — a fail-open AND a
# privilege bypass of branch protection. The canonical
# pull_request_target workflow's conclusion publishes the same
# context honestly (red on a real violation); the refire MUST mirror
# THAT honesty, not a discarded exit code.
#
# We now capture the script's real exit code under `set +e` and POST
# success ONLY when it actually exited 0. sop-tier-check.sh itself
# fails closed on infra faults (no SOP_FAIL_OPEN in this refire env),
# so a bad token / unreachable API / missing jq → non-zero → we POST
# `state=failure`, never a false green.
#
# SOP_REFIRE_TIER_CHECK_SCRIPT env var lets tests substitute a mock —
# sop-tier-check.sh uses bash 4+ associative arrays which trigger a known
# bash 3.2 parser bug (`tier: unbound variable` from declare -A with
# `set -u`). Linux Gitea runners ship bash 4/5 so production is fine;
# the override exists so the bash 3.2 dev box can still exercise the
# refire glue logic end-to-end.
# Script to run: the test override if set, otherwise sop-tier-check.sh from
# the directory this script lives in.
SCRIPT="${SOP_REFIRE_TIER_CHECK_SCRIPT:-$(dirname "$0")/sop-tier-check.sh}"
if [ ! -f "$SCRIPT" ]; then
echo "::error::sop-tier-check.sh not found at $SCRIPT — refire requires the canonical script"
exit 1
fi
# Re-invoke. Pipe stdout/stderr through so the runner log shows the
# tier-check decision inline. Capture the REAL exit code (set +e so a
# non-zero verdict doesn't abort this script under set -e) — the POST
# below keys off it, so a failed tier-check posts state=failure.
# The variable assignments prefix the command, so they apply to this one
# invocation only. SOP_DEBUG and SOP_LEGACY_CHECK default to 0 when unset.
set +e
GITEA_TOKEN="$GITEA_TOKEN" \
GITEA_HOST="$GITEA_HOST" \
REPO="$REPO" \
PR_NUMBER="$PR_NUMBER" \
PR_AUTHOR="$PR_AUTHOR" \
SOP_DEBUG="${SOP_DEBUG:-0}" \
SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
bash "$SCRIPT"
TIER_EXIT=$?
set -e
debug "sop-tier-check.sh exit=$TIER_EXIT"
# 4. POST the resulting status. Only a tier-check exit code of exactly 0 maps
# to state=success; every other exit code maps to state=failure.
if [ "$TIER_EXIT" -eq 0 ]; then
STATE="success"
DESCRIPTION="Refired via /refire-tier-check by $COMMENT_AUTHOR"
else
STATE="failure"
DESCRIPTION="Refired via /refire-tier-check; tier-check failed (see workflow log)"
fi
# Status target_url points at the runner log so a curious reviewer can
# follow it back. SERVER_URL + RUN_ID + JOB_ID isn't trivially constructible
# from the bash env on Gitea 1.22.6, so we point at the PR itself.
TARGET_URL="https://${GITEA_HOST}/${OWNER}/${NAME}/pulls/${PR_NUMBER}"
# Build the JSON body with jq (--arg) so every value is escaped correctly.
POST_BODY=$(jq -nc \
--arg state "$STATE" \
--arg context "$CONTEXT" \
--arg description "$DESCRIPTION" \
--arg target_url "$TARGET_URL" \
'{state:$state, context:$context, description:$description, target_url:$target_url}')
POST_FILE=$(mktemp)
# STATUSES_FILE only exists when the rate-limit check ran, hence the :- default.
trap 'rm -f "$PR_FILE" "${STATUSES_FILE:-}" "$POST_FILE"' EXIT
POST_HTTP=$(curl -sS -o "$POST_FILE" -w '%{http_code}' \
-X POST -H "$AUTH" -H "Content-Type: application/json" \
-d "$POST_BODY" \
"${API}/repos/${OWNER}/${NAME}/statuses/${HEAD_SHA}")
# Only HTTP 200 and 201 are accepted as a successful POST.
if [ "$POST_HTTP" != "200" ] && [ "$POST_HTTP" != "201" ]; then
echo "::error::POST /statuses/$HEAD_SHA returned HTTP $POST_HTTP (body $(head -c 200 "$POST_FILE"))"
exit 1
fi
echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
# Exit 0: the refire JOB succeeded — it re-evaluated the gate and posted
# an HONEST status. The gate VERDICT is carried by the POSTed status
# ($STATE), which is what branch protection reads; a failing tier-check
# posts state=failure (red on the PR), so there is no fail-open. We do
# NOT also exit non-zero on a failing verdict — that would double-signal
# the same failure as both a red status AND a red refire job. The
# fail-open that mattered (TIER_EXIT hardcoded to 0 → always state=success)
# is fixed above by capturing the real exit code.
exit 0
+28
View File
@@ -0,0 +1,28 @@
#!/usr/bin/env bash
# Mock sop-tier-check.sh for sop-tier-refire tests.
#
# Exits 0 ("PASS") if $MOCK_TIER_RESULT == "pass", else exits 1.
# This lets the refire tests cover the success + failure status-POST
# paths without invoking the real sop-tier-check.sh (which uses bash 4+
# associative arrays — known parser bug on macOS bash 3.2 dev box).
set -euo pipefail
# Requested outcome; an unset or empty MOCK_TIER_RESULT means "pass".
_mock_result="${MOCK_TIER_RESULT:-pass}"
if [ "$_mock_result" = "pass" ]; then
  echo "::notice::mock tier-check: PASS"
  exit 0
elif [ "$_mock_result" = "fail_no_label" ]; then
  echo "::error::mock tier-check: no tier label"
  exit 1
elif [ "$_mock_result" = "fail_no_approvals" ]; then
  echo "::error::mock tier-check: no approving reviews"
  exit 1
fi
# Anything else is a test-setup mistake; exit 2 keeps it distinguishable from
# the simulated failures above.
echo "::error::mock tier-check: unknown MOCK_TIER_RESULT=${MOCK_TIER_RESULT:-}"
exit 2
+208
View File
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""Stub Gitea API for sop-tier-refire test scenarios.
Reads $FIXTURE_STATE_DIR/scenario to decide what to return for each
endpoint the sop-tier-refire.sh + sop-tier-check.sh scripts call.
Captures every POST to /statuses/{sha} into posted_statuses.jsonl so
the test can assert what the script tried to write.
Scenarios:
T1_success — tier:low + APPROVED by engineer → tier-check passes
T2_no_tier_label — no tier label → tier-check exits 1 before POST
T3_no_approvals — tier:low but zero approving reviews → exits 1
T4_closed — PR state=closed → refire is a no-op
T5_rate_limited — last status update 5 seconds ago → skip
Usage:
FIXTURE_STATE_DIR=/tmp/x python3 _refire_fixture.py 8080
"""
import datetime
import http.server
import json
import os
import re
import sys
import urllib.parse
STATE_DIR = os.environ["FIXTURE_STATE_DIR"]
def scenario() -> str:
    """Return the name of the active test scenario.

    The name is the whitespace-stripped content of the ``scenario`` file in
    STATE_DIR; when that file does not exist the default is "T1_success".
    """
    marker = os.path.join(STATE_DIR, "scenario")
    if os.path.isfile(marker):
        with open(marker, encoding="utf-8") as handle:
            return handle.read().strip()
    return "T1_success"
def now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc).isoformat()
def append_post(body: dict) -> None:
    """Append ``body`` as one JSON line to posted_statuses.jsonl in STATE_DIR."""
    log_path = os.path.join(STATE_DIR, "posted_statuses.jsonl")
    with open(log_path, "a") as sink:
        sink.write(f"{json.dumps(body)}\n")
def pr_payload() -> dict:
    """Build the canned pull-request object.

    The PR is "closed" in scenario T4_closed and "open" in every other one;
    number, head SHA and author login are fixed.
    """
    is_closed = scenario() == "T4_closed"
    return {
        "number": 999,
        "state": "closed" if is_closed else "open",
        "head": {"sha": "deadbeef0000111122223333444455556666"},
        "user": {"login": "feature-author"},
    }
def labels_payload() -> list:
    """Build the canned label list for the PR.

    Scenario T2_no_tier_label carries only "bug"; every other scenario
    carries "tier:low" and "ci".
    """
    if scenario() == "T2_no_tier_label":
        names = ["bug"]
    else:
        names = ["tier:low", "ci"]
    return [{"name": label} for label in names]
def reviews_payload() -> list:
    """Build the canned review list for the PR.

    Scenario T3_no_approvals has no reviews. Every other scenario has a
    single APPROVED review by ``reviewer-engineer``.

    The review carries ``commit_id`` set to the head SHA that ``pr_payload``
    serves: sop-tier-check.sh only counts an approval whose ``commit_id``
    equals the PR head SHA, so without this field the approval is discarded
    and the "tier-check passes" scenario could never pass against the real
    script.
    """
    if scenario() == "T3_no_approvals":
        return []
    return [
        {
            "state": "APPROVED",
            "user": {"login": "reviewer-engineer"},
            # Read from pr_payload() so the two payloads cannot drift apart.
            "commit_id": pr_payload()["head"]["sha"],
        }
    ]
def teams_payload() -> list:
    """Build the canned org team list: ceo (5), engineers (2), managers (6).

    These are the team names that the visible TIER_EXPR map resolves by name.
    """
    roster = (("ceo", 5), ("engineers", 2), ("managers", 6))
    return [{"id": team_id, "name": team_name} for team_name, team_id in roster]
def statuses_payload() -> list:
    """Build the canned commit-status list.

    Only scenario T5_rate_limited returns an entry: a "failure" status for
    the tier-check context whose ``updated_at`` is five seconds in the past.
    All other scenarios return an empty list.
    """
    if scenario() != "T5_rate_limited":
        return []
    five_seconds_ago = datetime.datetime.now(
        datetime.timezone.utc
    ) - datetime.timedelta(seconds=5)
    return [
        {
            "context": "sop-tier-check / tier-check (pull_request)",
            "state": "failure",
            "updated_at": five_seconds_ago.isoformat(),
        }
    ]
def user_payload() -> dict:
    """Build the canned /api/v1/user response (the token's own identity)."""
    # Answers the WHOAMI probe in sop-tier-check.sh.
    return dict(login="sop-tier-bot-fixture")
class Handler(http.server.BaseHTTPRequestHandler):
    """HTTP handler that answers the Gitea API calls made by the SOP scripts.

    GET requests are served from the ``*_payload`` builders; POSTs to a
    commit-status URL are recorded through ``append_post``. Any unknown route
    gets a 404 with a small JSON body naming the path.
    """

    def log_message(self, *args, **kwargs):  # noqa: D401
        # Silence the default per-request access log.
        return None

    def _json(self, code: int, body) -> None:
        """Send ``body`` as a JSON response with status ``code``."""
        encoded = json.dumps(body).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)

    def _empty(self, code: int) -> None:
        """Send a body-less response with status ``code``."""
        self.send_response(code)
        self.send_header("Content-Length", "0")
        self.end_headers()

    def do_GET(self):  # noqa: N802
        """Route a GET request; the query string is ignored."""
        path = urllib.parse.urlparse(self.path).path

        def hit(pattern: str):
            return re.match(pattern, path)

        # Fixed paths.
        if path == "/_ping":
            return self._json(200, {"ok": True})
        if path == "/api/v1/user":
            return self._json(200, user_payload())

        # /api/v1/teams/{id}/members/{login}: 204 only for reviewer-engineer
        # in team id 2 (engineers); every other pair is a 404.
        probe = hit(r"^/api/v1/teams/(\d+)/members/([^/]+)$")
        if probe:
            is_member = probe.group(1) == "2" and probe.group(2) == "reviewer-engineer"
            return self._empty(204 if is_member else 404)

        # /api/v1/orgs/{owner}/members/{login}: always 404. The visible
        # sop-tier-check.sh does not call this endpoint.
        if hit(r"^/api/v1/orgs/[^/]+/members/[^/]+$"):
            return self._empty(404)

        # /api/v1/repos/{owner}/{name}/pulls/{n}
        if hit(r"^/api/v1/repos/[^/]+/[^/]+/pulls/(\d+)$"):
            return self._json(200, pr_payload())
        # /api/v1/repos/{owner}/{name}/issues/{n}/labels
        if hit(r"^/api/v1/repos/[^/]+/[^/]+/issues/\d+/labels$"):
            return self._json(200, labels_payload())
        # /api/v1/repos/{owner}/{name}/pulls/{n}/reviews
        if hit(r"^/api/v1/repos/[^/]+/[^/]+/pulls/\d+/reviews$"):
            return self._json(200, reviews_payload())
        # /api/v1/orgs/{owner}/teams
        if hit(r"^/api/v1/orgs/[^/]+/teams$"):
            return self._json(200, teams_payload())
        # /api/v1/repos/{owner}/{name}/statuses/{sha}
        if hit(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$"):
            return self._json(200, statuses_payload())

        return self._json(404, {"path": path, "msg": "fixture: no route"})

    def do_POST(self):  # noqa: N802
        """Record a status POST; any other POST route is a 404."""
        path = urllib.parse.urlparse(self.path).path
        size = int(self.headers.get("Content-Length") or 0)
        raw = self.rfile.read(size) if size else b""
        # A body that is not valid JSON is kept verbatim under "_raw".
        try:
            body = json.loads(raw) if raw else {}
        except Exception:
            body = {"_raw": raw.decode(errors="replace")}

        if not re.match(r"^/api/v1/repos/[^/]+/[^/]+/statuses/[^/]+$", path):
            return self._json(404, {"path": path, "msg": "fixture: no route"})

        append_post(body)
        # Echo back something status-shaped — script only checks HTTP code.
        return self._json(
            201,
            {
                "context": body.get("context"),
                "state": body.get("state"),
                "created_at": now_iso(),
            },
        )
def main():
    """Serve the fixture on 127.0.0.1 at the port given as the first CLI argument.

    Blocks forever; the caller is expected to terminate the process.
    """
    listen_port = int(sys.argv[1])
    server = http.server.ThreadingHTTPServer(("127.0.0.1", listen_port), Handler)
    server.serve_forever()
if __name__ == "__main__":
main()
@@ -134,14 +134,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
return self._json(200, [
{"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "deadbeef0000111122223333444455556666"},
])
if sc == "T23_missing_commit_id":
# APPROVED review with NO commit_id field — the SEV-1
# internal#812 / closed-#843 spoof-bug signature. The
# fail-closed SSOT must REJECT (not silently accept as
# "older Gitea row" the way the old pre-fix code did).
return self._json(200, [
{"state": "APPROVED", "official": True, "dismissed": False, "user": {"login": "core-devops"}},
])
# Default: one non-author APPROVED (current head, official)
return self._json(200, [
{"state": "APPROVED", "dismissed": False, "official": True, "user": {"login": "core-devops"}, "commit_id": "deadbeef0000111122223333444455556666"},
@@ -1,610 +0,0 @@
#!/usr/bin/env python3
"""
Mutation-verified unit tests for the SSOT fail-closed approval predicate
in _approval_validator.py (SEV-1 internal#812).
Each test asserts REJECTION explicitly. A reviewer who weakens the
predicate — e.g., by removing the commit_id check, by reintroducing the
"no commit_id is accepted" escape hatch, by changing `!=` to `==` in the
head comparison, or by allowing official == false — will trip these
tests in CI.
Run:
cd .gitea/scripts
python3 -m unittest tests.test_approval_validator -v
# or
python3 tests/test_approval_validator.py
"""
from __future__ import annotations
import os
import sys
import unittest
# Same-dir import — test lives next to _approval_validator.py
sys.path.insert(
0,
os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
)
from _approval_validator import ( # noqa: E402
classify_reviews,
is_genuine_approval,
is_official_current_head,
is_open_request_changes,
)
HEAD = "0123456789abcdef0123456789abcdef01234567"
OTHER_HEAD = "fedcba9876543210fedcba9876543210fedcba98"
def _review(
    *,
    state: str = "APPROVED",
    official: bool = True,
    dismissed: bool = False,
    stale: bool = False,
    commit_id: object = HEAD,
    user: str = "reviewer-1",
    body: str = "",
) -> dict:
    """Return a minimal review row in the shape of the Gitea reviews API.

    All arguments are keyword-only. The defaults describe an official,
    non-dismissed, non-stale APPROVED review on ``HEAD``; tests override a
    single field to exercise one rejection branch at a time. ``commit_id``
    is typed ``object`` so that deliberately malformed values can be passed.
    """
    author = {"login": user, "id": 1}
    row = {"id": 1, "user": author, "body": body}
    row.update(
        state=state,
        official=official,
        dismissed=dismissed,
        stale=stale,
        commit_id=commit_id,
    )
    return row
# ---------------------------------------------------------------------------
# Hard contract: every fail-closed branch must reject
# ---------------------------------------------------------------------------
class IsOfficialCurrentHeadFailClosed(unittest.TestCase):
    """Fail-closed contract for ``is_official_current_head``.

    One test pins the canonical accepting input; every other test feeds a
    single missing, malformed or mismatched field and asserts REJECTION.
    """

    def _assert_rejected(self, review, head, msg=None):
        """Assert the predicate yields a falsy verdict for ``(review, head)``."""
        self.assertFalse(is_official_current_head(review, head), msg)

    def test_accepts_canonical_review(self):
        verdict = is_official_current_head(_review(), HEAD)
        self.assertTrue(verdict)

    def test_rejects_non_dict(self):
        # Anything that is not a dict-shaped review row must be refused.
        for bad in (None, "string", 42, [], (), object()):
            with self.subTest(bad=bad):
                self._assert_rejected(bad, HEAD)

    def test_rejects_when_official_is_false(self):
        for v in (False, None, 0, "false"):
            with self.subTest(v=v):
                self._assert_rejected(_review(official=v), HEAD)

    def test_rejects_when_dismissed(self):
        for v in (True, "true", 1):
            with self.subTest(v=v):
                self._assert_rejected(_review(dismissed=v), HEAD)

    def test_rejects_when_stale(self):
        for v in (True, "true", 1):
            with self.subTest(v=v):
                self._assert_rejected(_review(stale=v), HEAD)

    def test_rejects_when_commit_id_missing(self):
        """FAIL-CLOSED #1: an absent/empty commit_id is REJECTED.

        This is the spoof signature that closed #843 (flagged by both CR2
        and Researcher)."""
        for bad in (None, "", 0, False, [], {}, ()):
            with self.subTest(commit_id=bad):
                self._assert_rejected(
                    _review(commit_id=bad),
                    HEAD,
                    f"commit_id={bad!r} must reject (fail-closed)",
                )

    def test_rejects_when_commit_id_wrong_type(self):
        wrong_types = (123, 1.5, True, ["abc"], {"sha": HEAD}, ("tuple",))
        for bad in wrong_types:
            with self.subTest(commit_id=bad):
                self._assert_rejected(_review(commit_id=bad), HEAD)

    def test_rejects_when_commit_id_stale(self):
        """FAIL-CLOSED #2: a present-but-different commit_id is REJECTED.

        A review left on a previous head cannot count for the current one."""
        on_old_head = _review(commit_id=OTHER_HEAD)
        self._assert_rejected(on_old_head, HEAD)

    def test_rejects_when_head_missing(self):
        for bad in (None, "", 0, False):
            with self.subTest(head=bad):
                self._assert_rejected(_review(), bad)

    def test_rejects_when_head_wrong_type(self):
        for not_a_sha in (123, ["x"]):
            self._assert_rejected(_review(), not_a_sha)
# ---------------------------------------------------------------------------
# is_genuine_approval
# ---------------------------------------------------------------------------
class IsGenuineApprovalContract(unittest.TestCase):
    """Contract tests for ``is_genuine_approval`` evaluated against ``HEAD``."""

    @staticmethod
    def _approves(review, **kwargs):
        """Run ``is_genuine_approval`` for ``review`` with ``headsha=HEAD``."""
        return is_genuine_approval(review, headsha=HEAD, **kwargs)

    def test_accepts_canonical_approval(self):
        canonical = _review(state="APPROVED")
        self.assertTrue(self._approves(canonical))

    def test_rejects_non_approved_states(self):
        other_states = (
            "REQUEST_CHANGES", "COMMENT", "PENDING", "DISMISSED",
            "approve", "", "bogus",
        )
        for state in other_states:
            with self.subTest(state=state):
                self.assertFalse(self._approves(_review(state=state)))

    def test_rejects_case_coerced_approved_states(self):
        """EXACT-ENUM fail-closed (RCs 9849/9851/9852).

        Gitea always emits the canonical UPPERCASE "APPROVED". A lowercase,
        mixed-case or padded value is the signature of a forged row and MUST
        be rejected rather than coerced via .upper() into an accepted
        APPROVED. Each of these was ACCEPTED before the exact-enum fix."""
        coerced = (
            "approved", "Approved", "ApProVeD", "APPROVED ", " APPROVED",
            "approved\n", "\tAPPROVED",
        )
        for state in coerced:
            with self.subTest(state=state):
                self.assertFalse(
                    self._approves(_review(state=state)),
                    f"case-coerced/padded state {state!r} must NOT count as "
                    "a genuine approval",
                )

    def test_rejects_non_official_approval(self):
        """A comment-based / non-official 'APPROVED' is REJECTED.

        PM: 'reject comment-based / non-official reviews'."""
        unofficial = _review(state="APPROVED", official=False)
        self.assertFalse(self._approves(unofficial))

    def test_rejects_dismissed_approval(self):
        dismissed = _review(state="APPROVED", dismissed=True)
        self.assertFalse(self._approves(dismissed))

    def test_rejects_stale_head_approval(self):
        """commit_id != head is REJECTED: an approval left on an old head
        cannot count, even if it was official and not dismissed."""
        on_old_head = _review(state="APPROVED", commit_id=OTHER_HEAD)
        self.assertFalse(self._approves(on_old_head))

    def test_rejects_missing_commit_id_approval(self):
        """FAIL-CLOSED #3: the SEV-1 case. An APPROVED review with NO
        commit_id is the spoof-bug signature. Reject."""
        for bad in (None, "", 0, False):
            with self.subTest(commit_id=bad):
                self.assertFalse(
                    self._approves(_review(state="APPROVED", commit_id=bad)),
                    f"missing commit_id={bad!r} must reject",
                )

    def test_reviewer_set_filters_users(self):
        allowed = {"alice", "bob"}
        self.assertTrue(
            self._approves(_review(user="alice"), reviewer_set=allowed)
        )
        self.assertFalse(
            self._approves(_review(user="carol"), reviewer_set=allowed)
        )

    def test_reviewer_set_none_skips_check(self):
        # None means "no team filter at this layer" (e.g., review-check.sh
        # applies its own team-membership probe separately).
        unfiltered = self._approves(_review(user="anyone"), reviewer_set=None)
        self.assertTrue(unfiltered)
# ---------------------------------------------------------------------------
# is_open_request_changes
# ---------------------------------------------------------------------------
class IsOpenRequestChangesContract(unittest.TestCase):
    """Contract tests for ``is_open_request_changes`` evaluated against ``HEAD``."""

    @staticmethod
    def _blocks(review):
        """Run ``is_open_request_changes`` for ``review`` with ``headsha=HEAD``."""
        return is_open_request_changes(review, headsha=HEAD)

    def test_accepts_canonical_request_changes(self):
        canonical = _review(state="REQUEST_CHANGES")
        self.assertTrue(self._blocks(canonical))

    def test_rejects_non_request_changes_states(self):
        for state in ("APPROVED", "COMMENT", "PENDING", "DISMISSED"):
            with self.subTest(state=state):
                self.assertFalse(self._blocks(_review(state=state)))

    def test_rejects_case_coerced_request_changes_states(self):
        """EXACT-ENUM fail-closed: a lowercase, mixed-case or padded
        "request_changes" must NOT be coerced into an open-block match.
        Before the exact-enum fix, .upper() accepted these as
        REQUEST_CHANGES."""
        coerced = (
            "request_changes", "Request_Changes", "REQUEST_CHANGES ",
            " REQUEST_CHANGES", "request_changes\n",
        )
        for state in coerced:
            with self.subTest(state=state):
                self.assertFalse(
                    self._blocks(_review(state=state)),
                    f"case-coerced/padded state {state!r} must NOT count as "
                    "an open REQUEST_CHANGES",
                )

    def test_rejects_when_dismissed(self):
        dismissed = _review(state="REQUEST_CHANGES", dismissed=True)
        self.assertFalse(self._blocks(dismissed))

    def test_rejects_when_stale_head(self):
        on_old_head = _review(state="REQUEST_CHANGES", commit_id=OTHER_HEAD)
        self.assertFalse(self._blocks(on_old_head))

    def test_rejects_when_missing_commit_id(self):
        for bad in (None, "", 0):
            with self.subTest(commit_id=bad):
                row = _review(state="REQUEST_CHANGES", commit_id=bad)
                self.assertFalse(self._blocks(row))
# ---------------------------------------------------------------------------
# classify_reviews — the merge-queue consumer
# ---------------------------------------------------------------------------
class ClassifyReviewsContract(unittest.TestCase):
    """Contract tests for ``classify_reviews``, the merge-queue reducer.

    Every test builds an ordered list of review rows (later entries are the
    later reviews) and checks the ``(approvers, request_changes)`` pair
    returned for ``HEAD``.
    """

    @staticmethod
    def _classify(rows, **kwargs):
        """Run ``classify_reviews`` over ``rows`` with ``headsha=HEAD``."""
        return classify_reviews(rows, headsha=HEAD, **kwargs)

    def test_basic_approvers_and_request_changes(self):
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertEqual(approved, {"alice"})
        self.assertEqual(blocking, ["bob"])

    def test_reviewer_set_filters_early(self):
        rows = [
            _review(user=name, state="APPROVED", commit_id=HEAD)
            for name in ("alice", "carol")
        ]
        approved, _ignored = self._classify(rows, reviewer_set={"alice"})
        self.assertEqual(approved, {"alice"})

    def test_latest_review_per_user_wins(self):
        # alice's REQUEST_CHANGES (latest) supersedes her earlier APPROVED.
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="alice", state="REQUEST_CHANGES", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertNotIn("alice", approved)
        self.assertIn("alice", blocking)

    def test_stale_head_approval_excluded(self):
        rows = [_review(user="alice", state="APPROVED", commit_id=OTHER_HEAD)]
        approved, _ignored = self._classify(rows)
        self.assertEqual(approved, set())

    def test_missing_commit_id_approval_excluded(self):
        """The SEV-1 fail-open surface. APPROVED + no commit_id must NOT
        count toward approvers, even with stale=False/dismissed=False."""
        rows = [
            _review(user="alice", state="APPROVED", commit_id=None),
            _review(user="bob", state="APPROVED", commit_id=""),
        ]
        approved, _ignored = self._classify(rows)
        self.assertEqual(approved, set())

    def test_dismissed_approval_excluded(self):
        rows = [
            _review(user="alice", state="APPROVED", dismissed=True, commit_id=HEAD),
        ]
        approved, _ignored = self._classify(rows)
        self.assertEqual(approved, set())

    def test_non_official_approval_excluded(self):
        rows = [
            _review(user="alice", state="APPROVED", official=False, commit_id=HEAD),
        ]
        approved, _ignored = self._classify(rows)
        self.assertEqual(approved, set())

    def test_comment_state_excluded(self):
        rows = [_review(user="alice", state="COMMENT", commit_id=HEAD)]
        approved, _ignored = self._classify(rows)
        self.assertEqual(approved, set())

    def test_case_coerced_approved_not_counted(self):
        """EXACT-ENUM via the reducer: a lowercase 'approved' on an otherwise
        valid official current-head row must NOT be counted as an approver.
        Before the fix, classify_reviews coerced it via .upper()."""
        for state in ("approved", "Approved", "APPROVED "):
            with self.subTest(state=state):
                rows = [_review(user="alice", state=state, commit_id=HEAD)]
                approved, blocking = self._classify(rows)
                self.assertEqual(approved, set())
                self.assertEqual(blocking, [])

    def test_case_coerced_request_changes_not_silently_dropped(self):
        """EXACT-ENUM via the reducer: a lowercase 'request_changes' must be
        rejected (not coerced into a block). Crucially, it must NOT silently
        erase a SAME-USER genuine current-head REQUEST_CHANGES posted
        earlier — the case-variant later row is invalid and is ignored, so
        the genuine block stands."""
        rows = [
            _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
            _review(user="bob", state="request_changes", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertIn("bob", blocking)
        self.assertNotIn("bob", approved)

    def test_stale_head_request_changes_excluded(self):
        # A REQUEST_CHANGES on a previous head must NOT block the current head.
        rows = [_review(user="bob", state="REQUEST_CHANGES", commit_id=OTHER_HEAD)]
        _ignored, blocking = self._classify(rows)
        self.assertEqual(blocking, [])

    # -----------------------------------------------------------------
    # VALIDATE-BEFORE-REDUCE regression tests (SEV-1 internal#812 follow-up).
    #
    # The bug: classify_reviews reduced to the LATEST row per user FIRST and
    # validated AFTER. A later INVALID row (a COMMENT, or APPROVED/
    # REQUEST_CHANGES with a null/old commit_id) from the same user could
    # overwrite a genuine current-head review — masking an approval or
    # ERASING a REQUEST_CHANGES block. The fix validates before the reduce,
    # so an invalid later row is never eligible to be a user's "latest".
    # -----------------------------------------------------------------

    def test_genuine_approval_not_masked_by_later_comment(self):
        """A genuine current-head APPROVED followed by a LATER COMMENT from
        the SAME user must STILL count as an approval; the COMMENT row must
        not erase it. This is the reduce-before-validate masking bug."""
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="alice", state="COMMENT", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertIn("alice", approved)
        self.assertEqual(blocking, [])

    def test_genuine_approval_not_masked_by_later_null_commit_id(self):
        """A genuine current-head APPROVED followed by a LATER APPROVED with
        a null/empty commit_id (the spoof/invalid signature) from the SAME
        user must STILL count. The invalid later row must be ignored, not
        allowed to overwrite the valid earlier approval."""
        for bad in (None, ""):
            with self.subTest(commit_id=bad):
                rows = [
                    _review(user="alice", state="APPROVED", commit_id=HEAD),
                    _review(user="alice", state="APPROVED", commit_id=bad),
                ]
                approved, _ignored = self._classify(rows)
                self.assertIn(
                    "alice", approved,
                    f"later invalid commit_id={bad!r} must not mask the "
                    "genuine current-head approval",
                )

    def test_genuine_approval_not_masked_by_later_stale_commit_id(self):
        """A genuine current-head APPROVED followed by a LATER APPROVED on a
        STALE (old) head from the SAME user must STILL count toward
        approvers — the stale later row is invalid and must be ignored."""
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="alice", state="APPROVED", commit_id=OTHER_HEAD),
        ]
        approved, _ignored = self._classify(rows)
        self.assertIn("alice", approved)

    def test_request_changes_not_erased_by_later_comment(self):
        """A genuine current-head REQUEST_CHANGES followed by a LATER COMMENT
        from the SAME user must STILL block. The later invalid row must not
        erase the REQUEST_CHANGES — this is the worse, silently-evaporating-
        block variant of the bug."""
        rows = [
            _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
            _review(user="bob", state="COMMENT", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertIn("bob", blocking)
        self.assertNotIn("bob", approved)

    def test_request_changes_not_erased_by_later_null_commit_id(self):
        """A genuine current-head REQUEST_CHANGES followed by a LATER
        REQUEST_CHANGES with a null/old commit_id from the SAME user must
        STILL block. The invalid later row must be ignored, not allowed to
        relocate the user's verdict off the current head."""
        for bad in (None, "", OTHER_HEAD):
            with self.subTest(commit_id=bad):
                rows = [
                    _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
                    _review(user="bob", state="REQUEST_CHANGES", commit_id=bad),
                ]
                _ignored, blocking = self._classify(rows)
                self.assertIn(
                    "bob", blocking,
                    f"later invalid commit_id={bad!r} must not erase the "
                    "genuine current-head REQUEST_CHANGES block",
                )

    def test_request_changes_not_erased_by_later_approved_invalid(self):
        """A genuine current-head REQUEST_CHANGES followed by a LATER
        INVALID APPROVED (null commit_id) from the SAME user must STILL
        block AND must NOT count the user as an approver. The invalid
        approval must not flip a real block into a pass."""
        rows = [
            _review(user="bob", state="REQUEST_CHANGES", commit_id=HEAD),
            _review(user="bob", state="APPROVED", commit_id=None),
        ]
        approved, blocking = self._classify(rows)
        self.assertIn("bob", blocking)
        self.assertNotIn("bob", approved)

    def test_genuine_request_changes_still_supersedes_genuine_approval(self):
        """Sanity: a genuine LATER current-head REQUEST_CHANGES still
        supersedes an earlier genuine APPROVED from the same user (the
        valid-row supersession we MUST preserve — only INVALID later rows
        are ignored). Guards against an over-correction that ignores all
        later rows."""
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="alice", state="REQUEST_CHANGES", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertNotIn("alice", approved)
        self.assertIn("alice", blocking)

    def test_genuine_approval_still_supersedes_genuine_request_changes(self):
        """Sanity: a genuine LATER current-head APPROVED supersedes an
        earlier genuine REQUEST_CHANGES from the same user."""
        rows = [
            _review(user="alice", state="REQUEST_CHANGES", commit_id=HEAD),
            _review(user="alice", state="APPROVED", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertIn("alice", approved)
        self.assertEqual(blocking, [])

    def test_two_valid_approvers_plus_one_invalid_later_row(self):
        """Two distinct users with valid current-head approvals + a third
        user whose ONLY genuine approval is followed by an invalid later
        row → all three real approvers are counted; the invalid later row
        does not drop the third user."""
        rows = [
            _review(user="alice", state="APPROVED", commit_id=HEAD),
            _review(user="bob", state="APPROVED", commit_id=HEAD),
            _review(user="carol", state="APPROVED", commit_id=HEAD),
            _review(user="carol", state="COMMENT", commit_id=HEAD),
        ]
        approved, blocking = self._classify(rows)
        self.assertEqual(approved, {"alice", "bob", "carol"})
        self.assertEqual(blocking, [])
# ---------------------------------------------------------------------------
# Mutation-resistance smoke checks
#
# These tests document the mutations a reviewer would have to apply to
# weaken the gate. They are not synthetic; they verify that the
# predicate is structured so each known-softening mutation would also
# fail at least one other test in this file. We can't actually mutate
# the source in CI, but these tests are explicit about the mutations
# that would slip through, and the suite is dense enough that any
# loosening of the predicate will fail multiple cases.
# ---------------------------------------------------------------------------
class MutationResistance(unittest.TestCase):
    """Smoke checks naming the mutations that would soften the gate.

    Each docstring lists the other tests in this file that the same
    mutation would also break.
    """

    def test_documented_mutation_remove_commit_id_check_fails(self):
        """If a reviewer removes the commit_id check (e.g., reverts to
        the pre-fix `if isinstance(commit_id, str) and commit_id and
        headsha:` guard, or replaces `commit_id != headsha` with True),
        the missing-commit_id tests (test_rejects_when_commit_id_missing
        in IsOfficialCurrentHeadFailClosed, test_rejects_missing_commit_id_approval
        in IsGenuineApprovalContract, test_missing_commit_id_approval_excluded
        in ClassifyReviewsContract) would all fail. The reviewer would have
        to weaken all three test categories to slip the SEV-1 surface in."""
        # Sanity: every missing-commit_id case is a False today.
        for bad in (None, "", 0, False):
            with self.subTest(commit_id=bad):
                head_verdict = is_official_current_head(
                    _review(commit_id=bad), HEAD
                )
                self.assertFalse(head_verdict)
                approval_verdict = is_genuine_approval(
                    _review(commit_id=bad), headsha=HEAD
                )
                self.assertFalse(approval_verdict)

    def test_documented_mutation_change_neq_to_eq_fails(self):
        """If a reviewer changes `commit_id != headsha` to `commit_id == headsha`
        in the head comparison (inverting the check), the stale-head tests
        (test_rejects_when_commit_id_stale, test_stale_head_approval_excluded)
        would fail because the wrong head would now match."""
        on_old_head = _review(commit_id=OTHER_HEAD)
        self.assertFalse(is_official_current_head(on_old_head, HEAD))

    def test_documented_mutation_drop_official_check_fails(self):
        """If a reviewer drops the `if not review.get('official')` check, the
        non-official tests (test_rejects_when_official_is_false,
        test_rejects_non_official_approval, test_non_official_approval_excluded)
        would all fail."""
        unofficial = _review(state="APPROVED", official=False)
        self.assertFalse(is_genuine_approval(unofficial, headsha=HEAD))
if __name__ == "__main__":
unittest.main()
@@ -1,193 +0,0 @@
#!/usr/bin/env bash
# test_audit_force_merge.sh — regression lock for audit-force-merge fail-closed
# behavior. Verifies every schema validation path via direct jq filter tests.
#
# Usage: bash test_audit_force_merge.sh
set -euo pipefail
# Print "FAIL: <message>" on stderr and abort the test script with status 1.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}
# Print "PASS: <message>" on stdout for a check that succeeded.
pass() {
  printf 'PASS: %s\n' "$*"
}
[ -x "$(command -v jq)" ] || { echo "SKIP: jq not on PATH"; exit 0; }
HEAD_SHA="deadbeef00000000000000000000000000000000"
# The schema validation jq expression from audit-force-merge.sh.
# Type-check a PR JSON document read from stdin and print the boolean result.
# The jq expression is a conjunction of type checks only (values are not
# inspected): merged must be a boolean, merge_commit_sha a string, and
# merged_by/base/head must each be an object carrying a string login/ref/sha.
validate_pr_schema() {
jq -r '
(.merged | type == "boolean") and
(.merge_commit_sha | type == "string") and
(.merged_by | type == "object") and (.merged_by.login | type == "string") and
(.base | type == "object") and (.base.ref | type == "string") and
(.head | type == "object") and (.head.sha | type == "string")
'
}
# Read a JSON document from stdin and print whether its .statuses field is an
# array ("true"/"false"). A missing field evaluates as null, i.e. not an array.
validate_statuses_type() {
jq -r '(.statuses | type) == "array"'
}
# T1 — valid PR payload → true
T1=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T1" = "true" ] || fail "T1: valid payload should pass schema"
pass "T1: valid payload passes schema"
# T2 — merged=false (valid types) → true (schema is about types, not values)
T2=$(echo '{"merged":false,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T2" = "true" ] || fail "T2: merged=false with valid types should pass schema"
pass "T2: merged=false with valid types passes schema"
# T3 — missing merged field → false
T3=$(echo '{"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T3" = "false" ] || fail "T3: missing merged should fail schema"
pass "T3: missing merged fails schema"
# T4 — merged is string "true" instead of boolean → false
T4=$(echo '{"merged":"true","merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T4" = "false" ] || fail "T4: merged as string should fail schema"
pass "T4: merged as string fails schema"
# T5 — merge_commit_sha is null → false
T5=$(echo '{"merged":true,"merge_commit_sha":null,"merged_by":{"login":"u"},"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T5" = "false" ] || fail "T5: null merge_commit_sha should fail schema"
pass "T5: null merge_commit_sha fails schema"
# T6 — merged_by is null → false
T6=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":null,"base":{"ref":"main"},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T6" = "false" ] || fail "T6: null merged_by should fail schema"
pass "T6: null merged_by fails schema"
# T7 — base.ref is number → false
T7=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":123},"head":{"sha":"def"}}' | validate_pr_schema)
[ "$T7" = "false" ] || fail "T7: numeric base.ref should fail schema"
pass "T7: numeric base.ref fails schema"
# T8 — head is missing → false
T8=$(echo '{"merged":true,"merge_commit_sha":"abc","merged_by":{"login":"u"},"base":{"ref":"main"}}' | validate_pr_schema)
[ "$T8" = "false" ] || fail "T8: missing head should fail schema"
pass "T8: missing head fails schema"
# T9 — statuses missing → false
T9=$(echo '{}' | validate_statuses_type)
[ "$T9" = "false" ] || fail "T9: missing statuses should fail type check"
pass "T9: missing statuses fails type check"
# T10 — statuses is string → false
T10=$(echo '{"statuses":"unexpected"}' | validate_statuses_type)
[ "$T10" = "false" ] || fail "T10: string statuses should fail type check"
pass "T10: string statuses fails type check"
# T11 — statuses is null → false
T11=$(echo '{"statuses":null}' | validate_statuses_type)
[ "$T11" = "false" ] || fail "T11: null statuses should fail type check"
pass "T11: null statuses fails type check"
# T12 — statuses is array → true
T12=$(echo '{"statuses":[{"context":"c1","status":"success"}]}' | validate_statuses_type)
[ "$T12" = "true" ] || fail "T12: array statuses should pass type check"
pass "T12: array statuses passes type check"
# T13 — empty array statuses → true
T13=$(echo '{"statuses":[]}' | validate_statuses_type)
[ "$T13" = "true" ] || fail "T13: empty array statuses should pass type check"
pass "T13: empty array statuses passes type check"
# T14-T16: REQUIRED_CHECKS_JSON branch entry validation
# Print "true" when the JSON object in $2 has the key named by $1 and that
# key's value is an array; print "false" otherwise.
#   $1 — branch name to look up
#   $2 — JSON text mapping branch names to their required-check lists
validate_required_checks_json() {
  local branch="$1" json="$2"
  local filter='has($branch) and (.[$branch] | type == "array")'
  echo "$json" | jq -r --arg branch "$branch" "$filter"
}
# T14 — branch exists and is array → true
T14=$(validate_required_checks_json "main" '{"main":["CI / all-required"]}')
[ "$T14" = "true" ] || fail "T14: existing array branch should pass"
pass "T14: existing array branch passes"
# T15 — branch missing → false
T15=$(validate_required_checks_json "staging" '{"main":["CI / all-required"]}')
[ "$T15" = "false" ] || fail "T15: missing branch should fail"
pass "T15: missing branch fails"
# T16 — branch entry is string instead of array → false
T16=$(validate_required_checks_json "main" '{"main":"CI / all-required"}')
[ "$T16" = "false" ] || fail "T16: string branch entry should fail"
pass "T16: string branch entry fails"
# ---------------------------------------------------------------------------
# T17+ — /statuses pagination (status-pagination RCA, #2440-family).
# The reader now pages /commits/{sha}/statuses to exhaustion instead of reading
# the capped combined /status view. These lock the page-accumulation,
# newest-wins collapse, short-page stop, and fail-closed contracts.
# ---------------------------------------------------------------------------
# Page-body type validator used per page (bare array, not an object).
# Read one page body from stdin and print "true" if it is a bare JSON array,
# "false" for anything else (including input jq cannot parse).
validate_page_is_array() {
  if jq -e 'type == "array"' >/dev/null 2>&1; then
    echo true
  else
    echo false
  fi
}
# newest-wins collapse: mirror the script's max-by-id jq (order-independent).
# Collapse a JSON array of status rows (read from stdin) to the row with the
# highest .id per .context, then print the .status recorded for one context,
# or "missing" when that context does not appear at all.
#   $1 — context to look up (optional; defaults to "CI / all-required (push)")
# All working variables are local so repeated calls cannot leak state into
# the calling shell.
collapse_newest_per_context() {
  local wanted="${1:-CI / all-required (push)}"
  local ctx state
  local -A newest=()
  while IFS=$'\t' read -r ctx state; do
    if [ -n "$ctx" ]; then
      newest[$ctx]="$state"
    fi
  done < <(jq -r 'group_by(.context) | map(max_by(.id)) | .[] | "\(.context)\t\(.status)"')
  echo "${newest[$wanted]:-missing}"
}
# T17 — a bare JSON array page passes the per-page array check.
T17=$(echo '[{"context":"c1","status":"success"}]' | validate_page_is_array)
[ "$T17" = "true" ] || fail "T17: bare array page should pass array check"
pass "T17: bare array page passes array check"
# T18 — a non-array page (object) fails the per-page array check → fail-closed.
T18=$(echo '{"statuses":[]}' | validate_page_is_array)
[ "$T18" = "false" ] || fail "T18: object page should fail array check (fail-closed)"
pass "T18: object page fails array check (fail-closed)"
# T19 — required SUCCESS on PAGE 2 is FOUND after accumulation (not missing).
# page1: 100 noise rows (older ids); page2: the required-context success.
PAGE1=$(jq -nc '[range(0;100) | {id:., context:("noise-\(.) (push)"), status:"pending"}]')
PAGE2='[{"id":200,"context":"CI / all-required (push)","status":"success"}]'
# Accumulation matching the script: two-arg `jq -s '.[0] + .[1]'` over the
# running accumulator and the new page.
ACCUM=$(jq -s '.[0] + .[1]' <(echo "$PAGE1") <(echo "$PAGE2"))
LEN=$(echo "$ACCUM" | jq 'length')
[ "$LEN" = "101" ] || fail "T19: accumulated length should be 101, got $LEN"
RESULT=$(echo "$ACCUM" | collapse_newest_per_context)
[ "$RESULT" = "success" ] || fail "T19: required success on page2 must be FOUND, got '$RESULT'"
pass "T19: required success on page2 is found after pagination"
# T20 — genuinely-absent required context across all pages stays 'missing'
# → fail-closed (counted as not-green, flags the force-merge).
ABSENT=$(jq -nc '[range(0;100) | {id:., context:("noise-\(.) (push)"), status:"success"}]')
RESULT2=$(echo "$ABSENT" | collapse_newest_per_context)
[ "$RESULT2" = "missing" ] || fail "T20: absent required context must stay 'missing', got '$RESULT2'"
pass "T20: genuinely-absent required context stays missing (fail-closed)"
# T21 — non-monotonic order: the newest id (157) is neither first nor last in
# the list; max_by(.id) must still select its success row (order-independent).
DUP='[{"id":155,"context":"CI / all-required (push)","status":"pending"},
{"id":157,"context":"CI / all-required (push)","status":"success"},
{"id":125,"context":"CI / all-required (push)","status":"failure"}]'
RESULT3=$(echo "$DUP" | collapse_newest_per_context)
[ "$RESULT3" = "success" ] || fail "T21: newest (success) must win over older (failure), got '$RESULT3'"
pass "T21: newest row per context wins after pagination collapse"
# T22 — short-page stop condition: a page with fewer than PER_PAGE rows ends
# the loop. Emulate the numeric comparison the script uses.
PER_PAGE=100
PAGE_COUNT=$(echo "$PAGE2" | jq 'length') # 1 row
if [ "$PAGE_COUNT" -lt "$PER_PAGE" ]; then SHORT=stop; else SHORT=continue; fi
[ "$SHORT" = "stop" ] || fail "T22: short page should stop pagination"
pass "T22: short page stops pagination loop"
# T23 — a full page (== PER_PAGE) continues the loop.
FULL=$(jq -nc '[range(0;100) | {id:., context:"x", status:"success"}]')
FULL_COUNT=$(echo "$FULL" | jq 'length')
if [ "$FULL_COUNT" -lt "$PER_PAGE" ]; then CONT=stop; else CONT=continue; fi
[ "$CONT" = "continue" ] || fail "T23: full page should continue pagination"
pass "T23: full page continues pagination loop"
echo
echo "ALL AUDIT-FORCE-MERGE CHECKS PASSED"
@@ -107,36 +107,6 @@ def test_required_checks_env_json_malformed_fails():
raise AssertionError("expected SystemExit(3)")
def test_required_checks_env_json_non_string_item_fails():
    """A non-string entry in the branch's list must make
    ``required_checks_env`` exit with status 3."""
    doc = _make_audit_doc_json({"main": ["ctx-a", 123, "ctx-b"]})
    exit_code = None
    try:
        drift.required_checks_env(doc, "main")
    except SystemExit as exc:
        exit_code = exc.code
    else:
        raise AssertionError("expected SystemExit(3)")
    assert exit_code == 3
def test_required_checks_env_json_empty_string_item_fails():
    """A whitespace-only entry in the branch's list must make
    ``required_checks_env`` exit with status 3."""
    doc = _make_audit_doc_json({"main": ["ctx-a", " ", "ctx-b"]})
    exit_code = None
    try:
        drift.required_checks_env(doc, "main")
    except SystemExit as exc:
        exit_code = exc.code
    else:
        raise AssertionError("expected SystemExit(3)")
    assert exit_code == 3
def test_required_checks_env_json_duplicate_context_fails():
    """A context listed twice for the branch must make
    ``required_checks_env`` exit with status 3."""
    doc = _make_audit_doc_json({"main": ["ctx-a", "ctx-b", "ctx-a"]})
    exit_code = None
    try:
        drift.required_checks_env(doc, "main")
    except SystemExit as exc:
        exit_code = exc.code
    else:
        raise AssertionError("expected SystemExit(3)")
    assert exit_code == 3
# ---------------------------------------------------------------------------
# sentinel_needs
# ---------------------------------------------------------------------------
@@ -50,15 +50,15 @@ class TestQaReviewDirectTrigger:
"pull_request_review must include 'submitted' type"
)
def test_job_guard_has_no_review_state_check(self):
def test_job_guard_requires_approved_state(self):
wf = load_workflow("qa-review.yml")
guard = _job_guard_string(wf)
assert "github.event.review.state" not in guard, (
"job guard must NOT check review.state (#2159: Gitea 1.22.6 payload unreliable); "
"evaluator (review-check.sh) verifies actual APPROVE via API"
assert "github.event.review.state == 'APPROVED'" in guard, (
"job guard must check review.state for 'APPROVED'"
)
assert "github.event.review.state == 'approved'" in guard, (
"job guard must check review.state for 'approved' (case fallback per #2135)"
)
assert "github.event_name == 'pull_request_target'" in guard
assert "github.event_name == 'pull_request_review'" in guard
def test_post_step_uses_status_post_token(self):
wf = load_workflow("qa-review.yml")
@@ -91,15 +91,15 @@ class TestSecurityReviewDirectTrigger:
"pull_request_review must include 'submitted' type"
)
def test_job_guard_has_no_review_state_check(self):
def test_job_guard_requires_approved_state(self):
wf = load_workflow("security-review.yml")
guard = _job_guard_string(wf)
assert "github.event.review.state" not in guard, (
"job guard must NOT check review.state (#2159: Gitea 1.22.6 payload unreliable); "
"evaluator (review-check.sh) verifies actual APPROVE via API"
assert "github.event.review.state == 'APPROVED'" in guard, (
"job guard must check review.state for 'APPROVED'"
)
assert "github.event.review.state == 'approved'" in guard, (
"job guard must check review.state for 'approved' (case fallback per #2135)"
)
assert "github.event_name == 'pull_request_target'" in guard
assert "github.event_name == 'pull_request_review'" in guard
def test_post_step_uses_status_post_token(self):
wf = load_workflow("security-review.yml")
@@ -153,7 +153,7 @@ class TestRefireTokenSeparation:
"qa refire must receive STATUS_POST_TOKEN env var"
)
# Evaluator stays on read token
assert "SOP_CHECKLIST_GATE_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
assert "SOP_TIER_CHECK_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
"qa refire evaluator must stay on read-scoped token"
)
@@ -163,6 +163,6 @@ class TestRefireTokenSeparation:
assert env.get("STATUS_POST_TOKEN") == "${{ secrets.STATUS_POST_TOKEN }}", (
"security refire must receive STATUS_POST_TOKEN env var"
)
assert "SOP_CHECKLIST_GATE_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
assert "SOP_TIER_CHECK_TOKEN" in env.get("GITEA_TOKEN", "") or "GITHUB_TOKEN" in env.get("GITEA_TOKEN", ""), (
"security refire evaluator must stay on read-scoped token"
)
+41 -444
View File
@@ -14,49 +14,49 @@ spec.loader.exec_module(mq)
def test_latest_statuses_dedupes_by_context_newest_first():
statuses = [
{"context": "CI / all-required (pull_request)", "status": "failure"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "state": "success"},
{"context": "sop-checklist / all-items-acked (pull_request)", "state": "success"},
{"context": "CI / all-required (pull_request)", "status": "success"},
]
latest = mq.latest_statuses_by_context(statuses)
assert latest["CI / all-required (pull_request)"]["status"] == "failure"
assert latest["sop-checklist / all-items-acked (pull_request_target)"]["state"] == "success"
assert latest["sop-checklist / all-items-acked (pull_request)"]["state"] == "success"
def test_required_contexts_green_rejects_missing_and_pending():
latest = mq.latest_statuses_by_context([
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "pending"},
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "pending"},
])
ok, missing_or_bad = mq.required_contexts_green(
latest,
[
"CI / all-required (pull_request)",
"sop-checklist / all-items-acked (pull_request_target)",
"qa-review / approved (pull_request_target)",
"sop-checklist / all-items-acked (pull_request)",
"qa-review / approved (pull_request)",
],
)
assert ok is False
assert missing_or_bad == [
"sop-checklist / all-items-acked (pull_request_target)=pending",
"qa-review / approved (pull_request_target)=missing",
"sop-checklist / all-items-acked (pull_request)=pending",
"qa-review / approved (pull_request)=missing",
]
def test_required_contexts_green_rejects_volume_skipped():
def test_required_contexts_green_rejects_volume_skipped_even_for_tier_low():
"""volume-skipped pending is a partial view, not a genuine soft-fail.
Per sop-checklist.py:1179-1187, volume_skipped posts pending with a
'[volume-skipped]' prefix. The merge queue must NOT treat this as an
acceptable soft-fail — the gate did not finish evaluating.
acceptable soft-fail for tier:low — the gate did not finish evaluating.
"""
latest = mq.latest_statuses_by_context([
{"context": "CI / all-required (pull_request)", "status": "success"},
{
"context": "sop-checklist / all-items-acked (pull_request_target)",
"context": "sop-checklist / all-items-acked (pull_request)",
"status": "pending",
"description": "[volume-skipped] comment-cap=1000 hit; please file ...",
},
@@ -66,12 +66,13 @@ def test_required_contexts_green_rejects_volume_skipped():
latest,
[
"CI / all-required (pull_request)",
"sop-checklist / all-items-acked (pull_request_target)",
"sop-checklist / all-items-acked (pull_request)",
],
pr_labels={"tier:low"},
)
assert ok is False
assert "sop-checklist / all-items-acked (pull_request_target)=pending" in missing_or_bad
assert "sop-checklist / all-items-acked (pull_request)=pending" in missing_or_bad
def test_choose_next_pr_sorts_by_queue_label_timestamp_then_number():
@@ -113,13 +114,7 @@ def test_pr_needs_update_when_base_sha_absent_from_commits():
def _ready_kwargs(**overrides):
"""Default kwargs for a fully-ready merge; override per test.
Includes the uniform governance checks (qa-review, security-review,
sop-checklist) as required contexts and green statuses, matching the
behaviour of process_once which merges GOVERNANCE_REQUIRED_CONTEXTS
with branch-protection contexts.
"""
"""Default kwargs for a fully-ready merge; override per test."""
base = dict(
main_status={
"state": "success",
@@ -127,19 +122,9 @@ def _ready_kwargs(**overrides):
},
pr_status={
"state": "success",
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
],
"statuses": [{"context": "CI / all-required (pull_request)", "status": "success"}],
},
required_contexts=[
"CI / all-required (pull_request)",
"qa-review / approved (pull_request_target)",
"security-review / approved (pull_request_target)",
"sop-checklist / all-items-acked (pull_request_target)",
],
required_contexts=["CI / all-required (pull_request)"],
required_approvals=2,
approvers={"agent-reviewer-cr2", "agent-researcher"},
request_changes=[],
@@ -158,72 +143,13 @@ def test_merge_decision_requires_main_green_pr_green_and_current_base():
assert decision.force is False # no non-required reds present
def test_behind_main_but_mergeable_pr_merges_directly():
    """§SOP-22 (#2358): behind main but conflict-free (mergeable is True) merges now.

    No update step is taken. Branch protection does not require a strictly
    up-to-date head, and calling /update would dismiss the genuine approvals
    (dismiss_stale_approvals), forcing re-review on every tick — the
    throughput bottleneck. This supersedes the old update-before-merge
    behaviour.
    """
    readiness_inputs = _ready_kwargs(pr_has_current_base=False, mergeable=True)
    decision = mq.evaluate_merge_readiness(**readiness_inputs)
    assert decision.ready is True
    assert decision.action == "merge"
def test_behind_main_and_not_mergeable_pr_updates():
"""The /update path is reached ONLY when the PR is NOT mergeable AND its head
lacks current main — refreshing the branch may resolve a behind-main
non-conflict; a real conflict 409s and is held (#2352)."""
decision = mq.evaluate_merge_readiness(
**_ready_kwargs(pr_has_current_base=False, mergeable=False)
)
def test_merge_decision_updates_stale_pr_before_merge():
decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_has_current_base=False))
assert decision.ready is False
assert decision.action == "update"
def test_current_base_but_not_mergeable_pr_waits():
    """Current with main, yet Gitea says not mergeable: the queue must WAIT.

    That combination means a genuine conflict against current main (or
    mergeability is still being computed). An update would not help, and
    merging is refused (fail-closed).
    """
    readiness_inputs = _ready_kwargs(pr_has_current_base=True, mergeable=False)
    decision = mq.evaluate_merge_readiness(**readiness_inputs)
    assert decision.ready is False
    assert decision.action == "wait"
    assert "not mergeable" in decision.reason
def test_behind_main_and_mergeable_none_waits_not_update():
    """§SOP-22 (CR2 #2374) — the churn-residual fix.

    A behind-main PR whose mergeability Gitea is still computing (mergeable
    is None) must WAIT instead of taking the /update path. The old code
    collapsed None into False, so this case returned action="update", which
    hit /pulls/{n}/update and dismiss_stale_approvals — the exact rebase
    churn this change removes, fired during the compute window. None and
    False are now distinct: None waits, False updates.
    """
    readiness_inputs = _ready_kwargs(pr_has_current_base=False, mergeable=None)
    decision = mq.evaluate_merge_readiness(**readiness_inputs)
    assert decision.ready is False
    # Must be "wait", not "update": no churn while mergeability is computed.
    assert decision.action == "wait"
    assert "computed" in decision.reason
def test_current_base_and_mergeable_none_waits():
    """Current with main + mergeable None (still computing) results in WAIT.

    Unchanged fail-closed behaviour: this confirms None is never merged,
    whatever the base state.
    """
    readiness_inputs = _ready_kwargs(pr_has_current_base=True, mergeable=None)
    decision = mq.evaluate_merge_readiness(**readiness_inputs)
    assert decision.ready is False
    assert decision.action == "wait"
def test_MergePermissionError_inherits_from_ApiError():
    """MergePermissionError must be a subclass of ApiError."""
    derived, base = mq.MergePermissionError, mq.ApiError
    assert issubclass(derived, base)
@@ -248,7 +174,7 @@ def test_genuine_approvals_counts_two_distinct_on_current_head():
{"state": "APPROVED", "user": {"login": "agent-reviewer-cr2"},
"official": True, "stale": False, "dismissed": False, "commit_id": "HEAD"},
]
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
assert approvers == {"agent-researcher", "agent-reviewer-cr2"}
assert rc == []
@@ -265,7 +191,7 @@ def test_genuine_approvals_ignores_stale_dismissed_and_wrong_head():
{"state": "APPROVED", "user": {"login": "agent-reviewer"},
"official": True, "stale": False, "dismissed": False, "commit_id": "OLD"},
]
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
assert approvers == set()
assert rc == []
@@ -279,7 +205,7 @@ def test_genuine_approvals_ignores_unofficial_and_outsiders():
{"state": "APPROVED", "user": {"login": "hongming-codex-laptop"},
"official": True, "stale": False, "dismissed": False, "commit_id": "HEAD"},
]
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
assert approvers == set()
@@ -291,7 +217,7 @@ def test_genuine_approvals_latest_review_supersedes_earlier():
{"state": "REQUEST_CHANGES", "user": {"login": "agent-reviewer-cr2"},
"official": True, "stale": False, "dismissed": False, "commit_id": "HEAD"},
]
approvers, rc = mq.genuine_approvals(reviews, headsha="HEAD", reviewer_set=REVIEWERS)
approvers, rc = mq.genuine_approvals(reviews, head_sha="HEAD", reviewer_set=REVIEWERS)
assert approvers == set()
assert rc == ["agent-reviewer-cr2"]
@@ -314,56 +240,16 @@ def test_merge_blocked_when_insufficient_genuine_approvals():
assert "insufficient genuine approvals" in decision.reason
def test_governance_red_blocks_merge():
    """Uniform gate: qa-review, security-review and sop-checklist are always required.

    When any of them is failing or pending, the PR is blocked.
    """
    # Context -> reported status, in the order the statuses are listed.
    reported = {
        "CI / all-required (pull_request)": "success",
        "qa-review / approved (pull_request_target)": "failure",
        "security-review / approved (pull_request_target)": "pending",
        "sop-checklist / all-items-acked (pull_request_target)": "failure",
        "Staging SaaS / e2e (pull_request)": "failure",
    }
    pr_status = {
        "state": "failure",
        "statuses": [
            {"context": context, "status": status}
            for context, status in reported.items()
        ],
    }
    decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_status=pr_status))
    assert decision.ready is False
    assert decision.action == "wait"
    assert "required contexts not green" in decision.reason
def test_non_required_red_does_not_block_merge():
# Uniform gate flip (CTO #2407): qa-review, security-review, sop-checklist
# are REQUIRED for ALL PRs. A PR with these failing/pending must NOT be
# force-mergeable, even if BP-required CI is green and approvals are genuine.
# Required (CI) green; non-required governance reds present → still merge,
# and force is set so force_merge bypasses ONLY those non-required reds.
pr_status = {
"state": "failure",
"state": "failure", # combined polluted by non-required reds
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request)", "status": "failure"},
{"context": "security-review / approved (pull_request)", "status": "pending"},
{"context": "sop-checklist / all-items-acked (pull_request)", "status": "failure"},
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
],
}
decision = mq.evaluate_merge_readiness(**_ready_kwargs(pr_status=pr_status))
assert decision.ready is False
assert decision.action == "wait"
assert "required contexts not green" in decision.reason
assert decision.force is False
def test_non_required_advisory_red_does_not_block_merge():
# Governance checks are green; only advisory non-required reds (Staging SaaS)
# are present → PR is still mergeable with force_merge bypassing the advisory.
pr_status = {
"state": "failure", # combined polluted by advisory non-required reds
"statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
{"context": "sop-tier-check / tier-check (pull_request)", "status": "failure"},
{"context": "Staging SaaS / e2e (pull_request)", "status": "failure"},
],
}
@@ -467,14 +353,8 @@ def test_process_once_holds_pr_on_permanent_merge_error(monkeypatch):
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
def fake_combined(sha):
if sha == main_sha:
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
]}
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
@@ -540,14 +420,8 @@ def _fully_ready_process_once_monkeypatch(monkeypatch, mergeable, calls):
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
def fake_combined(sha):
if sha == main_sha:
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
]}
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: [
@@ -632,131 +506,6 @@ def test_process_once_merges_when_mergeable_is_true(monkeypatch):
assert calls["hold_label"] is None
def test_process_once_behind_main_mergeable_none_waits_no_update(monkeypatch):
    """§SOP-22 (CR2 #2374) — end-to-end churn-residual regression.

    A behind-main PR (its commits do not contain main_sha) whose
    mergeability Gitea is still computing (mergeable=None) must WAIT:
    process_once returns 0 and never calls update_pull (which dismisses
    genuine approvals via dismiss_stale_approvals), merge_pull, or hold.
    The old None->False collapse routed this exact case into the /update
    path, causing approval-dismissing rebase churn during the compute
    window. Passing here shows no update happens, approvals are preserved,
    and the PR is re-checked on the next tick.
    """
    calls = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=None, calls=calls)

    # Put the head BEHIND main: the commit list omits main_sha. This is the
    # case the bug missed (the earlier None test had a current base).
    behind_head = "a" * 40

    def commits_without_main(n):
        return [{"sha": behind_head}]

    monkeypatch.setattr(mq, "get_pull_commits", commits_without_main)

    rc = mq.process_once(dry_run=False)

    assert rc == 0
    assert calls["updated"] is False  # no /update, so approvals are not dismissed
    assert calls["merge_attempts"] == 0  # never merge on an unknown
    assert calls["hold_label"] is None  # transient: not held, retried next tick
# --------------------------------------------------------------------------
# §SOP-22: DIRECT-MERGE throughput fix (#2358). A conflict-free 2-genuine PR
# merges WITHOUT a pre-merge /update call, so its approvals are NOT dismissed by
# dismiss_stale_approvals. The merge bar (2-genuine-on-current-head +
# BP-required green + mergeable + no RC + opt-out) is UNCHANGED; only the
# unnecessary update-before-merge churn is removed. The /update path survives
# for the genuine case it is needed (not-mergeable + behind-main), where a real
# conflict 409s and is held per #2352. mergeable=None stays fail-closed.
# --------------------------------------------------------------------------
def test_process_once_merges_conflict_free_pr_without_update(monkeypatch):
    """§SOP-22(a) — the core throughput fix.

    A conflict-free, fully-approved PR merges without update_pull ever being
    called. The old behaviour called /update first whenever the head lacked
    current main, which dismissed the 2 genuine approvals
    (dismiss_stale_approvals) and forced re-review every tick. This asserts
    update_pull is not invoked and merge_pull is.
    """
    calls = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=calls)

    # Put the head BEHIND main: the commit list omits main_sha. The old logic
    # forced an update_pull on this alone; the fix merges directly.
    head_sha = "a" * 40

    def commits_without_main(n):
        return [{"sha": head_sha}]

    monkeypatch.setattr(mq, "get_pull_commits", commits_without_main)

    rc = mq.process_once(dry_run=False)

    assert rc == 0
    assert calls["merge_attempts"] == 1  # merged directly
    assert calls["updated"] is False  # no update_pull, so approvals are not dismissed
    assert calls["hold_label"] is None
def test_process_once_behind_main_conflict_free_merges_directly(monkeypatch):
    """§SOP-22(b) — explicit behind-main + conflict-free case.

    The PR still merges directly, since branch protection does not require
    a strictly up-to-date head.
    """
    calls = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=calls)

    behind_head = "a" * 40

    def commits_without_main(n):
        return [{"sha": behind_head}]

    monkeypatch.setattr(mq, "get_pull_commits", commits_without_main)

    rc = mq.process_once(dry_run=False)

    assert rc == 0
    assert calls["merge_attempts"] == 1
    assert calls["updated"] is False
def test_process_once_pauses_when_main_not_green_no_direct_merge(monkeypatch):
    """§SOP-22 backstop — the serialized safety that makes direct-merge safe.

    When main's required push contexts are NOT green (e.g. a prior direct
    merge introduced a semantic main-break caught by post-merge main CI),
    the queue PAUSES: it does not merge the next PR onto an unverified or
    red main.
    """
    calls = dict(merge_attempts=0, hold_label=None, updated=False)
    _fully_ready_process_once_monkeypatch(monkeypatch, mergeable=True, calls=calls)
    main_sha = "b" * 40

    def red_main_combined(sha):
        # Main reports a failing push context; any other sha is a green PR head.
        on_main = sha == main_sha
        state = "failure" if on_main else "success"
        context = (
            "CI / all-required (push)" if on_main
            else "CI / all-required (pull_request)"
        )
        return {"state": state, "statuses": [{"context": context, "status": state}]}

    monkeypatch.setattr(mq, "get_combined_status", red_main_combined)

    rc = mq.process_once(dry_run=False)

    assert rc == 0
    assert calls["merge_attempts"] == 0  # paused: no merge onto red main
    assert calls["updated"] is False
def test_direct_merge_bar_unchanged_behind_main(monkeypatch):
    """§SOP-22(d) — the merge bar is UNCHANGED on the new direct-merge path.

    A behind-main + conflict-free PR is still rejected (action "wait", not
    ready) when any of the gates exercised here fails: insufficient genuine
    approvals, a red required context, or an open REQUEST_CHANGES.
    Direct-merge removes the update churn; it does not weaken the bar.

    NOTE(review): the opt-out-label gate is not exercised by this test.

    ``monkeypatch`` is accepted for signature compatibility; the body calls
    ``evaluate_merge_readiness`` directly and patches nothing.
    """
    behind_main = dict(pr_has_current_base=False, mergeable=True)

    # Fewer than 2 genuine approvals -> wait, not merge.
    d = mq.evaluate_merge_readiness(
        **_ready_kwargs(approvers={"agent-researcher"}, **behind_main)
    )
    assert d.action == "wait" and d.ready is False

    # Red required context -> wait, not merge.
    red_required = {
        "state": "failure",
        "statuses": [
            {"context": "CI / all-required (pull_request)", "status": "failure"},
        ],
    }
    d = mq.evaluate_merge_readiness(
        **_ready_kwargs(pr_status=red_required, **behind_main)
    )
    assert d.action == "wait" and d.ready is False

    # Open REQUEST_CHANGES on current head -> wait, not merge.
    d = mq.evaluate_merge_readiness(
        **_ready_kwargs(request_changes=["agent-reviewer-cr2"], **behind_main)
    )
    assert d.action == "wait" and d.ready is False
# --------------------------------------------------------------------------
# Fix 3: status fetch is fail-closed (failed fetch != green)
# --------------------------------------------------------------------------
@@ -951,30 +700,20 @@ def _stale_pr_update_409_monkeypatch(monkeypatch, queued_issues, calls):
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
def fake_combined(sha):
if sha == main_sha:
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
]}
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
# Scan-loop process_once enumerates candidates via list_candidate_issues.
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: queued_issues)
monkeypatch.setattr(mq, "get_pull", lambda n: {
"state": "open", "number": n, "mergeable": False,
"state": "open", "number": n, "mergeable": True,
"base": {"ref": "main", "repo_id": 1},
"head": {"sha": head_sha, "repo_id": 1},
"labels": [{"name": "merge-queue"}],
})
# NOTE: mergeable is False (real conflict) AND commits do NOT contain
# main_sha → pr_has_current_base is False → decision.action == "update".
# Under the #2358 direct-merge fix the update path is reached ONLY when the
# PR is NOT mergeable; a mergeable=True behind-main PR would merge directly,
# so this fixture sets mergeable=False to exercise the #2352 409-on-update
# hold path.
# NOTE: commits do NOT contain main_sha → pr_has_current_base is False →
# decision.action == "update".
monkeypatch.setattr(mq, "get_pull_commits", lambda n: [{"sha": head_sha}])
monkeypatch.setattr(mq, "get_pull_reviews", lambda n: [
{"state": "APPROVED", "user": {"login": "agent-researcher"},
@@ -1203,7 +942,7 @@ def test_list_candidate_issues_omits_label_filter_when_auto_discover(monkeypatch
assert captured["query"].get("type") == "pulls"
mq.list_candidate_issues(auto_discover=False)
assert captured["query"].get("label") == "merge-queue"
assert captured["query"].get("labels") == "merge-queue"
def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
@@ -1226,16 +965,8 @@ def _wire_ready_process_once(monkeypatch, *, issues, pr_payload, calls):
monkeypatch.setattr(mq, "get_branch_head", lambda branch: main_sha)
def fake_combined(sha):
if sha == main_sha:
return {"state": "success", "statuses": [
{"context": "CI / all-required (push)", "status": "success"},
]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
]}
ctx = "CI / all-required (push)" if sha == main_sha else "CI / all-required (pull_request)"
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
monkeypatch.setattr(mq, "get_pull", lambda n: dict(pr_payload, number=n))
@@ -1416,14 +1147,8 @@ def _wire_multi_candidate_process_once(monkeypatch, *, issues, pulls, reviews, c
monkeypatch.setattr(mq, "get_branch_head", lambda branch: MAIN_SHA)
def fake_combined(sha):
if sha == MAIN_SHA:
return {"state": "success", "statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
return {"state": "success", "statuses": [
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
]}
ctx = "CI / all-required (push)" if sha == MAIN_SHA else "CI / all-required (pull_request)"
return {"state": "success", "statuses": [{"context": ctx, "status": "success"}]}
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
monkeypatch.setattr(mq, "list_candidate_issues", lambda *, auto_discover: issues)
@@ -1555,12 +1280,7 @@ def test_hol_unready_red_required_ci_is_skipped_for_ready_pr(monkeypatch):
"statuses": [{"context": "CI / all-required (push)", "status": "success"}]}
state = "failure" if sha == red_head else "success"
return {"state": state,
"statuses": [
{"context": "CI / all-required (pull_request)", "status": state},
{"context": "qa-review / approved (pull_request_target)", "status": "success"},
{"context": "security-review / approved (pull_request_target)", "status": "success"},
{"context": "sop-checklist / all-items-acked (pull_request_target)", "status": "success"},
]}
"statuses": [{"context": "CI / all-required (pull_request)", "status": state}]}
monkeypatch.setattr(mq, "get_combined_status", fake_combined)
rc = mq.process_once(dry_run=False)
@@ -1655,126 +1375,3 @@ def test_process_once_defensive_skip_when_pull_payload_opted_out(monkeypatch):
assert rc == 0
assert calls["merged"] is None
# ---------------------------------------------------------------------------
# readiness-enumeration + post-batch summary
# ---------------------------------------------------------------------------
def test_enumerate_readiness_evaluates_all_candidates(monkeypatch):
    """enumerate_readiness reports every candidate's state.

    It does not stop at the first actionable candidate.
    """
    old_head, new_head = "a" * 40, "c" * 40

    def open_pull(head, mergeable):
        # Same-repo, non-draft, unlabeled open PR against main.
        return {"state": "open", "mergeable": mergeable, "draft": False,
                "base": {"ref": "main", "repo_id": 1},
                "head": {"sha": head, "repo_id": 1}, "labels": []}

    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=[
            _issue(500, labels=[], created="2026-06-01T01:00:00Z"),
            _issue(501, labels=[], created="2026-06-01T02:00:00Z"),
        ],
        pulls={
            500: open_pull(old_head, False),
            501: open_pull(new_head, True),
        },
        reviews={500: _two_approvals(old_head), 501: _two_approvals(new_head)},
        calls={},
    )

    entries = mq.enumerate_readiness(dry_run=False)

    assert len(entries) == 2
    by_num = {entry.pr_number: entry for entry in entries}
    assert by_num[500].decision is not None
    assert by_num[500].decision.ready is False
    assert by_num[501].decision is not None
    assert by_num[501].decision.ready is True
def test_enumerate_readiness_includes_ineligible_pr(monkeypatch):
    """Fork / wrong-base PRs are marked ineligible (decision=None).

    The remaining candidates are still evaluated.
    """
    head = "a" * 40

    def open_pull(head_repo_id):
        # Mergeable, non-draft open PR against main; only the head repo varies.
        return {"state": "open", "mergeable": True, "draft": False,
                "base": {"ref": "main", "repo_id": 1},
                "head": {"sha": head, "repo_id": head_repo_id}, "labels": []}

    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=[
            _issue(600, labels=[], created="2026-06-01T01:00:00Z"),
            _issue(601, labels=[], created="2026-06-01T02:00:00Z"),
        ],
        pulls={
            600: open_pull(2),  # fork: head repo differs from base repo
            601: open_pull(1),
        },
        reviews={600: _two_approvals(head), 601: _two_approvals(head)},
        calls={},
    )

    entries = mq.enumerate_readiness(dry_run=False)

    by_num = {entry.pr_number: entry for entry in entries}
    assert by_num[600].decision is None
    assert "not merge-eligible" in by_num[600].reason
    assert by_num[601].decision is not None
    assert by_num[601].decision.ready is True
def test_enumerate_readiness_fail_closed_on_api_error(monkeypatch):
    """A get_pull failure marks only that candidate as unverifiable.

    The other candidates are still evaluated.
    """
    head = "a" * 40

    def open_pull():
        # Same-repo, mergeable, non-draft open PR against main.
        return {"state": "open", "mergeable": True, "draft": False,
                "base": {"ref": "main", "repo_id": 1},
                "head": {"sha": head, "repo_id": 1}, "labels": []}

    _wire_multi_candidate_process_once(
        monkeypatch,
        issues=[
            _issue(700, labels=[], created="2026-06-01T01:00:00Z"),
            _issue(701, labels=[], created="2026-06-01T02:00:00Z"),
        ],
        pulls={700: open_pull(), 701: open_pull()},
        reviews={700: _two_approvals(head), 701: _two_approvals(head)},
        calls={},
    )

    # Wrap the get_pull installed by the wiring helper so only PR 700 fails.
    original_get_pull = mq.get_pull

    def failing_get_pull(n):
        if n != 700:
            return original_get_pull(n)
        raise mq.ApiError("simulated API failure")

    monkeypatch.setattr(mq, "get_pull", failing_get_pull)

    entries = mq.enumerate_readiness(dry_run=False)

    by_num = {entry.pr_number: entry for entry in entries}
    assert by_num[700].decision is None
    assert "unverifiable" in by_num[700].reason
    assert by_num[701].decision is not None
    assert by_num[701].decision.ready is True
def test_print_post_batch_summary_counts_correctly(capsys):
    """The summary prints aggregate counts plus one state line per PR."""
    ready = mq.ReadinessEntry(
        pr_number=1, decision=mq.MergeDecision(True, "merge", "ready"), reason="ready")
    waiting = mq.ReadinessEntry(
        pr_number=2, decision=mq.MergeDecision(False, "wait", "CI red"), reason="CI red")
    ineligible = mq.ReadinessEntry(pr_number=3, decision=None, reason="draft")

    mq.print_post_batch_summary([ready, waiting, ineligible])
    printed = capsys.readouterr().out

    expected_fragments = (
        "total_candidates=3",
        "ready=1",
        "waiting=1",
        "ineligible/unverifiable=1",
        "PR #1: state=ready",
        "PR #2: state=waiting",
        "PR #3: state=ineligible",
    )
    for fragment in expected_fragments:
        assert fragment in printed
@@ -17,7 +17,7 @@ wd.REPO = "molecule-ai/molecule-core"
wd.OWNER = "molecule-ai"
wd.NAME = "molecule-core"
wd.WATCH_BRANCH = "main"
wd.RED_LABEL = "ci-bp-drift"
wd.RED_LABEL = "tier:high"
wd.API = "https://git.example.com/api/v1"
@@ -1,70 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
# Anti-regression gate for #2403: fail if any SOP tier artifact reappears.
# Every check runs even after an earlier one has failed, so a single run
# reports all regressions; the script exits 1 when at least one check failed.
cd "$(dirname "$0")/../../.."

fail=0

# Print one failure line on stderr and mark the whole gate as failed.
flag() {
  echo "FAIL: $1" >&2
  fail=1
}

# 1 + 2. Deleted workflow files and deleted script files must stay deleted.
for f in \
  .gitea/workflows/sop-tier-check.yml .gitea/workflows/sop-tier-refire.yml \
  .gitea/scripts/sop-tier-check.sh .gitea/scripts/sop-tier-refire.sh; do
  if [ -e "$f" ]; then
    flag "$f was re-added (must stay deleted per #2403)"
  fi
done

# 3. No tier branching logic in gate_check.py
if grep -qE '_get_pr_tier|TIER_AGENTS' tools/gate-check-v3/gate_check.py; then
  flag "tier branching reappeared in gate_check.py"
fi

# 4. No _is_tier_low_pending_ok in merge queue
if grep -q '_is_tier_low_pending_ok' .gitea/scripts/gitea-merge-queue.py; then
  flag "tier soft-fail reappeared in gitea-merge-queue.py"
fi

# 5. No sop-tier-check context references in workflow YAML
#    (grep is deliberately not quiet: the offending lines are printed).
if grep -rI --exclude-dir='__pycache__' 'sop-tier-check' .gitea/workflows/; then
  flag "sop-tier-check context reappeared in workflows"
fi

# 6. No SOP_TIER_CHECK_TOKEN references in workflow YAML or scripts
#    (this script itself is excluded because it names the token).
if grep -rI --exclude-dir='__pycache__' --exclude='test_no_tier_regression.sh' 'SOP_TIER_CHECK_TOKEN' .gitea/workflows/ .gitea/scripts/; then
  flag "SOP_TIER_CHECK_TOKEN reference reappeared (use SOP_CHECKLIST_GATE_TOKEN)"
fi

# 7. qa-review and security-review must have labeled/unlabeled triggers (#2139)
for f in .gitea/workflows/qa-review.yml .gitea/workflows/security-review.yml; do
  if ! grep -q 'labeled, unlabeled' "$f"; then
    flag "$f missing labeled/unlabeled triggers (#2139)"
  fi
done

# 8. qa-review and security-review must NOT have review.state guard (#2159)
for f in .gitea/workflows/qa-review.yml .gitea/workflows/security-review.yml; do
  if grep -q 'github.event.review.state' "$f"; then
    flag "$f has review.state guard reappeared (#2159)"
  fi
done

if [ "$fail" -eq 1 ]; then
  echo "TIER_REGRESSION_DETECTED" >&2
  exit 1
fi
echo "PASS: no tier regression detected"
+5 -134
View File
@@ -105,25 +105,16 @@ def test_build_plan_disable_flag_short_circuits_before_credentials():
assert plan["disabled_reason"] == "PROD_AUTO_DEPLOY_DISABLED=true"
def test_latest_status_for_context_picks_newest_by_id_regardless_of_order():
# The exhaustively-paginated /statuses list is ascending id order
# (oldest-first), the opposite of the combined /status view. The selector
# must collapse duplicate context rows to the NEWEST (max id) so a stale
# earlier run never shadows the current result, whichever way they arrive.
def test_latest_status_for_context_uses_first_matching_status():
statuses = [
{"id": 10, "context": "CI / all-required (push)", "status": "pending"},
{"id": 11, "context": "CI / all-required (pull_request)", "status": "success"},
{"id": 12, "context": "CI / all-required (push)", "status": "success"},
{"context": "CI / all-required (push)", "status": "pending"},
{"context": "CI / all-required (pull_request)", "status": "success"},
{"context": "CI / all-required (push)", "status": "success"},
]
latest = prod.latest_status_for_context(statuses, "CI / all-required (push)")
assert latest == {"id": 12, "context": "CI / all-required (push)", "status": "success"}
# Same rows shuffled (newest-first, as the combined view would deliver)
# must still resolve to the same newest row.
latest_rev = prod.latest_status_for_context(list(reversed(statuses)), "CI / all-required (push)")
assert latest_rev == {"id": 12, "context": "CI / all-required (push)", "status": "success"}
assert latest == {"context": "CI / all-required (push)", "status": "pending"}
def test_ci_context_state_handles_missing_and_gitea_status_key():
@@ -621,123 +612,3 @@ def test_superseded_by_none_for_latest_job_so_it_still_rolls(monkeypatch):
)
is None
)
# ---------------------------------------------------------------------------
# /statuses pagination — required-context SUCCESS on page 2+ must be FOUND,
# genuinely-absent context must STILL fail-closed (no fail-open).
# Regression for the single-page-status bug (#2440-family, pagination RCA):
# the combined /status view caps `statuses` at ~30, so on a high-churn commit
# the still-current required-context row is pushed past page 1 and the reader
# falsely reports it `missing`.
# ---------------------------------------------------------------------------
def _paged_statuses_stub(pages):
"""Return a fake _api_json_list that serves `pages` keyed by ?page=N."""
def fake(url, _token):
# url looks like .../statuses?page=N&limit=100
page = 1
for part in url.split("?", 1)[-1].split("&"):
if part.startswith("page="):
page = int(part.split("=", 1)[1])
return pages.get(page, [])
return fake
def test_fetch_all_statuses_finds_required_success_on_page_two(monkeypatch):
    """Required contexts that only appear on page 2 must still be found.

    Page 1 is a full 100 rows of unrelated pending noise, so a reader that
    stops after one page would never see the two required SUCCESS rows.
    """
    required_ctx = "CI / all-required (push)"
    secret_scan_ctx = "Secret scan / Scan diff for credential-shaped strings (push)"

    first_page = []
    for i in range(100):
        first_page.append({"id": i, "context": f"noise-{i} (push)", "status": "pending"})
    second_page = [
        {"id": 200, "context": required_ctx, "status": "success"},
        {"id": 201, "context": secret_scan_ctx, "status": "success"},
    ]

    monkeypatch.setattr(
        prod, "_api_json_list", _paged_statuses_stub({1: first_page, 2: second_page})
    )
    rows = prod.fetch_all_statuses(
        "git.moleculesai.app", "molecule-ai/molecule-core", "a" * 40, "tok"
    )

    # Both pages must have been accumulated: 100 noise rows + 2 required rows.
    assert len(rows) == 102
    for ctx in (required_ctx, secret_scan_ctx):
        assert prod.ci_context_state(rows, ctx) == "success"
def test_fetch_all_statuses_genuinely_absent_context_stays_missing(monkeypatch):
    """A required context present on no page resolves to "missing".

    Fail-closed check: "missing" must also be rejected by
    context_is_satisfied(), so the gate stays shut.
    """
    pages = {
        1: [
            {"id": i, "context": f"noise-{i} (push)", "status": "success"}
            for i in range(100)
        ],
        2: [{"id": 200, "context": "some-other (push)", "status": "success"}],
    }
    monkeypatch.setattr(prod, "_api_json_list", _paged_statuses_stub(pages))

    all_rows = prod.fetch_all_statuses(
        "git.moleculesai.app", "molecule-ai/molecule-core", "b" * 40, "tok"
    )
    observed = prod.ci_context_state(all_rows, "CI / all-required (push)")

    assert observed == "missing"
    assert prod.context_is_satisfied(observed) is False
def test_fetch_all_statuses_fail_closed_on_page_error(monkeypatch):
    """An unreadable page must raise instead of silently truncating the scan.

    Page 1 returns a full 100 rows; the request for page 2 raises. The error
    has to reach the caller so a partial list is never treated as complete.
    """
    def boom(url, _token):
        if "page=2" not in url:
            return [{"id": i, "context": f"n-{i}", "status": "success"} for i in range(100)]
        raise RuntimeError("GET .../statuses?page=2 -> HTTP 502: bad gateway")

    monkeypatch.setattr(prod, "_api_json_list", boom)

    propagated = None
    try:
        prod.fetch_all_statuses("h", "r", "c" * 40, "tok")
    except RuntimeError as exc:
        propagated = exc

    if propagated is None:
        raise AssertionError("expected page-2 error to propagate (fail-closed)")
    assert "502" in str(propagated)
def test_wait_for_ci_context_succeeds_when_required_status_is_past_page_one(monkeypatch):
    """End-to-end: required SUCCESS rows that sit after 100 noise rows satisfy the gate.

    fetch_all_statuses is replaced with a stub returning the full list, so the
    test checks that wait_for_ci_context evaluates the exhaustive list.
    """
    exhaustive = [
        {"id": i, "context": f"noise-{i} (push)", "status": "success"}
        for i in range(100)
    ]
    exhaustive.extend([
        {"id": 500, "context": "CI / all-required (push)", "status": "success"},
        {"id": 501, "context": "Secret scan / Scan diff for credential-shaped strings (push)",
         "status": "success"},
    ])
    monkeypatch.setattr(prod, "fetch_all_statuses", lambda *a, **k: exhaustive)

    env = {"GITHUB_SHA": "d" * 40, "GITEA_TOKEN": "tok", "CI_STATUS_TIMEOUT_SECONDS": "30"}
    assert prod.wait_for_ci_context(env) == "success"
def test_wait_for_ci_context_times_out_fail_closed_when_required_absent(monkeypatch):
    """A required context that never appears must end in TimeoutError.

    The gate may not green-light a deploy when the only status present is for
    an unrelated context (no fail-open).
    """
    unrelated_only = [
        {"id": 500, "context": "some-other (push)", "status": "success"},
    ]
    monkeypatch.setattr(prod, "fetch_all_statuses", lambda *a, **k: unrelated_only)

    # A 1-second timeout with a 1-second poll interval keeps the wait short
    # before the expected TimeoutError.
    env = {
        "GITHUB_SHA": "e" * 40,
        "GITEA_TOKEN": "tok",
        "CI_STATUS_TIMEOUT_SECONDS": "1",
        "CI_STATUS_POLL_INTERVAL_SECONDS": "1",
    }

    timed_out = None
    try:
        prod.wait_for_ci_context(env)
    except TimeoutError as exc:
        timed_out = exc

    if timed_out is None:
        raise AssertionError("expected fail-closed TimeoutError, not a satisfied gate")
    assert "missing" in str(timed_out)
-21
View File
@@ -25,11 +25,6 @@
# T20 — ai-sop-ack APPROVED review excluded from security-review gate
# T21 — stale-head APPROVED review → exit 1 (commit_id mismatch)
# T22 — missing/non-official APPROVED review → exit 1 (official != true)
# T23 — missing-commit_id APPROVED review → exit 1 (SEV-1 internal#812
# fail-closed contract: a missing/empty commit_id is REJECTED, not
# silently accepted as "older Gitea row" the way the pre-fix
# gitea-merge-queue.py did. Closes the spoof-bug surface that
# #843 had.)
#
# Hostile-self-review (per feedback_assert_exact_not_substring):
# this test MUST FAIL if the script is absent. Verified by running
@@ -432,22 +427,6 @@ T22_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T22 exit code 1 (missing official rejected)" "1" "$T22_RC"
assert_contains "T22 no candidates error" "no candidates from reviews API or issue comments" "$T22_OUT"
# T23 — missing-commit_id APPROVED review must be rejected.
# SEV-1 internal#812 (supersedes closed internal#843). A review with NO
# commit_id field is the spoof-bug signature: a real reviewer cannot
# have submitted against a commit that doesn't exist. The fail-closed
# SSOT must REJECT — the pre-fix gitea-merge-queue.py silently accepted
# these (the "older Gitea row" escape hatch), which is the exact surface
# that closed #843 had. The Python unit tests in
# test_approval_validator.py cover the predicate at the unit level;
# this T23 covers the bash + jq pipeline end-to-end.
echo
echo "== T23 missing commit_id APPROVED review rejected (SEV-1 fail-closed) =="
T23_OUT=$(run_review_check "T23_missing_commit_id")
T23_RC=$(cat "$FIX_STATE_DIR/last_rc")
assert_eq "T23 exit code 1 (missing commit_id rejected)" "1" "$T23_RC"
assert_contains "T23 no candidates error" "no candidates from reviews API or issue comments" "$T23_OUT"
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
+59 -6
View File
@@ -11,7 +11,7 @@
# - compute_ack_state (self-ack rejected, team probe applied, revoke
# invalidates own prior ack, peer's ack survives unrevoked)
# - render_status (state + description format)
# - is_high_risk (label-driven, default fallback)
# - get_tier_mode (label-driven, default fallback)
# - load_config (default config parses cleanly with both PyYAML and
# the bundled minimal parser)
#
@@ -432,6 +432,37 @@ class TestRenderStatus(unittest.TestCase):
self.assertIn("body-unfilled", desc)
# ---------------------------------------------------------------------------
# get_tier_mode
# ---------------------------------------------------------------------------
class TestGetTierMode(unittest.TestCase):
    """get_tier_mode maps a PR's tier label to "hard" or "soft".

    tier:high and tier:medium are hard, tier:low is soft, and a PR without
    any tier label falls back to hard.
    """

    def setUp(self):
        self.cfg = sop.load_config(CONFIG_PATH)

    def _mode_for(self, *label_names):
        # Build a PR payload carrying exactly the given label names.
        pr = {"labels": [{"name": label} for label in label_names]}
        return sop.get_tier_mode(pr, self.cfg)

    def test_tier_high_is_hard(self):
        self.assertEqual(self._mode_for("tier:high", "area:ci"), "hard")

    def test_tier_medium_is_hard(self):
        self.assertEqual(self._mode_for("tier:medium"), "hard")

    def test_tier_low_is_soft(self):
        self.assertEqual(self._mode_for("tier:low"), "soft")

    def test_no_tier_label_defaults_to_hard(self):
        # Per feedback_fix_root_not_symptom — never silently lower the bar.
        self.assertEqual(self._mode_for("area:ci"), "hard")

    def test_no_labels_defaults_to_hard(self):
        # Both an empty label list and a payload with no "labels" key at all.
        self.assertEqual(self._mode_for(), "hard")
        self.assertEqual(sop.get_tier_mode({}, self.cfg), "hard")
# ---------------------------------------------------------------------------
# load_config
# ---------------------------------------------------------------------------
@@ -456,6 +487,13 @@ class TestLoadConfig(unittest.TestCase):
},
)
def test_default_config_tier_mode_shape(self):
    """The default config maps each tier label to a mode and defaults to hard."""
    cfg = sop.load_config(CONFIG_PATH)
    expected_modes = {"tier:high": "hard", "tier:medium": "hard", "tier:low": "soft"}
    for tier_label, mode in expected_modes.items():
        self.assertEqual(cfg["tier_failure_mode"][tier_label], mode)
    self.assertEqual(cfg["default_mode"], "hard")
def test_each_item_has_required_fields(self):
cfg = sop.load_config(CONFIG_PATH)
for it in cfg["items"]:
@@ -589,7 +627,7 @@ class TestComputeNaState(unittest.TestCase):
class TestIsHighRisk(unittest.TestCase):
"""The high-risk predicate decides which required_teams list applies.
Predicate: any label in cfg.high_risk_labels.
Predicate: tier:high label OR any label in cfg.high_risk_labels.
"""
def setUp(self):
@@ -599,8 +637,23 @@ class TestIsHighRisk(unittest.TestCase):
pr = {"labels": []}
self.assertFalse(sop.is_high_risk(pr, self.cfg))
def test_tier_high_is_high_risk(self):
pr = {"labels": [{"name": "tier:high"}]}
self.assertTrue(sop.is_high_risk(pr, self.cfg))
def test_tier_low_is_default_class(self):
pr = {"labels": [{"name": "tier:low"}]}
self.assertFalse(sop.is_high_risk(pr, self.cfg))
def test_tier_medium_is_default_class(self):
# tier:medium alone is NOT high-risk (Option C — medium routes
# to the wider engineers OR-set).
pr = {"labels": [{"name": "tier:medium"}]}
self.assertFalse(sop.is_high_risk(pr, self.cfg))
def test_area_security_label_is_high_risk(self):
pr = {"labels": [{"name": "area:security"}]}
pr = {"labels": [{"name": "tier:medium"}, {"name": "area:security"}]}
self.assertTrue(sop.is_high_risk(pr, self.cfg))
def test_area_schema_label_is_high_risk(self):
pr = {"labels": [{"name": "area:schema"}]}
@@ -615,7 +668,7 @@ class TestIsHighRisk(unittest.TestCase):
self.assertTrue(sop.is_high_risk(pr, self.cfg))
def test_area_gate_meta_label_is_high_risk(self):
# Gate-meta = changes to sop-checklist/sop-checklist itself.
# Gate-meta = changes to sop-checklist/sop-tier-check itself.
pr = {"labels": [{"name": "area:gate-meta"}]}
self.assertTrue(sop.is_high_risk(pr, self.cfg))
@@ -669,7 +722,7 @@ class TestRootCauseAckEligibilityWidened(unittest.TestCase):
root-cause / no-backwards-compat for the default class.
The dead-managers/ceo-persona-token gridlock is the symptom; the
root cause is that sop-checklist ignored high-risk class. These tests
root cause is that sop-checklist ignored tier-class. These tests
pin the new wider-default behavior so it can't regress silently.
"""
@@ -740,7 +793,7 @@ class TestHighRiskClassUsesElevatedListInConfig(unittest.TestCase):
def test_root_cause_high_risk_elevated_to_ceo_only(self):
items = _items_by_slug()
# area:schema alone makes the PR high-risk → root-cause needs ceo.
# tier:high alone makes the PR high-risk → root-cause needs ceo.
self.assertEqual(
sop.resolve_required_teams(items["root-cause"], high_risk=True),
["ceo"],
+272
View File
@@ -0,0 +1,272 @@
#!/usr/bin/env bash
# Security regression test for the SOP tier-gate AUTHORIZATION bypass.
#
# Bug (fixed in fix/sop-tier-authz-no-org-fallback):
# sop-tier-check.sh probed team membership at /teams/{id}/members/{user}.
# If EVERY team probe failed (e.g. 403 — token lacks read:organization, or
# any visibility/flakiness gap), it FELL BACK to /orgs/{org}/members/{user}
# and credited that org member as a member of EVERY queried team. The
# evaluator then treated those synthetic memberships as real, so a plain
# NON-CEO org member satisfied tier:high (ceo). A visibility/auth gap became
# a real highest-tier authorization PASS — privilege escalation.
#
# Fix (fail-closed authorization):
# - The org-member ⇒ "member of all teams" fallback is REMOVED. Org
# membership is never credited as team membership.
# - A team probe that returns anything other than 200/204 (member) or 404
# (verified non-member) is a CANNOT-VERIFY condition: the gate fails loud
# (exit 1) with a cannot-verify status and never grants the tier.
#
# Method: this is a true end-to-end test. It prepends a fake `curl` to PATH
# that serves canned Gitea API responses keyed by URL, then runs the REAL
# sop-tier-check.sh. The fake exercises the genuine probe→credit→evaluate
# path — no logic is re-implemented in the test.
set -euo pipefail
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
SCRIPT="$SCRIPT_DIR/sop-tier-check.sh"
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
[ -f "$SCRIPT" ] || { echo "::error::sop-tier-check.sh not found at $SCRIPT — test must fail loudly if the script is absent"; exit 1; }
# sop-tier-check.sh uses `declare -A` (associative arrays), which require
# bash >= 4. CI runners (Ubuntu) ship bash 5; macOS ships 3.2. Resolve a
# bash >= 4 to run the script under.
pick_bash() {
  # Print the resolved path of the first candidate interpreter whose major
  # version is >= 4 and return 0; return 1 when no candidate qualifies.
  local candidate resolved major
  for candidate in bash /opt/homebrew/bin/bash /usr/local/bin/bash /bin/bash; do
    resolved="$(command -v "$candidate" 2>/dev/null || true)"
    # Candidate not installed: try the next one.
    [ -n "$resolved" ] || continue
    major="$("$resolved" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null || echo 0)"
    # Too old (or version unreadable): try the next one.
    [ "${major:-0}" -ge 4 ] || continue
    echo "$resolved"
    return 0
  done
  return 1
}
BASH4="$(pick_bash)" || { echo "::error::need bash >= 4 to run sop-tier-check.sh (associative arrays); none found"; exit 1; }
echo "using bash: $BASH4 ($("$BASH4" -c 'echo $BASH_VERSION'))"
PASS=0
FAIL=0
assert_eq() {
  # assert_eq LABEL EXPECTED GOT
  # Records a PASS when EXPECTED and GOT are the same string; otherwise
  # records a FAIL and prints both values. Updates the global PASS/FAIL counters.
  local label="$1" expected="$2" got="$3"
  if [ "$expected" != "$got" ]; then
    echo " FAIL $label"
    echo " expected: <$expected>"
    echo " got: <$got>"
    FAIL=$((FAIL + 1))
    return 0
  fi
  echo " PASS $label"
  PASS=$((PASS + 1))
}
assert_contains() {
  # assert_contains LABEL HAYSTACK NEEDLE
  # Records a PASS when NEEDLE occurs literally inside HAYSTACK, otherwise a
  # FAIL. Updates the global PASS/FAIL counters.
  #
  # The match is done with a quoted bash pattern rather than
  # `printf | grep -qF`: this script runs under `set -o pipefail`, and grep -q
  # may exit on the first match while printf is still writing, so the pipeline
  # can report failure (SIGPIPE) even though the needle was found.
  local label="$1" haystack="$2" needle="$3"
  if [[ "$haystack" == *"$needle"* ]]; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label (missing substring: <$needle>)"
    FAIL=$((FAIL + 1))
  fi
}
assert_not_contains() {
  # assert_not_contains LABEL HAYSTACK NEEDLE
  # Records a PASS when NEEDLE does NOT occur literally inside HAYSTACK,
  # otherwise a FAIL. Updates the global PASS/FAIL counters.
  #
  # The match is done with a quoted bash pattern rather than
  # `printf | grep -qF`: under `set -o pipefail` an early grep -q exit can make
  # the pipeline report failure (SIGPIPE on printf), which this function would
  # have read as "substring absent" — a false PASS.
  local label="$1" haystack="$2" needle="$3"
  if [[ "$haystack" == *"$needle"* ]]; then
    echo " FAIL $label (unexpected substring present: <$needle>)"
    FAIL=$((FAIL + 1))
  else
    echo " PASS $label"
    PASS=$((PASS + 1))
  fi
}
# ---------------------------------------------------------------------------
# Fake-curl harness.
#
# The real script calls curl in three shapes:
# (a) body capture: curl -sS -H AUTH URL -> prints JSON body
# (b) http-code: curl -sS -o FILE -w '%{http_code}' -H AUTH URL
# (c) http-code only: curl -sS -o /dev/null -w '%{http_code}' -H AUTH URL
#
# Our fake reads the URL (last non-flag arg), looks up a response in fixture
# files under $FIXDIR, and emits body and/or http-code accordingly.
# ---------------------------------------------------------------------------
make_harness() {
# Install a fake `curl` for one scenario and print the directory holding it.
#
# $1 = scenario dir to populate with fixtures
#
# Creates "$1/bin", writes an executable `curl` script there from the heredoc
# below, and echoes the bin path so a caller can prepend it to PATH.
#
# The fake curl (heredoc is quoted, so nothing in it is expanded here):
#   - takes the argument matching http*://* as the URL and the value after -o
#     as the output target;
#   - keys fixtures by the URL path after /api/v1 with / ? = & replaced by _,
#     reading "$SOP_TEST_FIXDIR/body<slug>" and "$SOP_TEST_FIXDIR/code<slug>";
#   - writes the body (empty when no body fixture exists) to the -o target,
#     or to stdout when -o was not given;
#   - prints the HTTP code only when '%{http_code}' was passed, defaulting to
#     200 when no code fixture exists;
#   - always exits 0.
local FIXDIR="$1"
local BIN="$FIXDIR/bin"
mkdir -p "$BIN"
cat > "$BIN/curl" <<'FAKE'
#!/usr/bin/env bash
# Fake curl for sop-tier-check authz tests. Looks up canned responses by URL.
set -u
FIXDIR="${SOP_TEST_FIXDIR:?SOP_TEST_FIXDIR unset}"
url=""
out=""
want_code="no"
prev=""
for a in "$@"; do
case "$prev" in
-o) out="$a" ;;
esac
case "$a" in
http*://*) url="$a" ;;
'%{http_code}') want_code="yes" ;;
esac
# -w '%{http_code}' arrives as the value of the -w flag
if [ "$prev" = "-w" ] && [ "$a" = '%{http_code}' ]; then want_code="yes"; fi
prev="$a"
done
# Map URL -> fixture key (a filename-safe slug).
# We only need the path after /api/v1.
path="${url#*/api/v1}"
slug="$(printf '%s' "$path" | tr '/?=&' '____')"
body_file="$FIXDIR/body${slug}"
code_file="$FIXDIR/code${slug}"
# Emit body to -o target (or capture for stdout) when a body fixture exists.
body=""
if [ -f "$body_file" ]; then body="$(cat "$body_file")"; fi
if [ -n "$out" ]; then
printf '%s' "$body" > "$out"
else
printf '%s' "$body"
fi
# Emit http code when requested.
if [ "$want_code" = "yes" ]; then
if [ -f "$code_file" ]; then
printf '%s' "$(cat "$code_file")"
else
printf '200'
fi
fi
exit 0
FAKE
# Make the fake executable so it can be found via PATH, then report where it is.
chmod +x "$BIN/curl"
echo "$BIN"
}
# Common fixtures shared by scenarios. $1 = FIXDIR, $2 = approver login,
# $3 = tier label name (e.g. tier:high), $4 = teams JSON.
seed_common() {
  # seed_common FIXDIR APPROVER TIER TEAMS_JSON
  # Writes the body/code fixture files every scenario needs: the /user
  # identity, PR 42's head sha, its single tier label, the org team list
  # (HTTP 200), and one APPROVED review on the current head by APPROVER.
  local FIXDIR="$1" approver="$2" tier="$3" teams_json="$4"
  local repo_body="$FIXDIR/body_repos_molecule-ai_molecule-core"
  mkdir -p "$FIXDIR"

  # /user -> whoami
  printf '%s' '{"login":"sop-bot"}' > "$FIXDIR/body_user"

  # PR head sha
  printf '%s' '{"head":{"sha":"headsha1"}}' > "${repo_body}_pulls_42"

  # labels: exactly one, the tier label
  printf '[{"name":"%s"}]' "$tier" > "${repo_body}_issues_42_labels"

  # org teams list and its HTTP status
  printf '%s' "$teams_json" > "$FIXDIR/body_orgs_molecule-ai_teams"
  printf '%s' '200' > "$FIXDIR/code_orgs_molecule-ai_teams"

  # reviews: one APPROVED on current head by $approver
  printf '[{"state":"APPROVED","commit_id":"headsha1","user":{"login":"%s"}}]' "$approver" \
    > "${repo_body}_pulls_42_reviews"
}
run_script() {
# Run the real script ($SCRIPT) under $BASH4 against one scenario's fixtures.
#
# $1 = FIXDIR (must contain bin/curl). Returns combined stdout+stderr; sets RC.
#
# The fixture bin dir is put first on PATH so the script's `curl` calls resolve
# to the fake. The captured output is printed on stdout and the script's exit
# status becomes this function's return status.
#
# NOTE: OUT and RC are not declared local, so they are assigned in whatever
# shell runs this function. The callers in this file invoke it inside
# $(...), so those assignments stay in the command-substitution subshell and
# the caller reads the status from $? instead.
local FIXDIR="$1"
local BIN="$FIXDIR/bin"
# errexit is switched off so a non-zero exit from the script can be captured
# in RC instead of aborting; it is switched back on right after.
set +e
OUT=$(
SOP_TEST_FIXDIR="$FIXDIR" \
PATH="$BIN:$PATH" \
GITEA_TOKEN="faketoken" \
GITEA_HOST="git.moleculesai.app" \
REPO="molecule-ai/molecule-core" \
PR_NUMBER="42" \
PR_AUTHOR="pr-author" \
SOP_DEBUG="0" \
SOP_LEGACY_CHECK="0" \
"$BASH4" "$SCRIPT" 2>&1
)
# Must be read immediately: $? is overwritten by the next command.
RC=$?
set -e
printf '%s' "$OUT"
return $RC
}
TEAMS_JSON='[{"name":"ceo","id":10},{"name":"engineers","id":11},{"name":"managers","id":12}]'
echo "=============================================================="
echo "Scenario 1: tier:high, team probe 403 (cannot read), approver"
echo " is a plain org member but NOT in ceo team."
echo " EXPECT: tier NOT granted (fail-closed cannot-verify)."
echo "=============================================================="
S1="$(mktemp -d)"
make_harness "$S1" >/dev/null
seed_common "$S1" "org-only-bob" "tier:high" "$TEAMS_JSON"
# Team membership probe for ceo (id=10) returns 403 — cannot read.
printf '%s' '403' > "$S1/code_teams_10_members_org-only-bob"
# The OLD bug path: org membership probe would 204 and synthetic-credit.
printf '%s' '204' > "$S1/code_orgs_molecule-ai_members_org-only-bob"
set +e
OUT1="$(run_script "$S1")"; RC1=$?
set -e
echo "$OUT1" | sed 's/^/ /'
echo " (exit=$RC1)"
assert_eq "S1 exit non-zero (tier NOT granted)" "1" "$([ "$RC1" -ne 0 ] && echo 1 || echo 0)"
assert_not_contains "S1 did NOT print PASSED" "$OUT1" "sop-tier-check PASSED"
assert_contains "S1 cannot-verify error surfaced" "$OUT1" "CANNOT VERIFY"
assert_contains "S1 names the unreadable probe (403)" "$OUT1" "HTTP 403"
rm -rf "$S1"
echo
echo "=============================================================="
echo "Scenario 2: tier:high, genuine ceo team member (probe 204)."
echo " EXPECT: tier GRANTED."
echo "=============================================================="
S2="$(mktemp -d)"
make_harness "$S2" >/dev/null
seed_common "$S2" "real-ceo" "tier:high" "$TEAMS_JSON"
printf '%s' '204' > "$S2/code_teams_10_members_real-ceo" # ceo team: member
set +e
OUT2="$(run_script "$S2")"; RC2=$?
set -e
echo "$OUT2" | sed 's/^/ /'
echo " (exit=$RC2)"
assert_eq "S2 exit zero (granted)" "0" "$RC2"
assert_contains "S2 printed PASSED" "$OUT2" "sop-tier-check PASSED"
rm -rf "$S2"
echo
echo "=============================================================="
echo "Scenario 3: tier:high, approver is an org member but a VERIFIED"
echo " non-member of ceo (team probe 404). Org probe would"
echo " 204 — must NEVER be synthetic-credited."
echo " EXPECT: tier NOT granted (clause FAIL), no fallback."
echo "=============================================================="
S3="$(mktemp -d)"
make_harness "$S3" >/dev/null
seed_common "$S3" "org-member-carol" "tier:high" "$TEAMS_JSON"
printf '%s' '404' > "$S3/code_teams_10_members_org-member-carol" # verified NOT in ceo
printf '%s' '204' > "$S3/code_orgs_molecule-ai_members_org-member-carol" # org member (must be ignored)
set +e
OUT3="$(run_script "$S3")"; RC3=$?
set -e
echo "$OUT3" | sed 's/^/ /'
echo " (exit=$RC3)"
assert_eq "S3 exit non-zero (tier NOT granted)" "1" "$([ "$RC3" -ne 0 ] && echo 1 || echo 0)"
assert_not_contains "S3 did NOT print PASSED" "$OUT3" "sop-tier-check PASSED"
assert_contains "S3 reported a real clause FAIL (not cannot-verify)" "$OUT3" "FAILED for tier:high"
assert_not_contains "S3 did NOT cannot-verify (404 is a verified negative)" "$OUT3" "CANNOT VERIFY"
rm -rf "$S3"
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
[ "$FAIL" -eq 0 ]
+101
View File
@@ -0,0 +1,101 @@
#!/usr/bin/env bash
# Regression test for #229 — sop-tier-check tier:low OR-clause splitter.
#
# Bug (PR #225 → still broken after PR #231):
# Line ~289 of sop-tier-check.sh used:
# _clause=$(echo "$_raw_clause" | tr -d '()' | tr ',' '\n' | tr -d '[:space:]' | grep -v '^$')
# `tr -d '[:space:]'` strips the newlines that `tr ',' '\n'` just
# inserted, collapsing "engineers,managers,ceo" into a single token
# "engineersmanagersceo". The for-loop then iterates ONCE on a name
# that matches no team, so every tier:low PR fails:
# ::error::clause [engineers/managers/ceo]: FAIL — no approving
# reviewer belongs to any of these teamsengineersmanagersceo
# (note also: missing separators in the error string is bug #2 —
# `_clause_names` used "${var:+, }$x" which OVERWRITES per iteration).
#
# Fix shape (this PR):
# _no_parens=${_raw_clause//[()]/}
# _clause=${_no_parens//,/ } # comma -> space, bash word-split iterates
# _clause_names="${_clause_names}${_clause_names:+, }${_t}" # APPEND, not overwrite
#
# This test extracts the splitter logic and asserts it produces the right
# token list for each of the three tier expressions live in the script.
set -euo pipefail
PASS=0
FAIL=0
assert_eq() {
  # assert_eq LABEL EXPECTED GOT
  # Compares the two strings, prints a PASS or FAIL line (plus both values on
  # FAIL), and bumps the matching global counter.
  local label="$1" expected="$2" got="$3"
  if [ "$expected" != "$got" ]; then
    echo " FAIL $label"
    echo " expected: <$expected>"
    echo " got: <$got>"
    FAIL=$((FAIL + 1))
    return 0
  fi
  echo " PASS $label"
  PASS=$((PASS + 1))
}
# ----- Splitter under test (mirrors the fixed sop-tier-check.sh block) -----
# split_clause RAW
# Strips parentheses from RAW, treats commas as separators, and prints the
# resulting tokens joined with "|" (e.g. "(managers,ceo)" -> "managers|ceo").
#
# The body is a subshell ( ... ) so that `set -f` stays confined to this call.
# Pathname expansion has to be off because the tokens are produced by unquoted
# word-splitting and some team names contain glob characters (e.g. "qa???"):
# with globbing on, a matching file in the current directory would replace the
# token. The loop variable is local so it does not leak into the caller.
split_clause() (
  set -f
  local raw="$1"
  local no_parens=${raw//[()]/}
  local clause=${no_parens//,/ }
  local out="" _t
  for _t in $clause; do
    out="${out}${out:+|}$_t"
  done
  echo "$out"
)
echo "test: tier:low OR-clause splits to 3 tokens"
assert_eq "tier:low" "engineers|managers|ceo" "$(split_clause "engineers,managers,ceo")"
echo "test: tier:medium AND-expression — bash word-split on \$EXPR yields 5 tokens"
EXPR="managers AND engineers AND qa???,security???"
out=""
for _raw in $EXPR; do
out="${out}${out:+ ; }$(split_clause "$_raw")"
done
assert_eq "tier:medium" "managers ; AND ; engineers ; AND ; qa???|security???" "$out"
echo "test: tier:high single-team OR-clause"
assert_eq "tier:high" "ceo" "$(split_clause "ceo")"
echo "test: paren-wrapped OR-set unwraps + splits"
assert_eq "paren OR" "managers|ceo" "$(split_clause "(managers,ceo)")"
# ----- _clause_names accumulator (was overwriting per iteration) -----
acc=""
for t in engineers managers ceo; do
acc="${acc}${acc:+, }${t}"
done
assert_eq "_clause_names append" "engineers, managers, ceo" "$acc"
# ----- _failed_clauses / _passed_clauses accumulator across raw clauses -----
acc=""
for c in clauseA clauseB clauseC; do
acc="${acc}${acc:+, }${c}"
done
assert_eq "_failed_clauses append" "clauseA, clauseB, clauseC" "$acc"
# ----- End-to-end OR-gate: simulate APPROVER_TEAMS[core-lead]=' managers ' -----
# The script's case pattern is *${_t}* with a space-padded value.
APPROVER_TEAMS_VAL=" managers "
matched=""
for _t in $(split_clause "engineers,managers,ceo" | tr '|' ' '); do
case "$APPROVER_TEAMS_VAL" in
*${_t}*) matched="$_t"; break ;;
esac
done
assert_eq "OR-gate matches managers" "managers" "$matched"
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
[ "$FAIL" -eq 0 ]
+66
View File
@@ -0,0 +1,66 @@
#!/usr/bin/env bash
# Regression test for internal#816 — sop-tier-check must ignore APPROVED
# reviews that were submitted against an old PR head SHA.
#
# Bug: the script collected approvers with
# jq '[.[] | select(.state=="APPROVED") | .user.login]'
# without filtering on .commit_id == HEAD_SHA. After a PR head moved,
# stale approvals looked valid to the tier gate.
#
# Fix: the jq filter now includes
# select(.state=="APPROVED" and .commit_id == $head_sha)
# where $head_sha is the current PR head fetched from the API.
set -euo pipefail
# jq may not be on PATH in all environments (e.g. dev containers), so
# /tmp/bin is offered as a fallback location. It is appended rather than
# prepended: a prepended entry would let anything placed in /tmp/bin shadow
# every system command this test runs, and /tmp is typically world-writable.
PATH="$PATH:/tmp/bin"
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
PASS=0
FAIL=0
assert_eq() {
  # assert_eq LABEL EXPECTED GOT
  # String equality check: prints PASS, or FAIL with both values, and
  # increments the corresponding global counter.
  local label="$1" expected="$2" got="$3"
  if [ "$expected" != "$got" ]; then
    echo " FAIL $label"
    echo " expected: <$expected>"
    echo " got: <$got>"
    FAIL=$((FAIL + 1))
    return 0
  fi
  echo " PASS $label"
  PASS=$((PASS + 1))
}
# Sample reviews matching the shape from Gitea API
REVIEWS_JSON='[
{"state":"APPROVED","commit_id":"abc123","user":{"login":"bob"}},
{"state":"APPROVED","commit_id":"old456","user":{"login":"alice"}},
{"state":"COMMENT","commit_id":"abc123","user":{"login":"carol"}},
{"state":"APPROVED","commit_id":"abc123","user":{"login":"dave"}},
{"state":"REQUEST_CHANGES","commit_id":"abc123","user":{"login":"eve"}}
]'
echo "test: jq filter keeps only APPROVED on current head"
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "abc123" \
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
assert_eq "current-head approvers" "bob dave" "$(echo "$GOT" | tr '\n' ' ' | sed 's/ $//')"
echo "test: jq filter with all-stale reviews yields empty"
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "new789" \
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
assert_eq "all-stale yields empty" "" "$GOT"
echo "test: jq filter handles null commit_id gracefully"
NULL_JSON='[{"state":"APPROVED","commit_id":null,"user":{"login":"mallory"}}]'
GOT=$(echo "$NULL_JSON" | jq -r --arg head_sha "abc123" \
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
assert_eq "null commit_id excluded" "" "$GOT"
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
[ "$FAIL" -eq 0 ]
+304
View File
@@ -0,0 +1,304 @@
#!/usr/bin/env bash
# Tests for sop-tier-refire.{yml,sh} — internal#292.
#
# Behavior matrix:
#
# T1: PR open + APPROVED via tier:low → script invokes sop-tier-check
# and POSTs status=success.
# T2: PR open + missing tier label → sop-tier-check exits non-zero;
# refire POSTs status=failure (fail-closed: it never forges a green
# on the required context) and the refire job itself exits 0.
# T3: PR open + tier:low but NO approving reviews → sop-tier-check
# exits non-zero; refire POSTs status=failure for the same reason.
# T4: PR CLOSED → refire exits 0 with no status POST (no-op on closed).
# T5: Rate-limit — recent status update within 30s → refire skips,
# no new POST.
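# T6 (yaml-lint): the refire workflow does not listen on issue_comment,
# exposes workflow_dispatch, and never checks out the PR head; the
# SSOT dispatcher (sop-checklist.yml) listens on issue_comment and
# handles /qa-recheck, /security-recheck and /refire-tier-check.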
# T7 (yaml-lint): workflow file is parseable YAML.
#
# Tests T1-T5 run the real script against a local-fixture HTTP server
# (python http.server with a stub handler — `tests/_refire_fixture.py`)
# so the script's Gitea API calls hit the fixture, not the real Gitea.
#
# Tests T6/T7 are pure YAML checks against the workflow file.
#
# Hostile-self-review (per feedback_assert_exact_not_substring):
# this test MUST FAIL if the workflow or script is absent. Verified by
# running the test before the files exist (covered in the PR body).
set -euo pipefail
# Every path is resolved relative to this test file ($0), not the caller's
# working directory. `cd ... && pwd` yields absolute paths and, under
# `set -e`, aborts the run if a directory does not exist.
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
WORKFLOW_DIR="$(cd "$THIS_DIR/../../workflows" && pwd)"
# Files under test: the manual refire workflow, the dispatcher workflow
# that owns comment-triggered refires, and the refire script itself.
WORKFLOW="$WORKFLOW_DIR/sop-tier-refire.yml"
DISPATCH_WORKFLOW="$WORKFLOW_DIR/sop-checklist.yml"
SCRIPT="$SCRIPT_DIR/sop-tier-refire.sh"
# Tallies maintained by the assert_* helpers; FAILED_TESTS accumulates the
# labels of failed assertions for the final summary.
PASS=0
FAIL=0
FAILED_TESTS=""
# assert_eq LABEL EXPECTED GOT
# Exact string comparison. A match bumps PASS; a mismatch bumps FAIL,
# prints both values and appends LABEL to FAILED_TESTS.
assert_eq() {
  local name="$1" want="$2" have="$3"
  if [ "$want" != "$have" ]; then
    echo " FAIL $name"
    echo " expected: <$want>"
    echo " got: <$have>"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${name}"
    return
  fi
  echo " PASS $name"
  PASS=$((PASS + 1))
}
# assert_contains LABEL NEEDLE HAYSTACK
# Records a PASS when NEEDLE occurs verbatim (no regex or glob meaning)
# anywhere in HAYSTACK; otherwise records a FAIL, prints the needle and the
# first 400 bytes of the haystack, and appends LABEL to FAILED_TESTS.
assert_contains() {
  local label="$1"
  local needle="$2"
  local haystack="$3"
  # In-shell substring match instead of `printf ... | grep -qF "$needle"`:
  #  - a needle beginning with `-` is no longer parsed as a grep option;
  #  - there is no pipeline, so `set -o pipefail` cannot turn a match into
  #    a failure when grep -q exits before printf finishes writing a large
  #    haystack;
  #  - a multi-line needle is matched as one string rather than as a set of
  #    alternative patterns.
  # Quoting "$needle" inside [[ ... == ... ]] makes it match literally.
  if [[ "$haystack" == *"$needle"* ]]; then
    echo " PASS $label"
    PASS=$((PASS + 1))
  else
    echo " FAIL $label"
    echo " needle: <$needle>"
    echo " haystack: <$(printf '%s' "$haystack" | head -c 400)>"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${label}"
  fi
}
# assert_file_exists LABEL PATH
# Records a PASS when PATH is a regular file; otherwise records a FAIL that
# names the missing path and appends LABEL to FAILED_TESTS.
assert_file_exists() {
  local name="$1" target="$2"
  if [ ! -f "$target" ]; then
    echo " FAIL $name (not found: $target)"
    FAIL=$((FAIL + 1))
    FAILED_TESTS="${FAILED_TESTS} ${name}"
    return
  fi
  echo " PASS $name"
  PASS=$((PASS + 1))
}
# Existence (foundation — every other test depends on these)
echo
echo "== existence =="
assert_file_exists "workflow file exists" "$WORKFLOW"
assert_file_exists "SSOT dispatcher workflow file exists" "$DISPATCH_WORKFLOW"
assert_file_exists "script file exists" "$SCRIPT"
# Hard stop: FAIL can only be non-zero here because of the three checks
# above, and the later sections read and execute these files.
if [ "$FAIL" -gt 0 ]; then
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL (existence)"
echo "Cannot proceed without these files."
exit 1
fi
# T6 / T7 — workflow YAML structure
echo
echo "== T6/T7 workflow yaml =="
# YAML parseability
# `|| true` keeps `set -e` from aborting on a parse error; in that case
# PARSE_OUT holds Python's error output and the assert_eq below reports it.
PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$WORKFLOW" 2>&1 || true)
assert_eq "T7 workflow parses as YAML" "ok" "$PARSE_OUT"
# The old per-workflow issue_comment listener caused queue storms because
# Gitea queues jobs before evaluating job-level `if:`. The script remains,
# but comment-triggered refires route through the single dispatcher.
WORKFLOW_CONTENT=$(cat "$WORKFLOW")
# Negative check, so it is written inline rather than via assert_contains:
# a match is the failure.
if printf '%s' "$WORKFLOW_CONTENT" | grep -q '^ issue_comment:'; then
echo " FAIL T6a manual fallback workflow must not listen on issue_comment"
FAIL=$((FAIL + 1))
FAILED_TESTS="${FAILED_TESTS} T6a"
else
echo " PASS T6a manual fallback workflow does not listen on issue_comment"
PASS=$((PASS + 1))
fi
assert_contains "T6b workflow exposes workflow_dispatch" \
"workflow_dispatch" "$WORKFLOW_CONTENT"
assert_contains "T6c workflow documents unsupported manual inputs" \
"workflow_dispatch inputs" "$WORKFLOW_CONTENT"
# Does NOT check out PR HEAD (security)
# Also a negative check: any `ref:` built from github.event.pull_request.head
# in the workflow file fails the test.
if grep -q 'ref: \${{ github.event.pull_request.head' "$WORKFLOW"; then
echo " FAIL T6d workflow MUST NOT check out PR head (security)"
FAIL=$((FAIL + 1))
FAILED_TESTS="${FAILED_TESTS} T6d"
else
echo " PASS T6d workflow does not check out PR head"
PASS=$((PASS + 1))
fi
# The dispatcher workflow must parse and must mention the issue_comment
# trigger and each supported slash command (plain substring checks).
DISPATCH_PARSE_OUT=$(python3 -c 'import sys,yaml;yaml.safe_load(open(sys.argv[1]).read());print("ok")' "$DISPATCH_WORKFLOW" 2>&1 || true)
assert_eq "T6e SSOT dispatcher workflow parses as YAML" "ok" "$DISPATCH_PARSE_OUT"
DISPATCH_CONTENT=$(cat "$DISPATCH_WORKFLOW")
assert_contains "T6f SSOT dispatcher listens on issue_comment" \
"issue_comment" "$DISPATCH_CONTENT"
assert_contains "T6g SSOT dispatcher handles /qa-recheck" \
"/qa-recheck" "$DISPATCH_CONTENT"
assert_contains "T6h SSOT dispatcher handles /security-recheck" \
"/security-recheck" "$DISPATCH_CONTENT"
assert_contains "T6i SSOT dispatcher handles /refire-tier-check" \
"/refire-tier-check" "$DISPATCH_CONTENT"
# T1-T5 — script behavior against a local Gitea-fixture
echo
echo "== T1-T5 script behavior (vs local fixture) =="
# Spin up the fixture HTTP server.
FIXTURE_DIR=$(mktemp -d)
# Cleanup on any exit: remove the temp dir and stop the fixture if it was
# started. ${FIX_PID:-} keeps `set -u` quiet when the trap fires before
# FIX_PID is assigned; `|| true` keeps the trap from changing the exit path.
trap 'rm -rf "$FIXTURE_DIR"; [ -n "${FIX_PID:-}" ] && kill "$FIX_PID" 2>/dev/null || true' EXIT
FIXTURE_PY="$THIS_DIR/_refire_fixture.py"
if [ ! -f "$FIXTURE_PY" ]; then
echo "::error::fixture server $FIXTURE_PY missing"
exit 1
fi
FIX_LOG="$FIXTURE_DIR/fixture.log"
FIX_STATE_DIR="$FIXTURE_DIR/state"
mkdir -p "$FIX_STATE_DIR"
# Find an unused port.
# Binding to port 0 lets the OS choose; the probe socket is closed before
# the fixture starts, so the port is free only on a best-effort basis.
FIX_PORT=$(python3 -c 'import socket;s=socket.socket();s.bind(("127.0.0.1",0));print(s.getsockname()[1]);s.close()')
# Start the fixture in the background, passing it the state directory via
# FIXTURE_STATE_DIR and capturing all of its output in FIX_LOG.
FIXTURE_STATE_DIR="$FIX_STATE_DIR" python3 "$FIXTURE_PY" "$FIX_PORT" \
>"$FIX_LOG" 2>&1 &
FIX_PID=$!
# Wait for fixture readiness.
# Up to 50 polls of /_ping, 0.1s apart.
for _ in $(seq 1 50); do
if curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
break
fi
sleep 0.1
done
# Final probe distinguishes "became ready" from "loop ran out"; on failure
# the fixture log is dumped to explain why.
if ! curl -fsS "http://127.0.0.1:${FIX_PORT}/_ping" >/dev/null 2>&1; then
echo "::error::fixture server failed to start. Log:"
cat "$FIX_LOG"
exit 1
fi
# run_scenario SCENARIO [TIER_RESULT]
# Selects a fixture scenario, empties the posted-status log, runs the
# refire script with the curl shim first on PATH, and stores the script's
# combined stdout/stderr and exit code under $FIX_STATE_DIR
# (last_run.log / last_rc).
# TIER_RESULT (default: pass) is one of: pass | fail_no_label |
# fail_no_approvals, and is handed to the mock through MOCK_TIER_RESULT.
# The tier check is mocked because the real sop-tier-check.sh uses bash 4+
# associative arrays — incompatible with the macOS bash 3.2 dev shell.
# Linux Gitea runners use bash 4/5, so production runs the real script.
# The mock exercises the success + failure branches of refire's
# status-POST glue. Rate limiting is disabled for these runs.
run_scenario() {
  local scenario_name="$1"
  local mock_outcome="${2:-pass}"
  local captured exit_code
  echo "$scenario_name" >"$FIX_STATE_DIR/scenario"
  : >"$FIX_STATE_DIR/posted_statuses.jsonl" # clear status log
  # errexit is suspended only around the script call so a non-zero exit is
  # captured instead of aborting the whole test run.
  set +e
  captured=$(
    PATH="$FIXTURE_DIR/bin:$PATH" \
    GITEA_TOKEN="fixture-token" \
    GITEA_HOST="fixture.local" \
    REPO="molecule-ai/molecule-core" \
    PR_NUMBER="999" \
    COMMENT_AUTHOR="test-runner" \
    SOP_REFIRE_DISABLE_RATE_LIMIT="1" \
    SOP_REFIRE_TIER_CHECK_SCRIPT="$THIS_DIR/_mock_tier_check.sh" \
    MOCK_TIER_RESULT="$mock_outcome" \
    FIXTURE_PORT="$FIX_PORT" \
    bash "$SCRIPT" 2>&1
  )
  exit_code=$?
  set -e
  echo "$captured" >"$FIX_STATE_DIR/last_run.log"
  echo "$exit_code" >"$FIX_STATE_DIR/last_rc"
}
# Install a curl shim that rewrites https://fixture.local → http://127.0.0.1:$PORT
# Use bash prefix-strip (${var#prefix}) — it sidesteps the `/` delimiter
# confusion of ${var/pattern/replacement}.
mkdir -p "$FIXTURE_DIR/bin"
# Resolve the real curl now, while the shim directory is not yet on PATH,
# and bake its shell-quoted path into the shim. A hard-coded /usr/bin/curl
# breaks wherever curl lives elsewhere, even though the readiness probe
# above already found curl through PATH.
REAL_CURL_Q="$(printf '%q' "$(command -v curl)")"
# The heredoc is unquoted on purpose: ${FIX_PORT} and ${REAL_CURL_Q} expand
# now, while every \$ is deferred until the shim itself runs.
cat >"$FIXTURE_DIR/bin/curl" <<SHIM
#!/usr/bin/env bash
# Test shim: rewrite https://fixture.local/* -> http://127.0.0.1:${FIX_PORT}/*
# The fixture doesn't authenticate; -H Authorization passes through harmlessly.
new_args=()
for a in "\$@"; do
if [[ "\$a" == https://fixture.local/* ]]; then
rest="\${a#https://fixture.local}"
a="http://127.0.0.1:${FIX_PORT}\${rest}"
fi
new_args+=("\$a")
done
exec ${REAL_CURL_Q} "\${new_args[@]}"
SHIM
chmod +x "$FIXTURE_DIR/bin/curl"
# Each scenario below follows the same shape: run_scenario writes last_rc
# and the fixture-side posted_statuses.jsonl, which are then read back and
# asserted on. `|| true` tolerates a missing status log.
# T1: tier:low + 1 APPROVED + author is in engineers team → success
run_scenario "T1_success" "pass"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T1 exit code 0 (success)" "0" "$RC"
assert_contains "T1 POSTed state=success" '"state": "success"' "$POSTED"
assert_contains "T1 POST context is sop-tier-check / tier-check" \
'"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
assert_contains "T1 description names commenter" "test-runner" "$POSTED"
# T2: missing tier label → tier-check fails internally (mock exits 1).
# FAIL-CLOSED contract (fix/core-ci-fail-closed): refire now captures the
# REAL exit code and POSTs state=failure — it does NOT forge a green on
# the required context. The refire job itself still exits 0 (it succeeded
# at posting an honest failure status).
run_scenario "T2_no_tier_label" "fail_no_label"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T2 exit code 0 (posted an honest status)" "0" "$RC"
assert_contains "T2 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
# T3: tier:low present but ZERO approving reviews → internal tier check
# fails (mock exits 1). Refire POSTs state=failure, never a false green.
run_scenario "T3_no_approvals" "fail_no_approvals"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T3 exit code 0 (posted an honest status)" "0" "$RC"
assert_contains "T3 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
# T4: closed PR — refire is a no-op (no POST, exit 0)
run_scenario "T4_closed" "pass"
RC=$(cat "$FIX_STATE_DIR/last_rc")
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T4 closed PR exits 0" "0" "$RC"
# An empty status log is the evidence that nothing was POSTed.
assert_eq "T4 closed PR posts no status" "" "$POSTED"
# T5: rate-limit — disable the env override and let scenario set a
# recent statuses entry. Re-enable rate-limit for this scenario by NOT
# passing SOP_REFIRE_DISABLE_RATE_LIMIT.
# This is why T5 does not go through run_scenario, which always sets that
# variable. The mock tier-check variables are not passed here either.
echo "T5_rate_limited" >"$FIX_STATE_DIR/scenario"
: >"$FIX_STATE_DIR/posted_statuses.jsonl"
set +e
T5_OUT=$(
PATH="$FIXTURE_DIR/bin:$PATH" \
GITEA_TOKEN="fixture-token" \
GITEA_HOST="fixture.local" \
REPO="molecule-ai/molecule-core" \
PR_NUMBER="999" \
COMMENT_AUTHOR="test-runner" \
FIXTURE_PORT="$FIX_PORT" \
bash "$SCRIPT" 2>&1
)
T5_RC=$?
set -e
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
assert_eq "T5 rate-limited exits 0" "0" "$T5_RC"
assert_contains "T5 rate-limited log says skipped" "rate-limited" "$T5_OUT"
assert_eq "T5 rate-limited posts no status" "" "$POSTED"
# Summary. The final test expression is the script's exit status, so the
# run fails if any assertion failed.
echo
echo "------"
echo "PASS=$PASS FAIL=$FAIL"
if [ "$FAIL" -gt 0 ]; then
echo "Failed:$FAILED_TESTS"
fi
[ "$FAIL" -eq 0 ]
@@ -1,474 +0,0 @@
import importlib.util
import json
import pathlib
import urllib.error
ROOT = pathlib.Path(__file__).resolve().parents[1]
SCRIPT = ROOT / "umbrella-reaper.py"
def load_reaper():
    """Import umbrella-reaper.py as a fresh module and pin its Gitea settings.

    The script's file name contains a hyphen, so it is loaded by path via
    importlib instead of a normal import. Every call executes the script
    again and returns a new module object.

    The script derives OWNER and NAME from REPO once, at import time, and
    builds its request paths from those two names. Overriding REPO alone
    would therefore leave them at whatever the ambient environment
    produced, so they are pinned here alongside REPO.

    Returns:
        The loaded module with API, GITEA_TOKEN, GITEA_HOST, REPO, OWNER
        and NAME set to fixture values.
    """
    spec = importlib.util.spec_from_file_location("umbrella_reaper", SCRIPT)
    mod = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(mod)
    mod.API = "https://git.example.test/api/v1"
    mod.GITEA_TOKEN = "fixture-token"
    mod.GITEA_HOST = "git.example.test"
    mod.REPO = "owner/repo"
    # Keep the derived pair consistent with the REPO override above.
    mod.OWNER, mod.NAME = "owner", "repo"
    return mod
class FakeResponse:
    """Minimal HTTP-response stand-in: a context manager whose read()
    returns the payload serialized as UTF-8 JSON, with a fixed status of 200.
    """

    status = 200

    def __init__(self, payload):
        self.payload = payload

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        # Returning False lets exceptions raised inside the with-block propagate.
        return False

    def read(self):
        encoded = json.dumps(self.payload)
        return encoded.encode("utf-8")
def _pr_fixture(number: int, sha: str) -> dict:
return {"number": number, "head": {"sha": sha}}
def _status_entry(context: str, state: str) -> dict:
return {"context": context, "status": state}
def test_process_pr_compensates_when_all_sub_jobs_success(monkeypatch):
    """Umbrella is failure while every required sub-job is success:
    exactly one compensating status is posted for the head SHA.
    """
    reaper = load_reaper()
    recorded = []
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "success"),
        ]
    }
    monkeypatch.setattr(
        reaper,
        "post_status",
        lambda sha, context, description: recorded.append((sha, context, description)),
    )
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)

    outcome = reaper.process_pr(_pr_fixture(1, "abc123"))

    assert outcome is True
    assert len(recorded) == 1
    posted_sha, posted_context, posted_description = recorded[0]
    assert posted_sha == "abc123"
    assert posted_context == "CI / all-required (pull_request)"
    assert "Compensating status" in posted_description
def test_process_pr_skips_when_umbrella_missing(monkeypatch):
    """No umbrella context on the commit: process_pr succeeds without posting."""
    reaper = load_reaper()
    recorded = []
    combined = {
        "statuses": [
            _status_entry("CI / Platform (Go) (pull_request)", "success"),
        ]
    }
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: recorded.append(a))
    monkeypatch.setattr(reaper, "REQUIRED_SUB_JOBS", ["CI / Platform (Go) (pull_request)"])
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)

    outcome = reaper.process_pr(_pr_fixture(2, "def456"))

    assert outcome is True
    assert recorded == []
def test_process_pr_skips_when_sub_job_pending(monkeypatch):
    """A required sub-job still pending: the failed umbrella is left alone."""
    reaper = load_reaper()
    recorded = []
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "pending"),
        ]
    }
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: recorded.append(a))
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)

    outcome = reaper.process_pr(_pr_fixture(3, "ghi789"))

    assert outcome is True
    assert recorded == []
def test_process_pr_skips_when_sub_job_failure(monkeypatch):
    """A required sub-job really failed: no compensating status is posted."""
    reaper = load_reaper()
    recorded = []
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "failure"),
        ]
    }
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: recorded.append(a))
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)

    outcome = reaper.process_pr(_pr_fixture(4, "jkl012"))

    assert outcome is True
    assert recorded == []
def test_process_pr_returns_false_on_post_failure(monkeypatch):
    """An ApiError raised by the compensating POST makes process_pr return False."""
    reaper = load_reaper()
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "success"),
        ]
    }

    def exploding_post(sha, context, description):
        raise reaper.ApiError("POST /statuses/abc123 -> HTTP 500: simulated failure")

    monkeypatch.setattr(reaper, "post_status", exploding_post)
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)

    outcome = reaper.process_pr(_pr_fixture(5, "abc123"))

    assert outcome is False
def test_main_exits_nonzero_when_any_post_fails(monkeypatch):
    """Two PRs need compensation and the second POST fails: main() returns 1."""
    reaper = load_reaper()
    for key, value in (
        ("GITEA_TOKEN", "fixture-token"),
        ("GITEA_HOST", "git.example.test"),
        ("REPO", "owner/repo"),
    ):
        monkeypatch.setenv(key, value)
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "success"),
        ]
    }
    attempts = []

    def flaky_post(sha, context, description):
        # First call succeeds, second call fails.
        attempts.append(sha)
        if len(attempts) == 2:
            raise reaper.ApiError("simulated failure")

    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(
        reaper,
        "list_open_prs",
        lambda limit: [
            _pr_fixture(1, "abc123"),
            _pr_fixture(2, "def456"),
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)
    monkeypatch.setattr(reaper, "post_status", flaky_post)

    assert reaper.main() == 1
def test_main_exits_zero_when_all_posts_succeed(monkeypatch):
    """One PR needs compensation and the POST succeeds: main() returns 0."""
    reaper = load_reaper()
    for key, value in (
        ("GITEA_TOKEN", "fixture-token"),
        ("GITEA_HOST", "git.example.test"),
        ("REPO", "owner/repo"),
    ):
        monkeypatch.setenv(key, value)
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "success"),
        ]
    }
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "list_open_prs", lambda limit: [_pr_fixture(1, "abc123")])
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: None)

    assert reaper.main() == 0
def test_dry_run_does_not_post(monkeypatch):
    """With DRY_RUN on, a PR that qualifies for compensation triggers no
    POST to a /statuses/ path through the low-level api helper.
    """
    reaper = load_reaper()
    seen_requests = []
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
            _status_entry("CI / Platform (Go) (pull_request)", "success"),
        ]
    }

    def recording_api(method, path, *, body=None, query=None, expect_json=True):
        seen_requests.append((method, path, body))
        return 200, {"ok": True}

    monkeypatch.setattr(reaper, "api", recording_api)
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)
    monkeypatch.setattr(reaper, "DRY_RUN", True)

    outcome = reaper.process_pr(_pr_fixture(6, "mno345"))

    assert outcome is True
    # DRY_RUN should prevent the POST /statuses call
    status_posts = [
        (method, path)
        for method, path, _ in seen_requests
        if method == "POST" and "/statuses/" in path
    ]
    assert not status_posts
def test_duplicate_contexts_use_latest_state(monkeypatch):
    """When a sub-job context appears twice, the later entry in the list
    wins: pending followed by success counts as success.
    """
    reaper = load_reaper()
    recorded = []
    combined = {
        "statuses": [
            _status_entry("CI / all-required (pull_request)", "failure"),
            # duplicate: first pending, then success — the loop overwrites
            _status_entry("CI / Detect changes (pull_request)", "pending"),
            _status_entry("CI / Detect changes (pull_request)", "success"),
        ]
    }
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: recorded.append(a))
    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: combined)

    outcome = reaper.process_pr(_pr_fixture(7, "pqr678"))

    assert outcome is True
    assert len(recorded) == 1
def test_load_required_sub_jobs_from_ci_yml_pull_request_event():
    """Contexts derived from ci.yml carry the pull_request suffix by default."""
    reaper = load_reaper()
    # UMBRELLA_CONTEXT defaults to pull_request, so derivation should yield
    # the pull_request suffix.
    derived = reaper._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    wrong_suffix = [ctx for ctx in derived if not ctx.endswith(" (pull_request)")]
    assert not wrong_suffix
    for expected in (
        "CI / Detect changes (pull_request)",
        "CI / Python Lint & Test (pull_request)",
    ):
        assert expected in derived
def test_load_required_sub_jobs_from_ci_yml_push_event(monkeypatch):
    """Switching the umbrella context to the push event switches every
    derived sub-job context to the push suffix.
    """
    reaper = load_reaper()
    monkeypatch.setattr(reaper, "UMBRELLA_CONTEXT", "CI / all-required (push)")
    derived = reaper._load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    wrong_suffix = [ctx for ctx in derived if not ctx.endswith(" (push)")]
    assert not wrong_suffix
    assert "CI / Detect changes (push)" in derived
def test_list_open_prs_paginates(monkeypatch):
    """list_open_prs walks successive pages and concatenates them in order.

    The fake API serves a full page (2 items for limit=2), then a short
    page (1 item), then empty pages. The test checks the merged result and
    that pages 1 and 2 were requested, in that order, as strings.
    """
    mod = load_reaper()
    calls = []

    def fake_api(method, path, *, body=None, query=None, expect_json=True):
        calls.append(query)
        # Only the page number drives the fake; the requested limit is
        # deliberately not read (the original bound it to an unused local).
        page = int(query.get("page", 1))
        if page == 1:
            return 200, [{"number": 1}, {"number": 2}]
        if page == 2:
            return 200, [{"number": 3}]
        return 200, []

    monkeypatch.setattr(mod, "api", fake_api)
    prs = mod.list_open_prs(limit=2)
    assert len(prs) == 3
    assert prs[0]["number"] == 1
    assert prs[2]["number"] == 3
    assert calls[0]["page"] == "1"
    assert calls[1]["page"] == "2"
def test_process_pr_returns_false_on_status_fetch_failure(monkeypatch):
    """An ApiError while reading the combined status makes process_pr return False."""
    reaper = load_reaper()

    def failing_status_read(sha):
        raise reaper.ApiError("GET /statuses/abc123 -> HTTP 500: simulated outage")

    monkeypatch.setattr(reaper, "get_combined_status", failing_status_read)
    monkeypatch.setattr(reaper, "REQUIRED_SUB_JOBS", ["CI / Detect changes (pull_request)"])

    outcome = reaper.process_pr(_pr_fixture(8, "abc123"))

    assert outcome is False
def test_process_pr_returns_false_on_missing_statuses_array(monkeypatch):
    """A combined-status payload without a 'statuses' list is treated as a failure."""
    reaper = load_reaper()
    malformed = {"state": "success"}  # missing 'statuses' array
    monkeypatch.setattr(reaper, "get_combined_status", lambda sha: malformed)
    monkeypatch.setattr(reaper, "REQUIRED_SUB_JOBS", ["CI / Detect changes (pull_request)"])

    outcome = reaper.process_pr(_pr_fixture(9, "def456"))

    assert outcome is False
def test_main_exits_nonzero_when_any_status_read_fails(monkeypatch):
    """The first PR is compensated but the second PR's status read fails:
    main() still returns 1.
    """
    reaper = load_reaper()
    for key, value in (
        ("GITEA_TOKEN", "fixture-token"),
        ("GITEA_HOST", "git.example.test"),
        ("REPO", "owner/repo"),
    ):
        monkeypatch.setenv(key, value)

    def status_for(sha):
        # Only abc123 is readable; any other SHA simulates an outage.
        if sha != "abc123":
            raise reaper.ApiError("simulated status fetch failure")
        return {
            "statuses": [
                _status_entry("CI / all-required (pull_request)", "failure"),
                _status_entry("CI / Detect changes (pull_request)", "success"),
                _status_entry("CI / Platform (Go) (pull_request)", "success"),
            ]
        }

    monkeypatch.setattr(
        reaper,
        "REQUIRED_SUB_JOBS",
        [
            "CI / Detect changes (pull_request)",
            "CI / Platform (Go) (pull_request)",
        ],
    )
    monkeypatch.setattr(
        reaper,
        "list_open_prs",
        lambda limit: [
            _pr_fixture(1, "abc123"),
            _pr_fixture(2, "def456"),
        ],
    )
    monkeypatch.setattr(reaper, "get_combined_status", status_for)
    monkeypatch.setattr(reaper, "post_status", lambda *a, **k: None)

    assert reaper.main() == 1
-360
View File
@@ -1,360 +0,0 @@
#!/usr/bin/env python3
"""umbrella-reaper — auto-recovery for stale CI umbrella statuses on PRs.
Tracking: molecule-core#1780.
Sibling to status-reaper.py (default-branch push-suffix compensation),
but scoped to pull_request umbrellas instead of main-branch contexts.
What this script does, per `.gitea/workflows/umbrella-reaper.yml` invocation:
1. List open PRs via GET /repos/{o}/{r}/pulls?state=open&limit={N}.
2. For EACH PR:
   - GET combined commit status for PR head SHA.
   - Look for the umbrella context (default: "CI / all-required (pull_request)").
   - If umbrella state is "failure":
     - Verify ALL required sub-job contexts are "success".
     - If yes → POST compensating success to /statuses/{sha} with the
       same umbrella context and an honest description.
     - If any required sub-job is NOT success → skip (umbrella correctly
       reflects reality; do NOT lie).
   - If umbrella state is "success" or "pending" → skip.
3. Exit 0 when every PR was processed cleanly; exit 1 when any status read
   or compensating POST failed. Re-running is idempotent — Gitea de-dups
   by context.
What it does NOT do:
- Touch non-umbrella contexts.
- Compensate when ANY required sub-job is missing, pending, failure, or
  cancelled. Only the "all sub-jobs green, umbrella stale" race.
- Merge PRs. It only posts a status; branch protection still requires
  human approval.
- Run on closed PRs.
Halt conditions:
- Missing required env vars → exit 1 with ::error:: message.
- API 5xx on PR list → fail-loud (can't assess state).
- API 5xx on an individual PR's status → ::error:: is logged, the scan
  continues with the next PR, and the run exits 1 at the end.
"""
from __future__ import annotations
import json
import os
import re
import sys
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path
from typing import Any
def _load_required_sub_jobs_from_ci_yml(workflows_dir: str) -> list[str]:
    """Parse ci.yml and extract the all-required sentinel's sub-job contexts.

    Supports two shapes of the all-required job run block:

    1. Legacy Python f-string list (pre-2026-06-01):
           f"CI / Detect changes ({event})"
    2. Current shell-script shape (post-2026-06-01 scheduler fix):
           check "Detect changes" "$CHANGES_RESULT"

    The event suffix put on every context is the trailing parenthesised
    group of UMBRELLA_CONTEXT, parentheses included; it falls back to
    "(pull_request)" when the context does not end in such a group.

    Args:
        workflows_dir: Directory that contains ci.yml.

    Returns:
        Context names such as "CI / Detect changes (pull_request)", in the
        order they appear in the run block.

    Raises:
        RuntimeError: ci.yml is missing, is not a YAML mapping, has no
            all-required job, has no run block, or the run block matches
            neither supported shape.
    """
    ci_path = Path(workflows_dir) / "ci.yml"
    if not ci_path.exists():
        raise RuntimeError(f"ci.yml not found at {ci_path}")
    # PyYAML is installed by the workflow (same as status-reaper.py).
    import yaml

    with ci_path.open() as f:
        doc = yaml.safe_load(f)
    # An empty file loads as None and a scalar document as a plain value;
    # both used to surface as AttributeError instead of the documented
    # RuntimeError.
    if not isinstance(doc, dict):
        raise RuntimeError("ci.yml is not a YAML mapping")
    jobs = doc.get("jobs")
    all_required = jobs.get("all-required") if isinstance(jobs, dict) else None
    if not isinstance(all_required, dict):
        raise RuntimeError("ci.yml missing 'all-required' job")

    # The first step with a non-empty `run:` is taken as the sentinel script.
    run_block = ""
    for step in all_required.get("steps") or []:
        if isinstance(step, dict):
            run_text = step.get("run", "")
            if run_text:
                run_block = run_text
                break
    if not run_block:
        raise RuntimeError("all-required job missing run block")

    # Determine event suffix from the umbrella context we are watching.
    # The suffix always keeps its parentheses. Previously only the
    # pull_request and push branches did; any other event fell through to a
    # bare name and produced contexts like "CI / Detect changes merge_group".
    suffix_match = re.search(r" (\([^)]+\))$", UMBRELLA_CONTEXT)
    suffix = suffix_match.group(1) if suffix_match else "(pull_request)"

    # Try legacy f-string format first.
    if "({event})" in run_block:
        templates = re.findall(r'f["\'](.*?\(\{event\}\))["\']', run_block)
        if templates:
            return [template.replace("({event})", suffix) for template in templates]

    # Try current shell-script format: check "Name" "$RESULT"
    names = re.findall(r'check\s+"([^"]+)"', run_block)
    if names:
        return [f"CI / {name} {suffix}" for name in names]

    raise RuntimeError("unable to derive required sub-jobs from all-required run block")
# --------------------------------------------------------------------------
# Environment
# --------------------------------------------------------------------------
def _env(key: str, *, default: str = "") -> str:
return os.environ.get(key, default)
# Connection settings. Their presence is enforced later by
# _require_runtime_env(), not here, so the module can be imported without them.
GITEA_TOKEN = _env("GITEA_TOKEN")
GITEA_HOST = _env("GITEA_HOST")
REPO = _env("REPO")
DRY_RUN = _env("DRY_RUN", default="").lower() in ("1", "true", "yes")
# The umbrella context to watch. Must match the branch-protection name
# exactly (Gitea de-dups by context string).
UMBRELLA_CONTEXT = _env("UMBRELLA_CONTEXT", default="CI / all-required (pull_request)")
# Required sub-job contexts. The umbrella is only compensated when ALL of
# these are "success" on the same SHA. Order does not matter.
#
# Derive from ci.yml at runtime to prevent drift (CR2 blocker #1).
# The env var REQUIRED_SUB_JOBS overrides derivation for emergency
# tuning or local testing. It is a ';'-separated list; blank items are
# dropped.
_REQUIRED_SUB_JOBS_OVERRIDE = _env("REQUIRED_SUB_JOBS")
if _REQUIRED_SUB_JOBS_OVERRIDE:
    REQUIRED_SUB_JOBS = [
        ctx.strip()
        for ctx in _REQUIRED_SUB_JOBS_OVERRIDE.split(";")
        if ctx.strip()
    ]
else:
    try:
        # NOTE: the path is relative to the current working directory.
        REQUIRED_SUB_JOBS = _load_required_sub_jobs_from_ci_yml(".gitea/workflows")
    except Exception as exc:
        sys.stderr.write(
            f"::error::Failed to derive REQUIRED_SUB_JOBS from ci.yml: {exc}\n"
        )
        sys.exit(1)
# "owner/name" is split on the first slash; a REPO without a slash yields an empty NAME.
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
# Page size for the open-PR listing. A PR_LIMIT that is exported but empty
# falls back to 50 instead of crashing in int(""), and a non-numeric value
# is reported in the same ::error:: style as the other start-up failures.
try:
    PR_LIMIT = int(_env("PR_LIMIT") or "50")
except ValueError:
    sys.stderr.write(
        f"::error::PR_LIMIT must be an integer, got {_env('PR_LIMIT')!r}\n"
    )
    sys.exit(1)
def _require_runtime_env() -> None:
for key in ("GITEA_TOKEN", "GITEA_HOST", "REPO"):
if not os.environ.get(key):
sys.stderr.write(f"::error::missing required env var: {key}\n")
sys.exit(1)
# --------------------------------------------------------------------------
# Tiny HTTP helper
# --------------------------------------------------------------------------
class ApiError(RuntimeError):
    """Raised by the API helpers for non-2xx responses and unusable response bodies."""
def api(
    method: str,
    path: str,
    *,
    body: dict | None = None,
    query: dict[str, str] | None = None,
    expect_json: bool = True,
) -> tuple[int, Any]:
    """Send one authenticated request to the Gitea API and decode the reply.

    Args:
        method: HTTP method, e.g. "GET" or "POST".
        path: Path appended to the module-level API base URL.
        body: Optional JSON-serializable payload; sent with a JSON content type.
        query: Optional query parameters, URL-encoded onto the path.
        expect_json: When False, a non-JSON 2xx body is returned as
            {"_raw": <text>} instead of raising.

    Returns:
        (status, decoded) where decoded is the parsed JSON body, or None
        when the response body is empty.

    Raises:
        ApiError: the response status is outside 2xx, the body is not JSON
            while expect_json is True, or the request never produced an
            HTTP response (connection failure, DNS error, timeout).
    """
    url = f"{API}{path}"
    if query:
        url = f"{url}?{urllib.parse.urlencode(query)}"
    data = None
    headers = {
        "Authorization": f"token {GITEA_TOKEN}",
        "Accept": "application/json",
    }
    if body is not None:
        data = json.dumps(body).encode("utf-8")
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, method=method, data=data, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
            status = resp.status
    except urllib.error.HTTPError as e:
        # Non-2xx replies arrive as exceptions; keep the body for the error text.
        raw = e.read()
        status = e.code
    except OSError as e:
        # URLError and socket timeouts are OSError subclasses. Callers only
        # catch ApiError, so without this a single network hiccup aborted
        # the whole scan with a traceback instead of counting as one failure.
        raise ApiError(f"{method} {path} -> transport error: {e}") from e
    if not (200 <= status < 300):
        snippet = raw[:500].decode("utf-8", errors="replace") if raw else ""
        raise ApiError(f"{method} {path} -> HTTP {status}: {snippet}")
    if not raw:
        return status, None
    try:
        return status, json.loads(raw)
    except json.JSONDecodeError as e:
        if expect_json:
            raise ApiError(
                f"{method} {path} -> HTTP {status} but body is not JSON: {e}"
            ) from e
        return status, {"_raw": raw.decode("utf-8", errors="replace")}
# --------------------------------------------------------------------------
# Gitea reads / writes
# --------------------------------------------------------------------------
def list_open_prs(limit: int = 50) -> list[dict]:
    """Return every open PR, following pagination until an empty page.

    Args:
        limit: Page size requested from the API.

    Returns:
        All PR objects from all pages, in the order the API returned them.

    Raises:
        ApiError: a request fails or a page is not a JSON array (fail closed).
    """
    all_prs: list[dict] = []
    page = 1
    while True:
        _, body = api(
            "GET",
            f"/repos/{OWNER}/{NAME}/pulls",
            query={"state": "open", "limit": str(limit), "page": str(page)},
        )
        if not isinstance(body, list):
            raise ApiError(f"PR list page {page} response is not a JSON array")
        # Only an empty page ends the walk. Stopping on a page shorter than
        # `limit` is wrong whenever the server caps the page size below the
        # requested limit: the first page then looks "short" and every later
        # PR is silently skipped. The cost is one extra request per scan.
        if not body:
            return all_prs
        all_prs.extend(body)
        page += 1
def get_combined_status(sha: str) -> dict:
    """Fetch the combined commit status for `sha`.

    Raises:
        ApiError: the request fails or the response is not a JSON object.
    """
    endpoint = f"/repos/{OWNER}/{NAME}/commits/{sha}/status"
    _status_code, payload = api("GET", endpoint)
    if isinstance(payload, dict):
        return payload
    raise ApiError(f"status for {sha} response is not a JSON object")
def post_status(sha: str, context: str, description: str) -> None:
    """POST a `success` commit status for `sha` under `context`.

    With DRY_RUN enabled the payload is only printed and no request is made.
    """
    payload = dict(context=context, state="success", description=description)
    if not DRY_RUN:
        api("POST", f"/repos/{OWNER}/{NAME}/statuses/{sha}", body=payload)
        return
    print(f"[DRY-RUN] Would POST /statuses/{sha}: {json.dumps(payload)}")
# --------------------------------------------------------------------------
# Core logic
# --------------------------------------------------------------------------
def _entry_state(s: dict) -> str:
return s.get("status") or s.get("state") or ""
def process_pr(pr: dict) -> bool:
    """Inspect one PR and post a compensating umbrella status if warranted.

    A compensating `success` is posted only when the umbrella context is
    `failure` while every context in REQUIRED_SUB_JOBS is present and
    `success` on the PR's head SHA.

    Args:
        pr: PR object; only `number` and `head.sha` are read.

    Returns:
        True when the PR was handled, including every no-op skip (no head
        SHA, no umbrella context, umbrella not in failure, sub-jobs missing
        or not all success). False when the combined status could not be
        fetched, its payload lacks a `statuses` list, or the compensating
        POST failed.
    """
    num = pr.get("number")
    # `head` may be absent or JSON null; both are treated as "no SHA".
    sha = (pr.get("head") or {}).get("sha")
    if not sha:
        print(f"::warning::PR #{num}: missing head.sha; skipping")
        return True
    try:
        status = get_combined_status(sha)
    except ApiError as e:
        print(f"::error::PR #{num}: status fetch failed: {e}")
        return False
    statuses = status.get("statuses")
    if not isinstance(statuses, list):
        print(f"::error::PR #{num}: combined status missing 'statuses' array")
        return False

    # Single pass over the entries. For a context that appears more than
    # once, the entry later in the list overwrites the earlier one.
    umbrella_entry = None
    subjob_states: dict[str, str] = {}
    for s in statuses:
        if not isinstance(s, dict):
            continue
        ctx = s.get("context", "")
        state = _entry_state(s)
        if ctx == UMBRELLA_CONTEXT:
            umbrella_entry = s
        if ctx in REQUIRED_SUB_JOBS:
            subjob_states[ctx] = state

    if umbrella_entry is None:
        print(f"::notice::PR #{num}: no umbrella context '{UMBRELLA_CONTEXT}'; skipping")
        return True
    umbrella_state = _entry_state(umbrella_entry)
    if umbrella_state != "failure":
        print(f"::notice::PR #{num}: umbrella is '{umbrella_state}'; skipping")
        return True

    # Verify ALL required sub-jobs are present and success
    missing = [ctx for ctx in REQUIRED_SUB_JOBS if ctx not in subjob_states]
    if missing:
        print(
            f"::notice::PR #{num}: umbrella=failure, but missing sub-jobs: {missing}; "
            "skipping (sub-jobs may still be running)"
        )
        return True
    not_success = [ctx for ctx in REQUIRED_SUB_JOBS if subjob_states[ctx] != "success"]
    if not_success:
        print(
            f"::notice::PR #{num}: umbrella=failure, but sub-jobs not all success: "
            f"{[(ctx, subjob_states[ctx]) for ctx in not_success]}; skipping"
        )
        return True

    # All checks pass — post compensating status
    desc = (
        "Compensating status: all required sub-jobs verified success; "
        "umbrella stale due to commit-status propagation race. "
        f"Auto-posted by umbrella-reaper for PR #{num}."
    )
    try:
        post_status(sha, UMBRELLA_CONTEXT, desc)
        print(f"::notice::PR #{num}: posted compensating success for {UMBRELLA_CONTEXT}")
        return True
    except ApiError as e:
        print(f"::error::PR #{num}: failed to post compensating status: {e}")
        return False
def main() -> int:
    """Scan open PRs and post compensating umbrella statuses where warranted.

    Validates the runtime environment, refuses to run when
    REQUIRED_SUB_JOBS is empty, then calls process_pr() once for every
    open PR returned by list_open_prs().

    Returns:
        The process exit code: 0 when process_pr() returned a truthy value
        for every PR, 1 when REQUIRED_SUB_JOBS is empty or process_pr()
        returned a falsy value for at least one PR.
    """
    _require_runtime_env()
    # Drift guard: ci.yml derivation already happened at module load, but
    # we sanity-check it is non-empty so the loop below doesn't trivially
    # no-op because of a parse bug.
    if not REQUIRED_SUB_JOBS:
        sys.stderr.write("::error::REQUIRED_SUB_JOBS is empty; bailing out\n")
        return 1
    prs = list_open_prs(limit=PR_LIMIT)
    print(f"::notice::Scanning {len(prs)} open PRs for stale umbrella statuses")
    # Every PR is processed, in order, even after an earlier one fails; we
    # only tally how many calls reported failure.
    # NOTE(review): the summary below labels this tally "failed POSTs", but
    # process_pr() appears to return False for status-fetch errors as well —
    # confirm whether the wording should be broadened.
    failed = sum(1 for pr in prs if not process_pr(pr))
    print(f"::notice::umbrella-reaper complete (failed POSTs={failed})")
    return 1 if failed else 0
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
+27 -15
View File
@@ -55,22 +55,38 @@
version: 1
# Uniform hard-fail mode (CTO 2026-06-07):
# Every PR uses the same gate — no tier branching.
# Missing acks → status `failure`, blocks merge via branch protection.
# Tier-aware failure mode (RFC#351 open question 2):
# For tier:high — hard-fail (status `failure`, blocks merge via BP).
# For tier:medium — hard-fail (same as high; medium is non-trivial).
# For tier:low — soft-fail (status `pending` with `acked: N/M` in the
# description). BP can choose to require the context
# or not for low-tier PRs.
# If no tier label is present, default to medium (hard-fail) — every PR
# should have a tier label per sop-tier-check, and absence indicates
# a missing-tier defect we should surface, not silently lower the bar.
tier_failure_mode:
"tier:high": hard
"tier:medium": hard
"tier:low": soft
default_mode: hard # used when no tier:* label is present
# High-risk class (RFC#450 Option C, governance-fix for internal#442).
#
# A PR is "high-risk" when ANY of the listed labels are applied.
# A PR is "high-risk" when ANY of the listed labels are applied OR when
# the PR has `tier:high` (mechanically the strictest existing tier).
# High-risk items use `required_teams_high_risk` (when present on the
# item); non-high-risk items use the default `required_teams`.
#
# Risk-classed two-eyes shape:
# - Default class (not high-risk): a non-author engineers/managers/ceo
# ack satisfies the item — 25+ live identities, no dependency on a
# dead/inactive senior persona token.
# - High-risk class (any high_risk_label): still requires a non-author
# ceo ack (durable human team).
# This closes the inconsistency that the SOP charter already mandates
# `tier:high → ceo only` for the sibling `sop-tier-check` gate; the
# sop-checklist's `root-cause` and `no-backwards-compat` items now
# follow the same risk-classed two-eyes shape:
# - Default class (tier:low/medium, not high-risk): a non-author
# engineers/managers/ceo ack satisfies the item — 25+ live
# identities, no dependency on a dead/inactive senior persona
# token.
# - High-risk class (tier:high OR any high_risk_label): still
# requires a non-author ceo ack (durable human team).
#
# Tightening: add labels to high_risk_labels.
# Loosening: remove labels.
@@ -149,11 +165,7 @@ items:
- slug: memory-consulted
numeric_alias: 7
# #1973: normalize marker so it matches the slug. Previously the
# slash produced a checklist status that never resolved because
# normalize_slug() collapses / to - and the Gitea PR body parser
# would not find the expected heading.
pr_section_marker: "Memory consulted"
pr_section_marker: "Memory/saved-feedback consulted"
required_teams: [engineers]
ai_ack_eligible: true
description: >-
+6 -6
View File
@@ -13,14 +13,14 @@
# the structured JSON shape is forward-compatible.
#
# Logic in `.gitea/scripts/audit-force-merge.sh` per the same script-
# extract pattern as sop-checklist.
# extract pattern as sop-tier-check.
name: audit-force-merge
# pull_request_target loads from the base branch — same security model
# as sop-checklist. Without this, an attacker could rewrite the
# as sop-tier-check. Without this, an attacker could rewrite the
# workflow on a PR and skip the audit emission for their own
# force-merge. See `.gitea/workflows/sop-checklist.yml` for the full
# force-merge. See `.gitea/workflows/sop-tier-check.yml` for the full
# rationale.
on:
pull_request_target:
@@ -41,8 +41,8 @@ jobs:
ref: ${{ github.event.pull_request.base.sha }}
- name: Detect force-merge + emit audit event
env:
# Same org-level secret the sop-checklist workflow uses.
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
# Same org-level secret the sop-tier-check workflow uses.
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_NUMBER: ${{ github.event.pull_request.number }}
@@ -54,7 +54,7 @@ jobs:
# required checks) for each branch listed here.
#
# Declared here rather than fetched from /branch_protections
# because that endpoint requires admin write — sop-checklist-bot is
# because that endpoint requires admin write — sop-tier-bot is
# read-only by design (least-privilege).
REQUIRED_CHECKS_JSON: |
{
@@ -34,8 +34,6 @@ jobs:
check:
name: Block forbidden paths
runs-on: ubuntu-latest
# Hard gate — detected internal-path leaks fail the workflow.
# continue-on-error removed per directive (fail-open → fail-closed).
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
@@ -1,165 +0,0 @@
name: boot-to-registration-e2e (advisory)
# cp#455 — Minimal-cell boot-to-registration e2e.
# CTO directive 14eb4f07: "build the minimal claude-code+kimi cell,
# it should now go GREEN since the fix is live."
#
# Stage 1 of 5-stage rollout. Reuses the dispatch-only EC2
# provisioning path from test_staging_full_saas.sh but reduced to
# the minimum boot-to-registration surface:
#
# 1. Provision request accepted; workspace transitions to booting/running
# 2. Controlplane receives /registry/register for that workspace_id
# 3. JSON-RPC/completion route returns successful minimal response
# 4. Teardown terminates workspace even on failure (trap)
#
# Advisory (non-blocking) per Researcher Stage 2 design — RED on
# current main is expected pre-cp#469-cluster. After cp#477 deploy
# (888efceb) + PR #2167 merge, cell should turn GREEN. THAT green
# is the cluster-proof signal.
#
# Cost controls (mandatory):
# - SPOT instances (tagged run_id/workspace_id for cost attribution)
# - Fast teardown (~3-5 min wall-clock) even on assertion failure
# - Structured per-cell results JSON (runtime/provider/model/
# billing_mode/workspace_id/register_status/completion_status/
# teardown_status/elapsed_seconds)
#
# Inputs:
# runtime : default claude-code
# billing_mode : default platform_managed (the cp#469-cluster path)
# provider : default platform (vs direct-to-provider)
# model : default moonshot/kimi-k2.6 (CTO-specified)
#
# PR target: molecule-core (this file). Companion harness extension
# (test_minimal_boot_cell.sh) lives in tests/e2e/ alongside
# test_staging_full_saas.sh — same repo, same branch.
#
# Note: cp#455 was originally spec'd to live in molecule-controlplane
# (`.gitea/workflows/` path), but molecule-core's CI is the home for
# tenant-boot e2e tests in this stage. Stage 2 may move the path.
on:
workflow_dispatch:
# Note: Gitea 1.22.6 does not support workflow_dispatch.inputs
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Defaults
# are hardcoded in the job env below. Stage 2 can add matrix/
# param support once the Gitea version supports it.
# Advisory: no cron schedule, manual dispatch only. Branch protection
# doesn't require this — RED on main is expected pre-cp#469-cluster
# deploy, GREEN signals the cluster is live.
permissions:
contents: read
# No issue-write; failures surface as red runs in workflow history.
concurrency:
group: boot-to-registration-e2e
cancel-in-progress: false
jobs:
# bp-exempt: advisory e2e — non-gating, manual dispatch only (cp#455 Stage 1)
minimal-cell:
name: Minimal cell (claude-code + platform + moonshot/kimi-k2.6)
runs-on: ubuntu-latest
# Bounded at 12 min. Wall-clock budget breakdown:
# - cold EC2 provision: ~3-4 min (SPOT)
# - /registry/register wait: ~30s
# - completion call: ~10s
# - teardown: ~30-60s
# - tail headroom: ~6-7 min
timeout-minutes: 12
env:
# Hardcoded defaults — Gitea 1.22.6 does not support workflow_dispatch.inputs
# (feedback_gitea_workflow_dispatch_inputs_unsupported). Stage 2 can add
# matrix/param support once the Gitea version supports it.
E2E_RUNTIME: claude-code
E2E_BILLING_MODE: platform_managed
E2E_PROVIDER: platform
E2E_MODEL: moonshot/kimi-k2.6
E2E_RUN_ID: cp455-${{ github.run_id }}
E2E_PROVISION_TIMEOUT_SECS: '300' # 5 min — fast teardown budget
MOLECULE_CP_URL: ${{ vars.STAGING_CP_URL || 'https://staging-api.moleculesai.app' }}
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Verify required secrets present
run: |
if [ -z "${MOLECULE_ADMIN_TOKEN:-}" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret missing — minimal-cell e2e cannot run"
echo "::error::Set it at Settings → Secrets and Variables → Actions; pull from staging-CP's CP_ADMIN_API_TOKEN env in Railway."
exit 1
fi
- name: Install required tools
run: |
for cmd in jq curl python3; do
command -v "$cmd" >/dev/null 2>&1 || {
echo "::error::required tool '$cmd' not on PATH — runner image regression?"
exit 1
}
done
- name: Run minimal-cell boot-to-registration harness
# The harness script handles its own teardown via EXIT trap;
# even on assertion failure (provision timeout, register
# timeout, completion failure), the workspace is deprovisioned
# and a leak is reported. Exit code propagates from the script.
# Structured per-cell results are emitted to ${GITHUB_STEP_SUMMARY}
# so operators see pass/fail per assertion without scrolling.
run: |
bash tests/e2e/test_minimal_boot_cell.sh
- name: Emit structured per-cell results
if: always()
# Always run (even on failure) so the structured results are
# visible in the workflow summary. The script writes a JSON
# file at /tmp/cell-result.json; this step renders it as a
# job summary.
run: |
if [ -f /tmp/cell-result.json ]; then
echo "## Minimal-cell results" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo '```json' >> "$GITHUB_STEP_SUMMARY"
cat /tmp/cell-result.json >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo '```' >> "$GITHUB_STEP_SUMMARY"
else
echo "## Minimal-cell results: NO_RESULT_FILE" >> "$GITHUB_STEP_SUMMARY"
echo "" >> "$GITHUB_STEP_SUMMARY"
echo "Harness did not produce /tmp/cell-result.json — likely crashed before trap fired." >> "$GITHUB_STEP_SUMMARY"
fi
- name: Failure summary
if: failure()
run: |
{
echo "## cp#455 minimal-cell FAILED"
echo ""
echo "**Run ID:** ${{ github.run_id }}"
echo "**Runtime:** ${E2E_RUNTIME}"
echo "**Billing mode:** ${E2E_BILLING_MODE}"
echo "**Provider:** ${E2E_PROVIDER}"
echo "**Model:** ${E2E_MODEL}"
echo "**Slug:** ${E2E_RUN_ID}"
echo ""
echo "### What this means"
echo ""
echo "The minimal claude-code+kimi cell did not pass all 4 assertions:"
echo "1. Provision request accepted; workspace transitions to booting/running"
echo "2. Controlplane receives /registry/register for that workspace_id"
echo "3. JSON-RPC/completion route returns successful minimal response"
echo "4. Teardown terminates workspace even on failure (trap)"
echo ""
echo "RED is expected pre-cp#469-cluster. After cp#477 deploy (888efceb) + PR #2167 merge,"
echo "this should turn GREEN. Persistent RED after both merge = cluster bug, not e2e bug."
echo ""
echo "### Next steps"
echo ""
echo "1. Check the harness output above for the assertion that failed"
echo "2. If assertion 1 fails: provision path broken — check CP admin API + EC2 quota"
echo "3. If assertion 2 fails: /registry/register path broken — check workspace-server boot"
echo "4. If assertion 3 fails: LLM proxy / completion path broken — check cp#469 cluster"
echo "5. If assertion 4 fails: teardown trap broken — leak risk, fix immediately"
} >> "$GITHUB_STEP_SUMMARY"
+4 -4
View File
@@ -12,7 +12,7 @@
# (SHA 0adf2098) per RFC internal#219 Phase 2b+c — replicate repo-by-repo.
#
# When any pair diverges, a `[ci-drift]` issue is opened or updated
# (idempotent by title) and labelled `ci-bp-drift`. This is the
# (idempotent by title) and labelled `tier:high`. This is the
# auto-detection that closes the regression class identified in
# RFC §1 finding 3 (protection only listed 2 of 6 real jobs for
# ~weeks, undetected) and §6 (audit env drifts silently from
@@ -81,7 +81,7 @@ jobs:
# Gitea persona whose ONLY job is reading branch_protections
# and posting the [ci-drift] tracking issue. The endpoint
# `GET /repos/.../branch_protections/{branch}` requires
# repo-ADMIN role (Gitea 1.22.6) — the default GITHUB_TOKEN and the
# repo-ADMIN role (Gitea 1.22.6) — SOP_TIER_CHECK_TOKEN and the
# auto-injected GITHUB_TOKEN do NOT have it (read-only / write
# without admin), so the previous fallback chain 403'd.
# Mirrors the controlplane fix landed in CP PR#134.
@@ -106,7 +106,7 @@ jobs:
AUDIT_WORKFLOW_PATH: '.gitea/workflows/audit-force-merge.yml'
# Path to the CI workflow with the sentinel + the jobs.
CI_WORKFLOW_PATH: '.gitea/workflows/ci.yml'
# Issue label applied on file/update. `ci-bp-drift` exists in
# Issue label applied on file/update. `tier:high` exists in
# the molecule-core label set (verified 2026-05-11, label id 9).
DRIFT_LABEL: 'ci-bp-drift'
DRIFT_LABEL: 'tier:high'
run: python3 .gitea/scripts/ci-required-drift.py
+7 -18
View File
@@ -148,11 +148,6 @@ jobs:
run: $(go env GOPATH)/bin/golangci-lint run --timeout 3m ./...
- if: ${{ needs.changes.outputs.platform == 'true' }}
name: Diagnostic — per-package verbose 60s
# DIAGNOSTIC ONLY (continue-on-error below): this step exists to dump
# verbose per-package output for triage, NOT to gate. The blocking gate
# is "Run tests with coverage (blocking gate)" immediately below. The
# `set +e` / swallowed exits here are intentional — do not "fix" them
# like a gate; the real gate is the next step.
run: |
set +e
go test -race -v -timeout 60s ./internal/handlers/... 2>&1 | tee /tmp/test-handlers.log
@@ -314,11 +309,6 @@ jobs:
# #1815 — wires coverage into CI so we get a baseline visible on
# every PR. No threshold gate yet; thresholds dial in (Step 3, also
# tracked in #1815) after the team sees what current coverage is.
# Memory: the full vitest+v8-coverage process tree peaks at ~1.33 GB
# (measured 2026-06-08), comfortably within the runner — so this single
# run is BOTH the pass/fail gate and the coverage artifact (one SSOT, no
# split). The earlier intermittent red here was a DisplayTab paste-race
# (fixed in this PR), NOT a coverage OOM.
run: npx vitest run --coverage
- name: Upload coverage summary as artifact
if: ${{ needs.changes.outputs.canvas == 'true' }}
@@ -428,9 +418,10 @@ jobs:
# a manual action that determinism made obsolete.
name: Canvas Deploy Status
runs-on: docker-host
# Per-step no-op (not job-level `if:`) so the job reaches SUCCESS on PRs
# instead of skipped — skipped poisons the PR combined status (internal#817).
# Job-level `if:` so ci-required-drift.py's ci_job_names() detects this as
# github.ref-gated and excludes it from the required-context F1 set (mc#1982).
# Step-level exit 0 handles the "not a canvas main push" case.
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
needs: [changes, canvas-build]
steps:
- name: Record canvas ordered-deploy status
@@ -509,7 +500,7 @@ jobs:
# `CI / all-required (pull_request)` per issue #1473.
#
# Closes the failure mode where status_check_contexts on molecule-core/main
# only listed `Secret scan` + `sop-checklist` (the 2 meta-gates), so real
# only listed `Secret scan` + `sop-tier-check` (the 2 meta-gates), so real
# `Platform (Go)` / `Canvas (Next.js)` / `Python Lint & Test` / `Shellcheck`
# red silently merged through. See internal#286 for the three concrete
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
@@ -542,8 +533,9 @@ jobs:
# The `needs:` list MUST stay in lockstep with ci-required-drift.py's
# F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
# whose `if:` gates on github.event_name/github.ref). canvas-deploy-
# status is per-step-gated (not job-level `if:`) so it reaches SUCCESS
# on PRs and is included here — internal#817. If a new always-running
# reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
# so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
# skipped job would never let the sentinel run. If a new always-running
# CI job is added, add it here too or ci-required-drift F1 will flag it.
#
# Stays on the dedicated `ci-meta` lane (no docker work, so the
@@ -557,7 +549,6 @@ jobs:
- canvas-build
- shellcheck
- python-lint
- canvas-deploy-status
continue-on-error: false
runs-on: ci-meta
timeout-minutes: 5
@@ -576,7 +567,6 @@ jobs:
CANVAS_RESULT: ${{ needs.canvas-build.result }}
SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
CANVAS_DEPLOY_RESULT: ${{ needs.canvas-deploy-status.result }}
run: |
set -euo pipefail
fail=0
@@ -598,7 +588,6 @@ jobs:
check "Canvas (Next.js)" "$CANVAS_RESULT"
check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
check "Python Lint & Test" "$PYTHON_LINT_RESULT"
check "Canvas Deploy Status" "$CANVAS_DEPLOY_RESULT"
if [ "$fail" -ne 0 ]; then
echo "::error::all-required: one or more aggregated CI jobs did not succeed"
exit 1
+3 -3
View File
@@ -131,9 +131,9 @@ jobs:
# on the per-runtime default ("sonnet" → routes to direct
# Anthropic, defeats the cost saving). Operators can override
# via workflow_dispatch by setting a different E2E_MODEL_SLUG
# input if they need to exercise a specific model. MiniMax-M2.7 is the
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2.7' }}
# input if they need to exercise a specific model. MiniMax-M2 is the
# stable staging MiniMax path used by the full-SaaS smoke.
E2E_MODEL_SLUG: ${{ github.event.inputs.model_slug || 'MiniMax-M2' }}
# Bound to 10 min so a stuck provision fails the run instead of
# holding up the next cron firing. 15-min default in the script
# is for the on-PR full lifecycle where we have more headroom.
-3
View File
@@ -429,9 +429,6 @@ jobs:
# round-trip is covered by the priority-runtimes `mock` arm, not here.
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_keyless_feature_contracts_e2e.sh
- name: Run user_tasks E2E (REST + MCP — agent→user action requests)
if: needs.detect-changes.outputs.api == 'true'
run: bash tests/e2e/test_user_tasks_e2e.sh
- name: Run secrets-dispatch contract test (keyless SECRETS_JSON branch order)
# Previously orphaned (no workflow referenced it). Hermetic unit-style
# contract over test_staging_full_saas.sh's LLM-key branch precedence —
-14
View File
@@ -250,20 +250,6 @@ jobs:
echo "CANVAS_PORT=${CANVAS_PORT}" >> "$GITHUB_ENV"
echo "Canvas host port: ${CANVAS_PORT}"
- name: Set deterministic admin token
if: needs.detect-changes.outputs.chat == 'true'
run: |
# PR #2291 made auth fail-closed everywhere (no dev-mode escape).
# The platform server requires ADMIN_TOKEN; the canvas requires the
# matching NEXT_PUBLIC_ADMIN_TOKEN or every API call 401s.
# We set a deterministic per-run value so the ephemeral platform
# and canvas are paired correctly.
E2E_ADMIN_TOKEN="e2e-chat-admin-${{ github.run_id }}-${{ github.run_attempt }}"
echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "NEXT_PUBLIC_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
echo "Admin token configured for e2e-chat platform + canvas."
- name: Start platform (background)
if: needs.detect-changes.outputs.chat == 'true'
working-directory: workspace-server
-352
View File
@@ -54,13 +54,6 @@ on:
- 'tests/e2e/lib/model_slug.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- 'tests/e2e/test_staging_concierge_e2e.sh'
- 'tests/e2e/test_staging_concierge_creates_workspace_e2e.sh'
- 'workspace-server/internal/staginge2e/**'
- 'workspace-server/internal/handlers/platform_agent.go'
- 'workspace-server/internal/handlers/user_tasks.go'
- 'workspace-server/internal/handlers/llm_billing_mode_handler.go'
- 'workspace-server/internal/handlers/discovery.go'
- '.gitea/workflows/e2e-staging-saas.yml'
pull_request:
branches: [main]
@@ -76,13 +69,6 @@ on:
- 'tests/e2e/lib/model_slug.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- 'tests/e2e/test_staging_concierge_e2e.sh'
- 'tests/e2e/test_staging_concierge_creates_workspace_e2e.sh'
- 'workspace-server/internal/staginge2e/**'
- 'workspace-server/internal/handlers/platform_agent.go'
- 'workspace-server/internal/handlers/user_tasks.go'
- 'workspace-server/internal/handlers/llm_billing_mode_handler.go'
- 'workspace-server/internal/handlers/discovery.go'
- '.gitea/workflows/e2e-staging-saas.yml'
workflow_dispatch:
schedule:
@@ -510,341 +496,3 @@ jobs:
echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
fi
exit 0
# ── CONCIERGE user_tasks PRIMITIVE (Feature 3) — real-staging REST+MCP+authz ──
#
# Drives tests/e2e/test_staging_concierge_e2e.sh against a fresh throwaway
# tenant: the full agent→user "ask" contract over BOTH surfaces (REST +
# the MCP tools/call envelope a canvas concierge agent uses) PLUS the
# cross-workspace authz scoping (ws-B can't touch ws-A's task). Reuses the
# same CP-admin org-provision/teardown scaffolding + _lib.sh + AWS-leak-check
# lib as the full-SaaS harness (the script SOURCEs them — no duplication).
#
# GATING (no continue-on-error): user_tasks is a pure DB/handler primitive
# with NO LLM container dependency (workspaces are created 'external' — row
# only, no EC2), so this is fast (~provision + TLS, no 10-min cold boot) and
# NOT subject to the cp#245 boot-timeout flake the full-SaaS job carries. It
# therefore has no honest reason to be masked. Runs on push-to-main /
# workflow_dispatch / cron only (needs live staging infra — never on PR, where
# the pr-validate job above already posts the workflow's PR status).
# bp-required: pending #2430
e2e-staging-concierge-user-tasks:
name: E2E Staging Concierge user_tasks
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
timeout-minutes: 30
permissions:
contents: read
env:
MOLECULE_CP_URL: https://staging-api.moleculesai.app
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
E2E_AWS_LEAK_CHECK: required
E2E_AWS_TERMINATE_LEAKS: '1'
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.11"
- name: Verify admin token + AWS creds present
run: |
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
exit 2
fi
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
if [ -z "${!var:-}" ]; then
echo "::error::$var not set — EC2 leak verification cannot run"
exit 2
fi
done
echo "Admin token + AWS creds present ✓"
- name: CP staging health preflight
run: |
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
if [ "$code" != "200" ]; then
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
exit 1
fi
echo "Staging CP healthy ✓"
- name: Run concierge user_tasks E2E
run: bash tests/e2e/test_staging_concierge_e2e.sh
- name: Teardown safety net (runs on cancel/failure)
if: always()
env:
ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
run: |
# Sweep any e2e-cncrg-YYYYMMDD-<run_id>-* org this run created if the
# script died before its EXIT trap fired. Run-id scoped so it never
# stomps a concurrent run's fresh tenant (see the saas job's note).
set +e
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
| python3 -c "
import json, sys, os, datetime
run_id = os.environ.get('GITHUB_RUN_ID', '')
d = json.load(sys.stdin)
today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1)
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
if run_id:
prefixes = tuple(f'e2e-cncrg-{d}-{run_id}-' for d in dates)
else:
prefixes = tuple(f'e2e-cncrg-{d}-' for d in dates)
candidates = [o['slug'] for o in d.get('orgs', [])
if any(o.get('slug','').startswith(p) for p in prefixes)
and o.get('instance_status') not in ('purged',)]
print('\n'.join(candidates))
" 2>/dev/null)
leaks=()
for slug in $orgs; do
echo "Safety-net teardown: $slug"
set +e
curl -sS -o /tmp/cncrg-cleanup.out -w "%{http_code}" \
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
-H "Authorization: Bearer $ADMIN_TOKEN" \
-H "Content-Type: application/json" \
-d "{\"confirm\":\"$slug\"}" >/tmp/cncrg-cleanup.code
set -e
code=$(cat /tmp/cncrg-cleanup.code 2>/dev/null || echo "000")
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
echo "[teardown] deleted $slug (HTTP $code)"
else
echo "::warning::concierge teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/cncrg-cleanup.out 2>/dev/null)"
leaks+=("$slug")
fi
done
if [ ${#leaks[@]} -gt 0 ]; then
echo "::warning::concierge teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
fi
exit 0
# ── CONCIERGE FUNCTIONAL: it ACTUALLY CREATES A WORKSPACE (real-LLM) ─────────
#
# Drives tests/e2e/test_staging_concierge_creates_workspace_e2e.sh — the
# RFC docs/design/rfc-platform-agent.md §11.4 "Reach" check turned into a gate:
# send the org concierge a natural-language A2A message ("create a workspace
# named e2e-cncrg-worker-<runid> with role engineer") and assert the
# DETERMINISTIC SIDE EFFECT — that named workspace now EXISTS in GET /workspaces
# — which can only happen if the concierge's LLM really invoked the
# create_workspace platform-MCP tool (a real org mutation), NOT just that a REST
# API returned 200.
#
# GATING (no continue-on-error), but FALSE-GREEN-PROOF via E2E_REQUIRE_LIVE=1:
# this is a REAL-LLM, REAL-tool test, so it depends on the concierge being
# provisioned on the DEDICATED platform-agent image (Dockerfile.platform-agent,
# ships /opt/molecule-mcp-server — the ONLY image where create_workspace lights
# up; see platform_agent.go's SELF-HOST CAVEAT). A parallel agent is wiring that
# image into the staging provision path. The script SKIPs LOUD when the
# concierge is absent / not online / not on the platform-agent image — but with
# E2E_REQUIRE_LIVE=1 the harness converts that skip into a HARD FAIL (exit 5) so
# a silently-missing platform-agent image can NEVER false-green this gate. Runs
# on push-to-main / workflow_dispatch / cron only (needs live staging infra +
# a model — never on PR, where pr-validate posts the workflow's PR status).
# bp-required: pending #2430
e2e-staging-concierge-creates-workspace:
name: E2E Staging Concierge Creates Workspace
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
timeout-minutes: 45
permissions:
contents: read
env:
MOLECULE_CP_URL: https://staging-api.moleculesai.app
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
E2E_AWS_LEAK_CHECK: required
E2E_AWS_TERMINATE_LEAKS: '1'
# The concierge is platform_managed on SaaS (the CP-exported LLM proxy
# supplies its model — no BYOK key needed for the concierge itself). The
# MiniMax key is wired anyway so a staging image that boots the concierge
# BYOK-MiniMax (parallel-agent image work) still has a model; harmless when
# the concierge is platform-managed.
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
# False-green guard: a concierge that is absent / not on the platform-agent
# image / never online must FAIL this gate (exit 5), not silently skip.
E2E_REQUIRE_LIVE: '1'
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.11"
- name: Verify admin token + AWS creds present
run: |
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
exit 2
fi
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
if [ -z "${!var:-}" ]; then
echo "::error::$var not set — EC2 leak verification cannot run"
exit 2
fi
done
echo "Admin token + AWS creds present ✓"
- name: CP staging health preflight
run: |
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
if [ "$code" != "200" ]; then
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
exit 1
fi
echo "Staging CP healthy ✓"
- name: Run concierge-creates-workspace functional E2E
run: bash tests/e2e/test_staging_concierge_creates_workspace_e2e.sh
- name: Teardown safety net (runs on cancel/failure)
  if: always()
  env:
    ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
  run: |
    # Sweep any e2e-cncrg-mk-YYYYMMDD-<run_id>-* org this run created if the
    # script died before its EXIT trap fired. Run-id scoped so it never
    # stomps a concurrent run's fresh tenant.
    #
    # Best-effort by design: errexit stays OFF for the whole step and the
    # step always exits 0, so cleanup trouble surfaces as ::warning::
    # annotations instead of turning the job red.
    set +e

    # List orgs and keep the slugs that carry this run's prefix (today's or
    # yesterday's date stamp, to survive a run that crosses midnight) and
    # are not already purged. --max-time bounds a hung control plane.
    orgs=$(curl -sS --max-time 30 "$MOLECULE_CP_URL/cp/admin/orgs" \
      -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
      | python3 -c "
    import json, sys, os, datetime
    run_id = os.environ.get('GITHUB_RUN_ID', '')
    payload = json.load(sys.stdin)
    today = datetime.date.today()
    yesterday = today - datetime.timedelta(days=1)
    dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
    if run_id:
        prefixes = tuple(f'e2e-cncrg-mk-{day}-{run_id}-' for day in dates)
    else:
        prefixes = tuple(f'e2e-cncrg-mk-{day}-' for day in dates)
    candidates = [o['slug'] for o in payload.get('orgs', [])
                  if any(o.get('slug','').startswith(p) for p in prefixes)
                  and o.get('instance_status') not in ('purged',)]
    print('\n'.join(candidates))
    " 2>/dev/null)
    list_rc=$?
    if [ "$list_rc" -ne 0 ]; then
      # A failed listing (curl error or a non-JSON body) used to look exactly
      # like "nothing to clean up". Say so explicitly.
      echo "::warning::concierge-mk teardown could not list orgs (curl/JSON failure, rc=$list_rc) — nothing swept; sweep-stale-e2e-orgs remains the backstop."
    fi

    leaks=()
    for slug in $orgs; do
      echo "Safety-net teardown: $slug"
      # Body goes to .out, the HTTP status (from -w) goes to .code.
      curl -sS -o /tmp/cncrg-mk-cleanup.out -w "%{http_code}" \
        -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
        -H "Authorization: Bearer $ADMIN_TOKEN" \
        -H "Content-Type: application/json" \
        -d "{\"confirm\":\"$slug\"}" >/tmp/cncrg-mk-cleanup.code
      code=$(cat /tmp/cncrg-mk-cleanup.code 2>/dev/null || echo "000")
      if [ "$code" = "200" ] || [ "$code" = "204" ]; then
        echo "[teardown] deleted $slug (HTTP $code)"
      else
        echo "::warning::concierge-mk teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/cncrg-mk-cleanup.out 2>/dev/null)"
        leaks+=("$slug")
      fi
    done
    if [ ${#leaks[@]} -gt 0 ]; then
      echo "::warning::concierge-mk teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
    fi
    exit 0
# ── CONCIERGE / PLATFORM-AGENT Go staginge2e (Features 1,2,4,5,6) ────────────
#
# Drives TestConciergePlatformAgent_Staging (workspace-server/internal/
# staginge2e/concierge_platform_test.go), which REUSES the lifecycle suite's
# harness (requireStagingEnv / adminCreateOrg / tenantAdminToken /
# tenantCreateWorkspace / doTenantJSON / jsonField) to assert, against a real
# tenant: platform-agent install + /org/identity (1), kind on the workspace
# API (2), discovery peers admin-auth regression guard (4), BYOK billing-mode
# round-trip (5), and the concierge config-tab auth sweep (6). It asserts
# OBSERVABLE state (sole root re-parenting, kind discriminator, resolved_mode,
# non-401 tabs) — not just HTTP 200.
#
# Two jobs, mirroring e2e-workspace-lifecycle.yml's honest pattern:
# • concierge-compile-skip (every push/PR/dispatch): proves the staginge2e
# suite still COMPILES under -tags=staging_e2e and SKIPs LOUD without
# creds. GATING (no mask) — a broken test file fails at PR time.
# • concierge-staging (push-to-main/dispatch/cron): the real live run with
# staging creds + t.Cleanup teardown.
# bp-exempt: PR-time compile-only check (build the concierge e2e test, then
# skip execution — no staging creds on PR). pr-validate posts the workflow's
# PR status; this job is not itself a branch-protection gate.
e2e-staging-concierge-compile-skip:
name: E2E Staging Concierge (compile+skip)
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: go vet (staging_e2e tag)
working-directory: workspace-server
run: go vet -tags staging_e2e ./internal/staginge2e/...
- name: Compile + skip-run (must SKIP LOUD without STAGING_E2E)
  working-directory: workspace-server
  run: |
    # No STAGING_E2E / creds → the suite MUST skip (not pass-with-zero-
    # assertions). go test exit 0 with a SKIP line is the contract.
    #
    # Capture the exit status instead of letting errexit abort inside the
    # assignment: otherwise a compile or test failure kills the step BEFORE
    # the output is echoed, leaving a red job with no log to debug from.
    status=0
    out=$(go test -tags staging_e2e ./internal/staginge2e/ -run TestConciergePlatformAgent -count=1 -v 2>&1) || status=$?
    echo "$out"
    if [ "$status" -ne 0 ]; then
      echo "::error::go test exited $status (compile or test failure) — see output above"
      exit "$status"
    fi
    # Here-string rather than `echo | grep -q`: grep -q exits on the first
    # match, which can SIGPIPE the writer and fail the pipeline when the
    # shell runs with pipefail.
    grep -q "SKIP: TestConciergePlatformAgent_Staging" <<<"$out" \
      || { echo "::error::expected a LOUD skip of TestConciergePlatformAgent_Staging without creds"; exit 1; }
# bp-required: pending #2430
e2e-staging-concierge-platform:
name: E2E Staging Concierge Platform Agent
runs-on: ubuntu-latest
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
timeout-minutes: 40
permissions:
contents: read
env:
CP_BASE_URL: https://staging-api.moleculesai.app
CP_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
STAGING_E2E: '1'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: Verify admin token present
run: |
if [ -z "$CP_ADMIN_API_TOKEN" ]; then
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
exit 2
fi
echo "Admin token present"
- name: CP staging health preflight
  run: |
    # Probe the staging control plane before the live Go suite so an infra
    # outage is not mistaken for a concierge regression.
    #
    # `|| true` inside the substitution: run steps execute with errexit, so
    # a curl transport failure (timeout / DNS / connection refused) would
    # otherwise abort the step right here, before the ::error:: annotation
    # below is printed. curl still emits 000 through -w in that case; the
    # ${code:-000} default covers an empty capture as well.
    code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$CP_BASE_URL/health" || true)
    if [ "$code" != "200" ]; then
      echo "::error::Staging CP unhealthy (HTTP ${code:-000}) — infra, not a concierge bug."
      exit 1
    fi
    echo "Staging CP healthy"
- name: Run concierge/platform-agent staginge2e
working-directory: workspace-server
run: go test -tags staging_e2e ./internal/staginge2e/ -run TestConciergePlatformAgent_Staging -count=1 -v -timeout 35m
# Teardown: the test installs a t.Cleanup admin-DELETE of its own tenant
# (e2e-cncrg-* slug), running even on a t.Fatal. The age-guarded
# sweep-stale-e2e-orgs workflow (30-min floor, e2e- prefix) is the final
# net for a tenant orphaned by a hard runner cancel.
+2 -2
View File
@@ -82,7 +82,7 @@ jobs:
- name: Run gate-check-v3 (single PR mode)
if: github.event_name == 'pull_request_target' || github.event.inputs.pr_number != ''
env:
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
PR_NUMBER: ${{ github.event.pull_request.number || github.event.inputs.pr_number }}
POST_COMMENT: ${{ github.event.inputs.post_comment || 'true' }}
@@ -97,7 +97,7 @@ jobs:
- name: Run gate-check-v3 (all open PRs — cron mode)
if: github.event_name == 'schedule'
env:
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
REPO: ${{ github.repository }}
run: |
+1 -1
View File
@@ -73,7 +73,7 @@ jobs:
# NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
# queue now reads the required status contexts from BRANCH PROTECTION
# (status_check_contexts) so non-required governance reds (qa-review,
# security-review, sop-checklist when not branch-required,
# security-review, sop-tier, sop-checklist when not branch-required,
# E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
# If branch protection cannot be enumerated the queue HOLDS
# (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
@@ -244,12 +244,7 @@ jobs:
# fail if any didn't land — that would be a real regression we
# want loud.
# workspace_schedules added for the #2149 scheduler integration tests.
# workspace_auth_tokens + org_api_tokens added for the #2156
# registry-auth TestIntegration_ suite (#2148). Without this
# guard, a silently-skipped migration 020 (workspace_auth_tokens)
# or 035 (org_api_tokens) would let the auth tests run against
# missing tables and falsely green.
for tbl in delegations workspaces activity_logs pending_uploads workspace_schedules workspace_auth_tokens org_api_tokens; do
for tbl in delegations workspaces activity_logs pending_uploads workspace_schedules; do
if ! psql -h "${PG_HOST}" -U postgres -d molecule -tA \
-c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
| grep -q 1; then
@@ -290,33 +285,6 @@ jobs:
# / workspaces all landed by the migration replay step above).
go test -tags=integration -timeout 5m -v ./internal/scheduler/ -run "^TestIntegration_"
- if: needs.detect-changes.outputs.handlers == 'true'
name: Migration replay-from-scratch gate (#2150)
env:
PGPASSWORD: test
run: |
# Issue #2150 (SOP internal#765): prove the FULL forward migration
# chain (.up + legacy .sql) replays from a blank schema via the
# PRODUCTION db.RunMigrations entrypoint — hard-fail on any error.
#
# This is the gap the psql apply loop above does NOT cover: that
# loop deliberately SKIPS failing migrations (`⊘ skipped`), so it
# stays green even if the chain stops replaying. The Go test below
# uses the real boot-time runner with hard-fail semantics, catching
# the #211 .down-wipe class and the 045 non-idempotent crash-loop
# class (it runs the chain twice).
#
# Run against a SEPARATE database so the destructive
# `DROP SCHEMA public CASCADE` inside the test never touches the
# `molecule` DB the handlers integration tests above migrated. No
# ordering coupling with the handlers step.
createdb -h "${PG_HOST}" -U postgres molecule_replay 2>/dev/null || \
psql -h "${PG_HOST}" -U postgres -d molecule \
-c "CREATE DATABASE molecule_replay" >/dev/null 2>&1 || true
INTEGRATION_DB_URL="postgres://postgres:test@${PG_HOST}:5432/molecule_replay?sslmode=disable" \
go test -tags=integration -timeout 5m -v ./internal/db/ \
-run '^TestIntegration_Migration|^TestIntegration_InitPostgres'
- if: failure() && needs.detect-changes.outputs.handlers == 'true'
name: Diagnostic dump on failure
env:
+1 -1
View File
@@ -19,7 +19,7 @@
# Forward-compat scope:
# Today (2026-05-11) molecule-core/main protects 3 contexts:
# - "Secret scan / Scan diff for credential-shaped strings (pull_request)"
# - "sop-checklist / all-items-acked (pull_request)"
# - "sop-tier-check / tier-check (pull_request)"
# - "CI / all-required (pull_request)"
# Per RFC#324 Step 2 the required-list expands to ~5 contexts
# (qa-review, security-review added). Each new required context's
@@ -40,7 +40,6 @@ env:
GITHUB_SERVER_URL: https://git.moleculesai.app
jobs:
# bp-exempt: informational lint enforcing docker-host/publish pin convention (internal#512), not a merge gate
lint-docker-host-pin:
name: Lint docker-host pin on docker-touching workflows
runs-on: docker-host
+1 -1
View File
@@ -16,7 +16,7 @@ name: Lint workflow YAML (Gitea-1.22.6-hostile shapes)
#
# Empirical history this hardens against:
# - status-reaper rev1 caught rule-4 (name-collision) class
# - sop-checklist DOA'd on rule-2 (workflow_run partial)
# - sop-tier-refire DOA'd on rule-2 (workflow_run partial)
# - #319 bootstrap-paradox (chained-defect class, related)
# - internal#329 dispatcher race (adjacent)
# - 2026-05-11 publish-runtime: rule-1, 24h PyPI freeze
-474
View File
@@ -1,474 +0,0 @@
name: Local Provision Lifecycle E2E
# MANDATORY coverage for the LOCAL Docker provisioner (MOLECULE_ENV=development,
# docker.sock) — the path self-hosters + dev runs use. Every OTHER e2e exercises
# the SaaS/EC2 (control-plane) provisioner; nothing mandatory drove the local
# Docker path, which is why a config-volume restart-survival bug went undetected.
# This workflow provisions a REAL workspace via the local Docker provisioner and
# asserts the full lifecycle, INCLUDING the restart-survival assertion.
#
# Two jobs:
# * lifecycle-stub (REQUIRED gate) — builds the tiny stub runtime image, tags
# it to the provisioner's RegistryModeLocal cache tag, and runs the full
# lifecycle e2e (provision -> online -> restart-survive -> proxy-reach). Fast
# (seconds of agent boot, no LLM, no 2.5GB image).
# * lifecycle-real (ADVISORY, continue-on-error) — runs the SAME script against
# the real claude-code template image with a REAL MiniMax BYOK credential
# (LIFECYCLE_LLM=minimax). The proxy-reach step asserts an ACTUAL model reply
# (real round-trip through the ws-<id>:8000 proxy), not just reachability.
# MiniMax is the cheapest LLM the platform offers, and its `minimax` provider
# dials api.minimax.io directly (no CP proxy needed on this local stack).
# Heavy + network-dependent (pulls/builds the template + a real LLM call), so
# it is non-blocking. Needs the MOLECULE_STAGING_MINIMAX_API_KEY CI secret:
# when ABSENT the script SKIPS loud (exit 0) — it never reds on a missing
# secret (serving-e2e skip-if-absent pattern).
#
# SUBSTRATE REQUIREMENT (read before wiring into branch protection)
# -----------------------------------------------------------------
# This workflow provisions SIBLING docker containers from a HOST Go binary via
# the runner's docker.sock — exactly like e2e-api.yml, which already provisions
# the `mock` + `priority-runtimes` arms on `docker-host`. So the docker-in-runner
# capability IS available on the molecule-runner-* (docker-host) lane. If the
# operator ever moves these to a runner WITHOUT docker.sock access for the
# platform binary, this lane will red — keep it on `docker-host`.
#
# Both jobs pin `runs-on: docker-host` (Linux operator-host runners with the
# molecule-core-net bridge + a working docker.sock). The bare `ubuntu-latest`
# label is also advertised by the Windows act_runner, where docker.sock-bound
# steps fail non-deterministically — see lint-required-workflows-docker-host-
# pinned.yml + internal#512.
on:
push:
branches: [main, staging]
pull_request:
branches: [main, staging]
concurrency:
# Per-SHA grouping (mirrors e2e-api.yml). cancel-in-progress:false so a queued
# run for an older SHA isn't cancelled by a newer push (auto-promote brittleness).
group: local-provision-e2e-${{ github.event.pull_request.head.sha || github.sha }}
cancel-in-progress: false
env:
GITHUB_SERVER_URL: https://git.moleculesai.app
jobs:
# ===========================================================================
# REQUIRED gate — stub runtime, fast. This IS meant to be a required merge gate
# (the only mandatory coverage for the LOCAL Docker provisioner), but the new
# context is not yet in branch_protections/main — wire it in once the operator
# confirms the docker-host runners reliably provision sibling containers from
# the host platform binary for this lane (see SUBSTRATE REQUIREMENT above), then
# flip the directive below to `# bp-required: yes`. Until then it runs gating
# locally (continue-on-error: false) but un-wired in BP, an acknowledged
# asymmetry tracked for follow-up. (Earlier this block read `# bp-exempt`, which
# contradicted "REQUIRED gate" and tripped lint-required-context-exists-in-bp.)
# bp-required: pending #2409
# ===========================================================================
lifecycle-stub:
name: Local Provision Lifecycle E2E (stub)
runs-on: docker-host
continue-on-error: false
timeout-minutes: 15
env:
PG_CONTAINER: pg-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
REDIS_CONTAINER: redis-lpe2e-${{ github.run_id }}-${{ github.run_attempt }}
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
MOLECULE_ENV: development
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
# act_runner runs the job inside a Docker container, so /.dockerenv exists
# and the platform auto-detects platformInDocker=true. But the job container
# is NOT on molecule-core-net, so it cannot resolve workspace container
# hostnames (ws-<id>:8000). Force false so the proxy keeps using the
# host-mapped 127.0.0.1:<ephemeral_port> URL, which IS reachable.
MOLECULE_IN_DOCKER: false
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: Ensure provisioner network + pre-pull alpine
run: |
# The local provisioner attaches workspace containers to
# molecule-core-net and seeds /configs via an alpine helper; the
# lifecycle script also uses alpine to seed config.yaml into the
# named config volume. Pre-pull + ensure the bridge (idempotent).
docker pull alpine:3 >/dev/null
docker network create molecule-core-net >/dev/null 2>&1 || true
echo "alpine:3 pre-pulled; molecule-core-net ensured."
- name: Start Postgres (docker, ephemeral host port)
run: |
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
docker run -d --name "$PG_CONTAINER" \
-e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule \
-p 0:5432 postgres:16 >/dev/null
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
[ -z "$PG_PORT" ] && PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | head -1 | awk -F: '{print $NF}')
if [ -z "$PG_PORT" ]; then echo "::error::no host port for $PG_CONTAINER"; docker logs "$PG_CONTAINER" || true; exit 1; fi
echo "DATABASE_URL=postgres://dev:dev@127.0.0.1:${PG_PORT}/molecule?sslmode=disable" >> "$GITHUB_ENV"
for i in $(seq 1 30); do
docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1 && { echo "pg ready ${i}s"; exit 0; }
sleep 1
done
echo "::error::Postgres not ready in 30s"; docker logs "$PG_CONTAINER" || true; exit 1
- name: Start Redis (docker, ephemeral host port)
run: |
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
docker run -d --name "$REDIS_CONTAINER" -p 0:6379 redis:7 >/dev/null
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
[ -z "$REDIS_PORT" ] && REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | head -1 | awk -F: '{print $NF}')
if [ -z "$REDIS_PORT" ]; then echo "::error::no host port for $REDIS_CONTAINER"; docker logs "$REDIS_CONTAINER" || true; exit 1; fi
echo "REDIS_URL=redis://127.0.0.1:${REDIS_PORT}" >> "$GITHUB_ENV"
for i in $(seq 1 15); do
docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG && { echo "redis ready ${i}s"; exit 0; }
sleep 1
done
echo "::error::Redis not ready in 15s"; docker logs "$REDIS_CONTAINER" || true; exit 1
- name: Configure platform env (admin token + local Docker provisioner)
run: |
# Allocate an unused ephemeral port to avoid collision with concurrent
# jobs or stale processes from prior cancelled runs (see #2450).
PORT=$(python3 -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
echo "PORT=${PORT}" >> "$GITHUB_ENV"
echo "BASE=http://127.0.0.1:${PORT}" >> "$GITHUB_ENV"
# Discover an IP that Docker containers can use to reach the host platform.
# host.docker.internal is not reliably available on Linux (act_runner), so
# workspace containers cannot resolve it and fail to register/heartbeat.
# Workspace containers join molecule-core-net; the host is reachable via that
# network's gateway. Ensure the network exists first (the provisioner creates
# it lazily, but we need the gateway BEFORE starting the platform).
docker network inspect molecule-core-net >/dev/null 2>&1 || docker network create molecule-core-net >/dev/null
# Parse Gateway from raw JSON because --format '{{.IPAM.Config}}' is
# inconsistent across Docker versions (sometimes omits Gateway field).
PLATFORM_HOST_IP=$(docker network inspect molecule-core-net 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
if [ -z "$PLATFORM_HOST_IP" ]; then
PLATFORM_HOST_IP=$(docker network inspect bridge 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
fi
if [ -z "$PLATFORM_HOST_IP" ]; then
PLATFORM_HOST_IP=$(ip route | awk '/default/ {print $3}' | head -1 || true)
fi
if [ -z "$PLATFORM_HOST_IP" ]; then
echo "::error::Could not determine PLATFORM_HOST_IP for Docker containers to reach the platform"
exit 1
fi
echo "PLATFORM_HOST_IP=${PLATFORM_HOST_IP}"
echo "PLATFORM_URL=http://${PLATFORM_HOST_IP}:${PORT}" >> "$GITHUB_ENV"
# Deterministic admin token: the script sends MOLECULE_ADMIN_TOKEN as the
# bearer; the platform checks ADMIN_TOKEN. Set both to the same value.
T="lpe2e-admin-${{ github.run_id }}-${{ github.run_attempt }}"
echo "ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
echo "MOLECULE_ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
# MOLECULE_ENV=development: dev posture. MOLECULE_ORG_ID is left UNSET so
# main.go wires the LOCAL Docker provisioner (not the CP provisioner), and
# MOLECULE_IMAGE_REGISTRY is left UNSET so image resolution uses
# RegistryModeLocal (the dockerHasTag cache-check the stub pre-tags into).
echo "MOLECULE_ENV=development" >> "$GITHUB_ENV"
echo "SECRETS_ENCRYPTION_KEY=lpe2e-test-encryption-key-32bytes!!" >> "$GITHUB_ENV"
- name: Build platform
working-directory: workspace-server
run: go build -o platform-server ./cmd/server
- name: Kill stale platform-server before start (issue #1046)
run: |
# Dynamic port allocation (see #2450) eliminates the fixed-port race
# that caused this gate to red when a prior run left a zombie process.
# We still sweep by process name to avoid leaking platform-server
# processes on the shared runner.
killed=0
for pid in $(grep -l "platform-serve" /proc/[0-9]*/comm 2>/dev/null); do
kpid="${pid%/comm}"; kpid="${kpid##*/}"
cmdline=$(cat "/proc/${kpid}/cmdline" 2>/dev/null | tr '\0' ' ')
if echo "$cmdline" | grep -q "platform-server"; then
echo "Killing stale platform-server pid ${kpid}: ${cmdline}"
kill "$kpid" 2>/dev/null || true
killed=$((killed + 1))
fi
done
if [ "$killed" -gt 0 ]; then echo "Killed $killed stale platform-server process(es)."; else echo "No platform-server-named process found."; fi
sleep 1
- name: Start platform (background)
working-directory: workspace-server
run: |
# Bind to the dynamically allocated port (see #2450).
# DATABASE_URL/REDIS_URL/ADMIN_TOKEN/MOLECULE_ENV are inherited from
# $GITHUB_ENV. PLATFORM_URL is also passed explicitly because
# $GITHUB_ENV propagation can be flaky on act_runner (#2468 RCA).
echo "starting platform with PLATFORM_URL=${PLATFORM_URL:-<fallback>} PORT=$PORT BIND_ADDR=0.0.0.0"
PORT=$PORT BIND_ADDR=0.0.0.0 PLATFORM_URL="${PLATFORM_URL:-http://host.docker.internal:$PORT}" ./platform-server > platform.log 2>&1 &
echo $! > platform.pid
- name: Wait for /health (+ migrations applied)
run: |
DEADLINE=300; PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"; start=$(date +%s)
while :; do
# Verify OUR server is still alive before trusting /health. Our server
# binds the allocated port or exits FATAL, so "our PID alive" <=>
# "we own the port"; checking it first stops a squatter that answers
# /health on the same port (our bind having failed) from false-positiving
# the gate (no-flakes RCA).
if [ -n "$PID" ] && ! kill -0 "$PID" 2>/dev/null; then
echo "::error::platform-server exited early (failed to bind or crashed)"; cat workspace-server/platform.log || true; exit 1
fi
if curl -sf "$BASE/health" >/dev/null; then
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc \
"SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'" 2>/dev/null || echo 0)
[ "$tables" = "1" ] && { echo "healthy + migrated after $(( $(date +%s) - start ))s"; exit 0; }
fi
[ "$(( $(date +%s) - start ))" -ge "$DEADLINE" ] && { echo "::error::platform not healthy in ${DEADLINE}s"; cat workspace-server/platform.log || true; exit 1; }
sleep 1
done
- name: Verify platform reachable from molecule-core-net
  run: |
    # Advisory probe only: a miss prints WARN and the step still succeeds.
    # Uses alpine:3 — the tag this job pre-pulls — so the probe does not
    # trigger a second image pull, and -T 10 so an unreachable platform
    # cannot hang the step.
    echo "Testing platform reachability from molecule-core-net container..."
    docker run --rm --network molecule-core-net alpine:3 sh -c "wget -T 10 -qO- http://${PLATFORM_URL#http://}/health" || echo "WARN: platform not reachable from molecule-core-net"
- name: Run local-provision lifecycle E2E (stub — REQUIRED)
run: bash tests/e2e/test_local_provision_lifecycle_e2e.sh
- name: Dump platform log on failure
if: failure()
run: cat workspace-server/platform.log || true
- name: Dump workspace container logs on failure
  if: failure()
  run: |
    # -a: also list EXITED containers. A workspace container that crashed
    # is the one whose logs matter, and plain `docker ps` (running only)
    # would never find it. Takes the first ws-* name listed.
    WS_NAME=$(docker ps -a --filter "name=ws-" --format '{{.Names}}' | head -1 || true)
    if [ -n "$WS_NAME" ]; then
      echo "=== Workspace container logs for $WS_NAME ==="
      docker logs "$WS_NAME" 2>&1 | tail -n 80 || true
    else
      echo "No ws-* container found (running or exited) — nothing to dump."
    fi
- name: Stop platform
if: always()
run: |
[ -f workspace-server/platform.pid ] && kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
- name: Stop service containers
if: always()
run: |
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
# ===========================================================================
# ADVISORY — real claude-code image, lifecycle-only. Non-blocking. It pulls/
# builds the 2.5GB template image, makes a real (cheap) MiniMax LLM call, and is
# network-dependent, so a miss must not block. It proves the REAL runtime
# survives a restart AND serves a genuine LLM round-trip on the local
# provisioner (proxy-reach asserts a real MiniMax reply, not just reachability).
# ===========================================================================
# bp-exempt: advisory lane (continue-on-error: true) — informational, never a merge gate.
lifecycle-real:
name: Local Provision Lifecycle E2E (real image + MiniMax LLM, advisory)
runs-on: docker-host
# Serialise behind the gating stub job: both jobs share the same docker-host
# runner and provision sibling containers. `needs:` forces this advisory job
# to start only AFTER lifecycle-stub finishes, avoiding resource contention.
# (Dynamic ports eliminated the fixed-port race; serialisation remains for
# docker-host capacity hygiene.) continue-on-error keeps a real-job miss
# non-blocking; because of the `if: always()` below, `needs:` only orders the
# two jobs and does NOT gate on the stub's success (a failed required gate
# still lets this advisory dependent run).
needs: lifecycle-stub
if: ${{ always() }}
# Tracker for lint-continue-on-error-tracking (Tier 2e / internal#350): this
# mask has a forced 14-day renewal cycle. mc#2408 tracks promoting this
# advisory MiniMax round-trip to a gating job (then flip to false).
continue-on-error: true # mc#2408 — promote advisory MiniMax e2e to gating
timeout-minutes: 30
env:
PG_CONTAINER: pg-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
REDIS_CONTAINER: redis-lpe2e-real-${{ github.run_id }}-${{ github.run_attempt }}
# Hard-code dev mode at the job level so the platform server ALWAYS sees it,
# even if the runner's $GITHUB_ENV propagation is flaky (#2468 RCA).
MOLECULE_ENV: development
SECRETS_ENCRYPTION_KEY: lpe2e-test-encryption-key-32bytes!!
# act_runner runs the job inside a Docker container, so /.dockerenv exists
# and the platform auto-detects platformInDocker=true. But the job container
# is NOT on molecule-core-net, so it cannot resolve workspace container
# hostnames (ws-<id>:8000). Force false so the proxy keeps using the
# host-mapped 127.0.0.1:<ephemeral_port> URL, which IS reachable.
MOLECULE_IN_DOCKER: false
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version: 'stable'
cache: true
cache-dependency-path: workspace-server/go.sum
- name: Ensure provisioner network + pre-pull alpine
run: |
docker pull alpine:3 >/dev/null
docker network create molecule-core-net >/dev/null 2>&1 || true
- name: Start Postgres (docker, ephemeral host port)
run: |
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
docker run -d --name "$PG_CONTAINER" \
-e POSTGRES_USER=dev -e POSTGRES_PASSWORD=dev -e POSTGRES_DB=molecule \
-p 0:5432 postgres:16 >/dev/null
PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
[ -z "$PG_PORT" ] && PG_PORT=$(docker port "$PG_CONTAINER" 5432/tcp | head -1 | awk -F: '{print $NF}')
if [ -z "$PG_PORT" ]; then echo "::error::no host port"; docker logs "$PG_CONTAINER" || true; exit 1; fi
echo "DATABASE_URL=postgres://dev:dev@127.0.0.1:${PG_PORT}/molecule?sslmode=disable" >> "$GITHUB_ENV"
for i in $(seq 1 30); do
docker exec "$PG_CONTAINER" pg_isready -U dev >/dev/null 2>&1 && { echo "pg ready ${i}s"; exit 0; }
sleep 1
done
echo "::error::Postgres not ready"; docker logs "$PG_CONTAINER" || true; exit 1
- name: Start Redis (docker, ephemeral host port)
run: |
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
docker run -d --name "$REDIS_CONTAINER" -p 0:6379 redis:7 >/dev/null
REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | awk -F: '/^0\.0\.0\.0:/ {print $2; exit}')
[ -z "$REDIS_PORT" ] && REDIS_PORT=$(docker port "$REDIS_CONTAINER" 6379/tcp | head -1 | awk -F: '{print $NF}')
if [ -z "$REDIS_PORT" ]; then echo "::error::no host port"; docker logs "$REDIS_CONTAINER" || true; exit 1; fi
echo "REDIS_URL=redis://127.0.0.1:${REDIS_PORT}" >> "$GITHUB_ENV"
for i in $(seq 1 15); do
docker exec "$REDIS_CONTAINER" redis-cli ping 2>/dev/null | grep -q PONG && { echo "redis ready ${i}s"; exit 0; }
sleep 1
done
echo "::error::Redis not ready"; docker logs "$REDIS_CONTAINER" || true; exit 1
- name: Configure platform env
run: |
# Allocate an unused ephemeral port to avoid collision with concurrent
# jobs or stale processes from prior cancelled runs (see #2450).
PORT=$(python3 -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
echo "PORT=${PORT}" >> "$GITHUB_ENV"
echo "BASE=http://127.0.0.1:${PORT}" >> "$GITHUB_ENV"
# Discover an IP that Docker containers can use to reach the host platform.
# host.docker.internal is not reliably available on Linux (act_runner), so
# workspace containers cannot resolve it and fail to register/heartbeat.
# Workspace containers join molecule-core-net; the host is reachable via that
# network's gateway. Ensure the network exists first (the provisioner creates
# it lazily, but we need the gateway BEFORE starting the platform).
docker network inspect molecule-core-net >/dev/null 2>&1 || docker network create molecule-core-net >/dev/null
# Parse Gateway from raw JSON because --format '{{.IPAM.Config}}' is
# inconsistent across Docker versions (sometimes omits Gateway field).
PLATFORM_HOST_IP=$(docker network inspect molecule-core-net 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
if [ -z "$PLATFORM_HOST_IP" ]; then
PLATFORM_HOST_IP=$(docker network inspect bridge 2>/dev/null | sed -n 's/.*"Gateway": "\([^"]*\)".*/\1/p' | head -1)
fi
if [ -z "$PLATFORM_HOST_IP" ]; then
PLATFORM_HOST_IP=$(ip route | awk '/default/ {print $3}' | head -1 || true)
fi
if [ -z "$PLATFORM_HOST_IP" ]; then
echo "::error::Could not determine PLATFORM_HOST_IP for Docker containers to reach the platform"
exit 1
fi
echo "PLATFORM_HOST_IP=${PLATFORM_HOST_IP}"
echo "PLATFORM_URL=http://${PLATFORM_HOST_IP}:${PORT}" >> "$GITHUB_ENV"
T="lpe2e-real-admin-${{ github.run_id }}-${{ github.run_attempt }}"
echo "ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
echo "MOLECULE_ADMIN_TOKEN=${T}" >> "$GITHUB_ENV"
echo "MOLECULE_ENV=development" >> "$GITHUB_ENV"
echo "SECRETS_ENCRYPTION_KEY=lpe2e-test-encryption-key-32bytes!!" >> "$GITHUB_ENV"
- name: Build platform
working-directory: workspace-server
run: go build -o platform-server ./cmd/server
- name: Kill stale platform-server before start (issue #1046)
run: |
# Dynamic port allocation (see #2450) eliminates the fixed-port race.
# We still sweep by process name to avoid leaking platform-server
# processes on the shared runner.
killed=0
for pid in $(grep -l "platform-serve" /proc/[0-9]*/comm 2>/dev/null); do
kpid="${pid%/comm}"; kpid="${kpid##*/}"
cmdline=$(cat "/proc/${kpid}/cmdline" 2>/dev/null | tr '\0' ' ')
if echo "$cmdline" | grep -q "platform-server"; then
echo "Killing stale platform-server pid ${kpid}: ${cmdline}"
kill "$kpid" 2>/dev/null || true
killed=$((killed + 1))
fi
done
if [ "$killed" -gt 0 ]; then echo "Killed $killed stale platform-server process(es)."; else echo "No platform-server-named process found."; fi
sleep 1
- name: Start platform (background)
working-directory: workspace-server
run: |
echo "starting platform with PLATFORM_URL=${PLATFORM_URL:-<fallback>} PORT=$PORT BIND_ADDR=0.0.0.0"
PORT=$PORT BIND_ADDR=0.0.0.0 PLATFORM_URL="${PLATFORM_URL:-http://host.docker.internal:$PORT}" ./platform-server > platform.log 2>&1 &
echo $! > platform.pid
- name: Wait for /health (+ migrations applied)
run: |
DEADLINE=300; PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"; start=$(date +%s)
while :; do
# Verify OUR server is still alive before trusting /health. Our server
# binds the allocated port or exits FATAL, so checking our PID first
# stops a squatter from false-positiving the gate (no-flakes RCA).
if [ -n "$PID" ] && ! kill -0 "$PID" 2>/dev/null; then
echo "::error::platform-server exited early (failed to bind or crashed)"; cat workspace-server/platform.log || true; exit 1
fi
if curl -sf "$BASE/health" >/dev/null; then
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc \
"SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'" 2>/dev/null || echo 0)
[ "$tables" = "1" ] && { echo "healthy after $(( $(date +%s) - start ))s"; exit 0; }
fi
[ "$(( $(date +%s) - start ))" -ge "$DEADLINE" ] && { echo "::error::platform not healthy in ${DEADLINE}s"; cat workspace-server/platform.log || true; exit 1; }
sleep 1
done
- name: Verify platform reachable from molecule-core-net
  run: |
    # Advisory probe only: a miss prints WARN and the step still succeeds.
    # Uses alpine:3 — the tag this job pre-pulls — so the probe does not
    # trigger a second image pull, and -T 10 so an unreachable platform
    # cannot hang the step.
    echo "Testing platform reachability from molecule-core-net container..."
    docker run --rm --network molecule-core-net alpine:3 sh -c "wget -T 10 -qO- http://${PLATFORM_URL#http://}/health" || echo "WARN: platform not reachable from molecule-core-net"
- name: Run local-provision lifecycle E2E (real image + MiniMax LLM — ADVISORY)
env:
# LIFECYCLE_LLM=minimax: provision the REAL claude-code template image
# (the mode forces LIFECYCLE_PROVISIONER_BUILDS=1 — the provisioner
# clones + docker-builds the template from Gitea via RegistryModeLocal)
# with a real MiniMax BYOK credential, and assert an ACTUAL model reply
# at the proxy-reach step (a genuine round-trip through ws-<id>:8000).
# MiniMax is the cheapest LLM the platform offers; its `minimax`
# provider dials api.minimax.io directly, so no CP proxy env is needed.
#
# Key wiring (DO NOT hardcode): the script reads MINIMAX_API_KEY from
# the env; we feed it from the MOLECULE_STAGING_MINIMAX_API_KEY CI
# secret (the same secret the staging-smoke + e2e-api MiniMax arms use).
# When that secret is ABSENT, MINIMAX_API_KEY is empty and the script
# SKIPS loud (exit 0) — it never reds on a missing secret (serving-e2e
# skip-if-absent pattern). The advisory job stays green either way.
LIFECYCLE_LLM: minimax
MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
run: bash tests/e2e/test_local_provision_lifecycle_e2e.sh
- name: Dump platform log on failure
if: failure()
run: cat workspace-server/platform.log || true
- name: Dump workspace container logs on failure
  if: failure()
  run: |
    # -a: also list EXITED containers. A workspace container that crashed
    # is the one whose logs matter, and plain `docker ps` (running only)
    # would never find it. Takes the first ws-* name listed.
    WS_NAME=$(docker ps -a --filter "name=ws-" --format '{{.Names}}' | head -1 || true)
    if [ -n "$WS_NAME" ]; then
      echo "=== Workspace container logs for $WS_NAME ==="
      docker logs "$WS_NAME" 2>&1 | tail -n 80 || true
    else
      echo "No ws-* container found (running or exited) — nothing to dump."
    fi
- name: Stop platform
if: always()
run: |
[ -f workspace-server/platform.pid ] && kill "$(cat workspace-server/platform.pid)" 2>/dev/null || true
- name: Stop service containers
if: always()
run: |
docker rm -f "$PG_CONTAINER" 2>/dev/null || true
docker rm -f "$REDIS_CONTAINER" 2>/dev/null || true
+2 -2
View File
@@ -95,10 +95,10 @@ jobs:
# included here — staging green is a separate gate
# (`feedback_staging_e2e_merge_gate`).
WATCH_BRANCH: 'main'
# Issue label applied on file/open. `ci-bp-drift` exists in the
# Issue label applied on file/open. `tier:high` exists in the
# molecule-core label set (verified 2026-05-11, label id 9).
# Rationale for high: main red blocks the promotion train and
# poisons every PR's auto-rebase base; treat as a fire even
# if intermittent.
RED_LABEL: 'ci-bp-drift'
RED_LABEL: 'tier:high'
run: python3 .gitea/scripts/main-red-watchdog.py
@@ -248,36 +248,16 @@ jobs:
--tag "${STAGING_TENANT_IMAGE_NAME}:${TAG_LATEST}"
)
# Retry loop: buildkit EOF (internal#2468) is often transient on the
# publish runner under memory pressure. Up to 3 attempts with a fresh
# builder each time so a crashed buildkit doesn't poison the next try.
for attempt in 1 2 3; do
echo "::notice::Tenant image build attempt ${attempt}/3 ..."
builder="tenant-builder-${GITHUB_RUN_ID}-${attempt}"
docker buildx create --name "${builder}" --use >/dev/null 2>&1 || true
if docker buildx build \
--builder "${builder}" \
--file ./workspace-server/Dockerfile.tenant \
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
--build-arg GIT_SHA="${GIT_SHA}" \
--label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
--label "org.opencontainers.image.revision=${GIT_SHA}" \
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
"${build_tags[@]}" \
--push .; then
docker buildx rm "${builder}" >/dev/null 2>&1 || true
echo "::notice::Tenant image build succeeded on attempt ${attempt}"
break
fi
echo "::warning::Tenant image build attempt ${attempt} failed — cleaning builder and retrying"
docker buildx rm "${builder}" >/dev/null 2>&1 || true
sleep 10
if [ "$attempt" -eq 3 ]; then
echo "::error::Tenant image build failed after 3 attempts"
exit 1
fi
done
docker buildx build \
--file ./workspace-server/Dockerfile.tenant \
--build-arg NEXT_PUBLIC_PLATFORM_URL= \
--build-arg GIT_SHA="${GIT_SHA}" \
--label "org.opencontainers.image.source=https://git.moleculesai.app/molecule-ai/${REPO}" \
--label "org.opencontainers.image.revision=${GIT_SHA}" \
--label "org.opencontainers.image.created=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
--label "molecule.workflow.run_id=${GITHUB_RUN_ID}" \
"${build_tags[@]}" \
--push .
# bp-exempt: production deploy side-effect; merge is gated by CI / all-required and this job waits for push CI before acting.
deploy-production:
+18 -31
View File
@@ -7,25 +7,18 @@
#
# A1-α (refire mechanism):
# Triggers on:
# - `pull_request_target`: opened, synchronize, reopened, labeled, unlabeled
# → initial status posts when PR opens / re-pushes, and re-evaluates
# when labels change (e.g. risk-indicator labels).
# - `pull_request_target`: opened, synchronize, reopened
# → initial status posts when PR opens / re-pushes
# - `pull_request_review` types: [submitted]
# → re-evaluate when a team member submits an APPROVE review so
# the gate flips immediately (no wait for the next push or
# slash-command). Verified live: sop-checklist.yml uses this
# slash-command). Verified live: sop-tier-check.yml uses this
# same event and provably fires (produces
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
# The job-level `if:` does NOT guard on review.state (issue
# #2159): Gitea 1.22.6's payload shape for this event does not
# reliably expose the state field that the GitHub-style guard
# expects. The evaluator (review-check.sh) reads actual reviews
# from the API and checks for a real APPROVE, so running on
# COMMENT or REQUEST_CHANGES is harmless (read-only,
# idempotent). Branch-protection requires the
# `(pull_request_target)` context variant, so the review-event
# path EXPLICITLY POSTS the required context via the API. Trust
# boundary preserved (BASE ref, no PR-head).
# `sop-tier-check / tier-check (pull_request_review)` contexts).
# The job-level `if:` guard compares `github.event.review.state`
# against both 'APPROVED' and 'approved' (either casing passes), so
# only APPROVE reviews run the evaluator; COMMENT and
# REQUEST_CHANGES are skipped at the job level.
# Branch-protection requires the `(pull_request_target)`
# context variant, so the review-event path EXPLICITLY POSTS
# the required context via the API. Trust boundary preserved
@@ -60,7 +53,7 @@
#
# We MUST NOT use `github.event.comment.author_association` (the
# field doesn't exist on Gitea 1.22.6 webhook payload — this was
# 's defect #1).
# sop-tier-refire's defect #1).
#
# A4 (no PR-head checkout under pull_request_target):
# We check out the BASE ref explicitly so the review-check.sh script is
@@ -80,7 +73,7 @@
# also not in qa/security teams → also 403.
#
# Resolution: a dedicated `RFC_324_TEAM_READ_TOKEN` secret, owned by an
# identity that IS in both `qa` and `security` teams (Owners-level
# identity that IS in both `qa` and `security` teams (Owners-tier
# claude-ceo-assistant, or a new service-bot added to both teams).
# Provisioning of this secret is tracked as a follow-up issue (filed by
# core-devops at PR open).
@@ -103,7 +96,7 @@ name: qa-review
on:
pull_request_target:
types: [opened, synchronize, reopened, labeled, unlabeled]
types: [opened, synchronize, reopened]
pull_request_review:
types: [submitted]
@@ -117,19 +110,13 @@ jobs:
approved:
# Gate the job:
# - On pull_request_target events: always run.
# - On pull_request_review events: always run. We do NOT guard on
# review.state here because Gitea 1.22.6's payload shape for this
# event does not reliably expose the state field (issue #2159).
# The evaluator (review-check.sh) reads actual reviews from the
# API and checks for a real APPROVE, so running on COMMENT or
# REQUEST_CHANGES is harmless (read-only, idempotent).
# - On labeled/unlabeled events: re-evaluate when labels change.
# This ensures qa-review flips when risk-indicator labels are
# added or removed.
# - On pull_request_review events whose review state is APPROVED (either
# casing): run so the gate flips immediately when a team member submits
# an APPROVE review.
# Comment-triggered refires live in sop-checklist.yml review-refire job.
if: |
github.event_name == 'pull_request_target' ||
github.event_name == 'pull_request_review'
(github.event_name == 'pull_request_review' &&
(github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
runs-on: ubuntu-latest
steps:
- name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
@@ -143,7 +130,7 @@ jobs:
# no comment.user.login so the step is a no-op skip there.
if: github.event_name == 'issue_comment'
env:
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
login="${{ github.event.comment.user.login }}"
@@ -175,7 +162,7 @@ jobs:
- name: Evaluate qa-review
id: eval
env:
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
# PR number lives in different places per event:
@@ -198,7 +185,7 @@ jobs:
# TOKEN FIX (RC 8326): uses STATUS_POST_TOKEN (CTO-granted,
# msg d52cc72a). Dedicated narrow-scoped write:repository token
# for the explicit status POST. Evaluator step stays on
# SOP_CHECKLIST_GATE_TOKEN (read-only) per deliberate security
# SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
# separation: eval computes, POST writes, never the same cred.
if: github.event_name == 'pull_request_review' && always()
env:
-19
View File
@@ -21,21 +21,15 @@ on:
branches: [main, staging]
paths:
- '.gitea/scripts/review-check.sh'
- '.gitea/scripts/_approval_validator.py'
- '.gitea/scripts/_review_check_filter.py'
- '.gitea/scripts/tests/test_review_check.sh'
- '.gitea/scripts/tests/_review_check_fixture.py'
- '.gitea/scripts/tests/test_approval_validator.py'
- '.gitea/workflows/review-check-tests.yml'
pull_request:
branches: [main, staging]
paths:
- '.gitea/scripts/review-check.sh'
- '.gitea/scripts/_approval_validator.py'
- '.gitea/scripts/_review_check_filter.py'
- '.gitea/scripts/tests/test_review_check.sh'
- '.gitea/scripts/tests/_review_check_fixture.py'
- '.gitea/scripts/tests/test_approval_validator.py'
- '.gitea/workflows/review-check-tests.yml'
workflow_dispatch:
@@ -76,16 +70,3 @@ jobs:
- name: Run review-check.sh regression suite
run: bash .gitea/scripts/tests/test_review_check.sh
- name: SSOT approval-validator unit tests (SEV-1 internal#812)
# The Python unit tests for _approval_validator.py are
# mutation-verified — every fail-closed branch has an explicit
# REJECT assertion. A reviewer who weakens the predicate trips
# these in CI.
run: |
# The test file lives in .gitea/scripts/tests/ with no __init__.py,
# so `unittest discover -s .gitea/scripts` finds 0 tests (the SEV-1
# suite silently never ran — a CI gap fixed alongside internal#812).
# Run the file directly; it self-inserts its sys.path and calls
# unittest.main(), so a failing assertion exits non-zero and fails CI.
python3 .gitea/scripts/tests/test_approval_validator.py -v
+16 -25
View File
@@ -10,23 +10,20 @@
# A1-α addendum (internal#760): review-event trigger added so the security
# gate flips immediately when a team member submits an APPROVE review.
# Uses `pull_request_review` types: [submitted] — verified live via
# sop-checklist.yml which provably fires this event (produces
# `sop-checklist / all-items-acked (pull_request_review)` contexts).
# The job-level `if:` does NOT guard on review.state (issue #2159):
# Gitea 1.22.6's payload shape for this event does not reliably expose
# the state field that the GitHub-style guard expects. The evaluator
# (review-check.sh) reads actual reviews from the API and checks for a
# real APPROVE, so running on COMMENT or REQUEST_CHANGES is harmless
# (read-only, idempotent). Branch-protection requires the
# `(pull_request_target)` context variant, so the review-event path
# EXPLICITLY POSTS the required context via the API. Trust boundary
# preserved (BASE ref, no PR-head).
# sop-tier-check.yml which provably fires this event (produces
# `sop-tier-check / tier-check (pull_request_review)` contexts).
# The job-level `if:` guard compares `github.event.review.state` against
# both 'APPROVED' and 'approved' (either casing passes), so only APPROVE
# reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
# the job level. Branch-protection requires the `(pull_request_target)`
# context variant, so the review-event path EXPLICITLY POSTS the required
# context via the API. Trust boundary preserved (BASE ref, no PR-head).
name: security-review
on:
pull_request_target:
types: [opened, synchronize, reopened, labeled, unlabeled]
types: [opened, synchronize, reopened]
pull_request_review:
types: [submitted]
@@ -40,19 +37,13 @@ jobs:
approved:
# Gate the job:
# - On pull_request_target events: always run.
# - On pull_request_review events: always run. We do NOT guard on
# review.state here because Gitea 1.22.6's payload shape for this
# event does not reliably expose the state field (issue #2159).
# The evaluator (review-check.sh) reads actual reviews from the
# API and checks for a real APPROVE, so running on COMMENT or
# REQUEST_CHANGES is harmless (read-only, idempotent).
# - On labeled/unlabeled events: re-evaluate when labels change.
# This ensures security-review flips when risk-indicator labels
# are added or removed.
# - On pull_request_review events whose review state is APPROVED (either
# casing): run so the gate flips immediately when a team member submits
# an APPROVE review.
# Comment-triggered refires live in sop-checklist.yml review-refire job.
if: |
github.event_name == 'pull_request_target' ||
github.event_name == 'pull_request_review'
(github.event_name == 'pull_request_review' &&
(github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
runs-on: ubuntu-latest
steps:
- name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
@@ -61,7 +52,7 @@ jobs:
# so re-running on a non-collaborator comment is harmless.
if: github.event_name == 'issue_comment'
env:
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
login="${{ github.event.comment.user.login }}"
@@ -87,7 +78,7 @@ jobs:
- name: Evaluate security-review
id: eval
env:
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
@@ -107,7 +98,7 @@ jobs:
# TOKEN FIX (RC 8326): uses STATUS_POST_TOKEN (CTO-granted,
# msg d52cc72a). Dedicated narrow-scoped write:repository token
# for the explicit status POST. Evaluator step stays on
# SOP_CHECKLIST_GATE_TOKEN (read-only) per deliberate security
# SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
# separation: eval computes, POST writes, never the same cred.
if: github.event_name == 'pull_request_review' && always()
env:
+29 -6
View File
@@ -14,10 +14,10 @@
# Fix (PR #1345 / issue #1280):
# - ONE workflow, ONE issue_comment:[created] subscription (no edited/deleted)
# - all-items-acked job: pull_request_target OR sop slash-command comments
# - review-refire job: qa/security refire slash commands
# - review-refire job: qa/security/tier refire slash commands
# → ~50% reduction in comment-triggered runner occupancy vs pre-fix.
#
# Trust boundary (mirrors RFC#324 §A4 + sop-checklist security note):
# Trust boundary (mirrors RFC#324 §A4 + sop-tier-check security note):
# `pull_request_target` (not `pull_request`) — workflow def is loaded
# from BASE branch, so a PR cannot rewrite this workflow to exfiltrate
# the token. The `actions/checkout` step pins `ref: base.sha` so the
@@ -34,6 +34,14 @@
# via a repo secret `SOP_CHECKLIST_GATE_TOKEN`. Provisioning of that
# secret is a follow-up authorization step (separate from this PR).
#
# Failure mode: tier-aware (RFC#351 open question 2):
# - tier:high → state=failure (hard-fail; BP blocks merge)
# - tier:medium → state=failure (hard-fail; same)
# - tier:low → state=pending (soft-fail; BP can choose to require
# this context or skip for low-tier PRs)
# - missing/no-tier → state=failure (default-mode: hard — never lower
# the bar per feedback_fix_root_not_symptom)
#
# Slash-command contract (RFC#351 v1 + §A1.1-style notes from RFC#324):
#
# /sop-ack <slug-or-numeric-alias> [optional note]
@@ -53,7 +61,7 @@
# — declare a gate (qa-review, security-review) N/A.
# — see sop-checklist-config.yaml n/a_gates section.
#
# /qa-recheck /security-recheck
# /qa-recheck /security-recheck /refire-tier-check
# — refire the corresponding status check on the PR head.
#
# The eval is read-only + idempotent (read PR + comments + team
@@ -141,6 +149,7 @@ jobs:
{
echo "run_qa=false"
echo "run_security=false"
echo "run_tier=false"
} >> "$GITHUB_OUTPUT"
first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
case "$first_line" in
@@ -150,6 +159,9 @@ jobs:
/security-recheck*)
echo "run_security=true" >> "$GITHUB_OUTPUT"
;;
/refire-tier-check*)
echo "run_tier=true" >> "$GITHUB_OUTPUT"
;;
*)
echo "::notice::no supported review refire slash command; no-op"
;;
@@ -158,7 +170,8 @@ jobs:
- name: Check out BASE ref for trusted scripts
if: |
steps.classify.outputs.run_qa == 'true' ||
steps.classify.outputs.run_security == 'true'
steps.classify.outputs.run_security == 'true' ||
steps.classify.outputs.run_tier == 'true'
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
ref: ${{ github.event.repository.default_branch }}
@@ -167,7 +180,7 @@ jobs:
if: steps.classify.outputs.run_qa == 'true'
env:
# Evaluator (review-check.sh + GET /pulls) stays on read-scoped token.
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
# Explicit POST /statuses uses narrow-scoped write:repository token.
STATUS_POST_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
GITEA_HOST: git.moleculesai.app
@@ -186,7 +199,7 @@ jobs:
if: steps.classify.outputs.run_security == 'true'
env:
# Evaluator (review-check.sh + GET /pulls) stays on read-scoped token.
GITEA_TOKEN: ${{ secrets.SOP_CHECKLIST_GATE_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
# Explicit POST /statuses uses narrow-scoped write:repository token.
STATUS_POST_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
GITEA_HOST: git.moleculesai.app
@@ -200,3 +213,13 @@ jobs:
run: |
set -euo pipefail
.gitea/scripts/review-refire-status.sh
- name: Refire sop-tier-check status
if: steps.classify.outputs.run_tier == 'true'
env:
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_NUMBER: ${{ github.event.issue.number }}
SOP_DEBUG: '0'
run: bash .gitea/scripts/sop-tier-refire.sh
+162
View File
@@ -0,0 +1,162 @@
# sop-tier-check — canonical Gitea Actions workflow for §SOP-6 enforcement.
#
# Logic lives in `.gitea/scripts/sop-tier-check.sh` (extracted 2026-05-09
# from the previous inline-bash version). The script is the single source
# of truth; this workflow file just sets env + invokes it.
#
# Copy BOTH files (`.gitea/workflows/sop-tier-check.yml` +
# `.gitea/scripts/sop-tier-check.sh`) into any repo that wants the
# §SOP-6 PR gate enforced. Pair with branch protection on the protected
# branch:
# required_status_checks: ["sop-tier-check / tier-check (pull_request)"]
# required_approving_reviews: 1
# approving_review_teams: ["ceo", "managers", "engineers"]
#
# Tier → required-team expression (internal#189 AND-composition):
# tier:low → engineers,managers,ceo (OR: any one suffices)
# tier:medium → managers AND engineers AND qa???,security??? (AND: all required)
# tier:high → ceo (OR: single team, wired for AND)
#
# "???" = teams not yet created in Gitea. When qa + security teams are
# added, update TIER_EXPR["tier:medium"] in the script to remove the
# markers. PRs already in-flight when qa/security are created continue
# to work because their authors explicitly requested those reviews.
#
# Force-merge: Owners-team override remains available out-of-band via
# the Gitea merge API; force-merge writes `incident.force_merge` to
# `structure_events` per §Persistent structured logging gate (Phase 3).
#
# Environment variables:
# SOP_DEBUG=1 — per-API-call diagnostic lines. Default: off.
# SOP_LEGACY_CHECK=1 — revert to OR-gate for this run. Intended for
# emergency use only; burn-in window closed
# 2026-05-17 (internal#189 Phase 1).
#
# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
# window closed. As of 2026-06-04 the residual masks left behind by the
# burn-in are removed for real (the comment previously claimed this while
# the masks still persisted — that was stale):
# - continue-on-error: true on the jq-install step (redundant; the step
# already exits 0) and on the tier-check step (the burn-in mask).
# - the `|| true` after the sop-tier-check.sh invocation, which masked
# real tier-gate verdicts.
# AND-composition is now fully enforced and the tier-check step can
# honestly red CI on a real SOP-6 violation.
#
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed): this is a
# REQUIRED branch-protected gate on `pull_request_target` (always
# same-repo, secrets always present — no fork/advisory split). Failing
# open on a token/network/jq fault greened the SOP-6 approval gate
# WITHOUT verifying approvals — a fail-open on a required context. The
# gate now FAILS CLOSED on infra faults too: fix the token/runner, not
# the gate. If you ever need to temporarily re-introduce a mask, file a
# tracker and follow the mc#1982 protocol.
name: sop-tier-check
# SECURITY: triggers MUST use `pull_request_target`, not `pull_request`.
# `pull_request_target` loads the workflow definition from the BASE
# branch (i.e. `main`), not the PR's HEAD. With `pull_request`, anyone
# with write access to a feature branch could rewrite this file in
# their PR to dump SOP_TIER_CHECK_TOKEN (org-read scope) to logs and
# exfiltrate it. Verified 2026-05-09 against Gitea 1.22.6 —
# `pull_request_target` (added in Gitea 1.21 via go-gitea/gitea#25229)
# is the documented mitigation.
#
# This workflow does NOT call `actions/checkout` of PR HEAD code, so no
# untrusted code is ever executed in the runner — we only HTTP-call the
# Gitea API. If a future change adds a checkout step, it MUST pin to
# `${{ github.event.pull_request.base.sha }}` (NOT `head.sha`) to keep
# the trust boundary.
on:
pull_request_target:
types: [opened, edited, synchronize, reopened, labeled, unlabeled]
pull_request_review:
types: [submitted, dismissed, edited]
concurrency:
group: ${{ github.repository }}-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
tier-check:
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: read
secrets: read
steps:
- name: Check out base branch (for the script)
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
# Pin to base.sha — pull_request_target's protection only
# works if we never check out PR HEAD. Same SHA the workflow
# itself was loaded from.
ref: ${{ github.event.pull_request.base.sha }}
- name: Install jq
# Gitea Actions runners (ubuntu-latest label) do not bundle jq.
# The sop-tier-check script uses jq for all JSON API parsing.
# Install jq before the script runs so sop-tier-check can pass.
#
# Method: apt-get first (reliable for Ubuntu runners with internet
# access to package mirrors). Falls back to GitHub binary download.
# GitHub releases may be unreachable from some runner networks
# (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
# runners). The sop-tier-check script has its own fallback as a
# third line of defense, and this step's final command
# (`jq --version ... || echo`) already exits 0 unconditionally — so
# the step cannot fail the job on its own.
# continue-on-error REMOVED 2026-06-04 (mc#1982 directive: root-fix
# and remove, do not renew). It was redundant masking, not a gate.
run: |
  # Install jq for the sop-tier-check script (it parses every Gitea API
  # response with jq).
  #
  # apt-get is the primary method — Ubuntu package mirrors are reliably
  # reachable from runner containers. GitHub releases may be blocked
  # or slow on some networks (infra#241 follow-up).
  if apt-get update -qq && apt-get install -y -qq jq; then
    echo "::notice::jq installed via apt-get: $(jq --version)"
  # Fallback: pinned jq 1.7.1 release binary, download capped at 120s.
  # -f (--fail) makes curl exit non-zero on an HTTP error (404/5xx)
  # instead of saving the server's error page as /usr/local/bin/jq and
  # then reporting a successful "download" of a file that is not jq.
  elif timeout 120 curl -fsSL \
      "https://github.com/jqlang/jq/releases/download/jq-1.7.1/jq-linux-amd64" \
      -o /usr/local/bin/jq && chmod +x /usr/local/bin/jq; then
    echo "::notice::jq binary downloaded: $(/usr/local/bin/jq --version)"
  else
    echo "::warning::jq install failed — apt-get and GitHub download both failed."
  fi
  # Informational probe only: the `|| echo` keeps this step's exit status 0
  # even when jq is still missing, so the step never fails the job itself.
  jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
- name: Verify tier label + reviewer team membership
# continue-on-error REMOVED 2026-06-04 (expired internal#189 Phase 1
# burn-in, window closed 2026-05-17; mc#1982 directive: root-fix and
# remove, do not renew). SOP_FAIL_OPEN REMOVED 2026-06-05
# (fix/core-ci-fail-closed): the gate now fails CLOSED on infra
# faults too (see the env block below), not just on a real verdict.
env:
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
SOP_DEBUG: '0'
SOP_LEGACY_CHECK: '0'
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed).
#
# This is the REQUIRED branch-protected gate
# `sop-tier-check / tier-check (pull_request)`. It runs on
# `pull_request_target`, which ALWAYS executes from the base
# branch WITH secrets present — there is NO fork/advisory split
# and no legitimate "secrets genuinely absent" degradation here.
#
# SOP_FAIL_OPEN=1 made the script `exit 0` on an empty/invalid
# token, an unreachable Gitea API, or missing jq — i.e. an AUTH
# FAILURE or unreachable-dependency would green the SOP-6
# approval gate WITHOUT verifying that the required teams
# actually approved. That is a fail-open on a required gate: a
# mis-wired or under-scoped SOP_TIER_CHECK_TOKEN would let any PR
# merge past the approval requirement.
#
# Removing the env unsets it → `${SOP_FAIL_OPEN:-}` is empty in
# sop-tier-check.sh → every guarded `exit 0` branch instead falls
# through to `exit 1`. Infra faults (bad token / API down / no
# jq) now FAIL CLOSED with a loud `::error::`, exactly like a real
# SOP-6 violation. Fix the token/runner, not the gate.
run: |
bash .gitea/scripts/sop-tier-check.sh
+52
View File
@@ -0,0 +1,52 @@
# sop-tier-refire — manual fallback for sop-tier-check refire.
#
# Closes internal#292. Gitea 1.22.6 doesn't refire workflows on the
# `pull_request_review` event (go-gitea/gitea#33700); the `sop-tier-check`
# workflow's review-event subscription is silently dead. The result:
# PRs that get their approving review AFTER the tier-check ran on open/
# synchronize keep their failing status check forever, and the only way
# to merge is the admin force-merge path (audited via `audit-force-merge`
# but the audit trail keeps growing; see `feedback_never_admin_merge_bypass`).
#
# Comment-triggered refires now live in `review-refire-comments.yml`. Gitea
# queues issue_comment workflows before evaluating job-level `if:`, so having
# qa-review, security-review, sop-checklist, and sop-tier-refire all subscribe
# to every comment caused queue storms on SOP-heavy PRs. This workflow is a
# non-automatic breadcrumb only; Gitea 1.22.6 does not support
# workflow_dispatch inputs, so real refires must use `/refire-tier-check`.
#
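# SECURITY MODEL (for the comment-triggered `/refire-tier-check` path; this
# workflow itself only subscribes to workflow_dispatch and always exits 1):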
#
# 1. `pull_request` exists on the issue (issue_comment fires on issues
# AND PRs; we only want PRs).
# 2. `comment.author_association` must be MEMBER/OWNER/COLLABORATOR.
# Per the internal#292 core-security review (review#1066 ask): anyone
# can comment, but only repo collaborators+ can flip the status.
# Without this gate, a drive-by commenter on a public-issue-tracker
# surface could trigger a status flip.
# 3. Comment body must contain `/refire-tier-check` — a slash-command-
# shaped trigger (not just any comment word). Prevents accidental
# triggering from prose like "we should refire tests" in a review.
# 4. This workflow does NOT check out PR HEAD code. Like sop-tier-check,
# it only HTTP-calls the Gitea API. Trust boundary preserved.
#
# Note: `issue_comment` fires from the BASE branch's workflow file. There
# is no `pull_request_target` equivalent to set; the trigger inherently
# loads the workflow from the default branch.
#
# Rate-limit: a 1s pre-sleep + a "skip if status posted in last 30s"
# guard prevents comment-spam from thrashing the status. See the script.
name: sop-tier-check refire (manual)
on:
workflow_dispatch:
jobs:
refire:
runs-on: ubuntu-latest
steps:
- name: Explain supported refire path
run: |
echo "::error::Gitea 1.22.6 does not support workflow_dispatch inputs here; comment /refire-tier-check on the PR instead."
exit 1
+3 -3
View File
@@ -112,9 +112,9 @@ jobs:
E2E_RUNTIME: claude-code
# Pin the smoke to a specific MiniMax model rather than relying
# on the per-runtime default (which could resolve to "sonnet" →
# direct Anthropic and defeat the cost saving). MiniMax-M2.7 is the
# stable staging MiniMax path used by the full-SaaS smoke (#1997).
E2E_MODEL_SLUG: MiniMax-M2.7
# direct Anthropic and defeat the cost saving). MiniMax-M2 is the
# stable staging MiniMax path used by the full-SaaS smoke.
E2E_MODEL_SLUG: MiniMax-M2
E2E_RUN_ID: "smoke-${{ github.run_id }}"
# Debug-only: when an operator dispatches with keep_on_failure=true,
# the smoke script's E2E_KEEP_ORG=1 path skips teardown so the
+4 -7
View File
@@ -34,10 +34,8 @@ name: Sweep stale Cloudflare DNS records
# scripts/ops/test_sweep_cf_decide.py (#2027) cover the rule
# classifier.
#
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
# (operator-host canonical name) are accepted — the workflow falls back
# automatically. Same for CF_ZONE_ID / CLOUDFLARE_ZONE_ID. Confirmed
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
# Secrets: CF_API_TOKEN, CF_ZONE_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
# are confirmed existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
@@ -81,8 +79,8 @@ jobs:
# each individually capped at 10s by the script's curl -m flag.
timeout-minutes: 3
env:
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID || secrets.CLOUDFLARE_ZONE_ID }}
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
CF_ZONE_ID: ${{ secrets.CF_ZONE_ID }}
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
@@ -131,7 +129,6 @@ jobs:
fi
echo "::error::sweep cannot run — required secrets missing: ${missing[*]}"
echo "::error::set them at Settings → Secrets and Variables → Actions, or disable this workflow."
echo "::error::Cloudflare secrets accept either the CI-scoped name (CF_API_TOKEN / CF_ZONE_ID) or the operator-host canonical name (CLOUDFLARE_API_TOKEN / CLOUDFLARE_ZONE_ID)."
echo "::error::a silent skip masked an active CF DNS leak (152/200 zone records) caught only by a manual audit on 2026-04-28; this gate exists to make the gap visible."
exit 1
fi
+6 -8
View File
@@ -29,12 +29,10 @@ name: Sweep stale Cloudflare Tunnels
# the DNS sweep's 50% because tenant-shaped tunnels are mostly
# orphans by design) refuses to nuke past the threshold.
#
# Secrets: CF_API_TOKEN (preferred CI-scoped name) or CLOUDFLARE_API_TOKEN
# (operator-host canonical name) are accepted — the workflow falls back
# automatically. Same for CF_ACCOUNT_ID / CLOUDFLARE_ACCOUNT_ID. Confirmed
# existing per issue #425 §425 audit. CP_ADMIN_API_TOKEN and
# CP_STAGING_ADMIN_API_TOKEN are unconfirmed — if missing, the verify step
# (schedule → hard-fail, dispatch → soft-skip) surfaces it clearly.
# Secrets: CF_API_TOKEN, CF_ACCOUNT_ID are confirmed existing per
# issue #425 §425 audit. CP_ADMIN_API_TOKEN and CP_STAGING_ADMIN_API_TOKEN
# are unconfirmed — if missing, the verify step (schedule → hard-fail,
# dispatch → soft-skip) surfaces it clearly.
on:
schedule:
@@ -76,8 +74,8 @@ jobs:
# the sweep-cf-orphans companion job).
timeout-minutes: 30
env:
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN || secrets.CLOUDFLARE_API_TOKEN }}
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID || secrets.CLOUDFLARE_ACCOUNT_ID }}
CF_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
CF_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
CP_ADMIN_API_TOKEN: ${{ secrets.CP_ADMIN_API_TOKEN }}
CP_STAGING_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
MAX_DELETE_PCT: ${{ github.event.inputs.max_delete_pct || '90' }}
+9 -38
View File
@@ -58,51 +58,22 @@ jobs:
python-version: '3.11'
- name: Install .gitea script test dependencies
run: python -m pip install --quiet 'pytest==9.0.2' 'PyYAML==6.0.2'
- name: Run scripts/ unittests (fail-closed on 0 collected)
- name: Run scripts/ unittests, if any
# Top-level scripts/ tests live alongside their target file. The
# runtime packaging tests moved to molecule-ai-workspace-runtime, so
# this pass may legitimately find NO test files today.
#
# Gate-integrity fix: the previous guard keyed off `rc==5` to detect
# "no tests collected", but Python 3.12's unittest exits 0 (not 5)
# when discovery finds 0 tests ("NO TESTS RAN"). The guard therefore
# never fired, so any test_*.py added here would silently run 0 tests
# while this step stayed GREEN. A green step that runs 0 tests is
# worse than a red one. We now fail-closed:
# - genuinely NO test_*.py present -> loud SKIP (legitimate no-op)
# - test_*.py present but 0 collected -> FAIL (broken import/empty)
# this pass may legitimately find no tests.
working-directory: scripts
run: |
set -euo pipefail
# Non-recursive count: scripts/ has no __init__.py, so unittest
# discover does not recurse into subdirs (ops/ is run separately
# below) — top-level files are the entire discovery scope here.
nfiles=$(find . -maxdepth 1 -name 'test_*.py' | wc -l | tr -d ' ')
if [ "$nfiles" -eq 0 ]; then
echo "SKIP: no top-level scripts/ test_*.py files present (genuine no-op)."
set +e
python -m unittest discover -t . -p 'test_*.py' -v
rc=$?
if [ "$rc" -eq 5 ]; then
echo "No top-level scripts/ unittest files found; skipping."
exit 0
fi
echo "Found $nfiles top-level scripts/ test_*.py file(s); asserting they collect >0 tests."
ncollected=$(python -c "import unittest; print(unittest.TestLoader().discover('.', pattern='test_*.py', top_level_dir='.').countTestCases())")
echo "Collected $ncollected test case(s)."
if [ "$ncollected" -eq 0 ]; then
echo "FAIL: test_*.py file(s) present but 0 tests collected (broken import / empty file / discovery error)."
exit 1
fi
python -m unittest discover -t . -p 'test_*.py' -v
exit "$rc"
- name: Run scripts/ops/ unittests (sweep_cf_decide, ...)
# Real gate: scripts/ops/ must always run tests. Assert >0 collected so
# deleting all test files (or breaking an import) can't pass GREEN by
# running 0 tests — same gate-integrity class as the scripts/ step.
working-directory: scripts/ops
run: |
set -euo pipefail
ncollected=$(python -c "import unittest; print(unittest.TestLoader().discover('.', pattern='test_*.py').countTestCases())")
echo "scripts/ops/ collected $ncollected test case(s)."
if [ "$ncollected" -eq 0 ]; then
echo "FAIL: scripts/ops/ collected 0 tests — this gate must run real tests (deleted/broken import?)."
exit 1
fi
python -m unittest discover -p 'test_*.py' -v
run: python -m unittest discover -p 'test_*.py' -v
- name: Run .gitea/scripts pytest suite
run: python -m pytest .gitea/scripts/tests -q
-67
View File
@@ -1,67 +0,0 @@
# umbrella-reaper — auto-recovery for stale CI umbrella statuses on open PRs.
#
# Tracking: molecule-core#1780.
#
# Problem: when `CI / all-required (pull_request)` reports failure due to
# a propagation/timing race despite all required sub-jobs being success,
# branch protection blocks the merge. Operators currently recover manually
# per docs/runbooks/ci-umbrella-stale-compensating-status.md.
#
# This workflow automates that recovery: it scans open PRs and posts a
# compensating success status when the umbrella is stale but all sub-jobs
# are verified green.
#
# Trust boundary: the script only reads PR lists + statuses and POSTs to
# /statuses/{sha}. It never checks out PR HEAD code. The Gitea token has
# write:repository scope for statuses only.
#
# Sibling: .gitea/workflows/status-reaper.yml (default-branch push-suffix
# compensation). Same persona provisioning model.
name: umbrella-reaper
# IMPORTANT — Schedule moved to operator-config:
# /etc/cron.d/molecule-core-umbrella-reaper ->
# /usr/local/bin/molecule-core-cron-bot.sh umbrella-reaper
#
# This keeps the compensation cadence but stops a maintenance bot from
# consuming Gitea Actions runner slots during PR merge waves.
# Gitea 1.22.6 parser quirk per
# `feedback_gitea_workflow_dispatch_inputs_unsupported`: do NOT add an
# `inputs:` block here. Gitea 1.22.6 rejects the whole workflow as
# "unknown on type" when `workflow_dispatch.inputs.X` is present.
on:
workflow_dispatch:
permissions:
contents: read
# NOTE: NO `concurrency:` block is intentional — same reasoning as
# status-reaper.yml. Gitea 1.22.6 doesn't honor cancel-in-progress for
# queued ticks; the POST is idempotent so concurrent ticks are safe.
jobs:
reap:
runs-on: ubuntu-latest
timeout-minutes: 8
steps:
- name: Check out repo at default-branch HEAD
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
with:
ref: ${{ github.event.repository.default_branch }}
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065
with:
python-version: '3.12'
- name: Install PyYAML
run: python -m pip install --quiet 'PyYAML==6.0.2'
- name: Compensate stale PR umbrella statuses
env:
GITEA_TOKEN: ${{ secrets.UMBRELLA_REAPER_TOKEN }}
GITEA_HOST: git.moleculesai.app
REPO: ${{ github.repository }}
PR_LIMIT: "50"
run: python3 .gitea/scripts/umbrella-reaper.py
+1 -1
View File
@@ -26,7 +26,7 @@ name: verify-providers-gen
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
# from branch protection (turning it into a hard merge gate has blast radius
# — operator GO required, same pattern as sop-checklist / verify-providers-gen
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
# on controlplane). Promote it into branch protection in a follow-up once
# P2 has soaked.
# Until then it behaves like secret-scan / block-internal-paths: a standalone
+1 -11
View File
@@ -4,7 +4,7 @@
# use this Makefile; CI calls docker compose / go test directly so the
# Makefile can evolve without breaking the build.
.PHONY: help dev up down logs build test e2e-peer-visibility e2e-concierge-creates-workspace openapi-spec openapi-spec-check gen gen-docker gen-check gen-check-docker
.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check gen gen-docker gen-check gen-check-docker
# ─── Provider-registry SSOT codegen (internal#718) ─────────────────────
# The Go module lives in workspace-server/. The checked-in artifact
@@ -57,16 +57,6 @@ test: ## Run Go unit tests in workspace-server/.
e2e-peer-visibility: ## Run the LOCAL peer-visibility MCP gate vs the running stack (needs `make up` first).
bash tests/e2e/test_peer_visibility_mcp_local.sh
# FUNCTIONAL local proof that the org concierge actually DOES org-management:
# send it a natural-language A2A request and assert it really CREATES a workspace
# via its platform MCP (create_workspace) — the deterministic side effect, not a
# REST 200. SKIPs LOUD (exit 0) unless the local concierge is seeded, online, and
# running on the platform-agent image (so create_workspace exists). To run it
# green locally: seed the concierge (MOLECULE_SEED_PLATFORM_AGENT=1) on the
# platform-agent image WITH a model key. See the script header for the contract.
e2e-concierge-creates-workspace: ## Prove the concierge actually creates a workspace via its platform MCP (skips loud if not runnable).
bash tests/e2e/test_concierge_creates_workspace_local.sh
# ─── OpenAPI spec generation (RFC #1706, Phase 1) ─────────────────────
# Regenerate workspace-server/docs/openapi/swagger.{yaml,json} from
# swaggo annotations on the gin handlers. Commit the output. CI runs
-10
View File
@@ -1,14 +1,7 @@
import { test, expect } from "@playwright/test";
import type { Page } from "@playwright/test";
import { startEchoRuntime } from "./fixtures/echo-runtime";
import { seedWorkspace, startHeartbeat, cleanupWorkspace } from "./fixtures/chat-seed";
/**
 * Switch the shell to the Org-map view by clicking the `nav-map` rail button,
 * so the Canvas (React Flow graph) mounts. Fails with a descriptive message
 * if the button is not visible within 10 seconds.
 */
async function enterMapView(page: Page): Promise<void> {
  const mapRailButton = page.getByTestId("nav-map");
  const visibilityCheck = expect(mapRailButton, "rail button nav-map missing");
  await visibilityCheck.toBeVisible({ timeout: 10_000 });
  await mapRailButton.click();
}
test.describe("Desktop ChatTab", () => {
let cleanup: () => Promise<void> = async () => {};
@@ -36,7 +29,6 @@ test.describe("Desktop ChatTab", () => {
test.beforeEach(async ({ page }) => {
await page.setViewportSize({ width: 1280, height: 800 });
await page.goto("/");
await enterMapView(page);
await page.waitForSelector(".react-flow__node", { timeout: 10_000 });
// Dismiss onboarding guide if present.
const skipGuide = page.getByText("Skip guide");
@@ -75,7 +67,6 @@ test.describe("Desktop ChatTab", () => {
await expect(page.getByText("Echo: Persistence test")).toBeVisible({ timeout: 15_000 });
await page.reload();
await enterMapView(page);
await page.waitForSelector(".react-flow__node", { timeout: 10_000 });
await page.getByText(workspaceName, { exact: true }).first().click();
await page.locator('#tab-chat').click();
@@ -152,7 +143,6 @@ test.describe("Desktop ChatTab — Markdown rendering", () => {
test.beforeEach(async ({ page }) => {
await page.setViewportSize({ width: 1280, height: 800 });
await page.goto("/");
await enterMapView(page);
await page.waitForSelector(".react-flow__node", { timeout: 10_000 });
const skipGuide2 = page.getByText("Skip guide");
if (await skipGuide2.isVisible().catch(() => false)) {
+1 -5
View File
@@ -27,13 +27,9 @@ export async function seedWorkspace(echoURL: string): Promise<SeededWorkspace> {
// 1. Create external workspace pointing at the in-process echo runtime.
const runId = Math.random().toString(36).slice(2, 8);
const wsName = `Chat E2E Agent ${runId}`;
const adminToken = process.env.E2E_ADMIN_TOKEN ?? process.env.ADMIN_TOKEN;
const createRes = await fetch(`${PLATFORM_URL}/workspaces`, {
method: "POST",
headers: {
"Content-Type": "application/json",
...(adminToken ? { Authorization: `Bearer ${adminToken}` } : {}),
},
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
name: wsName,
tier: 1,
-648
View File
@@ -1,648 +0,0 @@
/**
* Staging concierge canvas E2E — exercises the platform-agent CONCIERGE shell
* (canvas/src/components/concierge/ConciergeShell.tsx and the Settings split)
* against a fresh staging org provisioned by the shared global setup
* (e2e/staging-setup.ts). Each `test.describe` covers ONE concierge function
* and asserts the behaviour works — not merely that an element exists.
*
* Why this is a SEPARATE spec from staging-tabs.spec.ts (which drives the
* Org-map SidePanel tab UI): the two assert different surfaces of the same
* tenant. Both reuse the EXACT shared harness — same global setup (one
* provisioned org/workspace), same Playwright staging config (matched by the
* `staging-*.spec.ts` testMatch), same gated `Canvas tabs E2E` workflow check.
* No new harness, no new seeding mechanism.
*
* One extra precondition this spec needs that staging-tabs does NOT: a
* kind='platform' concierge ROW. The CI/SaaS tenant does not self-seed one
* (MOLECULE_SEED_PLATFORM_AGENT is unset on CI — workspace-server
* cmd/server/main.go), so without it the concierge shell falls back to
* roots[0] as a *pseudo*-platform surface and the platform-specific
* behaviours (root tag, hidden-from-map) can't be asserted. So this spec
* installs one via the SAME admin endpoint the control plane uses at
* org-provision time — POST /admin/org/platform-agent (AdminAuth, accepts the
* per-tenant admin bearer that global setup already exports). Installing it
* re-parents the provisioned hermes workspace UNDER the platform agent
* (handlers/platform_agent.go, installPlatformAgent), giving us a real
* platform ROOT + a real child workspace — exactly the topology the concierge
* Home tree and Org-map filter are built to handle.
*
* This install mutates the shared tenant (re-parents the workspace). It is the
* LAST staging spec alphabetically among the topology-touching ones, and
* staging-tabs / staging-display read the workspace by id (not by root-ness),
* so the re-parent does not break them; Playwright runs workers=1 in file
* order, and the install is idempotent.
*
* Auth model is identical to staging-tabs.spec.ts: feed the per-tenant admin
* token as an Authorization: Bearer header on every browser request, mock
* /cp/auth/me so AuthGate resolves, and fall any non-auth 401 back to an
* empty 200 so a workspace-scoped 401 can't yank us to AuthKit.
*/
import { test, expect, type Page, type BrowserContext } from "@playwright/test";
// True only when the operator opted in with CANVAS_E2E_STAGING set to exactly
// "1"; any other value (or unset) counts as "staging not requested".
const STAGING = process.env.CANVAS_E2E_STAGING === "1";
// Fail-closed, not skip-green (mirrors staging-tabs.spec.ts): a staging run
// that was REQUESTED (CANVAS_E2E_STAGING=1) but has no tenant state is a
// provisioning failure, asserted loudly via tenantEnv() — not a skip.
// CANVAS_E2E_STAGING unset = operator did not request staging = clean skip.
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — staging-only suite, not requested");
/**
 * Read the tenant handoff that global setup exported through the environment
 * and validate it.
 *
 * @returns The tenant URL, admin token and workspace id (all guaranteed
 *   non-empty) plus the org id, which is passed through as-is and may be
 *   undefined.
 * @throws Error when any of STAGING_TENANT_URL, STAGING_TENANT_TOKEN or
 *   STAGING_WORKSPACE_ID is missing or empty.
 */
function tenantEnv() {
  const {
    STAGING_TENANT_URL: tenantURL,
    STAGING_TENANT_TOKEN: tenantToken,
    STAGING_WORKSPACE_ID: workspaceId,
    STAGING_ORG_ID: orgID,
  } = process.env;

  // All three required values must be truthy; the org id is optional.
  if (!(tenantURL && tenantToken && workspaceId)) {
    const explanation = [
      "staging-setup.ts did not export STAGING_TENANT_URL /",
      "STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID. CANVAS_E2E_STAGING=1 was",
      "set (staging WAS requested) but global setup produced no tenant — a",
      "provisioning failure, NOT a reason to skip. See the [staging-setup]",
      "log above.",
    ].join(" ");
    throw new Error(explanation);
  }

  return { tenantURL, tenantToken, workspaceId, orgID };
}
// A fixed, valid uuid for the installed platform agent. Any valid uuid works
// (the install upserts on this id); reusing one constant keeps re-runs
// idempotent on the same row. Chosen out of the e2e namespace so it can't
// collide with a CP-derived org id.
const PLATFORM_AGENT_ID = "e2e0c1e2-0000-4000-a000-000000c0ce0e";
// Display name sent in the install request body. The Org-map test also uses
// it to build the aria-label prefix that must be absent from the node graph.
const PLATFORM_AGENT_NAME = "E2E Concierge";
/**
 * Idempotently install the platform-agent (concierge) row on the shared
 * tenant so the concierge shell resolves a REAL kind='platform' root.
 *
 * Issues POST `${tenantURL}/admin/org/platform-agent` with the per-tenant
 * admin bearer and, when an org id is known, the X-Molecule-Org-Id header.
 * Tolerant of a backend that predates the endpoint (404/405) — in that
 * degraded case the spec proceeds against the roots[0] fallback and the
 * platform-specific assertions are loosened by the callers.
 *
 * @param page        Page whose `request` context sends the POST.
 * @param tenantURL   Base URL of the tenant under test.
 * @param tenantToken Per-tenant admin bearer token.
 * @param orgID       Optional org id; the org header is omitted when falsy.
 * @returns `{ installed: true }` for any 2xx response, `{ installed: false }`
 *   for 404/405.
 * @throws Error for every other status, including the status code and the
 *   response body (empty string if the body cannot be read).
 */
async function installPlatformAgent(
  page: Page,
  tenantURL: string,
  tenantToken: string,
  orgID: string | undefined,
): Promise<{ installed: boolean }> {
  const headers: Record<string, string> = {
    Authorization: `Bearer ${tenantToken}`,
    "Content-Type": "application/json",
  };
  if (orgID) headers["X-Molecule-Org-Id"] = orgID;

  const resp = await page.request.post(`${tenantURL}/admin/org/platform-agent`, {
    headers,
    data: { id: PLATFORM_AGENT_ID, name: PLATFORM_AGENT_NAME },
  });
  const status = resp.status();

  if (status >= 200 && status < 300) {
    console.log(`[staging-concierge] platform agent installed (HTTP ${status})`);
    return { installed: true };
  }

  // Endpoint absent on an older backend — proceed against the fallback root.
  if (status === 404 || status === 405) {
    // The " — " separator keeps the status code from running straight into
    // the explanation (previously rendered as e.g. "returned 404backend ...").
    console.warn(
      `[staging-concierge] POST /admin/org/platform-agent returned ${status} — ` +
        `backend predates the platform-agent endpoint. Proceeding against the ` +
        `roots[0] concierge fallback; the platform-root / map-hidden assertions ` +
        `are loosened accordingly.`,
    );
    return { installed: false };
  }

  throw new Error(
    `POST /admin/org/platform-agent ${status}: ${await resp.text().catch(() => "")}`,
  );
}
/**
 * Wire up browser-side auth for the staging tenant in three steps:
 *   1. send the per-tenant bearer on every request from this context;
 *   2. answer /cp/auth/me with a canned identity;
 *   3. turn any other fetch that comes back 401 into an empty 200 — the
 *      401→empty-200 fallback ("[]" unless the last path segment looks like
 *      an id, in which case "{}").
 *
 * The original notes this contract is taken verbatim from staging-tabs.spec.ts
 * because no WorkOS session is available to Playwright.
 */
async function authenticate(
  context: BrowserContext,
  tenantToken: string,
  workspaceId: string,
): Promise<void> {
  await context.setExtraHTTPHeaders({ Authorization: `Bearer ${tenantToken}` });

  // Canned identity payload served for every /cp/auth/me request.
  const identityBody = JSON.stringify({
    user_id: `e2e-test-user-${workspaceId}`,
    org_id: "e2e-test-org",
    email: "e2e@test.local",
  });
  await context.route("**/cp/auth/me", (route) =>
    route.fulfill({
      status: 200,
      contentType: "application/json",
      body: identityBody,
    }),
  );

  await context.route("**", async (route, request) => {
    // Only fetch traffic is intercepted, and the auth probe is left to the
    // dedicated mock registered above.
    if (request.resourceType() !== "fetch" || request.url().includes("/cp/auth/me")) {
      return route.fallback();
    }

    let upstream;
    try {
      upstream = await route.fetch();
    } catch {
      return route.fallback();
    }

    if (upstream.status() === 401) {
      const segments = new URL(request.url()).pathname.split("/").filter(Boolean);
      const tail = segments.pop() || "";
      // A trailing hex/dash segment of 8+ chars is treated as a single
      // resource; anything else is treated as a list endpoint.
      const idShaped = /^[0-9a-f-]{8,}$/.test(tail);
      await route.fulfill({
        status: 200,
        contentType: "application/json",
        body: idShaped ? "{}" : "[]",
      });
      return;
    }

    return route.fulfill({ response: upstream });
  });
}
/**
 * Navigate to the tenant and wait for the concierge shell to hydrate.
 *
 * Resolves once either the `nav-home` rail button or the `hydration-error`
 * banner is present, then asserts that the banner is absent and that no
 * "Something went wrong" text is rendered.
 */
async function loadConcierge(page: Page, tenantURL: string): Promise<void> {
  // Mirror browser console errors into the test log.
  page.on("console", (msg) => {
    if (msg.type() !== "error") return;
    console.log(`[e2e/console-error] ${msg.text()}`);
  });

  await page.goto(tenantURL, { waitUntil: "domcontentloaded" });

  // Wait for the rail OR the hydration-error banner, whichever appears first.
  // networkidle is deliberately not used: per the original notes the shell
  // keeps a WS + polling open.
  const readySelector = '[data-testid="nav-home"], [data-testid="hydration-error"]';
  await page.waitForSelector(readySelector, { timeout: 45_000 });

  const hydrationErrorCount = await page.locator('[data-testid="hydration-error"]').count();
  expect(
    hydrationErrorCount,
    "canvas hydration failed — check staging CP + tenant reachability",
  ).toBe(0);

  const errorBoundaryText = page.getByText("Something went wrong", { exact: false });
  await expect(
    errorBoundaryText,
    "app-level ErrorBoundary tripped during concierge hydration",
  ).toHaveCount(0);
}
/**
 * Click the left-rail button for the given top-level view. Fails with a
 * descriptive message if the button is not visible within 10 seconds.
 */
async function navTo(page: Page, view: "home" | "map" | "settings"): Promise<void> {
  const testId = `nav-${view}`;
  const railButton = page.getByTestId(testId);
  await expect(railButton, `rail button ${testId} missing`).toBeVisible({ timeout: 10_000 });
  await railButton.click();
}
// ── shared per-spec setup ──────────────────────────────────────────────────
// Runs before every test: authenticate the browser context, then (re)install
// the platform agent. The install is repeated per test so a single test can
// be run in isolation (`--grep`), not only in whole-file order.
//
// `platformInstalled` records the outcome of the most recent install so the
// tests can loosen their platform-specific assertions on a degraded backend.
let platformInstalled = false;
test.beforeEach(async ({ page, context }) => {
  const env = tenantEnv();
  await authenticate(context, env.tenantToken, env.workspaceId);
  const outcome = await installPlatformAgent(
    page,
    env.tenantURL,
    env.tenantToken,
    env.orgID,
  );
  platformInstalled = outcome.installed;
});
/* ───────────────────────── 1. Concierge shell / nav ──────────────────────── */
// Verifies the three rail destinations exist, the topbar org name resolves to
// non-empty text, and clicking each rail button brings up that view's content.
test.describe("concierge shell + nav", () => {
test("left rail switches Home / Org map / Settings; topbar shows the org name", async ({
page,
}) => {
const { tenantURL } = tenantEnv();
await loadConcierge(page, tenantURL);
// All three rail destinations are present.
for (const v of ["home", "map", "settings"] as const) {
await expect(page.getByTestId(`nav-${v}`)).toBeVisible();
}
// Topbar org name is dynamic from GET /org/identity. The endpoint returns
// MOLECULE_ORG_NAME (may be "" on a staging tenant), in which case the
// shell falls back to "Molecule AI". Either way it must render a
// non-empty name — assert the element resolves to real text.
const orgName = page.getByTestId("topbar-org-name");
await expect(orgName).toBeVisible();
await expect
.poll(async () => ((await orgName.innerText()) || "").trim().length, {
message: "topbar org name never resolved to non-empty text",
timeout: 10_000,
})
.toBeGreaterThan(0);
// Nav actually switches the active view: Settings → Map → Home. Each hop is
// verified through the destination view's own content (heading, canvas,
// sub-tab bar); the rail button's active state itself is NOT asserted here.
await navTo(page, "settings");
await expect(page.getByRole("heading", { name: "Settings" })).toBeVisible({
timeout: 10_000,
});
await navTo(page, "map");
// NOTE(review): the Org map test in this file locates the canvas via
// aria-label "Molecule AI workspace canvas", while this one uses
// "Agent canvas" — confirm both labels really exist in the map view.
await expect(page.locator('[aria-label="Agent canvas"]')).toBeVisible({
timeout: 15_000,
});
await navTo(page, "home");
// Home shows the agents/tasks/approvals sub-tab bar.
await expect(page.getByTestId("home-subtab-agents")).toBeVisible({
timeout: 10_000,
});
});
});
/* ─────────────────────────────── 2. Home ─────────────────────────────────── */
// Verifies the Home view: (a) the chat panel with its two sub-tabs, message
// input and attach button, (b) the Agents/Tasks/Approvals sidebar sub-tabs,
// and (c) the agent tree topology, whose strictness depends on whether the
// platform-agent install in the beforeEach hook succeeded.
test.describe("concierge Home", () => {
test("renders the canonical ChatTab, Agents/Tasks/Approvals sub-tabs, and the platform agent as ROOT", async ({
page,
}) => {
const { tenantURL } = tenantEnv();
await loadConcierge(page, tenantURL);
await navTo(page, "home");
// (a) The Home chat panel reuses the EXACT canonical ChatTab — so it must
// expose the My Chat / Agent Comms sub-tabs, a message input, and the
// attachment affordance, exactly like the map SidePanel chat. The
// [data-testid="chat-panel"] root is ChatTab's own marker (canvas/src/
// components/tabs/ChatTab.tsx) — asserting it proves the canonical
// component is mounted, not a bespoke concierge re-implementation.
const chatPanel = page.getByTestId("chat-panel");
await expect(chatPanel, "Home did not mount the canonical ChatTab").toBeVisible({
timeout: 15_000,
});
await expect(chatPanel.locator("#chat-tab-my-chat")).toHaveText(/My Chat/);
await expect(chatPanel.locator("#chat-tab-agent-comms")).toHaveText(/Agent Comms/);
// Switching the chat sub-tab works (My Chat active by default → Agent Comms).
await chatPanel.locator("#chat-tab-agent-comms").click();
await expect(chatPanel.locator("#chat-tab-agent-comms")).toHaveAttribute(
"aria-selected",
"true",
);
await chatPanel.locator("#chat-tab-my-chat").click();
await expect(chatPanel.locator("#chat-tab-my-chat")).toHaveAttribute(
"aria-selected",
"true",
);
// Message input + attachment affordance (My Chat panel). The attach
// control is the labelled button (the underlying <input type=file> is
// aria-hidden); both are always present (disabled when the agent is
// unreachable), so assert presence, not enabled-state.
await expect(
chatPanel.locator('textarea[aria-label="Message to agent"]'),
"ChatTab message input missing",
).toHaveCount(1);
await expect(
chatPanel.locator('button[aria-label="Attach file"]'),
"ChatTab attachment affordance missing",
).toHaveCount(1);
// (b) Agents / Tasks / Approvals sub-tabs switch the Home sidebar pane.
// Only the sub-tab's "active" class is checked, not the pane content.
await page.getByTestId("home-subtab-tasks").click();
await expect(page.getByTestId("home-subtab-tasks")).toHaveClass(/active/);
await page.getByTestId("home-subtab-approvals").click();
await expect(page.getByTestId("home-subtab-approvals")).toHaveClass(/active/);
await page.getByTestId("home-subtab-agents").click();
await expect(page.getByTestId("home-subtab-agents")).toHaveClass(/active/);
// (c) The agent tree shows the platform agent as ROOT. After install the
// platform agent is a kind='platform' root carrying the "root" tag, with
// the provisioned workspace re-parented under it (depth>0). When the
// backend predates the install endpoint, roots[0] is the pseudo-root and
// the "root" tag is absent (it only renders for a real kind='platform'
// root) — so we gate the strong assertion on a successful install.
const tree = page.getByTestId("agent-tree-node");
await expect(tree.first(), "agent tree rendered no nodes").toBeVisible({
timeout: 10_000,
});
if (platformInstalled) {
// The depth-0 node is the platform agent and it carries the root tag.
const rootNode = page
.locator('[data-testid="agent-tree-node"][data-depth="0"]')
.first();
await expect(rootNode).toHaveAttribute("data-platform", "true");
await expect(
rootNode.locator('[data-testid="agent-tree-root-tag"]'),
"platform root is missing the ROOT tag",
).toBeVisible();
// And the provisioned workspace is nested beneath it: exactly one
// depth-1 node is expected.
await expect(
page.locator('[data-testid="agent-tree-node"][data-depth="1"]'),
"the provisioned workspace did not re-parent under the platform root",
).toHaveCount(1, { timeout: 10_000 });
} else {
// Degraded backend: at least the tree renders a root-level node.
await expect(
page.locator('[data-testid="agent-tree-node"][data-depth="0"]'),
).not.toHaveCount(0);
}
});
});
/* ─────────────────────────────── 3. Org map ──────────────────────────────── */
// Verifies the Org map renders workspace nodes and, when the platform agent
// was installed, that no map node is labelled with the platform agent's name.
test.describe("concierge Org map", () => {
test("hides the platform agent from the node graph; normal workspaces render", async ({
page,
}) => {
const { tenantURL } = tenantEnv();
await loadConcierge(page, tenantURL);
await navTo(page, "map");
// The React Flow canvas renders.
// NOTE(review): the shell + nav test in this file locates the map via
// aria-label "Agent canvas" instead — confirm both labels exist.
await expect(page.locator('[aria-label="Molecule AI workspace canvas"]')).toBeVisible({
timeout: 15_000,
});
// Normal workspaces render as map node cards (WorkspaceNode →
// data-testid="workspace-node"). The provisioned hermes workspace is
// expected to be among them, but the check below only requires at least
// one node — it does not identify that specific workspace. expect.poll
// lets React Flow finish its layout pass.
await expect
.poll(async () => page.locator('[data-testid="workspace-node"]').count(), {
message: "no workspace nodes rendered on the org map",
timeout: 15_000,
})
.toBeGreaterThan(0);
// The concierge (platform agent) is HIDDEN from the graph: no map node
// carries its name. WorkspaceNode's aria-label is "<name> workspace —
// <status>" — assert none matches the platform agent name. This is the
// real behaviour stripPlatformRootForMap implements (Canvas.tsx /
// canvas-topology.ts). Only meaningful when we actually installed one.
if (platformInstalled) {
const platformNode = page.locator(
`[data-testid="workspace-node"][aria-label^="${PLATFORM_AGENT_NAME} workspace"]`,
);
await expect(
platformNode,
"the platform agent (concierge) leaked into the org-map node graph — " +
"stripPlatformRootForMap should exclude it",
).toHaveCount(0);
}
});
});
/* ─────────────────────── 4. Settings — two tabs ──────────────────────────── */
// Verifies the Settings view has two mutually exclusive panes (platform / org),
// that the platform pane is the default and embeds the workspace tablist with
// Config selected, and that switching tabs mounts one pane and unmounts the other.
test.describe("concierge Settings — two tabs", () => {
test("Platform-agent config and Org & canvas settings are separate panes; platform tab shows the full WorkspacePanelTabs defaulting to Config", async ({
page,
}) => {
const { tenantURL } = tenantEnv();
await loadConcierge(page, tenantURL);
await navTo(page, "settings");
const platformTab = page.getByTestId("settings-tab-platform");
const orgTab = page.getByTestId("settings-tab-org");
await expect(platformTab).toBeVisible({ timeout: 10_000 });
await expect(orgTab).toBeVisible();
// Platform tab is the default; its pane is shown and the org pane is not.
await expect(platformTab).toHaveAttribute("aria-selected", "true");
await expect(page.getByTestId("settings-pane-platform")).toBeVisible();
await expect(page.getByTestId("settings-pane-org")).toHaveCount(0);
// The platform pane embeds the FULL WorkspacePanelTabs (the SAME tablist
// the map SidePanel renders) and defaults to the Config tab. Assert the
// canonical workspace tablist is present, that Config is the active tab,
// and that the other signature tabs exist (Plugins, Container, Display,
// Details, Activity, Terminal, Channels, Schedule).
// NOTE(review): the id list below has `skills` where this prose says
// "Plugins" — presumably the Plugins tab keeps the #tab-skills id; confirm.
const wsTablist = page.getByRole("tablist", { name: "Workspace panel tabs" });
await expect(
wsTablist,
"platform-agent Settings tab did not embed WorkspacePanelTabs",
).toBeVisible({ timeout: 15_000 });
await expect(page.locator("#tab-config")).toHaveAttribute(
"aria-selected",
"true",
);
// Each tab id must match exactly one element on the page.
for (const id of [
"config",
"skills",
"container-config",
"display",
"details",
"activity",
"terminal",
"channels",
"schedule",
]) {
await expect(
page.locator(`#tab-${id}`),
`WorkspacePanelTabs is missing #tab-${id}`,
).toHaveCount(1);
}
// Clicking the OTHER settings tab switches panes (not just toggles a
// class): the org pane mounts and the platform pane unmounts.
await orgTab.click();
await expect(orgTab).toHaveAttribute("aria-selected", "true");
await expect(page.getByTestId("settings-pane-org")).toBeVisible();
await expect(page.getByTestId("settings-pane-platform")).toHaveCount(0);
// And back.
await platformTab.click();
await expect(page.getByTestId("settings-pane-platform")).toBeVisible();
await expect(page.getByTestId("settings-pane-org")).toHaveCount(0);
});
});
/* ─────────────────────── 5. Settings — Config tab ────────────────────────── */
// Verifies the three Config-tab controls on the platform Settings pane: the
// runtime select (non-empty and containing "hermes"), the provider select
// (non-empty and without an option labelled "Platform"), and the presence of
// a model control.
test.describe("concierge Settings — Config tab dropdowns", () => {
test("runtime dropdown is SSOT-driven; provider hides Platform on self-host but lists BYOK; model follows provider", async ({
page,
}) => {
const { tenantURL } = tenantEnv();
await loadConcierge(page, tenantURL);
await navTo(page, "settings");
// Platform tab defaults to the Config tab — the runtime select is in the
// ConfigTab "Runtime" section (label "Runtime"). Wait for it to settle.
await expect(
page.getByRole("tablist", { name: "Workspace panel tabs" }),
).toBeVisible({ timeout: 15_000 });
// The runtime <select> sits under the "Runtime" label inside the Config
// panel. Use the label association for a stable hook.
const runtimeByLabel = page.locator('#panel-config').getByLabel("Runtime", {
exact: true,
});
await expect(
runtimeByLabel,
"ConfigTab runtime dropdown never rendered",
).toBeVisible({ timeout: 15_000 });
// (a) Runtime dropdown is SSOT-driven: the options come from GET
// /templates (loadRuntimesFromManifest), so the live tenant must serve a
// non-trivial set. Assert >= 1 runtime option AND that the provisioned
// workspace's runtime (hermes) is among them — proving the list reflects
// what /templates actually serves, not a stale hard-coded allowlist.
const runtimeOptionValues = await runtimeByLabel
.locator("option")
.evaluateAll((els) => els.map((e) => (e as HTMLOptionElement).value));
expect(
runtimeOptionValues.length,
"runtime dropdown rendered no options — SSOT /templates feed is empty",
).toBeGreaterThan(0);
expect(
runtimeOptionValues,
"runtime dropdown does not list the provisioned 'hermes' runtime — the " +
"SSOT /templates list has drifted",
).toContain("hermes");
// (b) Provider dropdown: on self-host (no platform proxy) it must NOT
// offer the "Platform" billing option but MUST list BYOK providers. The
// ProviderModelSelector exposes data-testid="provider-select". Read its
// option labels: none should be the "Platform" proxy entry, and the list
// must be non-empty (BYOK providers present). /org/identity's
// platform_managed_available=false on a staging tenant drives this.
const providerSelect = page.getByTestId("provider-select");
await expect(
providerSelect,
"ConfigTab provider dropdown (ProviderModelSelector) never rendered",
).toBeVisible({ timeout: 15_000 });
// Collect trimmed option labels, dropping empty ones and placeholder
// entries that start with an em-dash.
const providerLabels = await providerSelect
.locator("option")
.evaluateAll((els) =>
els
.map((e) => (e.textContent || "").trim())
.filter((t) => t && !t.startsWith("—")),
);
expect(
providerLabels.length,
"provider dropdown lists no BYOK providers",
).toBeGreaterThan(0);
// Exact (case-insensitive) label match: only an option labelled precisely
// "Platform" fails this; a longer label containing the word would not.
expect(
providerLabels.map((l) => l.toLowerCase()),
'provider dropdown offered the "Platform" proxy option on a self-host / ' +
"no-proxy tenant (platform_managed_available should hide it)",
).not.toContain("platform");
// (c) Model dropdown follows the provider. The model control is
// data-testid="model-select" (dropdown) or model-input (free-text
// wildcard). Whichever renders, it must be present.
// NOTE(review): this only asserts that a model control is visible; it does
// not change the provider and observe the model list update, so "follows
// the provider" is not actually exercised here.
const modelControl = page
.locator('[data-testid="model-select"], [data-testid="model-input"]')
.first();
await expect(
modelControl,
"model control did not follow the provider selection",
).toBeVisible({ timeout: 10_000 });
});
});
/* ────────────────── 6. Settings — Org & canvas settings ──────────────────── */
// Verifies the "Org & canvas" Settings pane: its four sub-tabs are present,
// and the Organization sub-tab settles to non-empty content that is neither a
// 404 / not-found state nor accompanied by a visible alert.
test.describe("concierge Settings — Org & canvas", () => {
  test("Secrets / Workspace Tokens / Org API Keys / Organization sub-tabs render; Organization shows the org (no 404)", async ({
    page,
  }) => {
    const { tenantURL } = tenantEnv();
    await loadConcierge(page, tenantURL);
    await navTo(page, "settings");
    await page.getByTestId("settings-tab-org").click();
    const orgPane = page.getByTestId("settings-pane-org");
    await expect(orgPane).toBeVisible({ timeout: 10_000 });

    // The four SettingsTabs (canvas/src/components/settings/SettingsTabs.tsx)
    // render as a radix tablist labelled "Settings sections". Assert all four
    // triggers are present.
    const settingsTablist = orgPane.getByRole("tablist", {
      name: "Settings sections",
    });
    await expect(settingsTablist).toBeVisible({ timeout: 10_000 });
    for (const label of [
      "Secrets",
      "Workspace Tokens",
      "Org API Keys",
      "Organization",
    ]) {
      await expect(
        settingsTablist.getByRole("tab", { name: label }),
        `Org & canvas settings is missing the "${label}" sub-tab`,
      ).toBeVisible();
    }

    // Click the Organization sub-tab — on self-host the canvas reads
    // /org/identity (NOT the CP /cp/orgs endpoint), so it must render the org
    // identity card and NOT a 404 / error state. Assert the pane settles to
    // real, non-error content.
    await settingsTablist.getByRole("tab", { name: "Organization" }).click();
    const orgInfoPanel = orgPane.locator(
      '[role="tabpanel"]:not([hidden])',
    );
    await expect(orgInfoPanel).toBeVisible({ timeout: 10_000 });

    // "404" is matched only as a standalone token (\b…\b). A bare /404/ would
    // also fire on the digits appearing inside legitimate content such as a
    // hex org id or a slug, failing the test on a perfectly healthy panel.
    const errorStatePattern = /\b404\b|not found/i;
    await expect
      .poll(
        async () => {
          const text = ((await orgInfoPanel.innerText()) || "").trim();
          return text.length > 0 && !errorStatePattern.test(text);
        },
        {
          message:
            "Organization sub-tab rendered empty or a 404/not-found — the " +
            "self-host /org/identity path is broken",
          timeout: 15_000,
        },
      )
      .toBe(true);

    // And no visible error alert inside the org settings pane.
    await expect(orgPane.locator('[role="alert"]:visible')).toHaveCount(0);
  });
});
/* ───────────────────────────── 7. Map toolbar ────────────────────────────── */
test.describe("concierge Org map toolbar", () => {
  test("settings gear, theme toggle and legend are NOT on the map toolbar (moved to Settings/topbar)", async ({
    page,
  }) => {
    const { tenantURL } = tenantEnv();
    await loadConcierge(page, tenantURL);
    await navTo(page, "map");

    const workspaceCanvas = page.locator('[aria-label="Molecule AI workspace canvas"]');
    await expect(workspaceCanvas).toBeVisible({ timeout: 15_000 });

    // Everything below is scoped to the map mount
    // (<main aria-label="Agent canvas">, ConciergeShell) so that the
    // legitimate left-rail Settings button and the topbar theme toggle, which
    // live OUTSIDE the map, are not counted.
    const mapRegion = page.locator('[aria-label="Agent canvas"]');
    await expect(mapRegion).toBeVisible({ timeout: 10_000 });

    // Controls that used to sit on the map toolbar and have since moved to the
    // concierge Settings (left rail) / topbar, or were removed outright
    // (Toolbar.tsx: "Theme picker + settings gear removed from the map
    // toolbar"). None of them may match inside the map region.
    const removedControls = [
      {
        // The old gear used title="Settings" / aria-label "Settings".
        selector: 'button[title="Settings"], button[aria-label="Settings"]',
        message: "a settings gear is still on the map toolbar (should be moved to Settings)",
      },
      {
        // The toggle's accessible name is "Toggle theme"; it now lives only in
        // the topbar.
        selector: 'button[title="Toggle theme"], button[aria-label*="theme" i]',
        message: "a theme toggle is still on the map toolbar (should be in the topbar)",
      },
      {
        // Legend surfaces (canvas/src/components/Legend.tsx): the
        // "Show legend" / "Hide legend" buttons and the
        // data-testid="legend-panel" panel.
        selector:
          '[data-testid="legend-panel"], button[aria-label="Show legend"], button[aria-label="Hide legend"]',
        message: "a legend is still on the map toolbar (should be removed)",
      },
    ];

    for (const { selector, message } of removedControls) {
      await expect(mapRegion.locator(selector), message).toHaveCount(0);
    }
  });
});
+32 -56
View File
@@ -234,44 +234,30 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
"Authorization": `Bearer ${tenantToken}`,
"X-Molecule-Org-Id": orgID,
};
// Retry workspace creation on transient 5xx / timeout — staging CP can
// return 502/503/504 under load and a single-shot failure kills the
// entire E2E run. 3 attempts with exponential backoff between them (3s,
// then 6s) add at most ~9s of waiting, well inside the 20-min provision envelope.
let workspaceId = "";
for (let attempt = 1; attempt <= 3; attempt++) {
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
method: "POST",
headers: tenantAuth,
body: JSON.stringify({
name: "E2E Canvas Test",
runtime: "hermes",
tier: 2,
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
// platform). The old `gpt-4o` was never a registered hermes model and
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
// defaults closed to platform_managed (see the boot-shape note below),
// so a platform-namespaced model id is the registry-correct choice.
model: "moonshot/kimi-k2.6",
}),
});
if (ws.status >= 200 && ws.status < 300 && ws.body?.id) {
workspaceId = ws.body.id as string;
break;
}
const isTransient = ws.status >= 500 || ws.status === 0;
if (!isTransient || attempt === 3) {
throw new Error(`Workspace create ${ws.status} (attempt ${attempt}): ${JSON.stringify(ws.body)}`);
}
const backoff = 3000 * Math.pow(2, attempt - 1);
console.log(`[staging-setup] Workspace create transient ${ws.status}, retrying in ${backoff}ms...`);
await new Promise((r) => setTimeout(r, backoff));
const ws = await jsonFetch(`${tenantURL}/workspaces`, {
method: "POST",
headers: tenantAuth,
body: JSON.stringify({
name: "E2E Canvas Test",
runtime: "hermes",
tier: 2,
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
// platform). The old `gpt-4o` was never a registered hermes model and
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
// defaults closed to platform_managed (see the boot-shape note below),
// so a platform-namespaced model id is the registry-correct choice.
model: "moonshot/kimi-k2.6",
}),
});
if (ws.status >= 400 || !ws.body?.id) {
throw new Error(`Workspace create ${ws.status}: ${JSON.stringify(ws.body)}`);
}
const workspaceId = ws.body.id as string;
console.log(`[staging-setup] Workspace created: ${workspaceId}`);
// 6. Wait for workspace online
// 6. Wait for workspace RENDERABLE.
//
// This harness exists to verify the canvas *tab UI* renders (staging-
// tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard
@@ -280,16 +266,6 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
// it needs is a workspace ROW that the canvas lists so the node renders
// and the side-panel tabs open. A fully-`online` agent is NOT required.
//
// Hermes cold-boot takes 10-13 min on slow apt days (apt + uv + hermes
// install + npm browser-tools). The controlplane bootstrap-watcher
// deadline fires at 5 min and sets status=failed prematurely; heartbeat
// then transitions failed → online after install.sh finishes. The ONLY
// failed shape we tolerate is the pre-start credential-abort
// (uptime_seconds=0, no last_sample_error) — the agent never ran. Real
// boot regressions (image pull error, panic, PYTHONPATH, etc.) still
// hard-throw immediately so triage gets detail without waiting for a
// polling timeout. See test_staging_full_saas.sh step 7/11 and issue #2632.
//
// That distinction became load-bearing on 2026-06-03: workspace-server
// #2162 (fix(provision): platform-managed workspace must fail-closed when
// CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
@@ -311,10 +287,8 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
// the node + tabs render, proceed. We do NOT mask a real boot regression:
// any `failed` carrying a last_sample_error, OR a non-zero uptime (the
// agent started then crashed — image pull, panic, PYTHONPATH, etc.),
// still hard-throws immediately so triage gets boot_stage / last_error /
// image fields without waiting for a polling timeout.
// Genuine *infra* provision failure is already caught loud one step
// earlier at the org level (instance_status === "failed").
// still hard-throws. Genuine *infra* provision failure is already caught
// loud one step earlier at the org level (instance_status === "failed").
await waitFor<boolean>(
async () => {
const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
@@ -341,15 +315,17 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
);
return true;
}
// #2032: tolerate transient 'failed' during boot — some runtimes
// briefly report failed before recovering to online (e.g. agent
// restart during init). Retry instead of hard-throwing; genuine
// terminal failures will still surface via waitFor timeout.
// last_sample_error is often empty when the failure happens before
// the agent emits a sample (e.g. boot crash, image pull error,
// missing PYTHONPATH, OpenAI quota at startup). Dumping the full
// body gives triage the boot_stage / last_error / image fields it
// needs without a second probe. Otherwise this propagates as a
// bare "Workspace failed: " — the exact useless message that
// sent #2632 to the issue tracker.
const detail = sampleErr
? sampleErr
: `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
console.warn(`[staging-setup] transient failed (retrying): ${detail}`);
return null;
throw new Error(`Workspace failed: ${detail}`);
}
return null;
},
@@ -357,7 +333,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
10_000,
"workspace online",
);
console.log(`[staging-setup] Workspace online`);
console.log(`[staging-setup] Workspace renderable`);
// 7. Hand state off to tests + teardown — overwrite the slug-only
// bootstrap state with the full state spec tests need.
+1 -1
View File
@@ -370,7 +370,7 @@ test.describe("staging canvas tabs", () => {
// The tablist appears once the side panel mounts. Condition-based
// wait — no fixed delay.
const tablist = page.getByRole("tablist", { name: "Workspace panel tabs" });
const tablist = page.locator('[role="tablist"]');
await expect(
tablist,
"side panel tablist never appeared after clicking the workspace node",
+2 -4
View File
@@ -52,10 +52,8 @@ describe("prefers-reduced-motion compliance", () => {
expect(src).toContain("motion-safe:animate-pulse");
});
it("WorkspacePanelTabs.tsx uses motion-safe:animate-pulse", () => {
// The connection-status dot moved out of SidePanel.tsx into the extracted
// WorkspacePanelTabs.tsx; verify the reduced-motion guard followed it.
const src = readSrc("components/WorkspacePanelTabs.tsx");
it("SidePanel.tsx uses motion-safe:animate-pulse", () => {
const src = readSrc("components/SidePanel.tsx");
expect(src.includes("animate-pulse") && !src.includes("motion-safe:animate-pulse")).toBe(false);
expect(src).toContain("motion-safe:animate-pulse");
});
+1 -1
View File
@@ -10,7 +10,7 @@ import { describe, it, expect, vi } from "vitest";
// transform). We import layout.tsx only for its exported `metadata`
// constant — mock the font module to a constructor-returning stub.
vi.mock("next/font/google", () => ({
Hanken_Grotesk: () => ({ variable: "--font-hanken" }),
Inter: () => ({ variable: "--font-inter" }),
JetBrains_Mono: () => ({ variable: "--font-jetbrains" }),
}));
+38 -50
View File
@@ -42,52 +42,48 @@
* before paint to eliminate flash.
*/
@theme {
/* Org Concierge palette (RFC platform-agent / canvas redesign). Warm-paper
light theme + purple accent replacing the old blue brand. */
/* Surface — page / elevated card / sunken input / deep card */
--color-surface: #f1efe8;
--color-surface: #fafaf7;
--color-surface-elevated: #ffffff;
--color-surface-sunken: #f6f4ee;
--color-surface-card: #faf9f4;
--color-surface-sunken: #f3f1ec;
--color-surface-card: #efece4;
/* Borders */
--color-line: #ddd9cf;
--color-line-soft: #ebe8df;
--color-line: #e6e2d8;
--color-line-soft: #efece4;
/* Text */
--color-ink: #21201b;
--color-ink-mid: #5c5a52;
--color-ink-soft: #6f6c62;
--color-ink: #15181c;
--color-ink-mid: #5a5e66;
--color-ink-soft: #8b8e95;
/* Brand + state — purple accent (concept #7c3aed); light good/bad kept
slightly darker than the raw concept hues for WCAG AA on the paper tints. */
--color-accent: #7c3aed;
--color-accent-strong: #6d28d9;
--color-warm: #c47e12;
--color-good: #0c8a52;
--color-bad: #c2403c;
/* Brand + state */
--color-accent: #3b5bdb;
--color-accent-strong: #1a2f99;
--color-warm: #c0532b;
--color-good: #2f7a4d;
--color-bad: #b94e4a;
}
[data-theme="dark"] {
/* Org Concierge dark palette — near-black panels, bright purple accent. */
--color-surface: #08080a;
--color-surface-elevated: #16161d;
--color-surface-sunken: #0d0d11;
--color-surface-card: #1b1b23;
--color-surface: #0e1014;
--color-surface-elevated: #15181c;
--color-surface-sunken: #0a0b0e;
--color-surface-card: #1a1d23;
--color-line: #26262e;
--color-line-soft: #1b1b22;
--color-line: #2a2f3a;
--color-line-soft: #1f2329;
--color-ink: #ececf1;
--color-ink-mid: #9b9baa;
--color-ink-soft: #65656f;
--color-ink: #f4f1e9;
--color-ink-mid: #c8c2b4;
--color-ink-soft: #8d92a0;
/* Purple accent brightened for AA on the near-black surfaces. */
--color-accent: #a78bfa;
--color-accent-strong: #c4b5fd;
--color-warm: #fbbf24;
--color-good: #34d399;
--color-bad: #f87171;
/* Accents brighten slightly for AA contrast on dark backgrounds. */
--color-accent: #6883e8;
--color-accent-strong: #8aa1ee;
--color-warm: #d96f48;
--color-good: #4ca06e;
--color-bad: #d27773;
}
:root {
@@ -111,22 +107,15 @@
* component, not per theme.
*/
@theme {
/* Org Concierge canvas palette (near-black + purple). */
--color-bg: rgb(8 8 10); /* concept --bg #08080a */
--color-bg-elev: rgb(22 22 29); /* concept --card #16161d */
--color-bg-card: rgb(27 27 35); /* concept --card-2 #1b1b23 */
--color-line-strong: rgb(54 54 64);
--color-ink-mute: rgb(155 155 170); /* concept --tx-2 */
--color-ink-dim: rgb(101 101 111); /* concept --tx-3 */
--color-accent-dim: rgb(167 139 250);/* concept --accent-2 #a78bfa */
--color-plasma: rgb(139 92 246); /* concept --accent #8b5cf6 */
--color-bg: rgb(9 9 11); /* zinc-950 */
--color-bg-elev: rgb(24 24 27); /* zinc-900 */
--color-bg-card: rgb(39 39 42); /* zinc-800 */
--color-line-strong: rgb(63 63 70); /* zinc-700 */
--color-ink-mute: rgb(161 161 170); /* zinc-400 */
--color-ink-dim: rgb(113 113 122); /* zinc-500 */
--color-accent-dim: rgb(96 165 250);/* blue-400 */
--color-plasma: rgb(59 130 246); /* blue-500 */
--color-warn: rgb(251 191 36); /* amber-400 */
/* Typography — Org Concierge (Hanken Grotesk UI, JetBrains Mono code).
next/font variables are set on <html> in the canvas layout. */
--font-sans: var(--font-hanken), ui-sans-serif, system-ui, -apple-system,
"Segoe UI", Roboto, sans-serif;
--font-mono: var(--font-jetbrains), ui-monospace, "SF Mono", Menlo, monospace;
}
body {
@@ -135,8 +124,7 @@ body {
overflow: hidden;
background-color: var(--color-surface);
color: var(--color-ink);
font-family: var(--font-hanken), -apple-system, BlinkMacSystemFont, "Segoe UI",
Roboto, "Helvetica Neue", sans-serif;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
+3 -13
View File
@@ -1,5 +1,5 @@
import type { Metadata } from "next";
import { Hanken_Grotesk, JetBrains_Mono } from "next/font/google";
import { Inter, JetBrains_Mono } from "next/font/google";
import { cookies, headers } from "next/headers";
import "./globals.css";
@@ -7,13 +7,10 @@ import "./globals.css";
// because Next.js serves the .woff2 from /_next/static). Exposed as
// CSS variables so the mobile palette can reference them without
// importing this module.
// Org Concierge UI typeface (canvas redesign): Hanken Grotesk, exposed as
// --font-hanken and consumed by the --font-sans theme token in globals.css.
const interFont = Hanken_Grotesk({
const interFont = Inter({
subsets: ["latin"],
weight: ["400", "500", "600", "700"],
display: "swap",
variable: "--font-hanken",
variable: "--font-inter",
});
const monoFont = JetBrains_Mono({
subsets: ["latin"],
@@ -164,12 +161,6 @@ export default async function RootLayout({
*/}
<script
nonce={nonce}
// The browser strips the nonce attribute off <script> after applying
// CSP, so the hydrated DOM shows nonce="" while React's tree carries
// the real value — a benign, expected server/client diff. Suppress
// the hydration warning for this element (same rationale as the
// <html> suppressHydrationWarning above).
suppressHydrationWarning
dangerouslySetInnerHTML={{ __html: themeBootScript }}
/>
{/*
@@ -195,7 +186,6 @@ export default async function RootLayout({
<script
type="application/ld+json"
nonce={nonce}
suppressHydrationWarning
dangerouslySetInnerHTML={{
__html: JSON.stringify({
"@context": "https://schema.org",
+20
View File
@@ -179,6 +179,7 @@ function Shell({
<p className="mt-2 text-ink-mid">
Each org is an isolated Molecule workspace.
</p>
<DataResidencyNotice />
<div className="mt-8">{children}</div>
</div>
</TermsGate>
@@ -219,6 +220,25 @@ function AccountBar({ session }: { session: Session }) {
</div>
);
}
// DataResidencyNotice surfaces where workspace data lives so EU-based
// signups can make an informed choice (GDPR Art. 13 disclosure
// requirement). Plain text, no icon — the goal is clarity, not
// decoration. A future EU region selector can replace this with a
// region dropdown.
//
// Takes no props and renders a single paragraph containing a mailto link
// to support for users who need another region today.
function DataResidencyNotice() {
  return (
    <p className="mt-3 rounded border border-line bg-surface-sunken/60 px-3 py-2 text-xs text-ink-mid">
      Workspaces run in AWS us-east-2 (Ohio, United States). EU region support is on the roadmap — reach out to
      {" "}
      <a href="mailto:support@moleculesai.app" className="underline">
        support@moleculesai.app
      </a>
      {" "}if you need data residency in another region today.
    </p>
  );
}
function OrgRow({ org }: { org: Org }) {
return (
<li className="rounded-lg border border-line bg-surface-sunken p-4">
+8 -2
View File
@@ -1,7 +1,9 @@
"use client";
import { useEffect, useState } from "react";
import { ConciergeShell } from "@/components/concierge/ConciergeShell";
import { Canvas } from "@/components/Canvas";
import { Legend } from "@/components/Legend";
import { CommunicationOverlay } from "@/components/CommunicationOverlay";
import { MobileApp } from "@/components/mobile/MobileApp";
import { Spinner } from "@/components/Spinner";
import { connectSocket, disconnectSocket } from "@/store/socket";
@@ -113,7 +115,11 @@ export default function Home() {
return (
<>
<ConciergeShell />
<main aria-label="Agent canvas">
<Canvas />
</main>
<Legend />
<CommunicationOverlay />
{hydrationError && (
<div
role="alert"
+6 -36
View File
@@ -13,11 +13,8 @@ import {
import "@xyflow/react/dist/style.css";
import { useCanvasStore } from "@/store/canvas";
import { WORKSPACE_KIND } from "@/lib/workspace-kind";
import { stripPlatformRootForMap } from "@/store/canvas-topology";
import { useTheme } from "@/lib/theme-provider";
import { A2ATopologyOverlay } from "./A2ATopologyOverlay";
import { MessageFlightLayer } from "./MessageFlightLayer";
import { WorkspaceNode } from "./WorkspaceNode";
import { SidePanel } from "./SidePanel";
import { CreateWorkspaceButton } from "./CreateWorkspaceDialog";
@@ -81,38 +78,15 @@ function CanvasInner() {
// half-themed page. Pull resolvedTheme so the canvas matches the user's
// selected mode (and the system preference when they pick "system").
const { resolvedTheme } = useTheme();
const storeNodes = useCanvasStore((s) => s.nodes);
const storeEdges = useCanvasStore((s) => s.edges);
const rawNodes = useCanvasStore((s) => s.nodes);
const edges = useCanvasStore((s) => s.edges);
const a2aEdges = useCanvasStore((s) => s.a2aEdges);
const showA2AEdges = useCanvasStore((s) => s.showA2AEdges);
const deletingIds = useCanvasStore((s) => s.deletingIds);
// Hide the org-level platform agent (the concierge) from the map graph: it is
// the undeletable org ROOT surfaced in the shell (topbar + Home tree), not a
// draggable/deletable map node. Its direct children are reparented to
// top-level and tree edges touching it are dropped. The store keeps the full
// node set, so the shell's Home agent tree still renders it as ROOT.
const { nodes: rawNodes, edges } = useMemo(
() => stripPlatformRootForMap(storeNodes, storeEdges),
[storeNodes, storeEdges],
const allEdges = useMemo(
() => (showA2AEdges ? [...edges, ...a2aEdges] : edges),
[edges, a2aEdges, showA2AEdges],
);
const platformIds = useMemo(
() =>
new Set(
storeNodes
.filter((n) => n.data.kind === WORKSPACE_KIND.Platform)
.map((n) => n.id),
),
[storeNodes],
);
const allEdges = useMemo(() => {
if (!showA2AEdges) return edges;
// Drop A2A edges that touch the hidden platform root so React Flow doesn't
// warn about an edge to a missing node.
const a2a = a2aEdges.filter(
(e) => !platformIds.has(e.source) && !platformIds.has(e.target),
);
return [...edges, ...a2a];
}, [edges, a2aEdges, showA2AEdges, platformIds]);
// Drag-lock during a system-owned operation (deploy OR delete).
// React Flow respects Node.draggable, which stops the gesture
// before it starts — preventDefault() on the drag-start callback
@@ -303,7 +277,7 @@ function CanvasInner() {
>
Skip to canvas
</a>
<main id="canvas-main" className="w-full h-full bg-surface">
<main id="canvas-main" className="w-screen h-screen bg-surface">
<ReactFlow
colorMode={resolvedTheme}
nodes={nodes}
@@ -372,10 +346,6 @@ function CanvasInner() {
nodeBorderRadius={4}
/>
<DropTargetBadge />
{/* Flies an envelope between agents on each delegate/message event.
Inside <ReactFlow> so its ViewportPortal renders in flow coords
and tracks pan/zoom. */}
<MessageFlightLayer />
</ReactFlow>
{/* Screen-reader live region announces workspace count on initial load and
+2 -2
View File
@@ -172,7 +172,7 @@ export function ContextMenu() {
const nodeId = contextMenu.nodeId;
closeContextMenu();
try {
await api.post(`/workspaces/${nodeId}/pause?cascade=true`, {});
await api.post(`/workspaces/${nodeId}/pause`, {});
updateNodeData(nodeId, { status: "paused" });
} catch (e) {
showToast("Pause failed", "error");
@@ -184,7 +184,7 @@ export function ContextMenu() {
const nodeId = contextMenu.nodeId;
closeContextMenu();
try {
await api.post(`/workspaces/${nodeId}/resume?cascade=true`, {});
await api.post(`/workspaces/${nodeId}/resume`, {});
updateNodeData(nodeId, { status: "provisioning" });
} catch (e) {
showToast("Resume failed", "error");
@@ -60,16 +60,6 @@ const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "goog
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
const DEFAULT_HEADLESS_ROOT_GB = 30;
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
// Per-workspace cloud/compute backend (multi-provider RFC). "aws" is the default
// EC2 path; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner. A
// workspace whose cloud differs from its tenant's is reached over a per-workspace
// Cloudflare tunnel (runtime#95). Distinct from the LLM/model provider.
const CLOUD_PROVIDER_OPTIONS = [
{ value: "aws", label: "AWS (default)" },
{ value: "gcp", label: "GCP" },
{ value: "hetzner", label: "Hetzner" },
];
const DEFAULT_DISPLAY_ROOT_GB = 80;
export function CreateWorkspaceButton() {
@@ -87,10 +77,6 @@ export function CreateWorkspaceButton() {
const [displayInstanceType, setDisplayInstanceType] = useState(DEFAULT_DISPLAY_INSTANCE_TYPE);
const [displayRootGB, setDisplayRootGB] = useState(String(DEFAULT_DISPLAY_ROOT_GB));
const [displayResolution, setDisplayResolution] = useState("1920x1080");
// Cloud/compute backend for the workspace box (multi-provider, per-workspace).
// "aws" default; "gcp"/"hetzner" route to the matching CP WorkspaceProvisioner
// (a non-tenant-cloud box is reached over a per-workspace tunnel, runtime#95).
const [cloudProvider, setCloudProvider] = useState("aws");
// Templates fetched from /api/templates — drives the dynamic provider
// filter below. Same data source ConfigTab uses (PR #2454). When the
// selected template declares `runtime_config.providers` in its
@@ -280,7 +266,6 @@ export function CreateWorkspaceButton() {
setDisplayInstanceType(DEFAULT_DISPLAY_INSTANCE_TYPE);
setDisplayRootGB(String(DEFAULT_DISPLAY_ROOT_GB));
setDisplayResolution("1920x1080");
setCloudProvider("aws");
setExternalRuntime("external");
setLLMSelection({ providerId: "", model: "", envVars: [] });
setLLMSecret("");
@@ -370,16 +355,11 @@ export function CreateWorkspaceButton() {
width: Number.isFinite(displayWidth) ? displayWidth : 1920,
height: Number.isFinite(displayHeight) ? displayHeight : 1080,
},
// Only meaningful when CP provisions the box (SaaS), where
// the picker is shown. Omit on self-hosted so the payload is
// unchanged there.
...(isSaaS ? { provider: cloudProvider } : {}),
}
: {
instance_type: DEFAULT_HEADLESS_INSTANCE_TYPE,
volume: { root_gb: DEFAULT_HEADLESS_ROOT_GB },
display: { mode: "none" },
...(isSaaS ? { provider: cloudProvider } : {}),
},
}
: {}),
@@ -619,26 +599,6 @@ export function CreateWorkspaceButton() {
<div className="mb-2 text-[11px] font-medium text-ink-mid">
Container Config
</div>
{/* Cloud provider — only meaningful when CP provisions the box
(SaaS). A non-tenant-cloud workspace is reached over a
per-workspace Cloudflare tunnel (runtime#95). */}
{isSaaS && (
<label htmlFor="workspace-cloud-provider" className="mb-3 grid gap-1">
<span className="text-xs font-medium text-ink">Cloud provider</span>
<select
id="workspace-cloud-provider"
value={cloudProvider}
onChange={(e) => setCloudProvider(e.target.value)}
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors"
>
{CLOUD_PROVIDER_OPTIONS.map((p) => (
<option key={p.value} value={p.value}>
{p.label}
</option>
))}
</select>
</label>
)}
<label className="flex items-center justify-between gap-3">
<span className="text-xs font-medium text-ink">Display</span>
<input
-84
View File
@@ -1,84 +0,0 @@
/** FlightEnvelope — a single envelope that animates from `from` to `to` and
* fades out, used by both the canvas (flow coords inside a ViewportPortal) and
* the concierge home (screen coords inside a fixed overlay). The parent owns
* the coordinate space; this component only animates the translate delta.
*
* Uses the Web Animations API so the from/to delta can be dynamic per flight
* (a static CSS @keyframes can't translate to a runtime-computed point). */
import { useEffect, useRef } from "react";
import { FLIGHT_DURATION_MS, type A2AFlightKind } from "@/hooks/useA2AFlights";
/** Stroke colour by activity kind — mirrors CommunicationOverlay's palette
* (send = cyan, receive = violet/accent, task = warm) so the two surfaces
* read as the same event. Used as the envelope's SVG stroke colour. */
const KIND_COLOR: Record<A2AFlightKind, string> = {
send: "#22d3ee",
receive: "#8b5cf6",
task: "#f5a623",
};
/** A 2D position used as a flight endpoint. The coordinate space (flow
* coordinates or screen coordinates) is decided by the parent that renders
* the envelope, not by this type. */
export interface Point {
x: number;
y: number;
}
/**
 * Renders one envelope icon absolutely positioned at `from` and animates it
 * towards `to`, fading in at the start and out at the end.
 *
 * @param from - Start point; also used as the element's `left` / `top`.
 * @param to - End point; only the delta from `from` is animated.
 * @param kind - Activity kind, which selects the stroke colour.
 */
export function FlightEnvelope({
  from,
  to,
  kind,
}: {
  from: Point;
  to: Point;
  kind: A2AFlightKind;
}) {
  const envelopeRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    const node = envelopeRef.current;
    if (!node) return;
    // Some test/SSR environments have no Element.animate; in that case leave
    // the envelope un-animated instead of throwing.
    if (typeof node.animate !== "function") return;

    const deltaX = to.x - from.x;
    const deltaY = to.y - from.y;
    const flight = node.animate(
      [
        { transform: "translate(-50%,-50%) translate(0px,0px) scale(0.45)", opacity: 0 },
        { opacity: 1, offset: 0.16 },
        { opacity: 1, offset: 0.8 },
        {
          transform: `translate(-50%,-50%) translate(${deltaX}px,${deltaY}px) scale(1)`,
          opacity: 0,
        },
      ],
      {
        duration: FLIGHT_DURATION_MS,
        easing: "cubic-bezier(0.45, 0, 0.25, 1)",
        fill: "forwards",
      },
    );

    // Cancel the running animation when the endpoints change or on unmount.
    return () => {
      flight.cancel();
    };
  }, [from.x, from.y, to.x, to.y]);

  const stroke = KIND_COLOR[kind];

  return (
    <div
      ref={envelopeRef}
      data-testid="flight-envelope"
      aria-hidden="true"
      style={{
        position: "absolute",
        left: from.x,
        top: from.y,
        pointerEvents: "none",
        willChange: "transform, opacity",
        filter: "drop-shadow(0 1px 3px rgba(0,0,0,0.45))",
        zIndex: 6,
      }}
    >
      <svg width="22" height="22" viewBox="0 0 24 24" fill="none" aria-hidden="true">
        {/* Envelope body */}
        <rect
          x="2.5"
          y="5.5"
          width="19"
          height="13"
          rx="2.5"
          fill="#0b0b0f"
          stroke={stroke}
          strokeWidth="1.6"
        />
        {/* Envelope flap */}
        <path
          d="M3.5 7.5l8.5 6 8.5-6"
          stroke={stroke}
          strokeWidth="1.6"
          fill="none"
          strokeLinecap="round"
          strokeLinejoin="round"
        />
      </svg>
    </div>
  );
}
@@ -1,46 +0,0 @@
/** MessageFlightLayer — flies an envelope from the source agent to the target
* agent on the spatial canvas whenever a delegate / message event fires.
*
* Mounted INSIDE <ReactFlow> so its ViewportPortal places the envelope in flow
* coordinates; it therefore pans and zooms with the canvas for free. The
* flight lifecycle (which events become envelopes, reduced-motion opt-out,
* expiry) lives in useA2AFlights — this component only resolves node centres
* and renders. */
import { ViewportPortal, type Node } from "@xyflow/react";
import { useCanvasStore } from "@/store/canvas";
import { useA2AFlights } from "@/hooks/useA2AFlights";
import { FlightEnvelope, type Point } from "./FlightEnvelope";
import type { WorkspaceNodeData } from "@/store/canvas";
// Footprint assumed for a node that React Flow has not measured yet. It
// mirrors WorkspaceNode's leaf size (w-[300px] min-h-[176px]); being slightly
// off-centre for the first frame after mount is invisible at flight scale.
const DEFAULT_W = 300;
const DEFAULT_H = 176;

/**
 * Centre point of a node: its `position` plus half of its measured size,
 * falling back to the default footprint for any dimension not yet measured.
 */
function nodeCenter(n: Node<WorkspaceNodeData>): Point {
  const size = n.measured;
  const halfWidth = (size?.width ?? DEFAULT_W) / 2;
  const halfHeight = (size?.height ?? DEFAULT_H) / 2;
  const { x: left, y: top } = n.position;
  return { x: left + halfWidth, y: top + halfHeight };
}
/**
 * Draws one FlightEnvelope per active flight, from the centre of the source
 * node to the centre of the target node, inside a ViewportPortal. Renders
 * nothing while there are no flights.
 */
export function MessageFlightLayer() {
  const activeFlights = useA2AFlights();
  const canvasNodes = useCanvasStore((state) => state.nodes);

  if (activeFlights.length === 0) {
    return null;
  }

  const envelopes = activeFlights.map((flight) => {
    const source = canvasNodes.find((candidate) => candidate.id === flight.sourceId);
    const target = canvasNodes.find((candidate) => candidate.id === flight.targetId);
    // A path can only be drawn when both endpoints are on the canvas.
    if (!source || !target) {
      return null;
    }
    return (
      <FlightEnvelope
        key={flight.key}
        from={nodeCenter(source)}
        to={nodeCenter(target)}
        kind={flight.kind}
      />
    );
  });

  return <ViewportPortal>{envelopes}</ViewportPortal>;
}
+3 -15
View File
@@ -12,7 +12,6 @@ import {
ProviderModelSelector,
buildProviderCatalog,
findProviderForModel,
isPlatformManagedProvider,
type SelectorValue,
} from "./ProviderModelSelector";
@@ -268,21 +267,10 @@ function ProviderPickerModal({
setSelectorValue(initial);
}, [open, initial]);
// #2248: filter out provisioner-injected internal tokens for platform-managed
// providers so the user can't clobber them. Memoized so the array reference is
// stable across renders and does not churn the entries useEffect.
const userEditableEnvVars = useMemo(() => {
const selectedProvider = catalog.find((p) => p.id === selectorValue.providerId);
const isPlatformManaged = selectedProvider ? isPlatformManagedProvider(selectedProvider) : false;
return isPlatformManaged
? selectorValue.envVars.filter((k) => k !== "MOLECULE_LLM_USAGE_TOKEN")
: selectorValue.envVars;
}, [catalog, selectorValue.providerId, selectorValue.envVars]);
useEffect(() => {
if (!open) return;
setEntries(
userEditableEnvVars.map((key) => ({
selectorValue.envVars.map((key) => ({
key,
value: "",
// Pre-mark as saved when the key is already in the configured
@@ -295,7 +283,7 @@ function ProviderPickerModal({
);
setOptionalEntries(
optionalKeys
.filter((key) => !userEditableEnvVars.includes(key))
.filter((key) => !selectorValue.envVars.includes(key))
.map((key) => ({
key,
value: "",
@@ -304,7 +292,7 @@ function ProviderPickerModal({
error: null,
})),
);
}, [open, userEditableEnvVars, configuredKeys, optionalKeys]);
}, [open, selectorValue.envVars, configuredKeys, optionalKeys]);
useEffect(() => {
if (!open) return;
@@ -91,7 +91,6 @@ export interface RegistryModel {
name?: string;
provider?: string;
billing_mode?: "platform_managed" | "byok";
required_env?: string[];
}
export interface SelectorValue {
+134 -8
View File
@@ -1,9 +1,25 @@
"use client";
import { useState, useCallback, useRef, useEffect } from "react";
import { useCanvasStore } from "@/store/canvas";
import { useCanvasStore, type PanelTab } from "@/store/canvas";
import { showToast } from "@/components/Toaster";
import { StatusDot } from "./StatusDot";
import { WorkspacePanelTabs } from "./WorkspacePanelTabs";
import { Tooltip } from "./Tooltip";
import { DetailsTab } from "./tabs/DetailsTab";
import { SkillsTab } from "./tabs/SkillsTab";
import { ChatTab } from "./tabs/ChatTab";
import { ConfigTab } from "./tabs/ConfigTab";
import { ContainerConfigTab } from "./tabs/ContainerConfigTab";
import { DisplayTab } from "./tabs/DisplayTab";
import { TerminalTab } from "./tabs/TerminalTab";
import { FilesTab } from "./tabs/FilesTab";
import { MemoryInspectorPanel } from "./MemoryInspectorPanel";
import { AuditTrailPanel } from "./AuditTrailPanel";
import { TracesTab } from "./tabs/TracesTab";
import { EventsTab } from "./tabs/EventsTab";
import { ActivityTab } from "./tabs/ActivityTab";
import { ScheduleTab } from "./tabs/ScheduleTab";
import { ChannelsTab } from "./tabs/ChannelsTab";
import { summarizeWorkspaceCapabilities } from "@/store/canvas";
const SIDEPANEL_WIDTH_KEY = "molecule:sidepanel-width";
@@ -11,6 +27,24 @@ const SIDEPANEL_DEFAULT_WIDTH = 480;
const SIDEPANEL_MIN_WIDTH = 320;
const SIDEPANEL_MAX_WIDTH = 800;
/**
 * Tab descriptors for the side panel, in display order. Each entry pairs a
 * `PanelTab` id with the label and decorative icon glyph shown on its tab
 * button; the tablist's arrow/Home/End keyboard handling indexes into this
 * array, so the order here is also the keyboard navigation order.
 */
const TABS: { id: PanelTab; label: string; icon: string }[] = [
{ id: "chat", label: "Chat", icon: "◈" },
{ id: "activity", label: "Activity", icon: "⊙" },
{ id: "details", label: "Details", icon: "◉" },
{ id: "skills", label: "Plugins", icon: "✦" },
{ id: "terminal", label: "Terminal", icon: "▸" },
{ id: "display", label: "Display", icon: "▣" },
{ id: "container-config", label: "Container", icon: "▤" },
{ id: "config", label: "Config", icon: "⚙" },
{ id: "schedule", label: "Schedule", icon: "⏲" },
{ id: "channels", label: "Channels", icon: "⇌" },
{ id: "files", label: "Files", icon: "⊞" },
{ id: "memory", label: "Memory", icon: "◇" },
{ id: "traces", label: "Traces", icon: "◎" },
{ id: "events", label: "Events", icon: "◊" },
{ id: "audit", label: "Audit", icon: "⊟" },
];
export function SidePanel() {
const selectedNodeId = useCanvasStore((s) => s.selectedNodeId);
const panelTab = useCanvasStore((s) => s.panelTab);
@@ -185,12 +219,104 @@ export function SidePanel() {
</div>
</div>
{/* Tabs + tab content extracted into WorkspacePanelTabs so the same
tab bar/body is reused verbatim by the concierge Settings page. The
map drawer stays store-driven: we thread the global panelTab /
setPanelTab through as the controlled active-tab pair, preserving the
existing selection + keyboard behaviour. */}
<WorkspacePanelTabs node={node} activeTab={panelTab} onTabChange={setPanelTab} />
{/* Tabs — relative wrapper lets the fade gradient position against the scroll container */}
<div className="relative border-b border-line/40">
{/* Right-edge fade: signals more tabs are hidden off-screen when the bar overflows */}
<div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-surface to-transparent z-10" aria-hidden="true" />
<div
role="tablist"
aria-label="Workspace panel tabs"
className="flex overflow-x-auto bg-surface-sunken/20 px-1"
onKeyDown={(e) => {
const idx = TABS.findIndex((t) => t.id === panelTab);
let next: number | null = null;
if (e.key === "ArrowRight") { e.preventDefault(); next = (idx + 1) % TABS.length; }
else if (e.key === "ArrowLeft") { e.preventDefault(); next = (idx - 1 + TABS.length) % TABS.length; }
else if (e.key === "Home") { e.preventDefault(); next = 0; }
else if (e.key === "End") { e.preventDefault(); next = TABS.length - 1; }
if (next !== null) {
setPanelTab(TABS[next].id);
requestAnimationFrame(() => { const el = document.getElementById(`tab-${TABS[next!].id}`); el?.focus(); el?.scrollIntoView({ block: "nearest", inline: "nearest" }); });
}
}}
>
{TABS.map((tab) => (
<button
type="button"
key={tab.id}
id={`tab-${tab.id}`}
role="tab"
aria-selected={panelTab === tab.id}
aria-controls={`panel-${tab.id}`}
tabIndex={panelTab === tab.id ? 0 : -1}
onClick={() => setPanelTab(tab.id)}
className={`shrink-0 px-3 py-2.5 text-[10px] font-medium tracking-wide transition-all rounded-t-lg mx-0.5 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 ${
panelTab === tab.id
? "text-ink bg-surface-card border-b-2 border-accent"
: "text-ink-mid hover:text-ink hover:bg-surface-card/60"
}`}
>
<span className="mr-1 opacity-50" aria-hidden="true">{tab.icon}</span>
{tab.label}
</button>
))}
</div>
</div>
{/* Needs Restart Banner */}
{node.data.needsRestart && !node.data.currentTask && selectedNodeId && (
<div className="px-4 py-2 bg-sky-950/20 border-b border-sky-800/20 flex items-center justify-between">
<span className="text-[10px] text-sky-300/90">Config changed restart to apply</span>
<button
type="button"
onClick={() => {
useCanvasStore.getState().restartWorkspace(selectedNodeId).catch(() => showToast("Restart failed", "error"));
}}
className="text-[11px] px-2 py-1 bg-sky-800/40 hover:bg-sky-700/50 text-sky-200 rounded transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1"
>
Restart Now
</button>
</div>
)}
{/* Current Task Banner */}
{node.data.currentTask && (
<Tooltip text={node.data.currentTask as string}>
<div className="px-4 py-2 bg-amber-950/20 border-b border-amber-800/20 flex items-center gap-2 cursor-default">
<div className="w-1.5 h-1.5 rounded-full bg-amber-400 motion-safe:animate-pulse shrink-0" />
<span className="text-[10px] text-warm/90 truncate">
{node.data.currentTask}
</span>
</div>
</Tooltip>
)}
{/* Tab Content */}
<div
role="tabpanel"
id={`panel-${panelTab}`}
aria-labelledby={`tab-${panelTab}`}
tabIndex={0}
className="flex-1 overflow-y-auto focus:outline-none"
>
{panelTab === "details" && <DetailsTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "skills" && <SkillsTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "activity" && <ActivityTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "chat" && <ChatTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "terminal" && <TerminalTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "display" && <DisplayTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "container-config" && selectedNodeId && (
<ContainerConfigTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />
)}
{panelTab === "config" && <ConfigTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "schedule" && <ScheduleTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "channels" && <ChannelsTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "files" && <FilesTab key={selectedNodeId} workspaceId={selectedNodeId} data={node.data} />}
{panelTab === "memory" && <MemoryInspectorPanel key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "traces" && <TracesTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "events" && <EventsTab key={selectedNodeId} workspaceId={selectedNodeId} />}
{panelTab === "audit" && <AuditTrailPanel key={selectedNodeId} workspaceId={selectedNodeId} />}
</div>
{/* Footer — workspace ID */}
<div className="px-4 sm:px-5 py-2 border-t border-line/40 bg-surface-sunken/20">
+10 -8
View File
@@ -3,9 +3,11 @@
import { useMemo, useState, useCallback, useEffect, useRef } from "react";
import { api } from "@/lib/api";
import { useCanvasStore } from "@/store/canvas";
import { WORKSPACE_KIND } from "@/lib/workspace-kind";
import { SettingsButton } from "@/components/settings/SettingsButton";
import { settingsGearRef } from "@/components/settings/SettingsPanel";
import { ConfirmDialog } from "@/components/ConfirmDialog";
import { showToast } from "@/components/Toaster";
import { ThemeToggle } from "@/components/ThemeToggle";
import { statusDotClass } from "@/lib/design-tokens";
import { KeyboardShortcutsDialog } from "@/components/KeyboardShortcutsDialog";
@@ -53,11 +55,8 @@ export function Toolbar() {
}, [wsStatus]);
const counts = useMemo(() => {
// Exclude the org-level platform agent (the concierge) — it's the
// undeletable org root surfaced in the shell, not a counted map workspace.
const mapNodes = nodes.filter((n) => n.data.kind !== WORKSPACE_KIND.Platform);
const c = { total: mapNodes.length, roots: 0, children: 0, online: 0, offline: 0, failed: 0, provisioning: 0, activeTasks: 0 };
for (const n of mapNodes) {
const c = { total: nodes.length, roots: 0, children: 0, online: 0, offline: 0, failed: 0, provisioning: 0, activeTasks: 0 };
for (const n of nodes) {
if (n.data.parentId) c.children++; else c.roots++;
const s = n.data.status;
if (s === "online") c.online++;
@@ -461,8 +460,11 @@ export function Toolbar() {
)}
</div>
{/* Theme picker + settings gear removed from the map toolbar — both now
live in the concierge global Settings (left rail) + topbar. */}
{/* Theme picker — System / Light / Dark */}
<ThemeToggle />
{/* Settings gear icon */}
<SettingsButton ref={settingsGearRef} />
<ConfirmDialog
open={restartConfirmOpen}
+72 -81
View File
@@ -1,7 +1,7 @@
"use client";
import { useMemo, type KeyboardEvent } from "react";
import { Handle, Position, type NodeProps, type Node } from "@xyflow/react";
import { useCallback, useMemo, type KeyboardEvent } from "react";
import { Handle, NodeResizer, Position, type NodeProps, type Node } from "@xyflow/react";
import { useCanvasStore, type WorkspaceNodeData } from "@/store/canvas";
import { getConfigurationError, getConfigurationStatus } from "@/store/canvas-topology";
import { showToast } from "@/components/Toaster";
@@ -21,8 +21,7 @@ function useDescendantCount(nodeId: string): number {
return useMemo(() => countDescendants(nodeId, nodes), [nodeId, nodes]);
}
/** Boolean flag used to drive the container's system-controlled size
* (leaves render fixed-size; parents grow to fit children).
/** Boolean flag used to drive min-size and NodeResizer dimensions.
* Selecting `nodes` stably avoids re-render loops (same issue as
* useDescendantCount). */
function useHasChildren(nodeId: string): boolean {
@@ -88,9 +87,16 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
return (
<>
{/* Free-resize removed (was NodeResizer). Container size + shape are now
* system-controlled: leaf workspaces render at a fixed width; parent
* workspaces grow to fit their nested children (store grow logic). */}
{/* NodeResizer — visible only on the selected card. Lets the user
* drag any edge/corner to grow or shrink the workspace, which is
* useful on cards that contain nested child workspaces. */}
<NodeResizer
isVisible={isSelected}
minWidth={hasChildren ? 360 : 210}
minHeight={hasChildren ? 200 : 110}
lineClassName="!border-accent/40"
handleClassName="!w-2 !h-2 !bg-accent !border !border-blue-300"
/>
<div
role="button"
tabIndex={0}
@@ -155,22 +161,20 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
}
}}
className={`
group relative rounded-xl
${hasChildren && !data.collapsed
? "h-full w-full min-w-[420px] min-h-[240px]"
: "w-[300px] min-h-[176px]"}
group relative rounded-xl h-full w-full
${hasChildren && !data.collapsed ? "min-w-[360px] min-h-[200px]" : "min-w-[210px]"}
cursor-pointer overflow-hidden
transition-all duration-200 ease-out
${isDragTarget
? "bg-emerald-950/40 border-2 border-emerald-400/60 ring-2 ring-emerald-400/20 scale-[1.03]"
: isBatchSelected
? "bg-surface-sunken/95 border-2 border-accent/80 ring-2 ring-accent/30 shadow-lg shadow-accent/15"
? "bg-surface-sunken/95 border-2 border-accent/80 ring-2 ring-accent/30 shadow-lg shadow-blue-500/15"
: isSelected
? "bg-surface-sunken/95 border border-accent/70 ring-1 ring-accent/30 shadow-lg shadow-accent/10"
: "bg-surface-sunken/90 border border-line/80 hover:border-ink-soft/60 shadow-lg shadow-black/30 hover:shadow-xl hover:shadow-black/40"
? "bg-surface-sunken/95 border border-accent/70 ring-1 ring-accent/30 shadow-lg shadow-blue-500/10"
: "bg-surface-sunken/90 border border-line/80 hover:border-zinc-500/60 shadow-lg shadow-black/30 hover:shadow-xl hover:shadow-black/40"
}
backdrop-blur-sm
focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:ring-offset-1 focus-visible:ring-offset-surface
focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-950
${deploy.isActivelyProvisioning ? "mol-deploy-shimmer" : ""}
${deploy.isLockedChild ? "mol-deploy-locked" : ""}
`}
@@ -208,45 +212,27 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
}
}
}}
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-top-0.5 hover:!bg-accent hover:!h-1.5 focus-visible:!bg-accent focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface transition-all"
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-top-0.5 hover:!bg-blue-400 hover:!h-1.5 focus-visible:!bg-blue-400 focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-blue-400/60 focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-950 transition-all"
/>
<div className="relative px-4 py-3.5">
<div className="relative px-3.5 py-2.5">
{/* Header row */}
<div className="flex items-center justify-between gap-2 mb-2.5">
<div className="flex items-center gap-2.5 min-w-0">
<div className={`w-2.5 h-2.5 rounded-full shrink-0 ${statusCfg.dot} ${statusCfg.glow} shadow-sm`} />
<span className="text-[15px] font-semibold text-ink truncate leading-tight">
<div className="flex items-center justify-between gap-2 mb-1">
<div className="flex items-center gap-2 min-w-0">
<div className={`w-2 h-2 rounded-full shrink-0 ${statusCfg.dot} ${statusCfg.glow} shadow-sm`} />
<span className="text-[13px] font-semibold text-ink truncate leading-tight">
{data.name}
</span>
</div>
<div className="flex items-center gap-1.5 shrink-0">
{/* Model pill (concept top-right). Shortens the agent_card model to
a family label (Opus/Sonnet/Haiku/Kimi); falls back to the raw
last segment, then to the tier badge when no model is known. */}
{(() => {
const m = (data.agentCard as Record<string, unknown> | null)?.model;
const model = typeof m === "string" && m ? m : null;
if (!model) {
return (
<span className={`text-[11px] font-mono px-2 py-1 rounded-md ${tierCfg.color}`}>
{tierCfg.label}
</span>
);
}
const label = /opus/i.test(model) ? "Opus"
: /sonnet/i.test(model) ? "Sonnet"
: /haiku/i.test(model) ? "Haiku"
: /kimi/i.test(model) ? "Kimi"
: /gpt|openai/i.test(model) ? "GPT"
: /gemini/i.test(model) ? "Gemini"
: (model.split(/[/:]/).pop() || model);
return (
<span className="text-[11px] font-mono px-2 py-1 rounded-md text-white bg-accent" title={model}>
{label}
</span>
);
})()}
{hasChildren && (
<span className="text-[10px] font-mono text-accent bg-accent/15 border border-accent/40 px-1.5 py-0.5 rounded-md">
{descendantCount} sub
</span>
)}
<span className={`text-[10px] font-mono px-1.5 py-0.5 rounded-md ${tierCfg.color}`}>
{tierCfg.label}
</span>
</div>
</div>
@@ -256,9 +242,6 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
We treat empty-string DB values as "missing" so an unbackfilled
row falls through to the agent-card value rather than rendering
a blank pill. */}
{/* Role pill (concept) — uppercase, accent-bordered. Platform root
shows "PLATFORM · ROOT"; Phase 30 external-runtime agents get the
REMOTE marker alongside. */}
{(() => {
const dbRuntime = typeof data.runtime === "string" && data.runtime !== ""
? data.runtime : null;
@@ -266,46 +249,32 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
? (data.agentCard as Record<string, string>).runtime
: null;
const runtime = dbRuntime ?? cardRuntime;
const isRemote = !!runtime && isExternalLikeRuntime(runtime);
const isPlatformRoot = !data.parentId && hasChildren;
const roleLabel = isPlatformRoot ? "PLATFORM · ROOT" : (data.role || null);
if (!roleLabel && !isRemote) return null;
if (!runtime) return null;
return (
<div className="mb-2.5 flex items-center gap-1.5">
{roleLabel && (
<span className="max-w-[220px] truncate text-[10px] font-mono uppercase tracking-[0.04em] px-2 py-1 rounded-md text-accent bg-accent/12 border border-accent/35">
{roleLabel}
</span>
)}
{isRemote && (
<div className="mb-1 flex items-center gap-1">
{isExternalLikeRuntime(runtime) ? (
<span
className="text-[10px] font-mono uppercase px-2 py-1 rounded-md text-white bg-violet-800 border border-violet-900"
className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-white bg-violet-800 border border-violet-900"
title="Phase 30 remote agent — runs outside this platform's Docker network. Lifecycle managed via heartbeat-based polling, not Docker exec."
>
REMOTE
</span>
) : (
<span className="text-[7px] font-mono px-1.5 py-0.5 rounded-md text-ink-mid bg-surface-card border border-line">
{runtime}
</span>
)}
</div>
);
})()}
{/* Status line (concept) — uppercase status, "· N AGENTS" for parents,
with a queued pill on the right. */}
<div className="mb-2 flex items-center justify-between gap-2">
<span className={`text-[11px] font-mono uppercase tracking-[0.04em] ${
isOnline ? "text-good"
: effectiveStatus === "failed" ? "text-bad"
: (effectiveStatus === "provisioning" || effectiveStatus === "degraded") ? "text-warm"
: "text-ink-soft"
}`}>
{statusCfg.label}{hasChildren ? ` · ${descendantCount} agents` : ""}
</span>
{data.activeTasks > 0 && (
<span className="shrink-0 text-[11px] font-mono px-2 py-1 rounded-md text-ink-mid bg-surface-card border border-line">
{data.activeTasks} queued
</span>
)}
</div>
{/* Role — clamp to 2 lines. Without this, a verbose role
* description (common on org-template imports) lets the card
* grow arbitrarily tall, which wrecks the grid-slot layout
* because siblings all plan for the same CHILD_DEFAULT_HEIGHT. */}
{data.role && (
<div className="text-[10px] text-ink-mid mb-1.5 leading-tight line-clamp-2">{data.role}</div>
)}
{/* Skills */}
{skills.length > 0 && (
@@ -359,7 +328,29 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
</button>
)}
{/* (status + queued now rendered above, concept-style) */}
{/* Bottom row: status / active tasks */}
<div className="flex items-center justify-between mt-0.5">
{effectiveStatus !== "online" ? (
<div className={`text-[10px] uppercase tracking-widest font-medium ${
effectiveStatus === "failed" ? "text-bad" :
effectiveStatus === "degraded" ? "text-warm" :
effectiveStatus === "not_configured" ? "text-warm" :
effectiveStatus === "provisioning" ? "text-accent" :
"text-ink-mid"
}`}>
{statusCfg.label}
</div>
) : <div />}
{data.activeTasks > 0 && (
<div className="flex items-center gap-1">
<div className="w-1 h-1 rounded-full bg-warm motion-safe:animate-pulse" />
<span className="text-[10px] text-warm tabular-nums">
{data.activeTasks} task{data.activeTasks > 1 ? "s" : ""}
</span>
</div>
)}
</div>
{/* Degraded error preview */}
{data.status === "degraded" && data.lastSampleError && (
@@ -404,7 +395,7 @@ export function WorkspaceNode({ id, data }: NodeProps<Node<WorkspaceNodeData>>)
}
}
}}
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-bottom-0.5 hover:!bg-accent hover:!h-1.5 focus-visible:!bg-accent focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent/60 focus-visible:ring-offset-1 focus-visible:ring-offset-surface transition-all"
className="!w-2.5 !h-1 !rounded-full !bg-surface-card/80 !border-0 !-bottom-0.5 hover:!bg-blue-400 hover:!h-1.5 focus-visible:!bg-blue-400 focus-visible:!h-1.5 focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-blue-400/60 focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-950 transition-all"
/>
</div>
</>
@@ -1,195 +0,0 @@
"use client";
import { useState } from "react";
import type { Node } from "@xyflow/react";
import {
useCanvasStore,
type PanelTab,
type WorkspaceNodeData,
} from "@/store/canvas";
import { showToast } from "@/components/Toaster";
import { Tooltip } from "./Tooltip";
import { DetailsTab } from "./tabs/DetailsTab";
import { SkillsTab } from "./tabs/SkillsTab";
import { ChatTab } from "./tabs/ChatTab";
import { ConfigTab } from "./tabs/ConfigTab";
import { ContainerConfigTab } from "./tabs/ContainerConfigTab";
import { DisplayTab } from "./tabs/DisplayTab";
import { TerminalTab } from "./tabs/TerminalTab";
import { FilesTab } from "./tabs/FilesTab";
import { MemoryInspectorPanel } from "./MemoryInspectorPanel";
import { AuditTrailPanel } from "./AuditTrailPanel";
import { TracesTab } from "./tabs/TracesTab";
import { EventsTab } from "./tabs/EventsTab";
import { ActivityTab } from "./tabs/ActivityTab";
import { ScheduleTab } from "./tabs/ScheduleTab";
import { ChannelsTab } from "./tabs/ChannelsTab";
/**
 * Canonical workspace tab set — the SAME ids/labels/icons the map's
 * SidePanel has always rendered. Single source of truth so the map drawer
 * and any other host (the concierge Settings page) can't drift.
 *
 * Array order is the on-screen tab order and the order used by the tablist's
 * arrow/Home/End keyboard navigation.
 */
export const WORKSPACE_PANEL_TABS: { id: PanelTab; label: string; icon: string }[] = [
{ id: "chat", label: "Chat", icon: "◈" },
{ id: "activity", label: "Activity", icon: "⊙" },
{ id: "details", label: "Details", icon: "◉" },
{ id: "skills", label: "Plugins", icon: "✦" },
{ id: "terminal", label: "Terminal", icon: "▸" },
{ id: "display", label: "Display", icon: "▣" },
{ id: "container-config", label: "Container", icon: "▤" },
{ id: "config", label: "Config", icon: "⚙" },
{ id: "schedule", label: "Schedule", icon: "⏲" },
{ id: "channels", label: "Channels", icon: "⇌" },
{ id: "files", label: "Files", icon: "⊞" },
{ id: "memory", label: "Memory", icon: "◇" },
{ id: "traces", label: "Traces", icon: "◎" },
{ id: "events", label: "Events", icon: "◊" },
{ id: "audit", label: "Audit", icon: "⊟" },
];
/** Props accepted by the workspace tab bar + tab body component. */
interface Props {
/** The workspace node whose tabs to render (id + data blob). */
node: Node<WorkspaceNodeData>;
/**
 * Controlled active tab. When provided together with `onTabChange`, the
 * caller owns the active-tab state (the map's SidePanel threads the global
 * `panelTab`/`setPanelTab` here so the store stays the source of truth and
 * the existing keyboard/selection behaviour is preserved verbatim).
 * When omitted, the component manages its OWN local active-tab state,
 * which is what the concierge Settings page uses so the embedded tabs
 * don't fight the map's selection.
 */
activeTab?: PanelTab;
/**
 * Called with the newly requested tab in controlled mode. Only honoured
 * when `activeTab` is also supplied; otherwise local state is used.
 */
onTabChange?: (tab: PanelTab) => void;
/** Initial tab for the uncontrolled (local-state) mode. Defaults to "chat". */
defaultTab?: PanelTab;
}
/**
 * The workspace tab bar + tab body, extracted from SidePanel so it can be
 * reused verbatim outside the map (e.g. the concierge Settings "Platform
 * agent configuration" section). Renders the canonical ARIA tablist and the
 * exact same tab content components keyed on the active tab.
 *
 * Does NOT render the workspace header / meta pills / resize handle / footer —
 * those are host chrome and stay in the host (SidePanel for the map).
 *
 * @param node         Workspace node whose id/data feed every tab body.
 * @param activeTab    Controlled active tab (requires `onTabChange`).
 * @param onTabChange  Controlled change callback (requires `activeTab`).
 * @param defaultTab   Initial tab in uncontrolled mode; defaults to "chat".
 */
export function WorkspacePanelTabs({ node, activeTab, onTabChange, defaultTab = "chat" }: Props) {
  const restartWorkspace = useCanvasStore((s) => s.restartWorkspace);

  // Controlled when both props are present; otherwise own the state locally.
  const controlled = activeTab !== undefined && onTabChange !== undefined;
  const [localTab, setLocalTab] = useState<PanelTab>(defaultTab);
  const tab: PanelTab = controlled && activeTab !== undefined ? activeTab : localTab;
  const setTab = (next: PanelTab) => {
    if (controlled) onTabChange?.(next);
    else setLocalTab(next);
  };

  const workspaceId = node.id;
  const data = node.data;

  return (
    <>
      {/* Tabs — relative wrapper lets the fade gradient position against the scroll container */}
      <div className="relative border-b border-line/40">
        {/* Right-edge fade: signals more tabs are hidden off-screen when the bar overflows */}
        <div className="pointer-events-none absolute inset-y-0 right-0 w-8 bg-gradient-to-l from-surface to-transparent z-10" aria-hidden="true" />
        <div
          role="tablist"
          aria-label="Workspace panel tabs"
          className="flex overflow-x-auto bg-surface-sunken/20 px-1"
          onKeyDown={(e) => {
            const count = WORKSPACE_PANEL_TABS.length;
            const idx = WORKSPACE_PANEL_TABS.findIndex((t) => t.id === tab);
            let next: number | null = null;
            if (e.key === "ArrowRight") next = (idx + 1) % count;
            // idx <= 0 covers both "first tab" and "active tab not in the
            // list" (-1): either way wrap to the last tab.
            else if (e.key === "ArrowLeft") next = idx <= 0 ? count - 1 : idx - 1;
            else if (e.key === "Home") next = 0;
            else if (e.key === "End") next = count - 1;
            if (next === null) return;
            e.preventDefault();
            const target = WORKSPACE_PANEL_TABS[next];
            if (!target) return;
            // Capture the tablist now: the focus lookup runs in a later frame
            // and must stay scoped to THIS tablist, because another mounted
            // instance of this component renders the same `tab-*` ids.
            const tablist = e.currentTarget;
            setTab(target.id);
            requestAnimationFrame(() => {
              const el = tablist.querySelector<HTMLElement>(`[id="tab-${target.id}"]`);
              el?.focus();
              el?.scrollIntoView({ block: "nearest", inline: "nearest" });
            });
          }}
        >
          {WORKSPACE_PANEL_TABS.map((t) => (
            <button
              type="button"
              key={t.id}
              id={`tab-${t.id}`}
              role="tab"
              aria-selected={tab === t.id}
              aria-controls={`panel-${t.id}`}
              tabIndex={tab === t.id ? 0 : -1}
              onClick={() => setTab(t.id)}
              className={`shrink-0 px-3 py-2.5 text-[10px] font-medium tracking-wide transition-all rounded-t-lg mx-0.5 focus:outline-none focus-visible:ring-2 focus-visible:ring-accent/70 ${
                tab === t.id
                  ? "text-ink bg-surface-card border-b-2 border-accent"
                  : "text-ink-mid hover:text-ink hover:bg-surface-card/60"
              }`}
            >
              <span className="mr-1 opacity-50" aria-hidden="true">{t.icon}</span>
              {t.label}
            </button>
          ))}
        </div>
      </div>
      {/* Needs Restart Banner — suppressed while a task is running */}
      {data.needsRestart && !data.currentTask && (
        <div className="px-4 py-2 bg-sky-950/20 border-b border-sky-800/20 flex items-center justify-between">
          <span className="text-[10px] text-sky-300/90">Config changed restart to apply</span>
          <button
            type="button"
            onClick={() => {
              restartWorkspace(workspaceId).catch(() => showToast("Restart failed", "error"));
            }}
            className="text-[11px] px-2 py-1 bg-sky-800/40 hover:bg-sky-700/50 text-sky-200 rounded transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1"
          >
            Restart Now
          </button>
        </div>
      )}
      {/* Current Task Banner */}
      {data.currentTask && (
        <Tooltip text={data.currentTask as string}>
          <div className="px-4 py-2 bg-amber-950/20 border-b border-amber-800/20 flex items-center gap-2 cursor-default">
            <div className="w-1.5 h-1.5 rounded-full bg-amber-400 motion-safe:animate-pulse shrink-0" />
            <span className="text-[10px] text-warm/90 truncate">
              {data.currentTask}
            </span>
          </div>
        </Tooltip>
      )}
      {/* Tab Content — each body is keyed on the workspace id so switching
          workspaces remounts it instead of reusing the previous state */}
      <div
        role="tabpanel"
        id={`panel-${tab}`}
        aria-labelledby={`tab-${tab}`}
        tabIndex={0}
        className="flex-1 overflow-y-auto focus:outline-none"
      >
        {tab === "details" && <DetailsTab key={workspaceId} workspaceId={workspaceId} data={data} />}
        {tab === "skills" && <SkillsTab key={workspaceId} workspaceId={workspaceId} data={data} />}
        {tab === "activity" && <ActivityTab key={workspaceId} workspaceId={workspaceId} />}
        {tab === "chat" && <ChatTab key={workspaceId} workspaceId={workspaceId} data={data} />}
        {tab === "terminal" && <TerminalTab key={workspaceId} workspaceId={workspaceId} data={data} />}
        {tab === "display" && <DisplayTab key={workspaceId} workspaceId={workspaceId} />}
        {tab === "container-config" && (
          <ContainerConfigTab key={workspaceId} workspaceId={workspaceId} data={data} />
        )}
        {tab === "config" && <ConfigTab key={workspaceId} workspaceId={workspaceId} />}
        {tab === "schedule" && <ScheduleTab key={workspaceId} workspaceId={workspaceId} />}
        {tab === "channels" && <ChannelsTab key={workspaceId} workspaceId={workspaceId} />}
        {tab === "files" && <FilesTab key={workspaceId} workspaceId={workspaceId} data={data} />}
        {tab === "memory" && <MemoryInspectorPanel key={workspaceId} workspaceId={workspaceId} />}
        {tab === "traces" && <TracesTab key={workspaceId} workspaceId={workspaceId} />}
        {tab === "events" && <EventsTab key={workspaceId} workspaceId={workspaceId} />}
        {tab === "audit" && <AuditTrailPanel key={workspaceId} workspaceId={workspaceId} />}
      </div>
    </>
  );
}
@@ -385,7 +385,7 @@ describe("ContextMenu — item actions", () => {
render(<ContextMenu />);
fireEvent.click(screen.getByRole("menuitem", { name: /pause/i }));
await act(async () => { /* flush */ });
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause?cascade=true", {});
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/pause", {});
expect(mockStoreState.updateNodeData).toHaveBeenCalledWith("n1", { status: "paused" });
});
@@ -395,7 +395,7 @@ describe("ContextMenu — item actions", () => {
render(<ContextMenu />);
fireEvent.click(screen.getByRole("menuitem", { name: /resume/i }));
await act(async () => { /* flush */ });
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume?cascade=true", {});
expect(mockPost).toHaveBeenCalledWith("/workspaces/n1/resume", {});
});
});
@@ -1,84 +0,0 @@
// @vitest-environment jsdom
//
// SaaS-mode coverage for the per-workspace cloud-provider picker. The main
// CreateWorkspaceDialog.test.tsx runs non-SaaS (the picker is hidden and the
// payload omits `provider`); this file forces SaaS by mocking isSaaSTenant so
// the picker renders and the selected provider flows into compute.provider.
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
vi.mock("@/lib/api", () => ({
api: { get: vi.fn(), post: vi.fn() },
}));
// Force SaaS so the Cloud provider picker is shown and the payload carries it.
vi.mock("@/lib/tenant", async (importOriginal) => ({
...(await importOriginal<typeof import("@/lib/tenant")>()),
isSaaSTenant: () => true,
}));
import { api } from "@/lib/api";
const mockGet = vi.mocked(api.get);
const mockPost = vi.mocked(api.post);
// Fixture returned by the mocked GET /templates call: a single template with
// one model whose `required_env` is empty.
const SAMPLE_TEMPLATES = [
{
id: "claude-code-default",
name: "Claude Code Agent",
runtime: "claude-code",
model: "moonshot/kimi-k2.6",
providers: ["platform", "minimax"],
models: [{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] }],
},
];
// Reset all mocks, then stub the API: GET /templates yields the fixture, any
// other GET yields an empty list, and every POST resolves to an empty object.
beforeEach(() => {
vi.clearAllMocks();
mockGet.mockImplementation(async (url: string) => {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
if (url === "/templates") return SAMPLE_TEMPLATES as any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return [] as any;
});
// eslint-disable-next-line @typescript-eslint/no-explicit-any
mockPost.mockResolvedValue({} as any);
});
afterEach(() => cleanup());
/**
 * Renders the create-workspace button, clicks the "New Workspace" trigger and
 * resolves once the "Create Workspace" dialog title is present.
 *
 * @throws Error when no rendered button contains the text "New Workspace",
 *         so a broken trigger fails with a clear message instead of an
 *         opaque error from clicking `undefined`.
 */
async function openDialog(): Promise<void> {
  render(<CreateWorkspaceButton />);
  const trigger = screen
    .getAllByRole("button")
    .find((candidate) => candidate.textContent?.includes("New Workspace"));
  if (!trigger) {
    throw new Error('openDialog: no button containing "New Workspace" was rendered');
  }
  fireEvent.click(trigger);
  await waitFor(() => expect(screen.getByText("Create Workspace")).toBeTruthy());
}
// SaaS-mode suite: isSaaSTenant is mocked to return true for this file, so the
// "Cloud provider" select is expected to render and feed compute.provider.
describe("CreateWorkspaceDialog — cloud provider (SaaS)", () => {
// The select exists and its initial value is "aws".
it("shows the Cloud provider picker, defaulting to AWS", async () => {
await openDialog();
const select = screen.getByLabelText("Cloud provider") as HTMLSelectElement;
expect(select).toBeTruthy();
expect(select.value).toBe("aws");
});
// Submitting without touching the select posts compute.provider = "aws".
it("defaults compute.provider to aws when the picker is untouched", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "AWS Agent" } });
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
// Second argument of the first POST call is the request body.
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.compute).toMatchObject({ provider: "aws" });
});
// Changing the select to "gcp" before submitting posts compute.provider = "gcp".
it("threads the selected cloud provider into compute.provider", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), { target: { value: "GCP Agent" } });
fireEvent.change(screen.getByLabelText("Cloud provider"), { target: { value: "gcp" } });
fireEvent.click(screen.getAllByRole("button").find((b) => b.textContent === "Create")!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.compute).toMatchObject({ provider: "gcp" });
});
});
@@ -1,175 +0,0 @@
// @vitest-environment jsdom
/**
* Regression tests for #2248 platform-managed provider credential suppression.
*
* Covers:
* - MOLECULE_LLM_USAGE_TOKEN is hidden when the selected provider is platform-managed
* - MOLECULE_LLM_USAGE_TOKEN is still shown for BYOK providers
* - No render churn from unstable array references (useMemo guard)
*/
import { describe, it, expect, vi, afterEach } from "vitest";
import { render, screen, fireEvent, cleanup, waitFor, act } from "@testing-library/react";
import { MissingKeysModal } from "../MissingKeysModal";
import type { ModelSpec, ProviderChoice } from "@/lib/deploy-preflight";
vi.mock("@/lib/api", () => ({
api: { get: vi.fn(), put: vi.fn() },
}));
vi.mock("@/lib/deploy-preflight", async () => {
const actual = await vi.importActual<typeof import("@/lib/deploy-preflight")>(
"@/lib/deploy-preflight",
);
return actual;
});
const PLATFORM_MANAGED_MODELS: ModelSpec[] = [
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
];
const BYOK_MODELS: ModelSpec[] = [
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
];
/**
 * Builds the provider list handed to the modal under test.
 *
 * The first entry is the Anthropic-keyed provider whose id and label depend on
 * `billingMode`; the second is a fixed OpenAI entry with no billing mode.
 *
 * @param billingMode billing mode stamped onto the first provider
 * @returns a two-element provider array
 */
function makeProviders(billingMode: "platform_managed" | "byok"): ProviderChoice[] {
  const platformManaged = billingMode === "platform_managed";
  return [
    {
      id: platformManaged
        ? "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN"
        : "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
      label: platformManaged ? "Platform Anthropic" : "BYOK Anthropic",
      envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
      billingMode,
    },
    // Need ≥2 providers so MissingKeysModal enters picker mode (pickerMode = providers.length > 1).
    {
      id: "openai|OPENAI_API_KEY",
      label: "OpenAI",
      envVars: ["OPENAI_API_KEY"],
    },
  ];
}
describe("ProviderPickerModal — platform-managed suppression (#2248)", () => {
afterEach(() => cleanup());
it("hides MOLECULE_LLM_USAGE_TOKEN when provider is platform-managed", () => {
render(
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={makeProviders("platform_managed")}
models={PLATFORM_MANAGED_MODELS}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>,
);
// Only ANTHROPIC_API_KEY should be rendered; MOLECULE_LLM_USAGE_TOKEN suppressed
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
});
it("shows MOLECULE_LLM_USAGE_TOKEN when provider is BYOK", () => {
render(
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={makeProviders("byok")}
models={BYOK_MODELS}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>,
);
// Both keys visible for BYOK
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
});
// Intended as a guard against a render loop while the modal is open with a
// platform-managed provider selected.
it("does not churn renders when the modal is open and platform-managed", () => {
// Incremented each time RenderSpy's own function body runs.
let renderCount = 0;
function RenderSpy({ children }: { children: React.ReactNode }) {
renderCount++;
return <>{children}</>;
}
render(
<RenderSpy>
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={makeProviders("platform_managed")}
models={PLATFORM_MANAGED_MODELS}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>
</RenderSpy>,
);
const countAfterInitial = renderCount;
// NOTE(review): nothing runs between capturing countAfterInitial and the
// assertion below (this test is synchronous, no await/act/timer), so the
// toBe check compares renderCount with a copy of itself and cannot fail.
// Only the <= 2 bound constrains anything. Also, RenderSpy counts its own
// renders; state updates inside MissingKeysModal presumably would not
// re-render this wrapper, so a loop inside the modal may go unnoticed —
// TODO confirm and consider measuring commits of the modal subtree instead.
expect(renderCount).toBe(countAfterInitial);
expect(renderCount).toBeLessThanOrEqual(2); // StrictMode double-render ceiling
});
it("updates suppression correctly when switching from BYOK to platform-managed", async () => {
const providers: ProviderChoice[] = [
{
id: "anthropic|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
label: "BYOK Anthropic",
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billingMode: "byok",
},
{
id: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN",
label: "Platform Anthropic",
envVars: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
billingMode: "platform_managed",
},
{
id: "openai|OPENAI_API_KEY",
label: "OpenAI",
envVars: ["OPENAI_API_KEY"],
},
];
const models: ModelSpec[] = [
{ id: "byok-claude", provider: "anthropic", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
{ id: "platform-claude", provider: "platform", required_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"] },
];
render(
<MissingKeysModal
open
missingKeys={["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"]}
providers={providers}
models={models}
runtime="claude-code"
onKeysAdded={vi.fn()}
onCancel={vi.fn()}
/>,
);
// Default selection is providers[0] (BYOK) — both keys visible
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
expect(screen.getByText("MOLECULE_LLM_USAGE_TOKEN")).toBeTruthy();
// Switch to platform-managed provider
const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
act(() => {
fireEvent.change(providerSelect, {
target: { value: "platform|ANTHROPIC_API_KEY|MOLECULE_LLM_USAGE_TOKEN" },
});
});
// MOLECULE_LLM_USAGE_TOKEN should now be suppressed
await waitFor(() => {
expect(screen.getByText("ANTHROPIC_API_KEY")).toBeTruthy();
});
expect(screen.queryByText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
});
});
@@ -275,9 +275,9 @@ describe("WorkspaceNode — status states", () => {
expect(screen.getByText("STARTING")).toBeTruthy();
});
it("shows status label for online node (concept: status always visible)", () => {
it("suppresses status label for online node", () => {
renderNode({ status: "online" });
expect(screen.getByText("ONLINE")).toBeTruthy();
expect(screen.queryByText("ONLINE")).toBeNull();
});
it("shows degraded error preview when status is degraded and lastSampleError is set", () => {
@@ -404,18 +404,14 @@ describe("WorkspaceNode — double-click interactions", () => {
});
describe("WorkspaceNode — active tasks", () => {
it("shows the queued count when activeTasks > 0", () => {
it("shows active tasks badge when activeTasks > 0", () => {
renderNode({ activeTasks: 3 });
expect(
screen.getByText((_, el) => el?.tagName === "SPAN" && (el.textContent ?? "").includes("3 queued")),
).toBeTruthy();
expect(screen.getByText("3 tasks")).toBeTruthy();
});
it("shows the queued count for a single task", () => {
it("shows singular 'task' when activeTasks is 1", () => {
renderNode({ activeTasks: 1 });
expect(
screen.getByText((_, el) => el?.tagName === "SPAN" && (el.textContent ?? "").includes("1 queued")),
).toBeTruthy();
expect(screen.getByText("1 task")).toBeTruthy();
});
it("suppresses badge when no active tasks", () => {
@@ -475,15 +471,13 @@ describe("WorkspaceNode — needs restart", () => {
});
describe("WorkspaceNode — descendant badge", () => {
it("shows the agent count in the status line when node has children", () => {
it("shows descendant count badge when node has children in store", () => {
store().nodes = [
makeNode({ id: "ws-1" }),
{ id: "child-1", data: { ...makeNode({ id: "ws-1" }).data, parentId: "ws-1" } },
];
renderNode();
expect(
screen.getByText((_, el) => el?.tagName === "SPAN" && (el.textContent ?? "").includes("1 agents")),
).toBeTruthy();
expect(screen.getByText("1 sub")).toBeTruthy();
});
it("suppresses badge when node has no children", () => {
@@ -533,9 +527,9 @@ describe("WorkspaceNode — skills pills", () => {
});
describe("WorkspaceNode — runtime badge", () => {
it("shows the role pill (runtime pill replaced by role pill in the concept redesign)", () => {
renderNode({ role: "researcher" });
expect(screen.getByText("researcher")).toBeTruthy();
it("shows runtime badge when runtime is set", () => {
renderNode({ runtime: "hermes" });
expect(screen.getByText("hermes")).toBeTruthy();
});
it("shows REMOTE badge for external runtime", () => {
@@ -1,103 +0,0 @@
// @vitest-environment jsdom
import { describe, it, expect, vi, afterEach } from "vitest";
import { render, screen, fireEvent, cleanup } from "@testing-library/react";
afterEach(() => {
cleanup();
});
// ── Mock every tab content component to a sentinel so we can assert which
// body renders without dragging in API calls / heavy children. ───────────
vi.mock("../tabs/DetailsTab", () => ({ DetailsTab: () => <div data-testid="body-details" /> }));
vi.mock("../tabs/SkillsTab", () => ({ SkillsTab: () => <div data-testid="body-skills" /> }));
vi.mock("../tabs/ChatTab", () => ({ ChatTab: () => <div data-testid="body-chat" /> }));
vi.mock("../tabs/ConfigTab", () => ({ ConfigTab: () => <div data-testid="body-config" /> }));
vi.mock("../tabs/ContainerConfigTab", () => ({ ContainerConfigTab: () => <div data-testid="body-container" /> }));
vi.mock("../tabs/DisplayTab", () => ({ DisplayTab: () => <div data-testid="body-display" /> }));
vi.mock("../tabs/TerminalTab", () => ({ TerminalTab: () => <div data-testid="body-terminal" /> }));
vi.mock("../tabs/FilesTab", () => ({ FilesTab: () => <div data-testid="body-files" /> }));
vi.mock("../MemoryInspectorPanel", () => ({ MemoryInspectorPanel: () => <div data-testid="body-memory" /> }));
vi.mock("../tabs/TracesTab", () => ({ TracesTab: () => <div data-testid="body-traces" /> }));
vi.mock("../tabs/EventsTab", () => ({ EventsTab: () => <div data-testid="body-events" /> }));
vi.mock("../tabs/ActivityTab", () => ({ ActivityTab: () => <div data-testid="body-activity" /> }));
vi.mock("../tabs/ScheduleTab", () => ({ ScheduleTab: () => <div data-testid="body-schedule" /> }));
vi.mock("../tabs/ChannelsTab", () => ({ ChannelsTab: () => <div data-testid="body-channels" /> }));
vi.mock("../AuditTrailPanel", () => ({ AuditTrailPanel: () => <div data-testid="body-audit" /> }));
vi.mock("../Tooltip", () => ({
Tooltip: ({ children }: { children: React.ReactNode }) => <>{children}</>,
}));
vi.mock("@/components/Toaster", () => ({ showToast: vi.fn() }));
// The store is only consulted for restartWorkspace.
const mockRestart = vi.fn(() => Promise.resolve());
vi.mock("@/store/canvas", () => ({
useCanvasStore: vi.fn((selector: (s: { restartWorkspace: typeof mockRestart }) => unknown) =>
selector({ restartWorkspace: mockRestart })
),
}));
import { WorkspacePanelTabs, WORKSPACE_PANEL_TABS } from "../WorkspacePanelTabs";
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const node: any = {
id: "platform-1",
data: {
name: "Org Concierge",
status: "online",
tier: 0,
role: "platform",
parentId: null,
needsRestart: false,
currentTask: null,
agentCard: null,
},
};
describe("WorkspacePanelTabs — uncontrolled (Settings usage)", () => {
it("renders the canonical 15-tab tablist for an explicit node", () => {
render(<WorkspacePanelTabs node={node} />);
const tablist = screen.getByRole("tablist");
expect(tablist.getAttribute("aria-label")).toBe("Workspace panel tabs");
expect(screen.getAllByRole("tab").length).toBe(WORKSPACE_PANEL_TABS.length);
expect(WORKSPACE_PANEL_TABS.length).toBe(15);
});
it("defaults to the chat tab when no defaultTab is given", () => {
render(<WorkspacePanelTabs node={node} />);
expect(screen.getByTestId("body-chat")).toBeTruthy();
expect(document.getElementById("tab-chat")?.getAttribute("aria-selected")).toBe("true");
});
it("honours defaultTab='config' (the concierge Settings entry point)", () => {
render(<WorkspacePanelTabs node={node} defaultTab="config" />);
expect(screen.getByTestId("body-config")).toBeTruthy();
expect(document.getElementById("tab-config")?.getAttribute("aria-selected")).toBe("true");
});
it("clicking a tab swaps the body using local state (no store panelTab)", () => {
render(<WorkspacePanelTabs node={node} />);
fireEvent.click(document.getElementById("tab-channels")!);
expect(screen.getByTestId("body-channels")).toBeTruthy();
expect(document.getElementById("tab-channels")?.getAttribute("aria-selected")).toBe("true");
});
});
describe("WorkspacePanelTabs — controlled (SidePanel usage)", () => {
it("renders activeTab and calls onTabChange instead of local state", () => {
const onTabChange = vi.fn();
render(<WorkspacePanelTabs node={node} activeTab="details" onTabChange={onTabChange} />);
expect(screen.getByTestId("body-details")).toBeTruthy();
fireEvent.click(document.getElementById("tab-config")!);
expect(onTabChange).toHaveBeenCalledWith("config");
// Controlled: body does NOT change on its own (parent owns the state).
expect(screen.getByTestId("body-details")).toBeTruthy();
});
it("ArrowRight from chat calls onTabChange with the next tab", () => {
const onTabChange = vi.fn();
render(<WorkspacePanelTabs node={node} activeTab="chat" onTabChange={onTabChange} />);
fireEvent.keyDown(screen.getByRole("tablist"), { key: "ArrowRight" });
expect(onTabChange).toHaveBeenCalledWith("activity");
});
});
@@ -188,13 +188,11 @@ describe("DropTargetBadge — renders ghost slot + badge for valid drag target",
});
render(<DropTargetBadge />);
expect(screen.getByTestId("ghost-slot")).toBeTruthy();
// Ghost spans one default child slot at zoom 2: width = CHILD_DEFAULT_WIDTH
// (300) × 2 = 600; height = CHILD_DEFAULT_HEIGHT (176) × 2 = 352. left/top
// are the column-0/row-0 slot origin (unchanged by the card-size bump).
// Ghost uses slotBR from 3rd call: slotBR - slotTL = (712-232, 920-660)
expect(screen.getByTestId("ghost-slot").style.left).toBe("232px");
expect(screen.getByTestId("ghost-slot").style.top).toBe("660px");
expect(screen.getByTestId("ghost-slot").style.width).toBe("600px");
expect(screen.getByTestId("ghost-slot").style.height).toBe("352px");
expect(screen.getByTestId("ghost-slot").style.width).toBe("480px");
expect(screen.getByTestId("ghost-slot").style.height).toBe("260px");
});
it("ghost is hidden when slot falls entirely outside parent bounds", () => {
@@ -325,7 +325,7 @@ describe("all shortcuts respect inInput guard", () => {
});
});
describe("Cmd/Ctrl+Arrow — free-resize removed (system-controlled sizing)", () => {
describe("Cmd/Ctrl+Arrow — keyboard node resize", () => {
beforeEach(() => {
mockStoreState.nodes = [
{
@@ -340,15 +340,81 @@ describe("Cmd/Ctrl+Arrow — free-resize removed (system-controlled sizing)", ()
renderWithProvider();
});
it("no longer resizes the node on Cmd/Ctrl+Arrow (free-resize removed)", () => {
// Sizing is system-controlled now: leaves render fixed-size and parents
// grow to fit their children, so Cmd/Ctrl+Arrow must not emit a
// `dimensions` change anymore.
it("resizes height down (smaller) on Cmd/Ctrl+ArrowUp", () => {
// Node starts at minHeight=110 (no children). Shrinking clamps to min —
// height stays 110. Width is unchanged.
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true });
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
expect.objectContaining({
type: "dimensions",
id: "n1",
dimensions: { width: 210, height: 110 },
}),
]);
});
it("resizes height up (larger) on Cmd/Ctrl+ArrowDown", () => {
fireEvent.keyDown(window, { key: "ArrowDown", ctrlKey: true });
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
expect.objectContaining({
type: "dimensions",
id: "n1",
dimensions: { width: 210, height: 120 },
}),
]);
});
it("resizes width down (smaller) on Cmd/Ctrl+ArrowLeft", () => {
// Node starts at minWidth=210 (no children). Shrinking clamps to min —
// width stays 210. Height is unchanged.
fireEvent.keyDown(window, { key: "ArrowLeft", metaKey: true });
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
expect.objectContaining({
type: "dimensions",
id: "n1",
dimensions: { width: 210, height: 110 },
}),
]);
});
it("resizes width up (larger) on Cmd/Ctrl+ArrowRight", () => {
fireEvent.keyDown(window, { key: "ArrowRight", ctrlKey: true });
expect(mockStoreState.onNodesChange).not.toHaveBeenCalled();
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
expect.objectContaining({
type: "dimensions",
id: "n1",
dimensions: { width: 220, height: 110 },
}),
]);
});
it("uses 2px step with Shift held", () => {
// Step is 2px with Shift, but minHeight=110 clamps the result.
// 110 - 2 = 108, Math.max(110, 108) = 110. Width is unchanged.
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true, shiftKey: true });
expect(mockStoreState.onNodesChange).toHaveBeenCalledWith([
expect.objectContaining({
dimensions: { width: 210, height: 110 },
}),
]);
});
it("respects min-height constraint (no children)", () => {
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true });
fireEvent.keyDown(window, { key: "ArrowUp", metaKey: true });
// Each press computes Math.max(minHeight, currentHeight - step) =
// Math.max(110, 110 - 10) = 110, so the height is clamped at the minimum
// and never drops to 100. The min constraint is respected.
expect(mockStoreState.onNodesChange).toHaveBeenLastCalledWith([
expect.objectContaining({ dimensions: { width: 210, height: 110 } }),
]);
});
it("does NOT fire when no node is selected", () => {
@@ -2,6 +2,13 @@
import { useEffect } from "react";
import { useCanvasStore } from "@/store/canvas";
import { type NodeChange, type Node } from "@xyflow/react";
import type { WorkspaceNodeData } from "@/store/canvas";
/**
 * Reports whether at least one entry in `nodes` names `nodeId` as its
 * `data.parentId`, i.e. whether the node has a direct child in the list.
 */
function hasChildren(nodeId: string, nodes: Node<WorkspaceNodeData>[]): boolean {
  for (const candidate of nodes) {
    if (candidate.data.parentId === nodeId) {
      return true;
    }
  }
  return false;
}
/**
* Canvas-wide keyboard shortcuts. All bound to the document window so
@@ -15,9 +22,8 @@ import { useCanvasStore } from "@/store/canvas";
* Cmd/Ctrl+[ bump selected node backward in z-order
* Z zoom-to-team if the selected node has children
* Arrow keys move selected node 10px (50px with Shift)
*
* Node resize shortcuts were removed: container size + shape are now
* system-controlled (leaves fixed-size, parents grow to fit children).
* Cmd/Ctrl+Arrow resize selected node (↑/↓ height, ←/→ width)
* Cmd/Ctrl+Shift+Arrow resize by 2px per press (fine control)
*/
export function useKeyboardShortcuts() {
useEffect(() => {
@@ -90,8 +96,8 @@ export function useKeyboardShortcuts() {
// Arrow-key node movement — Figma-style keyboard drag for keyboard users.
// 10 px per press, 50 px with Shift held. Only fires when a node
// is selected and the target isn't a form control. Skipped when a
// modifier key (Cmd/Ctrl/Alt) is held so those combos stay free for
// browser/OS shortcuts (node resize via Cmd+Arrow was removed).
// modifier key (Cmd/Ctrl/Alt) is held so those combos can be used
// for other shortcuts (e.g. Cmd+Arrow = resize).
if (
!inInput &&
!e.metaKey &&
@@ -119,9 +125,43 @@ export function useKeyboardShortcuts() {
state.moveNode(selectedId, dx, dy);
}
// Node resize (was Cmd/Ctrl+Arrow) removed — container size + shape are
// now system-controlled: leaves render at a fixed size and parents grow
// to fit their children, so there is no user-driven resize affordance.
// Cmd/Ctrl+Arrow — keyboard-accessible node resize.
// ↑/↓ resizes height, ←/→ resizes width.
// 10 px per press (2 px with Shift for fine control).
// Uses the same onNodesChange('dimensions') path that NodeResizer uses.
if (
  !inInput &&
  (e.metaKey || e.ctrlKey) &&
  (e.key === "ArrowUp" ||
    e.key === "ArrowDown" ||
    e.key === "ArrowLeft" ||
    e.key === "ArrowRight")
) {
  const state = useCanvasStore.getState();
  const selectedId = state.selectedNodeId;
  // Nothing selected: leave the key combo to the browser/OS.
  if (!selectedId) return;
  // An open modal dialog owns the keyboard; do not resize behind it.
  if (document.querySelector('[role="dialog"][aria-modal="true"]')) return;
  e.preventDefault();
  const step = e.shiftKey ? 2 : 10;
  const node = state.nodes.find((n) => n.id === selectedId);
  if (!node) return;
  // Fall back to the leaf minimum size when the node has no explicit size yet.
  const currentWidth = node.width ?? 210;
  const currentHeight = node.height ?? 110;
  // Scan the node list once; both minimums depend on the same answer.
  const isParent = hasChildren(node.id, state.nodes);
  const minWidth = isParent ? 360 : 210;
  const minHeight = isParent ? 200 : 110;
  let newWidth = currentWidth;
  let newHeight = currentHeight;
  // Shrinking clamps to the minimum; growing is unbounded here.
  if (e.key === "ArrowUp") newHeight = Math.max(minHeight, currentHeight - step);
  else if (e.key === "ArrowDown") newHeight = currentHeight + step;
  else if (e.key === "ArrowLeft") newWidth = Math.max(minWidth, currentWidth - step);
  else newWidth = currentWidth + step; // ArrowRight — the only remaining key
  // The change is emitted even when clamping leaves the size unchanged.
  const change: NodeChange = {
    type: "dimensions",
    id: selectedId,
    dimensions: { width: newWidth, height: newHeight },
  };
  state.onNodesChange([change]);
}
};
window.addEventListener("keydown", handler);
return () => window.removeEventListener("keydown", handler);
@@ -1,339 +0,0 @@
/* Faithful port of the Org Concierge concept (molecule-concierge-v1).
Scoped under .root so the concept's generic class names (.btn, .view,
.msg, .node, etc.) cannot collide with the rest of the canvas app. Theme
tokens are redefined here (not the app tokens) so the port matches the
concept palette exactly; they key off the same [data-theme] on <html>. */
.root {
--mono: "JetBrains Mono", ui-monospace, monospace;
--sans: var(--font-hanken), "Hanken Grotesk", system-ui, sans-serif;
/* dark (default) */
--bg: #08080a; --panel: #0d0d11; --panel-2: #101015;
--card: #16161d; --card-2: #1b1b23; --card-hover: #1f1f28;
--hair: rgba(255,255,255,.07); --hair-2: rgba(255,255,255,.11);
--tx: #ececf1; --tx-2: #9b9baa; --tx-3: #65656f;
--accent: #8b5cf6; --accent-2: #a78bfa; --accent-soft: rgba(139,92,246,.14);
--green: #34d399; --green-soft: rgba(52,211,153,.13); --green-bd: rgba(52,211,153,.26);
--amber: #fbbf24; --grey: #6a6a78; --warn: #f5a623; --red: #f87171;
--dot: rgba(255,255,255,.06);
--shadow: 0 18px 50px rgba(0,0,0,.5);
--user-bubble-tx: #fff;
font-family: var(--sans);
background: var(--bg);
color: var(--tx);
font-size: 14px;
-webkit-font-smoothing: antialiased;
position: fixed;
inset: 0;
overflow: hidden;
}
:global([data-theme="light"]) .root {
--bg: #f1efe8; --panel: #fbfaf6; --panel-2: #f6f4ee;
--card: #ffffff; --card-2: #faf9f4; --card-hover: #f3f1ea;
--hair: rgba(20,18,12,.10); --hair-2: rgba(20,18,12,.16);
--tx: #21201b; --tx-2: #5c5a52; --tx-3: #8e8b81;
--accent: #7c3aed; --accent-2: #7c3aed; --accent-soft: rgba(124,58,237,.10);
--green: #0f9d63; --green-soft: rgba(15,157,99,.10); --green-bd: rgba(15,157,99,.24);
--amber: #c98a04; --grey: #a8a59b; --warn: #c47e12; --red: #dc4d4d;
--dot: rgba(20,18,12,.10);
--shadow: 0 18px 50px rgba(60,56,40,.14);
}
.root *, .root *::before, .root *::after { box-sizing: border-box; }
.root ::-webkit-scrollbar { width: 8px; height: 8px; }
.root ::-webkit-scrollbar-thumb { background: var(--hair-2); border-radius: 8px; }
.root ::-webkit-scrollbar-track { background: transparent; }
.app { display: flex; height: 100%; width: 100%; }
/* ===== ICON RAIL ===== */
.rail {
width: 52px; flex: 0 0 52px; background: var(--panel);
border-right: 1px solid var(--hair);
display: flex; flex-direction: column; padding: 12px 8px; gap: 3px;
transition: width .22s cubic-bezier(.4,0,.2,1), flex-basis .22s cubic-bezier(.4,0,.2,1);
overflow: hidden;
}
.app.railOpen .rail { width: 212px; flex-basis: 212px; }
.railTop { display: flex; align-items: center; gap: 8px; height: 36px; margin-bottom: 8px; }
.logo {
width: 36px; height: 36px; flex: 0 0 36px; border-radius: 10px; display: grid; place-items: center; cursor: pointer;
background: linear-gradient(150deg,#7c3aed,#a78bfa);
box-shadow: 0 4px 14px rgba(124,58,237,.45), inset 0 1px 0 rgba(255,255,255,.25);
}
.railWordmark { font-weight: 700; font-size: 14.5px; letter-spacing: -.01em; white-space: nowrap; opacity: 0; transition: opacity .16s; pointer-events: none; }
.app.railOpen .railWordmark { opacity: 1; transition: opacity .18s .08s; }
.railToggle { margin-left: auto; width: 30px; height: 30px; flex: 0 0 30px; border-radius: 8px; display: grid; place-items: center; color: var(--tx-3); cursor: pointer; transition: .16s; border: none; background: none; }
.railToggle:hover { color: var(--tx); background: var(--hair); }
.railToggle svg { width: 18px; height: 18px; }
.app:not(.railOpen) .railToggle { display: none; }
.navbtn { height: 40px; border-radius: 10px; color: var(--tx-3); cursor: pointer; position: relative; transition: .16s; display: flex; align-items: center; gap: 12px; padding: 0; justify-content: flex-start; width: 100%; background: none; border: none; }
.app.railOpen .navbtn { padding: 0 11px; }
.navbtn .ico { width: 36px; flex: 0 0 36px; display: grid; place-items: center; }
.app.railOpen .navbtn .ico { width: 20px; flex: 0 0 20px; }
.navbtn .lbl { font-size: 13.5px; font-weight: 500; white-space: nowrap; opacity: 0; transition: opacity .16s; pointer-events: none; }
.app.railOpen .navbtn .lbl { opacity: 1; transition: opacity .18s .08s; }
.navbtn:hover { color: var(--tx-2); background: var(--hair); }
.navbtn.active { color: var(--accent-2); background: var(--accent-soft); }
.navbtn.active::before { content: ""; position: absolute; left: -8px; top: 50%; transform: translateY(-50%); width: 3px; height: 18px; border-radius: 0 3px 3px 0; background: var(--accent-2); }
.navbtn svg { width: 20px; height: 20px; }
.spacer { flex: 1; }
/* ===== MAIN ===== */
.main { flex: 1; display: flex; flex-direction: column; min-width: 0; }
.topbar { height: 56px; flex: 0 0 56px; border-bottom: 1px solid var(--hair); background: var(--panel); display: flex; align-items: center; justify-content: space-between; padding: 0 18px 0 20px; }
.org { display: flex; align-items: center; gap: 10px; cursor: pointer; padding: 6px 10px; border-radius: 9px; transition: .16s; margin-left: -6px; }
.org:hover { background: var(--hair); }
.orgBadge { width: 24px; height: 24px; border-radius: 7px; display: grid; place-items: center; background: linear-gradient(150deg,#2d2d36,#3a3a46); font-size: 12px; font-weight: 700; color: #d8d8e2; border: 1px solid var(--hair-2); }
:global([data-theme="light"]) .orgBadge { background: linear-gradient(150deg,#7c3aed,#a78bfa); color: #fff; border: none; }
.orgName { font-weight: 600; font-size: 14.5px; letter-spacing: -.01em; }
.chev { color: var(--tx-3); display: flex; }
.chev svg { width: 15px; height: 15px; }
.topbarRight { display: flex; align-items: center; gap: 10px; }
.iconPill { width: 34px; height: 34px; border-radius: 9px; display: grid; place-items: center; color: var(--tx-3); cursor: pointer; transition: .16s; border: none; background: none; }
.iconPill:hover { color: var(--tx-2); background: var(--hair); }
.iconPill svg { width: 18px; height: 18px; }
.themeToggle { width: 34px; height: 34px; border-radius: 9px; display: grid; place-items: center; color: var(--tx-2); cursor: pointer; transition: .16s; border: 1px solid var(--hair); background: none; }
.themeToggle:hover { background: var(--hair); color: var(--tx); }
.themeToggle svg { width: 17px; height: 17px; }
.avatar { width: 32px; height: 32px; border-radius: 50%; background: linear-gradient(150deg,#f0a36b,#e8638a); display: grid; place-items: center; font-weight: 700; font-size: 12.5px; color: #1a0d12; cursor: pointer; border: 1px solid rgba(255,255,255,.16); box-shadow: 0 2px 8px rgba(0,0,0,.3); margin-left: 4px; }
/* ===== VIEWS ===== */
.viewArea { flex: 1; min-height: 0; position: relative; }
.view { position: absolute; inset: 0; display: none; }
.view.active { display: flex; }
/* A transform turns this into the containing block for its position:fixed
descendants so the canvas's own overlays (Toolbar, Legend, Communications,
New Workspace, minimap) anchor to THIS box (the map view area, right of the
rail and below the topbar) instead of the viewport, and stop overlapping the
shell chrome. */
.canvasMount { position: absolute; inset: 0; transform: translateZ(0); overflow: hidden; }
/* ===== HOME VIEW ===== */
.homeSidebar { flex: 0 0 296px; max-width: 296px; background: var(--panel-2); border-right: 1px solid var(--hair); display: flex; flex-direction: column; min-height: 0; }
.sbTabs { display: flex; gap: 2px; padding: 12px 12px 0; border-bottom: 1px solid var(--hair); }
.sbTab { flex: 1; text-align: center; padding: 9px 4px 11px; font-size: 12.5px; font-weight: 600; color: var(--tx-3); cursor: pointer; position: relative; transition: .14s; border-radius: 8px 8px 0 0; border: none; background: none; }
.sbTab:hover { color: var(--tx-2); }
.sbTab.active { color: var(--tx); }
.sbTab.active::after { content: ""; position: absolute; left: 8px; right: 8px; bottom: -1px; height: 2px; border-radius: 2px; background: var(--accent); }
.cnt { font-family: var(--mono); font-size: 10px; font-weight: 600; margin-left: 5px; background: var(--hair); color: var(--tx-2); padding: 1px 5px; border-radius: 10px; }
.sbTab.active .cnt { background: var(--accent-soft); color: var(--accent-2); }
.sbBody { flex: 1; overflow-y: auto; padding: 14px 12px; }
.wsList { display: flex; flex-direction: column; gap: 6px; }
.treeChildren { position: relative; padding-left: 22px; display: flex; flex-direction: column; gap: 6px; margin-top: 6px; }
.tnode { position: relative; display: flex; flex-direction: column; gap: 6px; }
.tnode::before { content: ""; position: absolute; left: -14px; top: -6px; width: 1.5px; height: calc(100% + 6px); background: var(--hair-2); }
.tnode.last::before { height: 33px; }
.tnode::after { content: ""; position: absolute; left: -14px; top: 27px; width: 14px; height: 1.5px; background: var(--hair-2); }
.ws { display: flex; align-items: center; gap: 11px; padding: 10px 11px; border-radius: 13px; cursor: pointer; border: 1px solid transparent; background: transparent; transition: .16s; position: relative; width: 100%; text-align: left; }
.ws:hover { background: var(--card); }
.ws.active { background: var(--accent-soft); border-color: rgba(139,92,246,.34); }
.wsAv { width: 34px; height: 34px; border-radius: 50%; flex: 0 0 34px; position: relative; display: grid; place-items: center; font-weight: 700; font-size: 12px; color: #0c0c10; box-shadow: inset 0 1px 0 rgba(255,255,255,.3); }
.wsAv .dot { position: absolute; right: -1px; bottom: -1px; width: 10px; height: 10px; border-radius: 50%; border: 2.5px solid var(--panel-2); }
.ws.active .wsAv .dot { border-color: var(--card); }
.wsMeta { min-width: 0; flex: 1; }
.wsName { font-weight: 600; font-size: 13.5px; letter-spacing: -.01em; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
.wsSub { display: flex; align-items: center; gap: 6px; margin-top: 1px; min-width: 0; }
.wsRole { font-family: var(--mono); font-size: 10.5px; color: var(--tx-3); white-space: nowrap; overflow: hidden; text-overflow: ellipsis; min-width: 0; flex: 0 1 auto; }
.wsStatus { font-size: 10.5px; font-weight: 500; display: flex; align-items: center; gap: 4px; flex: 0 0 auto; }
.wsStatus .sdot { width: 6px; height: 6px; border-radius: 50%; }
.rootTag { margin-left: auto; font-family: var(--mono); font-size: 9px; letter-spacing: .1em; text-transform: uppercase; color: var(--accent-2); background: var(--accent-soft); padding: 3px 6px; border-radius: 6px; border: 1px solid rgba(139,92,246,.28); }
.wsQ { margin-left: auto; flex: 0 0 auto; font-family: var(--mono); font-size: 10px; font-weight: 700; color: var(--tx-2); background: var(--hair); border: 1px solid var(--hair-2); padding: 2px 7px; border-radius: 20px; display: inline-flex; align-items: center; gap: 4px; }
.wsQ svg { width: 9px; height: 9px; color: var(--tx-3); }
.wsQ.zero { color: var(--tx-3); opacity: .65; }
.wsCaret { flex: 0 0 auto; width: 20px; height: 20px; margin-left: 4px; border: none; background: none; color: var(--tx-3); cursor: pointer; display: grid; place-items: center; border-radius: 6px; transition: .14s; }
.wsCaret:hover { background: var(--hair); color: var(--tx); }
.wsCaret svg { width: 13px; height: 13px; }
.sbSection { font-size: 11px; font-weight: 600; letter-spacing: .12em; text-transform: uppercase; color: var(--tx-3); font-family: var(--mono); padding: 18px 4px 10px; }
/* tasks */
.task { display: flex; flex-direction: column; align-items: stretch; gap: 0; padding: 11px; border-radius: 12px; border: 1px solid var(--hair); background: var(--card); margin-bottom: 7px; }
.taskRow { display: flex; gap: 11px; }
.taskIc { width: 28px; height: 28px; border-radius: 8px; flex: 0 0 28px; display: grid; place-items: center; }
.taskIc svg { width: 15px; height: 15px; }
.taskIc.done { background: var(--green-soft); color: var(--green); border: 1px solid var(--green-bd); }
.taskIc.run { background: rgba(245,166,35,.12); color: var(--amber); border: 1px solid rgba(245,166,35,.28); }
.taskIc.sched { background: var(--accent-soft); color: var(--accent-2); border: 1px solid rgba(139,92,246,.26); }
.taskMeta { flex: 1; min-width: 0; }
.taskT { font-size: 13px; font-weight: 600; letter-spacing: -.01em; line-height: 1.35; }
.taskS { font-size: 11px; color: var(--tx-3); margin-top: 3px; display: flex; align-items: center; gap: 6px; }
.taskS .pip { width: 4px; height: 4px; border-radius: 50%; background: var(--tx-3); }
.taskActions { display: flex; gap: 7px; margin-top: 11px; padding-left: 39px; }
.tbtn { font-family: var(--sans); font-size: 11.5px; font-weight: 600; cursor: pointer; padding: 5px 12px; border-radius: 8px; border: 1px solid var(--hair-2); background: var(--card-2); color: var(--tx-2); transition: .14s; display: inline-flex; align-items: center; gap: 5px; }
.tbtn svg { width: 13px; height: 13px; }
.tbtn:hover { background: var(--card-hover); color: var(--tx); }
.tbtn.done { background: var(--green-soft); color: var(--green); border-color: var(--green-bd); }
.task.isDone .taskT { color: var(--tx-2); }
/* activity */
.act { display: flex; gap: 11px; padding: 6px 4px; }
.actTime { font-family: var(--mono); font-size: 10.5px; color: var(--tx-3); flex: 0 0 52px; padding-top: 1px; font-variant-numeric: tabular-nums; }
.actLine { position: relative; padding-left: 15px; flex: 1; }
.actLine::before { content: ""; position: absolute; left: 0; top: 6px; width: 6px; height: 6px; border-radius: 50%; background: var(--accent); }
.actLine.grn::before { background: var(--green); }
.actText { font-size: 12px; color: var(--tx-2); line-height: 1.45; }
.actText b { color: var(--tx); font-weight: 600; }
/* approvals */
.apprCard { background: var(--card); border: 1px solid var(--hair); border-radius: 14px; overflow: hidden; }
.apprRow { display: flex; align-items: flex-start; gap: 11px; padding: 13px; }
.apprIc { width: 30px; height: 30px; border-radius: 8px; flex: 0 0 30px; display: grid; place-items: center; background: rgba(239,68,68,.12); color: var(--red); border: 1px solid rgba(239,68,68,.22); }
.apprIc svg { width: 15px; height: 15px; }
.apprMeta { flex: 1; min-width: 0; }
.apprT { font-size: 13px; font-weight: 600; letter-spacing: -.01em; line-height: 1.35; }
.apprT code { font-family: var(--mono); font-size: 11px; color: var(--tx-2); background: var(--hair); padding: 1px 5px; border-radius: 5px; font-weight: 500; }
.apprS { font-size: 11px; color: var(--tx-3); margin-top: 3px; }
.apprActions { display: flex; gap: 7px; padding: 0 13px 13px; }
.empty { text-align: center; color: var(--tx-3); font-size: 12.5px; padding: 30px 16px; line-height: 1.6; }
.empty svg { width: 30px; height: 30px; margin-bottom: 10px; color: var(--tx-3); opacity: .6; }
/* buttons */
.btn { font-family: var(--sans); font-size: 12px; font-weight: 600; cursor: pointer; padding: 6px 13px; border-radius: 8px; border: 1px solid var(--hair-2); background: var(--card-2); color: var(--tx-2); transition: .14s; white-space: nowrap; }
.btn:hover { background: var(--card-hover); color: var(--tx); }
.btn.approve { background: var(--accent); color: #fff; border-color: transparent; box-shadow: 0 2px 10px rgba(124,58,237,.4); }
.btn.approve:hover { background: #9d6ef8; }
.btn.deny:hover { background: rgba(239,68,68,.14); color: var(--red); border-color: rgba(239,68,68,.3); }
.btn.flex { flex: 1; text-align: center; }
/* ===== CHAT ===== */
.chat { flex: 1; display: flex; flex-direction: column; min-width: 0; background: var(--bg); }
.chatHead { height: 56px; flex: 0 0 56px; border-bottom: 1px solid var(--hair); display: flex; align-items: center; gap: 12px; padding: 0 22px; background: var(--panel-2); }
.chAv { width: 30px; height: 30px; border-radius: 9px; display: grid; place-items: center; background: linear-gradient(150deg,#7c3aed,#a78bfa); color: #fff; box-shadow: 0 2px 8px rgba(124,58,237,.4); }
.chAv svg { width: 16px; height: 16px; }
.chMeta { flex: 1; }
.chTitle { font-size: 14.5px; font-weight: 600; letter-spacing: -.01em; }
.chSub { font-size: 11.5px; color: var(--tx-3); display: flex; align-items: center; gap: 6px; margin-top: 1px; }
.chSub .sdot { width: 6px; height: 6px; border-radius: 50%; background: var(--green); }
.chTools { display: flex; gap: 6px; }
.chatScroll { flex: 1; overflow-y: auto; padding: 30px 0; }
.chatInner { max-width: 720px; margin: 0 auto; padding: 0 28px; display: flex; flex-direction: column; gap: 22px; }
.msg { display: flex; gap: 13px; max-width: 100%; }
.msg.user { flex-direction: row-reverse; }
.msgAv { width: 30px; height: 30px; border-radius: 9px; flex: 0 0 30px; display: grid; place-items: center; font-weight: 700; font-size: 12px; }
.msg.user .msgAv { background: linear-gradient(150deg,#f0a36b,#e8638a); color: #1a0d12; }
.msg.bot .msgAv { background: linear-gradient(150deg,#7c3aed,#a78bfa); color: #fff; }
.msg.bot .msgAv svg { width: 16px; height: 16px; }
.bubbleWrap { display: flex; flex-direction: column; gap: 11px; min-width: 0; max-width: 560px; }
.msg.user .bubbleWrap { align-items: flex-end; }
.bubble { padding: 12px 15px; border-radius: 15px; font-size: 14px; line-height: 1.55; letter-spacing: -.005em; }
.msg.user .bubble { background: var(--accent); color: var(--user-bubble-tx); border-bottom-right-radius: 5px; box-shadow: 0 3px 14px rgba(124,58,237,.3); }
.msg.bot .bubble { background: var(--card); border: 1px solid var(--hair); border-bottom-left-radius: 5px; color: var(--tx); }
.bubble b { font-weight: 600; }
.actionCard { background: var(--card); border: 1px solid var(--hair); border-radius: 14px; padding: 13px 15px; display: flex; align-items: center; gap: 13px; width: 100%; }
.acIc { width: 34px; height: 34px; border-radius: 10px; flex: 0 0 34px; display: grid; place-items: center; background: var(--green-soft); border: 1px solid var(--green-bd); color: var(--green); }
.acIc svg { width: 18px; height: 18px; }
.acMeta { flex: 1; min-width: 0; }
.acLabel { font-family: var(--mono); font-size: 10px; letter-spacing: .1em; text-transform: uppercase; color: var(--tx-3); margin-bottom: 3px; }
.acTitle { font-size: 13.5px; font-weight: 600; letter-spacing: -.01em; display: flex; align-items: center; gap: 7px; flex-wrap: wrap; }
.acTitle .pill { font-family: var(--mono); font-size: 11px; font-weight: 500; color: var(--accent-2); white-space: nowrap; background: var(--accent-soft); padding: 2px 8px; border-radius: 6px; border: 1px solid rgba(139,92,246,.24); }
.acCheck { color: var(--green); display: flex; }
.acCheck svg { width: 18px; height: 18px; }
.reqCard { background: linear-gradient(180deg,rgba(245,166,35,.08),rgba(245,166,35,.02)); border: 1px solid rgba(245,166,35,.3); border-radius: 16px; padding: 16px; width: 100%; }
.reqTop { display: flex; align-items: flex-start; gap: 13px; }
.reqIc { width: 36px; height: 36px; border-radius: 10px; flex: 0 0 36px; display: grid; place-items: center; background: rgba(245,166,35,.15); border: 1px solid rgba(245,166,35,.34); color: var(--warn); }
.reqIc svg { width: 19px; height: 19px; }
.reqMeta { flex: 1; }
.reqLabel { font-family: var(--mono); font-size: 10px; letter-spacing: .1em; text-transform: uppercase; color: var(--warn); margin-bottom: 4px; font-weight: 600; }
.reqTitle { font-size: 14.5px; font-weight: 600; letter-spacing: -.01em; line-height: 1.4; }
.reqTitle code { font-family: var(--mono); font-size: 12.5px; color: var(--amber); background: rgba(245,166,35,.12); padding: 1px 6px; border-radius: 5px; font-weight: 500; }
.reqDesc { font-size: 12.5px; color: var(--tx-2); margin-top: 6px; line-height: 1.5; }
.reqActions { display: flex; gap: 9px; margin-top: 14px; padding-left: 49px; }
.reqActions .btn { padding: 8px 18px; font-size: 12.5px; }
.composer { padding: 14px 28px 20px; border-top: 1px solid var(--hair); background: var(--panel-2); }
.composerInner { max-width: 720px; margin: 0 auto; }
.inputBox { background: var(--card); border: 1px solid var(--hair-2); border-radius: 16px; padding: 12px 12px 10px 16px; transition: .16s; }
.inputBox:focus-within { border-color: rgba(139,92,246,.5); box-shadow: 0 0 0 3px rgba(139,92,246,.12); }
.inputTop { display: flex; align-items: flex-end; gap: 10px; }
.msgInput { flex: 1; background: none; border: none; outline: none; color: var(--tx); font-family: var(--sans); font-size: 14px; line-height: 1.5; resize: none; max-height: 120px; padding: 5px 0; }
.msgInput::placeholder { color: var(--tx-3); }
.send { width: 36px; height: 36px; flex: 0 0 36px; border-radius: 11px; border: none; cursor: pointer; background: var(--accent); color: #fff; display: grid; place-items: center; transition: .16s; box-shadow: 0 2px 10px rgba(124,58,237,.4); }
.send:hover { background: #9d6ef8; transform: translateY(-1px); }
.send svg { width: 17px; height: 17px; }
.inputBottom { display: flex; align-items: center; gap: 10px; margin-top: 8px; }
.hint { margin-left: auto; font-size: 11px; color: var(--tx-3); font-family: var(--mono); }
.hint kbd { background: var(--hair); border: 1px solid var(--hair); border-radius: 4px; padding: 1px 5px; font-family: var(--mono); font-size: 10px; }
/* greeting (empty chat state) */
.greetWrap { flex: 1; display: flex; flex-direction: column; align-items: center; justify-content: center; gap: 26px; padding: 0 28px; }
.greet { display: flex; align-items: center; gap: 14px; font-size: 34px; font-weight: 400; letter-spacing: -.02em; color: var(--tx); }
.greet .stamp { color: #f0a36b; }
.greetChips { display: flex; flex-wrap: wrap; gap: 10px; justify-content: center; }
.chip { display: inline-flex; align-items: center; gap: 7px; font-size: 13px; font-weight: 600; color: var(--tx-2); background: var(--card); border: 1px solid var(--hair); padding: 8px 13px; border-radius: 10px; cursor: pointer; transition: .14s; }
.chip:hover { background: var(--card-hover); color: var(--tx); border-color: var(--hair-2); }
/* placeholder (settings) */
.ph { flex: 1; display: flex; flex-direction: column; align-items: center; justify-content: center; gap: 14px; color: var(--tx-3); text-align: center; }
.ph svg { width: 42px; height: 42px; opacity: .5; }
.ph h2 { font-size: 18px; font-weight: 600; color: var(--tx-2); }
.ph p { font-size: 13.5px; max-width: 340px; line-height: 1.55; }
/* settings view */
.settingsScroll { flex: 1; min-height: 0; overflow-y: auto; padding: 28px 32px 60px; }
.settingsInner { max-width: 720px; margin: 0 auto; display: flex; flex-direction: column; gap: 26px; }
.settingsHead { display: flex; flex-direction: column; gap: 5px; }
.settingsHead h1 { font-size: 21px; font-weight: 600; letter-spacing: -.01em; color: var(--tx); }
.settingsHead p { font-size: 13px; color: var(--tx-3); line-height: 1.55; max-width: 540px; }
.scard { background: var(--card); border: 1px solid var(--hair); border-radius: 14px; padding: 18px 20px; display: flex; flex-direction: column; gap: 14px; }
.scardHead { display: flex; flex-direction: column; gap: 4px; }
.scardTitle { font-size: 14.5px; font-weight: 600; color: var(--tx); display: flex; align-items: center; gap: 9px; }
.scardDesc { font-size: 12.5px; color: var(--tx-3); line-height: 1.5; }
/* billing radio options */
.optList { display: flex; flex-direction: column; gap: 10px; }
.opt { display: flex; gap: 12px; padding: 13px 14px; border: 1px solid var(--hair); border-radius: 11px; cursor: pointer; transition: .14s; background: var(--card-2); align-items: flex-start; }
.opt:hover { border-color: var(--hair-2); background: var(--card-hover); }
.opt.optActive { border-color: rgba(139,92,246,.5); background: var(--accent-soft); }
.optRadio { width: 16px; height: 16px; flex: 0 0 16px; border-radius: 50%; border: 2px solid var(--hair-2); margin-top: 2px; position: relative; transition: .14s; }
.opt.optActive .optRadio { border-color: var(--accent); }
.opt.optActive .optRadio::after { content: ""; position: absolute; inset: 2px; border-radius: 50%; background: var(--accent); }
.optBody { display: flex; flex-direction: column; gap: 3px; min-width: 0; }
.optTitle { font-size: 13px; font-weight: 600; color: var(--tx); display: flex; align-items: center; gap: 8px; }
.optDesc { font-size: 12px; color: var(--tx-3); line-height: 1.5; }
.optTag { font-family: var(--mono); font-size: 9.5px; font-weight: 600; letter-spacing: .06em; text-transform: uppercase; color: var(--green); background: var(--green-soft); border: 1px solid var(--green-bd); padding: 1px 7px; border-radius: 20px; }
.optTagCur { color: var(--accent-2); background: var(--accent-soft); border-color: rgba(139,92,246,.3); }
/* byok key entry */
.keyRow { display: flex; flex-direction: column; gap: 9px; padding: 14px; border: 1px solid var(--hair); border-radius: 11px; background: var(--card-2); }
.keyLabel { font-size: 11px; font-weight: 600; letter-spacing: .04em; color: var(--tx-2); font-family: var(--mono); }
.keyInputRow { display: flex; gap: 9px; }
.keyInput { flex: 1; min-width: 0; background: var(--panel); border: 1px solid var(--hair-2); border-radius: 8px; padding: 8px 11px; font-family: var(--mono); font-size: 12px; color: var(--tx); outline: none; transition: .14s; }
.keyInput:focus { border-color: var(--accent); }
.keyInput::placeholder { color: var(--tx-3); }
.keyNote { font-size: 11.5px; color: var(--tx-3); line-height: 1.5; }
.keyNote code { font-family: var(--mono); font-size: 11px; color: var(--tx-2); background: var(--hair); padding: 1px 5px; border-radius: 4px; }
.sMsg { font-size: 12px; padding: 8px 11px; border-radius: 8px; line-height: 1.45; }
.sMsgErr { color: var(--red); background: rgba(239,68,68,.12); border: 1px solid rgba(239,68,68,.28); }
.sMsgOk { color: var(--green); background: var(--green-soft); border: 1px solid var(--green-bd); }
.btn.primary { background: var(--accent); color: #fff; border-color: transparent; box-shadow: 0 2px 10px rgba(124,58,237,.4); }
.btn.primary:hover { background: #9d6ef8; }
.btn.primary:disabled { opacity: .4; cursor: default; box-shadow: none; }
/* embedded canvas settings tabs */
.embedSettings { border: 1px solid var(--hair); border-radius: 14px; overflow: hidden; background: var(--card); }
/* embedded full workspace tab panel (the SAME WorkspacePanelTabs the Org-map
SidePanel renders), pointed at the platform agent. A bordered card with a
bounded height + flex column so the tab body's own overflow-y scroller works
inside it (mirrors .embedChat's min-height:0 trick). */
.embedPanel {
border: 1px solid var(--hair);
border-radius: 14px;
overflow: hidden;
background: var(--card);
display: flex;
flex-direction: column;
min-height: 0;
height: 70vh;
max-height: 760px;
}
/* embedded canonical ChatTab (shared with the Org-map SidePanel).
Fills the chat column below the concierge header; min-height:0 lets the
ChatTab's own overflow-y scroller work inside the flex column. */
.embedChat { flex: 1; min-height: 0; display: flex; flex-direction: column; }
@@ -1,604 +0,0 @@
"use client";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useCanvasStore, type TopView } from "@/store/canvas";
import { WORKSPACE_KIND } from "@/lib/workspace-kind";
import { useTheme } from "@/lib/theme-provider";
import { api } from "@/lib/api";
import { showToast } from "@/components/Toaster";
import type { ActivityEntry } from "@/types/activity";
import { Canvas } from "@/components/Canvas";
import { CommunicationOverlay } from "@/components/CommunicationOverlay";
import { MessageFlightHome } from "./MessageFlightHome";
import { ChatTab } from "@/components/tabs/ChatTab";
import { WorkspacePanelTabs } from "@/components/WorkspacePanelTabs";
import { SettingsTabs } from "@/components/settings";
import s from "./Concierge.module.css";
import {
IcHome, IcOrgMap, IcSettings, IcSearch, IcBell, IcSun, IcMoon, IcChevDown,
IcQueue, IcCaret, IcMolecule, IcClock, IcCheck, IcTrash, IcChat,
} from "./icons";
/* ── status → concept palette ─────────────────────────────────────────── */
/** Colour token + display label for every workspace status the shell knows. */
const STATUS_PALETTE = new Map<string, { color: string; label: string }>([
  ["online", { color: "var(--green)", label: "online" }],
  // "provisioning" is surfaced to the user under the same label as "starting".
  ["provisioning", { color: "var(--amber)", label: "starting" }],
  ["starting", { color: "var(--amber)", label: "starting" }],
  ["degraded", { color: "var(--amber)", label: "degraded" }],
  ["building", { color: "var(--amber)", label: "building" }],
  ["failed", { color: "var(--red)", label: "failed" }],
  ["paused", { color: "var(--accent-2)", label: "paused" }],
]);

/**
 * Maps a workspace status string to the CSS colour variable and the label
 * used to render it.
 *
 * @param status Raw status value; may be empty or an unrecognised string.
 * @returns A fresh `{ color, label }` object. Unknown statuses are shown in
 *   grey under their own name, and an empty status is labelled "idle".
 */
function statusInfo(status: string): { color: string; label: string } {
  const known = STATUS_PALETTE.get(status);
  if (known !== undefined) {
    // Hand back a copy so callers never share (or mutate) the table entry.
    return { color: known.color, label: known.label };
  }
  return { color: "var(--grey)", label: status || "idle" };
}
/**
 * Fixed palette of CSS `linear-gradient(...)` values. `gradientFor` picks one
 * entry by hashing an id modulo this array's length, so adding, removing or
 * reordering entries changes which gradient each id maps to.
 */
const AV_GRADIENTS = [
"linear-gradient(150deg,#a78bfa,#7c3aed)",
"linear-gradient(150deg,#60a5fa,#3b82f6)",
"linear-gradient(150deg,#34d399,#10b981)",
"linear-gradient(150deg,#fbbf77,#f59e0b)",
"linear-gradient(150deg,#5eead4,#14b8a6)",
"linear-gradient(150deg,#f0a36b,#e8638a)",
];
/**
 * Derives a short upper-cased monogram from a display name.
 *
 * - No words (empty / whitespace-only name): returns "?".
 * - One word: its first two characters.
 * - Several words: first character of the first word + first character of
 *   the last word.
 *
 * Characters are taken per Unicode code point rather than per UTF-16 code
 * unit, so a name that starts with an emoji or another astral-plane character
 * never yields a lone (unrenderable) surrogate half.
 *
 * @param name Display name; surrounding and repeated whitespace is ignored.
 * @returns The monogram, or "?" when the name contains no words.
 */
function initials(name: string): string {
  const words = name.trim().split(/\s+/).filter(Boolean);
  const firstWord = words[0];
  if (firstWord === undefined) return "?";

  // Array.from iterates a string by code point, keeping surrogate pairs whole.
  const firstChars = Array.from(firstWord);
  if (words.length === 1) {
    return firstChars.slice(0, 2).join("").toUpperCase();
  }

  const lastWord = words[words.length - 1] ?? firstWord;
  const lastChars = Array.from(lastWord);
  return ((firstChars[0] ?? "") + (lastChars[0] ?? "")).toUpperCase();
}
/**
 * Deterministically picks an avatar gradient for an id.
 *
 * The id is folded into an unsigned 32-bit polynomial hash (multiplier 31)
 * over its UTF-16 code units, then reduced modulo the palette size, so the
 * same id always maps to the same `AV_GRADIENTS` entry.
 *
 * @param id Identifier to hash; an empty string hashes to 0.
 * @returns One of the `AV_GRADIENTS` entries.
 */
function gradientFor(id: string): string {
  // split("") yields one element per UTF-16 code unit, matching charCodeAt(i).
  const hash = id
    .split("")
    .reduce((acc, unit) => (acc * 31 + unit.charCodeAt(0)) >>> 0, 0);
  const slot = hash % AV_GRADIENTS.length;
  return AV_GRADIENTS[slot];
}
/** Identifier of a sidebar tab; `ConciergeShell` keeps the current one in its `sbTab` state. */
type SbTab = "agents" | "tasks" | "approvals";
/**
 * One row of the list fetched from `GET /approvals/pending`.
 * `workspace_id` and `id` together address the decide endpoint
 * (`/workspaces/{workspace_id}/approvals/{id}/decide`).
 */
interface PendingApproval {
id: string;
workspace_id: string;
workspace_name: string;
action: string;
// May be null — presumably when the requester supplied no reason (confirm against the API).
reason: string | null;
status: string;
// NOTE(review): presumably an ISO timestamp string — confirm against the API.
created_at: string;
}
/**
 * One row of the list fetched from `GET /user-tasks/pending`.
 * `workspace_id` and `id` together address the resolve endpoint
 * (`/workspaces/{workspace_id}/user-tasks/{id}/resolve`).
 */
interface UserTask {
id: string;
workspace_id: string;
workspace_name: string;
title: string;
// May be null — presumably when the task carries no extra detail (confirm against the API).
detail: string | null;
status: string;
// NOTE(review): presumably an ISO timestamp string — confirm against the API.
created_at: string;
}
/**
 * Formats an ISO timestamp as a short local clock time (e.g. "9:05 PM" in an
 * en-US locale; the exact text follows the runtime's default locale).
 *
 * @param iso Timestamp string; may be null, undefined or empty.
 * @returns The formatted time, or "" when the value is missing or unparseable.
 */
function clockTime(iso: string | null | undefined): string {
  const parsed = iso ? new Date(iso) : null;
  // An unparseable string produces an Invalid Date whose getTime() is NaN.
  const usable = parsed !== null && !Number.isNaN(parsed.getTime());
  return usable
    ? parsed.toLocaleTimeString([], { hour: "numeric", minute: "2-digit" })
    : "";
}
/**
 * Builds the human-readable line shown for an activity row.
 *
 * Prefers the row's `summary`. Otherwise falls back to its `activity_type`
 * with underscores turned into spaces ("activity" when the type is absent),
 * suffixed with " · <method>" when a method is present.
 *
 * @param a Activity row to describe.
 * @returns The display text for the row.
 */
function activityText(a: ActivityEntry): string {
  const { summary, activity_type: kind, method } = a;
  if (summary) {
    return summary;
  }
  const verb = kind?.replace(/_/g, " ") ?? "activity";
  if (method) {
    return `${verb} · ${method}`;
  }
  return verb;
}
/**
 * ConciergeShell — application shell made of an icon rail, a top bar and three
 * views switched by the store's `topView` ("home" | "map" | "settings").
 *
 * - Home: a sidebar with Agents / Tasks / Approvals sub-tabs next to a ChatTab
 *   pointed at the platform agent.
 * - Org map: mounts <Canvas /> and <CommunicationOverlay /> only while active.
 * - Settings: two tabs rendering WorkspacePanelTabs and SettingsTabs for the
 *   platform agent.
 *
 * The component takes no props; all state comes from `useCanvasStore`,
 * `useTheme`, local `useState`, and the `api` client.
 */
export function ConciergeShell() {
  const nodes = useCanvasStore((st) => st.nodes);
  const topView = useCanvasStore((st) => st.topView);
  const setTopView = useCanvasStore((st) => st.setTopView);
  const selectNode = useCanvasStore((st) => st.selectNode);
  const selectedNodeId = useCanvasStore((st) => st.selectedNodeId);
  const { resolvedTheme, setTheme } = useTheme();
  const [railOpen, setRailOpen] = useState(false);
  const [sbTab, setSbTab] = useState<SbTab>("agents");
  const [settingsTab, setSettingsTab] = useState<"platform" | "org">("platform");
  // Per-node collapsed flags for the agent tree, keyed by node id.
  const [collapsed, setCollapsed] = useState<Record<string, boolean>>({});

  // Dynamic org name for the topbar. Sourced from GET /org/identity
  // ({name} ← MOLECULE_ORG_NAME, added by a parallel backend change).
  // Falls back to "Molecule AI" when the endpoint 404s / errors or
  // returns an empty name, so the topbar never breaks before the backend
  // lands.
  const [orgName, setOrgName] = useState("Molecule AI");
  useEffect(() => {
    // `cancelled` stops a late response from setting state after cleanup.
    let cancelled = false;
    api
      .get<{ name?: string }>("/org/identity")
      .then((r) => {
        const name = (r?.name || "").trim();
        if (!cancelled && name) setOrgName(name);
      })
      .catch(() => {
        // No endpoint / not reachable — keep the "Molecule AI" fallback.
      });
    return () => {
      cancelled = true;
    };
  }, []);

  // Build the agent hierarchy from live nodes: nodes with a parentId are
  // grouped under that parent, everything else is a root.
  const { roots, childrenOf } = useMemo(() => {
    const childrenOf = new Map<string, typeof nodes>();
    const roots: typeof nodes = [];
    for (const n of nodes) {
      const p = n.data.parentId;
      if (p) {
        const arr = childrenOf.get(p) ?? [];
        arr.push(n);
        childrenOf.set(p, arr);
      } else {
        roots.push(n);
      }
    }
    return { roots, childrenOf };
  }, [nodes]);

  const platformRoot = useMemo(
    () =>
      // Resolve the platform agent by the authoritative kind='platform' marker
      // only — the backend in this branch always returns kind
      // (COALESCE(w.kind,'workspace')) and the map-side filter
      // (canvas-topology/Canvas/Toolbar) is kind-only, so the shell must not
      // disagree via a name/role heuristic. Fall back to the first root only as
      // graceful degradation if no node is tagged platform.
      roots.find((r) => r.data.kind === WORKSPACE_KIND.Platform) ??
      roots[0] ??
      null,
    [roots],
  );
  const platformId = platformRoot?.id ?? null;

  // ── live data: approvals + user-tasks (org-wide), activity (platform agent) ──
  const [approvals, setApprovals] = useState<PendingApproval[]>([]);
  const [userTasks, setUserTasks] = useState<UserTask[]>([]);
  const [activity, setActivity] = useState<ActivityEntry[]>([]);
  // Id of the approval / task whose request is in flight (null when idle).
  const [deciding, setDeciding] = useState<string | null>(null);
  const [resolving, setResolving] = useState<string | null>(null);

  // Both loaders reset their list to [] when the request fails.
  const loadApprovals = useCallback(() => {
    api.get<PendingApproval[]>("/approvals/pending")
      .then((r) => setApprovals(r ?? []))
      .catch(() => setApprovals([]));
  }, []);
  const loadUserTasks = useCallback(() => {
    api.get<UserTask[]>("/user-tasks/pending")
      .then((r) => setUserTasks(r ?? []))
      .catch(() => setUserTasks([]));
  }, []);
  useEffect(() => { loadApprovals(); loadUserTasks(); }, [loadApprovals, loadUserTasks]);

  // Recent activity is fetched for the platform agent only, and refetched
  // whenever the resolved platform id changes.
  useEffect(() => {
    if (!platformId) return;
    let cancelled = false;
    api.get<ActivityEntry[]>(`/workspaces/${platformId}/activity?limit=12`)
      .then((r) => { if (!cancelled) setActivity(r ?? []); })
      .catch(() => { if (!cancelled) setActivity([]); });
    return () => { cancelled = true; };
  }, [platformId]);

  // Record an approve/deny decision. Only one decision request runs at a
  // time; on success the approval is removed from the local list.
  const decide = useCallback(async (a: PendingApproval, decision: "approved" | "denied") => {
    if (deciding) return;
    setDeciding(a.id);
    try {
      await api.post(`/workspaces/${a.workspace_id}/approvals/${a.id}/decide`, {
        decision, decided_by: "human",
      });
      showToast(decision === "approved" ? "Approved" : "Denied", decision === "approved" ? "success" : "info");
      setApprovals((prev) => prev.filter((x) => x.id !== a.id));
    } catch {
      showToast("Failed to record decision", "error");
    } finally {
      setDeciding(null);
    }
  }, [deciding]);

  // Resolve a user task as done/dismissed. Mirrors `decide`: single in-flight
  // request, local removal on success, toast on failure.
  const resolveTask = useCallback(async (t: UserTask, status: "done" | "dismissed") => {
    if (resolving) return;
    setResolving(t.id);
    try {
      await api.post(`/workspaces/${t.workspace_id}/user-tasks/${t.id}/resolve`, {
        status, resolved_by: "human",
      });
      showToast(status === "done" ? "Marked done" : "Dismissed", status === "done" ? "success" : "info");
      setUserTasks((prev) => prev.filter((x) => x.id !== t.id));
    } catch {
      showToast("Failed to resolve task", "error");
    } finally {
      setResolving(null);
    }
  }, [resolving]);

  const nav = (v: TopView) => setTopView(v);

  /* ── agents tree (recursive) ──────────────────────────────────────── */
  // Renders one agent row plus, unless collapsed, its children one level deeper.
  function renderNode(n: (typeof nodes)[number], depth: number) {
    const kids = childrenOf.get(n.id) ?? [];
    const hasKids = kids.length > 0;
    const isCollapsed = collapsed[n.id];
    const st = statusInfo(n.data.status);
    const isRoot = depth === 0;
    const isPlatform = n.id === platformRoot?.id;
    const q = (n.data.activeTasks as number) ?? 0;
    // Role can be a long descriptor (e.g. "Coding Executor (Kimi) — …"); render
    // it compact (single-line, truncated by .wsRole) and surface the full text
    // on hover via the native tooltip.
    const roleLabel = isPlatform ? "platform" : n.data.role || "agent";
    const row = (
      <div
        role="button"
        tabIndex={0}
        data-testid="agent-tree-node"
        data-node-name={n.data.name}
        data-ws-id={n.id}
        data-platform={isPlatform ? "true" : "false"}
        data-depth={depth}
        className={`${s.ws} ${selectedNodeId === n.id ? s.active : ""}`}
        onClick={() => selectNode(n.id)}
        onKeyDown={(e) => {
          // Only react to keys pressed on the row itself. Key events from the
          // nested caret button bubble up here; calling preventDefault on them
          // would cancel the button's own Enter/Space activation and select
          // the row instead of toggling the subtree.
          if (e.target !== e.currentTarget) return;
          if (e.key === "Enter" || e.key === " ") {
            e.preventDefault();
            selectNode(n.id);
          }
        }}
      >
        <div className={s.wsAv} style={{ background: gradientFor(n.id) }}>
          {initials(n.data.name)}
          <span className={s.dot} style={{ background: st.color }} />
        </div>
        <div className={s.wsMeta}>
          <div className={s.wsName}>{n.data.name}</div>
          <div className={s.wsSub}>
            <span className={s.wsRole} title={roleLabel}>{roleLabel}</span>
            <span className={s.wsStatus} style={{ color: st.color }}>
              <span className={s.sdot} style={{ background: st.color }} />
              {st.label}
            </span>
          </div>
        </div>
        {isRoot && isPlatform ? (
          <span data-testid="agent-tree-root-tag" className={s.rootTag}>root</span>
        ) : (
          <span className={`${s.wsQ} ${q === 0 ? s.zero : ""}`} title="Tasks in queue">
            <IcQueue />
            {q}
          </span>
        )}
        {hasKids && (
          <button
            type="button"
            className={s.wsCaret}
            title="Expand / collapse"
            aria-expanded={!isCollapsed}
            onClick={(e) => {
              // Keep the click from also selecting the row.
              e.stopPropagation();
              setCollapsed((c) => ({ ...c, [n.id]: !c[n.id] }));
            }}
            style={{ transform: isCollapsed ? "none" : "rotate(90deg)", transition: "transform .18s" }}
          >
            <IcCaret />
          </button>
        )}
      </div>
    );
    return (
      <div key={n.id} className={s.tnode}>
        {row}
        {hasKids && !isCollapsed && (
          <div className={s.treeChildren}>
            {kids.map((k) => renderNode(k, depth + 1))}
          </div>
        )}
      </div>
    );
  }

  return (
    <div className={s.root}>
      {/* Envelope flies between agent rows on each delegate/message event. */}
      <MessageFlightHome />
      <div className={`${s.app} ${railOpen ? s.railOpen : ""}`}>
        {/* ICON RAIL */}
        <nav className={s.rail}>
          <div className={s.railTop}>
            <div className={s.logo} title="Toggle sidebar" onClick={() => setRailOpen((o) => !o)}>
              <IcMolecule />
            </div>
            <span className={s.railWordmark}>Molecule</span>
            <button className={s.railToggle} title="Collapse sidebar" onClick={() => setRailOpen((o) => !o)}>
              <IcOrgMap />
            </button>
          </div>
          <button data-testid="nav-home" className={`${s.navbtn} ${topView === "home" ? s.active : ""}`} title="Home" onClick={() => nav("home")}>
            <span className={s.ico}><IcHome /></span><span className={s.lbl}>Home</span>
          </button>
          <button data-testid="nav-map" className={`${s.navbtn} ${topView === "map" ? s.active : ""}`} title="Org map" onClick={() => nav("map")}>
            <span className={s.ico}><IcOrgMap /></span><span className={s.lbl}>Org map</span>
          </button>
          <div className={s.spacer} />
          <button data-testid="nav-settings" className={`${s.navbtn} ${topView === "settings" ? s.active : ""}`} title="Settings" onClick={() => nav("settings")}>
            <span className={s.ico}><IcSettings /></span><span className={s.lbl}>Settings</span>
          </button>
        </nav>
        <div className={s.main}>
          {/* TOPBAR */}
          <header className={s.topbar}>
            <div className={s.org}>
              <div className={s.orgBadge}>{initials(orgName).slice(0, 1)}</div>
              <span data-testid="topbar-org-name" className={s.orgName}>{orgName}</span>
              <span className={s.chev}><IcChevDown /></span>
            </div>
            <div className={s.topbarRight}>
              <button className={s.iconPill} title="Search"><IcSearch /></button>
              <button className={s.iconPill} title="Notifications"><IcBell /></button>
              <button
                className={s.themeToggle}
                title="Toggle theme"
                onClick={() => setTheme(resolvedTheme === "dark" ? "light" : "dark")}
              >
                {resolvedTheme === "dark" ? <IcMoon /> : <IcSun />}
              </button>
              <div className={s.avatar} title="You">HW</div>
            </div>
          </header>
          <div className={s.viewArea}>
            {/* HOME VIEW */}
            <div className={`${s.view} ${topView === "home" ? s.active : ""}`}>
              <aside className={s.homeSidebar}>
                <div className={s.sbTabs}>
                  <button data-testid="home-subtab-agents" className={`${s.sbTab} ${sbTab === "agents" ? s.active : ""}`} onClick={() => setSbTab("agents")}>Agents</button>
                  <button data-testid="home-subtab-tasks" className={`${s.sbTab} ${sbTab === "tasks" ? s.active : ""}`} onClick={() => setSbTab("tasks")}>
                    Tasks{userTasks.length > 0 && <span className={s.cnt}>{userTasks.length}</span>}
                  </button>
                  <button data-testid="home-subtab-approvals" className={`${s.sbTab} ${sbTab === "approvals" ? s.active : ""}`} onClick={() => setSbTab("approvals")}>
                    Approvals{approvals.length > 0 && <span className={s.cnt}>{approvals.length}</span>}
                  </button>
                </div>
                <div className={s.sbBody}>
                  {sbTab === "agents" && (
                    <>
                      <div className={s.wsList}>
                        {roots.length === 0 && (
                          <div className={s.empty}>No agents yet. Ask the concierge to spin up a team.</div>
                        )}
                        {roots.map((r) => renderNode(r, 0))}
                      </div>
                      <div className={s.sbSection}>Recent activity</div>
                      <div>
                        {activity.length === 0 && (
                          <div className={s.empty}>No recent activity yet.</div>
                        )}
                        {activity.map((a) => {
                          // Anything not flagged error/failed gets the green style.
                          const ok = a.status !== "error" && a.status !== "failed";
                          return (
                            <div key={a.id} className={s.act}>
                              <span className={s.actTime}>{clockTime(a.created_at)}</span>
                              <div className={`${s.actLine} ${ok ? s.grn : ""}`}>
                                <div className={s.actText}>{activityText(a)}</div>
                              </div>
                            </div>
                          );
                        })}
                      </div>
                    </>
                  )}
                  {sbTab === "tasks" && (
                    <>
                      {userTasks.length === 0 && (
                        <div className={s.empty}>Nothing needs you right now. When an agent needs you to do something, it shows up here.</div>
                      )}
                      {userTasks.map((t) => (
                        <div key={t.id} className={s.task}>
                          <div className={s.taskRow}>
                            <div className={`${s.taskIc} ${s.run}`}><IcClock /></div>
                            <div className={s.taskMeta}>
                              <div className={s.taskT}>{t.title}</div>
                              <div className={s.taskS}>
                                {t.workspace_name}<span className={s.pip} />asked {clockTime(t.created_at)}
                              </div>
                              {t.detail && (
                                <div style={{ fontSize: 12, color: "var(--tx-3)", marginTop: 6, lineHeight: 1.45 }}>
                                  {t.detail}
                                </div>
                              )}
                            </div>
                          </div>
                          <div className={s.taskActions}>
                            <button className={`${s.tbtn} ${s.done}`} disabled={resolving === t.id} onClick={() => resolveTask(t, "done")}>
                              <IcCheck />Done
                            </button>
                            <button className={s.tbtn} disabled={resolving === t.id} onClick={() => resolveTask(t, "dismissed")}>
                              Dismiss
                            </button>
                          </div>
                        </div>
                      ))}
                    </>
                  )}
                  {sbTab === "approvals" && (
                    <>
                      {approvals.length === 0 && (
                        <div className={s.empty}>No pending approvals. Destructive actions await sign-off here.</div>
                      )}
                      {approvals.map((a) => (
                        <div key={a.id} className={s.apprCard} style={{ marginBottom: 7 }}>
                          <div className={s.apprRow}>
                            <div className={s.apprIc}><IcTrash /></div>
                            <div className={s.apprMeta}>
                              <div className={s.apprT}>{a.action.replace(/_/g, " ")} <code>{a.workspace_name}</code></div>
                              <div className={s.apprS}>{a.reason || "destructive"}</div>
                            </div>
                          </div>
                          <div className={s.apprActions}>
                            <button className={`${s.btn} ${s.approve} ${s.flex}`} disabled={deciding === a.id} onClick={() => decide(a, "approved")}>
                              {deciding === a.id ? "…" : "Approve"}
                            </button>
                            <button className={`${s.btn} ${s.deny} ${s.flex}`} disabled={deciding === a.id} onClick={() => decide(a, "denied")}>
                              {deciding === a.id ? "…" : "Deny"}
                            </button>
                          </div>
                        </div>
                      ))}
                    </>
                  )}
                </div>
              </aside>
              {/* CHAT reuses the EXACT canonical chat the Org-map SidePanel
                  renders (My Chat / Agent Comms sub-tabs, attachments, history,
                  delivery-mode handling), pointed at the platform agent. A thin
                  concierge-styled header keeps the Home look; the ChatTab body
                  below is identical to the map path so features can't drift. */}
              {platformId && platformRoot ? (
                <section className={s.chat}>
                  <div className={s.chatHead}>
                    <div className={s.chAv}><IcChat /></div>
                    <div className={s.chMeta}>
                      <div className={s.chTitle}>{platformRoot.data.name ?? "Org Concierge"}</div>
                      <div className={s.chSub}>
                        {(() => {
                          // "degraded" is shown as online in the chat header.
                          const online =
                            platformRoot.data.status === "online" ||
                            platformRoot.data.status === "degraded";
                          return (
                            <>
                              <span
                                className={s.sdot}
                                style={{ background: online ? "var(--green)" : "var(--grey)" }}
                              />
                              {online ? "online" : statusInfo(platformRoot.data.status ?? "").label} · platform agent
                            </>
                          );
                        })()}
                      </div>
                    </div>
                  </div>
                  <div className={s.embedChat}>
                    <ChatTab key={platformId} workspaceId={platformId} data={platformRoot.data} />
                  </div>
                </section>
              ) : (
                <section className={s.chat}>
                  <div className={s.greetWrap}>
                    <div className={s.greet}>
                      <span className={s.stamp}></span> No platform agent yet
                    </div>
                  </div>
                </section>
              )}
            </div>
            {/* ORG MAP VIEW — the live canvas */}
            <div className={`${s.view} ${topView === "map" ? s.active : ""}`}>
              {topView === "map" && (
                <div className={s.canvasMount}>
                  <main aria-label="Agent canvas" style={{ position: "absolute", inset: 0 }}>
                    <Canvas />
                  </main>
                  <CommunicationOverlay />
                </div>
              )}
            </div>
            {/* SETTINGS VIEW */}
            <div className={`${s.view} ${topView === "settings" ? s.active : ""}`}>
              <div className={s.settingsScroll}>
                <div className={s.settingsInner}>
                  <div className={s.settingsHead}>
                    <h1>Settings</h1>
                    <p>
                      Org-level settings for the platform concierge. Configure the
                      concierge exactly like any workspace config.yaml, plugins
                      and skills, container/compute, display, channels, schedule
                      and secrets plus how it pays for model usage and org
                      identity.
                    </p>
                  </div>
                  {/* Two tabs instead of one long sheet: Platform agent
                      configuration vs Org & canvas settings. Reuses the same
                      .sbTabs purple-underline tab style as the Home sub-tabs. */}
                  <div className={s.sbTabs} role="tablist" aria-label="Settings sections">
                    <button
                      type="button"
                      role="tab"
                      data-testid="settings-tab-platform"
                      aria-selected={settingsTab === "platform"}
                      className={`${s.sbTab} ${settingsTab === "platform" ? s.active : ""}`}
                      onClick={() => setSettingsTab("platform")}
                    >
                      Platform agent configuration
                    </button>
                    <button
                      type="button"
                      role="tab"
                      data-testid="settings-tab-org"
                      aria-selected={settingsTab === "org"}
                      className={`${s.sbTab} ${settingsTab === "org" ? s.active : ""}`}
                      onClick={() => setSettingsTab("org")}
                    >
                      Org &amp; canvas settings
                    </button>
                  </div>
                  {/* Platform agent configuration: the FULL workspace tab UI
                      (Config, Plugins/Skills, Container, Display, Details,
                      Activity, Terminal, Channels, Schedule, Files, Memory,
                      Traces, Events, Audit), reusing the exact same
                      WorkspacePanelTabs the Org-map SidePanel renders so the two
                      surfaces can't drift. Pointed at the platform agent; the
                      panel owns its own local active-tab state so it doesn't
                      fight the map's node selection. */}
                  {settingsTab === "platform" && (
                    <div data-testid="settings-pane-platform" className={s.scard}>
                      <div className={s.scardHead}>
                        <div className={s.scardDesc}>
                          Update the concierge like any workspace: its config.yaml,
                          plugins &amp; skills, container/compute, display, channels,
                          schedule and more.
                        </div>
                      </div>
                      {platformRoot ? (
                        <div className={s.embedPanel}>
                          <WorkspacePanelTabs key={platformRoot.id} node={platformRoot} defaultTab="config" />
                        </div>
                      ) : (
                        <div className={s.scardDesc}>
                          No platform agent yet. Spin one up from Home to configure it.
                        </div>
                      )}
                    </div>
                  )}
                  {settingsTab === "org" && (
                    <div data-testid="settings-pane-org" className={s.scard}>
                      <div className={s.scardHead}>
                        <div className={s.scardDesc}>
                          Secrets, workspace tokens, org API keys and organization
                          identity. These also live behind the gear in the top bar.
                        </div>
                      </div>
                      {platformId && (
                        <div className={s.embedSettings}>
                          <SettingsTabs workspaceId={platformId} />
                        </div>
                      )}
                    </div>
                  )}
                </div>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}
@@ -1,50 +0,0 @@
/** MessageFlightHome — the concierge-home counterpart of MessageFlightLayer.
 * The home view is a vertical agent tree (not a spatial canvas), so an envelope
 * flies between the source and target agent ROWS. It shares the exact same
 * flight stream (useA2AFlights) as the canvas, and resolves endpoints from each
 * row's DOM rect (rows carry data-ws-id). Reduced motion is honoured by the
 * shared hook (it emits no flights). */
import { useRef } from "react";
import { useA2AFlights, type A2AFlight } from "@/hooks/useA2AFlights";
import { FlightEnvelope, type Point } from "../FlightEnvelope";
/**
 * Returns the viewport-space centre of the element tagged
 * `data-ws-id="<wsId>"`, or `null` when there is no `document` (non-browser
 * environment) or no matching element.
 *
 * @param wsId Workspace id to look up; matched against the `data-ws-id` attribute.
 * @returns Centre of the element's bounding client rect, or `null`.
 */
function rowCenter(wsId: string): Point | null {
  if (typeof document === "undefined") return null;
  // The id is interpolated into a double-quoted attribute selector, so it must
  // be escaped. Prefer CSS.escape; where it is unavailable, escape the two
  // characters that would otherwise terminate or corrupt the quoted string
  // (an unescaped `"` makes querySelector throw a SyntaxError).
  const escapedId =
    typeof CSS !== "undefined" && typeof CSS.escape === "function"
      ? CSS.escape(wsId)
      : wsId.replace(/["\\]/g, "\\$&");
  const el = document.querySelector<HTMLElement>(`[data-ws-id="${escapedId}"]`);
  if (!el) return null;
  const r = el.getBoundingClientRect();
  return { x: r.left + r.width / 2, y: r.top + r.height / 2 };
}
/** Renders a single flight between two agent rows. The source/target row
 * centres are resolved via rowCenter and cached in a ref the first time both
 * are found, so later re-renders or scrolling mid-flight reuse the same
 * endpoints and do not restart the animation. Renders nothing while either
 * row cannot be found in the tree. */
function HomeFlight({ flight }: { flight: A2AFlight }) {
  const endpoints = useRef<{ from: Point; to: Point } | null>(null);

  if (endpoints.current === null) {
    const source = rowCenter(flight.sourceId);
    const target = rowCenter(flight.targetId);
    if (source && target) {
      endpoints.current = { from: source, to: target };
    }
  }

  const resolved = endpoints.current;
  // One or both agents are not visible in the tree.
  if (!resolved) return null;

  return <FlightEnvelope from={resolved.from} to={resolved.to} kind={flight.kind} />;
}
/** Fixed, click-through overlay that renders one HomeFlight per active flight
 * reported by useA2AFlights. Renders nothing when there are no flights. */
export function MessageFlightHome() {
  const activeFlights = useA2AFlights();
  if (activeFlights.length === 0) return null;

  const envelopes = activeFlights.map((flight) => (
    <HomeFlight key={flight.key} flight={flight} />
  ));

  return (
    <div
      aria-hidden="true"
      style={{ position: "fixed", inset: 0, pointerEvents: "none", zIndex: 50 }}
    >
      {envelopes}
    </div>
  );
}

Some files were not shown because too many files have changed in this diff Show More