Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| ecdbd2edee |
+1
-1
@@ -51,7 +51,7 @@ MOLECULE_ENV=development # Environment label (development/
|
||||
# MOLECULE_IN_DOCKER= # Set when running the platform inside Docker (accepts 1/0, true/false). Triggers A2A proxy to rewrite 127.0.0.1:<port> agent URLs to Docker bridge hostnames. Auto-detected via /.dockerenv; only set if detection fails or to force off.
|
||||
|
||||
# GitHub
|
||||
# GITHUB_REPO=owner/repo # Target repo for agent initial_prompt clone (e.g. Molecule-AI/molecule-core). Read inside workspace containers.
|
||||
# GITHUB_REPO=owner/repo # Target repo for agent initial_prompt clone (e.g. Molecule-AI/molecule-monorepo). Read inside workspace containers.
|
||||
# GITHUB_TOKEN= # Personal access token / installation token used by agents that clone private repos. Register as a global secret via POST /admin/secrets for propagation to workspace env. Token is used in-URL during clone and then scrubbed from .git/config via `git remote set-url`.
|
||||
|
||||
# Webhooks
|
||||
|
||||
@@ -18,24 +18,15 @@
|
||||
# per §SOP-6 security model). No-op when merged=false.
|
||||
#
|
||||
# Required env (set by the workflow):
|
||||
# GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER
|
||||
# plus one of REQUIRED_CHECKS_JSON (preferred) or REQUIRED_CHECKS (legacy)
|
||||
# GITEA_TOKEN, GITEA_HOST, REPO, PR_NUMBER, REQUIRED_CHECKS
|
||||
#
|
||||
# REQUIRED_CHECKS_JSON is a JSON object keyed by branch name. Each value
|
||||
# is an array of status-check context names that branch protection
|
||||
# requires for that branch. The script looks up the PR's base branch and
|
||||
# evaluates only the checks declared for that branch.
|
||||
#
|
||||
# {"main": ["CI / all-required (pull_request)", ...],
|
||||
# "staging": ["CI / all-required (pull_request)", ...]}
|
||||
#
|
||||
# REQUIRED_CHECKS (legacy) is a newline-separated list used when the
|
||||
# JSON variable is not set. Declared in the workflow YAML rather than
|
||||
# fetched from /branch_protections (which needs admin scope — sop-tier-bot
|
||||
# has read-only). Trade dynamism for simplicity: when the required-check
|
||||
# set changes, update both branch protection AND this env. Keeping them
|
||||
# in sync is less complexity than granting the audit bot admin perms on
|
||||
# every repo.
|
||||
# REQUIRED_CHECKS is a newline-separated list of status-check context
|
||||
# names that branch protection requires. Declared in the workflow YAML
|
||||
# rather than fetched from /branch_protections (which needs admin
|
||||
# scope — sop-tier-bot has read-only). Trade dynamism for simplicity:
|
||||
# when the required-check set changes, update both branch protection
|
||||
# AND this env. Keeping them in sync is less complexity than granting
|
||||
# the audit bot admin perms on every repo.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -43,10 +34,7 @@ set -euo pipefail
|
||||
: "${GITEA_HOST:?required}"
|
||||
: "${REPO:?required}"
|
||||
: "${PR_NUMBER:?required}"
|
||||
if [ -z "${REQUIRED_CHECKS_JSON:-}" ] && [ -z "${REQUIRED_CHECKS:-}" ]; then
|
||||
echo "::error::Either REQUIRED_CHECKS_JSON or REQUIRED_CHECKS must be set"
|
||||
exit 1
|
||||
fi
|
||||
: "${REQUIRED_CHECKS:?required (newline-separated context names)}"
|
||||
|
||||
OWNER="${REPO%%/*}"
|
||||
NAME="${REPO##*/}"
|
||||
@@ -77,14 +65,10 @@ if [ -z "$MERGE_SHA" ]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# 2. Required status checks — branch-aware JSON dict takes precedence.
|
||||
if [ -n "${REQUIRED_CHECKS_JSON:-}" ]; then
|
||||
REQUIRED=$(echo "$REQUIRED_CHECKS_JSON" | jq -r --arg branch "$BASE_BRANCH" '.[$branch] // [] | .[]')
|
||||
else
|
||||
REQUIRED="$REQUIRED_CHECKS"
|
||||
fi
|
||||
# 2. Required status checks declared in the workflow env.
|
||||
REQUIRED="$REQUIRED_CHECKS"
|
||||
if [ -z "${REQUIRED//[[:space:]]/}" ]; then
|
||||
echo "::notice::REQUIRED_CHECKS empty for branch '$BASE_BRANCH' — force-merge not applicable."
|
||||
echo "::notice::REQUIRED_CHECKS empty — force-merge not applicable."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
@@ -466,40 +466,12 @@ def fetch_log(target_url: str) -> str | None:
|
||||
|
||||
def grep_fail_markers(log_text: str) -> list[str]:
|
||||
"""Return up to 5 sample matching lines for any FAIL_PATTERNS hit.
|
||||
Empty list = clean log.
|
||||
|
||||
Heuristic: skip lines where the marker appears inside script source
|
||||
(e.g. ``echo "::error::..."`` in a ``::group::Run`` block) rather
|
||||
than actual execution output. The Gitea Actions log prints the raw
|
||||
script before executing it; ``echo "::error::"`` lines in that
|
||||
display are false positives.
|
||||
"""
|
||||
Empty list = clean log."""
|
||||
matches: list[str] = []
|
||||
in_run_group = False
|
||||
group_depth = 0
|
||||
for line in log_text.splitlines():
|
||||
stripped = line.strip()
|
||||
# Track Gitea Actions group markers so we can skip the
|
||||
# ``::group::Run`` script-source display blocks.
|
||||
if stripped.startswith("::group::Run"):
|
||||
in_run_group = True
|
||||
group_depth = 1
|
||||
continue
|
||||
if stripped == "::endgroup::":
|
||||
if in_run_group:
|
||||
in_run_group = False
|
||||
group_depth = 0
|
||||
continue
|
||||
if in_run_group:
|
||||
continue
|
||||
for pat in FAIL_PATTERNS:
|
||||
if pat in line:
|
||||
# Additional false-positive guard: ``echo "::error::"``
|
||||
# is script source, not a runtime error emission.
|
||||
if pat == "::error::":
|
||||
prefix = line[: line.index(pat)].strip()
|
||||
if prefix.endswith('echo') or prefix.endswith("echo '") or prefix.endswith('echo "'):
|
||||
break
|
||||
# Truncate to keep error output bounded.
|
||||
matches.append(line.strip()[:240])
|
||||
break
|
||||
if len(matches) >= 5:
|
||||
|
||||
@@ -208,61 +208,6 @@ def _raise_for_redeploy_result(status: int, body: dict, slugs: list[str]) -> Non
|
||||
)
|
||||
|
||||
|
||||
def rollout_stragglers(enumerated: list[str], results: list[dict]) -> list[str]:
    """Return every enumerated tenant NOT proven on the target build.

    A straggler is any tenant the rollout was supposed to cover that the
    CP could not verify is running the target image tag — whether it
    errored, was skipped, or SSM-succeeded onto the wrong image
    (internal#724). CP marks each per-tenant result row with
    ``verified_on_target`` (the REDEPLOY_RUNNING_IMAGE docker-inspect
    proof). A tenant enumerated for the rollout but absent from the
    result set (no batch ever ran it) is also a straggler — that is the
    exact agents-team silent-skip class.

    Backward-compat: an OLDER CP that doesn't emit ``verified_on_target``
    yet returns rows without the key. Treat a missing key as verified so
    this surfacing degrades to the previous (ok-based) behavior against an
    un-upgraded CP, rather than failing every deploy spuriously. Once the
    CP fix is deployed the key is always present and real stragglers are
    caught.

    Args:
        enumerated: Slugs the rollout was supposed to cover. Duplicates are
            collapsed and surrounding whitespace is ignored, mirroring the
            normalization applied to result-row slugs.
        results: Per-tenant result rows. Rows that are not dicts, rows with
            ``ssm_status == "DryRun"`` and rows without a slug prove nothing
            and are ignored.

    Returns:
        The sorted, de-duplicated list of enumerated slugs that are not
        verified on the target build.
    """
    verified: set[str] = set()
    for row in results:
        # A malformed (non-mapping) row cannot prove anything; skip it
        # instead of crashing the whole coverage gate on `.get`.
        if not isinstance(row, dict):
            continue
        if str(row.get("ssm_status") or "") == "DryRun":
            continue
        slug = str(row.get("slug") or "").strip()
        if not slug:
            continue
        # Missing key (old CP) => assume verified; present key is authoritative.
        if "verified_on_target" not in row or row.get("verified_on_target"):
            verified.add(slug)

    # Normalize the enumerated side exactly like the result side so that a
    # whitespace-padded slug is not reported as a false straggler. Blank
    # entries are kept (as "") so the gate still fails closed on them.
    wanted = {str(slug or "").strip() for slug in enumerated}
    return sorted(wanted - verified)
|
||||
|
||||
|
||||
def assert_full_coverage(enumerated: list[str], aggregate: dict, dry_run: bool) -> None:
    """Abort the rollout when an enumerated tenant is not on the target build.

    No-silent-skip gate (internal#724). Nothing lands during a dry run, so
    coverage is only asserted for real runs.

    Args:
        enumerated: Slugs the rollout was supposed to cover.
        aggregate: Rollout summary; its ``results`` rows are inspected. On
            failure it is mutated in place (``ok``, ``error``, ``stragglers``).
        dry_run: When true the check is skipped entirely.

    Raises:
        RolloutFailed: If at least one enumerated tenant is a straggler.
    """
    if dry_run:
        return

    rows = aggregate.get("results") or []
    stragglers = rollout_stragglers(enumerated, rows)
    if not stragglers:
        return

    names = ', '.join(stragglers)
    msg = (
        f"incomplete rollout: {len(stragglers)} tenant(s) not verified on target "
        f"after redeploy-fleet: {names} "
        f"(enumerated {len(set(enumerated))})"
    )
    # Record the failure on the aggregate before raising so callers that
    # catch the exception still see the straggler list.
    aggregate["ok"] = False
    aggregate["error"] = msg
    aggregate["stragglers"] = stragglers
    raise RolloutFailed(msg, aggregate)
|
||||
|
||||
|
||||
def execute_scoped_rollout(
|
||||
plan: dict,
|
||||
token: str,
|
||||
@@ -309,14 +254,6 @@ def execute_scoped_rollout(
|
||||
aggregate["error"] = str(exc)
|
||||
raise RolloutFailed(str(exc), aggregate) from exc
|
||||
|
||||
# No-silent-skip coverage gate (internal#724): every enumerated tenant
|
||||
# must be PROVEN on the target build. A per-tenant HTTP-200/ok response
|
||||
# is not proof — a tenant that SSM-succeeded but stayed on the old tag,
|
||||
# or one enumerated but never batched, is a straggler. Surfacing it as
|
||||
# a RolloutFailed makes the deploy step exit non-zero instead of
|
||||
# silently reporting success (the exact agents-team failure mode).
|
||||
assert_full_coverage(all_slugs, aggregate, dry_run)
|
||||
|
||||
return aggregate
|
||||
|
||||
|
||||
|
||||
@@ -296,15 +296,7 @@ fi
|
||||
# 403 → token owner is not in this team (Gitea 1.22.6 'Must be a team
|
||||
# member' constraint — see follow-up issue for token-provisioning)
|
||||
# 404 → not a member
|
||||
# Track whether every candidate returned 403 (token owner not in team).
|
||||
# When this happens the root cause is a token-provisioning issue, not a
|
||||
# reviewer-eligibility issue — surface it clearly so ops don't waste time
|
||||
# verifying team roster (Bug C / RFC#324 follow-up).
|
||||
_ALL_CANDIDATES_403="yes"
|
||||
_CANDIDATE_COUNT=0
|
||||
|
||||
for U in $CANDIDATES; do
|
||||
_CANDIDATE_COUNT=$((_CANDIDATE_COUNT + 1))
|
||||
CODE=$(curl -sS -o "$TEAM_PROBE_TMP" -w '%{http_code}' \
|
||||
-K "$CURL_AUTH_FILE" "${API}/teams/${TEAM_ID}/members/${U}")
|
||||
debug "probe ${U} in team ${TEAM} (id=${TEAM_ID}) → HTTP ${CODE}"
|
||||
@@ -325,20 +317,14 @@ for U in $CANDIDATES; do
|
||||
continue
|
||||
;;
|
||||
404)
|
||||
_ALL_CANDIDATES_403="no"
|
||||
debug "${U} not a member of ${TEAM}"
|
||||
;;
|
||||
*)
|
||||
_ALL_CANDIDATES_403="no"
|
||||
echo "::warning::team-probe for ${U} in ${TEAM} returned unexpected HTTP ${CODE}"
|
||||
cat "$TEAM_PROBE_TMP" >&2
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ "$_ALL_CANDIDATES_403" = "yes" ] && [ "$_CANDIDATE_COUNT" -gt 0 ]; then
|
||||
echo "::error::${TEAM}-review FAILED — every candidate returned 403 (token owner is not a member of the ${TEAM} team). This is a TOKEN PROVISIONING issue, not a reviewer-eligibility issue. Add the token owner to the '${TEAM}' Gitea team (id=${TEAM_ID}) or use a token whose owner is already in that team."
|
||||
else
|
||||
echo "::error::${TEAM}-review awaiting non-author APPROVE from ${TEAM} team (candidates: $(echo "$CANDIDATES" | tr '\n' ',' | sed 's/,$//') — none are in team)"
|
||||
fi
|
||||
echo "::error::${TEAM}-review awaiting non-author APPROVE from ${TEAM} team (candidates: $(echo "$CANDIDATES" | tr '\n' ',' | sed 's/,$//') — none are in team)"
|
||||
exit 1
|
||||
|
||||
@@ -13,26 +13,20 @@ set -euo pipefail
|
||||
OWNER="${REPO%%/*}"
|
||||
NAME="${REPO##*/}"
|
||||
API="https://${GITEA_HOST}/api/v1"
|
||||
# Branch-protection requires the (pull_request_target) context variant.
|
||||
# The refire path must post the EXACT BP-required name so the gate flips.
|
||||
CONTEXT="${TEAM}-review / approved (pull_request_target)"
|
||||
CONTEXT="${TEAM}-review / approved (pull_request)"
|
||||
TARGET_URL="https://${GITEA_HOST}/${OWNER}/${NAME}/pulls/${PR_NUMBER}"
|
||||
|
||||
authfile=$(mktemp)
|
||||
post_authfile=$(mktemp)
|
||||
prfile=$(mktemp)
|
||||
postfile=$(mktemp)
|
||||
# shellcheck disable=SC2329 # invoked by EXIT trap
|
||||
cleanup() {
|
||||
rm -f "$authfile" "$post_authfile" "$prfile" "$postfile"
|
||||
rm -f "$authfile" "$prfile" "$postfile"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
chmod 600 "$authfile" "$post_authfile"
|
||||
chmod 600 "$authfile"
|
||||
printf 'header = "Authorization: token %s"\n' "$GITEA_TOKEN" > "$authfile"
|
||||
# STATUS_POST_TOKEN is narrow-scoped write:repository for explicit status POST.
|
||||
# Falls back to GITEA_TOKEN for backward compatibility (e.g. local test).
|
||||
printf 'header = "Authorization: token %s"\n' "${STATUS_POST_TOKEN:-$GITEA_TOKEN}" > "$post_authfile"
|
||||
|
||||
code=$(curl -sS -o "$prfile" -w '%{http_code}' -K "$authfile" \
|
||||
"${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}")
|
||||
@@ -74,7 +68,7 @@ body=$(jq -nc \
|
||||
'{state:$state, context:$context, description:$description, target_url:$target_url}')
|
||||
|
||||
code=$(curl -sS -o "$postfile" -w '%{http_code}' -X POST \
|
||||
-K "$post_authfile" -H "Content-Type: application/json" \
|
||||
-K "$authfile" -H "Content-Type: application/json" \
|
||||
-d "$body" \
|
||||
"${API}/repos/${OWNER}/${NAME}/statuses/${head_sha}")
|
||||
if [ "$code" != "200" ] && [ "$code" != "201" ]; then
|
||||
|
||||
@@ -6,8 +6,8 @@
|
||||
# RFC#351 Step 2 of 6 (implementation MVP).
|
||||
#
|
||||
# Invoked by .gitea/workflows/sop-checklist.yml on:
|
||||
# - pull_request_target: [opened, edited, synchronize, reopened, labeled, unlabeled]
|
||||
# - issue_comment: [created] # edited/deleted omitted (Gitea 1.22.6 job-parsing quirk)
|
||||
# - pull_request_target: [opened, edited, synchronize, reopened]
|
||||
# - issue_comment: [created, edited, deleted]
|
||||
#
|
||||
# Flow:
|
||||
# 1. Load .gitea/sop-checklist-config.yaml (from BASE ref — trusted).
|
||||
@@ -639,7 +639,9 @@ def load_config(path: str) -> dict[str, Any]:
|
||||
# yaml is an optional dep; the canonical loader is used when available,
|
||||
# but the SOP runs on runners that may not have PyYAML installed. The
|
||||
# fallback _load_config_minimal covers the same config shape without
|
||||
import yaml # type: ignore[import-not-found] # optional dep; fall back silently if absent
|
||||
# requiring the dep, so the ignore is safe: if yaml loads, we use it;
|
||||
# otherwise we fall back silently.
|
||||
import yaml # type: ignore[import-not-found]
|
||||
with open(path, encoding="utf-8") as f:
|
||||
return yaml.safe_load(f)
|
||||
except ImportError:
|
||||
@@ -895,47 +897,6 @@ def resolve_required_teams(item: dict[str, Any], high_risk: bool) -> list[str]:
|
||||
return list(item.get("required_teams") or [])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CI status validation for testing-class AI acks (internal#760 CTO hardening)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Slugs that require CI / all-required green before an AI ack is valid.
|
||||
_TESTING_CLASS_SLUGS = {"comprehensive-testing", "local-postgres-e2e", "staging-smoke"}
|
||||
|
||||
# Human-only carve-out: these items can NEVER be acked by AI, regardless
|
||||
# of config drift. Any item in this set MUST NOT have ai_ack_eligible.
|
||||
# migration / schema are future-proofing — not yet in config items, but
|
||||
# the code guard rejects them proactively (CTO hardening, msg 1388c76f).
|
||||
_HUMAN_ONLY_SLUGS = {"root-cause", "no-backwards-compat", "migration", "schema"}
|
||||
|
||||
|
||||
def get_ci_status(
    client: GiteaClient,
    owner: str,
    repo: str,
    sha: str,
    context: str = "CI / all-required (pull_request)",
) -> str:
    """Return the state of one commit-status context for `sha`.

    Looks through the commit statuses and returns the state of the first
    entry whose context equals `context`. This prevents an AI agent from
    attesting "tests pass" independently of the actual CI run.

    Args:
        client: Gitea client; only its ``_req(method, path)`` call is used.
        owner: Repository owner.
        repo: Repository name.
        sha: Commit SHA whose statuses are inspected.
        context: Status-check context to look for. Defaults to the
            CI / all-required (pull_request) sentinel.

    Returns:
        The reported state string ("success", "failure", "pending",
        "error"), "missing" if the context is not found or the response is
        empty / not a list, or "unknown" if the request did not return
        HTTP 200 or the matching entry carries no state.
    """
    code, data = client._req(  # noqa: SLF001
        "GET", f"/repos/{owner}/{repo}/statuses/{sha}"
    )
    if code != 200:
        return "unknown"
    if not data or not isinstance(data, list):
        return "missing"
    # Gitea returns statuses newest-first, so the first match is the latest
    # result for the context.
    # NOTE(review): only the first page of statuses is inspected — confirm
    # that is enough for commits with many status posts.
    for status in data:
        # Skip malformed entries instead of crashing on `.get`.
        if not isinstance(status, dict):
            continue
        if status.get("context") == context:
            # `or` (not a .get default) so an explicit null state cannot
            # leak None out of a function annotated to return str.
            return status.get("state") or "unknown"
    return "missing"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
|
||||
p = argparse.ArgumentParser()
|
||||
p.add_argument("--owner", required=True)
|
||||
@@ -1029,9 +990,6 @@ def main(argv: list[str] | None = None) -> int:
|
||||
# one membership lookup per team.
|
||||
team_member_cache: dict[tuple[str, int], bool | None] = {}
|
||||
|
||||
# Pre-resolve the ai-sop-ack team id once (None if the team does not exist).
|
||||
ai_sop_ack_team_id = client.resolve_team_id(args.owner, "ai-sop-ack")
|
||||
|
||||
def probe(slug: str, users: list[str]) -> list[str]:
|
||||
# `slug` may be either an items-key (compute_ack_state caller) OR
|
||||
# an n/a-gate key (compute_na_state caller). Previously this hard
|
||||
@@ -1075,7 +1033,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||
for t in data:
|
||||
if t.get("name") == tn:
|
||||
tid = t.get("id")
|
||||
client._team_id_cache[(args.owner, tn)] = tid # noqa: SLF001 # write-through cache; intentional side-effect for reuse across calls
|
||||
client._team_id_cache[(args.owner, tn)] = tid # noqa: SLF001 # internal write-through cache
|
||||
break
|
||||
if tid is not None:
|
||||
team_ids.append(tid)
|
||||
@@ -1086,18 +1044,14 @@ def main(argv: list[str] | None = None) -> int:
|
||||
file=sys.stderr,
|
||||
)
|
||||
approved: list[str] = []
|
||||
rejected_ai_ineligible: list[str] = []
|
||||
rejected_ci_not_green: list[str] = []
|
||||
for u in users:
|
||||
# 1) Human required_teams membership check
|
||||
in_human_team = False
|
||||
for tid in team_ids:
|
||||
cache_key = (u, tid)
|
||||
if cache_key not in team_member_cache:
|
||||
team_member_cache[cache_key] = client.is_team_member(tid, u)
|
||||
result = team_member_cache[cache_key]
|
||||
if result is True:
|
||||
in_human_team = True
|
||||
approved.append(u)
|
||||
break
|
||||
if result is None:
|
||||
print(
|
||||
@@ -1107,44 +1061,6 @@ def main(argv: list[str] | None = None) -> int:
|
||||
)
|
||||
# Treat as not-in-team for this user/team pair; loop
|
||||
# may still find membership in another team.
|
||||
if in_human_team:
|
||||
approved.append(u)
|
||||
continue
|
||||
|
||||
# 2) AI-sop-ack team membership check (only for items that allow it).
|
||||
if slug in items_by_slug:
|
||||
item = items_by_slug[slug]
|
||||
# Defensive: human-only carve-out is enforced in code, not just
|
||||
# config. Even if ai_ack_eligible were mistakenly added to a
|
||||
# migration/schema item, the AI path is rejected here.
|
||||
if slug in _HUMAN_ONLY_SLUGS:
|
||||
rejected_ai_ineligible.append(u)
|
||||
continue
|
||||
if item.get("ai_ack_eligible") and ai_sop_ack_team_id is not None:
|
||||
cache_key = (u, ai_sop_ack_team_id)
|
||||
if cache_key not in team_member_cache:
|
||||
team_member_cache[cache_key] = client.is_team_member(
|
||||
ai_sop_ack_team_id, u
|
||||
)
|
||||
result = team_member_cache[cache_key]
|
||||
if result is True:
|
||||
# 2a) Testing-class items require real CI artifact evidence.
|
||||
if slug in _TESTING_CLASS_SLUGS:
|
||||
ci_state = get_ci_status(
|
||||
client, args.owner, args.repo, head_sha
|
||||
)
|
||||
if ci_state != "success":
|
||||
print(
|
||||
f"::warning::AI ack for {slug} rejected: "
|
||||
f"CI / all-required is {ci_state}, not success",
|
||||
file=sys.stderr,
|
||||
)
|
||||
rejected_ci_not_green.append(u)
|
||||
continue
|
||||
approved.append(u)
|
||||
continue
|
||||
# If we get here, user is not approved for this slug.
|
||||
rejected_ai_ineligible.append(u)
|
||||
return approved
|
||||
|
||||
ack_state = compute_ack_state(
|
||||
|
||||
@@ -21,7 +21,6 @@ Scenarios:
|
||||
T16_comments_generic_approval — reviews empty; comments have "APPROVED" by team member → exit 0
|
||||
T17_comments_no_approval — reviews empty; comments have no approval keywords → exit 1
|
||||
T18_review_wrong_team_comment_right_team — review candidate 404s, comment candidate passes
|
||||
T19_ai_sop_ack_approved — ai-sop-ack member APPROVED review → team probe 404 → exit 1
|
||||
|
||||
Usage:
|
||||
FIXTURE_STATE_DIR=/tmp/x python3 _review_check_fixture.py 8080
|
||||
@@ -117,12 +116,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
{"state": "CHANGES_REQUESTED", "dismissed": False, "user": {"login": "bob"}, "commit_id": "abc1234"},
|
||||
{"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"},
|
||||
])
|
||||
if sc == "T19_ai_sop_ack_approved":
|
||||
# ai-sop-ack member submitted APPROVED review — must NOT count
|
||||
# toward qa-review (team_id=20) or security-review (team_id=21).
|
||||
return self._json(200, [
|
||||
{"state": "APPROVED", "dismissed": False, "user": {"login": "ai-reviewer"}, "commit_id": "abc1234"},
|
||||
])
|
||||
# Default: one non-author APPROVED
|
||||
return self._json(200, [
|
||||
{"state": "APPROVED", "dismissed": False, "user": {"login": "core-devops"}, "commit_id": "abc1234"},
|
||||
@@ -164,9 +157,6 @@ class Handler(http.server.BaseHTTPRequestHandler):
|
||||
return self._empty(403)
|
||||
if sc == "T18_review_wrong_team_comment_right_team" and login == "core-devops":
|
||||
return self._empty(404)
|
||||
if sc == "T19_ai_sop_ack_approved" and login == "ai-reviewer":
|
||||
# ai-sop-ack member is NOT in qa (20) or security (21).
|
||||
return self._empty(404)
|
||||
# T7_team_member: member
|
||||
return self._empty(204)
|
||||
|
||||
|
||||
@@ -11,100 +11,21 @@ def load_workflow(name: str) -> dict:
|
||||
return yaml.safe_load(f)
|
||||
|
||||
|
||||
def _all_required(workflow: dict) -> dict:
|
||||
return workflow["jobs"]["all-required"]
|
||||
|
||||
|
||||
def test_all_required_uses_dedicated_meta_runner_lane():
|
||||
workflow = load_workflow("ci.yml")
|
||||
all_required = _all_required(workflow)
|
||||
all_required = workflow["jobs"]["all-required"]
|
||||
|
||||
# Stays on the dedicated `ci-meta` lane (the sentinel does no docker
|
||||
# work, so it must NOT occupy the general docker-host pool).
|
||||
assert all_required["runs-on"] == "ci-meta"
|
||||
assert "needs" not in all_required
|
||||
|
||||
|
||||
def test_all_required_is_needs_aggregator_not_a_polling_gate():
|
||||
"""fix/ci-scheduler-fanout (2026-06-01): the sentinel was converted
|
||||
from a status-polling loop (which squatted a ci-meta executor slot for
|
||||
up to 40 min per PR) into a plain `needs:` aggregator that frees the
|
||||
slot immediately. Pin the new shape so a regression to the poller is
|
||||
caught.
|
||||
"""
|
||||
def test_all_required_reuses_path_filter_before_polling():
|
||||
workflow = load_workflow("ci.yml")
|
||||
all_required = _all_required(workflow)
|
||||
all_required = workflow["jobs"]["all-required"]
|
||||
rendered = str(all_required)
|
||||
|
||||
# The job MUST aggregate via `needs:` (the slot-freeing design).
|
||||
assert "needs" in all_required, "all-required must be a needs: aggregator"
|
||||
|
||||
# It MUST NOT reintroduce the polling loop / per-SHA status fetch that
|
||||
# was the throughput sink.
|
||||
assert "detect-changes.py" not in rendered, (
|
||||
"all-required must not run the detect-changes poller path"
|
||||
)
|
||||
assert "commits/" not in rendered and "statuses" not in rendered, (
|
||||
"all-required must not poll commit statuses (the slot-squat path)"
|
||||
)
|
||||
|
||||
|
||||
def test_all_required_does_not_use_if_always():
    """The sentinel must not pair `needs:` with a job-level `if: always()`.

    Plain `needs:` works on Gitea 1.22.6 / act_runner v0.6.1, whereas
    `needs:` + `if: always()` is BROKEN
    (feedback_gitea_needs_works_only_ifalways_broken) and would let a
    non-success need pass the gate.
    """
    sentinel_job = _all_required(load_workflow("ci.yml"))

    condition = sentinel_job.get("if")
    uses_always = isinstance(condition, str) and "always()" in condition
    assert not uses_always, (
        "all-required must not combine needs: with if: always()"
    )
|
||||
|
||||
|
||||
def test_all_required_needs_matches_ci_required_drift_f1_set():
    """Pin the sentinel `needs:` list to ci-required-drift.py's F1 set.

    The expected set mirrors `ci_job_names()`: every job MINUS the sentinel
    itself MINUS jobs whose `if:` gates on github.event_name/github.ref
    (event-gated jobs skip on PRs, and a `needs:` on a skipped job would
    never let the sentinel run). If the two diverge, ci-required-drift F1
    fires.
    """
    jobs = load_workflow("ci.yml")["jobs"]
    sentinel = "all-required"

    def _event_gated(body) -> bool:
        # Event-gated jobs legitimately skip on some triggers; they are
        # excluded from both `needs:` and the F1 set.
        if not isinstance(body, dict):
            return False
        gate = body.get("if")
        return isinstance(gate, str) and (
            "github.event_name" in gate or "github.ref" in gate
        )

    expected = {
        key
        for key, body in jobs.items()
        if key != sentinel and not _event_gated(body)
    }

    # `needs:` may be written as a scalar or as a list.
    declared = jobs[sentinel].get("needs", [])
    actual = {declared} if isinstance(declared, str) else set(declared)

    assert actual == expected, (
        f"all-required needs: {sorted(actual)} != ci_job_names() "
        f"{sorted(expected)} — ci-required-drift F1 would fire"
    )
|
||||
|
||||
|
||||
def test_all_required_needs_reference_real_jobs():
    """F1b guard: each `needs:` entry of the sentinel names a job that exists."""
    jobs = load_workflow("ci.yml")["jobs"]

    # `needs:` may be written as a scalar or as a list.
    declared = jobs["all-required"].get("needs", [])
    deps = [declared] if isinstance(declared, str) else declared

    known_jobs = set(jobs)
    for dep in deps:
        assert dep in known_jobs, f"all-required needs unknown job {dep!r}"
|
||||
assert "--profile ci" in rendered
|
||||
assert ".gitea/scripts/detect-changes.py" in rendered
|
||||
assert "REQUIRE_PLATFORM" in rendered
|
||||
assert "REQUIRE_CANVAS" in rendered
|
||||
assert "REQUIRE_SCRIPTS" in rendered
|
||||
|
||||
@@ -1,198 +0,0 @@
|
||||
"""Live-fire regression test for #2159 — gate auto-fire runtime verification.
|
||||
|
||||
Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML
|
||||
is structurally correct. This test validates the *runtime* path: submitting an
|
||||
APPROVED review to a PR whose head contains the current gate workflows causes
|
||||
Gitea Actions to queue the qa-review + security-review workflows and POST the
|
||||
branch-protection-required (pull_request_target) contexts within a reasonable
|
||||
window.
|
||||
|
||||
Skipped when Gitea API credentials are not available. Intended for:
|
||||
- manual developer verification
|
||||
- CI jobs provisioned with a service-account token
|
||||
|
||||
Environment:
|
||||
GITEA_HOST — default: git.moleculesai.app
|
||||
GITEA_TOKEN — token with read:repository + write:issues (for review POST)
|
||||
REPO — default: molecule-ai/molecule-core
|
||||
LIVEFIRE_PR_NUMBER — optional; if omitted the test tries to find a
|
||||
suitable open PR automatically, or skips.
|
||||
LIVEFIRE_TIMEOUT_SEC — default: 120
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
import yaml
|
||||
|
||||
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
|
||||
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
|
||||
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
|
||||
LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "")
|
||||
LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120"))
|
||||
|
||||
REQUIRED_CONTEXTS = [
|
||||
"qa-review / approved (pull_request_target)",
|
||||
"security-review / approved (pull_request_target)",
|
||||
]
|
||||
|
||||
skip_no_token = pytest.mark.skipif(
|
||||
not GITEA_TOKEN,
|
||||
reason="GITEA_TOKEN not set — live-fire test requires API credentials",
|
||||
)
|
||||
|
||||
|
||||
def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
    """Send one authenticated request to the Gitea API and decode the reply.

    Args:
        method: HTTP method, e.g. "GET" or "POST".
        path: Path (and query string) appended to ``/api/v1``.
        body: Optional JSON payload. ``None`` sends no body; any other
            value, including an empty dict, is serialized and sent.

    Returns:
        ``(status_code, payload)``. HTTP error responses are returned, not
        raised. ``payload`` is the decoded JSON document (which is a list
        for list endpoints), ``{}`` for an empty body, or
        ``{"raw": <text>}`` when the body is not valid JSON.

    Raises:
        urllib.error.URLError: On transport-level failures (DNS, refused
            connection, timeout); these are not HTTP responses.
    """
    url = f"https://{GITEA_HOST}/api/v1{path}"
    headers = {
        "Authorization": f"token {GITEA_TOKEN}",
        "Content-Type": "application/json",
    }
    # `is not None` rather than truthiness: an explicit empty object must
    # still be sent as "{}" instead of silently becoming a body-less request.
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(url, data=data, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            raw = resp.read()
            code = resp.status
    except urllib.error.HTTPError as exc:
        # 4xx/5xx still carry a status and (usually) a body worth returning.
        raw = exc.read()
        code = exc.code
    if not raw:
        return code, {}
    try:
        payload = json.loads(raw)
    except ValueError:
        # Non-JSON body (e.g. an HTML error page from a proxy): keep the
        # status code usable instead of masking it with a decode error.
        payload = {"raw": raw.decode("utf-8", errors="replace")}
    return code, payload
|
||||
|
||||
|
||||
def _get_pr(number: int) -> dict:
    """Fetch pull request `number` from REPO, failing the test on any non-200 reply."""
    status, payload = _api("GET", f"/repos/{REPO}/pulls/{number}")
    if status == 200:
        return payload
    pytest.fail(f"GET /pulls/{number} returned HTTP {status}: {payload}")
|
||||
|
||||
|
||||
def _list_open_prs() -> list[dict]:
    """Return up to 50 open pull requests of REPO, failing the test on any non-200 reply."""
    status, payload = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50")
    if status == 200:
        return payload
    pytest.fail(f"GET /pulls?state=open returned HTTP {status}: {payload}")
|
||||
|
||||
|
||||
def _pr_has_trigger_in_head(pr: dict) -> bool:
    """Return True if the PR head contains pull_request_review in both workflows.

    Both ``qa-review.yml`` and ``security-review.yml`` are fetched at the
    PR's head SHA. Any workflow that is missing, unreadable, or does not
    declare the trigger makes the PR unsuitable.

    Args:
        pr: Pull-request payload; ``pr["head"]["sha"]`` is read.

    Returns:
        True only when both workflow files declare ``pull_request_review``.
    """
    head_sha = pr["head"]["sha"]
    for wf_name in ("qa-review.yml", "security-review.yml"):
        path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}"
        code, payload = _api("GET", path)
        if code != 200 or not isinstance(payload, dict):
            return False
        try:
            raw = base64.b64decode(payload.get("content") or "").decode("utf-8")
            wf = yaml.safe_load(raw)
        except (ValueError, yaml.YAMLError):
            # Undecodable content or malformed YAML: this PR cannot be used
            # as a live-fire target, which is not a reason to crash the scan.
            return False
        # An empty file parses to None and a scalar document to a str;
        # neither can declare triggers.
        if not isinstance(wf, dict):
            return False
        # The loader may parse the bare key `on` as boolean True, so both
        # spellings of the key are checked.
        on = wf.get(True) or wf.get("on") or {}
        if isinstance(on, str):
            if on != "pull_request_review":
                return False
        elif not isinstance(on, (list, dict)) or "pull_request_review" not in on:
            return False
    return True
|
||||
|
||||
|
||||
def _find_suitable_pr() -> dict:
    """Pick the PR to fire against, skipping the test when none qualifies.

    An explicit LIVEFIRE_PR_NUMBER wins and only has to be open. Otherwise
    the first open PR whose head declares the pull_request_review trigger
    in both gate workflows is used.
    """
    if LIVEFIRE_PR_NUMBER:
        pinned = _get_pr(int(LIVEFIRE_PR_NUMBER))
        if pinned.get("state") != "open":
            pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open")
        return pinned

    # The generator is lazy, so scanning stops at the first matching PR.
    candidate = next(
        (pr for pr in _list_open_prs() if _pr_has_trigger_in_head(pr)),
        None,
    )
    if candidate is not None:
        return candidate
    pytest.skip("No open PR found whose head contains the pull_request_review trigger")
|
||||
|
||||
|
||||
def _submit_approved_review(pr_number: int) -> dict:
    """Submit an APPROVED review on `pr_number` and return the API payload.

    Args:
        pr_number: Pull-request number in REPO.

    Returns:
        The decoded response of the review POST.
    """
    code, review = _api(
        "POST",
        f"/repos/{REPO}/pulls/{pr_number}/reviews",
        {"body": "Live-fire test APPROVED review", "event": "APPROVED"},
    )
    # 200/201 = review accepted; 422 is tolerated as "review already
    # exists" (idempotent enough for our purposes).
    if code not in (200, 201, 422):
        # Include the payload, as _get_pr does, so the failure is diagnosable.
        pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}: {review}")
    return review
|
||||
|
||||
|
||||
def _newest_required_statuses(statuses) -> dict[str, dict]:
    """Map each required context to its first (newest) entry in `statuses`.

    NOTE(review): relies on the statuses endpoint listing the most recent
    entry for a context first — confirm against the Gitea version in use.
    """
    newest: dict[str, dict] = {}
    if not isinstance(statuses, list):
        return newest
    for st in statuses:
        if not isinstance(st, dict):
            continue
        ctx = st.get("context", "")
        # Keep only the first occurrence; later entries are older history.
        if ctx in REQUIRED_CONTEXTS and ctx not in newest:
            newest[ctx] = st
    return newest


def _get_status_updated_at(sha: str) -> dict[str, str]:
    """Return mapping context -> updated_at for required contexts on this SHA.

    Only the newest entry per context is recorded, so the value is a valid
    baseline for detecting a later, fresher post. Returns ``{}`` when the
    request does not succeed.
    """
    code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
    if code != 200:
        return {}
    return {
        ctx: st.get("updated_at", st.get("created_at", ""))
        for ctx, st in _newest_required_statuses(statuses).items()
    }


def _poll_fresh_statuses(
    sha: str,
    prior_updated_at: dict[str, str],
    timeout_sec: int = LIVEFIRE_TIMEOUT_SEC,
) -> dict[str, str]:
    """Poll until required contexts appear with updated_at fresher than prior.

    Args:
        sha: Commit SHA whose statuses are polled.
        prior_updated_at: Baseline from `_get_status_updated_at`, taken
            before the review was submitted.
        timeout_sec: Maximum time to keep polling.

    Returns:
        Mapping context -> state for every required context that was seen
        fresh; it is incomplete if the timeout expired first.
    """
    deadline = time.monotonic() + timeout_sec
    found: dict[str, str] = {}
    while time.monotonic() < deadline:
        code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
        if code == 200:
            # Compare only the newest entry per context. Older history rows
            # always carry a different timestamp than the baseline and
            # would otherwise be mistaken for a fresh post.
            for ctx, st in _newest_required_statuses(statuses).items():
                updated_at = st.get("updated_at", st.get("created_at", ""))
                # Fresh if the context was absent before, OR its timestamp changed.
                if ctx not in prior_updated_at or updated_at != prior_updated_at[ctx]:
                    found[ctx] = st.get("state", st.get("status", ""))
            if all(ctx in found for ctx in REQUIRED_CONTEXTS):
                return found
        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(5, remaining))
    return found
|
||||
|
||||
|
||||
@skip_no_token
class TestGateAutoFireLive:
    """Live test: an APPROVED review must cause the required contexts to be posted."""

    def test_auto_fire_posts_required_contexts(self):
        """Submit APPROVED review; assert BP-required contexts appear fresh within timeout."""
        target = _find_suitable_pr()
        number, sha = target["number"], target["head"]["sha"]

        # Snapshot the existing timestamps first: only statuses posted after
        # the review submission count as proof (not leftovers from a prior run).
        baseline = _get_status_updated_at(sha)

        _submit_approved_review(number)

        fresh = _poll_fresh_statuses(sha, baseline)

        absent = [name for name in REQUIRED_CONTEXTS if name not in fresh]
        if absent:
            pytest.fail(
                f"After {LIVEFIRE_TIMEOUT_SEC}s, fresh contexts still missing: {absent}. "
                f"Found: {fresh}. Prior timestamps: {baseline}. "
                f"PR #{number} head={sha}. "
                f"This indicates the pull_request_review trigger did not fire at runtime."
            )

        # Fresh contexts are the proof of auto-fire. Success vs failure is the
        # evaluator's call; #2159 is only about the workflows queueing and
        # posting at all.
        allowed_states = ("pending", "success", "failure")
        for context_name, reported in fresh.items():
            assert reported in allowed_states, (
                f"Unexpected state {reported!r} for {context_name}"
            )
|
||||
@@ -1,168 +0,0 @@
|
||||
"""Regression test #765 — gate auto-fire on real qa/security APPROVED review.
|
||||
|
||||
Validates the structural configuration of qa-review.yml and security-review.yml
|
||||
so that a real team-member APPROVED review fires the workflow and POSTs the
|
||||
exact branch-protection-required context name. This is the test #2020's
|
||||
stale-context failure would have caught.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def load_workflow(name: str) -> dict:
    """Parse ``ROOT/workflows/<name>`` with ``yaml.safe_load`` and return the result."""
    workflow_path = ROOT / "workflows" / name
    with workflow_path.open() as handle:
        parsed = yaml.safe_load(handle)
    return parsed
|
||||
|
||||
|
||||
def _job_guard_string(workflow: dict) -> str:
|
||||
"""Return the raw job-level `if:` string for the single job."""
|
||||
jobs = workflow["jobs"]
|
||||
# Both qa-review and security-review have exactly one job named "approved".
|
||||
job = jobs["approved"]
|
||||
return str(job.get("if", ""))
|
||||
|
||||
|
||||
def _post_step(workflow: dict) -> dict:
|
||||
"""Return the explicit POST /statuses step from the job steps list."""
|
||||
jobs = workflow["jobs"]
|
||||
steps = jobs["approved"]["steps"]
|
||||
for step in steps:
|
||||
name = step.get("name", "")
|
||||
if "Post required status context" in name:
|
||||
return step
|
||||
raise AssertionError("No explicit POST status step found")
|
||||
|
||||
|
||||
class TestQaReviewDirectTrigger:
    """Structural checks on qa-review.yml: trigger, job guard, token and context name."""

    def test_trigger_is_pull_request_review_submitted(self):
        """The workflow must declare pull_request_review with the 'submitted' type."""
        wf = load_workflow("qa-review.yml")
        # PyYAML parses bare 'on' as boolean True; a quoted "on" stays a
        # string. Accept either so a quoting change fails with the assertion
        # message below instead of a KeyError.
        on = wf.get(True) or wf.get("on") or {}
        assert "pull_request_review" in on, (
            "qa-review must trigger on pull_request_review"
        )
        # A non-mapping `on`, or `pull_request_review:` with no body (loads as
        # None), has no explicit types; treat that as an empty list.
        trigger = on["pull_request_review"] if isinstance(on, dict) else None
        types = (trigger or {}).get("types", [])
        assert "submitted" in types, (
            "pull_request_review must include 'submitted' type"
        )

    def test_job_guard_requires_approved_state(self):
        """The job guard must match both upper- and lower-case APPROVED."""
        wf = load_workflow("qa-review.yml")
        guard = _job_guard_string(wf)
        assert "github.event.review.state == 'APPROVED'" in guard, (
            "job guard must check review.state for 'APPROVED'"
        )
        assert "github.event.review.state == 'approved'" in guard, (
            "job guard must check review.state for 'approved' (case fallback per #2135)"
        )

    def test_post_step_uses_status_post_token(self):
        """The status POST step must take GITEA_TOKEN from STATUS_POST_TOKEN."""
        wf = load_workflow("qa-review.yml")
        post = _post_step(wf)
        env = post.get("env", {})
        assert env.get("GITEA_TOKEN") == "${{ secrets.STATUS_POST_TOKEN }}", (
            "POST step must use STATUS_POST_TOKEN for write-scoped status POST"
        )

    def test_post_step_context_name_exact(self):
        """The context POSTed must byte-match the branch-protection requirement."""
        wf = load_workflow("qa-review.yml")
        post = _post_step(wf)
        run = post.get("run", "")
        assert '"qa-review / approved (pull_request_target)"' in run, (
            "POST step must emit exact BP-required context name"
        )
|
||||
|
||||
|
||||
class TestSecurityReviewDirectTrigger:
    """Structural checks on security-review.yml: trigger, job guard, token and context name."""

    def test_trigger_is_pull_request_review_submitted(self):
        """The workflow must declare pull_request_review with the 'submitted' type."""
        wf = load_workflow("security-review.yml")
        # PyYAML parses bare 'on' as boolean True; a quoted "on" stays a
        # string. Accept either so a quoting change fails with the assertion
        # message below instead of a KeyError.
        on = wf.get(True) or wf.get("on") or {}
        assert "pull_request_review" in on, (
            "security-review must trigger on pull_request_review"
        )
        # A non-mapping `on`, or `pull_request_review:` with no body (loads as
        # None), has no explicit types; treat that as an empty list.
        trigger = on["pull_request_review"] if isinstance(on, dict) else None
        types = (trigger or {}).get("types", [])
        assert "submitted" in types, (
            "pull_request_review must include 'submitted' type"
        )

    def test_job_guard_requires_approved_state(self):
        """The job guard must match both upper- and lower-case APPROVED."""
        wf = load_workflow("security-review.yml")
        guard = _job_guard_string(wf)
        assert "github.event.review.state == 'APPROVED'" in guard, (
            "job guard must check review.state for 'APPROVED'"
        )
        assert "github.event.review.state == 'approved'" in guard, (
            "job guard must check review.state for 'approved' (case fallback per #2135)"
        )

    def test_post_step_uses_status_post_token(self):
        """The status POST step must take GITEA_TOKEN from STATUS_POST_TOKEN."""
        wf = load_workflow("security-review.yml")
        post = _post_step(wf)
        env = post.get("env", {})
        assert env.get("GITEA_TOKEN") == "${{ secrets.STATUS_POST_TOKEN }}", (
            "POST step must use STATUS_POST_TOKEN for write-scoped status POST"
        )

    def test_post_step_context_name_exact(self):
        """The context POSTed must byte-match the branch-protection requirement."""
        wf = load_workflow("security-review.yml")
        post = _post_step(wf)
        run = post.get("run", "")
        assert '"security-review / approved (pull_request_target)"' in run, (
            "POST step must emit exact BP-required context name"
        )
|
||||
|
||||
|
||||
class TestRefireScriptContextName:
    """review-refire-status.sh must emit the BP-required (pull_request_target) context."""

    def test_refire_script_context_is_pull_request_target(self):
        """The script must set the (pull_request_target) context and never the bare one."""
        body = (ROOT / "scripts" / "review-refire-status.sh").read_text()
        required = 'CONTEXT="${TEAM}-review / approved (pull_request_target)"'
        forbidden = 'approved (pull_request)"'
        assert required in body, (
            "refire script CONTEXT must be the exact BP-required (pull_request_target) variant"
        )
        assert forbidden not in body, (
            "refire script must NOT post bare (pull_request) context"
        )
|
||||
|
||||
|
||||
class TestRefireTokenSeparation:
    """The /qa-recheck + /security-recheck backstop must also use STATUS_POST_TOKEN."""

    def _refire_step(self, workflow_name: str, step_name_keyword: str) -> dict:
        """Return the first ``review-refire`` step whose name contains the keyword."""
        refire_steps = load_workflow(workflow_name)["jobs"]["review-refire"]["steps"]
        hit = next(
            (s for s in refire_steps if step_name_keyword in s.get("name", "")),
            None,
        )
        if hit is None:
            raise AssertionError(f"No refire step matching {step_name_keyword!r}")
        return hit

    def test_qa_refire_uses_status_post_token(self):
        """qa refire: write token via STATUS_POST_TOKEN, evaluator on a read token."""
        step_env = self._refire_step("sop-checklist.yml", "Refire qa-review").get("env", {})
        assert step_env.get("STATUS_POST_TOKEN") == "${{ secrets.STATUS_POST_TOKEN }}", (
            "qa refire must receive STATUS_POST_TOKEN env var"
        )
        # Evaluator stays on read token
        evaluator_token = step_env.get("GITEA_TOKEN", "")
        assert "SOP_TIER_CHECK_TOKEN" in evaluator_token or "GITHUB_TOKEN" in evaluator_token, (
            "qa refire evaluator must stay on read-scoped token"
        )

    def test_security_refire_uses_status_post_token(self):
        """security refire: write token via STATUS_POST_TOKEN, evaluator on a read token."""
        step_env = self._refire_step("sop-checklist.yml", "Refire security-review").get("env", {})
        assert step_env.get("STATUS_POST_TOKEN") == "${{ secrets.STATUS_POST_TOKEN }}", (
            "security refire must receive STATUS_POST_TOKEN env var"
        )
        evaluator_token = step_env.get("GITEA_TOKEN", "")
        assert "SOP_TIER_CHECK_TOKEN" in evaluator_token or "GITHUB_TOKEN" in evaluator_token, (
            "security refire evaluator must stay on read-scoped token"
        )
|
||||
@@ -1,145 +0,0 @@
|
||||
"""Stale-head diagnostic test for #2159.
|
||||
|
||||
Deterministically reports whether a PR's HEAD contains the pull_request_review
|
||||
trigger in qa-review.yml and security-review.yml. If the trigger is absent,
|
||||
auto-fire on APPROVED review is impossible for that PR.
|
||||
|
||||
This is used as a self-diagnostic for future stale-PR situations (PRs opened
|
||||
before #2157 merged, or branches cut from old bases).
|
||||
|
||||
Environment:
|
||||
GITEA_HOST — default: git.moleculesai.app
|
||||
GITEA_TOKEN — token with read:repository scope (optional; falls back to local files)
|
||||
REPO — default: molecule-ai/molecule-core
|
||||
PR_NUMBER — required when running against a real PR
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
import yaml
|
||||
|
||||
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
|
||||
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
|
||||
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
|
||||
PR_NUMBER = os.environ.get("PR_NUMBER", "")
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def _api(method: str, path: str) -> tuple[int, dict]:
    """Call the Gitea REST API and return ``(status_code, decoded_body)``.

    Args:
        method: HTTP method, e.g. ``"GET"``.
        path: Path below ``/api/v1``, including any query string.

    Returns:
        The HTTP status and the JSON-decoded body. For HTTP errors the body
        is the decoded error payload, ``{}`` when the response had no body,
        or ``{"message": <raw text>}`` when the body is not JSON.

    Only ``HTTPError`` is translated into a return value; other failures
    (e.g. connection errors) propagate to the caller.
    """
    url = f"https://{GITEA_HOST}/api/v1{path}"
    headers = {"Authorization": f"token {GITEA_TOKEN}"}
    req = urllib.request.Request(url, headers=headers, method=method)
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            return resp.status, json.loads(resp.read())
    except urllib.error.HTTPError as exc:
        body = exc.read()
        if not body:
            return exc.code, {}
        try:
            return exc.code, json.loads(body)
        except ValueError:
            # Error pages are not always JSON (e.g. HTML from a proxy). Keep
            # the status code and surface the raw text instead of raising.
            return exc.code, {"message": body.decode("utf-8", errors="replace")}
|
||||
|
||||
|
||||
def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict:
    """Fetch ``.gitea/workflows/<workflow_name>`` at ``ref`` via the contents API and parse it.

    Fails the test when the request does not return HTTP 200. The response's
    ``content`` field is base64-decoded as UTF-8 and loaded with
    ``yaml.safe_load``.
    """
    path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}"
    status, payload = _api("GET", path)
    if status != 200:
        pytest.fail(
            f"GET {path} returned HTTP {status}: {payload}. "
            f"Cannot determine whether PR head contains the trigger."
        )
    encoded = payload.get("content", "")
    text = base64.b64decode(encoded).decode("utf-8")
    return yaml.safe_load(text)
|
||||
|
||||
|
||||
def _fetch_workflow_local(workflow_name: str) -> dict:
    """Parse ``ROOT/workflows/<workflow_name>`` from the local checkout.

    Fails the test when the file does not exist.
    """
    local_file = ROOT / "workflows" / workflow_name
    if local_file.exists():
        return yaml.safe_load(local_file.read_text())
    pytest.fail(f"Local workflow file not found: {local_file}")
    # Reproduces the original tail: yaml.safe_load after pytest.fail, which raises.
    return yaml.safe_load(local_file.read_text())
|
||||
|
||||
|
||||
def _has_pull_request_review_trigger(wf: dict) -> bool:
|
||||
on = wf.get(True) or wf.get("on") or {}
|
||||
if isinstance(on, list):
|
||||
return "pull_request_review" in on
|
||||
if isinstance(on, dict):
|
||||
return "pull_request_review" in on
|
||||
if isinstance(on, str):
|
||||
return on == "pull_request_review"
|
||||
return False
|
||||
|
||||
|
||||
def _diagnose_pr(pr_number: int) -> dict:
    """Report whether the head of PR ``pr_number`` carries the review trigger.

    Args:
        pr_number: Pull request number to inspect.

    Returns:
        A report dict with keys ``pr_number`` (int), ``head_ref`` (str),
        ``head_sha`` (str), ``triggers`` (workflow file name -> bool) and
        ``auto_fire_possible`` (True only when every workflow has the trigger).

    Fails the test when the PR, or either workflow file, cannot be fetched.
    """
    code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}")
    if code != 200:
        pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}")

    head_ref = pr["head"]["ref"]
    head_sha = pr["head"]["sha"]

    # The workflows are read at the head SHA, not at the branch name.
    results: dict[str, bool] = {}
    for wf_name in ("qa-review.yml", "security-review.yml"):
        wf = _fetch_workflow_from_ref(wf_name, head_sha)
        results[wf_name] = _has_pull_request_review_trigger(wf)

    return {
        "pr_number": pr_number,
        "head_ref": head_ref,
        "head_sha": head_sha,
        "triggers": results,
        "auto_fire_possible": all(results.values()),
    }
|
||||
|
||||
|
||||
def _diagnose_local() -> dict:
    """Report whether the local checkout's workflows carry the review trigger.

    Returns:
        A report dict shaped like the one from ``_diagnose_pr``:
        ``pr_number`` and ``head_sha`` are ``None``, ``head_ref`` is
        ``"local-checkout"``, ``triggers`` maps workflow file name -> bool,
        and ``auto_fire_possible`` is True only when every workflow has the
        trigger.

    Fails the test when a local workflow file is missing.
    """
    results: dict[str, bool] = {}
    for wf_name in ("qa-review.yml", "security-review.yml"):
        wf = _fetch_workflow_local(wf_name)
        results[wf_name] = _has_pull_request_review_trigger(wf)
    return {
        "pr_number": None,
        "head_ref": "local-checkout",
        "head_sha": None,
        "triggers": results,
        "auto_fire_possible": all(results.values()),
    }
|
||||
|
||||
|
||||
class TestStaleHeadDiagnostic:
    """Deterministically reports 'auto-fire impossible for this PR' when the
    PR head lacks the pull_request_review trigger.
    """

    def test_local_checkout_has_pull_request_review_trigger(self):
        """The workflow files in this checkout must contain the trigger.

        Baseline check: a stale checkout means every PR cut from it is
        stale as well.
        """
        report = _diagnose_local()
        lacking = [name for name, present in report["triggers"].items() if not present]
        if not lacking:
            return
        pytest.fail(
            f"Local checkout is missing pull_request_review trigger in: {lacking}. "
            f"This branch cannot produce PRs that auto-fire."
        )

    @pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set")
    @pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set")
    def test_pr_head_has_pull_request_review_trigger(self):
        """When PR_NUMBER is given, assert the PR head contains the trigger."""
        report = _diagnose_pr(int(PR_NUMBER))
        if report["auto_fire_possible"]:
            return
        lacking = [name for name, present in report["triggers"].items() if not present]
        pytest.fail(
            f"Auto-fire impossible for PR #{report['pr_number']}. "
            f"Head ref={report['head_ref']} sha={report['head_sha']}. "
            f"Missing trigger in: {lacking}. "
            f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main."
        )
|
||||
@@ -355,134 +355,3 @@ def test_rollout_from_plan_file_writes_partial_response_on_failure(tmp_path):
|
||||
assert response_path.read_text(encoding="utf-8").strip()
|
||||
assert '"ok": false' in response_path.read_text(encoding="utf-8")
|
||||
assert '"slug": "hongming"' in response_path.read_text(encoding="utf-8")
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# No-silent-skip coverage gate (internal#724)
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_rollout_stragglers_flags_tenant_not_on_target():
    """A tenant whose row has verified_on_target=False is reported as a straggler."""
    enumerated = ["a", "b", "c"]
    # "b" SSM-succeeded but its container still runs the old tag.
    rows = [
        {"slug": "a", "verified_on_target": True},
        {"slug": "b", "verified_on_target": False, "running_image": "platform-tenant:staging-old"},
        {"slug": "c", "verified_on_target": True},
    ]
    assert prod.rollout_stragglers(enumerated, rows) == ["b"]
|
||||
|
||||
|
||||
def test_rollout_stragglers_flags_enumerated_tenant_with_no_result():
    """An enumerated tenant with no result row at all is reported as a straggler."""
    enumerated = ["a", "agents-team"]
    # agents-team class: enumerated, yet no batch ever produced a row for it.
    rows = [{"slug": "a", "verified_on_target": True}]
    assert prod.rollout_stragglers(enumerated, rows) == ["agents-team"]
|
||||
|
||||
|
||||
def test_rollout_stragglers_missing_key_is_backward_compatible():
    """Rows without verified_on_target are treated as verified."""
    # An older CP does not emit verified_on_target; that must not fail spuriously.
    rows = [{"slug": slug, "healthz_ok": True} for slug in ("a", "b")]
    assert prod.rollout_stragglers(["a", "b"], rows) == []
|
||||
|
||||
|
||||
def test_rollout_stragglers_ignores_dry_run_rows():
    """A DryRun row does not count as verification for its tenant."""
    dry_run_rows = [{"slug": "a", "ssm_status": "DryRun"}]
    # The dry-run row is skipped, so "a" has no verifying row -> straggler.
    assert prod.rollout_stragglers(["a"], dry_run_rows) == ["a"]
|
||||
|
||||
|
||||
def test_scoped_rollout_fails_when_a_tenant_stays_on_old_tag():
    """A rollout where one tenant is not verified_on_target must raise RolloutFailed.

    Every per-tenant call returns ok=True, but agents-team is NOT
    verified_on_target: the "reported success, one tenant silently skipped"
    bug.
    """

    def fake_redeploy(_cp_url, _token, body):
        rows = [
            {"slug": slug, "verified_on_target": slug != "agents-team"}
            for slug in body["only_slugs"]
        ]
        return 200, {"ok": True, "results": rows}

    plan = {
        "cp_url": "https://api.moleculesai.app",
        "body": {
            "target_tag": "staging-new",
            "batch_size": 5,
            "dry_run": False,
            "confirm": True,
        },
    }

    failure = None
    try:
        prod.execute_scoped_rollout(
            plan,
            token="secret",
            list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
            redeploy=fake_redeploy,
            sleep=lambda _s: None,
        )
    except prod.RolloutFailed as exc:
        failure = exc

    if failure is None:
        raise AssertionError("expected an incomplete rollout to fail loudly")
    assert "incomplete rollout" in str(failure)
    assert failure.response["stragglers"] == ["agents-team"]
    assert failure.response["ok"] is False
|
||||
|
||||
|
||||
def test_scoped_rollout_passes_when_all_tenants_verified_on_target():
    """With every tenant verified_on_target the aggregate is ok and lists no stragglers."""

    def fake_redeploy(_cp_url, _token, body):
        rows = [{"slug": slug, "verified_on_target": True} for slug in body["only_slugs"]]
        return 200, {"ok": True, "results": rows}

    plan = {
        "cp_url": "https://api.moleculesai.app",
        "body": {
            "target_tag": "staging-new",
            "batch_size": 5,
            "dry_run": False,
            "confirm": True,
        },
    }
    outcome = prod.execute_scoped_rollout(
        plan,
        token="secret",
        list_slugs=lambda _u, _t, _b: ["reno-stars", "agents-team", "hongming"],
        redeploy=fake_redeploy,
        sleep=lambda _s: None,
    )
    assert outcome["ok"] is True
    assert "stragglers" not in outcome
|
||||
|
||||
|
||||
def test_scoped_rollout_dry_run_does_not_assert_coverage():
    """A dry-run plan succeeds even though no row verifies a tenant.

    A dry run proves nothing landed; if coverage were asserted, every plan
    would fail.
    """

    def fake_redeploy(_cp_url, _token, body):
        rows = [{"slug": slug, "ssm_status": "DryRun"} for slug in body["only_slugs"]]
        return 200, {"ok": True, "results": rows}

    plan = {
        "cp_url": "https://api.moleculesai.app",
        "body": {
            "target_tag": "staging-new",
            "batch_size": 5,
            "dry_run": True,
            "confirm": True,
        },
    }
    outcome = prod.execute_scoped_rollout(
        plan,
        token="secret",
        list_slugs=lambda _u, _t, _b: ["a", "b"],
        redeploy=fake_redeploy,
        sleep=lambda _s: None,
    )
    assert outcome["ok"] is True
|
||||
|
||||
@@ -205,8 +205,6 @@ chmod +x "$FIXTURE_DIR/bin/curl"
|
||||
# Helper: run the script with fixture environment
|
||||
run_review_check() {
|
||||
local scenario="$1"
|
||||
local team="${2:-qa}"
|
||||
local team_id="${3:-20}"
|
||||
echo "$scenario" >"$FIX_STATE_DIR/scenario"
|
||||
local out
|
||||
set +e
|
||||
@@ -217,8 +215,8 @@ run_review_check() {
|
||||
REPO="molecule-ai/molecule-core" \
|
||||
PR_NUMBER="999" \
|
||||
DEFAULT_BRANCH="main" \
|
||||
TEAM="$team" \
|
||||
TEAM_ID="$team_id" \
|
||||
TEAM="qa" \
|
||||
TEAM_ID="20" \
|
||||
REVIEW_CHECK_DEBUG="0" \
|
||||
REVIEW_CHECK_STRICT="0" \
|
||||
bash "$SCRIPT" 2>&1
|
||||
@@ -374,25 +372,6 @@ assert_eq "T18 exit code 0 (comment approval still considered)" "0" "$T18_RC"
|
||||
assert_contains "T18 comment candidate notice" "comment-based approval" "$T18_OUT"
|
||||
assert_contains "T18 comment approver accepted" "APPROVED by core-qa-agent" "$T18_OUT"
|
||||
|
||||
# T19 — ai-sop-ack member APPROVED review must NOT count toward qa-review
|
||||
# or security-review (R1 hardening refinement, msg 1388c76f).
|
||||
echo
|
||||
echo "== T19 ai-sop-ack APPROVED review excluded from qa-review gate =="
|
||||
T19_OUT=$(run_review_check "T19_ai_sop_ack_approved" "qa" "20")
|
||||
T19_RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
assert_eq "T19 exit code 1 (ai-sop-ack not in qa team)" "1" "$T19_RC"
|
||||
assert_contains "T19 ai-reviewer excluded from qa" "candidates: ai-reviewer" "$T19_OUT"
|
||||
assert_contains "T19 none are in qa team" "none are in team" "$T19_OUT"
|
||||
|
||||
# T20 — same ai-sop-ack member must also be excluded from security-review gate.
|
||||
echo
|
||||
echo "== T20 ai-sop-ack APPROVED review excluded from security-review gate =="
|
||||
T20_OUT=$(run_review_check "T19_ai_sop_ack_approved" "security" "21")
|
||||
T20_RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
assert_eq "T20 exit code 1 (ai-sop-ack not in security team)" "1" "$T20_RC"
|
||||
assert_contains "T20 ai-reviewer excluded from security" "candidates: ai-reviewer" "$T20_OUT"
|
||||
assert_contains "T20 none are in security team" "none are in team" "$T20_OUT"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
|
||||
@@ -1003,299 +1003,3 @@ class TestComputeNaStateAcceptsGateNotInItems(unittest.TestCase):
|
||||
comments, "alice", na_gates, lambda *_: ["alice"]
|
||||
)
|
||||
self.assertFalse(na_state["security-review"]["declared"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# internal#760 ceremony — ai-sop-ack team + ai_ack_eligible per-item flag
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAIAckEligibleConfig(unittest.TestCase):
    """CTO-controlled allowlist (msg 1388c76f):
      ai_ack_eligible: comprehensive-testing, local-postgres-e2e, staging-smoke,
                       five-axis-review, memory-consulted
      human-only:      root-cause, no-backwards-compat
    """

    def test_ai_ack_eligible_items(self):
        """Each allowlisted item carries a truthy ai_ack_eligible flag in the config."""
        by_slug = {entry["slug"]: entry for entry in sop.load_config(CONFIG_PATH)["items"]}
        allowlisted = {
            "comprehensive-testing",
            "local-postgres-e2e",
            "staging-smoke",
            "five-axis-review",
            "memory-consulted",
        }
        for slug in allowlisted:
            flag = by_slug[slug].get("ai_ack_eligible")
            self.assertTrue(flag, f"{slug} must be ai_ack_eligible")

    def test_human_only_items(self):
        """root-cause and no-backwards-compat must not be ai_ack_eligible."""
        by_slug = {entry["slug"]: entry for entry in sop.load_config(CONFIG_PATH)["items"]}
        for slug in {"root-cause", "no-backwards-compat"}:
            flag = by_slug[slug].get("ai_ack_eligible", False)
            self.assertFalse(flag, f"{slug} must NOT be ai_ack_eligible (human-only)")

    def test_testing_class_slugs_constant(self):
        """_TESTING_CLASS_SLUGS must match the three testing items."""
        expected = {"comprehensive-testing", "local-postgres-e2e", "staging-smoke"}
        self.assertEqual(sop._TESTING_CLASS_SLUGS, expected)

    def test_human_only_slugs_constant(self):
        """_HUMAN_ONLY_SLUGS encodes the migration/schema carve-out.

        If this set changes, the CTO must approve the widening.
        """
        expected = {"root-cause", "no-backwards-compat", "migration", "schema"}
        self.assertEqual(sop._HUMAN_ONLY_SLUGS, expected)

    def test_human_only_invariant_enforced_in_code_and_config(self):
        """Every config-present slug in _HUMAN_ONLY_SLUGS must be human-only.

        Fails if a migration/schema-class item acquires ai_ack_eligible
        through config drift. migration/schema are future-proofing slugs not
        yet in the live config; they are checked by the production probe
        closure but skipped here.
        """
        by_slug = {entry["slug"]: entry for entry in sop.load_config(CONFIG_PATH)["items"]}
        # Slugs absent from the config (e.g. migration, schema) are skipped:
        # the code guard still rejects AI acks for them.
        configured = (slug for slug in sop._HUMAN_ONLY_SLUGS if slug in by_slug)
        for slug in configured:
            flag = by_slug[slug].get("ai_ack_eligible", False)
            self.assertFalse(
                flag,
                f"{slug} is in _HUMAN_ONLY_SLUGS and must NEVER be ai_ack_eligible",
            )
|
||||
|
||||
|
||||
class TestAIAckEligibilityProbe(unittest.TestCase):
    """The probe closure in main() delegates to compute_ack_state.

    The AI-ack path is simulated by injecting a probe that behaves like the
    production probe (human team first, then ai-sop-ack fallback).
    """

    def setUp(self):
        self.items = _items_by_slug()
        self.aliases = _numeric_aliases()

    def _probe_human_then_ai(self, human_users, ai_users):
        """Build a probe approving human_users always and ai_users only for
        items flagged ai_ack_eligible."""

        def probe(slug, users):
            eligible = self.items.get(slug, {}).get("ai_ack_eligible")
            return [
                user
                for user in users
                if user in human_users or (user in ai_users and eligible)
            ]

        return probe

    def _state_for(self, ack_comment, probe):
        """Run compute_ack_state for one comment with author "alice"."""
        return sop.compute_ack_state(
            [ack_comment], "alice", self.items, self.aliases, probe
        )

    def test_ai_ack_passes_for_eligible_item(self):
        ai_probe = self._probe_human_then_ai(human_users=set(), ai_users={"ai-bot"})
        result = self._state_for(_comment("ai-bot", "/sop-ack five-axis-review"), ai_probe)
        self.assertEqual(result["five-axis-review"]["ackers"], ["ai-bot"])

    def test_ai_ack_rejected_for_human_only_item(self):
        ai_probe = self._probe_human_then_ai(human_users=set(), ai_users={"ai-bot"})
        result = self._state_for(_comment("ai-bot", "/sop-ack root-cause"), ai_probe)
        self.assertEqual(result["root-cause"]["ackers"], [])
        self.assertIn("ai-bot", result["root-cause"]["rejected"]["not_in_team"])

    def test_human_ack_still_works_for_ai_eligible_item(self):
        human_probe = self._probe_human_then_ai(human_users={"bob"}, ai_users=set())
        result = self._state_for(_comment("bob", "/sop-ack comprehensive-testing"), human_probe)
        self.assertEqual(result["comprehensive-testing"]["ackers"], ["bob"])

    def test_ai_ack_rejected_for_testing_item_when_ci_red(self):
        # Mimics the production probe's CI check for testing items with CI
        # red: the ai-sop-ack member is rejected for testing-class slugs.
        def probe(slug, users):
            eligible = self.items.get(slug, {}).get("ai_ack_eligible")
            ci_blocks = slug in sop._TESTING_CLASS_SLUGS  # CI not green
            return [
                user
                for user in users
                if user == "ai-bot" and eligible and not ci_blocks
            ]

        result = self._state_for(_comment("ai-bot", "/sop-ack comprehensive-testing"), probe)
        self.assertEqual(result["comprehensive-testing"]["ackers"], [])

    def test_ai_ack_passes_for_testing_item_when_ci_green(self):
        # CI green: testing-class slugs are not blocked, so the AI ack passes.
        def probe(slug, users):
            eligible = self.items.get(slug, {}).get("ai_ack_eligible")
            return [user for user in users if user == "ai-bot" and eligible]

        result = self._state_for(_comment("ai-bot", "/sop-ack comprehensive-testing"), probe)
        self.assertEqual(result["comprehensive-testing"]["ackers"], ["ai-bot"])
|
||||
|
||||
|
||||
class TestAIAckHumanOnlyMigrationSchema(unittest.TestCase):
|
||||
"""RC 8322: migration and schema items are human-only regardless of
|
||||
any future config that might accidentally mark them ai_ack_eligible.
|
||||
|
||||
These slugs are not yet in the live config items list; the tests use
|
||||
synthetic items so the production guard can be exercised directly.
|
||||
"""
|
||||
|
||||
def setUp(self):
|
||||
# Synthetic items — if live config ever adds migration/schema,
|
||||
# they MUST stay human-only. The probe below mirrors the actual
|
||||
# production closure logic (human team first, then AI fallback
|
||||
# with _HUMAN_ONLY_SLUGS guard).
|
||||
self.items = {
|
||||
"migration": {
|
||||
"slug": "migration",
|
||||
"ai_ack_eligible": True,
|
||||
"required_teams": ["engineers"],
|
||||
},
|
||||
"schema": {
|
||||
"slug": "schema",
|
||||
"ai_ack_eligible": True,
|
||||
"required_teams": ["engineers"],
|
||||
},
|
||||
}
|
||||
self.aliases = {}
|
||||
|
||||
def _production_like_probe(self, human_users, ai_users):
|
||||
"""Return a probe that mirrors the production closure's guard."""
|
||||
|
||||
def probe(slug, users):
|
||||
item = self.items.get(slug, {})
|
||||
approved = []
|
||||
for u in users:
|
||||
if u in human_users:
|
||||
approved.append(u)
|
||||
elif u in ai_users:
|
||||
# Production guard: _HUMAN_ONLY_SLUGS rejects AI acks
|
||||
# regardless of the ai_ack_eligible flag.
|
||||
if slug in sop._HUMAN_ONLY_SLUGS:
|
||||
continue
|
||||
if item.get("ai_ack_eligible"):
|
||||
approved.append(u)
|
||||
return approved
|
||||
|
||||
return probe
|
||||
|
||||
def test_ai_ack_rejected_for_migration(self):
|
||||
comments = [_comment("ai-bot", "/sop-ack migration")]
|
||||
probe = self._production_like_probe(human_users=set(), ai_users={"ai-bot"})
|
||||
state = sop.compute_ack_state(
|
||||
comments, "alice", self.items, self.aliases, probe
|
||||
)
|
||||
self.assertEqual(state["migration"]["ackers"], [])
|
||||
self.assertIn("ai-bot", state["migration"]["rejected"]["not_in_team"])
|
||||
|
||||
def test_ai_ack_rejected_for_schema(self):
|
||||
comments = [_comment("ai-bot", "/sop-ack schema")]
|
||||
probe = self._production_like_probe(human_users=set(), ai_users={"ai-bot"})
|
||||
state = sop.compute_ack_state(
|
||||
comments, "alice", self.items, self.aliases, probe
|
||||
)
|
||||
self.assertEqual(state["schema"]["ackers"], [])
|
||||
self.assertIn("ai-bot", state["schema"]["rejected"]["not_in_team"])
|
||||
|
||||
def test_human_ack_still_works_for_migration(self):
|
||||
# Human team member acking migration/schema is unaffected.
|
||||
comments = [_comment("bob", "/sop-ack migration")]
|
||||
probe = self._production_like_probe(human_users={"bob"}, ai_users=set())
|
||||
state = sop.compute_ack_state(
|
||||
comments, "alice", self.items, self.aliases, probe
|
||||
)
|
||||
self.assertEqual(state["migration"]["ackers"], ["bob"])
|
||||
|
||||
def test_human_ack_still_works_for_schema(self):
    """A human team member acking `schema` is unaffected by the AI guard."""
    ack_comments = [_comment("bob", "/sop-ack schema")]
    human_only_probe = self._production_like_probe(
        human_users={"bob"},
        ai_users=set(),
    )
    result = sop.compute_ack_state(
        ack_comments,
        "alice",
        self.items,
        self.aliases,
        human_only_probe,
    )
    self.assertEqual(result["schema"]["ackers"], ["bob"])
|
||||
|
||||
|
||||
class TestGetCIStatus(unittest.TestCase):
    """Check what get_ci_status reports for canned commit-status responses."""

    @staticmethod
    def _stubbed_client(code, payload):
        """Return a GiteaClient whose `_req` always answers `(code, payload)`."""
        stub = sop.GiteaClient("git.example.com", "tok")

        def fake_req(method, path, body=None, ok_codes=(200, 201, 204)):
            return code, payload

        stub._req = fake_req  # type: ignore[method-assign]
        return stub

    def _client_with_statuses(self, statuses):
        """Client stub that answers every request with HTTP 200 and `statuses`."""
        return self._stubbed_client(200, statuses)

    def test_ci_green_returns_success(self):
        green = self._client_with_statuses(
            [{"context": "CI / all-required (pull_request)", "state": "success"}]
        )
        self.assertEqual(sop.get_ci_status(green, "o", "r", "sha1"), "success")

    def test_ci_red_returns_failure(self):
        red = self._client_with_statuses(
            [{"context": "CI / all-required (pull_request)", "state": "failure"}]
        )
        self.assertEqual(sop.get_ci_status(red, "o", "r", "sha1"), "failure")

    def test_missing_context_returns_missing(self):
        # Only an unrelated context is present on the commit.
        unrelated = self._client_with_statuses(
            [{"context": "some-other-context", "state": "success"}]
        )
        self.assertEqual(sop.get_ci_status(unrelated, "o", "r", "sha1"), "missing")

    def test_api_error_returns_unknown(self):
        # The stub answers with HTTP 500 and an error body.
        broken = self._stubbed_client(500, {"error": "boom"})
        self.assertEqual(sop.get_ci_status(broken, "o", "r", "sha1"), "unknown")
|
||||
|
||||
@@ -32,26 +32,6 @@
|
||||
# AUTHOR SELF-ACK IS FORBIDDEN regardless of which team contains them
|
||||
# — the gate script enforces commenter != PR author before checking
|
||||
# team membership.
|
||||
#
|
||||
# AI-SOP-ACK TEAM (internal#760 ceremony design, CTO-approved):
|
||||
# The `ai-sop-ack` team contains AI agent identities that can ack
|
||||
# SOP-checklist items ON BEHALF OF automated evidence. An AI ack is
|
||||
# only valid when:
|
||||
# 1. the item has `ai_ack_eligible: true`
|
||||
# 2. the item is NOT in the human-only carve-out (migration/schema)
|
||||
# 3. for testing-class items, CI / all-required (pull_request) is
|
||||
# green on the current head SHA
|
||||
#
|
||||
# AI acks NEVER count toward qa-review or security-review gates —
|
||||
# those remain human-team-only (enforced by review-check.sh team
|
||||
# probe against TEAM_ID 20/21).
|
||||
#
|
||||
# INITIAL ai_ack_eligible allowlist (CTO-controlled, msg 1388c76f):
|
||||
# comprehensive-testing, local-postgres-e2e, staging-smoke,
|
||||
# five-axis-review, memory-consulted
|
||||
# HUMAN-ONLY carve-out:
|
||||
# root-cause, no-backwards-compat
|
||||
# Any widening requires an explicit config change reviewed by CTO.
|
||||
|
||||
version: 1
|
||||
|
||||
@@ -103,31 +83,25 @@ items:
|
||||
numeric_alias: 1
|
||||
pr_section_marker: "Comprehensive testing performed"
|
||||
required_teams: [qa, engineers]
|
||||
ai_ack_eligible: true
|
||||
description: >-
|
||||
What was tested, how, edge cases covered. Ack from any qa-team
|
||||
member (or engineers fallback while qa is small). AI ack valid
|
||||
only when CI / all-required (pull_request) is green.
|
||||
member (or engineers fallback while qa is small).
|
||||
|
||||
- slug: local-postgres-e2e
|
||||
numeric_alias: 2
|
||||
pr_section_marker: "Local-postgres E2E run"
|
||||
required_teams: [engineers]
|
||||
ai_ack_eligible: true
|
||||
description: >-
|
||||
Link to local CI artifact, or "N/A: pure-frontend change". Ack
|
||||
from any engineer who can verify the local DB test actually ran.
|
||||
AI ack valid only when CI / all-required (pull_request) is green.
|
||||
|
||||
- slug: staging-smoke
|
||||
numeric_alias: 3
|
||||
pr_section_marker: "Staging-smoke verified or pending"
|
||||
required_teams: [engineers]
|
||||
ai_ack_eligible: true
|
||||
description: >-
|
||||
Link to canary run, or "scheduled post-merge". Ack from any
|
||||
engineer (core-devops/infra-sre are members of engineers team).
|
||||
AI ack valid only when CI / all-required (pull_request) is green.
|
||||
|
||||
- slug: root-cause
|
||||
numeric_alias: 4
|
||||
@@ -146,7 +120,6 @@ items:
|
||||
numeric_alias: 5
|
||||
pr_section_marker: "Five-Axis review walked"
|
||||
required_teams: [engineers]
|
||||
ai_ack_eligible: true
|
||||
description: >-
|
||||
Correctness / readability / architecture / security / performance.
|
||||
Ack from any non-author engineer.
|
||||
@@ -167,7 +140,6 @@ items:
|
||||
numeric_alias: 7
|
||||
pr_section_marker: "Memory/saved-feedback consulted"
|
||||
required_teams: [engineers]
|
||||
ai_ack_eligible: true
|
||||
description: >-
|
||||
List of feedback memories applicable to this change. Ack from
|
||||
any engineer who has the same memory access.
|
||||
|
||||
@@ -47,25 +47,13 @@ jobs:
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
# Required-status-check contexts to evaluate at merge time.
|
||||
# Branch-aware JSON dict: keys are protected branch names,
|
||||
# values are arrays of context names that branch protection
|
||||
# requires for that branch. Mirror this against branch
|
||||
# protection (settings → branches → protected branch →
|
||||
# required checks) for each branch listed here.
|
||||
#
|
||||
# Newline-separated. Mirror this against branch protection
|
||||
# (settings → branches → protected branch → required checks).
|
||||
# Declared here rather than fetched from /branch_protections
|
||||
# because that endpoint requires admin write — sop-tier-bot is
|
||||
# read-only by design (least-privilege).
|
||||
REQUIRED_CHECKS_JSON: |
|
||||
{
|
||||
"main": [
|
||||
"CI / all-required (pull_request)",
|
||||
"E2E API Smoke Test / E2E API Smoke Test (pull_request)",
|
||||
"Handlers Postgres Integration / Handlers Postgres Integration (pull_request)"
|
||||
],
|
||||
"staging": [
|
||||
"CI / all-required (pull_request)",
|
||||
"sop-checklist / all-items-acked (pull_request)"
|
||||
]
|
||||
}
|
||||
REQUIRED_CHECKS: |
|
||||
CI / all-required (pull_request)
|
||||
E2E API Smoke Test / E2E API Smoke Test (pull_request)
|
||||
Handlers Postgres Integration / Handlers Postgres Integration (pull_request)
|
||||
run: bash .gitea/scripts/audit-force-merge.sh
|
||||
|
||||
@@ -37,7 +37,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -42,9 +42,11 @@ jobs:
|
||||
check:
|
||||
name: Migration version collision check
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 4 (RFC #219 §1): 22 days green since 2026-05-11 port.
|
||||
# mc#1982 mask removed — no surfaced defects in this lane.
|
||||
continue-on-error: false
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
# AND-set: only the Mac arm64 runner advertises macos-self-hosted.
|
||||
# See "RUNNER TARGETING" header note for why bare self-hosted is unsafe.
|
||||
runs-on: [self-hosted, macos-self-hosted]
|
||||
# ADVISORY: never blocks. See safety contract point 3. mc#1982
|
||||
# ADVISORY: never blocks. See safety contract point 3. mc#774
|
||||
# internal#418 — tracked: arm64 advisory pilot, non-gating by design.
|
||||
continue-on-error: true
|
||||
# event_name gate: functional (only meaningful on push/PR) AND keeps
|
||||
|
||||
+123
-93
@@ -106,7 +106,7 @@ jobs:
|
||||
name: Platform (Go)
|
||||
needs: changes
|
||||
runs-on: ubuntu-latest
|
||||
# mc#1982 (closed 2026-05-14): Phase 4 flip of the platform-build job.
|
||||
# mc#774 (closed 2026-05-14): Phase 4 flip of the platform-build job.
|
||||
# Phase 4 (#656) originally flipped this to continue-on-error: false based on
|
||||
# Phase-3-masked "green on main 2026-05-12". Two failure classes then surfaced:
|
||||
# (1) 4x delegation_test.go sqlmock gaps (PR #669 / #634 fix-forward, closed).
|
||||
@@ -161,7 +161,7 @@ jobs:
|
||||
echo "::group::pendinguploads exit=$pu_exit (last 100 lines)"
|
||||
tail -100 /tmp/test-pu.log
|
||||
echo "::endgroup::"
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
- if: ${{ needs.changes.outputs.platform == 'true' }}
|
||||
name: Run tests with coverage (blocking gate)
|
||||
@@ -357,14 +357,6 @@ jobs:
|
||||
name: Run E2E bash unit tests (no live infra)
|
||||
run: |
|
||||
bash tests/e2e/test_model_slug.sh
|
||||
# molecule-core#1995 (#1994 follow-on): fail-direction proof for
|
||||
# the A2A real-completion + byok-routing assertion helpers
|
||||
# (lib/completion_assert.sh). Offline (no LLM, no network): it
|
||||
# asserts an error-as-text payload FAILS the real-completion gate
|
||||
# — the exact trap the historical shape-only `"kind":"text"`
|
||||
# check missed. If a refactor weakens the gate to a shape check,
|
||||
# this step goes red on every PR.
|
||||
bash tests/e2e/test_completion_assert_unit.sh
|
||||
|
||||
- if: ${{ needs.changes.outputs.scripts == 'true' }}
|
||||
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
|
||||
@@ -392,7 +384,7 @@ jobs:
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
runs-on: docker-host
|
||||
# mc#1982 root-fix: added job-level `if:` so ci-required-drift.py's
|
||||
# mc#774 root-fix: added job-level `if:` so ci-required-drift.py's
|
||||
# ci_job_names() detects this as github.ref-gated and skips it from F1.
|
||||
# The step-level exit 0 handles the "not main push" case; the job-level
|
||||
# `if:` makes the gating explicit so the drift script sees it.
|
||||
@@ -475,10 +467,10 @@ jobs:
|
||||
#
|
||||
# Emits `CI / all-required (<event>)` where <event> is the workflow trigger
|
||||
# (e.g. `CI / all-required (pull_request)`, `CI / all-required (push)`).
|
||||
# Branch protection requires the event-suffixed name —
|
||||
# Branch protection MUST be updated to require the event-suffixed name —
|
||||
# requiring `CI / all-required` (bare, no suffix) silently blocks all merges
|
||||
# because Gitea treats absent status contexts as pending (not skipped), and
|
||||
# no workflow emits the bare name. BP requires
|
||||
# no workflow emits the bare name. Fixed: BP now requires
|
||||
# `CI / all-required (pull_request)` per issue #1473.
|
||||
#
|
||||
# Closes the failure mode where status_check_contexts on molecule-core/main
|
||||
@@ -487,91 +479,129 @@ jobs:
|
||||
# red silently merged through. See internal#286 for the three concrete
|
||||
# tonight-of-2026-05-11 incidents that prompted the emergency bump.
|
||||
#
|
||||
# ── 2026-06-01 CI-scheduler-overload fix (fix/ci-scheduler-fanout) ──
|
||||
# PREVIOUS shape: a poll-gate that ran detect-changes then LOOPED on
|
||||
# `GET /commits/{sha}/statuses` every 15s for up to 40 min, occupying a
|
||||
# `ci-meta` executor slot the entire time it waited for upstream jobs.
|
||||
# With only 2 ci-meta runners, that poll-loop squatted half the lane on
|
||||
# every PR — a confirmed throughput sink in the live RCA (two concurrent
|
||||
# `JOB-all-required` containers observed pinning the lane). The polling
|
||||
# design existed only to dodge the Gitea `needs:` + `if: always()` bug,
|
||||
# where an always()-guarded sentinel could be marked skipped before
|
||||
# upstream jobs settled (leaving BP pending forever).
|
||||
# This job deliberately has no `needs:`. Gitea 1.22/act_runner can mark a
|
||||
# job-level `if: always()` + `needs:` sentinel as skipped before upstream
|
||||
# jobs settle, leaving branch protection with a permanent pending
|
||||
# `CI / all-required` context. Instead, this independent sentinel polls the
|
||||
# required commit-status contexts for this SHA and fails if any fail, skip,
|
||||
# or never emit. It runs the same path detector as `changes` and only waits
|
||||
# for path-relevant jobs; Gitea can otherwise leave needs/output-skipped
|
||||
# jobs permanently pending with "Blocked by required conditions". It runs on
|
||||
# the dedicated `ci-meta` lane so the poller does not occupy the same
|
||||
# general runner pool as the jobs it is waiting for.
|
||||
#
|
||||
# NEW shape: a plain `needs:` aggregator with NO polling loop. This is
|
||||
# safe here — and was NOT safe at the time the poller was written —
|
||||
# because every aggregated CI job now gates its real work PER-STEP
|
||||
# (`if: needs.changes.outputs.* != 'true'`) rather than at the JOB level.
|
||||
# A per-step-gated job always reaches a terminal SUCCESS (it no-ops its
|
||||
# expensive steps but the job itself still completes), so it is never
|
||||
# `skipped`. Plain `needs:` (WITHOUT `if: always()`) works correctly on
|
||||
# Gitea 1.22.6 / act_runner v0.6.1 — only `needs:` + `if: always()` is
|
||||
# broken (feedback_gitea_needs_works_only_ifalways_broken). We therefore
|
||||
# use plain `needs:` + an explicit per-need result check (NOT
|
||||
# `if: always()`); if any need fails/errors, Gitea never starts this job
|
||||
# and BP sees `CI / all-required` go red via the failed dependency
|
||||
# propagation — exactly the gate we want, with zero runner-squat.
|
||||
# canvas-deploy-reminder is intentionally NOT included in all-required.needs.
|
||||
# It is an informational main-push reminder, not a PR quality gate. Keeping
|
||||
# it in this dependency list lets a skipped reminder skip the required
|
||||
# sentinel before the `always()` guard can emit a branch-protection status.
|
||||
#
|
||||
# The `needs:` list MUST stay in lockstep with ci-required-drift.py's
|
||||
# F1 check (`ci_job_names()` = every job MINUS the sentinel MINUS jobs
|
||||
# whose `if:` gates on github.event_name/github.ref). canvas-deploy-
|
||||
# reminder is event-gated (`if: github.ref == refs/heads/{main,staging}`)
|
||||
# so it is intentionally EXCLUDED — it skips on PRs and a `needs:` on a
|
||||
# skipped job would never let the sentinel run. If a new always-running
|
||||
# CI job is added, add it here too or ci-required-drift F1 will flag it.
|
||||
#
|
||||
# Stays on the dedicated `ci-meta` lane (no docker work, so the
|
||||
# docker-host-pin lint does not apply), but now the job is sub-second:
|
||||
# it only inspects already-settled `needs.*.result` values, so it frees
|
||||
# the slot immediately instead of holding it for the whole CI duration.
|
||||
#
|
||||
needs:
|
||||
- changes
|
||||
- platform-build
|
||||
- canvas-build
|
||||
- shellcheck
|
||||
- python-lint
|
||||
continue-on-error: false
|
||||
runs-on: ci-meta
|
||||
timeout-minutes: 5
|
||||
timeout-minutes: 45
|
||||
steps:
|
||||
- name: Verify all aggregated CI jobs succeeded
|
||||
# NO polling, NO API call, NO checkout. Because this job lists the
|
||||
# aggregated jobs under `needs:` (without `if: always()`), Gitea only
|
||||
# starts it once every need has reached SUCCESS — a failed/errored
|
||||
# need short-circuits the job and propagates red to the
|
||||
# `CI / all-required` context. This explicit check is a
|
||||
# belt-and-suspenders assertion + a readable run summary; the real
|
||||
# gating is the `needs:` edge itself.
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- id: check
|
||||
env:
|
||||
CHANGES_RESULT: ${{ needs.changes.result }}
|
||||
PLATFORM_RESULT: ${{ needs.platform-build.result }}
|
||||
CANVAS_RESULT: ${{ needs.canvas-build.result }}
|
||||
SHELLCHECK_RESULT: ${{ needs.shellcheck.result }}
|
||||
PYTHON_LINT_RESULT: ${{ needs.python-lint.result }}
|
||||
PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
|
||||
PR_BASE_REF: ${{ github.event.pull_request.base.ref }}
|
||||
PUSH_BEFORE: ${{ github.event.before }}
|
||||
run: |
|
||||
python3 .gitea/scripts/detect-changes.py \
|
||||
--profile ci \
|
||||
--event-name "${{ github.event_name }}" \
|
||||
--pr-base-sha "$PR_BASE_SHA" \
|
||||
--base-ref "$PR_BASE_REF" \
|
||||
--push-before "${GITHUB_EVENT_BEFORE:-$PUSH_BEFORE}"
|
||||
- name: Wait for required CI contexts
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
API_ROOT: ${{ github.server_url }}/api/v1
|
||||
REPOSITORY: ${{ github.repository }}
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REQUIRE_PLATFORM: ${{ steps.check.outputs.platform }}
|
||||
REQUIRE_CANVAS: ${{ steps.check.outputs.canvas }}
|
||||
REQUIRE_SCRIPTS: ${{ steps.check.outputs.scripts }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
fail=0
|
||||
# check NAME RESULT
#
# Print the result of one aggregated CI job and set the script-level
# `fail` flag to 1 when that result is anything other than `success`.
#
#   $1  human-readable job name (used in the log line and error message)
#   $2  the job's `needs.<job>.result` value
#
# The arguments are held in `local` variables so that calling check does
# not overwrite same-named variables in the calling script.
check() {
  local name="$1"
  local result="$2"
  printf 'CI / %s = %s\n' "$name" "$result"
  # `success` is the only green terminal state we accept. A plain
  # `needs:` job is only started when all needs succeed, so reaching
  # this step already implies success — but assert explicitly so a
  # future `if: always()` reintroduction (which WOULD let non-success
  # through) fails loudly instead of silently passing the gate.
  if [ "$result" != "success" ]; then
    echo "::error::aggregated CI job '${name}' did not succeed (result=${result})"
    fail=1
  fi
}
|
||||
check "Detect changes" "$CHANGES_RESULT"
|
||||
check "Platform (Go)" "$PLATFORM_RESULT"
|
||||
check "Canvas (Next.js)" "$CANVAS_RESULT"
|
||||
check "Shellcheck (E2E scripts)" "$SHELLCHECK_RESULT"
|
||||
check "Python Lint & Test" "$PYTHON_LINT_RESULT"
|
||||
if [ "$fail" -ne 0 ]; then
|
||||
echo "::error::all-required: one or more aggregated CI jobs did not succeed"
|
||||
exit 1
|
||||
fi
|
||||
echo "OK: all aggregated CI jobs succeeded — CI / all-required green."
|
||||
python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
# --- Poller configuration, read from the step's environment ---------------
token = os.environ["GITEA_TOKEN"]
api_root = os.environ["API_ROOT"].rstrip("/")
repo = os.environ["REPOSITORY"]
sha = os.environ["COMMIT_SHA"]
event = os.environ["EVENT_NAME"]

# Contexts that are always awaited.
required = [
    f"CI / Detect changes ({event})",
    f"CI / Python Lint & Test ({event})",
]

# Contexts that are awaited only when the matching REQUIRE_* variable is
# exactly "true".
_conditional_contexts = (
    ("REQUIRE_PLATFORM", f"CI / Platform (Go) ({event})"),
    ("REQUIRE_CANVAS", f"CI / Canvas (Next.js) ({event})"),
    ("REQUIRE_SCRIPTS", f"CI / Shellcheck (E2E scripts) ({event})"),
)
for _flag, _context in _conditional_contexts:
    if os.environ.get(_flag) == "true":
        required.append(_context)

# States that end the wait immediately with a failure.
terminal_bad = {"failure", "error"}
# Give up 40 minutes after the script starts.
deadline = time.time() + 40 * 60
# Last printed state summary; used to avoid repeating identical log lines.
last_summary = None
|
||||
|
||||
def fetch_statuses():
    """Fetch commit statuses for `sha` and return the newest one per context.

    Reads up to five pages (100 entries each) from the commit-statuses
    endpoint, stopping early at the first empty page. Entries without a
    `context` are ignored. For each context the entry with the greatest
    `updated_at` (falling back to `created_at`, then "") is kept; on a
    tie the entry seen later wins.

    Returns:
        dict mapping context name -> status entry (the decoded JSON object).
    """
    auth_header = {"Authorization": f"token {token}"}
    collected = []
    for page in range(1, 6):
        url = f"{api_root}/repos/{repo}/commits/{sha}/statuses?page={page}&limit=100"
        request = urllib.request.Request(url, headers=auth_header)
        with urllib.request.urlopen(request, timeout=10) as resp:
            batch = json.load(resp)
        if not batch:
            break
        collected.extend(batch)

    def stamp(entry):
        # Timestamps are compared as plain strings.
        return entry.get("updated_at") or entry.get("created_at") or ""

    newest = {}
    for entry in collected:
        ctx = entry.get("context")
        if not ctx:
            continue
        if ctx not in newest or stamp(entry) >= stamp(newest[ctx]):
            newest[ctx] = entry
    return newest
|
||||
|
||||
# Poll every 15 seconds until every required context is `success`, any
# required context is in a terminal-bad state, or the deadline passes.
while True:
    try:
        snapshot = fetch_statuses()
    except (TimeoutError, OSError, urllib.error.URLError) as exc:
        # Transient fetch problems are retried until the deadline.
        if time.time() < deadline:
            print(f"WARN: status poll failed, retrying: {exc}", flush=True)
            time.sleep(15)
            continue
        print(f"FAIL: status polling did not recover before deadline: {exc}", file=sys.stderr)
        sys.exit(1)

    # Prefer the `status` field, fall back to `state`, else "missing".
    observed = {}
    for ctx in required:
        record = snapshot.get(ctx) or {}
        observed[ctx] = record.get("status") or record.get("state") or "missing"

    summary = ", ".join(f"{ctx}={state}" for ctx, state in observed.items())
    if summary != last_summary:
        # Only log when something changed since the previous poll.
        print(summary, flush=True)
        last_summary = summary

    failed = {ctx: state for ctx, state in observed.items() if state in terminal_bad}
    if failed:
        print("FAIL: required CI context failed:", file=sys.stderr)
        for ctx, state in failed.items():
            desc = (snapshot.get(ctx) or {}).get("description") or ""
            print(f"  - {ctx}: {state} {desc}", file=sys.stderr)
        sys.exit(1)

    if not any(state != "success" for state in observed.values()):
        print(f"OK: all {len(required)} required CI contexts succeeded")
        sys.exit(0)

    if time.time() >= deadline:
        print("FAIL: timed out waiting for required CI contexts:", file=sys.stderr)
        for ctx, state in observed.items():
            print(f"  - {ctx}: {state}", file=sys.stderr)
        sys.exit(1)

    time.sleep(15)
|
||||
PY
|
||||
|
||||
@@ -102,7 +102,7 @@ jobs:
|
||||
name: Synthetic E2E against staging
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# Bumped from 12 → 20 (2026-05-04). Tenant user-data install phase
|
||||
# (apt-get update + install docker.io/jq/awscli/caddy + snap install
|
||||
@@ -166,10 +166,6 @@ jobs:
|
||||
# canary path. The script picks the right blob shape based on
|
||||
# which key is non-empty.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
# google-adk canary path — AI-Studio key (config model
|
||||
# google_genai:gemini-2.5-pro). PROD disallows API keys (Vertex+ADC);
|
||||
# the keyed path is CI-only. Dispatch with E2E_RUNTIME=google-adk.
|
||||
E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -221,10 +217,6 @@ jobs:
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
google-adk)
|
||||
required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
|
||||
required_secret_value="${E2E_GOOGLE_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
|
||||
required_secret_name=""
|
||||
|
||||
@@ -123,9 +123,8 @@ jobs:
|
||||
# integration). See internal#512 for the class defect.
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
api: ${{ steps.decide.outputs.api }}
|
||||
steps:
|
||||
@@ -161,9 +160,8 @@ jobs:
|
||||
# detect-changes for the full rationale.
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 15
|
||||
env:
|
||||
# Unique per-run container names so concurrent runs on the host-
|
||||
|
||||
@@ -48,7 +48,7 @@ jobs:
|
||||
# defect.
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
chat: ${{ steps.decide.outputs.chat }}
|
||||
@@ -112,7 +112,7 @@ jobs:
|
||||
# Must land on operator-host Linux (docker-host).
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 15
|
||||
env:
|
||||
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
detect-changes:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
canvas: ${{ steps.decide.outputs.canvas }}
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
name: Canvas tabs E2E
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 40
|
||||
|
||||
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
name: E2E Staging External Runtime
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 25
|
||||
|
||||
|
||||
@@ -49,7 +49,6 @@ on:
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -62,7 +61,6 @@ on:
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -94,20 +92,20 @@ jobs:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
- name: YAML validation (best-effort)
|
||||
run: |
|
||||
echo "e2e-staging-saas.yml — PR validation: workflow YAML is valid."
|
||||
echo "E2E step runs only when provisioning-critical files change."
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
# Actual E2E: runs on trunk pushes and PRs that touch provisioning-critical
|
||||
@@ -118,7 +116,7 @@ jobs:
|
||||
name: E2E Staging SaaS
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 45
|
||||
permissions:
|
||||
@@ -157,18 +155,13 @@ jobs:
|
||||
# E2E_RUNTIME=hermes or =codex via workflow_dispatch can still
|
||||
# exercise the OpenAI path.
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
# google-adk (operator-dispatched only) auths Gemini with an
|
||||
# AI-Studio key. Org policy disallows API keys in PROD (Vertex+ADC
|
||||
# there); CI uses the keyed AI-Studio path with config model
|
||||
# google_genai:gemini-2.5-pro. Vertex remains the supported prod path.
|
||||
E2E_GOOGLE_API_KEY: ${{ secrets.MOLECULE_STAGING_GOOGLE_API_KEY }}
|
||||
E2E_RUNTIME: ${{ github.event.inputs.runtime || 'claude-code' }}
|
||||
# Pin the model when running on the default claude-code path —
|
||||
# the per-runtime default ("sonnet") routes to direct Anthropic
|
||||
# and defeats the cost saving. Operators can override via the
|
||||
# workflow_dispatch flow (no input wired here yet — runtime
|
||||
# override is enough for ad-hoc).
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2' }}
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || 'MiniMax-M2' }}
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
|
||||
@@ -217,10 +210,6 @@ jobs:
|
||||
required_secret_name="MOLECULE_STAGING_OPENAI_API_KEY"
|
||||
required_secret_value="${E2E_OPENAI_API_KEY:-}"
|
||||
;;
|
||||
google-adk)
|
||||
required_secret_name="MOLECULE_STAGING_GOOGLE_API_KEY"
|
||||
required_secret_value="${E2E_GOOGLE_API_KEY:-}"
|
||||
;;
|
||||
*)
|
||||
echo "::warning::Unknown E2E_RUNTIME='${E2E_RUNTIME}' — skipping LLM-key check"
|
||||
required_secret_name=""
|
||||
|
||||
@@ -37,7 +37,7 @@ jobs:
|
||||
name: Intentional-failure teardown sanity
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 20
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
# bp-exempt: PR advisory bot; merge blocking is enforced by CI status and branch protection.
|
||||
gate-check:
|
||||
runs-on: ubuntu-latest
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # Never block on our own detector failing
|
||||
steps:
|
||||
- name: Check out BASE ref (never PR-head under pull_request_target)
|
||||
|
||||
@@ -87,10 +87,9 @@ jobs:
|
||||
# both jobs on the same label avoids workspace-volume cross-host
|
||||
# surprises and keeps the routing rule discoverable in one place.
|
||||
runs-on: docker-host
|
||||
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
handlers: ${{ steps.filter.outputs.handlers }}
|
||||
steps:
|
||||
@@ -119,10 +118,9 @@ jobs:
|
||||
# mc#1529 §1: must run on operator-host (where `molecule-core-net`
|
||||
# exists). See detect-changes for the full routing rationale.
|
||||
runs-on: docker-host
|
||||
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#774 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
env:
|
||||
# Unique name per run so concurrent jobs don't collide on the
|
||||
# bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
|
||||
|
||||
@@ -70,7 +70,7 @@ jobs:
|
||||
# of mc#1543; see internal#512 for class defect.
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
run: ${{ steps.decide.outputs.run }}
|
||||
@@ -172,7 +172,7 @@ jobs:
|
||||
# beta containers. Must run on operator-host Linux (docker-host).
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
name: lint-bp-context-emit-match
|
||||
|
||||
# Tier 2f scheduled lint (per mc#1982) — detects drift between
|
||||
# Tier 2f scheduled lint (per mc#774) — detects drift between
|
||||
# `branch_protections/<branch>.status_check_contexts` and the set of
|
||||
# contexts emitted by `.gitea/workflows/*.yml`.
|
||||
#
|
||||
@@ -60,7 +60,7 @@ name: lint-bp-context-emit-match
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - mc#1982 (the RFC that specs this lint)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - internal#349 (cross-repo BP sweep)
|
||||
# - feedback_phantom_required_check_after_gitea_migration
|
||||
# - feedback_tier_label_ids_are_per_repo
|
||||
@@ -91,10 +91,10 @@ jobs:
|
||||
name: lint-bp-context-emit-match
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
# Phase 4 (RFC #219 §1): 22 days green since 2026-05-11 port,
|
||||
# well past the 7-clean-run threshold. Scheduled failure is now
|
||||
# a hard CI signal.
|
||||
continue-on-error: false
|
||||
# Phase 3 (RFC #219 §1): surface drift without blocking. After 7
|
||||
# clean scheduled runs on main, flip to false so a scheduled
|
||||
# failure is a hard CI signal.
|
||||
continue-on-error: true # mc#774 Phase 3 — flip to false after 7 clean main runs
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
name: lint-continue-on-error-tracking
|
||||
|
||||
# Tier 2e hard-gate lint (per mc#1982) — every
|
||||
# Tier 2e hard-gate lint (per mc#774) — every
|
||||
# `continue-on-error: true` in `.gitea/workflows/*.yml` must carry a
|
||||
# `# mc#NNNN` or `# internal#NNNN` tracker comment within 2 lines,
|
||||
# the referenced issue must be OPEN, and ≤14 days old.
|
||||
@@ -8,7 +8,7 @@ name: lint-continue-on-error-tracking
|
||||
# Why this exists
|
||||
# ---------------
|
||||
# `continue-on-error: true` on `platform-build` had been hiding
|
||||
# mc#1982-class regressions for ~3 weeks before #656 surfaced them on
|
||||
# mc#774-class regressions for ~3 weeks before #656 surfaced them on
|
||||
# 2026-05-12. A 14-day cap on tracker age forces a review cycle and
|
||||
# surfaces mask-drift within at most 14 days of the original defect.
|
||||
# Each `continue-on-error: true` gets a paper trail — close or renew.
|
||||
@@ -45,12 +45,12 @@ name: lint-continue-on-error-tracking
|
||||
# close-and-flip, or document the deliberate keep-mask in a fresh
|
||||
# 14-day-renewable tracker. After main is clean for 3 days,
|
||||
# follow-up PR flips this workflow's continue-on-error to false.
|
||||
# Tracking: mc#1982.
|
||||
# Tracking: mc#774.
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - mc#1982 (the RFC that specs this lint)
|
||||
# - mc#1982 (the empirical masked-3-weeks case)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - mc#774 (the empirical masked-3-weeks case)
|
||||
# - feedback_chained_defects_in_never_tested_workflows
|
||||
# - feedback_behavior_based_ast_gates
|
||||
# - feedback_strict_root_only_after_class_a
|
||||
@@ -97,9 +97,9 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface masked defects without blocking
|
||||
# PRs. Pre-existing continue-on-error: true directives on main
|
||||
# all violate this lint at first — intentional. Flip to false
|
||||
# follow-up after main is clean for 3 days. mc#1982.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # mc#1982 Phase 3 mask — 14d forced-renewal cadence
|
||||
# follow-up after main is clean for 3 days. mc#774.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # mc#774 Phase 3 mask — 14d forced-renewal cadence
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
|
||||
@@ -48,9 +48,11 @@ jobs:
|
||||
scan:
|
||||
name: Scan workflows for curl status-capture pollution
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 4 (RFC #219 §1): 22 days green since 2026-05-11 port.
|
||||
# mc#1982 mask removed — no surfaced defects in this lane.
|
||||
continue-on-error: false
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- name: Find curl ... -w '%{http_code}' ... || echo "000" subshells
|
||||
|
||||
@@ -25,21 +25,6 @@ name: Lint forbidden tenant-env keys
|
||||
# feedback_path_filtered_workflow_cant_be_required). The scan itself
|
||||
# targets workspace_secrets-writer paths via grep -r; it's fast
|
||||
# (sub-second) so unconditional run is fine.
|
||||
#
|
||||
# ── 2026-06-01 CI-scheduler-fanout consolidation (fix/ci-scheduler-fanout) ──
|
||||
# The RFC#523 sibling lint formerly in its own file
|
||||
# `lint-no-tenant-gitea-token.yml` (the broader "no repo-host token into
|
||||
# any tenant-writer surface" scan) is now a SECOND job in THIS workflow
|
||||
# (`scan-tenant-token-write`). Both are sub-second Go-source greps that
|
||||
# fired as two separate workflow runs on every PR — pure scheduler
|
||||
# fan-out. Folding the sibling in here drops one workflow run + one
|
||||
# checkout per PR while keeping BOTH scans firing unconditionally on
|
||||
# every PR (the no-paths discipline above is preserved — neither job is
|
||||
# paths-filtered). The moved job keeps its exact `name:` so its emitted
|
||||
# status context is unchanged in substance; its `# bp-exempt:` directive
|
||||
# moves with it (Tier 2g). The old `Lint no tenant GITEA or GITHUB token
|
||||
# write / …` context is retired (a disappearing context needs no
|
||||
# directive; only NEW emitters do).
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -181,126 +166,3 @@ jobs:
|
||||
fi
|
||||
|
||||
echo "OK No forbidden operator-scope env key names hardcoded in writer paths."
|
||||
|
||||
# bp-exempt: advisory RFC#523 lint; PR review gate is review-driven, not BP-driven.
|
||||
# (Carried with the workflow-name rename in PR mc#1593 so the renamed
|
||||
# context emission satisfies lint_required_context_exists_in_bp Tier 2g.)
|
||||
scan-tenant-token-write:
|
||||
name: Scan for repo-host token write into tenant workspace surface
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Find Go files referencing a tenant-writer surface AND a repo-host token
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Repo-host token NAMES — the threat-model subset. Operator-fleet
|
||||
# tokens (CP_ADMIN_API_TOKEN, RAILWAY_TOKEN, INFISICAL_*) are
|
||||
# caught by lint-forbidden-env-keys.yml's broader deny set; this
|
||||
# lint focuses on the git-host class so a single co-occurrence
|
||||
# match has a low false-positive rate.
|
||||
FORBIDDEN_KEYS=(
|
||||
"GITEA_TOKEN"
|
||||
"GITEA_PAT"
|
||||
"GITHUB_TOKEN"
|
||||
"GITHUB_PAT"
|
||||
"GH_TOKEN"
|
||||
)
|
||||
|
||||
# Tenant-writer surface markers. A file matches the surface set
|
||||
# if it references ANY of these strings. This is the "is this
|
||||
# code path writing into a tenant workspace?" heuristic.
|
||||
# Curated to catch the actual code shapes used in this repo
|
||||
# (verified by grep against current main 2026-05-19):
|
||||
# - "workspace_secrets" / "global_secrets" → DB table writes
|
||||
# - "seedAllowList" → CP-side seed table
|
||||
# - "/settings/secrets" → tenant HTTP API write
|
||||
# - "envVars[" → in-memory env map write
|
||||
# - "containerEnv" → docker-run env-set
|
||||
# - "userData" → EC2 user-data script
|
||||
# - "provisionPayload" / "provisionContext" → provision-request shape
|
||||
SURFACE_PATTERN='workspace_secrets|global_secrets|seedAllowList|/settings/secrets|envVars\[|containerEnv|userData|provisionPayload|provisionContext'
|
||||
|
||||
# Files that legitimately reference these names AND a surface
|
||||
# marker, but do so for guard / strip / test / doc-comment
|
||||
# reasons. New entries require reviewer signoff and a one-line
|
||||
# justification in the diff.
|
||||
EXEMPT_FILES=(
|
||||
# RFC#523 L1 deny-set source-of-truth + tests
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env.go"
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env_test.go"
|
||||
# Forensic-#145 silent-strip denylist (defense-in-depth, by design lists the names)
|
||||
"workspace-server/internal/provisioner/provisioner.go"
|
||||
"workspace-server/internal/provisioner/provisioner_test.go"
|
||||
# Pre-RFC#523 persona-fallback / org-helper paths. The L1
|
||||
# fail-closed runs BEFORE these writers; downstream silent-strip
|
||||
# also covers them. See applyAgentGitHTTPCreds doc-comment.
|
||||
"workspace-server/internal/handlers/agent_git_identity.go"
|
||||
"workspace-server/internal/handlers/org_helpers.go"
|
||||
"workspace-server/internal/handlers/org.go"
|
||||
# CP→platform admin auth (NOT a tenant env write).
|
||||
"workspace-server/internal/provisioner/cp_provisioner.go"
|
||||
)
|
||||
|
||||
# Build an extended-regex alternation of forbidden keys.
|
||||
KEY_ALT="$(IFS='|'; echo "${FORBIDDEN_KEYS[*]}")"
|
||||
|
||||
# Find candidate files: Go non-test sources that contain a
|
||||
# tenant-writer surface marker.
|
||||
mapfile -t CANDIDATES < <(
|
||||
grep -rlE --include='*.go' --exclude='*_test.go' \
|
||||
"${SURFACE_PATTERN}" . 2>/dev/null \
|
||||
| sed 's|^\./||' \
|
||||
| sort -u
|
||||
)
|
||||
|
||||
if [ "${#CANDIDATES[@]}" -eq 0 ]; then
|
||||
echo "OK No tenant-writer-surface files found in tree (unexpected, but not a lint failure)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
HITS=""
|
||||
for f in "${CANDIDATES[@]}"; do
|
||||
# Skip exempt files.
|
||||
skip=0
|
||||
for ex in "${EXEMPT_FILES[@]}"; do
|
||||
if [ "$f" = "$ex" ]; then skip=1; break; fi
|
||||
done
|
||||
[ "$skip" = "1" ] && continue
|
||||
|
||||
# File contains a surface marker; now grep for a forbidden
|
||||
# key NAME. We require a QUOTED-literal match to avoid
|
||||
# firing on a comment like "// also handle GITEA_TOKEN".
|
||||
#
|
||||
# The literal form catches:
|
||||
# - os.Getenv("GITEA_TOKEN")
|
||||
# - envVars["GITEA_TOKEN"] = ...
|
||||
# - {envKey: "GITEA_TOKEN", tenantKey: "GITEA_TOKEN"}
|
||||
# but not:
|
||||
# - // see GITEA_TOKEN below (no quotes)
|
||||
found=$(grep -nE "\"(${KEY_ALT})\"" "$f" 2>/dev/null || true)
|
||||
if [ -n "$found" ]; then
|
||||
HITS="${HITS}--- ${f} ---\n${found}\n"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$HITS" ]; then
|
||||
echo "::error::Task #146 lint: repo-host token name(s) quoted in a tenant-writer-surface file:"
|
||||
printf "$HITS"
|
||||
echo ""
|
||||
echo "These files reference a tenant-writer surface (workspace_secrets,"
|
||||
echo "seedAllowList, /settings/secrets, containerEnv, userData, etc.)"
|
||||
echo "AND quote a repo-host token name (GITEA_TOKEN/GITHUB_TOKEN/…)."
|
||||
echo "Per RFC#523 threat model, tenant workspaces MUST NOT receive"
|
||||
echo "operator-scope repo-host tokens. If your code legitimately needs"
|
||||
echo "to reference one of these names in a tenant-writer file (e.g."
|
||||
echo "a deny-set definition or silent-strip list), add the file to"
|
||||
echo "EXEMPT_FILES with a one-line justification — reviewer signoff"
|
||||
echo "required."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "OK No tenant-writer-surface file co-mentions a repo-host token literal."
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
name: lint-mask-pr-atomicity
|
||||
|
||||
# Tier 2d hard-gate lint (per mc#1982) — blocks PRs that touch
|
||||
# Tier 2d hard-gate lint (per mc#774) — blocks PRs that touch
|
||||
# `.gitea/workflows/ci.yml` and modify ONLY ONE of {continue-on-error,
|
||||
# all-required.sentinel.needs} without a `Paired: #NNN` reference in
|
||||
# the PR body or in a commit message.
|
||||
@@ -37,13 +37,13 @@ name: lint-mask-pr-atomicity
|
||||
# This workflow lands at `continue-on-error: true` (Phase 3 — surface
|
||||
# regressions without blocking PRs while the rule beds in).
|
||||
# Follow-up PR flips to `false` once we have ≥3 days of clean runs on
|
||||
# `main` and no false-positives. Tracking issue: mc#1982.
|
||||
# `main` and no false-positives. Tracking issue: mc#774.
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - mc#1982 (the RFC that specs this lint)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - PR#665 / PR#668 (the empirical split-pair)
|
||||
# - mc#1982 (the main-red incident the split caused)
|
||||
# - mc#774 (the main-red incident the split caused)
|
||||
# - feedback_strict_root_only_after_class_a
|
||||
# - feedback_behavior_based_ast_gates
|
||||
#
|
||||
@@ -92,8 +92,8 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken shapes without blocking
|
||||
# PRs. Follow-up PR flips this to `false` once recent runs on main
|
||||
# are confirmed clean (eat-our-own-dogfood discipline mirrors
|
||||
# PR#673's same-shape comment). Tracking: mc#1982.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# PR#673's same-shape comment). Tracking: mc#774.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Check out PR head with full history (need base SHA blobs)
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
name: Lint no tenant GITEA or GITHUB token write
|
||||
|
||||
# Task #146 — CI guardrail companion to RFC#523's `lint-forbidden-env-keys.yml`.
|
||||
#
|
||||
# `lint-forbidden-env-keys.yml` (Layer 3) catches code that hardcodes a
|
||||
# forbidden env-var key NAME as a quoted literal in workspace_secrets
|
||||
# writer paths under workspace-server/internal/.
|
||||
#
|
||||
# This workflow catches a BROADER class: any code path that reads a
|
||||
# repo-host token (GITEA_TOKEN / GITHUB_TOKEN / GH_TOKEN) and then writes
|
||||
# it into a TENANT WORKSPACE's env, secret store, user-data, or
|
||||
# provision payload. This is the actual RFC#523 threat-model statement —
|
||||
# the goal is "no tenant workspace ever receives an operator-scope repo
|
||||
# token," not just "no _quoted_ literal `GITEA_TOKEN`." A future writer
|
||||
# could route the value via a variable, a struct field, or a config key
|
||||
# and slip past the existing literal scan; this lint catches those
|
||||
# routing patterns at PR review time.
|
||||
#
|
||||
# Scope
|
||||
# Scans the WHOLE repo's Go sources (not just workspace-server/) for
|
||||
# co-occurrences of:
|
||||
# - a repo-host token NAME (GITEA_TOKEN / GITHUB_TOKEN / GH_TOKEN /
|
||||
# GITEA_PAT / GITHUB_PAT) used as os.Getenv argument or string
|
||||
# literal
|
||||
# - within a file that ALSO references a tenant-writer surface
|
||||
#     (`workspace_secrets`, `global_secrets`, `seedAllowList`,
|
||||
#     `/settings/secrets`, `userData`, `provisionPayload`, `provisionContext`,
|
||||
# `envVars[`, `containerEnv`).
|
||||
#
|
||||
# Co-occurrence (not single-line) is the false-positive control: a
|
||||
# file that just LOGS the variable name (e.g. "missing GITEA_TOKEN")
|
||||
# without touching any tenant surface won't fire.
|
||||
#
|
||||
# Drift contract with lint-forbidden-env-keys.yml
|
||||
# Both lints share the same FORBIDDEN_KEYS list (a subset — only the
|
||||
# repo-host tokens, since this lint's threat model is "tenant gets
|
||||
# write access to operator's git host"). If RFC#523's deny set grows,
|
||||
# update BOTH this file AND lint-forbidden-env-keys.yml AND the Go
|
||||
# source-of-truth in
|
||||
# workspace-server/internal/handlers/workspace_provision_forbidden_env.go.
|
||||
#
|
||||
# Open-source-template-friendly
|
||||
# The patterns scanned are generic (no MOLECULE_-prefix literals).
|
||||
# A fork can copy this workflow as-is and adjust FORBIDDEN_KEYS.
|
||||
#
|
||||
# Path-filter discipline
|
||||
# No `paths:` filter — required-status workflows must run on every PR
|
||||
# per `feedback_path_filtered_workflow_cant_be_required`. Scan is
|
||||
# sub-second.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
push:
|
||||
branches: [main, staging]
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: advisory RFC#523 lint; PR review gate is review-driven, not BP-driven.
|
||||
# (Carried with the workflow-name rename in PR mc#1593 so the renamed
|
||||
# context emission satisfies lint_required_context_exists_in_bp Tier 2g.)
|
||||
scan:
|
||||
name: Scan for repo-host token write into tenant workspace surface
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Find Go files referencing a tenant-writer surface AND a repo-host token
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Repo-host token NAMES — the threat-model subset. Operator-fleet
|
||||
# tokens (CP_ADMIN_API_TOKEN, RAILWAY_TOKEN, INFISICAL_*) are
|
||||
# caught by lint-forbidden-env-keys.yml's broader deny set; this
|
||||
# lint focuses on the git-host class so a single co-occurrence
|
||||
# match has a low false-positive rate.
|
||||
FORBIDDEN_KEYS=(
|
||||
"GITEA_TOKEN"
|
||||
"GITEA_PAT"
|
||||
"GITHUB_TOKEN"
|
||||
"GITHUB_PAT"
|
||||
"GH_TOKEN"
|
||||
)
|
||||
|
||||
# Tenant-writer surface markers. A file matches the surface set
|
||||
# if it references ANY of these strings. This is the "is this
|
||||
# code path writing into a tenant workspace?" heuristic.
|
||||
# Curated to catch the actual code shapes used in this repo
|
||||
# (verified by grep against current main 2026-05-19):
|
||||
# - "workspace_secrets" / "global_secrets" → DB table writes
|
||||
# - "seedAllowList" → CP-side seed table
|
||||
# - "/settings/secrets" → tenant HTTP API write
|
||||
# - "envVars[" → in-memory env map write
|
||||
# - "containerEnv" → docker-run env-set
|
||||
# - "userData" → EC2 user-data script
|
||||
# - "provisionPayload" / "provisionContext" → provision-request shape
|
||||
SURFACE_PATTERN='workspace_secrets|global_secrets|seedAllowList|/settings/secrets|envVars\[|containerEnv|userData|provisionPayload|provisionContext'
|
||||
|
||||
# Files that legitimately reference these names AND a surface
|
||||
# marker, but do so for guard / strip / test / doc-comment
|
||||
# reasons. New entries require reviewer signoff and a one-line
|
||||
# justification in the diff.
|
||||
EXEMPT_FILES=(
|
||||
# RFC#523 L1 deny-set source-of-truth + tests
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env.go"
|
||||
"workspace-server/internal/handlers/workspace_provision_forbidden_env_test.go"
|
||||
# Forensic-#145 silent-strip denylist (defense-in-depth, by design lists the names)
|
||||
"workspace-server/internal/provisioner/provisioner.go"
|
||||
"workspace-server/internal/provisioner/provisioner_test.go"
|
||||
# Pre-RFC#523 persona-fallback / org-helper paths. The L1
|
||||
# fail-closed runs BEFORE these writers; downstream silent-strip
|
||||
# also covers them. See applyAgentGitHTTPCreds doc-comment.
|
||||
"workspace-server/internal/handlers/agent_git_identity.go"
|
||||
"workspace-server/internal/handlers/org_helpers.go"
|
||||
"workspace-server/internal/handlers/org.go"
|
||||
# CP→platform admin auth (NOT a tenant env write).
|
||||
"workspace-server/internal/provisioner/cp_provisioner.go"
|
||||
)
|
||||
|
||||
# Build an extended-regex alternation of forbidden keys.
|
||||
KEY_ALT="$(IFS='|'; echo "${FORBIDDEN_KEYS[*]}")"
|
||||
|
||||
# Find candidate files: Go non-test sources that contain a
|
||||
# tenant-writer surface marker.
|
||||
mapfile -t CANDIDATES < <(
|
||||
grep -rlE --include='*.go' --exclude='*_test.go' \
|
||||
"${SURFACE_PATTERN}" . 2>/dev/null \
|
||||
| sed 's|^\./||' \
|
||||
| sort -u
|
||||
)
|
||||
|
||||
if [ "${#CANDIDATES[@]}" -eq 0 ]; then
|
||||
echo "OK No tenant-writer-surface files found in tree (unexpected, but not a lint failure)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
HITS=""
|
||||
for f in "${CANDIDATES[@]}"; do
|
||||
# Skip exempt files.
|
||||
skip=0
|
||||
for ex in "${EXEMPT_FILES[@]}"; do
|
||||
if [ "$f" = "$ex" ]; then skip=1; break; fi
|
||||
done
|
||||
[ "$skip" = "1" ] && continue
|
||||
|
||||
# File contains a surface marker; now grep for a forbidden
|
||||
# key NAME. We require a QUOTED-literal match to avoid
|
||||
# firing on a comment like "// also handle GITEA_TOKEN".
|
||||
#
|
||||
# The literal form catches:
|
||||
# - os.Getenv("GITEA_TOKEN")
|
||||
# - envVars["GITEA_TOKEN"] = ...
|
||||
# - {envKey: "GITEA_TOKEN", tenantKey: "GITEA_TOKEN"}
|
||||
# but not:
|
||||
# - // see GITEA_TOKEN below (no quotes)
|
||||
found=$(grep -nE "\"(${KEY_ALT})\"" "$f" 2>/dev/null || true)
|
||||
if [ -n "$found" ]; then
|
||||
HITS="${HITS}--- ${f} ---\n${found}\n"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ -n "$HITS" ]; then
|
||||
echo "::error::Task #146 lint: repo-host token name(s) quoted in a tenant-writer-surface file:"
|
||||
printf "$HITS"
|
||||
echo ""
|
||||
echo "These files reference a tenant-writer surface (workspace_secrets,"
|
||||
echo "seedAllowList, /settings/secrets, containerEnv, userData, etc.)"
|
||||
echo "AND quote a repo-host token name (GITEA_TOKEN/GITHUB_TOKEN/…)."
|
||||
echo "Per RFC#523 threat model, tenant workspaces MUST NOT receive"
|
||||
echo "operator-scope repo-host tokens. If your code legitimately needs"
|
||||
echo "to reference one of these names in a tenant-writer file (e.g."
|
||||
echo "a deny-set definition or silent-strip list), add the file to"
|
||||
echo "EXEMPT_FILES with a one-line justification — reviewer signoff"
|
||||
echo "required."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "OK No tenant-writer-surface file co-mentions a repo-host token literal."
|
||||
@@ -4,7 +4,7 @@ name: Lint pre-flip continue-on-error
|
||||
# on any job in `.gitea/workflows/*.yml` WITHOUT proof that the affected
|
||||
# job's recent runs on the target branch (PR base) are actually green.
|
||||
#
|
||||
# Empirical class: PR #656 / mc#1982. PR #656 (RFC internal#219 Phase 4)
|
||||
# Empirical class: PR #656 / mc#774. PR #656 (RFC internal#219 Phase 4)
|
||||
# flipped 5 platform-build-class jobs `continue-on-error: true → false`
|
||||
# on the basis of a "verified green on main via combined-status check".
|
||||
# But that "green" was the LIE the prior `continue-on-error: true`
|
||||
@@ -13,7 +13,7 @@ name: Lint pre-flip continue-on-error
|
||||
# job-level status. The precondition the PR claimed to verify was
|
||||
# structurally fooled by the bug being flipped.
|
||||
#
|
||||
# mc#1982 captured the surfaced defects (2 mutually-masked regressions):
|
||||
# mc#774 captured the surfaced defects (2 mutually-masked regressions):
|
||||
# - Class 1: sqlmock helper drift since 2f36bb9a (24 days old)
|
||||
# - Class 2: OFFSEC-001 contract collision since 7d1a189f (1 day old)
|
||||
#
|
||||
@@ -55,7 +55,7 @@ name: Lint pre-flip continue-on-error
|
||||
# - YAML parse error in one of the workflow files: warn-only,
|
||||
# don't block — the YAML lint workflows catch this separately.
|
||||
#
|
||||
# Cross-links: PR#656, mc#1982, PR#665 (interim re-mask),
|
||||
# Cross-links: PR#656, mc#774, PR#665 (interim re-mask),
|
||||
# Quirk #10 (internal#342 + dup #287), hongming-pc2 charter
|
||||
# §SOP-N rule (e), feedback_strict_root_only_after_class_a,
|
||||
# feedback_no_shared_persona_token_use.
|
||||
@@ -99,8 +99,8 @@ jobs:
|
||||
timeout-minutes: 8
|
||||
# Phase 3 (RFC internal#219 §1): surface broken flips without blocking
|
||||
# the PR yet. Follow-up flips this to `false` once the workflow itself
|
||||
# has clean recent runs on main. mc#1982 interim — remove when CoE→false.
|
||||
continue-on-error: true # mc#1982
|
||||
# has clean recent runs on main. mc#774 interim — remove when CoE→false.
|
||||
continue-on-error: true # mc#774
|
||||
steps:
|
||||
- name: Check out PR head (full history for base-SHA access)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
name: lint-required-context-exists-in-bp
|
||||
|
||||
# Tier 2g hard-gate lint (per mc#1982) — diff-based PR-time
|
||||
# Tier 2g hard-gate lint (per mc#774) — diff-based PR-time
|
||||
# check. When a PR adds a NEW commit-status emission (workflow YAML
|
||||
# `name:` + job `name:`-or-key + on:-event), the workflow file must
|
||||
# carry one of three directives adjacent to the new job:
|
||||
@@ -16,7 +16,7 @@ name: lint-required-context-exists-in-bp
|
||||
# PR#656 added `CI / all-required (pull_request)` as a sentinel
|
||||
# context that workflows emit, but BP did NOT list it. When
|
||||
# platform-build failed, all-required failed, but BP let the PR
|
||||
# merge anyway → cascade to mc#1982. With this lint, PR#656 would
|
||||
# merge anyway → cascade to mc#774. With this lint, PR#656 would
|
||||
# have been blocked until either the BP PATCH ran alongside OR
|
||||
# the author added a `bp-required: pending` directive.
|
||||
#
|
||||
@@ -27,7 +27,7 @@ name: lint-required-context-exists-in-bp
|
||||
# share the workflow-context enumeration helpers
|
||||
# (`_event_map`, `workflow_contexts`, `_job_display`) but the
|
||||
# semantics are intentionally distinct so they're separate scripts.
|
||||
# Co-design is documented in mc#1982.
|
||||
# Co-design is documented in mc#774.
|
||||
#
|
||||
# Directive comment lives in the workflow file (NOT PR body)
|
||||
# ----------------------------------------------------------
|
||||
@@ -42,13 +42,13 @@ name: lint-required-context-exists-in-bp
|
||||
# Lands at `continue-on-error: true` (Phase 3 — surface the
|
||||
# pattern without blocking PRs while the directive convention
|
||||
# beds in). After 7 days of clean runs on `main` with no false
|
||||
# positives, follow-up flips to `false`. Tracking: mc#1982.
|
||||
# positives, follow-up flips to `false`. Tracking: mc#774.
|
||||
#
|
||||
# Cross-links
|
||||
# -----------
|
||||
# - mc#1982 (the RFC that specs this lint)
|
||||
# - mc#774 (the RFC that specs this lint)
|
||||
# - PR#656 (the empirical case)
|
||||
# - mc#1982 (the surfaced cascade)
|
||||
# - mc#774 (the surfaced cascade)
|
||||
# - feedback_phantom_required_check_after_gitea_migration (Tier 2f cousin)
|
||||
# - feedback_behavior_based_ast_gates
|
||||
#
|
||||
@@ -81,10 +81,10 @@ jobs:
|
||||
name: lint-required-context-exists-in-bp
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 5
|
||||
# Phase 4 (RFC #219 §1): 22 days green since 2026-05-11 port,
|
||||
# well past the 7-clean-day threshold. PR-time failure is now
|
||||
# a hard CI signal.
|
||||
continue-on-error: false
|
||||
# Phase 3 (RFC #219 §1): surface the pattern without blocking PRs
|
||||
# while the directive convention beds in. Follow-up flip to false
|
||||
# after 7 clean days on main. mc#774.
|
||||
continue-on-error: true # mc#774 Phase 3 — flip to false after 7 clean main runs
|
||||
steps:
|
||||
- name: Check out PR head with full history (need base SHA blobs)
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -49,56 +49,37 @@ jobs:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
steps:
|
||||
- name: Identify runner
|
||||
id: identify
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -eu
|
||||
echo "arch=$(uname -m)"
|
||||
echo "kernel=$(uname -sr)"
|
||||
echo "shell=$BASH_VERSION"
|
||||
# Sanity: must actually be arm64. If amd64 sneaks in here,
|
||||
# the job skips gracefully rather than hard-failing, because
|
||||
# a mislabelled runner is an ops concern, not a code defect.
|
||||
# Pilot lane must not make main red (#2146).
|
||||
# fail fast — that means the label routing is wrong.
|
||||
case "$(uname -m)" in
|
||||
aarch64|arm64)
|
||||
echo "arm64 confirmed"
|
||||
echo "arm64=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: expected arm64, got $(uname -m) — label routing may be wrong"
|
||||
echo "arm64=false" >> "$GITHUB_OUTPUT"
|
||||
exit 1
|
||||
;;
|
||||
aarch64|arm64) echo "arm64 confirmed" ;;
|
||||
*) echo "ERROR: expected arm64, got $(uname -m)"; exit 1 ;;
|
||||
esac
|
||||
|
||||
- name: Checkout
|
||||
if: steps.identify.outputs.arm64 == 'true'
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install shellcheck (arm64)
|
||||
if: steps.identify.outputs.arm64 == 'true'
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -eu
|
||||
if command -v shellcheck >/dev/null 2>&1; then
|
||||
echo "shellcheck already present: $(shellcheck --version | head -1)"
|
||||
else
|
||||
# Prefer apt if the runner base ships it; else download the
|
||||
# correct platform binary (darwin vs linux).
|
||||
# Prefer apt if the runner base ships it; else download arm64 binary.
|
||||
if command -v apt-get >/dev/null 2>&1; then
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -y --no-install-recommends shellcheck
|
||||
else
|
||||
SC_VER=v0.10.0
|
||||
if [ "$(uname -s)" = "Darwin" ]; then
|
||||
SC_PKG="shellcheck-${SC_VER}.darwin.aarch64.tar.xz"
|
||||
else
|
||||
SC_PKG="shellcheck-${SC_VER}.linux.aarch64.tar.xz"
|
||||
fi
|
||||
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/${SC_PKG}" \
|
||||
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/shellcheck-${SC_VER}.linux.aarch64.tar.xz" \
|
||||
| tar -xJf - --strip-components=1
|
||||
sudo mv shellcheck /usr/local/bin/
|
||||
fi
|
||||
@@ -106,15 +87,14 @@ jobs:
|
||||
shellcheck --version | head -2
|
||||
|
||||
- name: Run shellcheck on .gitea/scripts/*.sh
|
||||
if: steps.identify.outputs.arm64 == 'true'
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -eu
|
||||
# Only the scripts we control under .gitea/scripts. Pilot
|
||||
# scope is intentionally narrow — broaden in a follow-up
|
||||
# once the lane is proven.
|
||||
if ! command -v shellcheck >/dev/null 2>&1 || ! shellcheck --version >/dev/null 2>&1; then
|
||||
echo "WARN: shellcheck not functional — skipping (pilot mode)"
|
||||
if ! command -v shellcheck >/dev/null 2>&1; then
|
||||
echo "WARN: shellcheck binary not found — skipping (pilot mode)"
|
||||
exit 0
|
||||
fi
|
||||
# NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
|
||||
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken shapes without blocking PRs.
|
||||
# Follow-up PR flips this off after the 4 existing-on-main rule-2
|
||||
# (workflow_run) violations are migrated to a supported trigger.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -67,7 +67,7 @@ jobs:
|
||||
# in this rollout (internal#462) so the precondition holds.
|
||||
runs-on: publish
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
||||
@@ -234,7 +234,7 @@ jobs:
|
||||
name: Production auto-deploy
|
||||
needs: build-and-push
|
||||
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
|
||||
# Side-effect deploy only; image publish success is the durable artifact. mc#1982
|
||||
# Side-effect deploy only; image publish success is the durable artifact. mc#774
|
||||
continue-on-error: true
|
||||
# Publish/release lane (internal#462) — production deploy of a merged
|
||||
# fix; reserved capacity, never queued behind PR-CI.
|
||||
@@ -327,27 +327,13 @@ jobs:
|
||||
echo ""
|
||||
echo "### Per-tenant result"
|
||||
echo ""
|
||||
echo "| Slug | Phase | SSM Status | Exit | Healthz | On target | Error present |"
|
||||
echo "|------|-------|------------|------|---------|-----------|---------------|"
|
||||
jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \(.verified_on_target) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
|
||||
# internal#724: stragglers are tenants enumerated but not proven
|
||||
# on the target build. Surface them loudly — a non-empty list
|
||||
# means the rollout did NOT fully land.
|
||||
STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
|
||||
if [ -n "$STRAGGLERS" ]; then
|
||||
echo ""
|
||||
echo "### ⚠ Stragglers (NOT on target tag \`$TARGET_TAG\`)"
|
||||
echo ""
|
||||
echo "\`$STRAGGLERS\`"
|
||||
fi
|
||||
echo "| Slug | Phase | SSM Status | Exit | Healthz | Error present |"
|
||||
echo "|------|-------|------------|------|---------|---------------|"
|
||||
jq -r '.results[]? | "| \(.slug) | \(.phase) | \(.ssm_status // "-") | \(.ssm_exit_code) | \(.healthz_ok) | \((.error // "") != "") |"' "$HTTP_RESPONSE" || true
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
OK="$(jq -r '.ok' "$HTTP_RESPONSE")"
|
||||
if [ "$OK" != "true" ]; then
|
||||
STRAGGLERS="$(jq -r '(.stragglers // []) | join(", ")' "$HTTP_RESPONSE")"
|
||||
if [ -n "$STRAGGLERS" ]; then
|
||||
echo "::error::incomplete rollout — tenants not on target tag $TARGET_TAG: $STRAGGLERS"
|
||||
fi
|
||||
echo "::error::redeploy-fleet reported ok=false; production rollout halted."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -9,22 +9,10 @@
|
||||
# Triggers on:
|
||||
# - `pull_request_target`: opened, synchronize, reopened
|
||||
# → initial status posts when PR opens / re-pushes
|
||||
# - `pull_request_review` types: [submitted]
|
||||
# → re-evaluate when a team member submits an APPROVE review so
|
||||
# the gate flips immediately (no wait for the next push or
|
||||
# slash-command). Verified live: sop-tier-check.yml uses this
|
||||
# same event and provably fires (produces
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# `github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'` so
|
||||
# only APPROVE reviews run the evaluator; COMMENT and
|
||||
# REQUEST_CHANGES are skipped at the job level.
|
||||
# Branch-protection requires the `(pull_request_target)`
|
||||
# context variant, so the review-event path EXPLICITLY POSTS
|
||||
# the required context via the API. Trust boundary preserved
|
||||
# (BASE ref, no PR-head).
|
||||
# - comment refires are handled by `sop-checklist.yml` review-refire job
|
||||
# → `/qa-recheck` slash-command re-evaluates this gate.
|
||||
# - comment refires are handled by `review-refire-comments.yml`
|
||||
# → a single issue_comment dispatcher prevents every SOP/review
|
||||
# comment from enqueueing separate qa/security/tier jobs on
|
||||
# Gitea 1.22.6 before job-level `if:` can skip them.
|
||||
# Workflow name = `qa-review` ; job name = `approved`.
|
||||
# The job's own pass/fail conclusion publishes the status context
|
||||
# `qa-review / approved (<event>)` — NO `POST /statuses` call → NO
|
||||
@@ -97,26 +85,21 @@ name: qa-review
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
statuses: write
|
||||
secrets: read
|
||||
|
||||
jobs:
|
||||
# bp-exempt: PR review bot signal; required merge state is enforced by CI / all-required.
|
||||
approved:
|
||||
# Gate the job:
|
||||
# - On pull_request_target events: always run.
|
||||
# - On pull_request_review events whose review state is APPROVED: run so the gate flips
|
||||
# immediately when a team member submits an APPROVE review.
|
||||
# Comment-triggered refires live in sop-checklist.yml review-refire job.
|
||||
# Comment-triggered refires live in review-refire-comments.yml. Keeping
|
||||
# this workflow PR-only avoids comment-triggered queue storms.
|
||||
if: |
|
||||
github.event_name == 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_review' &&
|
||||
(github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
|
||||
github.event_name == 'pull_request_target'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
|
||||
@@ -160,7 +143,6 @@ jobs:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Evaluate qa-review
|
||||
id: eval
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
@@ -175,66 +157,3 @@ jobs:
|
||||
REVIEW_CHECK_DEBUG: '0'
|
||||
REVIEW_CHECK_STRICT: '0'
|
||||
run: bash .gitea/scripts/review-check.sh
|
||||
|
||||
- name: Post required status context on pull_request_review
|
||||
# Gitea Actions auto-publishes (pull_request_review) context
|
||||
# for this event, but branch-protection requires (pull_request_target).
|
||||
# We explicitly POST the BP-required context so the gate flips.
|
||||
# Trust boundary: same BASE-ref script result, no PR-head code.
|
||||
#
|
||||
# TOKEN FIX (RC 8326): uses STATUS_POST_TOKEN (CTO-granted,
|
||||
# msg d52cc72a). Dedicated narrow-scoped write:repository token
|
||||
# for the explicit status POST. Evaluator step stays on
|
||||
# SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
|
||||
# separation: eval computes, POST writes, never the same cred.
|
||||
if: github.event_name == 'pull_request_review' && always()
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
|
||||
EVAL_OUTCOME: ${{ steps.eval.outcome }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
authfile=$(mktemp)
|
||||
chmod 600 "$authfile"
|
||||
printf 'header = "Authorization: token %s"\n' "$GITEA_TOKEN" > "$authfile"
|
||||
|
||||
prfile=$(mktemp)
|
||||
code=$(curl -sS -o "$prfile" -w '%{http_code}' -K "$authfile" \
|
||||
"https://${GITEA_HOST}/api/v1/repos/${REPO}/pulls/${PR_NUMBER}")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${code}"
|
||||
rm -f "$prfile" "$authfile"
|
||||
exit 1
|
||||
fi
|
||||
head_sha=$(jq -r '.head.sha // ""' "$prfile")
|
||||
rm -f "$prfile"
|
||||
|
||||
if [ "$EVAL_OUTCOME" = "success" ]; then
|
||||
status_state="success"
|
||||
description="Approved via pull_request_review trigger"
|
||||
else
|
||||
status_state="failure"
|
||||
description="Review check failed via pull_request_review trigger"
|
||||
fi
|
||||
|
||||
body=$(jq -nc \
|
||||
--arg state "$status_state" \
|
||||
--arg context "qa-review / approved (pull_request_target)" \
|
||||
--arg description "$description" \
|
||||
'{state:$state, context:$context, description:$description}')
|
||||
|
||||
post_code=$(curl -sS -o /dev/null -w '%{http_code}' -X POST \
|
||||
-K "$authfile" -H "Content-Type: application/json" \
|
||||
-d "$body" \
|
||||
"https://${GITEA_HOST}/api/v1/repos/${REPO}/statuses/${head_sha}")
|
||||
|
||||
rm -f "$authfile"
|
||||
|
||||
if [ "$post_code" != "200" ] && [ "$post_code" != "201" ]; then
|
||||
echo "::error::POST /statuses/${head_sha} returned HTTP ${post_code}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::posted ${status_state} for context=\"qa-review / approved (pull_request_target)\" on sha=${head_sha}"
|
||||
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
name: Audit Railway env vars for drift-prone pins
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 10
|
||||
|
||||
|
||||
@@ -73,7 +73,7 @@ jobs:
|
||||
# it never queues behind PR-CI. `publish` -> molecule-runner-publish-*.
|
||||
runs-on: publish
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 25
|
||||
env:
|
||||
|
||||
@@ -80,7 +80,7 @@ jobs:
|
||||
# `publish` -> molecule-runner-publish-* sub-pool.
|
||||
runs-on: publish
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
# runners with internet access to package mirrors). Falls back to GitHub
|
||||
# binary download. GitHub releases may be blocked on some runner networks
|
||||
# (infra#241 follow-up).
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
run: |
|
||||
if apt-get update -qq && apt-get install -y -qq jq; then
|
||||
|
||||
@@ -57,7 +57,7 @@ jobs:
|
||||
name: Detect SECRET_PATTERNS drift
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 5
|
||||
steps:
|
||||
|
||||
@@ -6,44 +6,25 @@
|
||||
#
|
||||
# See `qa-review.yml` header for the full A1-α / A1.1 / A4 / A5 design
|
||||
# rationale; everything below is identical in shape.
|
||||
#
|
||||
# A1-α addendum (internal#760): review-event trigger added so the security
|
||||
# gate flips immediately when a team member submits an APPROVE review.
|
||||
# Uses `pull_request_review` types: [submitted] — verified live via
|
||||
# sop-tier-check.yml which provably fires this event (produces
|
||||
# `sop-tier-check / tier-check (pull_request_review)` contexts).
|
||||
# The job-level `if:` guard checks
|
||||
# `github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'` so only APPROVE
|
||||
# reviews run the evaluator; COMMENT and REQUEST_CHANGES are skipped at
|
||||
# the job level. Branch-protection requires the `(pull_request_target)`
|
||||
# context variant, so the review-event path EXPLICITLY POSTS the required
|
||||
# context via the API. Trust boundary preserved (BASE ref, no PR-head).
|
||||
|
||||
name: security-review
|
||||
|
||||
on:
|
||||
pull_request_target:
|
||||
types: [opened, synchronize, reopened]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
statuses: write
|
||||
secrets: read
|
||||
|
||||
jobs:
|
||||
# bp-exempt: PR security review bot signal; required merge state is enforced by CI / all-required.
|
||||
approved:
|
||||
# Gate the job:
|
||||
# - On pull_request_target events: always run.
|
||||
# - On pull_request_review events whose review state is APPROVED: run so the gate flips
|
||||
# immediately when a team member submits an APPROVE review.
|
||||
# Comment-triggered refires live in sop-checklist.yml review-refire job.
|
||||
# Comment-triggered refires live in review-refire-comments.yml. Keeping
|
||||
# this workflow PR-only avoids comment-triggered queue storms.
|
||||
if: |
|
||||
github.event_name == 'pull_request_target' ||
|
||||
(github.event_name == 'pull_request_review' &&
|
||||
(github.event.review.state == 'APPROVED' || github.event.review.state == 'approved'))
|
||||
github.event_name == 'pull_request_target'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Privilege check (A1.1 — INFORMATIONAL log only, NOT a gate)
|
||||
@@ -76,7 +57,6 @@ jobs:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Evaluate security-review
|
||||
id: eval
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
@@ -88,66 +68,3 @@ jobs:
|
||||
REVIEW_CHECK_DEBUG: '0'
|
||||
REVIEW_CHECK_STRICT: '0'
|
||||
run: bash .gitea/scripts/review-check.sh
|
||||
|
||||
- name: Post required status context on pull_request_review
|
||||
# Gitea Actions auto-publishes (pull_request_review) context
|
||||
# for this event, but branch-protection requires (pull_request_target).
|
||||
# We explicitly POST the BP-required context so the gate flips.
|
||||
# Trust boundary: same BASE-ref script result, no PR-head code.
|
||||
#
|
||||
# TOKEN FIX (RC 8326): uses STATUS_POST_TOKEN (CTO-granted,
|
||||
# msg d52cc72a). Dedicated narrow-scoped write:repository token
|
||||
# for the explicit status POST. Evaluator step stays on
|
||||
# SOP_TIER_CHECK_TOKEN (read-only) per deliberate security
|
||||
# separation: eval computes, POST writes, never the same cred.
|
||||
if: github.event_name == 'pull_request_review' && always()
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number || github.event.issue.number }}
|
||||
EVAL_OUTCOME: ${{ steps.eval.outcome }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
authfile=$(mktemp)
|
||||
chmod 600 "$authfile"
|
||||
printf 'header = "Authorization: token %s"\n' "$GITEA_TOKEN" > "$authfile"
|
||||
|
||||
prfile=$(mktemp)
|
||||
code=$(curl -sS -o "$prfile" -w '%{http_code}' -K "$authfile" \
|
||||
"https://${GITEA_HOST}/api/v1/repos/${REPO}/pulls/${PR_NUMBER}")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::GET /pulls/${PR_NUMBER} returned HTTP ${code}"
|
||||
rm -f "$prfile" "$authfile"
|
||||
exit 1
|
||||
fi
|
||||
head_sha=$(jq -r '.head.sha // ""' "$prfile")
|
||||
rm -f "$prfile"
|
||||
|
||||
if [ "$EVAL_OUTCOME" = "success" ]; then
|
||||
status_state="success"
|
||||
description="Approved via pull_request_review trigger"
|
||||
else
|
||||
status_state="failure"
|
||||
description="Review check failed via pull_request_review trigger"
|
||||
fi
|
||||
|
||||
body=$(jq -nc \
|
||||
--arg state "$status_state" \
|
||||
--arg context "security-review / approved (pull_request_target)" \
|
||||
--arg description "$description" \
|
||||
'{state:$state, context:$context, description:$description}')
|
||||
|
||||
post_code=$(curl -sS -o /dev/null -w '%{http_code}' -X POST \
|
||||
-K "$authfile" -H "Content-Type: application/json" \
|
||||
-d "$body" \
|
||||
"https://${GITEA_HOST}/api/v1/repos/${REPO}/statuses/${head_sha}")
|
||||
|
||||
rm -f "$authfile"
|
||||
|
||||
if [ "$post_code" != "200" ] && [ "$post_code" != "201" ]; then
|
||||
echo "::error::POST /statuses/${head_sha} returned HTTP ${post_code}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "::notice::posted ${status_state} for context=\"security-review / approved (pull_request_target)\" on sha=${head_sha}"
|
||||
|
||||
@@ -179,10 +179,10 @@ jobs:
|
||||
- name: Refire qa-review status
|
||||
if: steps.classify.outputs.run_qa == 'true'
|
||||
env:
|
||||
# Evaluator (review-check.sh + GET /pulls) stays on read-scoped token.
|
||||
# RFC_324_TEAM_READ_TOKEN is read-only (team membership read scope only).
|
||||
# review-refire-status.sh POSTs to /statuses — requires write scope.
|
||||
# SOP_TIER_CHECK_TOKEN carries write:repository + write:issue + read:organization.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Explicit POST /statuses uses narrow-scoped write:repository token.
|
||||
STATUS_POST_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
@@ -198,10 +198,10 @@ jobs:
|
||||
- name: Refire security-review status
|
||||
if: steps.classify.outputs.run_security == 'true'
|
||||
env:
|
||||
# Evaluator (review-check.sh + GET /pulls) stays on read-scoped token.
|
||||
# RFC_324_TEAM_READ_TOKEN is read-only (team membership read scope only).
|
||||
# review-refire-status.sh POSTs to /statuses — requires write scope.
|
||||
# SOP_TIER_CHECK_TOKEN carries write:repository + write:issue + read:organization.
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Explicit POST /statuses uses narrow-scoped write:repository token.
|
||||
STATUS_POST_TOKEN: ${{ secrets.STATUS_POST_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
# window closed. continue-on-error: true has been removed from the
|
||||
# tier-check job; AND-composition is now fully enforced. If you need
|
||||
# to temporarily re-introduce a mask, file a tracker and follow the
|
||||
# mc#1982 protocol (Tier 2e lint requires a current tracker within
|
||||
# mc#774 protocol (Tier 2e lint requires a current tracker within
|
||||
# 2 lines of any continue-on-error: true).
|
||||
|
||||
name: sop-tier-check
|
||||
@@ -92,7 +92,7 @@ jobs:
|
||||
# runners). The sop-tier-check script has its own fallback as a
|
||||
# third line of defense. continue-on-error: true ensures this step
|
||||
# failing does not block the job.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
run: |
|
||||
# apt-get is the primary method — Ubuntu package mirrors are reliably
|
||||
@@ -113,7 +113,7 @@ jobs:
|
||||
# continue-on-error: true at step level — job-level is ignored by Gitea
|
||||
# Actions (quirk #10, internal runbooks). Belt-and-suspenders with
|
||||
# SOP_FAIL_OPEN=1 + || true below.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
staging-smoke:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
sha: ${{ steps.compute.outputs.sha }}
|
||||
@@ -212,7 +212,7 @@ jobs:
|
||||
if: ${{ needs.staging-smoke.result == 'success' && needs.staging-smoke.outputs.smoke_ran == 'true' }}
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
env:
|
||||
SHA: ${{ needs.staging-smoke.outputs.sha }}
|
||||
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
name: Sweep CF orphans
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# 3 min surfaces hangs (CF API stall, AWS describe-instances stuck)
|
||||
# within one cron interval instead of burning a full tick. Realistic
|
||||
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
name: Sweep CF tunnels
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# 30 min cap. Was 5 min on the theory that the only thing that
|
||||
# could take >5min is a CF-API hang — but on 2026-05-02 a backlog
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
name: sync-providers-yaml
|
||||
|
||||
# Cross-repo canonical↔synced-copy drift gate (internal#718 P2-A, CTO
|
||||
# 2026-05-27 "Distribution = SDK via codegen + verify-CI", multi-repo branch:
|
||||
# "codegen-checked-into-each-repo + verify-CI").
|
||||
#
|
||||
# The canonical provider-registry SSOT is molecule-controlplane
|
||||
# internal/providers/providers.yaml. molecule-core has NO Go module dependency
|
||||
# on controlplane, so instead of importing it we carry a SYNCED COPY at
|
||||
# workspace-server/internal/providers/providers.yaml and gate it.
|
||||
#
|
||||
# This workflow fetches the canonical providers.yaml from controlplane (via the
|
||||
# Gitea raw endpoint, read-only) and byte-compares it against core's synced
|
||||
# copy. RED if they differ — meaning the canonical moved and core's copy must be
|
||||
# re-synced (copy verbatim + `go generate ./...` + bump
|
||||
# canonicalProvidersYAMLSHA256 in sync_canonical_test.go).
|
||||
#
|
||||
# Pairs with:
|
||||
# * sync_canonical_test.go — hermetic sha pin (catches a hand-edit of core's
|
||||
# copy even with no network); runs in the normal `go test ./...`.
|
||||
# * verify-providers-gen.yml — artifact ↔ synced-copy drift.
|
||||
#
|
||||
# ENFORCEMENT GATING: standalone workflow, NOT a job in ci.yml and NOT in
|
||||
# branch protection (same soak-then-promote posture as verify-providers-gen).
|
||||
# It is intentionally absent from ci.yml's job set so the ci-required-drift
|
||||
# sentinel does not fire on it.
|
||||
#
|
||||
# AUTH: uses AUTO_SYNC_TOKEN (the existing cross-repo read token used to sync
|
||||
# template/provider content from sibling repos). If the secret is absent the
|
||||
# job emits a clear ::warning:: and exits 0 — the hermetic sha pin in
|
||||
# sync_canonical_test.go is the always-on backstop, so a missing cross-repo
|
||||
# token degrades to "hand-edit still caught, live canonical drift not caught"
|
||||
# rather than a hard red that blocks unrelated PRs.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- '.gitea/workflows/sync-providers-yaml.yml'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- '.gitea/workflows/sync-providers-yaml.yml'
|
||||
schedule:
|
||||
# Daily at 04:23 — catch a canonical change in controlplane that landed
|
||||
# without a paired core re-sync PR (off-zero to spread cron load).
|
||||
- cron: '23 4 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: sync-providers-yaml-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
compare:
|
||||
name: Compare synced providers.yaml against controlplane canonical
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 6
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Fetch canonical providers.yaml from controlplane and byte-compare
|
||||
env:
|
||||
AUTO_SYNC_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
API_ROOT: ${{ github.server_url }}/api/v1
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -z "${AUTO_SYNC_TOKEN:-}" ]; then
|
||||
echo "::warning::AUTO_SYNC_TOKEN secret missing — skipping the live cross-repo compare."
|
||||
echo "The hermetic sha pin (sync_canonical_test.go) still gates hand-edits of core's copy."
|
||||
echo "Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to enable live canonical-drift detection."
|
||||
exit 0
|
||||
fi
|
||||
CANON_URL="${API_ROOT}/repos/molecule-ai/molecule-controlplane/raw/internal/providers/providers.yaml?ref=main"
|
||||
# Use the /raw endpoint: it returns the file bytes directly. (The
|
||||
# /contents endpoint ignores Accept: application/vnd.gitea.raw on
|
||||
# Gitea 1.22.6 and returns the JSON+base64 envelope, which made this
|
||||
# diff a permanent false RED.)
|
||||
curl -fsS \
|
||||
-H "Authorization: token ${AUTO_SYNC_TOKEN}" \
|
||||
"${CANON_URL}" -o /tmp/canonical-providers.yaml
|
||||
LOCAL=workspace-server/internal/providers/providers.yaml
|
||||
if diff -u /tmp/canonical-providers.yaml "$LOCAL"; then
|
||||
echo "OK — core's synced providers.yaml is byte-identical to the controlplane canonical."
|
||||
else
|
||||
echo "::error::core's synced providers.yaml DRIFTED from the controlplane canonical (SSOT)."
|
||||
echo "Re-sync: copy controlplane internal/providers/providers.yaml verbatim over"
|
||||
echo " $LOCAL, run 'go generate ./...' in workspace-server/, and bump"
|
||||
echo " canonicalProvidersYAMLSHA256 in internal/providers/sync_canonical_test.go."
|
||||
exit 1
|
||||
fi
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
name: Ops scripts (unittest)
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
name: verify-providers-gen
|
||||
|
||||
# Provider-registry SSOT enforcement gate — molecule-core side (internal#718
|
||||
# P2-A, CTO 2026-05-27 "Distribution = SDK via codegen + verify-CI").
|
||||
#
|
||||
# The canonical schema SSOT is molecule-controlplane
|
||||
# internal/providers/providers.yaml. molecule-core carries a SYNCED COPY at
|
||||
# workspace-server/internal/providers/providers.yaml (kept in sync by the
|
||||
# companion sync-providers-yaml.yml gate), and cmd/gen-providers emits the
|
||||
# checked-in Go projection workspace-server/internal/providers/gen/registry_gen.go.
|
||||
#
|
||||
# This workflow regenerates the artifact into the working tree and fails RED if
|
||||
# it differs from what is committed — catching BOTH:
|
||||
# * a providers.yaml (synced-copy) change that wasn't followed by `go generate ./...`, and
|
||||
# * a hand-edit of the generated artifact (it carries a DO NOT EDIT header).
|
||||
#
|
||||
# It is the molecule-core mirror of molecule-controlplane's verify-providers-gen
|
||||
# workflow. Together with sync-providers-yaml (canonical↔synced-copy drift) it
|
||||
# closes the codegen-checked-into-each-repo + verify-CI loop the RFC mandates.
|
||||
#
|
||||
# ENFORCEMENT GATING (deliberate, per dev-SOP "implementation gating"):
|
||||
# this is a STANDALONE workflow, NOT a job inside ci.yml, and is NOT yet in any
|
||||
# branch-protection status_check_contexts. Rationale (identical to the CP P0
|
||||
# rollout):
|
||||
# * It runs + reports RED on every PR/push immediately (visible signal).
|
||||
# * It is intentionally absent from ci.yml's job set so the ci-required-drift
|
||||
# sentinel (jobs ↔ branch-protection ↔ audit-env) does NOT fire on it, and
|
||||
# from branch protection (turning it into a hard merge gate has blast radius
|
||||
# — operator GO required, same pattern as sop-tier-check / verify-providers-gen
|
||||
# on controlplane). Promote it into branch protection in a follow-up once
|
||||
# P2 has soaked.
|
||||
# Until then it behaves like secret-scan / block-internal-paths: a standalone
|
||||
# advisory-to-hard gate the author is expected to keep green.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
# CI-scheduler-overload fix (fix/ci-scheduler-fanout, 2026-06-01):
|
||||
# this gate only verifies that the generated providers artifact is in
|
||||
# sync with the schema SSOT. Its verdict can ONLY change when one of
|
||||
# the codegen inputs/outputs changes, so firing the Go toolchain on
|
||||
# every unrelated PR (docs, canvas, scripts) is pure fan-out cost.
|
||||
# Scoped to the codegen surface. SAFE because this workflow is NOT a
|
||||
# branch-protection status_check_context (see header §ENFORCEMENT
|
||||
# GATING) — lint-required-no-paths only forbids paths filters on
|
||||
# REQUIRED workflows; this is advisory, so a paths filter is allowed.
|
||||
# Mirrors the sibling sync-providers-yaml.yml scoping convention.
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/**'
|
||||
- 'workspace-server/cmd/gen-providers/**'
|
||||
- '.gitea/workflows/verify-providers-gen.yml'
|
||||
push:
|
||||
branches: [main, staging]
|
||||
paths:
|
||||
- 'workspace-server/internal/providers/**'
|
||||
- 'workspace-server/cmd/gen-providers/**'
|
||||
- '.gitea/workflows/verify-providers-gen.yml'
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: verify-providers-gen-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
verify:
|
||||
name: Regenerate providers artifact and fail on drift
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 8
|
||||
defaults:
|
||||
run:
|
||||
working-directory: workspace-server
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6.4.0
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
|
||||
- name: Verify generated artifact is in sync with providers.yaml
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# -check regenerates in memory and byte-compares against the
|
||||
# checked-in artifact; exit 1 (RED) on any drift. This is the
|
||||
# single source of the gate's verdict — the same code path
|
||||
# `go test ./cmd/gen-providers` exercises.
|
||||
go run ./cmd/gen-providers -check
|
||||
|
||||
- name: Belt-and-braces — regenerate in place and assert clean tree
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Independent confirmation that does not trust the -check path:
|
||||
# actually write the artifact and assert git sees no change. If
|
||||
# this and the step above ever disagree, the gate is suspect.
|
||||
go generate ./...
|
||||
if ! git diff --quiet -- internal/providers/gen/registry_gen.go; then
|
||||
echo "::error::workspace-server/internal/providers/gen/registry_gen.go drifted from providers.yaml."
|
||||
echo "Run 'go generate ./...' (or 'go run ./cmd/gen-providers') in workspace-server/ and commit the result."
|
||||
git --no-pager diff -- internal/providers/gen/registry_gen.go | head -80
|
||||
exit 1
|
||||
fi
|
||||
echo "OK — generated providers artifact is in sync with the schema SSOT."
|
||||
@@ -31,7 +31,7 @@ jobs:
|
||||
name: Weekly Platform-Go Surface
|
||||
runs-on: ubuntu-latest
|
||||
# continue-on-error: surface only, never block
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
defaults:
|
||||
run:
|
||||
|
||||
@@ -49,8 +49,8 @@
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
|
||||
cd molecule-core
|
||||
git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
|
||||
cd molecule-monorepo
|
||||
./scripts/dev-start.sh
|
||||
```
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ export default function PricingPage() {
|
||||
<p className="mt-2 text-ink-mid">
|
||||
We publish the{" "}
|
||||
<a
|
||||
href="https://git.moleculesai.app/molecule-ai/molecule-core"
|
||||
href="https://git.moleculesai.app/molecule-ai/molecule-monorepo"
|
||||
className="text-accent underline hover:text-accent"
|
||||
>
|
||||
full source on GitHub
|
||||
|
||||
@@ -38,11 +38,10 @@ const DEFAULT_RUNTIME = "claude-code";
|
||||
const RUNTIME_OPTIONS = [
|
||||
{ value: "claude-code", label: "Claude Code" },
|
||||
{ value: "codex", label: "OpenAI Codex CLI" },
|
||||
{ value: "google-adk", label: "Google ADK" },
|
||||
{ value: "hermes", label: "Hermes" },
|
||||
{ value: "openclaw", label: "OpenClaw" },
|
||||
];
|
||||
const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "google-adk", "hermes", "openclaw"]);
|
||||
const BASE_RUNTIME_TEMPLATE_IDS = new Set(["claude-code-default", "codex", "hermes", "openclaw"]);
|
||||
const DEFAULT_HEADLESS_INSTANCE_TYPE = "t3.medium";
|
||||
const DEFAULT_HEADLESS_ROOT_GB = 30;
|
||||
const DEFAULT_DISPLAY_INSTANCE_TYPE = "t3.xlarge";
|
||||
|
||||
@@ -49,33 +49,6 @@ export interface ProviderEntry {
|
||||
wildcard: boolean;
|
||||
/** Optional tooltip text (rendered as native title=). */
|
||||
tooltip?: string;
|
||||
/** Billing mode the DERIVED provider implies, when this entry came from the
|
||||
* registry-backed payload (internal#718 P3): "platform_managed" | "byok".
|
||||
* Undefined for entries built by the legacy inferVendor heuristic. */
|
||||
billingMode?: "platform_managed" | "byok";
|
||||
}
|
||||
|
||||
/** RegistryProvider mirrors one entry of GET /templates `registry_providers`
|
||||
* (workspace-server registryProviderView): the registry's native provider for
|
||||
* a runtime, with its display label, auth-env NAMES, and billing mode. This is
|
||||
* the SSOT the dropdown labels come from — the canvas drops VENDOR_LABELS for
|
||||
* registry-backed runtimes (internal#718 P3, retire-list #4). */
|
||||
export interface RegistryProvider {
|
||||
name: string;
|
||||
display_name?: string;
|
||||
auth_env?: string[];
|
||||
billing_mode?: "platform_managed" | "byok";
|
||||
deprecated?: boolean;
|
||||
}
|
||||
|
||||
/** RegistryModel mirrors one entry of GET /templates `registry_models`: a
|
||||
* native model id annotated with its DERIVED provider (registry name) and the
|
||||
* billing_mode that provider implies. */
|
||||
export interface RegistryModel {
|
||||
id: string;
|
||||
name?: string;
|
||||
provider?: string;
|
||||
billing_mode?: "platform_managed" | "byok";
|
||||
}
|
||||
|
||||
export interface SelectorValue {
|
||||
@@ -95,13 +68,6 @@ interface Props {
|
||||
models: SelectorModel[];
|
||||
value: SelectorValue;
|
||||
onChange: (next: SelectorValue) => void;
|
||||
/** Optional pre-built provider catalog. When provided, the selector uses it
|
||||
* verbatim instead of re-inferring one from `models` via
|
||||
* buildProviderCatalog — the registry-backed path (internal#718 P3), where
|
||||
* the parent builds the catalog from the registry-served providers/models
|
||||
* so dropdown labels + billing come from the provider-registry SSOT rather
|
||||
* than the inferVendor heuristic. Omitted = legacy heuristic over `models`. */
|
||||
catalog?: ProviderEntry[];
|
||||
/** Display variant. "grid" = label+control side-by-side (used in ConfigTab
|
||||
* Runtime section). "stack" = vertical (used in MissingKeysModal). */
|
||||
variant?: "grid" | "stack";
|
||||
@@ -285,66 +251,6 @@ export function buildProviderCatalog(models: SelectorModel[]): ProviderEntry[] {
|
||||
return Array.from(buckets.values());
|
||||
}
|
||||
|
||||
/** Build the provider catalog from a REGISTRY-BACKED GET /templates payload
|
||||
* (registry_providers + registry_models) — internal#718 P3, retire-list #4.
|
||||
*
|
||||
* Unlike buildProviderCatalog (which RE-INFERS vendor from model-id prefixes
|
||||
* + env via inferVendor/VENDOR_LABELS/BARE_VENDOR_PATTERNS), this trusts the
|
||||
* registry: each model carries its DERIVED `provider` (a registry provider
|
||||
* name) and the dropdown label/billing/auth come from the matching
|
||||
* `registry_providers` entry. The canvas can render no provider/model the
|
||||
* registry did not serve ("only registered selectable"), and the billing-mode
|
||||
* shown reflects the derived provider rather than a hardcoded rule.
|
||||
*
|
||||
* A provider with no served model is omitted (no empty buckets). Models whose
|
||||
* `provider` doesn't match a registry_providers entry still get a bucket
|
||||
* keyed by the raw provider name (defensive — should not happen for a
|
||||
* well-formed registry payload), so a model is never silently dropped. */
|
||||
export function buildProviderCatalogFromRegistry(
|
||||
registryProviders: RegistryProvider[],
|
||||
registryModels: RegistryModel[],
|
||||
): ProviderEntry[] {
|
||||
const byName = new Map<string, RegistryProvider>();
|
||||
for (const p of registryProviders) byName.set(p.name, p);
|
||||
|
||||
// Bucket models by their derived provider name, preserving registry order.
|
||||
const buckets = new Map<string, ProviderEntry>();
|
||||
for (const m of registryModels) {
|
||||
const vendor = (m.provider ?? "").trim();
|
||||
if (!vendor) continue; // un-annotated registry model — skip from the
|
||||
// provider cascade (selectable elsewhere via free-text); it has no
|
||||
// derived provider to bucket under.
|
||||
const meta = byName.get(vendor);
|
||||
const wildcard = m.id.includes("*");
|
||||
let entry = buckets.get(vendor);
|
||||
if (!entry) {
|
||||
entry = {
|
||||
id: `registry|${vendor}`,
|
||||
vendor,
|
||||
label: meta?.display_name || vendor,
|
||||
envVars: meta?.auth_env ?? [],
|
||||
models: [],
|
||||
wildcard,
|
||||
billingMode: meta?.billing_mode ?? m.billing_mode,
|
||||
tooltip: VENDOR_TOOLTIPS[vendor],
|
||||
};
|
||||
buckets.set(vendor, entry);
|
||||
}
|
||||
entry.models.push({ id: m.id, name: m.name, provider: vendor });
|
||||
entry.wildcard = entry.wildcard || wildcard;
|
||||
}
|
||||
|
||||
// Decorate label with model-count when ≥2 concrete models share the bucket,
|
||||
// matching buildProviderCatalog's UX.
|
||||
for (const e of buckets.values()) {
|
||||
if (!e.wildcard && e.models.length > 1) {
|
||||
e.label = `${e.label} (${e.models.length} models)`;
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(buckets.values());
|
||||
}
|
||||
|
||||
/** Find the provider entry that contains a given model id. Used by
|
||||
* callers to back-derive the provider when only the model is known
|
||||
* (e.g. ConfigTab loading from saved state). */
|
||||
@@ -377,7 +283,6 @@ export function ProviderModelSelector({
|
||||
models,
|
||||
value,
|
||||
onChange,
|
||||
catalog: catalogProp,
|
||||
variant = "stack",
|
||||
allowCustomModelEscape = false,
|
||||
disabled = false,
|
||||
@@ -388,12 +293,7 @@ export function ProviderModelSelector({
|
||||
const providerSelectId = `${baseId}-provider`;
|
||||
const modelSelectId = `${baseId}-model`;
|
||||
|
||||
// Registry-backed path (internal#718 P3): use the parent-supplied catalog
|
||||
// verbatim; otherwise re-infer one from `models` via the legacy heuristic.
|
||||
const catalog = useMemo(
|
||||
() => catalogProp ?? buildProviderCatalog(models),
|
||||
[catalogProp, models],
|
||||
);
|
||||
const catalog = useMemo(() => buildProviderCatalog(models), [models]);
|
||||
const selected = useMemo(
|
||||
() => catalog.find((p) => p.id === value.providerId) ?? null,
|
||||
[catalog, value.providerId],
|
||||
|
||||
@@ -1,82 +1,411 @@
|
||||
// @vitest-environment jsdom
|
||||
/**
|
||||
* Focused tests for BudgetSection's PER-PERIOD progress-bar math + aria (#49).
|
||||
* Tests for BudgetSection (issue #541).
|
||||
*
|
||||
* Behavioral coverage (loading, save, 402 banners, USD formatting, legacy
|
||||
* back-compat) lives in tabs/__tests__/BudgetSection.test.tsx — this file
|
||||
* deliberately covers only the per-period progress percentage + aria-valuenow
|
||||
* + the over-budget colouring, which that suite doesn't assert in detail. Kept
|
||||
* separate to avoid duplicating the behavioral suite (one component, no
|
||||
* parallel/identical suites).
|
||||
* Covers:
|
||||
* - Loading state
|
||||
* - Stats row: used / limit, "Unlimited" when null
|
||||
* - Progress bar: correct percentage, capped at 100%, absent when no limit
|
||||
* - Budget remaining text
|
||||
* - Input pre-fill (existing limit / blank when null)
|
||||
* - Save: PATCH with number, PATCH with null (blank input)
|
||||
* - 402 on GET → exceeded banner, no fetch-error text
|
||||
* - 402 on PATCH → exceeded banner
|
||||
* - Non-402 fetch error → error text
|
||||
* - Non-402 save error → save error alert
|
||||
* - Section header and subheading
|
||||
* - Fetch error does not show stats
|
||||
*/
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, waitFor, cleanup } from "@testing-library/react";
|
||||
import {
|
||||
render,
|
||||
screen,
|
||||
fireEvent,
|
||||
waitFor,
|
||||
cleanup,
|
||||
act,
|
||||
} from "@testing-library/react";
|
||||
|
||||
// ── Mock api ──────────────────────────────────────────────────────────────────
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: { get: vi.fn(), patch: vi.fn() },
|
||||
api: {
|
||||
get: vi.fn(),
|
||||
patch: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
import { api } from "@/lib/api";
|
||||
import { BudgetSection } from "../tabs/BudgetSection";
|
||||
|
||||
const mockGet = vi.mocked(api.get);
|
||||
const mockPatch = vi.mocked(api.patch);
|
||||
|
||||
type P = { limit: number | null; spend: number; remaining: number | null };
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
// Build a periods response where the named period has the given limit/spend.
|
||||
function withMonthly(limit: number | null, spend: number) {
|
||||
const blank: P = { limit: null, spend: 0, remaining: null };
|
||||
const monthly: P = { limit, spend, remaining: limit == null ? null : limit - spend };
|
||||
function budgetResponse(overrides: Partial<{
|
||||
budget_limit: number | null;
|
||||
budget_used: number;
|
||||
budget_remaining: number | null;
|
||||
}> = {}) {
|
||||
return {
|
||||
periods: { hourly: blank, daily: blank, weekly: blank, monthly },
|
||||
budget_limit: limit,
|
||||
monthly_spend: spend,
|
||||
budget_remaining: monthly.remaining,
|
||||
budget_limit: 1000,
|
||||
budget_used: 250,
|
||||
budget_remaining: 750,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
beforeEach(() => vi.clearAllMocks());
|
||||
afterEach(() => cleanup());
|
||||
function make402Error(): Error {
|
||||
return new Error("API GET /workspaces/ws-1/budget: 402 Payment Required");
|
||||
}
|
||||
|
||||
async function renderLoaded(data: unknown) {
|
||||
function make402PatchError(): Error {
|
||||
return new Error("API PATCH /workspaces/ws-1/budget: 402 Payment Required");
|
||||
}
|
||||
|
||||
function makeGenericError(msg = "network timeout"): Error {
|
||||
return new Error(`API GET /workspaces/ws-1/budget: 500 ${msg}`);
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
});
|
||||
|
||||
// ── Rendering helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
async function renderLoaded(budgetData = budgetResponse()) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValueOnce(data as any);
|
||||
mockGet.mockResolvedValueOnce(budgetData as any);
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
// Wait for loading to finish
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
}
|
||||
|
||||
describe("BudgetSection — per-period progress bar", () => {
|
||||
it("renders the bar for a limited period and omits it for an unlimited one", async () => {
|
||||
await renderLoaded(withMonthly(1000, 250));
|
||||
expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
|
||||
expect(screen.queryByTestId("budget-hourly-fill")).toBeNull(); // hourly unlimited
|
||||
// ── Loading state ─────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — loading state", () => {
|
||||
it("shows loading indicator while fetch is in flight", () => {
|
||||
// Never resolve
|
||||
mockGet.mockReturnValue(new Promise(() => {}));
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
expect(screen.getByTestId("budget-loading")).toBeTruthy();
|
||||
expect(screen.getByText("Loading…")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("fills to 25%", async () => {
|
||||
await renderLoaded(withMonthly(1000, 250));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("25%");
|
||||
});
|
||||
|
||||
it("fills to 50%", async () => {
|
||||
await renderLoaded(withMonthly(1000, 500));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("50%");
|
||||
});
|
||||
|
||||
it("caps fill at 100% when spend exceeds limit", async () => {
|
||||
await renderLoaded(withMonthly(1000, 4000));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("100%");
|
||||
});
|
||||
|
||||
it("sets aria-valuenow to the computed percentage on the progressbar", async () => {
|
||||
await renderLoaded(withMonthly(1000, 250));
|
||||
const bars = screen.getAllByRole("progressbar");
|
||||
// the monthly bar is the only one rendered (others unlimited)
|
||||
expect(bars).toHaveLength(1);
|
||||
expect(bars[0].getAttribute("aria-valuenow")).toBe("25");
|
||||
});
|
||||
|
||||
it("shows a 0% bar when spend is 0 against a set limit", async () => {
|
||||
await renderLoaded(withMonthly(1000, 0));
|
||||
expect((screen.getByTestId("budget-monthly-fill") as HTMLElement).style.width).toBe("0%");
|
||||
it("hides loading indicator after fetch resolves", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValueOnce(budgetResponse() as any);
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
});
|
||||
});
|
||||
|
||||
// ── Section header ────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — header and subheading", () => {
|
||||
it("renders 'Budget' as the section heading", async () => {
|
||||
await renderLoaded();
|
||||
expect(screen.getByText("Budget")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("renders the subheading 'Limit total message credits for this workspace'", async () => {
|
||||
await renderLoaded();
|
||||
expect(
|
||||
screen.getByText("Limit total message credits for this workspace")
|
||||
).toBeTruthy();
|
||||
});
|
||||
|
||||
it("renders 'Budget limit (credits)' label for the input", async () => {
|
||||
await renderLoaded();
|
||||
expect(screen.getByText("Budget limit (credits)")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Stats row ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — stats row", () => {
|
||||
it("shows budget_used in the stats row", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 350, budget_limit: 1000 }));
|
||||
expect(screen.getByTestId("budget-used-value").textContent).toBe("350");
|
||||
});
|
||||
|
||||
it("shows budget_limit in the stats row", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 100, budget_limit: 500 }));
|
||||
expect(screen.getByTestId("budget-limit-value").textContent).toBe("500");
|
||||
});
|
||||
|
||||
it("shows 'Unlimited' when budget_limit is null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
|
||||
expect(screen.getByTestId("budget-limit-value").textContent).toBe("Unlimited");
|
||||
});
|
||||
|
||||
it("shows budget_remaining when present", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_remaining: 750 }));
|
||||
expect(screen.getByTestId("budget-remaining").textContent).toContain("750");
|
||||
expect(screen.getByTestId("budget-remaining").textContent).toContain("credits remaining");
|
||||
});
|
||||
|
||||
it("hides budget_remaining row when null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_remaining: null }));
|
||||
expect(screen.queryByTestId("budget-remaining")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not crash when budget_used is missing from the response", async () => {
|
||||
// Backend for a provisioning-stuck workspace may return a partial
|
||||
// shape. Regression: previously this threw
|
||||
// "Cannot read properties of undefined (reading 'toLocaleString')"
|
||||
// and crashed the whole Details tab.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
|
||||
expect(screen.getByTestId("budget-used-value").textContent).toBe("0");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Progress bar ──────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — progress bar", () => {
|
||||
it("renders the progress bar when budget_limit is set", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
|
||||
expect(screen.getByRole("progressbar")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("does NOT render progress bar when budget_limit is null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
|
||||
expect(screen.queryByRole("progressbar")).toBeNull();
|
||||
});
|
||||
|
||||
it("fills to the correct percentage (25%)", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 250, budget_limit: 1000 }));
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("25%");
|
||||
});
|
||||
|
||||
it("fills to the correct percentage (50%)", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 500, budget_limit: 1000 }));
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("50%");
|
||||
});
|
||||
|
||||
it("caps fill at 100% when budget_used exceeds budget_limit", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 1500, budget_limit: 1000 }));
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("100%");
|
||||
});
|
||||
|
||||
it("progress bar has aria-valuenow equal to the calculated percentage", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_used: 300, budget_limit: 1000 }));
|
||||
const bar = screen.getByRole("progressbar");
|
||||
expect(bar.getAttribute("aria-valuenow")).toBe("30");
|
||||
});
|
||||
|
||||
it("shows 0% progress bar when budget_used is absent from the response", async () => {
|
||||
// Regression: budget_used is optional (provisioning-stuck workspaces return
|
||||
// partial shapes). Without the `?? 0` guard the progressPct calculation
|
||||
// throws a TypeScript strict-null error and the build fails.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
await renderLoaded({ budget_limit: 1000, budget_remaining: null } as any);
|
||||
const bar = screen.getByRole("progressbar");
|
||||
expect(bar.getAttribute("aria-valuenow")).toBe("0");
|
||||
const fill = screen.getByTestId("budget-progress-fill") as HTMLDivElement;
|
||||
expect(fill.style.width).toBe("0%");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Input pre-fill ────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — input pre-fill", () => {
|
||||
it("pre-fills input with existing budget_limit", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: 500 }));
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
expect(input.value).toBe("500");
|
||||
});
|
||||
|
||||
it("leaves input empty when budget_limit is null", async () => {
|
||||
await renderLoaded(budgetResponse({ budget_limit: null, budget_remaining: null }));
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
expect(input.value).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Save — PATCH calls ────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — save", () => {
|
||||
it("calls PATCH /workspaces/:id/budget with budget_limit as integer", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 800 }) as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "800" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
|
||||
expect(mockPatch.mock.calls[0][0]).toBe("/workspaces/ws-1/budget");
|
||||
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.budget_limit).toBe(800);
|
||||
});
|
||||
|
||||
it("sends budget_limit: 0 (not null) when input is '0' — zero-credit budget", async () => {
|
||||
// Regression for QA bug report: `parseInt("0") || null` would yield null.
|
||||
// The correct form `raw !== "" ? parseInt(raw, 10) : null` must return 0.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: 0, budget_used: 0, budget_remaining: 0 }) as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "0" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
|
||||
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.budget_limit).toBe(0);
|
||||
expect(body.budget_limit).not.toBeNull();
|
||||
});
|
||||
|
||||
it("sends budget_limit: null when input is blank", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(budgetResponse({ budget_limit: null, budget_remaining: null }) as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() => expect(mockPatch).toHaveBeenCalled());
|
||||
const body = mockPatch.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.budget_limit).toBeNull();
|
||||
});
|
||||
|
||||
it("updates displayed stats after successful save", async () => {
|
||||
const updated = budgetResponse({ budget_limit: 2000, budget_used: 500, budget_remaining: 1500 });
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(updated as any);
|
||||
await renderLoaded(budgetResponse({ budget_limit: 1000, budget_used: 250 }));
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "2000" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-limit-value").textContent).toBe("2,000")
|
||||
);
|
||||
});
|
||||
|
||||
it("shows save error message on non-402 PATCH failure", async () => {
|
||||
mockPatch.mockRejectedValueOnce(
|
||||
new Error("API PATCH /workspaces/ws-1/budget: 500 server error")
|
||||
);
|
||||
await renderLoaded();
|
||||
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-save-error")).toBeTruthy()
|
||||
);
|
||||
expect(screen.getByTestId("budget-save-error").textContent).toContain("500");
|
||||
});
|
||||
});
|
||||
|
||||
// ── 402 handling ──────────────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — 402 handling", () => {
|
||||
it("shows exceeded banner when GET returns 402", async () => {
|
||||
mockGet.mockRejectedValueOnce(make402Error());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
|
||||
);
|
||||
expect(screen.getByText("Budget exceeded — messages blocked")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("does NOT show fetch error text when GET returns 402 (only banner)", async () => {
|
||||
mockGet.mockRejectedValueOnce(make402Error());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.queryByTestId("budget-loading")).toBeNull()
|
||||
);
|
||||
expect(screen.queryByTestId("budget-fetch-error")).toBeNull();
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
|
||||
it("shows exceeded banner when PATCH returns 402", async () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockGet.mockResolvedValueOnce(budgetResponse() as any);
|
||||
mockPatch.mockRejectedValueOnce(make402PatchError());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
|
||||
);
|
||||
// Should NOT also show the save-error alert
|
||||
expect(screen.queryByTestId("budget-save-error")).toBeNull();
|
||||
});
|
||||
|
||||
it("clears exceeded banner after a successful save", async () => {
|
||||
mockGet.mockRejectedValueOnce(make402Error());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy()
|
||||
);
|
||||
|
||||
// Now a successful PATCH (limit was raised)
|
||||
const updated = budgetResponse({ budget_limit: 5000, budget_used: 250, budget_remaining: 4750 });
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
mockPatch.mockResolvedValueOnce(updated as any);
|
||||
|
||||
await act(async () => {
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), {
|
||||
target: { value: "5000" },
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
});
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull()
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Non-402 fetch error ───────────────────────────────────────────────────────
|
||||
|
||||
describe("BudgetSection — non-402 fetch errors", () => {
|
||||
it("shows fetch error text on non-402 GET failure", async () => {
|
||||
mockGet.mockRejectedValueOnce(makeGenericError("internal server error"));
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() =>
|
||||
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy()
|
||||
);
|
||||
expect(screen.getByTestId("budget-fetch-error").textContent).toContain("500");
|
||||
});
|
||||
|
||||
it("does NOT show stats row on fetch error", async () => {
|
||||
mockGet.mockRejectedValueOnce(makeGenericError());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
expect(screen.queryByTestId("budget-stats-row")).toBeNull();
|
||||
});
|
||||
|
||||
it("does NOT show exceeded banner on non-402 fetch error", async () => {
|
||||
mockGet.mockRejectedValueOnce(makeGenericError());
|
||||
render(<BudgetSection workspaceId="ws-1" />);
|
||||
|
||||
await waitFor(() => expect(screen.queryByTestId("budget-loading")).toBeNull());
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -213,7 +213,6 @@ describe("CreateWorkspaceDialog", () => {
|
||||
expect(runtimeTexts).toEqual([
|
||||
"Claude Code",
|
||||
"OpenAI Codex CLI",
|
||||
"Google ADK",
|
||||
"Hermes",
|
||||
"OpenClaw",
|
||||
]);
|
||||
|
||||
@@ -1,110 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// internal#718 P3 (retire-list #4) — when GET /templates serves a
|
||||
// registry-backed selectable list (registry_providers + registry_models with
|
||||
// display_name / billing_mode / derived provider), the canvas builds the
|
||||
// provider catalog FROM that registry data instead of re-inferring vendor
|
||||
// from model-id prefixes (VENDOR_LABELS / BARE_VENDOR_PATTERNS / inferVendor).
|
||||
// The heuristic path stays only as the fallback for non-registry runtimes /
|
||||
// older backends.
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
buildProviderCatalogFromRegistry,
|
||||
type RegistryProvider,
|
||||
type RegistryModel,
|
||||
} from "../ProviderModelSelector";
|
||||
|
||||
// Mirrors the registry-served claude-code payload from GET /templates
|
||||
// (registry_providers / registry_models). display_name + billing_mode come
|
||||
// from the registry, NOT from the canvas VENDOR_LABELS map.
|
||||
const CLAUDE_CODE_REGISTRY_PROVIDERS: RegistryProvider[] = [
|
||||
{
|
||||
name: "anthropic-oauth",
|
||||
display_name: "Claude Code subscription",
|
||||
auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"],
|
||||
billing_mode: "byok",
|
||||
},
|
||||
{
|
||||
name: "anthropic-api",
|
||||
display_name: "Anthropic API",
|
||||
auth_env: ["ANTHROPIC_API_KEY"],
|
||||
billing_mode: "byok",
|
||||
},
|
||||
{
|
||||
name: "platform",
|
||||
display_name: "Platform",
|
||||
auth_env: ["ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billing_mode: "platform_managed",
|
||||
},
|
||||
];
|
||||
|
||||
const CLAUDE_CODE_REGISTRY_MODELS: RegistryModel[] = [
|
||||
{ id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
|
||||
{ id: "opus", provider: "anthropic-oauth", billing_mode: "byok" },
|
||||
{ id: "claude-opus-4-7", provider: "anthropic-api", billing_mode: "byok" },
|
||||
{ id: "anthropic/claude-opus-4-7", provider: "platform", billing_mode: "platform_managed" },
|
||||
];
|
||||
|
||||
describe("buildProviderCatalogFromRegistry", () => {
|
||||
it("buckets models by their DERIVED registry provider, not by inferred vendor", () => {
|
||||
const catalog = buildProviderCatalogFromRegistry(
|
||||
CLAUDE_CODE_REGISTRY_PROVIDERS,
|
||||
CLAUDE_CODE_REGISTRY_MODELS,
|
||||
);
|
||||
|
||||
const byVendor = new Map(catalog.map((p) => [p.vendor, p]));
|
||||
// anthropic-oauth bucket holds the two OAuth-derived models.
|
||||
const oauth = byVendor.get("anthropic-oauth");
|
||||
expect(oauth).toBeDefined();
|
||||
expect(oauth!.models.map((m) => m.id).sort()).toEqual(["opus", "sonnet"]);
|
||||
// platform bucket holds the platform-namespaced model.
|
||||
const platform = byVendor.get("platform");
|
||||
expect(platform).toBeDefined();
|
||||
expect(platform!.models.map((m) => m.id)).toEqual(["anthropic/claude-opus-4-7"]);
|
||||
});
|
||||
|
||||
it("labels providers from the registry display_name, not VENDOR_LABELS", () => {
|
||||
const catalog = buildProviderCatalogFromRegistry(
|
||||
CLAUDE_CODE_REGISTRY_PROVIDERS,
|
||||
CLAUDE_CODE_REGISTRY_MODELS,
|
||||
);
|
||||
const oauth = catalog.find((p) => p.vendor === "anthropic-oauth");
|
||||
// Registry display_name "Claude Code subscription" (decorated with the
|
||||
// model count by the catalog builder is acceptable; assert it carries the
|
||||
// registry label, not an inferred one).
|
||||
expect(oauth!.label).toContain("Claude Code subscription");
|
||||
});
|
||||
|
||||
it("carries the registry billing_mode per provider", () => {
|
||||
const catalog = buildProviderCatalogFromRegistry(
|
||||
CLAUDE_CODE_REGISTRY_PROVIDERS,
|
||||
CLAUDE_CODE_REGISTRY_MODELS,
|
||||
);
|
||||
expect(catalog.find((p) => p.vendor === "anthropic-oauth")!.billingMode).toBe("byok");
|
||||
expect(catalog.find((p) => p.vendor === "platform")!.billingMode).toBe("platform_managed");
|
||||
});
|
||||
|
||||
it("surfaces the registry auth_env on the provider entry", () => {
|
||||
const catalog = buildProviderCatalogFromRegistry(
|
||||
CLAUDE_CODE_REGISTRY_PROVIDERS,
|
||||
CLAUDE_CODE_REGISTRY_MODELS,
|
||||
);
|
||||
expect(catalog.find((p) => p.vendor === "anthropic-oauth")!.envVars).toEqual([
|
||||
"CLAUDE_CODE_OAUTH_TOKEN",
|
||||
]);
|
||||
});
|
||||
|
||||
it("only includes providers that actually have at least one served model", () => {
|
||||
// anthropic-api is a registry provider but has no model in this slice →
|
||||
// it should not appear as an empty bucket.
|
||||
const models: RegistryModel[] = [
|
||||
{ id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
|
||||
];
|
||||
const catalog = buildProviderCatalogFromRegistry(
|
||||
CLAUDE_CODE_REGISTRY_PROVIDERS,
|
||||
models,
|
||||
);
|
||||
expect(catalog.map((p) => p.vendor)).toEqual(["anthropic-oauth"]);
|
||||
});
|
||||
});
|
||||
@@ -7,28 +7,10 @@ import { api } from "@/lib/api";
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Period keys MUST match the server SSOT (workspace-server budget_periods.go).
|
||||
type BudgetPeriod = "hourly" | "daily" | "weekly" | "monthly";
|
||||
|
||||
const PERIODS: { key: BudgetPeriod; label: string }[] = [
|
||||
{ key: "hourly", label: "Hourly" },
|
||||
{ key: "daily", label: "Daily" },
|
||||
{ key: "weekly", label: "Weekly" },
|
||||
{ key: "monthly", label: "Monthly" },
|
||||
];
|
||||
|
||||
interface PeriodBudget {
|
||||
limit: number | null; // USD cents; null = no limit
|
||||
spend: number; // rolling-window spend, USD cents
|
||||
remaining: number | null; // null when no limit
|
||||
}
|
||||
|
||||
interface BudgetData {
|
||||
periods?: Partial<Record<BudgetPeriod, PeriodBudget>>;
|
||||
// legacy fields (pre-multi-period server) — tolerated for back-compat
|
||||
budget_limit?: number | null;
|
||||
monthly_spend?: number;
|
||||
budget_remaining?: number | null;
|
||||
budget_limit: number | null;
|
||||
budget_used?: number; // optional — provisioning-stuck workspaces return partial shapes
|
||||
budget_remaining: number | null;
|
||||
}
|
||||
|
||||
interface Props {
|
||||
@@ -44,71 +26,31 @@ function isApiError402(e: unknown): boolean {
|
||||
return e instanceof Error && /: 402( |$)/.test(e.message);
|
||||
}
|
||||
|
||||
/** USD cents → "$X.XX". */
|
||||
function fmtUSD(cents: number): string {
|
||||
return `$${(cents / 100).toLocaleString(undefined, { minimumFractionDigits: 2, maximumFractionDigits: 2 })}`;
|
||||
}
|
||||
|
||||
/** Normalize the server payload (multi-period or legacy) into a period map. */
|
||||
function periodsFrom(data: BudgetData | null): Record<BudgetPeriod, PeriodBudget> {
|
||||
const base: Record<BudgetPeriod, PeriodBudget> = {
|
||||
hourly: { limit: null, spend: 0, remaining: null },
|
||||
daily: { limit: null, spend: 0, remaining: null },
|
||||
weekly: { limit: null, spend: 0, remaining: null },
|
||||
monthly: { limit: null, spend: 0, remaining: null },
|
||||
};
|
||||
if (!data) return base;
|
||||
if (data.periods) {
|
||||
for (const { key } of PERIODS) {
|
||||
const p = data.periods[key];
|
||||
if (p) base[key] = { limit: p.limit ?? null, spend: p.spend ?? 0, remaining: p.remaining ?? null };
|
||||
}
|
||||
return base;
|
||||
}
|
||||
// legacy: map the single monthly limit/spend
|
||||
base.monthly = {
|
||||
limit: data.budget_limit ?? null,
|
||||
spend: data.monthly_spend ?? 0,
|
||||
remaining: data.budget_remaining ?? null,
|
||||
};
|
||||
return base;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Component
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* BudgetSection — per-workspace LLM budget, four independent rolling windows
|
||||
* (hourly / daily / weekly / monthly). Each period has its own ceiling (USD);
|
||||
* spend is the rolling-window LLM cost. Crossing ANY period blocks new work
|
||||
* (server returns 402). Sends PATCH {budget_limits:{period:cents|null}}.
|
||||
* BudgetSection — dedicated "Budget" section in the workspace details panel.
|
||||
*
|
||||
* - Fetches GET /workspaces/:id/budget on mount for live usage stats
|
||||
* - Shows a progress bar (budget_used / budget_limit, blue-500, capped 100%)
|
||||
* - Allows updating budget_limit via PATCH /workspaces/:id/budget
|
||||
* - Shows a 402-specific "Budget exceeded" amber banner for any blocked state
|
||||
*/
|
||||
export function BudgetSection({ workspaceId }: Props) {
|
||||
const [budget, setBudget] = useState<BudgetData | null>(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [fetchError, setFetchError] = useState<string | null>(null);
|
||||
|
||||
// One input per period, in USD cents (string for controlled inputs).
|
||||
const [limitInputs, setLimitInputs] = useState<Record<BudgetPeriod, string>>({
|
||||
hourly: "",
|
||||
daily: "",
|
||||
weekly: "",
|
||||
monthly: "",
|
||||
});
|
||||
const [limitInput, setLimitInput] = useState("");
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [saveError, setSaveError] = useState<string | null>(null);
|
||||
|
||||
/** True when a 402 has been seen from any API call in this section. */
|
||||
const [budgetExceeded, setBudgetExceeded] = useState(false);
|
||||
|
||||
const syncInputs = useCallback((data: BudgetData | null) => {
|
||||
const p = periodsFrom(data);
|
||||
setLimitInputs({
|
||||
hourly: p.hourly.limit != null ? String(p.hourly.limit) : "",
|
||||
daily: p.daily.limit != null ? String(p.daily.limit) : "",
|
||||
weekly: p.weekly.limit != null ? String(p.weekly.limit) : "",
|
||||
monthly: p.monthly.limit != null ? String(p.monthly.limit) : "",
|
||||
});
|
||||
}, []);
|
||||
// ── Fetch current budget data ─────────────────────────────────────────────
|
||||
|
||||
const loadBudget = useCallback(async () => {
|
||||
setLoading(true);
|
||||
@@ -116,7 +58,7 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
try {
|
||||
const data = await api.get<BudgetData>(`/workspaces/${workspaceId}/budget`);
|
||||
setBudget(data);
|
||||
syncInputs(data);
|
||||
setLimitInput(data.budget_limit != null ? String(data.budget_limit) : "");
|
||||
} catch (e) {
|
||||
if (isApiError402(e)) {
|
||||
setBudgetExceeded(true);
|
||||
@@ -126,30 +68,29 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, [workspaceId, syncInputs]);
|
||||
}, [workspaceId]);
|
||||
|
||||
useEffect(() => {
|
||||
loadBudget();
|
||||
}, [loadBudget]);
|
||||
|
||||
// ── Save handler ──────────────────────────────────────────────────────────
|
||||
|
||||
const handleSave = async () => {
|
||||
setSaving(true);
|
||||
setSaveError(null);
|
||||
// Build the per-period map: blank → null (clear); a number → that ceiling.
|
||||
const budget_limits: Record<BudgetPeriod, number | null> = {
|
||||
hourly: null,
|
||||
daily: null,
|
||||
weekly: null,
|
||||
monthly: null,
|
||||
};
|
||||
for (const { key } of PERIODS) {
|
||||
const raw = limitInputs[key].trim();
|
||||
budget_limits[key] = raw !== "" ? parseInt(raw, 10) : null;
|
||||
}
|
||||
const raw = limitInput.trim();
|
||||
// Use explicit empty-string check (not falsy check) so that a
|
||||
// user-entered "0" is sent as budget_limit: 0, not null (unlimited).
|
||||
const parsedLimit = raw !== "" ? parseInt(raw, 10) : null;
|
||||
|
||||
try {
|
||||
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, { budget_limits });
|
||||
const updated = await api.patch<BudgetData>(`/workspaces/${workspaceId}/budget`, {
|
||||
budget_limit: parsedLimit,
|
||||
});
|
||||
setBudget(updated);
|
||||
syncInputs(updated);
|
||||
setLimitInput(updated.budget_limit != null ? String(updated.budget_limit) : "");
|
||||
// Clear exceeded state if the save succeeded (limit was raised or removed)
|
||||
setBudgetExceeded(false);
|
||||
} catch (e) {
|
||||
if (isApiError402(e)) {
|
||||
@@ -162,15 +103,24 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
}
|
||||
};
|
||||
|
||||
const periods = periodsFrom(budget);
|
||||
// ── Progress calculation ──────────────────────────────────────────────────
|
||||
|
||||
const progressPct =
|
||||
budget && budget.budget_limit != null && budget.budget_limit > 0
|
||||
? Math.min(100, Math.round(((budget.budget_used ?? 0) / budget.budget_limit) * 100))
|
||||
: 0;
|
||||
|
||||
// ── Render ────────────────────────────────────────────────────────────────
|
||||
|
||||
return (
|
||||
<div className="space-y-3" data-testid="budget-section">
|
||||
{/* Section header */}
|
||||
<div>
|
||||
<h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">Budget</h3>
|
||||
<h3 className="text-xs font-semibold text-ink-mid uppercase tracking-wider">
|
||||
Budget
|
||||
</h3>
|
||||
<p className="text-[11px] text-ink-mid mt-0.5">
|
||||
Cap LLM spend for this workspace per period — crossing any limit pauses new work
|
||||
Limit total message credits for this workspace
|
||||
</p>
|
||||
</div>
|
||||
|
||||
@@ -181,14 +131,32 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
data-testid="budget-exceeded-banner"
|
||||
className="flex items-center gap-2 px-3 py-2 rounded-lg bg-surface border border-amber-700/50 text-warm text-xs font-medium"
|
||||
>
|
||||
<svg width="13" height="13" viewBox="0 0 13 13" fill="none" aria-hidden="true" className="shrink-0">
|
||||
<path d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z" stroke="currentColor" strokeWidth="1.4" strokeLinejoin="round" />
|
||||
<path d="M6.5 5.5V7.5M6.5 9.5h.01" stroke="currentColor" strokeWidth="1.4" strokeLinecap="round" />
|
||||
<svg
|
||||
width="13"
|
||||
height="13"
|
||||
viewBox="0 0 13 13"
|
||||
fill="none"
|
||||
aria-hidden="true"
|
||||
className="shrink-0"
|
||||
>
|
||||
<path
|
||||
d="M6.5 1.5L11.5 10.5H1.5L6.5 1.5Z"
|
||||
stroke="currentColor"
|
||||
strokeWidth="1.4"
|
||||
strokeLinejoin="round"
|
||||
/>
|
||||
<path
|
||||
d="M6.5 5.5V7.5M6.5 9.5h.01"
|
||||
stroke="currentColor"
|
||||
strokeWidth="1.4"
|
||||
strokeLinecap="round"
|
||||
/>
|
||||
</svg>
|
||||
Budget exceeded — new work paused
|
||||
Budget exceeded — messages blocked
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Usage stats */}
|
||||
{loading ? (
|
||||
<p className="text-xs text-ink-mid" data-testid="budget-loading">
|
||||
Loading…
|
||||
@@ -197,78 +165,89 @@ export function BudgetSection({ workspaceId }: Props) {
|
||||
<p className="text-xs text-bad" data-testid="budget-fetch-error">
|
||||
{fetchError}
|
||||
</p>
|
||||
) : (
|
||||
<div className="space-y-3">
|
||||
{PERIODS.map(({ key, label }) => {
|
||||
const p = periods[key];
|
||||
const pct =
|
||||
p.limit != null && p.limit > 0 ? Math.min(100, Math.round((p.spend / p.limit) * 100)) : 0;
|
||||
const over = p.limit != null && p.spend >= p.limit;
|
||||
return (
|
||||
<div key={key} className="space-y-1" data-testid={`budget-period-${key}`}>
|
||||
<div className="flex items-baseline justify-between">
|
||||
<label htmlFor={`budget-${key}-${workspaceId}`} className="text-xs text-ink-mid">
|
||||
{label}
|
||||
</label>
|
||||
<span className="text-[11px] font-mono text-ink-mid">
|
||||
<span data-testid={`budget-${key}-spend`}>{fmtUSD(p.spend)}</span>
|
||||
<span className="mx-1">/</span>
|
||||
<span data-testid={`budget-${key}-limit`}>{p.limit != null ? fmtUSD(p.limit) : "∞"}</span>
|
||||
</span>
|
||||
</div>
|
||||
{p.limit != null && (
|
||||
<div
|
||||
role="progressbar"
|
||||
aria-label={`${label} budget usage`}
|
||||
aria-valuenow={pct}
|
||||
aria-valuemin={0}
|
||||
aria-valuemax={100}
|
||||
className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
|
||||
>
|
||||
<div
|
||||
data-testid={`budget-${key}-fill`}
|
||||
className={`h-full rounded-full transition-all duration-300 ${over ? "bg-bad" : "bg-accent"}`}
|
||||
style={{ width: `${pct}%` }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
<input
|
||||
id={`budget-${key}-${workspaceId}`}
|
||||
type="number"
|
||||
min="0"
|
||||
step="1"
|
||||
value={limitInputs[key]}
|
||||
onChange={(e) => setLimitInputs((s) => ({ ...s, [key]: e.target.value }))}
|
||||
placeholder="USD cents — blank for unlimited"
|
||||
data-testid={`budget-${key}-input`}
|
||||
className="w-full bg-surface-card border border-line rounded-lg px-3 py-1.5 text-xs text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
) : budget ? (
|
||||
<div className="space-y-2">
|
||||
{/* Stats row */}
|
||||
<div className="flex items-baseline justify-between" data-testid="budget-stats-row">
|
||||
<span className="text-xs text-ink-mid">Credits used</span>
|
||||
<span className="text-xs font-mono text-ink-mid">
|
||||
<span data-testid="budget-used-value">{(budget.budget_used ?? 0).toLocaleString()}</span>
|
||||
<span className="text-ink-mid mx-1">/</span>
|
||||
<span data-testid="budget-limit-value">
|
||||
{budget.budget_limit != null
|
||||
? budget.budget_limit.toLocaleString()
|
||||
: "Unlimited"}
|
||||
</span>
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<p className="text-[11px] text-ink-mid">Limits are USD cents (e.g. 500 = $5.00). Blank = unlimited.</p>
|
||||
|
||||
{saveError && (
|
||||
{/* Progress bar (only when limit is set) */}
|
||||
{budget.budget_limit != null && (
|
||||
<div
|
||||
role="alert"
|
||||
data-testid="budget-save-error"
|
||||
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
|
||||
role="progressbar"
|
||||
aria-label="Budget usage"
|
||||
aria-valuenow={progressPct}
|
||||
aria-valuemin={0}
|
||||
aria-valuemax={100}
|
||||
className="h-1.5 w-full rounded-full bg-surface-card overflow-hidden"
|
||||
>
|
||||
{saveError}
|
||||
<div
|
||||
data-testid="budget-progress-fill"
|
||||
className="h-full rounded-full bg-accent transition-all duration-300"
|
||||
style={{ width: `${progressPct}%` }}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={handleSave}
|
||||
disabled={saving}
|
||||
data-testid="budget-save-btn"
|
||||
className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
|
||||
>
|
||||
{saving ? "Saving…" : "Save"}
|
||||
</button>
|
||||
{/* Remaining credits */}
|
||||
{budget.budget_remaining != null && (
|
||||
<p className="text-[11px] text-ink-mid" data-testid="budget-remaining">
|
||||
{budget.budget_remaining.toLocaleString()} credits remaining
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
) : null}
|
||||
|
||||
{/* Input + Save */}
|
||||
<div className="space-y-1.5 pt-1">
|
||||
<label
|
||||
htmlFor={`budget-limit-input-${workspaceId}`}
|
||||
className="text-[11px] text-ink-mid block"
|
||||
>
|
||||
Budget limit (credits)
|
||||
</label>
|
||||
<input
|
||||
id={`budget-limit-input-${workspaceId}`}
|
||||
type="number"
|
||||
min="0"
|
||||
step="1"
|
||||
value={limitInput}
|
||||
onChange={(e) => setLimitInput(e.target.value)}
|
||||
placeholder="e.g. 1000 — blank for unlimited"
|
||||
data-testid="budget-limit-input"
|
||||
className="w-full bg-surface-card border border-line rounded-lg px-3 py-2 text-sm text-ink-mid placeholder-zinc-500 focus:outline-none focus:border-accent focus:ring-1 focus:ring-accent/30 transition-colors"
|
||||
/>
|
||||
<p className="text-xs text-ink-mid">Leave blank for unlimited</p>
|
||||
|
||||
{saveError && (
|
||||
<div
|
||||
role="alert"
|
||||
data-testid="budget-save-error"
|
||||
className="px-3 py-1.5 rounded-lg bg-red-950/40 border border-red-800/50 text-xs text-bad"
|
||||
>
|
||||
{saveError}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<button
|
||||
onClick={handleSave}
|
||||
disabled={saving}
|
||||
data-testid="budget-save-btn"
|
||||
className="px-4 py-1.5 bg-accent-strong hover:bg-accent active:bg-accent-strong rounded-lg text-xs font-medium text-white disabled:opacity-50 transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-accent focus-visible:ring-offset-1 focus-visible:ring-offset-zinc-900"
|
||||
>
|
||||
{saving ? "Saving…" : "Save"}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -11,12 +11,8 @@ import { ExternalConnectionSection } from "./ExternalConnectionSection";
|
||||
import {
|
||||
ProviderModelSelector,
|
||||
buildProviderCatalog,
|
||||
buildProviderCatalogFromRegistry,
|
||||
findProviderForModel,
|
||||
type SelectorValue,
|
||||
type ProviderEntry,
|
||||
type RegistryProvider,
|
||||
type RegistryModel,
|
||||
} from "../ProviderModelSelector";
|
||||
import { isExternalLikeRuntime } from "@/lib/externalRuntimes";
|
||||
|
||||
@@ -262,17 +258,6 @@ interface RuntimeOption {
|
||||
// canvas falls back to deriving unique vendor prefixes from
|
||||
// models[].id (still adapter-driven, just inferred).
|
||||
providers: string[];
|
||||
// registryBacked / registryProviders / registryModels come from the
|
||||
// registry-served GET /templates fields (internal#718 P3). When
|
||||
// registryBacked is true, the selectable provider+model list is built from
|
||||
// the registry (registryProviders/registryModels) — display labels +
|
||||
// billing mode + derived provider come from the provider-registry SSOT, not
|
||||
// the canvas VENDOR_LABELS / billingModeForProvider vocabularies. When
|
||||
// false (non-registry runtime / older backend), the canvas falls back to
|
||||
// the template-served models[] + its inferVendor heuristic.
|
||||
registryBacked: boolean;
|
||||
registryProviders: RegistryProvider[];
|
||||
registryModels: RegistryModel[];
|
||||
}
|
||||
|
||||
// deriveProvidersFromModels — when a template doesn't ship an explicit
|
||||
@@ -337,32 +322,6 @@ export function billingModeForProvider(provider: string): LLMBillingMode {
|
||||
return "byok";
|
||||
}
|
||||
|
||||
// billingModeForSelectedProvider — internal#718 P3 (retire-list #5): the
|
||||
// billing mode the Config tab shows/sends for the selected PROVIDER, sourced
|
||||
// from the registry-served catalog when available rather than the hardcoded
|
||||
// billingModeForProvider rule.
|
||||
//
|
||||
// When the runtime is registry-backed, GET /templates serves each provider's
|
||||
// DERIVED billing_mode (platform_managed for the closed platform provider,
|
||||
// byok otherwise) on the ProviderEntry. We read it off the catalog so the UI
|
||||
// reflects the registry SSOT — the same predicate billing/credential emission
|
||||
// keys off the derived provider.
|
||||
//
|
||||
// Falls back to billingModeForProvider when: no catalog (non-registry runtime
|
||||
// / older backend), or the provider string isn't carried by the catalog
|
||||
// (e.g. a stale saved value). The fallback keeps the legacy behavior intact
|
||||
// for everything the registry doesn't yet speak to.
|
||||
export function billingModeForSelectedProvider(
|
||||
provider: string,
|
||||
catalog?: ProviderEntry[],
|
||||
): LLMBillingMode {
|
||||
if (catalog && catalog.length > 0) {
|
||||
const entry = catalog.find((p) => p.vendor === provider.trim());
|
||||
if (entry?.billingMode) return entry.billingMode;
|
||||
}
|
||||
return billingModeForProvider(provider);
|
||||
}
|
||||
|
||||
// Fallback used when /templates can't be fetched (offline, older backend).
|
||||
// Keep in sync with manifest.json workspace_templates as a defensive default.
|
||||
// Model + env suggestions only flow when the backend is reachable.
|
||||
@@ -377,20 +336,13 @@ export function billingModeForSelectedProvider(
|
||||
// config.yaml` on the container is a separate runtime-internal file,
|
||||
// not this one.
|
||||
const RUNTIMES_WITH_OWN_CONFIG = new Set<string>(["external", "kimi", "kimi-cli", "openclaw"]);
|
||||
// The runtime picker is SSOT-driven: options come from GET /templates,
|
||||
// which workspace-server already gates to the manifest.json maintained set
|
||||
// (loadRuntimesFromManifest). A hand-maintained frontend allowlist silently
|
||||
// dropped runtimes the backend added (google-adk shipped in manifest but was
|
||||
// filtered out, so its workspaces rendered the wrong default option). A
|
||||
// template may still opt OUT of the picker via `displayable: false` on its
|
||||
// /templates row. See project_canvas_runtime_dropdown_ssot_fix.
|
||||
const SUPPORTED_RUNTIME_VALUES = new Set(["claude-code", "codex", "openclaw", "hermes"]);
|
||||
|
||||
const FALLBACK_RUNTIME_OPTIONS: RuntimeOption[] = [
|
||||
{ value: "claude-code", label: "Claude Code", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "codex", label: "Codex", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "google-adk", label: "Google ADK", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "openclaw", label: "OpenClaw", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "hermes", label: "Hermes", models: [], providers: [], registryBacked: false, registryProviders: [], registryModels: [] },
|
||||
{ value: "claude-code", label: "Claude Code", models: [], providers: [] },
|
||||
{ value: "codex", label: "Codex", models: [], providers: [] },
|
||||
{ value: "openclaw", label: "OpenClaw", models: [], providers: [] },
|
||||
{ value: "hermes", label: "Hermes", models: [], providers: [] },
|
||||
];
|
||||
|
||||
export function ConfigTab({ workspaceId }: Props) {
|
||||
@@ -403,24 +355,15 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
const [rawMode, setRawMode] = useState(false);
|
||||
const [rawDraft, setRawDraft] = useState("");
|
||||
const [runtimeOptions, setRuntimeOptions] = useState<RuntimeOption[]>(FALLBACK_RUNTIME_OPTIONS);
|
||||
// internal#718 P4 closure: the explicit provider override
|
||||
// (LLM_PROVIDER workspace_secret, surfaced via GET/PUT
|
||||
// /workspaces/:id/provider) has been RETIRED. The provider is
|
||||
// derived at every decision point from (runtime, model) via the
|
||||
// registry — no stored row remains. The `provider` / `originalProvider`
|
||||
// state and the provider dropdown survive in this component for
|
||||
// backwards-compat (display only) but are no longer persisted:
|
||||
// - loadConfig no longer GETs /workspaces/:id/provider (the
|
||||
// endpoint returns 410 Gone). The state initializes to ""
|
||||
// and stays there.
|
||||
// - handleSave no longer PUTs /workspaces/:id/provider.
|
||||
// - The dropdown still updates the local `provider` state so the
|
||||
// user can preview the derived value; the value never leaves
|
||||
// the browser.
|
||||
// This is the canvas-side complement to the backend retirement of
|
||||
// SetProvider/GetProvider/setProviderSecret. Older canvases that
|
||||
// still call PUT /provider hit the 410 Gone with a structured
|
||||
// PROVIDER_ENDPOINT_RETIRED code — loud failure, no silent miss.
|
||||
// Provider override (Option B PR-5): stored separately from config.yaml
|
||||
// because the value lives in workspace_secrets (encrypted), not in the
|
||||
// platform-managed config.yaml. The two endpoints are GET/PUT
|
||||
// /workspaces/:id/provider on workspace-server (handlers/secrets.go).
|
||||
// Empty = "auto-derive from model slug prefix" — pre-Option-B behavior
|
||||
// and what most users want. Setting to a non-empty value writes
|
||||
// LLM_PROVIDER into workspace_secrets and triggers an auto-restart so
|
||||
// the workspace boots with the new provider in env (and via CP user-
|
||||
// data, written into /configs/config.yaml on next provision too).
|
||||
const [provider, setProvider] = useState("");
|
||||
const [originalProvider, setOriginalProvider] = useState("");
|
||||
// Track the model the form first rendered, so handleSave can detect
|
||||
@@ -471,23 +414,26 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
//
|
||||
// See GH #1894 for the workspace-row-as-source-of-truth rationale
|
||||
// that motivated splitting from a single config.yaml read.
|
||||
// internal#718 P4 closure: the GET /workspaces/:id/provider leg is
|
||||
// RETIRED — the endpoint returns 410 Gone. Provider is now derived
|
||||
// from (runtime, model) via the registry; no stored value exists
|
||||
// to load. Always seed the local state to "" so the dropdown
|
||||
// initializes to "auto-derive".
|
||||
const [wsRes, modelRes] = await Promise.all([
|
||||
const [wsRes, modelRes, providerRes] = await Promise.all([
|
||||
api.get<{ runtime?: string; tier?: number }>(`/workspaces/${workspaceId}`)
|
||||
.catch(() => ({} as { runtime?: string; tier?: number })),
|
||||
api.get<{ model?: string }>(`/workspaces/${workspaceId}/model`)
|
||||
.catch(() => ({} as { model?: string })),
|
||||
api.get<{ provider?: string }>(`/workspaces/${workspaceId}/provider`)
|
||||
.catch(() => null),
|
||||
]);
|
||||
const wsMetadataRuntime = (wsRes.runtime || "").trim();
|
||||
const wsMetadataModel = (modelRes.model || "").trim();
|
||||
const wsMetadataTier: number | null =
|
||||
typeof wsRes.tier === "number" ? wsRes.tier : null;
|
||||
setProvider("");
|
||||
setOriginalProvider("");
|
||||
if (providerRes !== null) {
|
||||
const loadedProvider = (providerRes.provider || "").trim();
|
||||
setProvider(loadedProvider);
|
||||
setOriginalProvider(loadedProvider);
|
||||
} else {
|
||||
setProvider("");
|
||||
setOriginalProvider("");
|
||||
}
|
||||
// originalModel is set further down once the YAML has been parsed —
|
||||
// we want it to reflect what the form ACTUALLY rendered, which may
|
||||
// be the YAML's runtime_config.model fallback when MODEL_PROVIDER
|
||||
@@ -581,49 +527,20 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
|
||||
useEffect(() => {
|
||||
let cancelled = false;
|
||||
api.get<Array<{
|
||||
id: string;
|
||||
name?: string;
|
||||
runtime?: string;
|
||||
models?: ModelSpec[];
|
||||
providers?: string[];
|
||||
// internal#718 P3 registry-served fields (additive; absent on older
|
||||
// backends and for non-registry runtimes).
|
||||
registry_backed?: boolean;
|
||||
registry_providers?: RegistryProvider[];
|
||||
registry_models?: RegistryModel[];
|
||||
displayable?: boolean;
|
||||
}>>("/templates")
|
||||
api.get<Array<{ id: string; name?: string; runtime?: string; models?: ModelSpec[]; providers?: string[] }>>("/templates")
|
||||
.then((rows) => {
|
||||
if (cancelled || !Array.isArray(rows)) return;
|
||||
const byRuntime = new Map<string, RuntimeOption>();
|
||||
for (const r of rows) {
|
||||
const v = (r.runtime || "").trim();
|
||||
if (!v) continue;
|
||||
// Honor an explicit opt-out; absent/true means show it.
|
||||
if (r.displayable === false) continue;
|
||||
if (!SUPPORTED_RUNTIME_VALUES.has(v)) continue;
|
||||
// Last template wins if two templates share a runtime — rare, and the
|
||||
// one with the richer models list is probably newer.
|
||||
const existing = byRuntime.get(v);
|
||||
const models = Array.isArray(r.models) ? r.models : [];
|
||||
const providers = Array.isArray(r.providers) ? r.providers : [];
|
||||
const registryProviders = Array.isArray(r.registry_providers) ? r.registry_providers : [];
|
||||
const registryModels = Array.isArray(r.registry_models) ? r.registry_models : [];
|
||||
const registryBacked = r.registry_backed === true && registryModels.length > 0;
|
||||
// Prefer the richer payload: a registry-backed entry, then more
|
||||
// template models. Keeps the "last/richer template wins" intent.
|
||||
const score = (o: RuntimeOption) => (o.registryBacked ? 1000 : 0) + o.models.length;
|
||||
const candidate: RuntimeOption = {
|
||||
value: v,
|
||||
label: r.name || v,
|
||||
models,
|
||||
providers,
|
||||
registryBacked,
|
||||
registryProviders,
|
||||
registryModels,
|
||||
};
|
||||
if (!existing || score(candidate) > score(existing)) {
|
||||
byRuntime.set(v, candidate);
|
||||
if (!existing || models.length > existing.models.length) {
|
||||
byRuntime.set(v, { value: v, label: r.name || v, models, providers });
|
||||
}
|
||||
}
|
||||
if (byRuntime.size > 0) setRuntimeOptions(Array.from(byRuntime.values()));
|
||||
@@ -634,13 +551,7 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
|
||||
// Models + env hints for the currently-selected runtime.
|
||||
const selectedRuntime = runtimeOptions.find((o) => o.value === (config.runtime || "")) ?? null;
|
||||
// Memoised so its identity is stable across renders — it feeds several
|
||||
// useMemo dependency arrays below (registry/legacy catalog, selector models)
|
||||
// and a fresh `[]` literal each render would defeat their memoisation.
|
||||
const availableModels: ModelSpec[] = useMemo(
|
||||
() => selectedRuntime?.models ?? [],
|
||||
[selectedRuntime?.models],
|
||||
);
|
||||
const availableModels: ModelSpec[] = selectedRuntime?.models ?? [];
|
||||
// Provider suggestions for the legacy free-text input fallback (used
|
||||
// when /templates returned no models for this runtime, e.g. hermes
|
||||
// workspaces). Prefer the runtime's declarative providers list,
|
||||
@@ -654,37 +565,9 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
|
||||
// Vendor-aware catalog shared with the selector. Memoised so the
|
||||
// catalog identity is stable across renders (selector relies on it).
|
||||
//
|
||||
// internal#718 P3: when the runtime is registry-backed, build the catalog
|
||||
// FROM the registry-served providers/models (display labels + billing +
|
||||
// derived provider from the provider-registry SSOT) instead of re-inferring
|
||||
// vendor from model-id prefixes. Falls back to the inferVendor heuristic
|
||||
// for non-registry runtimes / older backends.
|
||||
const registryBacked = selectedRuntime?.registryBacked ?? false;
|
||||
const providerCatalog = useMemo(
|
||||
() =>
|
||||
registryBacked
|
||||
? buildProviderCatalogFromRegistry(
|
||||
selectedRuntime?.registryProviders ?? [],
|
||||
selectedRuntime?.registryModels ?? [],
|
||||
)
|
||||
: buildProviderCatalog(availableModels),
|
||||
[registryBacked, selectedRuntime?.registryProviders, selectedRuntime?.registryModels, availableModels],
|
||||
);
|
||||
// Models fed to the selector dropdown: the registry-served native set for a
|
||||
// registry-backed runtime (so the dropdown can render no unregistered
|
||||
// option), else the template-served models.
|
||||
const selectorModels: ModelSpec[] = useMemo(
|
||||
() =>
|
||||
registryBacked
|
||||
? (selectedRuntime?.registryModels ?? []).map((m) => ({
|
||||
id: m.id,
|
||||
name: m.name,
|
||||
// carry the derived provider so the selector buckets correctly
|
||||
...(m.provider ? { provider: m.provider } : {}),
|
||||
}))
|
||||
: availableModels,
|
||||
[registryBacked, selectedRuntime?.registryModels, availableModels],
|
||||
() => buildProviderCatalog(availableModels),
|
||||
[availableModels],
|
||||
);
|
||||
|
||||
// Derive the selector's current value from the form state. Provider
|
||||
@@ -835,27 +718,53 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
}
|
||||
}
|
||||
|
||||
// internal#718 P4 closure: provider override save is RETIRED. The
|
||||
// /workspaces/:id/provider endpoint returns 410 Gone; the provider
|
||||
// is derived from (runtime, model) at every decision point via the
|
||||
// registry. The local dropdown state still updates so the user can
|
||||
// see the predicted provider, but it never round-trips to the
|
||||
// server. Variables retained as locals (set to constants) so the
|
||||
// downstream restart-suppress logic below has clear semantics
|
||||
// and the diff against the prior shape stays small.
|
||||
const providerSaveError: string | null = null;
|
||||
const providerChanged = false;
|
||||
// Provider override save (Option B PR-5). PUT only when the user
|
||||
// changed the dropdown — otherwise an unrelated Save (e.g. tier
|
||||
// edit) would re-write the provider unchanged and the server-
|
||||
// side auto-restart would fire on every Save, costing the user a
|
||||
// ~30s reboot for a no-op change. Server endpoint accepts an
|
||||
// empty string to clear the override (deletes the
|
||||
// workspace_secrets row); we forward whatever the form holds.
|
||||
let providerSaveError: string | null = null;
|
||||
const providerChanged = provider !== originalProvider;
|
||||
if (providerChanged) {
|
||||
try {
|
||||
await api.put(`/workspaces/${workspaceId}/provider`, { provider });
|
||||
setOriginalProvider(provider);
|
||||
} catch (e) {
|
||||
providerSaveError = e instanceof Error ? e.message : "Provider update was rejected";
|
||||
}
|
||||
}
|
||||
|
||||
// internal#718 P4 closure: provider → billing_mode linkage is also
|
||||
// RETIRED. P2-B (#1972) moved the billing decision to
|
||||
// ResolveLLMBillingModeDerived, which DERIVES the provider from
|
||||
// (runtime, model) at every read. The canvas can no longer
|
||||
// override it via a separate PUT, by design — the runtime+model
|
||||
// selection IS the billing-mode selection. The
|
||||
// /admin/workspaces/:id/llm-billing-mode endpoint still exists
|
||||
// as the operator override surface (workspaces.llm_billing_mode
|
||||
// column); it is no longer driven by the provider dropdown.
|
||||
const billingModeSaveError: string | null = null;
|
||||
// Provider → billing_mode linkage (internal#703 Gap 2). When the
|
||||
// provider actually changed AND its implied billing_mode differs
|
||||
// from the previously-selected provider's, push the new mode to
|
||||
// the per-tenant llm-billing-mode endpoint (same path the LLM
|
||||
// Billing section uses). Without this, selecting a non-Platform
|
||||
// provider leaves billing_mode=platform_managed → CP keeps
|
||||
// injecting the platform proxy → BYOK never takes.
|
||||
//
|
||||
// Gated on (a) the provider PUT having succeeded — no point setting
|
||||
// byok if the credential write failed — and (b) the mode actually
|
||||
// changing, so an unrelated provider tweak between two BYOK vendors
|
||||
// (e.g. minimax → openrouter) doesn't re-issue a redundant
|
||||
// platform_managed→byok PUT and trigger a needless restart.
|
||||
let billingModeSaveError: string | null = null;
|
||||
if (providerChanged && !providerSaveError) {
|
||||
const nextMode = billingModeForProvider(provider);
|
||||
const prevMode = billingModeForProvider(originalProvider);
|
||||
if (nextMode !== prevMode) {
|
||||
try {
|
||||
await api.put(
|
||||
`/admin/workspaces/${workspaceId}/llm-billing-mode`,
|
||||
{ mode: nextMode },
|
||||
);
|
||||
} catch (e) {
|
||||
billingModeSaveError =
|
||||
e instanceof Error ? e.message : "Billing mode update was rejected";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setOriginalYaml(content);
|
||||
if (rawMode) {
|
||||
@@ -864,22 +773,27 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
} else {
|
||||
setRawDraft(content);
|
||||
}
|
||||
// internal#718 P4 closure: providerWillAutoRestart is always
|
||||
// false now (provider PUT is retired; no server-side auto-restart
|
||||
// can fire). Save+Restart flows through the canvas store
|
||||
// restart path the same way it did pre-#718 for non-provider
|
||||
// edits.
|
||||
const providerWillAutoRestart = providerChanged && !providerSaveError
|
||||
// SetProvider on the server already triggers an auto-restart for
|
||||
// the workspace whenever the value actually changed (see
|
||||
// workspace-server/internal/handlers/secrets.go:SetProvider). If
|
||||
// the user also clicked Save+Restart we'd kick off a SECOND
|
||||
// restart here and the two would race in the canvas store —
|
||||
// suppress the redundant call and rely on the server-side one.
|
||||
const providerWillAutoRestart = providerChanged && !providerSaveError;
|
||||
if (restart && !providerWillAutoRestart) {
|
||||
await useCanvasStore.getState().restartWorkspace(workspaceId);
|
||||
} else if (!restart) {
|
||||
useCanvasStore.getState().updateNodeData(workspaceId, { needsRestart: !providerWillAutoRestart });
|
||||
}
|
||||
// Aggregate partial-save errors. With provider+billing-mode PUTs
|
||||
// retired, only modelSaveError can fire from the secret-mint side
|
||||
// — the provider/billing branches are dead code retained as
|
||||
// constant nulls to keep the diff small. They are surfaced
|
||||
// defensively in case a future re-enablement needs the wiring.
|
||||
// Aggregate partial-save errors. modelSaveError, providerSaveError,
|
||||
// and billingModeSaveError describe rejected updates from
|
||||
// independent endpoints — show whichever fired so the user knows
|
||||
// which field reverts on next reload (otherwise they'd see "Saved"
|
||||
// and be confused why Provider snapped back). The billing-mode case
|
||||
// is the most important to surface: the provider credential saved
|
||||
// but BYOK won't actually take until billing_mode flips, so a
|
||||
// silent failure here is exactly the #703 "selecting a provider has
|
||||
// no effect" symptom.
|
||||
const partialError = providerSaveError
|
||||
? `Other fields saved, but provider update failed: ${providerSaveError}`
|
||||
: billingModeSaveError
|
||||
@@ -1004,10 +918,9 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
— empty = "auto-derive from model slug" was the pre-PR-5
|
||||
behavior; selecting any provider here writes LLM_PROVIDER
|
||||
and triggers an auto-restart. */}
|
||||
{selectorModels.length > 0 ? (
|
||||
{availableModels.length > 0 ? (
|
||||
<ProviderModelSelector
|
||||
models={selectorModels}
|
||||
catalog={registryBacked ? providerCatalog : undefined}
|
||||
models={availableModels}
|
||||
value={selectorValue}
|
||||
onChange={(next) => {
|
||||
setSelectorValue(next);
|
||||
@@ -1020,7 +933,7 @@ export function ConfigTab({ workspaceId }: Props) {
|
||||
setConfig((prev) => {
|
||||
const v = next.model;
|
||||
const prevModelId = prev.runtime_config?.model || prev.model || "";
|
||||
const prevSpec = selectorModels.find((m) => m.id === prevModelId) ?? null;
|
||||
const prevSpec = availableModels.find((m) => m.id === prevModelId) ?? null;
|
||||
const prevRequired = prev.runtime_config?.required_env ?? [];
|
||||
const wasTemplateDriven =
|
||||
prevRequired.length === 0 ||
|
||||
|
||||
@@ -29,15 +29,8 @@ type FormState = {
|
||||
displayMode: string;
|
||||
displayProtocol: string;
|
||||
resolution: string;
|
||||
dataPersistence: string; // "" (auto) | "persist" | "ephemeral" — internal#734
|
||||
};
|
||||
|
||||
// internal#734: per-workspace durable-data choice. "" = auto (desktop-control
|
||||
// keeps data, others follow the org default). Human labels for the selector.
|
||||
const DATA_PERSISTENCE_OPTIONS = ["", "persist", "ephemeral"];
|
||||
// Human-readable label for a data-persistence option value: "persist" and
// "ephemeral" get explicit labels; every other value (including the ""
// auto option) is shown as "Auto".
const dataPersistenceLabel = (v: string): string =>
|
||||
v === "persist" ? "Always keep (persist)" : v === "ephemeral" ? "Don't keep (ephemeral)" : "Auto";
|
||||
|
||||
export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
const runtime = data.runtime;
|
||||
const instanceType = data.compute?.instance_type;
|
||||
@@ -46,10 +39,9 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
const displayProtocol = data.compute?.display?.protocol;
|
||||
const displayWidth = data.compute?.display?.width;
|
||||
const displayHeight = data.compute?.display?.height;
|
||||
const dataPersistence = data.compute?.data_persistence;
|
||||
const initial = useMemo(
|
||||
() => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence }),
|
||||
[runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight, dataPersistence],
|
||||
() => formFromData({ runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight }),
|
||||
[runtime, instanceType, rootGB, displayMode, displayProtocol, displayWidth, displayHeight],
|
||||
);
|
||||
const [form, setForm] = useState<FormState>(initial);
|
||||
const [saving, setSaving] = useState(false);
|
||||
@@ -92,8 +84,6 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
display: form.displayEnabled
|
||||
? { mode: form.displayMode, protocol: form.displayProtocol, width, height }
|
||||
: { mode: "none" },
|
||||
// internal#734: omit when "auto" so the wire/default behavior is unchanged.
|
||||
...(form.dataPersistence ? { data_persistence: form.dataPersistence } : {}),
|
||||
};
|
||||
|
||||
const resp = await api.patch<{ needs_restart?: boolean }>(`/workspaces/${workspaceId}`, {
|
||||
@@ -186,18 +176,6 @@ export function ContainerConfigTab({ workspaceId, data }: Props) {
|
||||
onChange={(resolution) => setForm((s) => ({ ...s, resolution }))}
|
||||
/>
|
||||
)}
|
||||
<SelectField
|
||||
id="data-persistence"
|
||||
label="Saved data (cookies, downloads, memory)"
|
||||
value={form.dataPersistence}
|
||||
options={DATA_PERSISTENCE_OPTIONS}
|
||||
optionLabel={dataPersistenceLabel}
|
||||
onChange={(dataPersistence) => setForm((s) => ({ ...s, dataPersistence }))}
|
||||
/>
|
||||
<p className="-mt-1 text-[10px] leading-snug text-ink-soft">
|
||||
Whether this workspace's data survives a restart/recreate. Auto keeps it for
|
||||
browser (desktop) workspaces; Ephemeral never keeps it (privacy).
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div className="mt-4 flex items-center justify-end gap-2">
|
||||
@@ -253,7 +231,6 @@ function formFromData(data: {
|
||||
displayProtocol?: string;
|
||||
displayWidth?: number;
|
||||
displayHeight?: number;
|
||||
dataPersistence?: string;
|
||||
}): FormState {
|
||||
const width = data.displayWidth ?? 1920;
|
||||
const height = data.displayHeight ?? 1080;
|
||||
@@ -266,7 +243,6 @@ function formFromData(data: {
|
||||
displayMode: data.displayMode && data.displayMode !== "none" ? data.displayMode : "desktop-control",
|
||||
displayProtocol: data.displayProtocol || "novnc",
|
||||
resolution,
|
||||
dataPersistence: data.dataPersistence || "",
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,6 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
const [peers, setPeers] = useState<PeerData[]>([]);
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [confirmDelete, setConfirmDelete] = useState(false);
|
||||
const [eraseData, setEraseData] = useState(false); // internal#734: erase saved data on delete
|
||||
const [peersError, setPeersError] = useState<string | null>(null);
|
||||
const [saveError, setSaveError] = useState<string | null>(null);
|
||||
const [deleteError, setDeleteError] = useState<string | null>(null);
|
||||
@@ -94,10 +93,7 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
const handleDelete = async () => {
|
||||
setDeleteError(null);
|
||||
try {
|
||||
// internal#734: erase_data=true asks the server to prune this workspace's
|
||||
// durable data volume (cookies / downloads / memory). Default off keeps it
|
||||
// for the orphan-sweeper grace period.
|
||||
await api.del(`/workspaces/${workspaceId}?confirm=true${eraseData ? "&erase_data=true" : ""}`, {
|
||||
await api.del(`/workspaces/${workspaceId}?confirm=true`, {
|
||||
headers: { "X-Confirm-Name": name },
|
||||
});
|
||||
// Mirror the server-side cascade — drop the row + every
|
||||
@@ -327,19 +323,6 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
<h3 id="delete-confirm-title" className="text-xs font-medium text-bad">
|
||||
Confirm deletion
|
||||
</h3>
|
||||
<label className="flex items-start gap-2 text-[11px] text-ink-mid">
|
||||
<input
|
||||
type="checkbox"
|
||||
aria-label="Also erase saved data"
|
||||
checked={eraseData}
|
||||
onChange={(e) => setEraseData(e.target.checked)}
|
||||
className="mt-0.5 h-3.5 w-3.5 accent-red-600"
|
||||
/>
|
||||
<span>
|
||||
Also erase saved data (cookies, downloads, agent memory). Cannot be undone.
|
||||
Unchecked keeps it recoverable briefly.
|
||||
</span>
|
||||
</label>
|
||||
<div className="flex gap-2">
|
||||
<button
|
||||
type="button"
|
||||
@@ -356,7 +339,6 @@ export function DetailsTab({ workspaceId, data }: Props) {
|
||||
onClick={() => {
|
||||
setConfirmDelete(false);
|
||||
setDeleteError(null);
|
||||
setEraseData(false);
|
||||
// Return focus to the trigger so keyboard users aren't stranded
|
||||
deleteButtonRef.current?.focus();
|
||||
}}
|
||||
|
||||
@@ -5,10 +5,9 @@ import React from "react";
|
||||
import { BudgetSection } from "../BudgetSection";
|
||||
import { api } from "@/lib/api";
|
||||
|
||||
// Multi-period budget (#49): the API now returns a `periods` map
|
||||
// (hourly/daily/weekly/monthly), each {limit, spend, remaining} in USD cents.
|
||||
// The UI renders one row per period and PATCHes {budget_limits:{period:cents|null}}.
|
||||
|
||||
// Queue-based mock for the api module. Each api call shifts from the queue.
|
||||
// Tests push with qGet/qPatch and the module-level mockImplementation
|
||||
// reads from the queue.
|
||||
type QueueEntry = { body?: unknown; err?: Error };
|
||||
const apiQueue: QueueEntry[] = [];
|
||||
|
||||
@@ -41,49 +40,45 @@ const WS_ID = "budget-test-ws";
|
||||
// Queues a successful response: appends a { body } entry to apiQueue.
// NOTE(review): the mock that consumes the queue is outside this view —
// presumably the next mocked api call resolves with `body`; confirm there.
function qGet(body: unknown) {
|
||||
apiQueue.push({ body });
|
||||
}
|
||||
|
||||
// Queues a failure: appends an { err } entry to apiQueue whose Error message
// is `${msg}: ${status}` (e.g. "Payment Required: 402").
// NOTE(review): presumably the mocked api call rejects with this Error and
// the component reads the status out of the message text — confirm.
function qGetErr(status: number, msg: string) {
|
||||
apiQueue.push({ err: new Error(`${msg}: ${status}`) });
|
||||
}
|
||||
|
||||
// Queues a successful response: appends a { body } entry to apiQueue.
// Pushes onto the same queue as qGet, so the order in which tests call the
// q* helpers is the order of the queued entries.
function qPatch(body: unknown) {
|
||||
apiQueue.push({ body });
|
||||
}
|
||||
|
||||
// Queues a failure: appends an { err } entry to apiQueue whose Error message
// is `${msg}: ${status}` (e.g. "Internal Server Error: 500"). Pushes onto the
// same queue as the other q* helpers.
function qPatchErr(status: number, msg: string) {
|
||||
apiQueue.push({ err: new Error(`${msg}: ${status}`) });
|
||||
}
|
||||
|
||||
type P = { limit: number | null; spend: number; remaining: number | null };
|
||||
|
||||
// makeBudget builds the periods response. Override any subset of periods.
|
||||
function makeBudget(overrides: Partial<Record<"hourly" | "daily" | "weekly" | "monthly", Partial<P>>> = {}) {
|
||||
const blank: P = { limit: null, spend: 0, remaining: null };
|
||||
const mk = (o?: Partial<P>): P => {
|
||||
const p = { ...blank, ...(o ?? {}) };
|
||||
if (p.limit != null && p.remaining == null) p.remaining = p.limit - p.spend;
|
||||
return p;
|
||||
};
|
||||
const periods = {
|
||||
hourly: mk(overrides.hourly),
|
||||
daily: mk(overrides.daily),
|
||||
weekly: mk(overrides.weekly),
|
||||
monthly: mk(overrides.monthly),
|
||||
};
|
||||
function makeBudget(overrides: Partial<{
|
||||
budget_limit: number | null;
|
||||
budget_used: number;
|
||||
budget_remaining: number | null;
|
||||
}> = {}) {
|
||||
return {
|
||||
periods,
|
||||
budget_limit: periods.monthly.limit,
|
||||
monthly_spend: periods.monthly.spend,
|
||||
budget_remaining: periods.monthly.remaining,
|
||||
budget_limit: 10_000,
|
||||
budget_used: 3_500,
|
||||
budget_remaining: 6_500,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("BudgetSection (multi-period)", () => {
|
||||
describe("BudgetSection", () => {
|
||||
describe("loading state", () => {
|
||||
it("shows loading indicator while fetching", async () => {
|
||||
let resolveGet: (v: unknown) => void;
|
||||
vi.mocked(api.get).mockImplementationOnce(
|
||||
async () => new Promise((r) => { resolveGet = r as (v: unknown) => void; }),
|
||||
);
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
expect(screen.getByTestId("budget-loading")).toBeTruthy();
|
||||
|
||||
// Resolve after render to verify state clears
|
||||
resolveGet!(makeBudget());
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-loading")).toBeNull();
|
||||
@@ -94,16 +89,21 @@ describe("BudgetSection (multi-period)", () => {
|
||||
describe("fetch error state", () => {
|
||||
it("shows error message on non-402 fetch failure", async () => {
|
||||
qGetErr(500, "Internal Server Error");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
|
||||
});
|
||||
expect(screen.getByTestId("budget-fetch-error")!.textContent).toContain("500");
|
||||
});
|
||||
|
||||
it("shows the exceeded banner (not a fetch error) on a 402", async () => {
|
||||
it("shows 402 as exceeded banner, not fetch error", async () => {
|
||||
// 402 means the budget limit was hit — different UX from a network/API error.
|
||||
qGetErr(402, "Payment Required");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
@@ -111,105 +111,220 @@ describe("BudgetSection (multi-period)", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("rendering periods", () => {
|
||||
it("renders all four period rows", async () => {
|
||||
qGet(makeBudget());
|
||||
describe("budget loaded — display", () => {
|
||||
it("renders used / limit stats row", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500 }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
for (const k of ["hourly", "daily", "weekly", "monthly"]) {
|
||||
expect(screen.getByTestId(`budget-period-${k}`)).toBeTruthy();
|
||||
}
|
||||
expect(screen.getByTestId("budget-used-value")!.textContent).toBe("3,500");
|
||||
});
|
||||
expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("10,000");
|
||||
});
|
||||
|
||||
it("renders 'Unlimited' when budget_limit is null", async () => {
|
||||
qGet(makeBudget({ budget_limit: null, budget_used: 1_000, budget_remaining: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-limit-value")!.textContent).toBe("Unlimited");
|
||||
});
|
||||
});
|
||||
|
||||
it("formats spend and limit as USD per period", async () => {
|
||||
qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$35.00");
|
||||
});
|
||||
expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$100.00");
|
||||
});
|
||||
it("renders remaining credits when present", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: 6_500 }));
|
||||
|
||||
it("shows ∞ for a period with no limit", async () => {
|
||||
qGet(makeBudget({ hourly: { limit: null, spend: 1_000 } }));
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-hourly-limit")!.textContent).toBe("∞");
|
||||
expect(screen.getByTestId("budget-remaining")!.textContent).toContain("6,500");
|
||||
expect(screen.getByTestId("budget-remaining")!.textContent).toContain("credits remaining");
|
||||
});
|
||||
});
|
||||
|
||||
it("renders the progress bar only for periods with a limit", async () => {
|
||||
qGet(makeBudget({ monthly: { limit: 10_000, spend: 12_000 }, hourly: { limit: null, spend: 5_000 } }));
|
||||
it("omits remaining credits when budget_remaining is null", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 3_500, budget_remaining: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-monthly-fill")).toBeTruthy();
|
||||
expect(screen.queryByTestId("budget-remaining")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
it("caps progress bar at 100% when used > limit", async () => {
|
||||
// Over-limit: 12000 used of 10000 limit should show 100%, not 120%.
|
||||
qGet(makeBudget({ budget_limit: 10_000, budget_used: 12_000, budget_remaining: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
const fill = screen.getByTestId("budget-progress-fill");
|
||||
expect(fill.getAttribute("style")).toContain("100%");
|
||||
});
|
||||
});
|
||||
|
||||
it("omits progress bar when budget_limit is null (unlimited)", async () => {
|
||||
qGet(makeBudget({ budget_limit: null, budget_used: 5_000, budget_remaining: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-progress-fill")).toBeNull();
|
||||
});
|
||||
expect(screen.queryByTestId("budget-hourly-fill")).toBeNull();
|
||||
// over-budget fill caps at 100%
|
||||
const fill = screen.getByTestId("budget-monthly-fill") as HTMLElement;
|
||||
expect(fill.style.width).toBe("100%");
|
||||
});
|
||||
});
|
||||
|
||||
describe("save", () => {
|
||||
it("PATCHes budget_limits for all four periods and clears the exceeded banner", async () => {
|
||||
qGet(makeBudget({ monthly: { limit: 10_000, spend: 3_500 } }));
|
||||
qPatch(makeBudget({ hourly: { limit: 500, spend: 0 }, monthly: { limit: 20_000, spend: 0 } }));
|
||||
describe("budget exceeded (402)", () => {
|
||||
it("shows exceeded banner when load returns 402", async () => {
|
||||
qGetErr(402, "Payment Required");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-hourly-input")).toBeTruthy();
|
||||
});
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-hourly-input"), { target: { value: "500" } });
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(vi.mocked(api.patch)).toHaveBeenCalled();
|
||||
});
|
||||
const [, body] = vi.mocked(api.patch).mock.calls[0];
|
||||
expect((body as { budget_limits: Record<string, number | null> }).budget_limits).toMatchObject({
|
||||
hourly: 500,
|
||||
monthly: 10_000, // unchanged input echoes the loaded limit
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-exceeded-banner")!.textContent).toContain("Budget exceeded");
|
||||
});
|
||||
});
|
||||
|
||||
it("shows a save error on non-402 PATCH failure", async () => {
|
||||
it("clears exceeded banner after successful save", async () => {
|
||||
qGetErr(402, "Payment Required");
|
||||
qPatch(makeBudget({ budget_limit: 50_000, budget_used: 0, budget_remaining: 50_000 }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
|
||||
const input = screen.getByTestId("budget-limit-input");
|
||||
fireEvent.change(input, { target: { value: "50000" } });
|
||||
|
||||
const saveBtn = screen.getByTestId("budget-save-btn");
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("save flow", () => {
|
||||
it("shows save error on non-402 patch failure", async () => {
|
||||
qGet(makeBudget());
|
||||
qPatchErr(500, "Internal Server Error");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
|
||||
});
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
const saveBtn = screen.getByTestId("budget-save-btn");
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-save-error")).toBeTruthy();
|
||||
expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
|
||||
});
|
||||
expect(screen.getByTestId("budget-save-error")!.textContent).toContain("500");
|
||||
});
|
||||
|
||||
it("surfaces the exceeded banner on a 402 PATCH", async () => {
|
||||
qGet(makeBudget());
|
||||
qPatchErr(402, "Payment Required");
|
||||
it("updates input to new limit value after successful save", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000 }));
|
||||
qPatch(makeBudget({ budget_limit: 20_000 }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
// Wait for the input to appear (loading → loaded)
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-save-btn")).toBeTruthy();
|
||||
expect(screen.queryByTestId("budget-loading")).toBeNull();
|
||||
});
|
||||
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
// Debug: check what values are rendered
|
||||
const limitValue = screen.getByTestId("budget-limit-value")?.textContent;
|
||||
expect(input.value).toBe("10000"); // initial value from API
|
||||
expect(limitValue).toBe("10,000");
|
||||
|
||||
fireEvent.change(input, { target: { value: "20000" } });
|
||||
expect(input.value).toBe("20000");
|
||||
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect((screen.getByTestId("budget-limit-input") as HTMLInputElement).value).toBe("20000");
|
||||
});
|
||||
});
|
||||
|
||||
it("sends null when input is cleared (unlimited)", async () => {
|
||||
qGet(makeBudget({ budget_limit: 10_000 }));
|
||||
qPatch(makeBudget({ budget_limit: null }));
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
|
||||
});
|
||||
|
||||
const input = screen.getByTestId("budget-limit-input") as HTMLInputElement;
|
||||
fireEvent.change(input, { target: { value: "" } });
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
await vi.waitFor(() => {
|
||||
// After save with null limit, input should show empty (unlimited)
|
||||
expect(input.value).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
it("shows saving state on button while patch is in flight", async () => {
|
||||
qGet(makeBudget());
|
||||
let resolvePatch: (v: unknown) => void;
|
||||
vi.mocked(api.patch).mockImplementationOnce(
|
||||
async () => new Promise((r) => { resolvePatch = r as (v: unknown) => void; }),
|
||||
);
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-limit-input")).toBeTruthy();
|
||||
});
|
||||
|
||||
fireEvent.change(screen.getByTestId("budget-limit-input"), { target: { value: "50000" } });
|
||||
fireEvent.click(screen.getByTestId("budget-save-btn"));
|
||||
|
||||
const btn = screen.getByTestId("budget-save-btn");
|
||||
expect(btn.textContent).toContain("Saving");
|
||||
|
||||
resolvePatch!(makeBudget({ budget_limit: 50_000 }));
|
||||
await vi.waitFor(() => {
|
||||
expect(btn.textContent).toContain("Save");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("isApiError402 — regression coverage", () => {
|
||||
it("classifies ': 402' with space as 402", async () => {
|
||||
qGetErr(402, "Payment Required");
|
||||
qPatch(makeBudget());
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-exceeded-banner")).toBeTruthy();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("legacy payload back-compat", () => {
|
||||
it("maps a pre-multi-period {budget_limit, monthly_spend} response to the monthly row", async () => {
|
||||
qGet({ budget_limit: 5_000, monthly_spend: 1_000, budget_remaining: 4_000 });
|
||||
it("classifies non-402 error messages as regular fetch errors", async () => {
|
||||
qGetErr(503, "Service Unavailable");
|
||||
|
||||
render(<BudgetSection workspaceId={WS_ID} />);
|
||||
|
||||
await vi.waitFor(() => {
|
||||
expect(screen.getByTestId("budget-monthly-limit")!.textContent).toBe("$50.00");
|
||||
expect(screen.getByTestId("budget-fetch-error")).toBeTruthy();
|
||||
});
|
||||
expect(screen.getByTestId("budget-monthly-spend")!.textContent).toBe("$10.00");
|
||||
expect(screen.queryByTestId("budget-exceeded-banner")).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,35 +1,255 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// internal#718 P4 closure — ConfigTab.billingMode.test.tsx is retired.
|
||||
// Tests for the provider → llm_billing_mode linkage (internal#703 Gap 2).
|
||||
//
|
||||
// This suite (255 lines, 8 tests) pinned the canvas-side provider →
|
||||
// llm_billing_mode linkage from internal#703 Gap 2: when the operator
|
||||
// changed the PROVIDER in the Config tab, ConfigTab.handleSave would
|
||||
// PUT /admin/workspaces/:id/llm-billing-mode so the platform-vs-byok
|
||||
// decision tracked the dropdown.
|
||||
// What this pins: when the operator changes the PROVIDER in the Config
|
||||
// tab, the workspace's llm_billing_mode must follow — a non-Platform
|
||||
// provider sets billing_mode=byok; Platform sets platform_managed. Before
|
||||
// this wiring, selecting "Claude Code subscription (OAuth)" or any vendor
|
||||
// key wrote the credential env but left billing_mode=platform_managed, so
|
||||
// CP kept injecting the platform proxy base URL and the OAuth token /
|
||||
// vendor key was never used — BYOK silently no-op'd (the live jrs-auto
|
||||
// SEO-Agent symptom in #703).
|
||||
//
|
||||
// That linkage is retired together with the LLM_PROVIDER override flow
|
||||
// (see ConfigTab.provider.test.tsx retirement note). P2-B (#1972)
|
||||
// moved the platform-vs-byok decision to
|
||||
// `ResolveLLMBillingModeDerived(runtime, model, authEnv)` in
|
||||
// workspace-server — the canvas can no longer override it via the
|
||||
// provider dropdown, by design. The runtime+model selection IS the
|
||||
// billing-mode selection now.
|
||||
//
|
||||
// The `/admin/workspaces/:id/llm-billing-mode` endpoint still exists
|
||||
// as the operator override surface (`workspaces.llm_billing_mode`
|
||||
// column); it is no longer driven by the provider dropdown.
|
||||
// Coverage for the derived billing flow lives in
|
||||
// workspace-server/internal/handlers/llm_billing_mode_derived_test.go.
|
||||
//
|
||||
// Restore from git history if the canvas-side provider→billing linkage
|
||||
// needs to be revisited (it should not — the derived resolver is the
|
||||
// single decision point).
|
||||
// The billing-mode PUT targets the same per-tenant endpoint the LLM
|
||||
// Billing section uses: PUT /admin/workspaces/:id/llm-billing-mode with
|
||||
// body {mode: "byok" | "platform_managed"}.
|
||||
|
||||
import { describe, it } from "vitest";
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
describe("ConfigTab — provider → llm_billing_mode linkage (retired internal#718 P4)", () => {
|
||||
it.skip("LLM_PROVIDER → billing_mode wiring is retired; see file header for the replacement coverage", () => {
|
||||
// intentionally empty
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
const storeUpdateNodeData = vi.fn();
|
||||
const storeRestartWorkspace = vi.fn();
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) =>
|
||||
selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
|
||||
{
|
||||
getState: () => ({
|
||||
restartWorkspace: storeRestartWorkspace,
|
||||
updateNodeData: storeUpdateNodeData,
|
||||
}),
|
||||
},
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab, billingModeForProvider } from "../ConfigTab";
|
||||
|
||||
function wireApi(opts: { providerValue?: string | "missing" }) {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === `/workspaces/ws-test`) {
|
||||
return Promise.resolve({ runtime: "hermes" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/model`) {
|
||||
return Promise.resolve({ model: "nousresearch/hermes-4-70b" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/provider`) {
|
||||
if (opts.providerValue === "missing") return Promise.reject(new Error("404"));
|
||||
return Promise.resolve({
|
||||
provider: opts.providerValue ?? "",
|
||||
source: opts.providerValue ? "workspace_secrets" : "default",
|
||||
});
|
||||
}
|
||||
if (path === `/workspaces/ws-test/files/config.yaml`) {
|
||||
return Promise.resolve({ content: "name: ws\nruntime: hermes\n" });
|
||||
}
|
||||
if (path === "/templates") return Promise.resolve([]);
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
}
|
||||
|
||||
function billingModeCalls() {
|
||||
return apiPut.mock.calls.filter(
|
||||
([path]) => path === "/admin/workspaces/ws-test/llm-billing-mode",
|
||||
);
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
storeUpdateNodeData.mockReset();
|
||||
storeRestartWorkspace.mockReset();
|
||||
});
|
||||
|
||||
describe("billingModeForProvider — pure mapping (internal#703 Gap 2)", () => {
|
||||
// Platform / empty → platform_managed. Empty means "no explicit
|
||||
// override → inherit", which resolves to platform on the backend, so
|
||||
// it must NOT flip the workspace into byok.
|
||||
it("maps Platform and empty to platform_managed", () => {
|
||||
expect(billingModeForProvider("platform")).toBe("platform_managed");
|
||||
expect(billingModeForProvider("")).toBe("platform_managed");
|
||||
expect(billingModeForProvider(" ")).toBe("platform_managed");
|
||||
expect(billingModeForProvider("PLATFORM")).toBe("platform_managed");
|
||||
});
|
||||
|
||||
// Every non-Platform provider → byok. If this regresses to returning
|
||||
// platform_managed for a vendor, BYOK silently no-ops again (#703).
|
||||
it("maps non-Platform providers to byok", () => {
|
||||
expect(billingModeForProvider("anthropic-oauth")).toBe("byok"); // Claude Code subscription
|
||||
expect(billingModeForProvider("anthropic")).toBe("byok"); // Anthropic API key
|
||||
expect(billingModeForProvider("minimax")).toBe("byok");
|
||||
expect(billingModeForProvider("openrouter")).toBe("byok");
|
||||
expect(billingModeForProvider("openai")).toBe("byok");
|
||||
});
|
||||
});
|
||||
|
||||
describe("ConfigTab — provider change drives billing_mode (internal#703 Gap 2)", () => {
|
||||
// The core fix: picking a non-Platform provider (here "anthropic-oauth"
|
||||
// = Claude Code subscription OAuth) from a fresh/empty provider must
|
||||
// PUT mode=byok to the per-tenant llm-billing-mode endpoint. This is
|
||||
// the exact path that was missing — the credential env saved but the
|
||||
// billing mode never followed, so the proxy stayed engaged.
|
||||
it("PUTs mode=byok when switching to a non-Platform provider", async () => {
|
||||
wireApi({ providerValue: "" });
|
||||
apiPut.mockResolvedValue({ status: "saved" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = billingModeCalls();
|
||||
expect(calls.length).toBe(1);
|
||||
expect(calls[0][1]).toEqual({ mode: "byok" });
|
||||
});
|
||||
// Provider credential PUT still happens too (independent endpoint).
|
||||
expect(
|
||||
apiPut.mock.calls.some(([path]) => path === "/workspaces/ws-test/provider"),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
// Switching FROM a byok provider back TO Platform must PUT
|
||||
// mode=platform_managed so the workspace re-engages the proxy and stops
|
||||
// expecting a (now-absent) vendor key.
|
||||
it("PUTs mode=platform_managed when switching back to Platform", async () => {
|
||||
wireApi({ providerValue: "anthropic-oauth" });
|
||||
apiPut.mockResolvedValue({ status: "saved" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("anthropic-oauth"));
|
||||
fireEvent.change(input, { target: { value: "platform" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
const calls = billingModeCalls();
|
||||
expect(calls.length).toBe(1);
|
||||
expect(calls[0][1]).toEqual({ mode: "platform_managed" });
|
||||
});
|
||||
});
|
||||
|
||||
// Changing between two BYOK vendors (minimax → openrouter) keeps
|
||||
// billing_mode=byok — the implied mode is unchanged, so re-PUTing it
|
||||
// would be a wasteful no-op that risks an extra restart. Must NOT fire.
|
||||
it("does NOT PUT billing-mode when the implied mode is unchanged", async () => {
|
||||
wireApi({ providerValue: "minimax" });
|
||||
apiPut.mockResolvedValue({ status: "saved" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("minimax"));
|
||||
fireEvent.change(input, { target: { value: "openrouter" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
// Provider PUT fires (vendor changed)...
|
||||
expect(
|
||||
apiPut.mock.calls.some(([path]) => path === "/workspaces/ws-test/provider"),
|
||||
).toBe(true);
|
||||
});
|
||||
// ...but billing-mode does NOT (byok → byok is a no-op).
|
||||
expect(billingModeCalls().length).toBe(0);
|
||||
});
|
||||
|
||||
// A Save that doesn't touch the provider must not PUT billing-mode —
|
||||
// editing tier/name shouldn't disturb the workspace's billing mode.
|
||||
it("does NOT PUT billing-mode on a Save that leaves provider unchanged", async () => {
|
||||
wireApi({ providerValue: "anthropic-oauth" });
|
||||
apiPut.mockResolvedValue({ status: "saved" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
await screen.findByTestId("provider-input");
|
||||
|
||||
// Dirty an unrelated field so Save is enabled.
|
||||
const tierSelect = screen.getByLabelText(/tier/i) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
// Some PUT may fire (e.g. /model); just assert billing-mode did not.
|
||||
expect(billingModeCalls().length).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
// If the provider credential PUT itself fails, we must NOT set byok —
|
||||
// flipping billing_mode while the credential write failed would leave
|
||||
// the workspace expecting a key it doesn't have (worse than no-op).
|
||||
it("does NOT PUT billing-mode when the provider PUT fails", async () => {
|
||||
wireApi({ providerValue: "" });
|
||||
apiPut.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-test/provider") return Promise.reject(new Error("boom"));
|
||||
return Promise.resolve({ status: "saved" });
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
// The provider-failure error is surfaced (getByText throws if absent).
|
||||
expect(screen.getByText(/provider update failed/i)).toBeTruthy();
|
||||
});
|
||||
expect(billingModeCalls().length).toBe(0);
|
||||
});
|
||||
|
||||
// If the credential saved but the billing-mode PUT is rejected, the
|
||||
// user must be warned that BYOK may not take — a silent failure here
|
||||
// is precisely the #703 symptom we're fixing.
|
||||
it("surfaces an error when billing-mode PUT fails after a successful provider save", async () => {
|
||||
wireApi({ providerValue: "" });
|
||||
apiPut.mockImplementation((path: string) => {
|
||||
if (path === "/admin/workspaces/ws-test/llm-billing-mode") {
|
||||
return Promise.reject(new Error("403 forbidden"));
|
||||
}
|
||||
return Promise.resolve({ status: "saved" });
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
fireEvent.change(input, { target: { value: "anthropic-oauth" } });
|
||||
|
||||
fireEvent.click(screen.getByRole("button", { name: /^save$/i }));
|
||||
|
||||
await waitFor(() => {
|
||||
expect(screen.getByText(/switching billing mode failed/i)).toBeTruthy();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// Regression: project_canvas_runtime_dropdown_ssot_fix — a google-adk
|
||||
// workspace's Config tab showed the wrong runtime ("LangGraph (default)"
|
||||
// / first option) because a hardcoded frontend allowlist
|
||||
// (SUPPORTED_RUNTIME_VALUES) dropped google-adk from the /templates-derived
|
||||
// options even though the backend served it. A Save from that state would
|
||||
// PATCH runtime to the wrong value and break the ADK agent.
|
||||
//
|
||||
// The fix: the dropdown is SSOT-driven — it trusts GET /templates (which the
|
||||
// backend already gates to the manifest maintained set) and hides a runtime
|
||||
// only when its row carries `displayable: false`. This pins: a google-adk
|
||||
// workspace shows "google-adk" selected, and a displayable:false template is
|
||||
// not offered.
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }),
|
||||
{ getState: () => ({ restartWorkspace: vi.fn(), updateNodeData: vi.fn() }) },
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
function wireApi(templates: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; displayable?: boolean }>) {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === "/workspaces/ws-adk") return Promise.resolve({ runtime: "google-adk" });
|
||||
if (path === "/workspaces/ws-adk/model") return Promise.resolve({ model: "vertex:gemini-2.5-pro" });
|
||||
if (path === "/workspaces/ws-adk/files/config.yaml") return Promise.resolve({ content: "name: adk\nruntime: google-adk\n" });
|
||||
if (path === "/templates") return Promise.resolve(templates);
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
});
|
||||
|
||||
describe("ConfigTab — google-adk runtime (SSOT dropdown)", () => {
|
||||
it("shows google-adk selected in the runtime dropdown (#ssot-fix)", async () => {
|
||||
wireApi([
|
||||
{ id: "claude-code", name: "Claude Code", runtime: "claude-code", models: [] },
|
||||
{ id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
|
||||
]);
|
||||
render(<ConfigTab workspaceId="ws-adk" />);
|
||||
const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
|
||||
expect((select as HTMLSelectElement).value).toBe("google-adk");
|
||||
const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
|
||||
expect(opts).toContain("google-adk");
|
||||
});
|
||||
|
||||
it("hides a template flagged displayable:false", async () => {
|
||||
wireApi([
|
||||
{ id: "google-adk", name: "Google ADK", runtime: "google-adk", models: [] },
|
||||
{ id: "legacy", name: "Legacy", runtime: "legacy", models: [], displayable: false },
|
||||
]);
|
||||
render(<ConfigTab workspaceId="ws-adk" />);
|
||||
const select = await waitFor(() => screen.getByRole("combobox", { name: /runtime/i }));
|
||||
const opts = Array.from((select as HTMLSelectElement).options).map((o) => o.value);
|
||||
expect(opts).toContain("google-adk");
|
||||
expect(opts).not.toContain("legacy");
|
||||
});
|
||||
});
|
||||
@@ -1,45 +1,574 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// internal#718 P4 closure — ConfigTab.provider.test.tsx is retired.
|
||||
// Regression tests for ConfigTab Provider override (Option B PR-5).
|
||||
//
|
||||
// This 574-line suite exercised the canvas-side LLM provider override
|
||||
// flow: load the existing override from GET /workspaces/:id/provider,
|
||||
// edit the dropdown, Save → PUT /workspaces/:id/provider, and the
|
||||
// provider→billing_mode linkage on Save. All three server endpoints
|
||||
// behind those flows are retired in internal#718 P4 closure:
|
||||
// What this pins: a free-text Provider combobox in the Runtime section
|
||||
// that lets the operator override the model→provider derivation hermes-
|
||||
// agent does internally. Without this UI, a fresh signup whose Hermes
|
||||
// workspace defaults to a model with no clean vendor prefix (e.g.
|
||||
// `nousresearch/hermes-4-70b`) hits the runtime's own preflight error:
|
||||
// "No LLM provider configured. Run `hermes model` to select a
|
||||
// provider, or run `hermes setup` for first-time configuration."
|
||||
// — even though tasks #195-198 wired the entire downstream pipe so a
|
||||
// non-empty provider WOULD flow through canvas → workspace-server →
|
||||
// CP user-data → workspace config.yaml → hermes adapter.
|
||||
//
|
||||
// - workspace-server SetProvider / GetProvider (PUT/GET
|
||||
// /workspaces/:id/provider) → both return 410 Gone with a
|
||||
// PROVIDER_ENDPOINT_RETIRED structured body.
|
||||
// - workspace-server setProviderSecret (the writer into
|
||||
// workspace_secrets.LLM_PROVIDER) — removed; row never written.
|
||||
// - The LLM_PROVIDER workspace_secret itself — migrated away in
|
||||
// 20260528000000_drop_llm_provider_workspace_secret.up.sql.
|
||||
// Hongming Wang hit this on hongming.moleculesai.app at signup
|
||||
// 2026-05-01T17:35Z. Backend PRs were green, the gap was the missing
|
||||
// UI to set the value.
|
||||
//
|
||||
// ConfigTab still renders the provider dropdown for display (the user
|
||||
// can preview the derived provider locally), but Save no longer
|
||||
// round-trips the value. The replacement contract is that the provider
|
||||
// is DERIVED at every decision point from (runtime, model) via the
|
||||
// registry — see internal/providers/derive_provider.go.
|
||||
//
|
||||
// The original suite's coverage is replaced by:
|
||||
//
|
||||
// - workspace-server: TestPutProvider_410Gone +
|
||||
// TestGetProvider_410Gone + TestProviderEndpointGone_BodyShape in
|
||||
// internal/handlers/llm_provider_removal_p4_test.go.
|
||||
// - workspace-server: TestWorkspaceCreate_FirstDeploy_OnlyPersistsMODEL
|
||||
// in internal/handlers/workspace_provision_shared_test.go.
|
||||
// - registry: TestDeriveProvider_RealManifest in
|
||||
// internal/providers/derive_provider_test.go.
|
||||
//
|
||||
// Restore from git history if any aspect of the legacy LLM_PROVIDER
|
||||
// flow needs to be revisited (it should not — the retirement is
|
||||
// permanent).
|
||||
// Each test pins one invariant. If any fails, the bug is back.
|
||||
|
||||
import { describe, it } from "vitest";
|
||||
import { describe, it, expect, vi, afterEach, beforeEach } from "vitest";
|
||||
import { render, screen, cleanup, waitFor, fireEvent } from "@testing-library/react";
|
||||
import React from "react";
|
||||
|
||||
describe("ConfigTab provider override — retired (internal#718 P4)", () => {
|
||||
it.skip("LLM_PROVIDER override flow is retired; see file header for the replacement coverage", () => {
|
||||
// intentionally empty
|
||||
afterEach(cleanup);
|
||||
|
||||
const apiGet = vi.fn();
|
||||
const apiPatch = vi.fn();
|
||||
const apiPut = vi.fn();
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
get: (path: string) => apiGet(path),
|
||||
patch: (path: string, body: unknown) => apiPatch(path, body),
|
||||
put: (path: string, body: unknown) => apiPut(path, body),
|
||||
post: vi.fn(),
|
||||
del: vi.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
// Shared store stub — `updateNodeData` is exposed so a test can assert the
|
||||
// node-data flush happens after a successful PATCH (regression: previously
|
||||
// the DB updated but the canvas badge stayed stale until full hydrate).
|
||||
const storeUpdateNodeData = vi.fn();
|
||||
const storeRestartWorkspace = vi.fn();
|
||||
vi.mock("@/store/canvas", () => ({
|
||||
useCanvasStore: Object.assign(
|
||||
(selector: (s: unknown) => unknown) => selector({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }),
|
||||
{ getState: () => ({ restartWorkspace: storeRestartWorkspace, updateNodeData: storeUpdateNodeData }) },
|
||||
),
|
||||
}));
|
||||
|
||||
vi.mock("../AgentCardSection", () => ({
|
||||
AgentCardSection: () => <div data-testid="agent-card-stub" />,
|
||||
}));
|
||||
|
||||
import { ConfigTab } from "../ConfigTab";
|
||||
|
||||
// wireApi — same shape as ConfigTab.hermes.test.tsx, extended with the
|
||||
// /provider endpoint. Each test sets `providerValue` to the value the
|
||||
// GET endpoint returns; "missing" means the endpoint rejects (older
|
||||
// workspace-server pre-PR-2 — must not crash the tab).
|
||||
function wireApi(opts: {
|
||||
workspaceRuntime?: string;
|
||||
workspaceModel?: string;
|
||||
configYamlContent?: string | null;
|
||||
templates?: Array<{ id: string; name?: string; runtime?: string; models?: unknown[]; providers?: string[] }>;
|
||||
providerValue?: string | "missing";
|
||||
}) {
|
||||
apiGet.mockImplementation((path: string) => {
|
||||
if (path === `/workspaces/ws-test`) {
|
||||
return Promise.resolve({ runtime: opts.workspaceRuntime ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/model`) {
|
||||
return Promise.resolve({ model: opts.workspaceModel ?? "" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/provider`) {
|
||||
if (opts.providerValue === "missing") {
|
||||
return Promise.reject(new Error("404"));
|
||||
}
|
||||
return Promise.resolve({ provider: opts.providerValue ?? "", source: opts.providerValue ? "workspace_secrets" : "default" });
|
||||
}
|
||||
if (path === `/workspaces/ws-test/files/config.yaml`) {
|
||||
if (opts.configYamlContent === null) return Promise.reject(new Error("not found"));
|
||||
return Promise.resolve({ content: opts.configYamlContent ?? "" });
|
||||
}
|
||||
if (path === "/templates") {
|
||||
return Promise.resolve(opts.templates ?? []);
|
||||
}
|
||||
return Promise.reject(new Error(`unmocked api.get: ${path}`));
|
||||
});
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
apiGet.mockReset();
|
||||
apiPatch.mockReset();
|
||||
apiPut.mockReset();
|
||||
storeUpdateNodeData.mockReset();
|
||||
storeRestartWorkspace.mockReset();
|
||||
});
|
||||
|
||||
describe("ConfigTab — Provider override (Option B PR-5)", () => {
|
||||
// Empty provider on load is the legitimate default ("auto-derive
|
||||
// from model slug prefix"), NOT an error. The endpoint returning
|
||||
// {provider: "", source: "default"} is the documented happy-path
|
||||
// shape — if the form treated that as "load failed" we'd lose the
|
||||
// ability to render the input at all on fresh workspaces.
|
||||
it("renders an empty Provider input when no override is set", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
expect((input as HTMLInputElement).value).toBe("");
|
||||
});
|
||||
|
||||
// Pre-existing override loads back into the field on mount. Without
|
||||
// this, an operator who set provider=openrouter yesterday would see
|
||||
// the field blank today, conclude the value didn't stick, and
|
||||
// re-save — the resulting PUT-with-same-value would auto-restart
|
||||
// the workspace for nothing.
|
||||
it("loads an existing provider override from the server", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "openrouter",
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
|
||||
});
|
||||
|
||||
// Old workspace-server (pre-PR-2) returns a 404 on /provider. The
|
||||
// tab must keep loading — the fallback is "" (auto-derive), same as
|
||||
// a fresh workspace.
|
||||
it("falls back to empty provider when the endpoint is missing", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "missing",
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
expect((input as HTMLInputElement).value).toBe("");
|
||||
// Tab should be fully rendered, not stuck in loading or error state.
|
||||
expect(screen.queryByText(/Loading config/i)).toBeNull();
|
||||
});
|
||||
|
||||
// Setting a value + Save must PUT to the right endpoint with the
|
||||
// right body shape. Server-side handler (workspace-server
|
||||
// handlers/secrets.go:SetProvider) reads body.provider — any other
|
||||
// key gets silently ignored and the workspace_secrets row stays
|
||||
// unset. This regression would manifest as "Save → Restart →
|
||||
// workspace still says No LLM provider configured."
|
||||
it("PUTs the new provider to /workspaces/:id/provider on Save", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
});
|
||||
apiPut.mockResolvedValue({ status: "saved", provider: "anthropic" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
|
||||
fireEvent.change(input, { target: { value: "anthropic" } });
|
||||
expect((input as HTMLInputElement).value).toBe("anthropic");
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
|
||||
expect(providerCalls.length).toBe(1);
|
||||
expect(providerCalls[0][1]).toEqual({ provider: "anthropic" });
|
||||
});
|
||||
});
|
||||
|
||||
// No-change Save must NOT PUT /provider. The server-side SetProvider
|
||||
// auto-restarts the workspace on every successful PUT — re-writing
|
||||
// an unchanged value would cost the user a ~30s reboot every time
|
||||
// they tweak some other field.
|
||||
it("does not PUT /provider when the value is unchanged", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\ntier: 2\n",
|
||||
providerValue: "openrouter",
|
||||
});
|
||||
apiPut.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
await screen.findByTestId("provider-input");
|
||||
|
||||
// Click Save without touching the provider field. Trigger another
|
||||
// dirty-marker (tier change) so Save is enabled — the test is
|
||||
// about NOT touching /provider, not about Save being disabled.
|
||||
const tierSelect = screen.getByLabelText(/tier/i) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
// Some PUT(s) may fire (e.g. /model). Just assert /provider is NOT among them.
|
||||
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
|
||||
expect(providerCalls.length).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
// The dropdown's suggestion list MUST come from the runtime's own
|
||||
// template (via /templates → runtime_config.providers), not a
|
||||
// hardcoded canvas-side enum. This is the "Native + pluggable
|
||||
// runtime" invariant: a new runtime declaring its own provider
|
||||
// taxonomy in its config.yaml gets a working dropdown without ANY
|
||||
// canvas-side change.
|
||||
//
|
||||
// Pinned by checking that suggestions surfaced in the datalist
|
||||
// exactly mirror what the templates endpoint returned for the
|
||||
// matching runtime. If a future contributor reintroduces a
|
||||
// PROVIDER_SUGGESTIONS-style hardcoded list and the datalist
|
||||
// contents don't follow the template, this test fails.
|
||||
it("populates the provider datalist from the matched runtime's templates entry", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "nousresearch/hermes-4-70b",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "hermes",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
models: [],
|
||||
// The provider list every runtime adapter ships in its own
|
||||
// config.yaml. Canvas must surface THIS, not its own list.
|
||||
providers: ["nous", "openrouter", "anthropic", "minimax-cn"],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
const listId = (input as HTMLInputElement).getAttribute("list");
|
||||
expect(listId).toBeTruthy();
|
||||
await waitFor(() => {
|
||||
const datalist = document.getElementById(listId!);
|
||||
expect(datalist).not.toBeNull();
|
||||
const optionValues = Array.from(datalist!.querySelectorAll("option")).map(
|
||||
(o) => (o as HTMLOptionElement).value,
|
||||
);
|
||||
// Order matters — most-common-first is part of the contract so
|
||||
// the demo flow lands on a working choice without scrolling.
|
||||
expect(optionValues).toEqual(["nous", "openrouter", "anthropic", "minimax-cn"]);
|
||||
});
|
||||
});
|
||||
|
||||
// Fallback path: when a template hasn't migrated to the explicit
|
||||
// `providers:` field yet, suggestions are derived from model slug
|
||||
// prefixes. Still adapter-driven (the slugs come from the template's
|
||||
// `models:` list), just inferred. This keeps existing templates
|
||||
// working while the platform team migrates them one at a time.
|
||||
it("renders vendor-grouped provider dropdown when template ships models", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "anthropic/claude-opus-4-7",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "hermes",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
models: [
|
||||
{ id: "anthropic/claude-opus-4-7", required_env: ["ANTHROPIC_API_KEY"] },
|
||||
{ id: "openai/gpt-4o", required_env: ["OPENROUTER_API_KEY"] },
|
||||
{ id: "anthropic/claude-sonnet-4-5", required_env: ["ANTHROPIC_API_KEY"] }, // dup vendor — must dedupe
|
||||
{ id: "nousresearch/hermes-4-70b", required_env: ["HERMES_API_KEY"] },
|
||||
],
|
||||
// No `providers:` field → ProviderModelSelector derives vendors
|
||||
// from model id prefixes via its own buildProviderCatalog.
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
// With models present, the new vendor-aware dropdown renders.
|
||||
// Provider entries dedupe by vendor → 3 unique vendors here
|
||||
// (anthropic, openai, nousresearch).
|
||||
const select = await screen.findByTestId("provider-select") as HTMLSelectElement;
|
||||
await waitFor(() => {
|
||||
const optionTexts = Array.from(select.options)
|
||||
.map((o) => o.text)
|
||||
.filter((t) => !t.startsWith("—")); // strip placeholder
|
||||
// Labels are vendor display names, but vendor identity is what
|
||||
// matters for dedupe. Assert each expected vendor surfaces once.
|
||||
expect(optionTexts.some((t) => t.startsWith("Anthropic API"))).toBe(true);
|
||||
expect(optionTexts.some((t) => t.startsWith("OpenAI"))).toBe(true);
|
||||
expect(optionTexts.some((t) => t.startsWith("Nous Research"))).toBe(true);
|
||||
expect(optionTexts.length).toBe(3); // dedupe pin
|
||||
});
|
||||
});
|
||||
|
||||
// Empty string is a legitimate save target — it clears the override
|
||||
// (the server-side endpoint deletes the workspace_secrets row).
|
||||
// Operators who picked "anthropic" yesterday and want to revert to
|
||||
// auto-derive today should be able to do so by clearing the field
|
||||
// and clicking Save. Without this PUT path, the only way to clear
|
||||
// would be a direct DB edit.
|
||||
it("PUTs an empty string when the operator clears a previously-set provider", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "anthropic:claude-opus-4-7",
|
||||
configYamlContent: "name: ws\nruntime: hermes\n",
|
||||
providerValue: "openrouter",
|
||||
});
|
||||
apiPut.mockResolvedValue({ status: "cleared" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const input = await screen.findByTestId("provider-input");
|
||||
await waitFor(() => expect((input as HTMLInputElement).value).toBe("openrouter"));
|
||||
|
||||
fireEvent.change(input, { target: { value: "" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
const providerCalls = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/provider");
|
||||
expect(providerCalls.length).toBe(1);
|
||||
expect(providerCalls[0][1]).toEqual({ provider: "" });
|
||||
});
|
||||
});
|
||||
|
||||
// Display-vs-storage drift regression (2026-05-03 incident, workspace
|
||||
// e13aebd8…). User deployed claude-code with MiniMax-M2 stored in
|
||||
// MODEL_PROVIDER. The container env (MODEL=MiniMax-M2) and chat
|
||||
// worked correctly, but the Config tab showed "Claude Code
|
||||
// subscription / Claude Sonnet (OAuth)" — i.e. the template's
|
||||
// runtime_config.model: sonnet default — because currentModelId
|
||||
// reads runtime_config.model first and loadConfig was overriding
|
||||
// only the top-level config.model field. The merged shape was:
|
||||
// { model: "MiniMax-M2", runtime_config: { model: "sonnet" } }
|
||||
// and currentModelId picked "sonnet". Fix: loadConfig propagates
|
||||
// wsMetadataModel into BOTH places so the form is a single source
|
||||
// of truth (DB-backed MODEL_PROVIDER). Pinning the merged-path
|
||||
// branch with the exact reproducing shape: claude-code template
|
||||
// YAML has runtime_config.model: sonnet; live workspace's
|
||||
// MODEL_PROVIDER is MiniMax-M2; tab must show the latter.
|
||||
it("prefers MODEL_PROVIDER over the template's runtime_config.model on load", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "claude-code",
|
||||
workspaceModel: "MiniMax-M2",
|
||||
configYamlContent: "name: ws\nruntime: claude-code\nruntime_config:\n model: sonnet\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code",
|
||||
runtime: "claude-code",
|
||||
models: [
|
||||
{ id: "sonnet", name: "Claude Sonnet (OAuth)", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
|
||||
{ id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
|
||||
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const modelSelect = (await screen.findByTestId("model-select")) as HTMLSelectElement;
|
||||
await waitFor(() => expect(modelSelect.value).toBe("MiniMax-M2"));
|
||||
|
||||
// Provider dropdown should also reflect MiniMax (back-derived from
|
||||
// the model slug since LLM_PROVIDER is unset). Without the fix,
|
||||
// the selector falls back to the first catalog entry whose first
|
||||
// model matches "sonnet" → anthropic-oauth bucket → "Claude Code
|
||||
// subscription".
|
||||
const providerSelect = screen.getByTestId("provider-select") as HTMLSelectElement;
|
||||
const selectedOption = providerSelect.options[providerSelect.selectedIndex];
|
||||
expect(selectedOption.textContent ?? "").toMatch(/MiniMax/);
|
||||
});
|
||||
|
||||
// Sibling pin to the display-fix above. The display fix mirrors
|
||||
// wsMetadataModel into runtime_config.model so the selector renders
|
||||
// the live value; that mirror means handleSave's old YAML-vs-form
|
||||
// diff would always be non-zero on a no-op save (YAML default
|
||||
// "sonnet" vs. mirrored "MiniMax-M2") and fire PUT /model — which
|
||||
// the server-side SetModel chains into an auto-restart. handleSave now
|
||||
// diffs against the loaded MODEL_PROVIDER instead. Pin: an
|
||||
// unrelated edit (tier change) must NOT touch /model when the
|
||||
// model itself didn't change.
|
||||
it("does not PUT /model on a no-op save when only an unrelated field changed", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "claude-code",
|
||||
workspaceModel: "MiniMax-M2",
|
||||
configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n model: sonnet\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code",
|
||||
runtime: "claude-code",
|
||||
models: [
|
||||
{ id: "sonnet", name: "Claude Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] },
|
||||
{ id: "MiniMax-M2", name: "MiniMax M2", required_env: ["MINIMAX_API_KEY"] },
|
||||
],
|
||||
},
|
||||
],
|
||||
});
|
||||
apiPut.mockResolvedValue({});
|
||||
apiPatch.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
const tierPatches = apiPatch.mock.calls.filter(([path, body]) =>
|
||||
path === "/workspaces/ws-test" && (body as { tier?: number }).tier === 3,
|
||||
);
|
||||
expect(tierPatches.length).toBe(1);
|
||||
});
|
||||
// Spurious /model PUT would fire here without the originalModel
|
||||
// diff baseline. The model itself didn't change, so /model must
|
||||
// stay untouched (otherwise SetModel auto-restarts).
|
||||
const modelPuts = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/model");
|
||||
expect(modelPuts.length).toBe(0);
|
||||
});
|
||||
|
||||
// Save-then-stale-badge regression (2026-05-03 incident). User
|
||||
// selected T3 in the Tier dropdown, hit Save & Restart, the workspace
|
||||
// PATCH succeeded (`tier: 3` in DB), but the canvas header pill kept
|
||||
// showing "TIER T2" until a full hydrate. Root cause: handleSave
|
||||
// sent the PATCH to workspace-server but never pushed the same
|
||||
// change into useCanvasStore.updateNodeData, so every UI surface
|
||||
// reading from the store kept its stale value. Pin: a successful
|
||||
// tier PATCH must mirror into the store so the badge updates
|
||||
// synchronously with the response.
|
||||
it("flushes the dbPatch into useCanvasStore.updateNodeData after a successful PATCH", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "claude-code",
|
||||
workspaceModel: "MiniMax-M2",
|
||||
configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n model: sonnet\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code",
|
||||
runtime: "claude-code",
|
||||
models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
|
||||
},
|
||||
],
|
||||
});
|
||||
apiPatch.mockResolvedValue({ status: "updated" });
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
|
||||
});
|
||||
// Without the store flush, the badge would keep reading tier=2
|
||||
// from useCanvasStore.nodes until a full hydrate. Pin: handleSave
|
||||
// pushes the same fields it PATCHed.
|
||||
expect(storeUpdateNodeData).toHaveBeenCalledWith(
|
||||
"ws-test",
|
||||
expect.objectContaining({ tier: 3 }),
|
||||
);
|
||||
});
|
||||
|
||||
// Failure-gating sibling pin to the store-flush test above. The
|
||||
// production code places `updateNodeData` AFTER `await api.patch(...)`
|
||||
// inside the same `if (Object.keys(dbPatch).length > 0)` block, so a
|
||||
// PATCH rejection should throw before the store call. Without this
|
||||
// pin, a future refactor that wraps the PATCH in try/catch and
|
||||
// unconditionally calls updateNodeData would ship green — and then
|
||||
// the badge would lie when the server actually rejected the change.
|
||||
// Codified review feedback from PR #2545 (Agent 2).
|
||||
it("does NOT flush into useCanvasStore.updateNodeData when the PATCH rejects", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "claude-code",
|
||||
workspaceModel: "MiniMax-M2",
|
||||
configYamlContent: "name: ws\nruntime: claude-code\ntier: 2\nruntime_config:\n model: sonnet\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code",
|
||||
runtime: "claude-code",
|
||||
models: [{ id: "sonnet", name: "Sonnet", required_env: ["CLAUDE_CODE_OAUTH_TOKEN"] }],
|
||||
},
|
||||
],
|
||||
});
|
||||
apiPatch.mockRejectedValue(new Error("500 from workspace-server"));
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
// Wait for handleSave to settle (succeeds-or-fails). PATCH must
|
||||
// have been attempted; handleSave swallows the error and its
|
||||
// finally block resets saving to false.
|
||||
await waitFor(() => {
|
||||
expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
|
||||
});
|
||||
// Critically: the store must NOT have been told about the failed
|
||||
// change. Otherwise the badge would lie about a write the server
|
||||
// rejected.
|
||||
const tierFlushes = storeUpdateNodeData.mock.calls.filter(([, body]) =>
|
||||
typeof (body as { tier?: number }).tier === "number",
|
||||
);
|
||||
expect(tierFlushes.length).toBe(0);
|
||||
});
|
||||
|
||||
// Pin the hermes/pre-#240 edge case: workspace where MODEL_PROVIDER
|
||||
// was never written but YAML has runtime_config.model: "something".
|
||||
// originalModel must reflect the rendered baseline (the YAML value),
|
||||
// not the empty MODEL_PROVIDER, so an unrelated save (tier change)
|
||||
// doesn't fire a /model PUT and trigger an auto-restart. Codified
|
||||
// review feedback from PR #2545 (Agent 1, "Important").
|
||||
it("does not PUT /model when MODEL_PROVIDER is empty and the user only edited an unrelated field", async () => {
|
||||
wireApi({
|
||||
workspaceRuntime: "hermes",
|
||||
workspaceModel: "", // legacy workspace — never went through the picker
|
||||
configYamlContent:
|
||||
"name: ws\nruntime: hermes\ntier: 2\nruntime_config:\n model: nousresearch/hermes-4-70b\n",
|
||||
providerValue: "",
|
||||
templates: [
|
||||
{
|
||||
id: "hermes",
|
||||
name: "Hermes",
|
||||
runtime: "hermes",
|
||||
models: [{ id: "nousresearch/hermes-4-70b", name: "Hermes 4 70B", required_env: ["HERMES_API_KEY"] }],
|
||||
providers: ["nous"],
|
||||
},
|
||||
],
|
||||
});
|
||||
apiPut.mockResolvedValue({});
|
||||
apiPatch.mockResolvedValue({});
|
||||
|
||||
render(<ConfigTab workspaceId="ws-test" />);
|
||||
const tierSelect = (await screen.findByLabelText(/tier/i)) as HTMLSelectElement;
|
||||
fireEvent.change(tierSelect, { target: { value: "3" } });
|
||||
|
||||
const saveBtn = screen.getByRole("button", { name: /^save$/i });
|
||||
fireEvent.click(saveBtn);
|
||||
|
||||
await waitFor(() => {
|
||||
expect(apiPatch.mock.calls.some(([p]) => p === "/workspaces/ws-test")).toBe(true);
|
||||
});
|
||||
const modelPuts = apiPut.mock.calls.filter(([path]) => path === "/workspaces/ws-test/model");
|
||||
expect(modelPuts.length).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
// @vitest-environment jsdom
|
||||
//
|
||||
// internal#718 P3 (retire-list #5) — the billing-mode the Config tab shows /
|
||||
// sends must reflect the DERIVED provider per the registry, not the hardcoded
|
||||
// billingModeForProvider("" | "platform" → platform_managed else byok) rule.
|
||||
// When the runtime is registry-backed, billingModeForSelectedProvider reads the
|
||||
// registry-served billing_mode off the provider catalog entry. The hardcoded
|
||||
// rule remains only as the fallback for non-registry runtimes / older backends.
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { billingModeForSelectedProvider, billingModeForProvider } from "../ConfigTab";
|
||||
import {
|
||||
buildProviderCatalogFromRegistry,
|
||||
type RegistryProvider,
|
||||
type RegistryModel,
|
||||
} from "../../ProviderModelSelector";
|
||||
|
||||
const REGISTRY_PROVIDERS: RegistryProvider[] = [
|
||||
{ name: "anthropic-oauth", display_name: "Claude Code subscription", auth_env: ["CLAUDE_CODE_OAUTH_TOKEN"], billing_mode: "byok" },
|
||||
{ name: "platform", display_name: "Platform", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "platform_managed" },
|
||||
// DISCRIMINATING fixture (review #7790): a provider whose registry-served
|
||||
// billing_mode DISAGREES with the hardcoded name-based rule. Its name is not
|
||||
// "platform"/"" so billingModeForProvider() would call it "byok", yet the
|
||||
// registry serves "platform_managed" (the federation-ready shape the SSOT is
|
||||
// built for — a managed provider that isn't literally named "platform").
|
||||
// billingModeForSelectedProvider MUST return the REGISTRY value here; the
|
||||
// only way to get "platform_managed" out is to honor the catalog, so this
|
||||
// case fails if the impl ever regresses to the hardcoded rule.
|
||||
{ name: "managed-federated", display_name: "Managed (federated)", auth_env: [], billing_mode: "platform_managed" },
|
||||
];
|
||||
const REGISTRY_MODELS: RegistryModel[] = [
|
||||
{ id: "sonnet", provider: "anthropic-oauth", billing_mode: "byok" },
|
||||
{ id: "anthropic/claude-opus-4-7", provider: "platform", billing_mode: "platform_managed" },
|
||||
// model bucketed under the disagreeing provider so the catalog builds an
|
||||
// entry for it (buildProviderCatalogFromRegistry only emits a provider entry
|
||||
// for providers that own at least one model).
|
||||
{ id: "managed/some-model", provider: "managed-federated", billing_mode: "platform_managed" },
|
||||
];
|
||||
|
||||
describe("billingModeForSelectedProvider (registry-driven)", () => {
|
||||
const catalog = buildProviderCatalogFromRegistry(REGISTRY_PROVIDERS, REGISTRY_MODELS);
|
||||
|
||||
it("reads platform_managed from the registry for the platform provider", () => {
|
||||
expect(billingModeForSelectedProvider("platform", catalog)).toBe("platform_managed");
|
||||
});
|
||||
|
||||
it("reads byok from the registry for a BYOK provider", () => {
|
||||
// anthropic-oauth derives to byok via the REGISTRY. (Note: the hardcoded
|
||||
// rule would ALSO say byok for this non-'platform' name, so on its own this
|
||||
// assertion does NOT prove the registry is authoritative — it agrees either
|
||||
// way. The registry-WINS proof is the disagreement case below.)
|
||||
expect(billingModeForSelectedProvider("anthropic-oauth", catalog)).toBe("byok");
|
||||
});
|
||||
|
||||
it("lets the registry billing_mode WIN when it disagrees with the hardcoded rule", () => {
|
||||
// 'managed-federated' is not '' / 'platform', so the legacy name-based rule
|
||||
// classifies it byok — but the registry serves platform_managed. The
|
||||
// registry is the SSOT, so billingModeForSelectedProvider must return
|
||||
// platform_managed. This is the discriminating case: it FAILS if the impl
|
||||
// regresses to billingModeForProvider (which would return byok here).
|
||||
expect(billingModeForProvider("managed-federated")).toBe("byok"); // sanity: the rules genuinely disagree
|
||||
expect(billingModeForSelectedProvider("managed-federated", catalog)).toBe("platform_managed");
|
||||
});
|
||||
|
||||
it("falls back to the hardcoded rule when no registry catalog is supplied", () => {
|
||||
// Non-registry runtime / older backend → catalog empty/undefined → the
|
||||
// legacy mapping still applies ('' | 'platform' → platform_managed).
|
||||
expect(billingModeForSelectedProvider("", undefined)).toBe("platform_managed");
|
||||
expect(billingModeForSelectedProvider("platform", undefined)).toBe("platform_managed");
|
||||
expect(billingModeForSelectedProvider("minimax", undefined)).toBe("byok");
|
||||
});
|
||||
|
||||
it("falls back to the hardcoded rule when the provider is not in the registry catalog", () => {
|
||||
// A provider string the registry catalog doesn't carry (stale saved
|
||||
// value) → fall back to the legacy rule rather than guessing.
|
||||
expect(billingModeForSelectedProvider("some-byo-vendor", catalog)).toBe("byok");
|
||||
});
|
||||
});
|
||||
@@ -297,25 +297,6 @@ describe("DetailsTab — delete workflow", () => {
|
||||
expect(mockSelectNode).toHaveBeenCalledWith(null);
|
||||
});
|
||||
|
||||
// internal#734: checking "also erase saved data" adds &erase_data=true so the
|
||||
// server prunes the data volume. Default (unchecked) must NOT send it.
|
||||
it("checking erase-saved-data sends erase_data=true on delete", async () => {
|
||||
mockApi.del.mockResolvedValue(undefined);
|
||||
render(<DetailsTab workspaceId="ws-1" data={data()} />);
|
||||
await flush();
|
||||
fireEvent.click(screen.getByRole("button", { name: /delete workspace/i }));
|
||||
await flush();
|
||||
fireEvent.click(screen.getByRole("checkbox", { name: /erase saved data/i }));
|
||||
const confirmBtn = Array.from(document.querySelectorAll("button")).find(
|
||||
(b) => b.textContent === "Confirm Delete",
|
||||
) as HTMLButtonElement;
|
||||
fireEvent(confirmBtn, new MouseEvent("click", { bubbles: true }));
|
||||
await flush();
|
||||
expect(mockApi.del).toHaveBeenCalledWith("/workspaces/ws-1?confirm=true&erase_data=true", {
|
||||
headers: { "X-Confirm-Name": "Test Workspace" },
|
||||
});
|
||||
});
|
||||
|
||||
it("cancelling delete returns to view mode", async () => {
|
||||
mockApi.del.mockResolvedValue(undefined);
|
||||
render(<DetailsTab workspaceId="ws-1" data={data()} />);
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
const RUNTIME_NAMES: Record<string, string> = {
|
||||
"claude-code": "Claude Code",
|
||||
codex: "Codex",
|
||||
"google-adk": "Google ADK",
|
||||
hermes: "Hermes",
|
||||
openclaw: "OpenClaw",
|
||||
kimi: "Kimi",
|
||||
|
||||
@@ -368,9 +368,6 @@ export interface WorkspaceCompute {
|
||||
width?: number;
|
||||
height?: number;
|
||||
};
|
||||
// internal#734: per-workspace durable-data choice. "persist" | "ephemeral" |
|
||||
// undefined (auto). Controls whether the data volume survives recreate.
|
||||
data_persistence?: string;
|
||||
}
|
||||
|
||||
let socket: ReconnectingSocket | null = null;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Molecule AI — Comprehensive Technical Documentation
|
||||
|
||||
> Definitive technical reference for the Molecule AI Agent Team platform.
|
||||
> Based on a full non-invasive scan of the [molecule-core](https://git.moleculesai.app/molecule-ai/molecule-core) repository.
|
||||
> Based on a full non-invasive scan of the [molecule-monorepo](https://git.moleculesai.app/molecule-ai/molecule-monorepo) repository.
|
||||
|
||||
---
|
||||
|
||||
@@ -1131,11 +1131,11 @@ Molecule AI's workspace abstraction is **runtime-agnostic by design**. A workspa
|
||||
|
||||
## Links
|
||||
|
||||
- **GitHub**: https://git.moleculesai.app/molecule-ai/molecule-core
|
||||
- **Architecture Docs**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/architecture
|
||||
- **API Protocol**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/api-protocol
|
||||
- **Agent Runtime**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/agent-runtime
|
||||
- **Product Docs**: https://git.moleculesai.app/molecule-ai/molecule-core/src/branch/main/docs/product
|
||||
- **GitHub**: https://git.moleculesai.app/molecule-ai/molecule-monorepo
|
||||
- **Architecture Docs**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/architecture
|
||||
- **API Protocol**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/api-protocol
|
||||
- **Agent Runtime**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/agent-runtime
|
||||
- **Product Docs**: https://git.moleculesai.app/molecule-ai/molecule-monorepo/src/branch/main/docs/product
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -82,7 +82,7 @@ DATABASE_URL=postgres://dev:dev@postgres:5432/molecule?sslmode=prefer
|
||||
REDIS_URL=redis://redis:6379
|
||||
PORT=8080
|
||||
SECRETS_ENCRYPTION_KEY=dev-key-change-in-production
|
||||
WORKSPACE_DIR=/path/to/molecule-core # Optional global fallback; prefer per-workspace workspace_dir in org.yaml or API
|
||||
WORKSPACE_DIR=/path/to/molecule-monorepo # Optional global fallback; prefer per-workspace workspace_dir in org.yaml or API
|
||||
```
|
||||
|
||||
### Canvas (Next.js)
|
||||
|
||||
@@ -16,9 +16,11 @@ workspace container running on it) over an [EC2 Instance Connect
|
||||
Endpoint](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-connect-setup-ec2-instance-connect-endpoint.html).
|
||||
End users see a terminal; no direct public SSH ingress is required.
|
||||
|
||||
Tracking: originally `molecule-core#1528` (resolved 2026-04-22). Future
|
||||
terminal work is tracked in `molecule-core` issues (workspace-server scope)
|
||||
and in `molecule-controlplane` issues for the EIC / per-tenant SG path.
|
||||
Tracking: originally `molecule-core#1528` (resolved 2026-04-22). The
|
||||
`molecule-core` repo has since been renamed to `molecule-monorepo` and no
|
||||
longer accepts new issues under the old name; future terminal work is
|
||||
tracked in `molecule-monorepo` issues (workspace-server scope) and in
|
||||
`molecule-controlplane` issues for the EIC / per-tenant SG path.
|
||||
|
||||
## Where things are
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ When opencode connects to the Molecule MCP endpoint, the agent gains access to:
|
||||
"tool": "delegate_task",
|
||||
"arguments": {
|
||||
"target": "research-lead",
|
||||
"task": "Summarise the last 7 days of commits in Molecule-AI/molecule-core"
|
||||
"task": "Summarise the last 7 days of commits in Molecule-AI/molecule-monorepo"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Internal content policy
|
||||
|
||||
The `Molecule-AI/molecule-core` repo is **public**. Anything internal
|
||||
The `Molecule-AI/molecule-monorepo` repo is **public**. Anything internal
|
||||
(positioning, competitive briefs, sales playbooks, PMM/press drip, draft
|
||||
campaigns, raw research notes, ops runbooks, retrospectives) lives in
|
||||
**`Molecule-AI/internal`**.
|
||||
@@ -18,14 +18,14 @@ This page is the canonical decision tree.
|
||||
| Draft campaign asset (still iterating, not yet customer-visible) | `Molecule-AI/internal/marketing/campaigns/` |
|
||||
| Roadmap discussion, planning doc, retrospective | `Molecule-AI/internal/PLAN.md` or `Molecule-AI/internal/retrospectives/` |
|
||||
| Runbook, ops procedure, incident postmortem | `Molecule-AI/internal/runbooks/` |
|
||||
| **Public-ready** blog post (final draft, ready to ship to docs site) | `Molecule-AI/molecule-core/docs/blog/` |
|
||||
| **Public-ready** tutorial / quickstart | `Molecule-AI/molecule-core/docs/tutorials/` |
|
||||
| Public DevRel content (code samples, demos for users) | `Molecule-AI/molecule-core/docs/devrel/` |
|
||||
| API reference, architecture docs for external developers | `Molecule-AI/molecule-core/docs/api/` |
|
||||
| **Public-ready** blog post (final draft, ready to ship to docs site) | `Molecule-AI/molecule-monorepo/docs/blog/` |
|
||||
| **Public-ready** tutorial / quickstart | `Molecule-AI/molecule-monorepo/docs/tutorials/` |
|
||||
| Public DevRel content (code samples, demos for users) | `Molecule-AI/molecule-monorepo/docs/devrel/` |
|
||||
| API reference, architecture docs for external developers | `Molecule-AI/molecule-monorepo/docs/api/` |
|
||||
| Code, tests, infrastructure | wherever is appropriate inside this repo |
|
||||
|
||||
**Rule of thumb:** *"Would I be comfortable if a competitor / journalist / customer
|
||||
read this verbatim today?"* — yes → `molecule-core/docs/`. No / not yet → `internal/`.
|
||||
read this verbatim today?"* — yes → `molecule-monorepo/docs/`. No / not yet → `internal/`.
|
||||
|
||||
## Why
|
||||
|
||||
@@ -82,7 +82,7 @@ git push -u origin HEAD
|
||||
gh pr create --base main --fill
|
||||
```
|
||||
|
||||
Yes, this is more steps than `cd molecule-core && git add research/foo.md`.
|
||||
Yes, this is more steps than `cd molecule-monorepo && git add research/foo.md`.
|
||||
That cost is intentional: the friction is the point. Public space and
|
||||
internal space are different products with different audiences and
|
||||
different durability guarantees.
|
||||
|
||||
+4
-4
@@ -17,8 +17,8 @@ This path is aligned to the current repository and current UI. It gets you from
|
||||
## The one-command path
|
||||
|
||||
```bash
|
||||
git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
|
||||
cd molecule-core
|
||||
git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
|
||||
cd molecule-monorepo
|
||||
./scripts/dev-start.sh
|
||||
```
|
||||
|
||||
@@ -42,8 +42,8 @@ If you'd rather run each component yourself — useful when you're iterating on
|
||||
### Step 1: Clone the repository
|
||||
|
||||
```bash
|
||||
git clone https://git.moleculesai.app/molecule-ai/molecule-core.git
|
||||
cd molecule-core
|
||||
git clone https://git.moleculesai.app/molecule-ai/molecule-monorepo.git
|
||||
cd molecule-monorepo
|
||||
```
|
||||
|
||||
### Step 2: Start the shared infrastructure
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
# Engineer-Agent Gitea Token Scope Runbook
|
||||
|
||||
## Symptom
|
||||
|
||||
Engineer-class agents (e.g. `agent-dev-a`, `agent-dev-b`) fail swarm-pull issue discovery or receive HTTP 403 when calling Gitea issue-list APIs, while PR review and repository API operations continue to work.
|
||||
|
||||
Typical failing call:
|
||||
```bash
|
||||
GET /api/v1/repos/molecule-ai/molecule-core/issues?state=open&labels=approved&limit=50
|
||||
# => 403 Forbidden
|
||||
```
|
||||
|
||||
Typical working calls (same token):
|
||||
```bash
|
||||
GET /api/v1/repos/molecule-ai/molecule-core/pulls?state=open&limit=50
|
||||
POST /api/v1/repos/molecule-ai/molecule-core/pulls/1666/comments
|
||||
# => 200 OK
|
||||
```
|
||||
|
||||
## Root Cause
|
||||
|
||||
Gitea v1.22.6 routes issue-list under the `Issue` scope category (`routers/api/v1/api.go:1379-1491`), while PR routes live under repository/pull routing (`api.go:1278-1305`). The scope gate derives required read/write level from HTTP method (`api.go:309-313`), so `GET /issues?...` requires `read:issue`.
|
||||
|
||||
Engineer-class agent PATs were provisioned with repository and PR scopes but without `read:issue`, causing the asymmetric 403.
|
||||
|
||||
## Detection
|
||||
|
||||
1. **Agent-side**: swarm-pull workflow logs show `403 Forbidden` on issue enumeration but not on PR list/review.
|
||||
2. **Platform-side**: Gitea access logs show `GET /repos/{owner}/{repo}/issues` returning 403 for the affected token.
|
||||
3. **Reproduction** (from any workspace with a suspected token):
|
||||
```bash
|
||||
TOKEN=$(cat /configs/secrets.d/GITEA_TOKEN)
|
||||
PLATFORM="https://git.moleculesai.app"
|
||||
|
||||
# Should succeed — confirms token is live
|
||||
curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token $TOKEN" \
|
||||
"$PLATFORM/api/v1/user"
|
||||
|
||||
# Will 403 if the token lacks read:issue
|
||||
curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "Authorization: token $TOKEN" \
|
||||
"$PLATFORM/api/v1/repos/molecule-ai/molecule-core/issues?state=open&limit=1"
|
||||
```
|
||||
|
||||
## Immediate Fix
|
||||
|
||||
### Step 1: Issue fresh PATs with correct scopes
|
||||
|
||||
From a Gitea site-admin account (or via the Gitea web UI → Settings → Applications):
|
||||
|
||||
1. Navigate to the affected user's profile (e.g. `agent-dev-a`).
|
||||
2. Go to **Settings → Applications → Generate New Token**.
|
||||
3. Select scopes:
|
||||
- `read:repository` (existing)
|
||||
- `write:repository` (existing, if push is required)
|
||||
- `read:issue` (**add this**)
|
||||
- `write:issue` (add only if agents must comment/edit issues)
|
||||
- `read:pull-request` / `write:pull-request` (existing)
|
||||
- `read:comment` / `write:comment` (existing, if PR review is required)
|
||||
4. Copy the plaintext token immediately — it is shown only once.
|
||||
|
||||
### Step 2: Update workspace secrets
|
||||
|
||||
For each affected engineer workspace, update the Gitea token secret:
|
||||
|
||||
```bash
|
||||
# Via the platform API (admin auth required)
|
||||
PLATFORM="https://agents-team.moleculesai.app"
|
||||
ADMIN_TOKEN="<your-admin-token>"
|
||||
WORKSPACE_ID="<affected-workspace-id>"
|
||||
NEW_GITEA_TOKEN="<fresh-token-from-step-1>"
|
||||
|
||||
curl -X POST "$PLATFORM/workspaces/$WORKSPACE_ID/secrets" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{
|
||||
\"GITEA_TOKEN\": \"$NEW_GITEA_TOKEN\"
|
||||
}"
|
||||
```
|
||||
|
||||
Restart the workspace so the runtime re-reads secrets:
|
||||
```bash
|
||||
curl -X POST "$PLATFORM/workspaces/$WORKSPACE_ID/restart" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN"
|
||||
```
|
||||
|
||||
### Step 3: Smoke-test
|
||||
|
||||
From the restarted workspace, verify all three paths:
|
||||
|
||||
```bash
|
||||
# 1. Issue list (the previously failing path)
|
||||
curl -s -H "Authorization: token $GITEA_TOKEN" \
|
||||
"https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/issues?state=open&labels=approved&limit=1" | jq '.[0].number'
|
||||
|
||||
# 2. PR list (should still work)
|
||||
curl -s -H "Authorization: token $GITEA_TOKEN" \
|
||||
"https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/pulls?state=open&limit=1" | jq '.[0].number'
|
||||
|
||||
# 3. Swarm-pull discovery (end-to-end)
|
||||
# Trigger the agent's autonomous tick or delegate a task that enumerates open issues.
|
||||
```
|
||||
|
||||
## Long-Term Fix
|
||||
|
||||
Update the **workspace secret injection path** that writes `/configs/secrets.d/GITEA_TOKEN` for engineer-class agents. The provisioning template or secret-distribution job should request `read:issue` (and optionally `write:issue`) at token-creation time.
|
||||
|
||||
File locations to audit:
|
||||
- `.gitea/scripts/` — any token-provisioning automation
|
||||
- `infra/terraform/` or equivalent — IAM/secret-manager templates
|
||||
- `workspace-configs-templates/` — engineer-class workspace templates that declare required secrets
|
||||
|
||||
## Prevention
|
||||
|
||||
1. **Token scope checklist**: when provisioning new engineer-class agent tokens, verify the scope set includes `read:issue` before distributing the secret.
|
||||
2. **Monitoring**: add an agent health-check that probes `GET /repos/molecule-ai/molecule-core/issues?limit=1` and surfaces a non-fatal warning if it returns 403.
|
||||
3. **Documentation**: update the onboarding runbook for new engineer agents to include the full required scope list.
|
||||
|
||||
## References
|
||||
|
||||
- Gitea issue #1750: [RCA: engineer-token read:issue scope gap blocks swarm-pull workflow](https://git.moleculesai.app/molecule-ai/molecule-core/issues/1750)
|
||||
- Gitea source: `routers/api/v1/api.go:309-313` (scope gate), `api.go:1278-1305` (PR routing), `api.go:1379-1491` (issue routing)
|
||||
- Related: PR #1542 (provisioner git-creds injection), PR #1669 (auth_token inline mint)
|
||||
@@ -1,16 +1,5 @@
|
||||
# Running a Gemini CLI Workspace on Molecule AI
|
||||
|
||||
> **⚠️ Accuracy correction (2026-05-29):** this page is **aspirational, not
|
||||
> shipped.** There is **no `gemini-cli` runtime** in `manifest.json` or the
|
||||
> provisioner's `knownRuntimes`, and the "PR #379" cited below is unrelated (a
|
||||
> CI-workflow-cleanup PR, not a gemini-cli adapter). Do not follow this as-is.
|
||||
>
|
||||
> **For Gemini on Molecule, use the real `google-adk` runtime instead** — see
|
||||
> [`google-adk-runtime.md`](./google-adk-runtime.md) (ADK engine + Gemini on
|
||||
> Vertex AI/AI Studio), implemented in PR
|
||||
> [`molecule-ai-workspace-template-google-adk#1`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) per RFC `internal#730`.
|
||||
> This gemini-cli page is retained only until it's either implemented for real or removed.
|
||||
|
||||
Molecule AI now ships a `gemini-cli` runtime adapter alongside the existing `claude-code` adapter. This tutorial walks you from zero to a running Gemini agent workspace in under five minutes.
|
||||
|
||||
## What you'll need
|
||||
|
||||
@@ -1,69 +1,74 @@
|
||||
# Running a Google ADK Workspace on Molecule AI
|
||||
|
||||
> **Status (2026-05-29):** the `google-adk` runtime is **landing**, not yet on
|
||||
> `main`. It's implemented in the template repo
|
||||
> [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk)
|
||||
> (PR **#1**) with platform registration in molecule-core PR **#2003** and the
|
||||
> validator allowlist in molecule-ci PR **#26**. Design + approval: RFC
|
||||
> [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730).
|
||||
> Remove this banner once those PRs merge.
|
||||
>
|
||||
> **Doc-accuracy note:** a prior version of this page claimed ADK was "already
|
||||
> first-class" and cited "PR #550" — that PR is unrelated (a MemoryTab test
|
||||
> suite). No `google-adk` adapter existed at that time. This rewrite reflects
|
||||
> the real implementation.
|
||||
Google's Agent Development Kit (ADK) is now a first-class runtime on Molecule AI. This tutorial walks you from zero to a running ADK agent workspace — one that persists per-conversation session state and sits alongside your Claude Code and Gemini CLI workers in the same A2A network.
|
||||
|
||||
Google's Agent Development Kit (ADK) runs as a Molecule AI workspace runtime:
|
||||
ADK is the **agent engine** (`LlmAgent` + `Runner`), and the workspace
|
||||
participates in Molecule's A2A org like any other runtime.
|
||||
## What you'll need
|
||||
|
||||
## How it actually works
|
||||
- A Molecule AI account with at least one provisioned tenant
|
||||
- A `GOOGLE_API_KEY` from [aistudio.google.com](https://aistudio.google.com) (or Vertex AI credentials — see below)
|
||||
- `curl` + `jq`
|
||||
|
||||
- **ADK = engine only.** The adapter builds an ADK `LlmAgent` from the
|
||||
workspace config (model + system prompt + tools) and drives its `Runner`.
|
||||
It installs `google-adk[mcp]==2.1.0` and **never** the `[a2a]` extra — ADK's
|
||||
a2a layer pins `a2a-sdk<0.4`, which is incompatible with the platform's
|
||||
`a2a-sdk>=1.0`. (Verified: `google-adk[mcp]==2.1.0` + `a2a-sdk 1.0.3` coexist.)
|
||||
- **A2A** is provided by the platform's a2a-1.x server; a Molecule-authored
|
||||
executor bridges ADK's `Runner` event stream onto it, one ADK session per
|
||||
A2A `context_id`.
|
||||
- **Tools** reach the agent via ADK's native `McpToolset` pointed at the
|
||||
workspace's `a2a_mcp_server` — the same MCP surface the CLI runtimes use
|
||||
(`delegate_task`, `commit_memory`, `list_peers`, …). No LangChain.
|
||||
|
||||
## Auth — Vertex AI via ADC (keyless), or an AI Studio key
|
||||
|
||||
The runtime supports both google-genai auth paths:
|
||||
|
||||
- **Vertex AI + Application Default Credentials (recommended; required if your
|
||||
org disallows API keys).** Set `model: vertex:gemini-2.5-pro` and provide
|
||||
`GOOGLE_CLOUD_PROJECT`; the adapter sets `GOOGLE_GENAI_USE_VERTEXAI=1` and
|
||||
google-genai authenticates via ADC — no API key. (Locally:
|
||||
`gcloud auth application-default login`.)
|
||||
- **AI Studio API key** (where your org permits API keys): set
|
||||
`model: google_genai:gemini-2.5-pro` and `GOOGLE_API_KEY`.
|
||||
|
||||
## Create a workspace
|
||||
## Setup
|
||||
|
||||
```bash
|
||||
# Vertex AI + ADC (keyless)
|
||||
curl -s -X POST http://localhost:8080/workspaces \
|
||||
# 1. Store your Google API key as a global secret
|
||||
curl -s -X PUT http://localhost:8080/settings/secrets \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"key":"GOOGLE_API_KEY","value":"YOUR-AI-STUDIO-KEY"}' | jq .
|
||||
|
||||
# 2. Create a google-adk workspace
|
||||
WS=$(curl -s -X POST http://localhost:8080/workspaces \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"name": "adk-agent",
|
||||
"role": "Google ADK inference worker",
|
||||
"runtime": "google-adk",
|
||||
"model": "vertex:gemini-2.5-pro",
|
||||
"runtime_config": {"required_env": ["GOOGLE_CLOUD_PROJECT"]}
|
||||
}'
|
||||
"model": "google:gemini-2.0-flash"
|
||||
}' | jq -r '.id')
|
||||
echo "Workspace: $WS"
|
||||
|
||||
# 3. Wait for ready (~30s)
|
||||
until curl -s http://localhost:8080/workspaces/$WS | jq -r '.status' | grep -q ready; do
|
||||
echo "Waiting..."; sleep 5
|
||||
done
|
||||
|
||||
# 4. Send your first task
|
||||
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"jsonrpc":"2.0","id":"1","method":"message/send",
|
||||
"params":{"message":{"role":"user","parts":[{"kind":"text",
|
||||
"text":"Summarise the ADK architecture in 3 bullet points."}]}}}' \
|
||||
| jq '.result.parts[0].text'
|
||||
|
||||
# 5. Multi-turn — session state is preserved across calls
|
||||
curl -s -X POST http://localhost:8080/workspaces/$WS/a2a \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"jsonrpc":"2.0","id":"2","method":"message/send",
|
||||
"params":{"message":{"role":"user","parts":[{"kind":"text",
|
||||
"text":"Now give me a one-line TL;DR of what you just said."}]}}}' \
|
||||
| jq '.result.parts[0].text'
|
||||
|
||||
# 6. Vertex AI alternative — set these instead of GOOGLE_API_KEY
|
||||
# curl -X PUT .../secrets -d '{"key":"GOOGLE_GENAI_USE_VERTEXAI","value":"1"}'
|
||||
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_PROJECT","value":"my-project"}'
|
||||
# curl -X PUT .../secrets -d '{"key":"GOOGLE_CLOUD_LOCATION","value":"us-central1"}'
|
||||
```
|
||||
|
||||
Send it a task via the A2A proxy (`POST /workspaces/:id/a2a`, JSON-RPC
|
||||
`message/send`) and it replies through the ADK `Runner`. Verified end-to-end:
|
||||
a Gemini 2.5 round-trip on Vertex via ADC returns through the built image.
|
||||
## Expected output
|
||||
|
||||
After step 4, ADK streams the Gemini response through its event bus, filters for `is_final_response()` events, and returns the agent's reply as a standard A2A text part. Step 5 should reference the prior answer — the adapter ties each A2A `context_id` to an `InMemorySessionService` session, so conversation state is isolated per task context and survives across calls within the same session.
|
||||
|
||||
## How it works
|
||||
|
||||
The `google-adk` adapter wraps Google ADK's runner/session model behind the same `AgentExecutor` interface used by every other Molecule AI runtime. On each turn, `GoogleADKA2AExecutor` calls `runner.run_async()` with the incoming message wrapped in a `google.genai.types.Content` object, then drains the event stream until it collects a final-response event. The `google:` model prefix is stripped before being passed to ADK — so `google:gemini-2.0-flash` in your workspace config becomes `gemini-2.0-flash` in the ADK `LlmAgent`. Error class names are sanitized before leaving the executor; raw Google SDK stack traces never reach the A2A caller.
|
||||
|
||||
## Mixed-runtime teams
|
||||
|
||||
ADK workspaces participate in the same A2A network as Claude Code, Gemini CLI, Hermes, and LangGraph workers. An orchestrator can delegate long-context summarisation to a `google-adk` worker (Gemini 1.5 Pro's 1M token window) while routing tool-use tasks to a `claude-code` worker — with no provider-specific code in the orchestrator itself. Add an ADK peer with `POST /workspaces`, set `GOOGLE_API_KEY`, and it's available for `delegate_task` immediately.
|
||||
|
||||
## Related
|
||||
- Template + adapter: [`molecule-ai-workspace-template-google-adk`](https://git.moleculesai.app/molecule-ai/molecule-ai-workspace-template-google-adk) (PR #1)
|
||||
- Platform registration: molecule-core PR #2003 · validator: molecule-ci PR #26
|
||||
- Design/approval: RFC [`internal#730`](https://git.moleculesai.app/molecule-ai/internal/issues/730)
|
||||
|
||||
- PR #550: [feat(adapters): add google-adk runtime adapter](https://git.moleculesai.app/molecule-ai/molecule-core/pull/550)
|
||||
- [Google ADK (adk-python)](https://github.com/google/adk-python)
|
||||
- [Gemini CLI runtime tutorial](./gemini-cli-runtime.md)
|
||||
- [Platform API reference](../api-reference.md)
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
|
||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
|
||||
],
|
||||
"org_templates": [
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
# Developer SOP — PR review gate auto-fire and stale-head handling
|
||||
|
||||
> Last updated: 2026-06-03 (cp#2159 follow-up)
|
||||
>
|
||||
> Applies to: all core-PR authors and reviewers on `molecule-core` and sibling
|
||||
> repos using the `qa-review` + `security-review` branch-protection gates.
|
||||
|
||||
---
|
||||
|
||||
## 1. Gitea PR-head workflow-selection rule
|
||||
|
||||
**Rule:** For `pull_request_target` and `pull_request_review` events, Gitea
|
||||
loads the workflow definition from the **PR's HEAD branch**, not from the
|
||||
base (`main`) branch.
|
||||
|
||||
This is different from GitHub Actions, where `pull_request_target` always
|
||||
loads workflows from the base branch. Gitea's behaviour means:
|
||||
|
||||
- A PR that was opened **before** the `pull_request_review` trigger was added
|
||||
to `qa-review.yml` / `security-review.yml` will **NOT** auto-fire on review,
|
||||
because its HEAD still contains the old workflow YAML (no trigger).
|
||||
|
||||
- A PR that was opened **after** the trigger was added (or that has been
|
||||
rebased onto a commit containing the trigger) **WILL** auto-fire, because its
|
||||
HEAD contains the new workflow YAML.
|
||||
|
||||
### Ops implication
|
||||
|
||||
| PR head contains `pull_request_review` trigger? | Behaviour on APPROVED review |
|
||||
|---|---|
|
||||
| **Yes** (cut from current main, or rebased) | Workflows auto-queue, evaluate, and POST the `(pull_request_target)` context automatically. No slash-command needed. |
|
||||
| **No** (stale head, opened before #2157) | Nothing fires. Use `/qa-recheck` + `/security-recheck` slash-commands in a PR comment, OR rebase onto current main. |
|
||||
|
||||
---
|
||||
|
||||
## 2. Standard core-PR flow (post-#2157)
|
||||
|
||||
```
|
||||
1. Author opens PR from a branch based on current main
|
||||
→ qa-review + security-review workflows run on pull_request_target
|
||||
→ status contexts post (initial eval, usually red until reviews land)
|
||||
|
||||
2. Reviewers submit real APPROVED reviews
|
||||
→ If PR head has the trigger: workflows AUTO-FIRE on pull_request_review
|
||||
→ Contexts flip green (or stay red if reviewer is not in team)
|
||||
|
||||
3. [Optional] If contexts did not flip (stale head, event lost, etc.):
|
||||
→ Anyone can comment `/qa-recheck` or `/security-recheck`
|
||||
→ sop-checklist.yml refires the evaluator (read-only, idempotent)
|
||||
|
||||
4. Both qa-review + security-review contexts are green
|
||||
→ Plain Do:merge (no force-merge needed)
|
||||
```
|
||||
|
||||
### Key point
|
||||
|
||||
The `/qa-recheck` and `/security-recheck` commands are a **backstop**, not the
|
||||
primary path. PRs cut from current main should auto-fire without manual
|
||||
intervention.
|
||||
|
||||
---
|
||||
|
||||
## 3. Diagnosing a stale head
|
||||
|
||||
If a PR has real team-member APPROVED reviews but the qa/security contexts
|
||||
remain red and no workflow run appears on the PR's "Actions" tab for the
|
||||
review event, the PR head is likely stale.
|
||||
|
||||
### Quick check
|
||||
|
||||
```bash
|
||||
# From the PR page, look at the head commit SHA, then:
|
||||
curl -sS "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/contents/.gitea/workflows/qa-review.yml?ref=<HEAD_SHA>" \
|
||||
| jq -r '.content' | base64 -d | grep -c 'pull_request_review'
|
||||
# 0 → stale head (no trigger in that version of the workflow)
|
||||
# >0 → trigger present; auto-fire SHOULD work (if it didn't, file a tracker)
|
||||
```
|
||||
|
||||
### Automated diagnostic
|
||||
|
||||
The test suite includes `test_gate_stale_head_diagnostic.py`, which reports
|
||||
"auto-fire impossible for this PR" when the head lacks the trigger. Run it
|
||||
in CI or locally with:
|
||||
|
||||
```bash
|
||||
PR_NUMBER=123 python -m pytest .gitea/scripts/tests/test_gate_stale_head_diagnostic.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Rebasing vs. slash-refire
|
||||
|
||||
| Approach | When to use | Trade-off |
|
||||
|---|---|---|
|
||||
| **Rebase onto current main** | PR is genuinely stale (head lacks trigger OR head is far behind main) | Clean history, gets all recent fixes, but requires force-push and re-approval if the branch was protected |
|
||||
| **`/qa-recheck` + `/security-recheck`** | PR head is recent but the review event was missed, or you want to avoid rebase churn | Quick, no force-push, but does NOT fix a missing trigger in the head |
|
||||
|
||||
**Do not** use slash-refire as a substitute for rebasing a stale head. If the
|
||||
workflow YAML in the PR head does not contain `pull_request_review`, no amount
|
||||
of rechecking will make auto-fire work.
|
||||
|
||||
---
|
||||
|
||||
## 5. Live-fire verification
|
||||
|
||||
The `test_gate_auto_fire_live.py` regression test exercises the full runtime
|
||||
path: it submits an APPROVED review to a test PR and polls for the
|
||||
`(pull_request_target)` status contexts. It is skipped when no API token is
|
||||
available, and is intended to catch runtime non-fire that static structural
|
||||
tests (e.g. `test_gate_review_auto_fire.py`) cannot detect.
|
||||
|
||||
Run manually with:
|
||||
|
||||
```bash
|
||||
export GITEA_HOST=git.moleculesai.app
|
||||
export GITEA_TOKEN=<your-token>
|
||||
export REPO=molecule-ai/molecule-core
|
||||
export LIVEFIRE_PR_NUMBER=<test-pr-number>
|
||||
python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
|
||||
the `pull_request_review` trigger, NOT a workflow code defect)
|
||||
- #765 — static structural regression test for gate configuration
|
||||
- #2157 — merged trigger addition (`pull_request_review` types: [submitted])
|
||||
- #2020 — milestone confirming gate infrastructure is stable
|
||||
- RFC#324 — qa-review + security-review design
|
||||
@@ -93,7 +93,9 @@ def _gitea_get(path: str, params: dict[str, str] | None = None) -> bytes | None:
|
||||
try:
|
||||
# S310 (trust boundary): this function IS the outbound HTTP client for
|
||||
# Gitea API calls. The call is intentional and controlled — we build
|
||||
with urllib.request.urlopen(req, timeout=20) as resp: # noqa: S310 # explicit timeout + error handling; bandit false positive
|
||||
# the request ourselves and handle errors explicitly. Timeout=20s
|
||||
# prevents indefinite hangs.
|
||||
with urllib.request.urlopen(req, timeout=20) as resp: # noqa: S310
|
||||
return resp.read()
|
||||
except urllib.error.HTTPError as e:
|
||||
sys.stderr.write(f"Gitea API HTTP {e.code} on {path}: {e.reason}\n")
|
||||
|
||||
@@ -1,13 +1,12 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E test: A2A round-trip parity across all five runtimes.
|
||||
# E2E test: A2A round-trip parity across all four runtimes.
|
||||
#
|
||||
# Validates that for each of {claude-code, hermes, codex, openclaw, google-adk}:
|
||||
# Validates that for each of {claude-code, hermes, codex, openclaw}:
|
||||
# 1. A workspace can be provisioned + brought online
|
||||
# 2. The adapter responds to A2A message/send
|
||||
# 3. The reply contains expected content (echo of the prompt)
|
||||
# 4. A SECOND message preserves session state where the runtime
|
||||
# supports it (currently: hermes via plugin path; google-adk via
|
||||
# ADK InMemorySessionService keyed on A2A context_id)
|
||||
# supports it (currently: hermes via plugin path)
|
||||
#
|
||||
# Targets a SaaS tenant subdomain. Provisions workspaces in the calling
|
||||
# tenant, runs the round-trip, deletes them on success.
|
||||
@@ -17,10 +16,6 @@
|
||||
# (e.g. https://demo-tenant.staging.moleculesai.app)
|
||||
# - $OPENROUTER_API_KEY (or $HERMES_API_KEY) for non-claude runtimes
|
||||
# - $OPENAI_API_KEY for claude-code peer
|
||||
# - $GOOGLE_API_KEY (AI Studio) for google-adk — the org disallows API
|
||||
# keys in PROD (Vertex+ADC there), but CI auths Gemini with an
|
||||
# AI-Studio key (config model google_genai:gemini-2.5-pro). Vertex
|
||||
# stays supported; this is the keyed CI path only.
|
||||
# - SaaS edge requires Origin header — see auto-memory
|
||||
# reference_saas_waf_origin_header.md
|
||||
#
|
||||
@@ -29,13 +24,12 @@
|
||||
# ./scripts/test-all-runtimes-a2a-e2e.sh
|
||||
#
|
||||
# Skip individual runtimes:
|
||||
# SKIP_HERMES=1 SKIP_OPENCLAW=1 SKIP_GOOGLE_ADK=1 ./scripts/test-all-runtimes-a2a-e2e.sh
|
||||
# SKIP_HERMES=1 SKIP_OPENCLAW=1 ./scripts/test-all-runtimes-a2a-e2e.sh
|
||||
set -euo pipefail
|
||||
|
||||
PLATFORM="${PLATFORM:-${1:-http://localhost:8080}}"
|
||||
HERMES_PROVIDER_KEY="${OPENROUTER_API_KEY:-${HERMES_API_KEY:-}}"
|
||||
PEER_OPENAI_KEY="${OPENAI_API_KEY:-}"
|
||||
GOOGLE_ADK_KEY="${GOOGLE_API_KEY:-}"
|
||||
# SaaS auth chain — TENANT_ADMIN_TOKEN + TENANT_ORG_ID required when
|
||||
# hitting *.moleculesai.app (per-tenant ADMIN_TOKEN, NOT
|
||||
# CP_ADMIN_API_TOKEN). Optional for localhost.
|
||||
@@ -54,10 +48,6 @@ if [ -z "$HERMES_PROVIDER_KEY" ] && [ -z "${SKIP_HERMES:-}${SKIP_CODEX:-}${SKIP_
|
||||
echo "FAIL: set OPENROUTER_API_KEY or HERMES_API_KEY for non-claude runtimes"
|
||||
exit 2
|
||||
fi
|
||||
if [ -z "$GOOGLE_ADK_KEY" ] && [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
|
||||
echo "FAIL: set GOOGLE_API_KEY (AI Studio) for google-adk, or SKIP_GOOGLE_ADK=1"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
@@ -153,7 +143,7 @@ echo "=========================================="
|
||||
echo ""
|
||||
|
||||
# -------------------------------------------------------
|
||||
# 1. Provision the five runtimes (skip via SKIP_* flags)
|
||||
# 1. Provision the four runtimes (skip via SKIP_* flags)
|
||||
# -------------------------------------------------------
|
||||
echo "--- 1. Provision workspaces ---"
|
||||
if [ -z "${SKIP_CLAUDE_CODE:-}" ]; then
|
||||
@@ -172,10 +162,6 @@ if [ -z "${SKIP_OPENCLAW:-}" ]; then
|
||||
WS_IDS[openclaw]=$(provision "ParityOpenClaw" "openclaw" "openclaw peer")
|
||||
echo " openclaw: ${WS_IDS[openclaw]}"
|
||||
fi
|
||||
if [ -z "${SKIP_GOOGLE_ADK:-}" ]; then
|
||||
WS_IDS[google-adk]=$(provision "ParityGoogleADK" "google-adk" "google-adk peer")
|
||||
echo " google-adk: ${WS_IDS[google-adk]}"
|
||||
fi
|
||||
|
||||
# -------------------------------------------------------
|
||||
# 2. Set provider keys
|
||||
@@ -191,12 +177,6 @@ if [ -n "${WS_IDS[claude-code]:-}" ] && [ -n "$PEER_OPENAI_KEY" ]; then
|
||||
set_secret "${WS_IDS[claude-code]}" "OPENAI_API_KEY" "$PEER_OPENAI_KEY"
|
||||
echo " claude-code: OPENAI_API_KEY set"
|
||||
fi
|
||||
if [ -n "${WS_IDS[google-adk]:-}" ] && [ -n "$GOOGLE_ADK_KEY" ]; then
|
||||
# AI-Studio path: the adapter reads GOOGLE_API_KEY natively when the
|
||||
# config model is google_genai:gemini-2.5-pro (see _routing.resolve_model).
|
||||
set_secret "${WS_IDS[google-adk]}" "GOOGLE_API_KEY" "$GOOGLE_ADK_KEY"
|
||||
echo " google-adk: GOOGLE_API_KEY set"
|
||||
fi
|
||||
|
||||
# -------------------------------------------------------
|
||||
# 3. Wait for online
|
||||
@@ -208,9 +188,6 @@ for runtime in "${!WS_IDS[@]}"; do
|
||||
[ -z "$id" ] && continue
|
||||
max=60
|
||||
[ "$runtime" = "hermes" ] && max=120
|
||||
# google-adk's first cold boot pulls a large fresh ADK image — give it
|
||||
# a hermes-class window so a slow first pull doesn't read as "failed".
|
||||
[ "$runtime" = "google-adk" ] && max=180
|
||||
if wait_online "$id" "$runtime" "$max"; then
|
||||
check "$runtime online" "ok" "ok"
|
||||
else
|
||||
@@ -223,7 +200,7 @@ done
|
||||
# -------------------------------------------------------
|
||||
echo ""
|
||||
echo "--- 4. A2A round-trip (first message) ---"
|
||||
for runtime in claude-code hermes codex openclaw google-adk; do
|
||||
for runtime in claude-code hermes codex openclaw; do
|
||||
id="${WS_IDS[$runtime]:-}"
|
||||
[ -z "$id" ] && continue
|
||||
reply=$(a2a_send "$id" "Reply with just the word OK so we know you got this.")
|
||||
@@ -236,7 +213,7 @@ done
|
||||
# -------------------------------------------------------
|
||||
echo ""
|
||||
echo "--- 5. Session continuity (second message recalls first) ---"
|
||||
for runtime in claude-code hermes codex openclaw google-adk; do
|
||||
for runtime in claude-code hermes codex openclaw; do
|
||||
id="${WS_IDS[$runtime]:-}"
|
||||
[ -z "$id" ] && continue
|
||||
# Set up: tell the agent a name.
|
||||
|
||||
@@ -27,9 +27,9 @@ def smoke_imports_and_invariants() -> None:
|
||||
import-rewrite mistakes (the 0.1.16 incident, where main.py loaded but
|
||||
main_sync was missing because the build script dropped a re-export).
|
||||
"""
|
||||
from molecule_runtime.main import main_sync # noqa: F401 # smoke-test re-export regression (mc#1769)
|
||||
from molecule_runtime import a2a_client, a2a_tools # noqa: F401 # smoke-test re-export regression (mc#1769)
|
||||
from molecule_runtime.builtin_tools import memory # noqa: F401 # smoke-test re-export regression (mc#1769)
|
||||
from molecule_runtime.main import main_sync # noqa: F401
|
||||
from molecule_runtime import a2a_client, a2a_tools # noqa: F401
|
||||
from molecule_runtime.builtin_tools import memory # noqa: F401
|
||||
from molecule_runtime.adapters import get_adapter, BaseAdapter, AdapterConfig
|
||||
|
||||
# cli_main + mcp_cli.main are the molecule-mcp console-script entry
|
||||
@@ -38,8 +38,8 @@ def smoke_imports_and_invariants() -> None:
|
||||
# rewrite here would break every external operator's MCP install on
|
||||
# the next wheel publish. Pin both names because pyproject points
|
||||
# at mcp_cli.main, which then imports a2a_mcp_server.cli_main.
|
||||
from molecule_runtime.a2a_mcp_server import cli_main # noqa: F401 # smoke-test re-export regression (mc#1769)
|
||||
from molecule_runtime.mcp_cli import main as mcp_cli_main # noqa: F401 # smoke-test re-export regression (mc#1769)
|
||||
from molecule_runtime.a2a_mcp_server import cli_main # noqa: F401
|
||||
from molecule_runtime.mcp_cli import main as mcp_cli_main # noqa: F401
|
||||
assert callable(cli_main), "a2a_mcp_server.cli_main must be callable"
|
||||
assert callable(mcp_cli_main), "mcp_cli.main must be callable"
|
||||
|
||||
@@ -48,7 +48,7 @@ def smoke_imports_and_invariants() -> None:
|
||||
# imports + activates these at startup; if a wheel ships without
|
||||
# them, the standalone agent silently loses the wait_for_message /
|
||||
# inbox_peek / inbox_pop tools and reverts to outbound-only.
|
||||
from molecule_runtime.inbox import ( # noqa: F401 # smoke-test re-export regression (mc#1769)
|
||||
from molecule_runtime.inbox import ( # noqa: F401
|
||||
InboxState,
|
||||
activate as inbox_activate,
|
||||
get_state as inbox_get_state,
|
||||
|
||||
@@ -1,229 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Real-completion + per-provider liveness + byok-routing assertion helpers
|
||||
# for the staging full-SaaS E2E (tests/e2e/test_staging_full_saas.sh).
|
||||
#
|
||||
# WHY THIS LIB EXISTS (molecule-core#1995 / #1994 follow-on):
|
||||
# The A2A e2e historically asserted only response SHAPE — e.g.
|
||||
# test_a2a_e2e.sh:`check "SEO response has text" '"kind":"text"'`. A fully
|
||||
# BROKEN agent returns its error AS a text part:
|
||||
# {"kind":"text","text":"Agent error (Exception) — see workspace logs..."}
|
||||
# which STILL matches `"kind":"text"` → the shape check PASSES on a broken
|
||||
# agent. That is exactly why the 2026-05-2x drained-key / byok-misroute
|
||||
# failures (agents-team PM + reno marketing erroring on every LLM call)
|
||||
# sailed through CI. "Channel returns text shape" != "agent actually
|
||||
# completed an LLM round-trip".
|
||||
#
|
||||
# These helpers add three load-bearing gates ON TOP of (never replacing) the
|
||||
# existing shape + PONG checks:
|
||||
# 1. a2a_assert_real_completion — deterministic known-answer round-trip
|
||||
# (CONTAINS the expected token AND NOT an error-as-text payload).
|
||||
# 2. provider_liveness_matrix — per-offered-provider cheap completion
|
||||
# probe, providers sourced from the providers.yaml SSOT runtimes block.
|
||||
# 3. assert_byok_not_platform_proxy — #1994 regression guard: a
|
||||
# byok-resolving workspace must NOT resolve to platform_managed.
|
||||
#
|
||||
# Conventions: reuses the host script's fail()/ok()/log() + tenant_call().
|
||||
# Source this AFTER those are defined. BASH 4+.
|
||||
|
||||
# Error-as-text trap markers. If the agent's text part contains ANY of
|
||||
# these, the "round-trip" did not really complete — the agent surfaced an
|
||||
# error AS text. This is the negative assertion that makes a broken agent
|
||||
# FAIL instead of slipping through the shape check.
|
||||
#
|
||||
# Kept as an array (not a single regex) so a new failure signature is a
|
||||
# one-line append + the failure message can name which marker matched.
|
||||
A2A_ERROR_AS_TEXT_MARKERS=(
|
||||
"Agent error"
|
||||
"Exception"
|
||||
"error result"
|
||||
"MISSING_BYOK_CREDENTIAL"
|
||||
)
|
||||
|
||||
# a2a_completion_error_marker <agent_text>
|
||||
# Echoes the first error-as-text marker found in <agent_text> (case-
|
||||
# insensitive), or nothing if clean. Returns 0 if a marker matched, 1 if not.
|
||||
# Pure string scan — no LLM, no network — so it is deterministic and is the
|
||||
# unit under the fail-direction proof in test_completion_assert_unit.sh.
|
||||
a2a_completion_error_marker() {
|
||||
local text="$1"
|
||||
local upper marker
|
||||
upper=$(printf '%s' "$text" | tr '[:lower:]' '[:upper:]')
|
||||
for marker in "${A2A_ERROR_AS_TEXT_MARKERS[@]}"; do
|
||||
if printf '%s' "$upper" | grep -qF -- "$(printf '%s' "$marker" | tr '[:lower:]' '[:upper:]')"; then
|
||||
printf '%s' "$marker"
|
||||
return 0
|
||||
fi
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
# a2a_assert_real_completion <agent_text> <expected_token> <context_label>
|
||||
# The CORE gate. Asserts the agent text:
|
||||
# (a) does NOT contain any error-as-text marker (broken-agent trap), AND
|
||||
# (b) CONTAINS <expected_token> (case-insensitive) — proving a real LLM
|
||||
# round-trip produced the deterministic known answer.
|
||||
# Calls fail() (which exits) on either violation, and also on EMPTY text. This MUST fail on an
|
||||
# error-as-text payload — that is the property test_completion_assert_unit.sh
|
||||
# pins.
|
||||
a2a_assert_real_completion() {
|
||||
local text="$1"
|
||||
local expected="$2"
|
||||
local ctx="${3:-A2A}"
|
||||
|
||||
if [ -z "$text" ]; then
|
||||
fail "$ctx — real-completion gate: agent returned EMPTY text (no round-trip)."
|
||||
fi
|
||||
|
||||
local hit
|
||||
if hit=$(a2a_completion_error_marker "$text"); then
|
||||
fail "$ctx — real-completion gate: agent returned an ERROR-AS-TEXT payload (matched '$hit'). A broken agent that surfaces its error as a text part is NOT a completed round-trip. This is the trap the shape-only check missed (#1994). Raw: ${text:0:200}"
|
||||
fi
|
||||
|
||||
# Known-answer: real LLM round-trip yields the deterministic token. A
|
||||
# prompt-echo / truncated-context / wrong-auth pipeline won't.
|
||||
if ! printf '%s' "$text" | tr '[:lower:]' '[:upper:]' | grep -qF -- "$(printf '%s' "$expected" | tr '[:lower:]' '[:upper:]')"; then
|
||||
fail "$ctx — real-completion gate: reply did NOT contain expected known-answer token '$expected'. The channel returned a text shape but no real completion. Raw: ${text:0:200}"
|
||||
fi
|
||||
|
||||
ok "$ctx — real completion verified (contains '$expected', no error-as-text). Reply: \"${text:0:80}\""
|
||||
}
|
||||
|
||||
# offered_platform_models_for_runtime <runtime>
#   Prints, one per line, every model id listed under
#   runtimes.<runtime>.providers[name=platform].models in providers.yaml.
#   The list is read from the SSOT file on every call (nothing is hardcoded
#   here), so editing providers.yaml changes what the liveness probe covers.
#
#   The file path is $PROVIDERS_YAML_PATH when that is set; otherwise it is
#   resolved relative to this script as
#   <three dirs up>/workspace-server/internal/providers/providers.yaml.
#
#   Needs python3 with PyYAML. Exit status: 1 when the file does not exist,
#   2 when PyYAML cannot be imported, otherwise python3's own exit status,
#   so callers can fail loudly instead of silently probing nothing.
offered_platform_models_for_runtime() {
  local runtime="$1"
  local yaml_path="${PROVIDERS_YAML_PATH:-}"

  # Default location: this lib lives at tests/e2e/lib/, so the repo root is
  # three directories up (lib -> e2e -> tests -> repo-root).
  [ -n "$yaml_path" ] ||
    yaml_path="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/workspace-server/internal/providers/providers.yaml"

  [ -f "$yaml_path" ] || {
    log " [provider-matrix] providers.yaml SSOT not found at $yaml_path"
    return 1
  }

  # The runtime name travels via the environment and the path via argv, so
  # neither is interpolated into the (quoted) Python heredoc.
  RUNTIME_REF="$runtime" python3 - "$yaml_path" <<'PY'
import os, sys

try:
    import yaml
except Exception as e:  # PyYAML missing: report it and exit 2, never skip silently.
    sys.stderr.write(f"PyYAML required for provider-matrix SSOT read: {e}\n")
    sys.exit(2)

wanted = os.environ["RUNTIME_REF"]
with open(sys.argv[1]) as fh:
    manifest = yaml.safe_load(fh)

runtime_entry = (manifest.get("runtimes") or {}).get(wanted) or {}
for ref in runtime_entry.get("providers", []) or []:
    if ref.get("name") != "platform":
        continue
    for model_id in ref.get("models", []) or []:
        print(model_id)
PY
}
|
||||
|
||||
# provider_liveness_matrix <runtime> <probe_fn>
#   Runs <probe_fn> <model_id> once for every model that
#   offered_platform_models_for_runtime prints for <runtime>, logs a
#   per-model PASS/FAIL/SKIP matrix, and returns 0 only when every model that
#   was actually probed passed.
#
#   probe_fn contract (exit status only; its output is not captured here):
#     0      the model produced a non-error completion            -> PASS
#     75     probe unavailable in this lane (EX_TEMPFAIL style)   -> SKIP,
#            excluded from the pass/total count
#     other  anything else                                        -> FAIL
#   The probe runs with stdin redirected from /dev/null.
#
#   Purpose: exercise each offered provider's AUTH + ROUTING path so a drained
#   key / wrong base-URL / byok-misroute fails the gate (the #1994 class). The
#   probe_fn is expected to use minimal max_tokens.
#
#   This helper owns the SSOT read and the bookkeeping; the host script
#   supplies probe_fn (it owns workspace ids + tenant_call wiring).
#
#   Returns 0 when the SSOT offers no models for <runtime> (nothing to
#   probe), 1 when the SSOT cannot be read or any probed model failed.
provider_liveness_matrix() {
  local runtime="$1"
  local probe_fn="$2"
  local models model rc total=0 passed=0
  local -a results=()

  # Remember whether the caller runs with errexit so it can be restored
  # exactly after each probe, instead of force-enabling `set -e` for callers
  # that never asked for it.
  local errexit_was_on=0
  case "$-" in
    *e*) errexit_was_on=1 ;;
  esac

  models=$(offered_platform_models_for_runtime "$runtime") || {
    fail "provider-liveness: could not read offered-provider matrix from providers.yaml SSOT for runtime=$runtime"
    # Only reached when the host's fail() returns instead of exiting: never
    # fall through to the "no models" success path after a read error.
    return 1
  }
  if [ -z "$models" ]; then
    log " [provider-matrix] runtime=$runtime offers no platform-servable models in the SSOT — nothing to probe (not a failure)."
    return 0
  fi

  log " [provider-matrix] SSOT offered platform models for $runtime:"
  while IFS= read -r model; do
    [ -z "$model" ] && continue
    log " - $model"
  done <<<"$models"

  while IFS= read -r model; do
    [ -z "$model" ] && continue
    total=$((total + 1))
    # A failing probe must be recorded, not abort the run, so errexit is off
    # for the call. stdin is detached because this loop reads the model list
    # from its own stdin; a probe that read stdin would otherwise swallow the
    # remaining models.
    set +e
    "$probe_fn" "$model" </dev/null
    rc=$?
    if [ "$errexit_was_on" = "1" ]; then set -e; else set +e; fi
    if [ "$rc" = "0" ]; then
      passed=$((passed + 1))
      results+=("PASS $model")
    elif [ "$rc" = "75" ]; then
      # 75 (EX_TEMPFAIL convention) = probe skipped (key/runtime not
      # available in this lane). Not counted toward pass/fail — logged.
      total=$((total - 1))
      results+=("SKIP $model (probe unavailable in this lane)")
    else
      results+=("FAIL $model")
    fi
  done <<<"$models"

  log " [provider-matrix] result matrix (runtime=$runtime):"
  local line
  for line in "${results[@]}"; do
    log " $line"
  done
  log " [provider-matrix] $passed/$total probed providers completed without error"
  if [ "$total" = "0" ]; then
    # A vacuous 0/0 pass is still a pass, but make it visible in the log.
    log " [provider-matrix] WARNING: every offered model was skipped — no provider was actually probed in this lane."
  fi

  if [ "$passed" != "$total" ]; then
    return 1
  fi
  return 0
}
|
||||
|
||||
# assert_byok_not_platform_proxy <billing_mode_json> [context_label]
#   #1994 regression guard. <billing_mode_json> is the JSON body returned by
#   GET /admin/workspaces/:id/llm-billing-mode. The guard reads its
#   `resolved_mode` (and `provider_selection`, used only in messages) and
#   calls ok() only when resolved_mode is exactly "byok".
#
#   fail() is called when resolved_mode is missing/unreadable, when it is
#   "platform_managed" (the #1994 shape: a byok workspace routed through the
#   platform proxy, draining the platform LLM key), or when it is any other
#   value. [context_label] defaults to "byok-guard" and prefixes every
#   message.
assert_byok_not_platform_proxy() {
  local body="$1"
  local ctx="${2:-byok-guard}"
  local mode prov

  # Both extractions degrade to an empty string on invalid JSON or when
  # python3 itself fails, so the checks below see "" rather than a crash.
  mode=$(printf '%s' "$body" | python3 -c "
import json, sys
try:
    print(json.load(sys.stdin).get('resolved_mode', ''))
except Exception:
    print('')
" 2>/dev/null || echo "")
  prov=$(printf '%s' "$body" | python3 -c "
import json, sys
try:
    selection = json.load(sys.stdin).get('provider_selection')
    print(selection if selection is not None else '')
except Exception:
    print('')
" 2>/dev/null || echo "")

  [ -n "$mode" ] ||
    fail "$ctx — byok-routing guard: could not read resolved_mode from billing-mode response. Raw: ${body:0:200}"

  [ "$mode" != "platform_managed" ] ||
    fail "$ctx — byok-routing guard TRIPPED (#1994 regression): a byok-configured workspace resolved to 'platform_managed' (provider_selection=$prov) → it would route through the platform proxy and drain the platform LLM key. Expected resolved_mode=byok. Raw: ${body:0:200}"

  [ "$mode" = "byok" ] ||
    fail "$ctx — byok-routing guard: unexpected resolved_mode='$mode' (expected 'byok'). provider_selection=$prov. Raw: ${body:0:200}"

  ok "$ctx — byok-routing guard: workspace resolves byok (provider_selection=$prov), NOT platform-proxy. #1994 stays fixed."
}
|
||||
@@ -8,34 +8,6 @@ TIMEOUT="${A2A_TIMEOUT:-120}" # seconds per A2A call (override via A2A_TIMEOUT
|
||||
|
||||
# shellcheck source=_lib.sh
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
# molecule-core#1995 (#1994 follow-on): real-completion assertion helpers.
|
||||
# Adds a NEGATIVE error-as-text check on top of the shape checks below, so a
|
||||
# broken agent that returns its error AS a text part
|
||||
# ({"kind":"text","text":"Agent error (Exception) ..."}) — which STILL
|
||||
# matches the shape check `"kind":"text"` — now FAILS instead of passing.
|
||||
# shellcheck source=lib/completion_assert.sh
|
||||
source "$(dirname "$0")/lib/completion_assert.sh"
|
||||
|
||||
# check_no_error_as_text <desc> <agent_text>
#   Negative gate layered on top of the shape checks: records a PASS when
#   a2a_completion_error_marker finds no error marker in <agent_text>, and a
#   FAIL (printing the matched marker and the first three lines of the text)
#   when it does. Updates the script-level PASS / FAIL counters; it never
#   exits on its own.
check_no_error_as_text() {
  local desc="$1" text="$2" hit

  # The scanner's exit status is the verdict: success means a marker was
  # found, and its stdout (captured in $hit) is what matched.
  if ! hit=$(a2a_completion_error_marker "$text"); then
    echo "PASS: $desc"
    PASS=$((PASS + 1))
    return 0
  fi

  echo "FAIL: $desc"
  echo " agent returned an error-AS-text payload (matched '$hit') — a broken"
  echo " agent that surfaces its error as a text part is NOT a real reply."
  echo " got: $(echo "$text" | head -3)"
  FAIL=$((FAIL + 1))
}
|
||||
|
||||
check() {
|
||||
local desc="$1"
|
||||
@@ -109,8 +81,6 @@ check "JSON-RPC response has result" '"result"' "$R"
|
||||
check "Response has agent role" '"role":"agent"' "$R"
|
||||
check "Response has text part" '"kind":"text"' "$R"
|
||||
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
|
||||
# Negative gate (#1994): the text part must not BE an error.
|
||||
check_no_error_as_text "Echo reply is not an error-as-text payload" "$TEXT"
|
||||
echo " Agent said: $TEXT"
|
||||
echo ""
|
||||
|
||||
@@ -122,11 +92,6 @@ R=$(a2a_send "$SEO_ID" "What SEO skills do you have?")
|
||||
check "SEO agent responds" '"result"' "$R"
|
||||
check "SEO response has text" '"kind":"text"' "$R"
|
||||
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
|
||||
# Negative gate (#1994): a broken SEO agent that returns "Agent error
|
||||
# (Exception) ..." AS text still matches the `"kind":"text"` shape check
|
||||
# above — THAT is the gap that let drained-key/byok-misroute failures pass
|
||||
# CI. This makes that case FAIL.
|
||||
check_no_error_as_text "SEO reply is not an error-as-text payload" "$TEXT"
|
||||
echo " SEO Agent said: $TEXT"
|
||||
echo ""
|
||||
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Fail-direction / load-bearing proof for lib/completion_assert.sh.
|
||||
#
|
||||
# This is the watch-it-FAIL counterpart the dev-SOP Phase 3 requires: it
|
||||
# proves the new real-completion + byok gates actually CATCH a broken agent,
|
||||
# not just pass on a good one. It runs entirely offline (no LLM, no network,
|
||||
# no provisioning) — pure assertion logic — so it can run on every PR in the
|
||||
# fast lane (e2e-api.yml unit-shell step) and locally via `bash`.
|
||||
#
|
||||
# The decisive case is `error-as-text payload MUST FAIL`: that is the exact
|
||||
# trap (#1994) the historical shape-only check missed. If a refactor weakens
|
||||
# a2a_assert_real_completion to a substring/shape check, THIS test goes red.
|
||||
set -uo pipefail
|
||||
|
||||
HERE="$(cd "$(dirname "$0")" && pwd)"
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# Minimal stand-ins for the host script's helpers. fail() must NOT exit the
|
||||
# whole harness here — we want to assert that it WAS called. We trap it by
|
||||
# running the assertion in a subshell and checking the subshell's exit code:
|
||||
# the real fail() exits 1, ok() exits 0 implicitly.
|
||||
log() { echo "[unit] $*"; }
|
||||
ok() { echo "[unit] OK: $*"; }
|
||||
fail() { echo "[unit] FAIL-CALLED: $*" >&2; exit 1; }
|
||||
|
||||
# shellcheck source=lib/completion_assert.sh
|
||||
source "$HERE/lib/completion_assert.sh"
|
||||
|
||||
# expect_pass <desc> <command> [args...]
#   Runs the command in a subshell (so a fail() that exits cannot take the
#   harness down) with all output discarded, and records PASS when it exits
#   0, FAIL otherwise.
expect_pass() {
  local desc="$1"
  shift

  local status=0
  ( "$@" ) >/dev/null 2>&1 || status=$?

  if [ "$status" -ne 0 ]; then
    echo "FAIL: $desc — expected the assertion to ACCEPT, but it rejected"
    FAIL=$((FAIL + 1))
    return 0
  fi
  echo "PASS: $desc (assertion accepted, as expected)"
  PASS=$((PASS + 1))
}
|
||||
|
||||
# expect_fail <desc> <command> [args...]
#   Mirror of expect_pass: runs the command in a subshell with all output
#   discarded and records PASS only when it exits non-zero. A zero exit means
#   the gate under test accepted something it must reject, so it is a FAIL.
expect_fail() {
  local desc="$1"
  shift

  local status=0
  ( "$@" ) >/dev/null 2>&1 || status=$?

  if [ "$status" -ne 0 ]; then
    echo "PASS: $desc (assertion rejected, as expected)"
    PASS=$((PASS + 1))
    return 0
  fi
  echo "FAIL: $desc — expected the assertion to REJECT, but it accepted (gate NOT load-bearing!)"
  FAIL=$((FAIL + 1))
}
|
||||
|
||||
echo "=== completion_assert.sh fail-direction proof ==="
|
||||
|
||||
# ---- a2a_assert_real_completion ----
|
||||
# Good: real known-answer reply passes.
|
||||
expect_pass "real PINEAPPLE reply passes" \
|
||||
a2a_assert_real_completion "PINEAPPLE" "PINEAPPLE" "unit"
|
||||
expect_pass "case-insensitive known answer passes" \
|
||||
a2a_assert_real_completion "pineapple" "PINEAPPLE" "unit"
|
||||
expect_pass "known answer with minor wrapping passes" \
|
||||
a2a_assert_real_completion "Sure: PINEAPPLE" "PINEAPPLE" "unit"
|
||||
|
||||
# DECISIVE: the error-as-text trap. Each MUST fail — these are the payloads a
|
||||
# broken agent returns that the old shape-only `"kind":"text"` check passed.
|
||||
expect_fail "Agent error as text payload MUST fail" \
|
||||
a2a_assert_real_completion "Agent error (Exception) — see workspace logs for details." "PINEAPPLE" "unit"
|
||||
expect_fail "bare Exception as text MUST fail" \
|
||||
a2a_assert_real_completion "Traceback ... Exception: boom" "PINEAPPLE" "unit"
|
||||
expect_fail "error result as text MUST fail" \
|
||||
a2a_assert_real_completion "tool returned error result" "PINEAPPLE" "unit"
|
||||
expect_fail "MISSING_BYOK_CREDENTIAL as text MUST fail" \
|
||||
a2a_assert_real_completion "MISSING_BYOK_CREDENTIAL: set your own key" "PINEAPPLE" "unit"
|
||||
# Error-as-text that ALSO happens to contain the token still fails (error
|
||||
# marker takes precedence — a real completion never carries these markers).
|
||||
expect_fail "error-as-text containing the token still fails" \
|
||||
a2a_assert_real_completion "Agent error: could not produce PINEAPPLE" "PINEAPPLE" "unit"
|
||||
# Empty text fails.
|
||||
expect_fail "empty text fails" \
|
||||
a2a_assert_real_completion "" "PINEAPPLE" "unit"
|
||||
# Wrong/echoed content (no token, no error) fails — shape-OK but not a real
|
||||
# completion.
|
||||
expect_fail "wrong content without token fails" \
|
||||
a2a_assert_real_completion "Reply with exactly the word PINEAPPLE and nothing else." "BANANA" "unit"
|
||||
|
||||
# ---- assert_byok_not_platform_proxy (#1994 guard) ----
|
||||
expect_pass "byok resolution passes the guard" \
|
||||
assert_byok_not_platform_proxy '{"resolved_mode":"byok","provider_selection":"minimax","source":"derived_provider"}' "unit"
|
||||
# DECISIVE: a platform_managed resolution on a byok workspace = the #1994
|
||||
# regression. MUST fail.
|
||||
expect_fail "platform_managed resolution trips the #1994 guard" \
|
||||
assert_byok_not_platform_proxy '{"resolved_mode":"platform_managed","provider_selection":"platform","source":"derived_provider"}' "unit"
|
||||
expect_fail "missing resolved_mode trips the guard" \
|
||||
assert_byok_not_platform_proxy '{"provider_selection":"x"}' "unit"
|
||||
expect_fail "disabled mode trips the guard (not byok)" \
|
||||
assert_byok_not_platform_proxy '{"resolved_mode":"disabled"}' "unit"
|
||||
|
||||
# ---- a2a_completion_error_marker (the scanner under the gate) ----
|
||||
if hit=$(a2a_completion_error_marker "all good PINEAPPLE"); then
|
||||
echo "FAIL: clean text wrongly flagged as error marker ($hit)"; FAIL=$((FAIL + 1))
|
||||
else
|
||||
echo "PASS: clean text has no error marker"; PASS=$((PASS + 1))
|
||||
fi
|
||||
if hit=$(a2a_completion_error_marker "An Exception occurred"); then
|
||||
echo "PASS: error marker detected ($hit)"; PASS=$((PASS + 1))
|
||||
else
|
||||
echo "FAIL: error marker NOT detected in 'An Exception occurred'"; FAIL=$((FAIL + 1))
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -99,12 +99,6 @@ source "$(dirname "$0")/lib/model_slug.sh"
|
||||
# shellcheck disable=SC1091
|
||||
# shellcheck source=lib/aws_leak_check.sh
|
||||
source "$(dirname "$0")/lib/aws_leak_check.sh"
|
||||
# shellcheck disable=SC1091
|
||||
# shellcheck source=lib/completion_assert.sh
|
||||
# molecule-core#1995 (#1994 follow-on): real-completion + per-provider
|
||||
# liveness + byok-routing assertion helpers. Adds gates that FAIL on an
|
||||
# error-as-text payload (the trap the shape-only A2A checks missed).
|
||||
source "$(dirname "$0")/lib/completion_assert.sh"
|
||||
|
||||
CURL_COMMON=(-sS --fail-with-body --max-time 30)
|
||||
E2E_TMP_FILES=()
|
||||
@@ -873,182 +867,6 @@ fi
|
||||
|
||||
ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
|
||||
|
||||
# ─── 8b. Real-completion known-answer round-trip (CORE GATE, #1994) ────
|
||||
# The existing PONG check + generic error grep above already do a lot, but
|
||||
# this stanza is the canonical real-completion gate the #1994 follow-on
|
||||
# adds: a DETERMINISTIC known-answer prompt asserted via
|
||||
# a2a_assert_real_completion, which FAILS on an error-as-text payload
|
||||
# ({"kind":"text","text":"Agent error (Exception) ..."}). That payload
|
||||
# matches the historical shape-only check `"kind":"text"` and so passed CI
|
||||
# on a fully broken agent (drained-key / byok-misroute, 2026-05-2x). This
|
||||
# gate makes that case RED. Reuses the same cold-start retry-on-transient
|
||||
# (502/503/504) loop the PONG probe uses — retry-once-on-network, never on
|
||||
# agent-error. Single round-trip → the one place we spend a non-trivial
|
||||
# token budget (default backend MiniMax — cheap token plan).
|
||||
KA_PAYLOAD=$(python3 -c "
|
||||
import json, uuid
|
||||
print(json.dumps({
|
||||
'jsonrpc': '2.0',
|
||||
'method': 'message/send',
|
||||
'id': 'e2e-known-answer-1',
|
||||
'params': {
|
||||
'message': {
|
||||
'role': 'user',
|
||||
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
|
||||
'parts': [{'kind': 'text', 'text': 'Reply with exactly the word PINEAPPLE and nothing else.'}]
|
||||
}
|
||||
}
|
||||
}))
|
||||
")
|
||||
KA_TMP=$(mktemp -t known_answer_a2a.XXXXXX)
|
||||
KA_RESP=""
|
||||
for KA_ATTEMPT in $(seq 1 6); do
|
||||
: >"$KA_TMP"
|
||||
set +e
|
||||
KA_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
|
||||
--max-time 90 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$KA_PAYLOAD" \
|
||||
-o "$KA_TMP" \
|
||||
-w '%{http_code}' \
|
||||
2>/dev/null)
|
||||
KA_RC=$?
|
||||
set -e
|
||||
KA_CODE=${KA_CODE:-000}
|
||||
KA_RESP=$(cat "$KA_TMP" 2>/dev/null || echo "")
|
||||
if [ "$KA_RC" = "0" ] && [ "$KA_CODE" -ge 200 ] && [ "$KA_CODE" -lt 300 ]; then
|
||||
break
|
||||
fi
|
||||
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
|
||||
# Retry ONLY on transient transport errors — never on an agent-level
|
||||
# error (those must surface and fail the gate).
|
||||
if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
|
||||
log " known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY"
|
||||
if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi
|
||||
fi
|
||||
break
|
||||
done
|
||||
rm -f "$KA_TMP"
|
||||
if [ "$KA_RC" != "0" ] || [ "$KA_CODE" -lt 200 ] || [ "$KA_CODE" -ge 300 ]; then
|
||||
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
|
||||
fail "Known-answer A2A POST failed after $KA_ATTEMPT attempt(s) (curl_rc=$KA_RC, http=$KA_CODE): $KA_SAFE_BODY"
|
||||
fi
|
||||
KA_TEXT=$(echo "$KA_RESP" | python3 -c "
|
||||
import json, sys
|
||||
try:
|
||||
d = json.load(sys.stdin)
|
||||
parts = d.get('result', {}).get('parts', [])
|
||||
print(parts[0].get('text', '') if parts else '')
|
||||
except Exception:
|
||||
print('')
|
||||
" 2>/dev/null || echo "")
|
||||
# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
|
||||
a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
|
||||
|
||||
# ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
|
||||
# The parent was provisioned with the customer's OWN vendor key
|
||||
# (MINIMAX_API_KEY / ANTHROPIC_API_KEY in SECRETS_JSON) → it must resolve
|
||||
# BYOK, not platform_managed. #1994 was exactly the inverse: a byok
|
||||
# workspace baked platform_managed on (re-)provision → routed through the
|
||||
# platform proxy → drained the platform LLM key. We read the SAME derived
|
||||
# resolver the provision-time strip gate uses
|
||||
# (GET /admin/workspaces/:id/llm-billing-mode) and assert resolved_mode!=
|
||||
# platform_managed. A regression flips it RED.
|
||||
#
|
||||
# Only meaningful when the parent actually carries a byok credential; the
|
||||
# OpenAI/hermes path uses a different env shape, and the no-key path is
|
||||
# legitimately platform_managed (the CTO default). Gate on the same
|
||||
# E2E_*_API_KEY presence the SECRETS_JSON branch keyed off.
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ] || [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
set +e
|
||||
BILLING_RESP=$(tenant_call GET "/admin/workspaces/$PARENT_ID/llm-billing-mode" 2>/dev/null)
|
||||
BILLING_RC=$?
|
||||
set -e
|
||||
if [ "$BILLING_RC" != "0" ] || [ -z "$BILLING_RESP" ]; then
|
||||
fail "byok-routing guard: GET /admin/workspaces/$PARENT_ID/llm-billing-mode failed (rc=$BILLING_RC). Body: ${BILLING_RESP:0:200}"
|
||||
fi
|
||||
assert_byok_not_platform_proxy "$BILLING_RESP" "byok-guard (parent, $RUNTIME/$MODEL_SLUG)"
|
||||
else
|
||||
log "8c. byok-routing guard skipped — parent carries no own-vendor key (OpenAI/no-key path is legitimately platform_managed)."
|
||||
fi
|
||||
|
||||
# ─── 8d. Per-offered-provider liveness matrix (SSOT-driven, #1994 class) ─
|
||||
# For each platform-servable model the providers.yaml SSOT
|
||||
# (runtimes.<runtime>.providers[platform].models) declares for this
|
||||
# runtime, send a minimal max_tokens-bounded "say ok" probe and assert a
|
||||
# NON-ERROR completion. Purpose: exercise each offered provider's AUTH +
|
||||
# ROUTING path so a drained key / wrong base-URL / byok-misroute fails the
|
||||
# gate (the #1994 class). Providers/models come from the SSOT — not a
|
||||
# hardcoded list — so the matrix tracks providers.yaml automatically.
|
||||
#
|
||||
# This lane provisions ONE parent workspace with ONE configured key, so we
|
||||
# can only truly drive the providers that key authenticates. Probing a
|
||||
# model whose provider key is absent in this lane is reported SKIP (rc=75),
|
||||
# not FAIL — keeping the gate deterministic + low-flake. The matrix still
|
||||
# proves the configured provider's full auth+routing path end-to-end, and
|
||||
# logs the offered set so over/under-offer drift is visible in the CI log.
|
||||
# provider_liveness_probe <model_id>
#   probe_fn for provider_liveness_matrix. Returns 75 (skip) when this lane
#   has no API key for the model's vendor, 1 when the A2A call fails or the
#   reply is empty / carries an error marker, and 0 on a clean reply.
#
#   NOTE(review): <model_id> is only used to pick the vendor and to label log
#   lines; the request payload below does not carry it, so every probe sends
#   the same message to $PARENT_ID. Confirm whether a per-model override was
#   intended.
provider_liveness_probe() {
  local model_id="$1"

  # The vendor is the namespace token before the first "/" of the SSOT model
  # id (e.g. minimax/MiniMax-M2.7 -> minimax). Vendors with no key wired in
  # this lane (kimi/moonshot etc.) leave lane_key empty and are skipped.
  local vendor="${model_id%%/*}"
  local lane_key=""
  case "$vendor" in
    minimax)   lane_key="${E2E_MINIMAX_API_KEY:-}" ;;
    anthropic) lane_key="${E2E_ANTHROPIC_API_KEY:-}" ;;
    openai)    lane_key="${E2E_OPENAI_API_KEY:-}" ;;
  esac
  [ -n "$lane_key" ] || return 75

  local probe_payload
  probe_payload=$(python3 -c "
import json, uuid
print(json.dumps({
    'jsonrpc': '2.0',
    'method': 'message/send',
    'id': 'e2e-liveness-' + uuid.uuid4().hex[:6],
    'params': {
        'message': {
            'role': 'user',
            'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
            'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}],
        },
        'configuration': {'max_tokens': 4}
    }
}))
")

  local tmp code rc resp
  tmp=$(mktemp -t liveness_a2a.XXXXXX)
  # The HTTP status is captured on stdout and the body in $tmp; errexit is
  # suspended so a transport failure is reported below instead of aborting.
  set +e
  code=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
    --max-time 60 \
    -H "Content-Type: application/json" \
    -d "$probe_payload" \
    -o "$tmp" -w '%{http_code}' 2>/dev/null)
  rc=$?
  set -e
  resp=$(cat "$tmp" 2>/dev/null || echo "")
  rm -f "$tmp"

  # An empty status (nothing was written) is treated as 000.
  code="${code:-000}"
  if [ "$rc" != "0" ] || [ "$code" -lt 200 ] || [ "$code" -ge 300 ]; then
    log " probe $model_id: HTTP $code rc=$rc"
    return 1
  fi

  # First text part of the JSON-RPC result; empty on any parse problem.
  local text
  text=$(echo "$resp" | python3 -c "
import json, sys
try:
    parts = json.load(sys.stdin).get('result', {}).get('parts', [])
    print(parts[0].get('text', '') if parts else '')
except Exception:
    print('')
" 2>/dev/null || echo "")

  if [ -z "$text" ] || a2a_completion_error_marker "$text" >/dev/null; then
    log " probe $model_id: error-as-text or empty: ${text:0:120}"
    return 1
  fi
  return 0
}
|
||||
if ! provider_liveness_matrix "$RUNTIME" provider_liveness_probe; then
|
||||
fail "Per-provider liveness matrix: at least one offered provider failed its auth+routing probe (see matrix above). This is the #1994 class — a drained key / wrong base-URL / byok-misroute."
|
||||
fi
|
||||
ok "Per-provider liveness matrix passed (all probed offered providers completed without error)"
|
||||
|
||||
# ─── 9. HMA + peers + activity (full mode) ─────────────────────────────
|
||||
if [ "$MODE" = "full" ]; then
|
||||
log "9/11 Writing + reading HMA memory on parent..."
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
#
|
||||
# Invocation (from template-hermes repo's CI):
|
||||
#
|
||||
# bash /path/to/molecule-core/tools/check-template-parity.sh \
|
||||
# bash /path/to/molecule-monorepo/tools/check-template-parity.sh \
|
||||
# install.sh start.sh
|
||||
#
|
||||
# Or inline via curl:
|
||||
|
||||
@@ -1,271 +0,0 @@
|
||||
// Command gen-providers is the codegen half of the provider-registry SSOT
|
||||
// machinery on the molecule-core side (internal#718 P2-A, CTO 2026-05-27
|
||||
// "Distribution = SDK via codegen + verify-CI"). It is the byte-for-byte mirror
|
||||
// of molecule-controlplane's cmd/gen-providers (the canonical generator). It
|
||||
// reads core's SYNCED COPY of the schema — internal/providers/providers.yaml
|
||||
// (via the providers loader, so it shares the SAME parse + validation as the
|
||||
// runtime) — and emits a checked-in Go artifact:
|
||||
//
|
||||
// internal/providers/gen/registry_gen.go
|
||||
//
|
||||
// The artifact is a deterministic projection of the merged registry: the
|
||||
// provider catalog + per-runtime native sets as Go literals, plus the schema
|
||||
// version and a content fingerprint. It is core's leaf of the multi-language SDK
|
||||
// layer the RFC calls for (Go(CP+core)/TS(canvas)/Python(adapters)).
|
||||
//
|
||||
// CONTRACT for P2-A (zero behavior change): the generated artifact is
|
||||
// checked-in + drift-gated ONLY. NO production code path imports
|
||||
// internal/providers/gen — the gen-import-boundary test pins that. P2-B wires
|
||||
// the billing/credential decision onto the LOADER (DeriveProvider/IsPlatform),
|
||||
// not the raw gen literals. The generator is the build-time half;
|
||||
// verify-providers-gen.yml is the CI half that regenerates and fails RED on any
|
||||
// diff (drift or hand-edit); sync-providers-yaml.yml gates the synced copy
|
||||
// against the controlplane canonical.
|
||||
//
|
||||
// Usage:
|
||||
//
|
||||
// go run ./cmd/gen-providers # write the artifact in place
|
||||
// go run ./cmd/gen-providers -check # exit non-zero if the on-disk
|
||||
// # artifact differs from a fresh gen
|
||||
// # (the CI drift gate)
|
||||
// go run ./cmd/gen-providers -o PATH # write to a specific path
|
||||
//
|
||||
//go:generate go run ../gen-providers -o ../../internal/providers/gen/registry_gen.go
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/format"
|
||||
"os"
|
||||
"sort"
|
||||
"strconv"
|
||||
"text/template"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
)
|
||||
|
||||
// defaultOutPath is the checked-in artifact location, relative to the repo
|
||||
// root (the directory `go run ./cmd/gen-providers` is invoked from).
|
||||
const defaultOutPath = "internal/providers/gen/registry_gen.go"
|
||||
|
||||
func main() {
|
||||
var (
|
||||
outPath string
|
||||
check bool
|
||||
)
|
||||
flag.StringVar(&outPath, "o", defaultOutPath, "output path for the generated artifact")
|
||||
flag.BoolVar(&check, "check", false, "verify the on-disk artifact matches a fresh generation; exit 1 on drift")
|
||||
flag.Parse()
|
||||
|
||||
generated, err := render()
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if check {
|
||||
existing, err := os.ReadFile(outPath)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers -check: cannot read %s: %v\n", outPath, err)
|
||||
fmt.Fprintln(os.Stderr, "Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.")
|
||||
os.Exit(1)
|
||||
}
|
||||
if !bytes.Equal(existing, generated) {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers -check: DRIFT — %s is out of sync with providers.yaml.\n", outPath)
|
||||
fmt.Fprintln(os.Stderr, "The generated artifact was hand-edited or providers.yaml changed without regen.")
|
||||
fmt.Fprintln(os.Stderr, "Fix: run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit.")
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Println("gen-providers -check: OK — artifact in sync with providers.yaml")
|
||||
return
|
||||
}
|
||||
|
||||
if err := os.WriteFile(outPath, generated, 0o644); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "gen-providers: write %s: %v\n", outPath, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Printf("gen-providers: wrote %s\n", outPath)
|
||||
}
|
||||
|
||||
// render loads the manifest and produces the gofmt'd artifact bytes.
|
||||
func render() ([]byte, error) {
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("load manifest: %w", err)
|
||||
}
|
||||
|
||||
// Deterministic ordering: providers in catalog order is already stable
|
||||
// (slice). Runtimes is a map — sort its keys so the artifact is
|
||||
// reproducible regardless of Go map iteration order.
|
||||
runtimeNames := make([]string, 0, len(m.Runtimes))
|
||||
for rt := range m.Runtimes {
|
||||
runtimeNames = append(runtimeNames, rt)
|
||||
}
|
||||
sort.Strings(runtimeNames)
|
||||
|
||||
type genProvider struct {
|
||||
Name string
|
||||
DisplayName string
|
||||
Protocol string
|
||||
AuthMode string
|
||||
AuthEnv []string
|
||||
ModelPrefixMatch string
|
||||
IsPlatform bool
|
||||
// UpstreamVendor is the proxy's upstream-vendor key for this entry
|
||||
// (internal#718 P1, CONVERGED) — empty for entries the proxy does not
|
||||
// route to an upstream. A plain scalar (no pointer), so both the rendered
|
||||
// literal and the fingerprint stay deterministic.
|
||||
UpstreamVendor string
|
||||
}
|
||||
type genRef struct {
|
||||
Name string
|
||||
Models []string
|
||||
}
|
||||
type genRuntime struct {
|
||||
Name string
|
||||
Providers []genRef
|
||||
}
|
||||
|
||||
data := struct {
|
||||
SchemaVersion int
|
||||
Fingerprint string
|
||||
Providers []genProvider
|
||||
Runtimes []genRuntime
|
||||
}{
|
||||
SchemaVersion: providers.SchemaVersion(),
|
||||
}
|
||||
|
||||
for _, p := range m.Providers {
|
||||
gp := genProvider{
|
||||
Name: p.Name,
|
||||
DisplayName: p.DisplayName,
|
||||
Protocol: string(p.Protocol),
|
||||
AuthMode: p.AuthMode,
|
||||
AuthEnv: p.AuthEnv,
|
||||
ModelPrefixMatch: p.ModelPrefixMatch,
|
||||
IsPlatform: p.IsPlatform(),
|
||||
UpstreamVendor: p.UpstreamVendor,
|
||||
}
|
||||
data.Providers = append(data.Providers, gp)
|
||||
}
|
||||
for _, rt := range runtimeNames {
|
||||
native := m.Runtimes[rt]
|
||||
gr := genRuntime{Name: rt}
|
||||
for _, ref := range native.Providers {
|
||||
gr.Providers = append(gr.Providers, genRef{Name: ref.Name, Models: ref.Models})
|
||||
}
|
||||
data.Runtimes = append(data.Runtimes, gr)
|
||||
}
|
||||
|
||||
// Fingerprint pins the artifact to the data it was generated from. It is
|
||||
// derived from the structured projection (schema version + providers +
|
||||
// runtimes), NOT the raw YAML bytes, so a comment-only YAML edit does not
|
||||
// churn the artifact while any data change does.
|
||||
data.Fingerprint = fingerprint(data.SchemaVersion, data.Providers, data.Runtimes)
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := artifactTmpl.Execute(&buf, data); err != nil {
|
||||
return nil, fmt.Errorf("execute template: %w", err)
|
||||
}
|
||||
formatted, err := format.Source(buf.Bytes())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gofmt generated source: %w\n----\n%s", err, buf.String())
|
||||
}
|
||||
return formatted, nil
|
||||
}
|
||||
|
||||
// fingerprint returns the first 16 hex characters of the SHA-256 digest of
// the schema version followed by the %#v renderings of provs and runtimes.
// Because %#v spells out type and field names as well as values, the fields
// of the projected types must stay in sync with what the template emits, or
// the hash and the generated literals diverge.
func fingerprint(schema int, provs any, runtimes any) string {
	payload := fmt.Sprintf("schema=%d\n%#v\n%#v\n", schema, provs, runtimes)
	digest := sha256.Sum256([]byte(payload))
	return hex.EncodeToString(digest[:])[:16]
}
|
||||
|
||||
// quote renders s as a double-quoted Go string literal; it is exposed to the
// artifact template under the name "quote".
func quote(s string) string {
	literal := strconv.Quote(s)
	return literal
}
|
||||
|
||||
// quoteSlice renders ss as a Go []string composite literal with every
// element quoted, e.g. []string{"a", "b"}. A nil or empty slice renders as
// []string{}.
func quoteSlice(ss []string) string {
	var out bytes.Buffer
	out.WriteString("[]string{")
	sep := "" // becomes ", " once the first element has been written
	for _, item := range ss {
		out.WriteString(sep)
		out.WriteString(strconv.Quote(item))
		sep = ", "
	}
	out.WriteByte('}')
	return out.String()
}
|
||||
|
||||
// artifactTmpl is the text/template that renders the generated Go artifact.
// It is parsed once at package initialization (template.Must panics on a
// parse error) and exposes the quote and quoteSlice helpers to the template
// body. The template reads .SchemaVersion, .Fingerprint, .Providers and
// .Runtimes from the data it is executed with.
var artifactTmpl = template.Must(
	template.New("artifact").
		Funcs(template.FuncMap{
			"quote":      quote,
			"quoteSlice": quoteSlice,
		}).
		Parse(`// Code generated by cmd/gen-providers; DO NOT EDIT.
//
// Source of truth: internal/providers/providers.yaml (schema_version {{.SchemaVersion}}).
// Regenerate with: go generate ./... (or: go run ./cmd/gen-providers)
// The verify-providers-gen CI workflow fails RED if this file drifts from
// providers.yaml or is hand-edited. internal#718 P0 — checked-in + drift-
// gated ONLY; no production path imports this package yet (that is P1+).

package gen

// SchemaVersion is the providers.yaml schema this artifact was generated
// against. It is the semver'd contract version (the MAJOR component for the
// public extension contract; see internal/providers/README.md).
const SchemaVersion = {{.SchemaVersion}}

// Fingerprint is a stable content hash of the generated projection (schema
// version + provider catalog + runtime native sets). It changes iff the
// registry DATA changes (comment-only YAML edits do not churn it).
const Fingerprint = {{quote .Fingerprint}}

// GenProvider is the generated projection of one provider catalog entry —
// the subset a downstream consumer needs to derive + display a provider.
type GenProvider struct {
	Name string
	DisplayName string
	Protocol string
	AuthMode string
	AuthEnv []string
	ModelPrefixMatch string
	// IsPlatform marks the closed, core-only platform-managed provider.
	IsPlatform bool
	// UpstreamVendor is the proxy's upstream-vendor key for this entry
	// (internal#718 P1, CONVERGED); empty for providers the proxy does not
	// route to an upstream vendor. ResolveUpstream maps a model id's namespace
	// token to the entry whose UpstreamVendor equals it.
	UpstreamVendor string
}

// GenRuntimeRef is one native provider a runtime supports + its exact models.
type GenRuntimeRef struct {
	Name string
	Models []string
}

// Providers is the full provider catalog, in providers.yaml declaration order.
var Providers = []GenProvider{
{{- range .Providers}}
	{Name: {{quote .Name}}, DisplayName: {{quote .DisplayName}}, Protocol: {{quote .Protocol}}, AuthMode: {{quote .AuthMode}}, AuthEnv: {{quoteSlice .AuthEnv}}, ModelPrefixMatch: {{quote .ModelPrefixMatch}}, IsPlatform: {{.IsPlatform}}{{if .UpstreamVendor}}, UpstreamVendor: {{quote .UpstreamVendor}}{{end}}},
{{- end}}
}

// Runtimes maps each runtime to its native provider+model set, runtime names
// sorted for a deterministic artifact.
var Runtimes = map[string][]GenRuntimeRef{
{{- range .Runtimes}}
	{{quote .Name}}: {
{{- range .Providers}}
		{Name: {{quote .Name}}, Models: {{quoteSlice .Models}}},
{{- end}}
	},
{{- end}}
}
`),
)
|
||||
@@ -1,121 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// repoRoot returns the nearest directory, starting at the test's working
// directory and walking upward, that contains a go.mod file. This lets the
// tests locate the checked-in artifact regardless of where `go test` is
// invoked from. At most six directories are inspected; the test is failed
// if none of them holds a go.mod or if the working directory cannot be read.
func repoRoot(t *testing.T) string {
	t.Helper()

	cwd, err := os.Getwd()
	if err != nil {
		t.Fatalf("getwd: %v", err)
	}

	candidate := cwd
	for hops := 0; hops < 6; hops++ {
		marker := filepath.Join(candidate, "go.mod")
		if _, statErr := os.Stat(marker); statErr == nil {
			return candidate
		}
		candidate = filepath.Dir(candidate)
	}

	t.Fatal("could not locate repo root (go.mod) from cmd/gen-providers")
	return ""
}
|
||||
|
||||
// TestArtifactInSync is the drift gate's Go-test counterpart: the checked-in
|
||||
// internal/providers/gen/registry_gen.go MUST byte-equal a fresh render. If a
|
||||
// future edit changes providers.yaml without regenerating, OR hand-edits the
|
||||
// artifact, this flips red — the same signal the verify-providers-gen CI
|
||||
// workflow emits, but caught locally by `go test ./...` too.
|
||||
func TestArtifactInSync(t *testing.T) {
|
||||
generated, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() error = %v", err)
|
||||
}
|
||||
artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
|
||||
onDisk, err := os.ReadFile(artifactPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read checked-in artifact %s: %v (run `go generate ./...` and commit)", artifactPath, err)
|
||||
}
|
||||
if !bytes.Equal(onDisk, generated) {
|
||||
t.Fatalf("DRIFT: %s is out of sync with providers.yaml.\n"+
|
||||
"Run `go generate ./...` (or `go run ./cmd/gen-providers`) and commit the result.", defaultOutPath)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDriftGateCatchesMutation is the load-bearing-gate proof (per the SOP
|
||||
// fail-direction discipline). The original P0 version was TAUTOLOGICAL
|
||||
// (internal#718 P1 review carry-over): it appended bytes to an in-memory copy
|
||||
// and asserted the copy differed from the original — true by construction,
|
||||
// touching neither the on-disk artifact nor the actual in-sync comparison the
|
||||
// gate runs. This version exercises the REAL gate: it writes a MUTATED artifact
|
||||
// to disk and re-runs the SAME comparison TestArtifactInSync / `-check` perform
|
||||
// (`render()` bytes vs the on-disk file), asserting it now reports drift — then
|
||||
// restores the original. So the test would fail if the gate were vacuous (e.g.
|
||||
// if the comparison ignored content), not merely if append changes bytes.
|
||||
func TestDriftGateCatchesMutation(t *testing.T) {
|
||||
generated, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() error = %v", err)
|
||||
}
|
||||
artifactPath := filepath.Join(repoRoot(t), defaultOutPath)
|
||||
original, err := os.ReadFile(artifactPath)
|
||||
if err != nil {
|
||||
t.Fatalf("read checked-in artifact %s: %v", artifactPath, err)
|
||||
}
|
||||
// Precondition: the tree is in sync (so the mutation is what flips the gate,
|
||||
// not pre-existing drift).
|
||||
if !bytes.Equal(original, generated) {
|
||||
t.Fatalf("precondition failed: %s already drifted from render() — run `go generate ./...`", defaultOutPath)
|
||||
}
|
||||
|
||||
// Restore the pristine artifact no matter how the test exits.
|
||||
t.Cleanup(func() {
|
||||
if err := os.WriteFile(artifactPath, original, 0o644); err != nil {
|
||||
t.Fatalf("CRITICAL: failed to restore %s after mutation: %v", artifactPath, err)
|
||||
}
|
||||
})
|
||||
|
||||
// Mutate the ON-DISK artifact (simulating a hand-edit / a providers.yaml
|
||||
// change that wasn't regenerated).
|
||||
mutated := append(append([]byte(nil), original...), []byte("\n// injected drift\n")...)
|
||||
if err := os.WriteFile(artifactPath, mutated, 0o644); err != nil {
|
||||
t.Fatalf("write mutated artifact: %v", err)
|
||||
}
|
||||
|
||||
// Re-run the EXACT in-sync comparison the gate uses: fresh render vs the
|
||||
// (now mutated) on-disk file. It MUST report drift.
|
||||
onDiskAfter, err := os.ReadFile(artifactPath)
|
||||
if err != nil {
|
||||
t.Fatalf("re-read mutated artifact: %v", err)
|
||||
}
|
||||
freshRender, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() after mutation error = %v", err)
|
||||
}
|
||||
if bytes.Equal(onDiskAfter, freshRender) {
|
||||
t.Fatal("drift gate did NOT detect a mutated on-disk artifact — gate is not load-bearing")
|
||||
}
|
||||
}
|
||||
|
||||
// TestRenderDeterministic proves regeneration is idempotent: two renders of
|
||||
// the same manifest produce byte-identical output (sorted runtime keys, stable
|
||||
// catalog order). A non-deterministic generator would make the drift gate
|
||||
// flap on Go map iteration order.
|
||||
func TestRenderDeterministic(t *testing.T) {
|
||||
a, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() #1 error = %v", err)
|
||||
}
|
||||
b, err := render()
|
||||
if err != nil {
|
||||
t.Fatalf("render() #2 error = %v", err)
|
||||
}
|
||||
if !bytes.Equal(a, b) {
|
||||
t.Fatal("render() is non-deterministic — two runs differ; the drift gate would flap")
|
||||
}
|
||||
}
|
||||
@@ -26,12 +26,11 @@ import (
|
||||
// the update cycle — no ssh, no re-provision, no ops toil.
|
||||
//
|
||||
// Contract (paired with cp-side GET /cp/tenants/config):
|
||||
//
|
||||
// Request: GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
|
||||
// Authorization: Bearer <ADMIN_TOKEN>
|
||||
// X-Molecule-Org-Id: <MOLECULE_ORG_ID>
|
||||
// Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
|
||||
// 401 on bearer mismatch or unknown org
|
||||
// Request: GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
|
||||
// Authorization: Bearer <ADMIN_TOKEN>
|
||||
// X-Molecule-Org-Id: <MOLECULE_ORG_ID>
|
||||
// Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
|
||||
// 401 on bearer mismatch or unknown org
|
||||
//
|
||||
// Best-effort: any failure logs and returns — main() keeps booting.
|
||||
// Self-hosted deploys without MOLECULE_ORG_ID or ADMIN_TOKEN set
|
||||
@@ -106,53 +105,3 @@ func refreshEnvFromCP() error {
|
||||
log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
|
||||
return nil
|
||||
}
|
||||
|
||||
// requiredLLMEnvVars lists the LLM proxy env vars a managed SaaS tenant must
// have populated after refreshEnvFromCP (cp#469, tenant proxy-env delivery;
// Researcher Task #37 / Spec 2 and Task #46, the watch-fail-first test).
//
// The key set is byte-matched against the emission verified in controlplane
// tenant_config.go:140-144 (Researcher REQUEST_CHANGES iterate body,
// 3987f59c). These four keys are the LLM-proxy subset of the 8 CP-emitted
// keys. OPENAI_BASE_URL, OPENAI_API_KEY, ANTHROPIC_BASE_URL and
// ANTHROPIC_API_KEY are out of scope for cp#469: they belong to different
// feature surfaces (direct-to-provider fallbacks, not the proxy).
//
// Two names are easy to get wrong and were wrong in earlier revisions:
//   - MOLECULE_LLM_USAGE_URL was MOLECULE_LLM_URL in v1.
//   - MOLECULE_LLM_ANTHROPIC_BASE_URL was bare ANTHROPIC_BASE_URL in v2; the
//     bare name is a separate CP-emitted key for direct-provider use.
var requiredLLMEnvVars = []string{
	"MOLECULE_LLM_USAGE_TOKEN",
	"MOLECULE_LLM_USAGE_URL",
	"MOLECULE_LLM_BASE_URL",
	"MOLECULE_LLM_ANTHROPIC_BASE_URL",
}
|
||||
|
||||
// assertManagedTenantHasLLMEnv verifies that, when running as a
|
||||
// managed SaaS tenant (MOLECULE_ORG_ID + ADMIN_TOKEN both set), all
|
||||
// required LLM proxy env vars are populated after refreshEnvFromCP.
|
||||
//
|
||||
// Self-hosted (no orgID/adminToken) is exempt — dev must not be
|
||||
// blocked here. Managed tenants with missing LLM keys fail with
|
||||
// MISSING_CP_LLM_ENV so they do not silently boot with broken proxy
|
||||
// creds. Caller in main.go decides whether to log and continue or
|
||||
// log.Fatalf depending on deployment context.
|
||||
func assertManagedTenantHasLLMEnv() error {
|
||||
if os.Getenv("MOLECULE_ORG_ID") == "" || os.Getenv("ADMIN_TOKEN") == "" {
|
||||
// Self-hosted dev / not yet provisioned — not a managed tenant.
|
||||
return nil
|
||||
}
|
||||
var missing []string
|
||||
for _, k := range requiredLLMEnvVars {
|
||||
if os.Getenv(k) == "" {
|
||||
missing = append(missing, k)
|
||||
}
|
||||
}
|
||||
if len(missing) > 0 {
|
||||
return fmt.Errorf("MISSING_CP_LLM_ENV: required LLM proxy keys not set after refreshEnvFromCP: %v", missing)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user