Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f12c38b3f6 |
@@ -466,40 +466,12 @@ def fetch_log(target_url: str) -> str | None:
|
||||
|
||||
def grep_fail_markers(log_text: str) -> list[str]:
|
||||
"""Return up to 5 sample matching lines for any FAIL_PATTERNS hit.
|
||||
Empty list = clean log.
|
||||
|
||||
Heuristic: skip lines where the marker appears inside script source
|
||||
(e.g. ``echo "::error::..."`` in a ``::group::Run`` block) rather
|
||||
than actual execution output. The Gitea Actions log prints the raw
|
||||
script before executing it; ``echo "::error::"`` lines in that
|
||||
display are false positives.
|
||||
"""
|
||||
Empty list = clean log."""
|
||||
matches: list[str] = []
|
||||
in_run_group = False
|
||||
group_depth = 0
|
||||
for line in log_text.splitlines():
|
||||
stripped = line.strip()
|
||||
# Track Gitea Actions group markers so we can skip the
|
||||
# ``::group::Run`` script-source display blocks.
|
||||
if stripped.startswith("::group::Run"):
|
||||
in_run_group = True
|
||||
group_depth = 1
|
||||
continue
|
||||
if stripped == "::endgroup::":
|
||||
if in_run_group:
|
||||
in_run_group = False
|
||||
group_depth = 0
|
||||
continue
|
||||
if in_run_group:
|
||||
continue
|
||||
for pat in FAIL_PATTERNS:
|
||||
if pat in line:
|
||||
# Additional false-positive guard: ``echo "::error::"``
|
||||
# is script source, not a runtime error emission.
|
||||
if pat == "::error::":
|
||||
prefix = line[: line.index(pat)].strip()
|
||||
if prefix.endswith('echo') or prefix.endswith("echo '") or prefix.endswith('echo "'):
|
||||
break
|
||||
# Truncate to keep error output bounded.
|
||||
matches.append(line.strip()[:240])
|
||||
break
|
||||
if len(matches) >= 5:
|
||||
|
||||
@@ -364,71 +364,6 @@ def _api_json_optional(url: str, token: str) -> tuple[int, dict | None]:
|
||||
return exc.code, None
|
||||
|
||||
|
||||
def current_branch_head(env: dict[str, str]) -> str | None:
|
||||
"""Return the SHA at the tip of the deploy branch (main) per Gitea, or None.
|
||||
|
||||
Used to detect a *superseded* deploy job (see `superseded_by`). Fail-safe:
|
||||
any read error / missing token returns None so the caller treats the job as
|
||||
NOT superseded and the strict /buildinfo verify still runs. We never let an
|
||||
unreadable head silently green a deploy.
|
||||
"""
|
||||
|
||||
token = env.get("GITEA_TOKEN", "").strip()
|
||||
if not token:
|
||||
return None
|
||||
host = env.get("GITEA_HOST", "git.moleculesai.app")
|
||||
repo = env.get("GITHUB_REPOSITORY", "molecule-ai/molecule-core")
|
||||
# Deploy lane is on: push:main; the branch is always main here, but read it
|
||||
# from the ref name when present so a future branch rename doesn't break us.
|
||||
branch = env.get("GITHUB_REF_NAME", "").strip() or "main"
|
||||
url = f"https://{host}/api/v1/repos/{repo}/branches/{quote(branch, safe='')}"
|
||||
status, body = _api_json_optional(url, token)
|
||||
if status != 200 or not isinstance(body, dict):
|
||||
return None
|
||||
commit = body.get("commit")
|
||||
if isinstance(commit, dict):
|
||||
head = commit.get("id") or commit.get("sha")
|
||||
if isinstance(head, str) and head.strip():
|
||||
return head.strip()
|
||||
return None
|
||||
|
||||
|
||||
def superseded_by(env: dict[str, str]) -> str | None:
|
||||
"""Return the newer head SHA if THIS deploy job has been superseded, else None.
|
||||
|
||||
This workflow runs with no `concurrency:` (intentional — Gitea 1.22.6 cancels
|
||||
queued runs, which is unacceptable for a prod deploy). When two main pushes
|
||||
land close together, BOTH deploy-production jobs run. The newer push rolls the
|
||||
fleet forward first; the OLDER job's strict /buildinfo verify then sees tenants
|
||||
on the NEWER SHA and false-reds with "$slug is stale" — even though the fleet
|
||||
is AHEAD, not behind. Git SHAs aren't ordered, so the verify can't tell ahead
|
||||
from behind on its own (and /buildinfo exposes only git_sha, no build time).
|
||||
|
||||
Resolve it at the source of truth for ordering — the branch ref: if main's
|
||||
current head is a DIFFERENT SHA than the one this job is deploying, a newer
|
||||
commit has landed and this job is superseded; the newest job's verify is the
|
||||
authoritative one. We return that head SHA so the caller can log it and exit
|
||||
success early, skipping the strict-equality verify for this stale job.
|
||||
|
||||
Fail-safe: returns None (NOT superseded) when the head can't be read or equals
|
||||
our SHA, so a genuinely-behind tenant under the LATEST deploy job still fails
|
||||
the strict verify loudly. This never suppresses a real-stale signal — it only
|
||||
excuses a job that is no longer the latest from asserting exact equality.
|
||||
"""
|
||||
|
||||
sha = env.get("GITHUB_SHA", "").strip()
|
||||
if not sha:
|
||||
return None
|
||||
head = current_branch_head(env)
|
||||
if not head:
|
||||
return None
|
||||
# SHA lengths can differ (short vs full); compare on the shorter prefix.
|
||||
n = min(len(head), len(sha))
|
||||
if head[:n].lower() == sha[:n].lower():
|
||||
return None
|
||||
return head
|
||||
|
||||
|
||||
def live_disable_flag(env: dict[str, str]) -> str:
|
||||
"""Return a live disable value from Gitea variables when readable.
|
||||
|
||||
@@ -507,14 +442,6 @@ def main() -> int:
|
||||
sub.add_parser("plan", help="print production deploy plan as JSON")
|
||||
sub.add_parser("assert-enabled", help="fail if production deploy is currently disabled")
|
||||
sub.add_parser("wait-ci", help="block until required CI context is green")
|
||||
sub.add_parser(
|
||||
"check-superseded",
|
||||
help=(
|
||||
"exit 0 if a newer commit has landed on the deploy branch (this job "
|
||||
"is superseded; prints the newer head SHA), exit 10 if this job is "
|
||||
"still the latest"
|
||||
),
|
||||
)
|
||||
rollout_parser = sub.add_parser("rollout", help="execute canary-first scoped production rollout")
|
||||
rollout_parser.add_argument("--plan", required=True, help="path to prod-auto-deploy plan JSON")
|
||||
rollout_parser.add_argument("--response", required=True, help="path to write aggregate response JSON")
|
||||
@@ -530,16 +457,6 @@ def main() -> int:
|
||||
if args.command == "wait-ci":
|
||||
wait_for_ci_context(dict(os.environ))
|
||||
return 0
|
||||
if args.command == "check-superseded":
|
||||
newer = superseded_by(dict(os.environ))
|
||||
if newer:
|
||||
print(newer)
|
||||
return 0
|
||||
# Exit 10 (not 0, not 1): "this job is still the latest". The
|
||||
# workflow treats only exit 0 as superseded; 10 means proceed to
|
||||
# the strict verify. A non-zero code here is informational, not a
|
||||
# failure — the workflow step swallows it.
|
||||
return 10
|
||||
if args.command == "rollout":
|
||||
rollout_from_plan_file(args.plan, args.response, dict(os.environ))
|
||||
return 0
|
||||
|
||||
@@ -1,244 +0,0 @@
|
||||
"""Live-fire regression test for #2159 — gate auto-fire runtime verification.
|
||||
|
||||
Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML
|
||||
is structurally correct. This test validates the *runtime* path: submitting an
|
||||
APPROVED review to a PR whose head contains the current gate workflows causes
|
||||
Gitea Actions to queue the qa-review + security-review workflows and POST the
|
||||
branch-protection-required (pull_request_target) contexts within a reasonable
|
||||
window.
|
||||
|
||||
Skipped when Gitea API credentials are not available. Intended for:
|
||||
- manual developer verification
|
||||
- CI jobs provisioned with a service-account token
|
||||
|
||||
Environment:
|
||||
GITEA_HOST — default: git.moleculesai.app
|
||||
GITEA_TOKEN — token with read:repository + write:issues (for review POST)
|
||||
REPO — default: molecule-ai/molecule-core
|
||||
LIVEFIRE_PR_NUMBER — optional; if omitted the test tries to find a
|
||||
suitable open PR automatically, or skips.
|
||||
LIVEFIRE_TIMEOUT_SEC — default: 120
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
import yaml
|
||||
|
||||
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
|
||||
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
|
||||
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
|
||||
LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "")
|
||||
LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120"))
|
||||
|
||||
REQUIRED_CONTEXTS = [
|
||||
"qa-review / approved (pull_request_target)",
|
||||
"security-review / approved (pull_request_target)",
|
||||
]
|
||||
|
||||
skip_no_token = pytest.mark.skipif(
|
||||
not GITEA_TOKEN,
|
||||
reason="GITEA_TOKEN not set — live-fire test requires API credentials",
|
||||
)
|
||||
|
||||
|
||||
def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
|
||||
url = f"https://{GITEA_HOST}/api/v1{path}"
|
||||
headers = {
|
||||
"Authorization": f"token {GITEA_TOKEN}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
data = json.dumps(body).encode() if body else None
|
||||
req = urllib.request.Request(url, data=data, headers=headers, method=method)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
raw = resp.read()
|
||||
code = resp.status
|
||||
except urllib.error.HTTPError as exc:
|
||||
raw = exc.read()
|
||||
code = exc.code
|
||||
payload = json.loads(raw) if raw else {}
|
||||
return code, payload
|
||||
|
||||
|
||||
def _get_pr(number: int) -> dict:
|
||||
code, pr = _api("GET", f"/repos/{REPO}/pulls/{number}")
|
||||
if code != 200:
|
||||
pytest.fail(f"GET /pulls/{number} returned HTTP {code}: {pr}")
|
||||
return pr
|
||||
|
||||
|
||||
def _list_open_prs() -> list[dict]:
|
||||
code, prs = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50")
|
||||
if code != 200:
|
||||
pytest.fail(f"GET /pulls?state=open returned HTTP {code}: {prs}")
|
||||
return prs
|
||||
|
||||
|
||||
def _pr_has_trigger_in_head(pr: dict) -> bool:
|
||||
"""Return True if the PR head contains pull_request_review in both workflows."""
|
||||
head_sha = pr["head"]["sha"]
|
||||
for wf_name in ("qa-review.yml", "security-review.yml"):
|
||||
path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}"
|
||||
code, payload = _api("GET", path)
|
||||
if code != 200:
|
||||
return False
|
||||
raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
|
||||
wf = yaml.safe_load(raw)
|
||||
on = wf.get(True) or wf.get("on") or {}
|
||||
if isinstance(on, str):
|
||||
if on != "pull_request_review":
|
||||
return False
|
||||
elif "pull_request_review" not in on:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _find_suitable_pr() -> dict:
|
||||
if LIVEFIRE_PR_NUMBER:
|
||||
pr = _get_pr(int(LIVEFIRE_PR_NUMBER))
|
||||
if pr.get("state") != "open":
|
||||
pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open")
|
||||
return pr
|
||||
|
||||
prs = _list_open_prs()
|
||||
for pr in prs:
|
||||
if _pr_has_trigger_in_head(pr):
|
||||
return pr
|
||||
pytest.skip("No open PR found whose head contains the pull_request_review trigger")
|
||||
|
||||
|
||||
def _submit_approved_review(pr_number: int) -> dict:
|
||||
code, review = _api(
|
||||
"POST",
|
||||
f"/repos/{REPO}/pulls/{pr_number}/reviews",
|
||||
{"body": "Live-fire test APPROVED review", "event": "APPROVED"},
|
||||
)
|
||||
# 200 = created, 422 = review already exists (idempotent enough for our purposes)
|
||||
if code not in (200, 201, 422):
|
||||
pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}")
|
||||
return review
|
||||
|
||||
|
||||
def _get_status_snapshot(sha: str) -> dict[str, dict]:
|
||||
"""Return mapping context -> {id, updated_at, target_url} for required contexts."""
|
||||
code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
|
||||
if code != 200:
|
||||
return {}
|
||||
result: dict[str, dict] = {}
|
||||
for st in statuses:
|
||||
ctx = st.get("context", "")
|
||||
if ctx in REQUIRED_CONTEXTS:
|
||||
result[ctx] = {
|
||||
"id": st.get("id"),
|
||||
"updated_at": st.get("updated_at", st.get("created_at", "")),
|
||||
"target_url": st.get("target_url"),
|
||||
}
|
||||
return result
|
||||
|
||||
|
||||
def _extract_run_id(target_url: str | None) -> str | None:
|
||||
"""Extract the Actions run_id from a status target_url."""
|
||||
if not target_url:
|
||||
return None
|
||||
m = re.search(r"/actions/runs/(\d+)", target_url)
|
||||
return m.group(1) if m else None
|
||||
|
||||
|
||||
def _poll_fresh_statuses(
|
||||
sha: str,
|
||||
prior_snapshot: dict[str, dict],
|
||||
timeout_sec: int = LIVEFIRE_TIMEOUT_SEC,
|
||||
) -> dict[str, dict]:
|
||||
"""Poll until required contexts appear fresh (newer timestamp, id, or run)."""
|
||||
deadline = time.monotonic() + timeout_sec
|
||||
found: dict[str, dict] = {}
|
||||
while time.monotonic() < deadline:
|
||||
code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
|
||||
if code == 200:
|
||||
for st in statuses:
|
||||
ctx = st.get("context", "")
|
||||
if ctx in REQUIRED_CONTEXTS:
|
||||
updated_at = st.get("updated_at", st.get("created_at", ""))
|
||||
status_id = st.get("id")
|
||||
target_url = st.get("target_url")
|
||||
prior = prior_snapshot.get(ctx, {})
|
||||
# Fresh if timestamp changed, id changed, or target_url changed.
|
||||
is_fresh = (
|
||||
ctx not in prior_snapshot
|
||||
or updated_at != prior.get("updated_at", "")
|
||||
or status_id != prior.get("id")
|
||||
or target_url != prior.get("target_url")
|
||||
)
|
||||
if is_fresh:
|
||||
found[ctx] = {
|
||||
"state": st.get("state", st.get("status", "")),
|
||||
"updated_at": updated_at,
|
||||
"id": status_id,
|
||||
"target_url": target_url,
|
||||
}
|
||||
if all(ctx in found for ctx in REQUIRED_CONTEXTS):
|
||||
return found
|
||||
time.sleep(5)
|
||||
return found
|
||||
|
||||
|
||||
@skip_no_token
|
||||
class TestGateAutoFireLive:
|
||||
def test_auto_fire_posts_required_contexts(self):
|
||||
"""Submit APPROVED review; assert BP-required contexts appear fresh within timeout."""
|
||||
pr = _find_suitable_pr()
|
||||
pr_number = pr["number"]
|
||||
head_sha = pr["head"]["sha"]
|
||||
|
||||
# Capture pre-existing status snapshot so we can prove FRESH contexts
|
||||
# were posted after the review submission (not stale from a prior run).
|
||||
prior_snapshot = _get_status_snapshot(head_sha)
|
||||
prior_run_ids = {
|
||||
_extract_run_id(s["target_url"])
|
||||
for s in prior_snapshot.values()
|
||||
if _extract_run_id(s["target_url"])
|
||||
}
|
||||
|
||||
review = _submit_approved_review(pr_number)
|
||||
|
||||
found = _poll_fresh_statuses(head_sha, prior_snapshot)
|
||||
|
||||
missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found]
|
||||
if missing:
|
||||
pytest.fail(
|
||||
f"After {LIVEFIRE_TIMEOUT_SEC}s, fresh contexts still missing: {missing}. "
|
||||
f"Found: {found}. Prior snapshot: {prior_snapshot}. "
|
||||
f"PR #{pr_number} head={head_sha}. "
|
||||
f"This indicates the pull_request_review trigger did not fire at runtime."
|
||||
)
|
||||
|
||||
# The contexts appeared fresh — that's the proof of auto-fire.
|
||||
# We do NOT assert success vs failure; the evaluator decides that.
|
||||
# The point of #2159 is that the workflows QUEUE and POST at all.
|
||||
for ctx, info in found.items():
|
||||
state = info["state"]
|
||||
assert state in ("pending", "success", "failure"), (
|
||||
f"Unexpected state {state!r} for {ctx}"
|
||||
)
|
||||
|
||||
# CR2 Finding 1: prove a NEW workflow run was triggered, not just
|
||||
# an in-place status update. Gitea 1.22.6 lacks REST /actions/runs/*
|
||||
# endpoints, so we use the run_id embedded in the status target_url
|
||||
# as a proxy for distinct run_id.
|
||||
run_id = _extract_run_id(info.get("target_url"))
|
||||
if run_id and run_id in prior_run_ids:
|
||||
pytest.fail(
|
||||
f"Context {ctx!r} has target_url run_id {run_id} which existed "
|
||||
f"BEFORE the review was submitted. This means the status was "
|
||||
f"updated in-place by an existing run, not by a new workflow "
|
||||
f"run triggered from the pull_request_review event."
|
||||
)
|
||||
@@ -1,145 +0,0 @@
|
||||
"""Stale-head diagnostic test for #2159.
|
||||
|
||||
Deterministically reports whether a PR's HEAD contains the pull_request_review
|
||||
trigger in qa-review.yml and security-review.yml. If the trigger is absent,
|
||||
auto-fire on APPROVED review is impossible for that PR.
|
||||
|
||||
This is used as a self-diagnostic for future stale-PR situations (PRs opened
|
||||
before #2157 merged, or branches cut from old bases).
|
||||
|
||||
Environment:
|
||||
GITEA_HOST — default: git.moleculesai.app
|
||||
GITEA_TOKEN — token with read:repository scope (optional; falls back to local files)
|
||||
REPO — default: molecule-ai/molecule-core
|
||||
PR_NUMBER — required when running against a real PR
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
import yaml
|
||||
|
||||
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
|
||||
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
|
||||
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
|
||||
PR_NUMBER = os.environ.get("PR_NUMBER", "")
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def _api(method: str, path: str) -> tuple[int, dict]:
|
||||
url = f"https://{GITEA_HOST}/api/v1{path}"
|
||||
headers = {"Authorization": f"token {GITEA_TOKEN}"}
|
||||
req = urllib.request.Request(url, headers=headers, method=method)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
return resp.status, json.loads(resp.read())
|
||||
except urllib.error.HTTPError as exc:
|
||||
body = exc.read()
|
||||
return exc.code, json.loads(body) if body else {}
|
||||
|
||||
|
||||
def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict:
|
||||
path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}"
|
||||
code, payload = _api("GET", path)
|
||||
if code != 200:
|
||||
pytest.fail(
|
||||
f"GET {path} returned HTTP {code}: {payload}. "
|
||||
f"Cannot determine whether PR head contains the trigger."
|
||||
)
|
||||
raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
|
||||
return yaml.safe_load(raw)
|
||||
|
||||
|
||||
def _fetch_workflow_local(workflow_name: str) -> dict:
|
||||
p = ROOT / "workflows" / workflow_name
|
||||
if not p.exists():
|
||||
pytest.fail(f"Local workflow file not found: {p}")
|
||||
return yaml.safe_load(p.read_text())
|
||||
|
||||
|
||||
def _has_pull_request_review_trigger(wf: dict) -> bool:
|
||||
on = wf.get(True) or wf.get("on") or {}
|
||||
if isinstance(on, list):
|
||||
return "pull_request_review" in on
|
||||
if isinstance(on, dict):
|
||||
return "pull_request_review" in on
|
||||
if isinstance(on, str):
|
||||
return on == "pull_request_review"
|
||||
return False
|
||||
|
||||
|
||||
def _diagnose_pr(pr_number: int) -> dict[str, bool]:
|
||||
code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}")
|
||||
if code != 200:
|
||||
pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}")
|
||||
|
||||
head_ref = pr["head"]["ref"]
|
||||
head_sha = pr["head"]["sha"]
|
||||
|
||||
results: dict[str, bool] = {}
|
||||
for wf_name in ("qa-review.yml", "security-review.yml"):
|
||||
wf = _fetch_workflow_from_ref(wf_name, head_sha)
|
||||
results[wf_name] = _has_pull_request_review_trigger(wf)
|
||||
|
||||
return {
|
||||
"pr_number": pr_number,
|
||||
"head_ref": head_ref,
|
||||
"head_sha": head_sha,
|
||||
"triggers": results,
|
||||
"auto_fire_possible": all(results.values()),
|
||||
}
|
||||
|
||||
|
||||
def _diagnose_local() -> dict[str, bool]:
|
||||
results: dict[str, bool] = {}
|
||||
for wf_name in ("qa-review.yml", "security-review.yml"):
|
||||
wf = _fetch_workflow_local(wf_name)
|
||||
results[wf_name] = _has_pull_request_review_trigger(wf)
|
||||
return {
|
||||
"pr_number": None,
|
||||
"head_ref": "local-checkout",
|
||||
"head_sha": None,
|
||||
"triggers": results,
|
||||
"auto_fire_possible": all(results.values()),
|
||||
}
|
||||
|
||||
|
||||
class TestStaleHeadDiagnostic:
|
||||
"""Test deterministically reports 'auto-fire impossible for this PR' when
|
||||
the PR head lacks the pull_request_review trigger.
|
||||
"""
|
||||
|
||||
def test_local_checkout_has_pull_request_review_trigger(self):
|
||||
"""Local files (the ones in this checkout) must contain the trigger.
|
||||
|
||||
This is the baseline: if the checkout itself is stale, every PR cut
|
||||
from it will also be stale.
|
||||
"""
|
||||
diag = _diagnose_local()
|
||||
missing = [n for n, ok in diag["triggers"].items() if not ok]
|
||||
if missing:
|
||||
pytest.fail(
|
||||
f"Local checkout is missing pull_request_review trigger in: {missing}. "
|
||||
f"This branch cannot produce PRs that auto-fire."
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set")
|
||||
@pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set")
|
||||
def test_pr_head_has_pull_request_review_trigger(self):
|
||||
"""When PR_NUMBER is given, assert the PR head contains the trigger."""
|
||||
diag = _diagnose_pr(int(PR_NUMBER))
|
||||
if not diag["auto_fire_possible"]:
|
||||
missing = [n for n, ok in diag["triggers"].items() if not ok]
|
||||
pytest.fail(
|
||||
f"Auto-fire impossible for PR #{diag['pr_number']}. "
|
||||
f"Head ref={diag['head_ref']} sha={diag['head_sha']}. "
|
||||
f"Missing trigger in: {missing}. "
|
||||
f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main."
|
||||
)
|
||||
@@ -486,129 +486,3 @@ def test_scoped_rollout_dry_run_does_not_assert_coverage():
|
||||
sleep=lambda _s: None,
|
||||
)
|
||||
assert aggregate["ok"] is True
|
||||
|
||||
|
||||
# --- Superseded-deploy guard (false-stale fix) -----------------------------
|
||||
#
|
||||
# Scenario this fixes: no `concurrency:` on the prod-deploy workflow means two
|
||||
# close main pushes run BOTH deploy-production jobs. eb31bcf (Fix A) and 286338
|
||||
# (Fix C) merge back-to-back; the 286338 job rolls the fleet to staging-2863380
|
||||
# first; the OLDER eb31bcf job's strict verify then sees tenants on 2863380 and
|
||||
# false-reds "stale" though the fleet is AHEAD. superseded_by detects that main's
|
||||
# head is no longer eb31bcf and lets the older job succeed without weakening the
|
||||
# behind-tenant signal for whichever job IS the latest.
|
||||
|
||||
|
||||
def test_superseded_by_returns_newer_head_when_main_moved_ahead(monkeypatch):
|
||||
# eb31bcf job: main head is now 2863380 -> superseded, return the newer head.
|
||||
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
|
||||
newer = prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"})
|
||||
assert newer == "2863380fullhash"
|
||||
|
||||
|
||||
def test_superseded_by_none_when_this_job_is_still_head(monkeypatch):
|
||||
# 2863380 job (the latest): head == our SHA -> NOT superseded -> strict verify
|
||||
# runs, so a genuinely-behind tenant still fails loudly.
|
||||
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
|
||||
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
|
||||
|
||||
|
||||
def test_superseded_by_matches_on_short_vs_full_sha_prefix(monkeypatch):
|
||||
# GITHUB_SHA is full; Gitea may return a different-length id. Equal prefixes
|
||||
# must NOT count as superseded (avoid false-skipping the real latest job).
|
||||
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380")
|
||||
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
|
||||
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380FULLHASH")
|
||||
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
|
||||
|
||||
|
||||
def test_superseded_by_fail_safe_returns_none_when_head_unreadable(monkeypatch):
|
||||
# Fail-safe: unreadable head (no token / API error) must NOT be treated as
|
||||
# superseded, so the strict verify still runs and never silently greens.
|
||||
monkeypatch.setattr(prod, "current_branch_head", lambda _env: None)
|
||||
assert prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"}) is None
|
||||
|
||||
|
||||
def test_superseded_by_none_without_github_sha(monkeypatch):
|
||||
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
|
||||
assert prod.superseded_by({}) is None
|
||||
|
||||
|
||||
def test_current_branch_head_parses_gitea_branch_commit_id(monkeypatch):
|
||||
captured = {}
|
||||
|
||||
def fake_optional(url, _token):
|
||||
captured["url"] = url
|
||||
return 200, {"name": "main", "commit": {"id": "2863380fullhash"}}
|
||||
|
||||
monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
|
||||
head = prod.current_branch_head(
|
||||
{"GITEA_TOKEN": "secret", "GITHUB_REPOSITORY": "molecule-ai/molecule-core"}
|
||||
)
|
||||
assert head == "2863380fullhash"
|
||||
assert captured["url"].endswith("/repos/molecule-ai/molecule-core/branches/main")
|
||||
|
||||
|
||||
def test_current_branch_head_uses_ref_name_branch(monkeypatch):
|
||||
captured = {}
|
||||
|
||||
def fake_optional(url, _token):
|
||||
captured["url"] = url
|
||||
return 200, {"commit": {"sha": "deadbeef"}}
|
||||
|
||||
monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
|
||||
head = prod.current_branch_head(
|
||||
{"GITEA_TOKEN": "secret", "GITHUB_REF_NAME": "release"}
|
||||
)
|
||||
assert head == "deadbeef"
|
||||
assert captured["url"].endswith("/branches/release")
|
||||
|
||||
|
||||
def test_current_branch_head_none_without_token():
|
||||
assert prod.current_branch_head({}) is None
|
||||
|
||||
|
||||
def test_current_branch_head_none_on_non_200(monkeypatch):
|
||||
monkeypatch.setattr(prod, "_api_json_optional", lambda _u, _t: (500, None))
|
||||
assert prod.current_branch_head({"GITEA_TOKEN": "secret"}) is None
|
||||
|
||||
|
||||
# --- #2213: superseded check must fire BEFORE production side effects ----------
|
||||
#
|
||||
# Real incident shape: two main pushes land ~2 min apart. The OLDER deploy job
|
||||
# (GITHUB_SHA=7a72516, target staging-7a72516) started LATE — main head was
|
||||
# already 7f25373. The #2194 guard only protected the *verify* step, so the
|
||||
# older job still:
|
||||
# 1. rolled the canary (hongming) BACKWARD to staging-7a72516 (the #2213 red,
|
||||
# seen as the newer job's verify reading hongming on the old SHA), then
|
||||
# 2. promoted :latest backward to the older image,
|
||||
# before finally skipping verify. The workflow now calls this same superseded
|
||||
# check BEFORE the redeploy + promote steps and gates both off when it fires.
|
||||
# These tests pin the contract that check-superseded relies on for the exact
|
||||
# incident shape.
|
||||
|
||||
|
||||
def test_superseded_by_fires_for_older_job_when_newer_already_head(monkeypatch):
|
||||
# Older job (7a72516) re-checks the head just before rollout and finds the
|
||||
# newer merge (7f25373) already owns main -> superseded -> skip side effects.
|
||||
monkeypatch.setattr(
|
||||
prod, "current_branch_head", lambda _env: "7f25373309eca54a36f08c371ff783c3a47c3f8d"
|
||||
)
|
||||
newer = prod.superseded_by(
|
||||
{"GITHUB_SHA": "7a72516f7e7ba1a710c4f393fef08be8d22e1866"}
|
||||
)
|
||||
assert newer == "7f25373309eca54a36f08c371ff783c3a47c3f8d"
|
||||
|
||||
|
||||
def test_superseded_by_none_for_latest_job_so_it_still_rolls(monkeypatch):
|
||||
# The newer job (7f25373) IS the head -> NOT superseded -> it proceeds to
|
||||
# roll the fleet and verify, so a genuinely-behind tenant still fails loud.
|
||||
monkeypatch.setattr(
|
||||
prod, "current_branch_head", lambda _env: "7f25373309eca54a36f08c371ff783c3a47c3f8d"
|
||||
)
|
||||
assert (
|
||||
prod.superseded_by(
|
||||
{"GITHUB_SHA": "7f25373309eca54a36f08c371ff783c3a47c3f8d"}
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
@@ -96,7 +96,6 @@ env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: advisory arm64 pilot, non-gating by design (internal#418).
|
||||
fast-checks:
|
||||
name: fast-checks
|
||||
# AND-set: only the Mac arm64 runner advertises macos-self-hosted.
|
||||
|
||||
+38
-37
@@ -25,9 +25,10 @@
|
||||
# sufficient for `actions/checkout` against this same repo.
|
||||
#
|
||||
# 4. Docs — no docs/scripts reference github.com URLs that need swapping.
|
||||
# The canvas-deploy-status step (core#2226, formerly canvas-deploy-reminder)
|
||||
# writes the canvas ordered-deploy status into the step summary; it points
|
||||
# at the ECR canvas image and the publish workflow, no ghcr.io prose.
|
||||
# The canvas-deploy-reminder step writes a `ghcr.io/...` image
|
||||
# reference into the step summary text — that's documentation prose
|
||||
# pointing at the ECR-mirrored canvas image and stays unchanged for
|
||||
# this port (a separate cleanup if ghcr→ECR sweep is in scope).
|
||||
#
|
||||
# Cross-links:
|
||||
# - RFC: internal#219 (CI/CD hard-gate hardening)
|
||||
@@ -388,61 +389,61 @@ jobs:
|
||||
|
||||
# mc#959 root-fix (sre)
|
||||
|
||||
canvas-deploy-status:
|
||||
# core#2226: replaces the old advisory "Canvas Deploy Reminder". The canvas
|
||||
# image now has a real ORDERED auto-deploy (publish-canvas-image.yml:
|
||||
# build → push :staging-<sha> → wait green main CI → promote :latest by
|
||||
# digest), and docker-compose pins via CANVAS_IMAGE_TAG. There is no longer
|
||||
# a manual "go run docker compose pull by hand" step to remind operators
|
||||
# about — so this job just records, on a canvas-touching main push, that the
|
||||
# ordered deploy is handling it (and where to watch), instead of prescribing
|
||||
# a manual action that determinism made obsolete.
|
||||
name: Canvas Deploy Status
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
runs-on: docker-host
|
||||
# Job-level `if:` so ci-required-drift.py's ci_job_names() detects this as
|
||||
# github.ref-gated and skips it from the required-context F1 set (mc#1982).
|
||||
# Step-level exit 0 handles the "not a canvas main push" case.
|
||||
# mc#1982 root-fix: added job-level `if:` so ci-required-drift.py's
|
||||
# ci_job_names() detects this as github.ref-gated and skips it from F1.
|
||||
# The step-level exit 0 handles the "not main push" case; the job-level
|
||||
# `if:` makes the gating explicit so the drift script sees it.
|
||||
# Runs on both main and staging pushes; step exits 0 when not applicable.
|
||||
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
|
||||
needs: [changes, canvas-build]
|
||||
steps:
|
||||
- name: Record canvas ordered-deploy status
|
||||
- name: Write deploy reminder to step summary
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
CANVAS_CHANGED: ${{ needs.changes.outputs.canvas }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REF_NAME: ${{ github.ref }}
|
||||
# github.server_url resolves via the workflow-level env override to the
|
||||
# Gitea instance, so RUN_URL points at the Gitea run page (not github.com).
|
||||
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions
|
||||
# github.server_url resolves via the workflow-level env override
|
||||
# to the Gitea instance, so the RUN_URL points at the Gitea run
|
||||
# page (not github.com). See feedback_act_runner_github_server_url.
|
||||
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ "$CANVAS_CHANGED" != "true" ] || [ "$EVENT_NAME" != "push" ] || [ "$REF_NAME" != "refs/heads/main" ]; then
|
||||
echo "Canvas deploy status not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
|
||||
echo "Canvas deploy reminder not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Write body to a temp file — avoids backtick escaping in shell.
|
||||
cat > /tmp/deploy-status.md << 'BODY'
|
||||
## Canvas ordered deploy in progress — no manual action required
|
||||
cat > /tmp/deploy-reminder.md << 'BODY'
|
||||
## Canvas build passed — deploy required
|
||||
|
||||
This canvas-touching main push triggers `publish-canvas-image`, which now
|
||||
runs an ORDERED, CI-gated deploy (core#2226) — the same shape as the
|
||||
platform's deploy-production:
|
||||
The `publish-canvas-image` workflow is now building a fresh Docker image
|
||||
(`ghcr.io/molecule-ai/canvas:latest`) in the background.
|
||||
|
||||
1. Build → push `molecule-ai/canvas:staging-<sha>` + `:staging-latest`.
|
||||
2. Wait for green main CI on this SHA.
|
||||
3. Promote `:latest` to the verified `:staging-<sha>` by digest.
|
||||
Once it completes (~3–5 min), apply on the host machine with:
|
||||
```bash
|
||||
cd <runner-workspace>
|
||||
git pull origin main
|
||||
docker compose pull canvas && docker compose up -d canvas
|
||||
```
|
||||
|
||||
Tenants/hosts pin via `CANVAS_IMAGE_TAG` (default `latest` = the last
|
||||
CI-green build), so a deploy is reproducible — no hand-run
|
||||
`docker compose pull` needed. Watch the run in the canvas publish workflow.
|
||||
If you need to rebuild from local source instead (e.g. testing unreleased
|
||||
changes or a new `NEXT_PUBLIC_*` URL), use:
|
||||
```bash
|
||||
docker compose build canvas && docker compose up -d canvas
|
||||
```
|
||||
BODY
|
||||
printf '\n> Posted automatically by CI · commit `%s` · [publish workflow](%s)\n' \
|
||||
"$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-status.md
|
||||
printf '\n> Posted automatically by CI · commit `%s` · [build log](%s)\n' \
|
||||
"$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-reminder.md
|
||||
|
||||
# Gitea has no commit-comments API; write to GITHUB_STEP_SUMMARY, which
|
||||
# both GitHub and Gitea Actions render as the run's summary page.
|
||||
cat /tmp/deploy-status.md >> "$GITHUB_STEP_SUMMARY"
|
||||
# Gitea has no commit-comments API; write to GITHUB_STEP_SUMMARY,
|
||||
# which both GitHub Actions and Gitea Actions render as the
|
||||
# workflow run's summary page. (#75 / PR-D)
|
||||
cat /tmp/deploy-reminder.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
# Python Lint & Test — required check, always runs.
|
||||
# Runtime Python moved to molecule-ai-workspace-runtime. Keep this context as
|
||||
|
||||
@@ -123,9 +123,8 @@ jobs:
|
||||
# integration). See internal#512 for the class defect.
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
api: ${{ steps.decide.outputs.api }}
|
||||
steps:
|
||||
@@ -161,9 +160,8 @@ jobs:
|
||||
# detect-changes for the full rationale.
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 15
|
||||
env:
|
||||
# Unique per-run container names so concurrent runs on the host-
|
||||
@@ -330,40 +328,16 @@ jobs:
|
||||
- name: Wait for /health
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
# Readiness signal: the platform binds /health only AFTER the full
|
||||
# migration chain has been applied on cold start (it prints
|
||||
# "Platform starting on :PORT" at that point). So a 200 from /health
|
||||
# is the real "migrations done + server listening" signal.
|
||||
#
|
||||
# The migration chain grows every release, so a fixed ~30s budget is
|
||||
# brittle by construction (it WILL be exceeded as migrations accrue).
|
||||
# Use a generous wall-clock budget that comfortably exceeds
|
||||
# cold-start + full-migration time, polling fast. This is robust to a
|
||||
# growing chain WITHOUT masking a genuinely dead platform: if the
|
||||
# background platform-server process has exited (e.g. a broken
|
||||
# migration crashed it), we stop and fail loudly at once instead of
|
||||
# waiting out the whole budget.
|
||||
DEADLINE_SECS=180 # cold-start + full migration chain headroom
|
||||
PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
|
||||
start=$(date +%s)
|
||||
while :; do
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf "$BASE/health" > /dev/null; then
|
||||
echo "Platform healthy after $(( $(date +%s) - start ))s"
|
||||
echo "Platform up after ${i}s"
|
||||
exit 0
|
||||
fi
|
||||
# Fast-fail: if the platform process died, /health will never come.
|
||||
if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
|
||||
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
|
||||
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Platform did not become healthy in 30s"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
- name: Assert migrations applied
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
|
||||
@@ -242,36 +242,16 @@ jobs:
|
||||
- name: Wait for /health
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
run: |
|
||||
# Readiness signal: the platform binds /health only AFTER the full
|
||||
# migration chain has been applied on cold start (it prints
|
||||
# "Platform starting on :PORT" at that point). So a 200 from /health
|
||||
# is the real "migrations done + server listening" signal.
|
||||
#
|
||||
# The migration chain grows every release, so a fixed ~30s budget is
|
||||
# brittle by construction. Use a generous wall-clock budget that
|
||||
# comfortably exceeds cold-start + full-migration time, polling fast.
|
||||
# Robust to a growing chain WITHOUT masking a dead platform: if the
|
||||
# background platform-server process has exited, fail loudly at once.
|
||||
DEADLINE_SECS=180 # cold-start + full migration chain headroom
|
||||
PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
|
||||
start=$(date +%s)
|
||||
while :; do
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf "http://127.0.0.1:${PLATFORM_PORT}/health" > /dev/null; then
|
||||
echo "Platform healthy after $(( $(date +%s) - start ))s"
|
||||
echo "Platform up after ${i}s"
|
||||
exit 0
|
||||
fi
|
||||
if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
|
||||
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
|
||||
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Platform did not become healthy in 30s"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
|
||||
- name: Install canvas dependencies
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
@@ -298,38 +278,16 @@ jobs:
|
||||
export NEXT_PUBLIC_WS_URL="ws://127.0.0.1:${PLATFORM_PORT}/ws"
|
||||
npx next dev --turbopack -p "${CANVAS_PORT}" > canvas.log 2>&1 &
|
||||
echo $! > canvas.pid
|
||||
# Readiness must wait for the actual chat route to *compile*, not
|
||||
# just for the dev server to bind the port. `next dev --turbopack`
|
||||
# accepts the TCP connection well before it has compiled a route
|
||||
# on first request, so a bare `curl /` can 200 (or hang) while the
|
||||
# page the tests load is still building. We therefore probe the
|
||||
# real route the specs navigate to (`/?m=chat`) and require a 2xx,
|
||||
# which only happens once Turbopack has finished the first
|
||||
# compile. The previous 30s budget was also too tight for a cold
|
||||
# Turbopack first-compile on a loaded operator-host runner — the
|
||||
# `Canvas did not start in 30s` flake. Raise to 120s (job
|
||||
# timeout-minutes is 15, so this is comfortably bounded) and probe
|
||||
# every 2s.
|
||||
READY=""
|
||||
for i in $(seq 1 60); do
|
||||
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
|
||||
# pollution of the captured status (lint-curl-status-capture.yml).
|
||||
set +e
|
||||
curl -s -o /dev/null -w '%{http_code}' "http://localhost:${CANVAS_PORT}/?m=chat" > /tmp/canvas-ready.code
|
||||
set -e
|
||||
CODE=$(cat /tmp/canvas-ready.code 2>/dev/null || echo "000")
|
||||
if [ "$CODE" -ge 200 ] && [ "$CODE" -lt 400 ]; then
|
||||
echo "Canvas (chat route compiled) up after ~$((i*2))s (HTTP ${CODE})"
|
||||
READY=1
|
||||
break
|
||||
for i in $(seq 1 30); do
|
||||
if curl -sf "http://localhost:${CANVAS_PORT}" > /dev/null 2>&1; then
|
||||
echo "Canvas up after ${i}s"
|
||||
exit 0
|
||||
fi
|
||||
sleep 2
|
||||
sleep 1
|
||||
done
|
||||
if [ -z "$READY" ]; then
|
||||
echo "::error::Canvas chat route did not compile in 120s (last HTTP ${CODE})"
|
||||
cat canvas.log || true
|
||||
exit 1
|
||||
fi
|
||||
echo "::error::Canvas did not start in 30s"
|
||||
cat canvas.log || true
|
||||
exit 1
|
||||
|
||||
- name: Run Playwright E2E tests
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
|
||||
@@ -130,37 +130,13 @@ jobs:
|
||||
run: |
|
||||
set -euo pipefail
|
||||
./workspace-server/platform-server > workspace-server/platform.log 2>&1 &
|
||||
PLATFORM_PID=$!
|
||||
echo "$PLATFORM_PID" > workspace-server/platform.pid
|
||||
# Readiness signal: the platform binds /health only AFTER the full
|
||||
# migration chain has been applied on cold start (it prints
|
||||
# "Platform starting on :PORT" at that point). So a 200 from /health
|
||||
# is the real "migrations done + server listening" signal.
|
||||
#
|
||||
# The migration chain grows every release, so a fixed ~30s budget is
|
||||
# brittle by construction. Use a generous wall-clock budget that
|
||||
# comfortably exceeds cold-start + full-migration time, polling fast.
|
||||
# Robust to a growing chain WITHOUT masking a dead platform: if the
|
||||
# background platform-server process has exited, fail loudly at once.
|
||||
DEADLINE_SECS=180 # cold-start + full migration chain headroom
|
||||
start=$(date +%s)
|
||||
while :; do
|
||||
if curl -sf "$BASE/health" >/dev/null; then
|
||||
echo "Platform healthy after $(( $(date +%s) - start ))s"
|
||||
exit 0
|
||||
fi
|
||||
if ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
|
||||
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
|
||||
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
echo $! > workspace-server/platform.pid
|
||||
for i in $(seq 1 30); do
|
||||
curl -sf "$BASE/health" >/dev/null && exit 0
|
||||
sleep 1
|
||||
done
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
|
||||
- name: Run comprehensive E2E
|
||||
run: bash tests/e2e/test_comprehensive_e2e.sh
|
||||
|
||||
@@ -126,7 +126,6 @@ jobs:
|
||||
# push/dispatch/cron only (30+ min). This is NOT a fake-green mask of
|
||||
# the real assertion — it validates the driving script's bash syntax
|
||||
# and inline-python so a broken test script fails at PR time.
|
||||
# bp-required: pending #1296 — PR emitter, not yet required (tracked in #1296).
|
||||
pr-validate:
|
||||
name: E2E Peer Visibility
|
||||
runs-on: ubuntu-latest
|
||||
@@ -268,36 +267,12 @@ jobs:
|
||||
echo $! > platform.pid
|
||||
- name: Wait for /health
|
||||
run: |
|
||||
# Readiness signal: the platform binds /health only AFTER the full
|
||||
# migration chain has been applied on cold start (it prints
|
||||
# "Platform starting on :PORT" at that point). So a 200 from /health
|
||||
# is the real "migrations done + server listening" signal.
|
||||
#
|
||||
# The migration chain grows every release, so a fixed ~30s budget is
|
||||
# brittle by construction. Use a generous wall-clock budget that
|
||||
# comfortably exceeds cold-start + full-migration time, polling fast.
|
||||
# Robust to a growing chain WITHOUT masking a dead platform: if the
|
||||
# background platform-server process has exited, fail loudly at once.
|
||||
DEADLINE_SECS=180 # cold-start + full migration chain headroom
|
||||
PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
|
||||
start=$(date +%s)
|
||||
while :; do
|
||||
if curl -sf "$BASE/health" > /dev/null; then
|
||||
echo "Platform healthy after $(( $(date +%s) - start ))s"
|
||||
exit 0
|
||||
fi
|
||||
if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
|
||||
echo "::error::platform-server (pid ${PLATFORM_PID}) exited before /health became reachable — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
|
||||
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
for i in $(seq 1 30); do
|
||||
curl -sf "$BASE/health" > /dev/null && { echo "Platform up after ${i}s"; exit 0; }
|
||||
sleep 1
|
||||
done
|
||||
echo "::error::Platform did not become healthy in 30s"
|
||||
cat workspace-server/platform.log || true; exit 1
|
||||
- name: Run LOCAL fresh-provision peer-visibility E2E (literal MCP list_peers)
|
||||
# HONEST gate — NO continue-on-error. The local backend uses
|
||||
# external-mode workspaces so this context tests the literal MCP
|
||||
|
||||
@@ -167,30 +167,16 @@ jobs:
|
||||
- if: needs.detect-changes.outputs.canvas == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
# Skip-if-absent (core#2225), mirroring the serving-e2e gate's
|
||||
# skip-if-secret-unset contract: a MISSING CI secret is an operator
|
||||
# CONFIG gap, not a code regression, so it must not paint this E2E
|
||||
# red. When CP_STAGING_ADMIN_API_TOKEN is unset we emit a LOUD
|
||||
# ::warning:: + ::notice:: and skip the real provision/test steps (the
|
||||
# job still completes green). When the secret IS present we run the
|
||||
# full suite exactly as before. Operators: set
|
||||
# CP_STAGING_ADMIN_API_TOKEN as a repo/org Actions secret on
|
||||
# molecule-core to actually exercise this E2E.
|
||||
- name: Check admin token (skip-if-absent)
|
||||
id: token_check
|
||||
- name: Verify admin token present
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
echo "::warning::CP_STAGING_ADMIN_API_TOKEN is not set on this runner — SKIPPING the staging canvas E2E (cannot auth to staging CP). This is an operator config gap, not a code failure; set the secret on molecule-core (repo or org Actions secrets) to run it. See core#2225."
|
||||
echo "::notice::E2E Staging Canvas skipped: CP_STAGING_ADMIN_API_TOKEN absent."
|
||||
echo "present=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "CP_STAGING_ADMIN_API_TOKEN present ✓ — running staging canvas E2E."
|
||||
echo "present=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::error::Missing CP_STAGING_ADMIN_API_TOKEN"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
- name: Set up Node
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '20'
|
||||
@@ -198,11 +184,11 @@ jobs:
|
||||
cache-dependency-path: canvas/package-lock.json
|
||||
|
||||
- name: Install canvas deps
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: npm ci
|
||||
|
||||
- name: Install Playwright browsers
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
PREBAKED_PLAYWRIGHT=/ms-playwright
|
||||
@@ -214,7 +200,7 @@ jobs:
|
||||
npx playwright install --with-deps chromium
|
||||
|
||||
- name: Run staging canvas E2E
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: npx playwright test --config=playwright.staging.config.ts
|
||||
|
||||
- name: Upload Playwright report on failure
|
||||
|
||||
@@ -48,10 +48,8 @@ on:
|
||||
- 'workspace-server/internal/handlers/a2a_proxy.go'
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -63,10 +61,8 @@ on:
|
||||
- 'workspace-server/internal/handlers/a2a_proxy.go'
|
||||
- 'workspace-server/internal/middleware/**'
|
||||
- 'workspace-server/internal/provisioner/**'
|
||||
- 'workspace-server/internal/providers/providers.yaml'
|
||||
- 'tests/e2e/test_staging_full_saas.sh'
|
||||
- 'tests/e2e/lib/completion_assert.sh'
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/test_aws_leak_check.sh'
|
||||
- '.gitea/workflows/e2e-staging-saas.yml'
|
||||
@@ -319,148 +315,3 @@ jobs:
|
||||
echo "::warning::saas teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
# ── PLATFORM-MANAGED BOOT REGRESSION (moonshot/kimi NOT_CONFIGURED) ──────────
|
||||
#
|
||||
# The REAL-boot complement to the deterministic unit suite
|
||||
# (workspace_provision_platform_boot_test.go). Provisions a REAL staging
|
||||
# claude-code workspace on the PLATFORM-managed path — provider=platform,
|
||||
# model=moonshot/kimi-k2.6, NO tenant LLM key — and asserts it reaches
|
||||
# status=online (NOT not_configured) and a completion returns 200, via the same
|
||||
# online-wait + completion-assert the BYOK job uses.
|
||||
#
|
||||
# Why a SEPARATE job (not a matrix leg of e2e-staging-saas): the platform path
|
||||
# injects NO secret and pins a different model, so its env block diverges from
|
||||
# the BYOK job's. A dedicated job keeps each path's "verify key present" preflight
|
||||
# honest (BYOK requires a key; platform requires its ABSENCE not to matter) and
|
||||
# gives the regression its own named commit-status for branch protection.
|
||||
#
|
||||
# Add `E2E Staging Platform Boot` to branch protection after 3 consecutive
|
||||
# green runs on main (de-flake window; this path shares the cp#245
|
||||
# boot-timeout flake surface the BYOK job has, so it must prove stable before
|
||||
# it can BLOCK — see the gate-making plan in the PR body).
|
||||
# bp-required: pending #2187
|
||||
e2e-staging-platform-boot:
|
||||
name: E2E Staging Platform Boot
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface without blocking until the de-flake window
|
||||
# closes. mc#1982: do NOT renew this mask silently — the gate-making plan
|
||||
# tracks the flip to false under #2187.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 45
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
E2E_AWS_LEAK_CHECK: required
|
||||
E2E_AWS_TERMINATE_LEAKS: '1'
|
||||
# The regression combo: claude-code + platform-managed + moonshot/kimi-k2.6.
|
||||
# NO E2E_*_API_KEY is set — platform-managed billing is owned by Molecule via
|
||||
# the CP LLM proxy. The harness's E2E_LLM_PATH=platform branch sends empty
|
||||
# secrets and pin-selects the platform model.
|
||||
E2E_RUNTIME: claude-code
|
||||
E2E_LLM_PATH: platform
|
||||
# Smoke mode: a single parent workspace is enough to prove online +
|
||||
# completion for the platform path (the A2A/delegation matrix is the BYOK
|
||||
# job's job). Override E2E_DEFAULT_PLATFORM_MODEL via workflow_dispatch to
|
||||
# exercise another platform model id.
|
||||
E2E_MODE: smoke
|
||||
E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
|
||||
exit 2
|
||||
fi
|
||||
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
echo "::error::$var not set — EC2 leak verification cannot run"
|
||||
exit 2
|
||||
fi
|
||||
done
|
||||
echo "Admin token present ✓"
|
||||
|
||||
- name: Assert NO BYOK key leaks into the platform run
|
||||
run: |
|
||||
# The whole point of this job is the platform-managed path. A stray
|
||||
# E2E_*_API_KEY in the runner env would (via the harness) still be
|
||||
# skipped by the E2E_LLM_PATH=platform branch — but assert their
|
||||
# absence loudly here so a future env edit can't silently convert this
|
||||
# into a masked BYOK run that no longer exercises the regression.
|
||||
for var in E2E_MINIMAX_API_KEY E2E_ANTHROPIC_API_KEY E2E_OPENAI_API_KEY; do
|
||||
if [ -n "${!var:-}" ]; then
|
||||
echo "::warning::$var is set in this platform-boot job's env — the harness ignores it on E2E_LLM_PATH=platform, but it should not be wired here."
|
||||
fi
|
||||
done
|
||||
echo "Platform-managed path: no tenant LLM key required ✓"
|
||||
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a workspace bug."
|
||||
exit 1
|
||||
fi
|
||||
echo "Staging CP healthy ✓"
|
||||
|
||||
- name: Run platform-managed boot E2E (online + completion)
|
||||
id: e2e
|
||||
run: bash tests/e2e/test_staging_full_saas.sh
|
||||
|
||||
- name: Teardown safety net (runs on cancel/failure)
|
||||
if: always()
|
||||
env:
|
||||
ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
run: |
|
||||
set +e
|
||||
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
|
||||
| python3 -c "
|
||||
import json, sys, os, datetime
|
||||
run_id = os.environ.get('GITHUB_RUN_ID', '')
|
||||
d = json.load(sys.stdin)
|
||||
today = datetime.date.today()
|
||||
yesterday = today - datetime.timedelta(days=1)
|
||||
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
|
||||
# smoke mode slugs are e2e-smoke-YYYYMMDD-platform-<run_id>-...
|
||||
if run_id:
|
||||
prefixes = tuple(f'e2e-smoke-{d}-platform-{run_id}-' for d in dates)
|
||||
else:
|
||||
prefixes = tuple(f'e2e-smoke-{d}-platform-' for d in dates)
|
||||
candidates = [o['slug'] for o in d.get('orgs', [])
|
||||
if any(o.get('slug','').startswith(p) for p in prefixes)
|
||||
and o.get('instance_status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
echo "Safety-net teardown: $slug"
|
||||
set +e
|
||||
curl -sS -o /tmp/plat-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/plat-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/plat-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::platform-boot teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/plat-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::platform-boot teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
|
||||
@@ -88,9 +88,8 @@ jobs:
|
||||
# surprises and keeps the routing rule discoverable in one place.
|
||||
runs-on: docker-host
|
||||
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
handlers: ${{ steps.filter.outputs.handlers }}
|
||||
steps:
|
||||
@@ -120,9 +119,8 @@ jobs:
|
||||
# exists). See detect-changes for the full routing rationale.
|
||||
runs-on: docker-host
|
||||
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
|
||||
# mc#1982: mask removed. If regressions appear, root-fix the underlying
|
||||
# test — do NOT renew the mask silently.
|
||||
continue-on-error: false
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
env:
|
||||
# Unique name per run so concurrent jobs don't collide on the
|
||||
# bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
|
||||
@@ -253,19 +251,6 @@ jobs:
|
||||
echo "✓ $tbl table present"
|
||||
done
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Preflight — INTEGRATION_DB_URL must be present
|
||||
run: |
|
||||
# Belt-and-suspenders: if the postgres-start step failed to
|
||||
# export INTEGRATION_DB_URL, fail loud BEFORE go test can
|
||||
# t.Skip its way to a green build. Closes the workflow-level
|
||||
# fail-open gap identified in PR #2166 blocker #2.
|
||||
if [ -z "${INTEGRATION_DB_URL:-}" ]; then
|
||||
echo "::error::INTEGRATION_DB_URL is empty — postgres-start step did not export the connection string"
|
||||
exit 1
|
||||
fi
|
||||
echo "INTEGRATION_DB_URL is set"
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Run integration tests
|
||||
run: |
|
||||
|
||||
@@ -49,56 +49,37 @@ jobs:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
steps:
|
||||
- name: Identify runner
|
||||
id: identify
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -eu
|
||||
echo "arch=$(uname -m)"
|
||||
echo "kernel=$(uname -sr)"
|
||||
echo "shell=$BASH_VERSION"
|
||||
# Sanity: must actually be arm64. If amd64 sneaks in here,
|
||||
# the job skips gracefully rather than hard-failing, because
|
||||
# a mislabelled runner is an ops concern, not a code defect.
|
||||
# Pilot lane must not make main red (#2146).
|
||||
# fail fast — that means the label routing is wrong.
|
||||
case "$(uname -m)" in
|
||||
aarch64|arm64)
|
||||
echo "arm64 confirmed"
|
||||
echo "arm64=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: expected arm64, got $(uname -m) — label routing may be wrong"
|
||||
echo "arm64=false" >> "$GITHUB_OUTPUT"
|
||||
exit 1
|
||||
;;
|
||||
aarch64|arm64) echo "arm64 confirmed" ;;
|
||||
*) echo "ERROR: expected arm64, got $(uname -m)"; exit 1 ;;
|
||||
esac
|
||||
|
||||
- name: Checkout
|
||||
if: steps.identify.outputs.arm64 == 'true'
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install shellcheck (arm64)
|
||||
if: steps.identify.outputs.arm64 == 'true'
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -eu
|
||||
if command -v shellcheck >/dev/null 2>&1; then
|
||||
echo "shellcheck already present: $(shellcheck --version | head -1)"
|
||||
else
|
||||
# Prefer apt if the runner base ships it; else download the
|
||||
# correct platform binary (darwin vs linux).
|
||||
# Prefer apt if the runner base ships it; else download arm64 binary.
|
||||
if command -v apt-get >/dev/null 2>&1; then
|
||||
sudo apt-get update -qq
|
||||
sudo apt-get install -y --no-install-recommends shellcheck
|
||||
else
|
||||
SC_VER=v0.10.0
|
||||
if [ "$(uname -s)" = "Darwin" ]; then
|
||||
SC_PKG="shellcheck-${SC_VER}.darwin.aarch64.tar.xz"
|
||||
else
|
||||
SC_PKG="shellcheck-${SC_VER}.linux.aarch64.tar.xz"
|
||||
fi
|
||||
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/${SC_PKG}" \
|
||||
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/shellcheck-${SC_VER}.linux.aarch64.tar.xz" \
|
||||
| tar -xJf - --strip-components=1
|
||||
sudo mv shellcheck /usr/local/bin/
|
||||
fi
|
||||
@@ -106,15 +87,14 @@ jobs:
|
||||
shellcheck --version | head -2
|
||||
|
||||
- name: Run shellcheck on .gitea/scripts/*.sh
|
||||
if: steps.identify.outputs.arm64 == 'true'
|
||||
continue-on-error: true
|
||||
run: |
|
||||
set -eu
|
||||
# Only the scripts we control under .gitea/scripts. Pilot
|
||||
# scope is intentionally narrow — broaden in a follow-up
|
||||
# once the lane is proven.
|
||||
if ! command -v shellcheck >/dev/null 2>&1 || ! shellcheck --version >/dev/null 2>&1; then
|
||||
echo "WARN: shellcheck not functional — skipping (pilot mode)"
|
||||
if ! command -v shellcheck >/dev/null 2>&1; then
|
||||
echo "WARN: shellcheck binary not found — skipping (pilot mode)"
|
||||
exit 0
|
||||
fi
|
||||
# NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
|
||||
|
||||
@@ -14,37 +14,10 @@ name: publish-canvas-image
|
||||
# authenticate to ghcr.io.
|
||||
#
|
||||
|
||||
# Builds, pushes, and (ordered) deploys the standalone canvas Docker image to
|
||||
# ECR whenever a commit lands on main that touches canvas code.
|
||||
#
|
||||
# Ordered deploy (core#2226) — mirrors publish-workspace-server-image.yml so the
|
||||
# standalone `molecule-ai/canvas` image is deterministic + verifiable, not a
|
||||
# side effect of the platform fleet pulling a mutable `:latest`:
|
||||
#
|
||||
# build-and-push: build → push :staging-<sha> + :staging-latest + :sha-<sha>
|
||||
# (does NOT move :latest — an unpromoted build must never
|
||||
# become the prod-blessed tag).
|
||||
# promote-canvas: waits for green main CI on this SHA, then re-points
|
||||
# :latest to the verified :staging-<sha> by digest
|
||||
# (imagetools create — no rebuild). So `:latest` == the
|
||||
# current prod-blessed canvas, byte-identical to staging-<sha>.
|
||||
#
|
||||
# Tag scheme produced (parallels platform-tenant):
|
||||
# :staging-<sha> — per-commit immutable digest, what docker-compose pins to.
|
||||
# :staging-latest — most recent BUILD on main (last-writer-wins, NOT gated).
|
||||
# :sha-<sha> — kept for back-compat with any consumer pinning the old tag.
|
||||
# :latest — most recent CI-GREEN build. Only moved by promote-canvas.
|
||||
#
|
||||
# WHY this is the canvas analogue of the platform's deploy-production, not a
|
||||
# literal copy: the standalone canvas co-deploys with the platform on the same
|
||||
# host via the root docker-compose.yml (`docker compose pull && up -d`). Gating
|
||||
# the canvas `:latest` promotion on the SAME green-main-CI signal the platform
|
||||
# deploy waits on makes platform + canvas roll together by the same SHA. The
|
||||
# canvas has no per-tenant fleet of its own and no /buildinfo endpoint, so there
|
||||
# is no fleet-rollout / per-tenant verify step to mirror here — CI-green +
|
||||
# digest-pin + immutable :staging-<sha> is the determinism contract. (A future
|
||||
# canvas /buildinfo would let this assert the served SHA like the platform does;
|
||||
# tracked in core#2226.)
|
||||
# Builds and pushes the canvas Docker image to ECR whenever a commit lands
|
||||
# on main that touches canvas code. Previously canvas changes were visible in
|
||||
# CI (npm run build passed) but the live container was never updated —
|
||||
# operators had to manually run `docker compose build canvas` each time.
|
||||
#
|
||||
# Mirror of publish-platform-image.yml, adapted for the Next.js canvas layer.
|
||||
# See that workflow for inline notes on macOS Keychain isolation and QEMU.
|
||||
@@ -57,7 +30,6 @@ on:
|
||||
# platform-only / docs-only / MCP-only merges.
|
||||
- 'canvas/**'
|
||||
- '.gitea/workflows/publish-canvas-image.yml'
|
||||
workflow_dispatch:
|
||||
# NOTE (Gitea port): the original GitHub workflow had a
|
||||
# `workflow_dispatch:` manual trigger for the
|
||||
# non-canvas-merge-but-need-fresh-image scenario. Dropped in the
|
||||
@@ -97,10 +69,6 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
# Exposed so promote-canvas re-points :latest to the EXACT per-commit tag
|
||||
# this build produced (digest-level), never a re-resolved mutable tag.
|
||||
staging_sha: ${{ steps.tags.outputs.staging_sha }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -172,7 +140,6 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||
echo "staging_sha=staging-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Resolve build args
|
||||
id: build_args
|
||||
@@ -208,14 +175,8 @@ jobs:
|
||||
build-args: |
|
||||
NEXT_PUBLIC_PLATFORM_URL=${{ steps.build_args.outputs.platform_url }}
|
||||
NEXT_PUBLIC_WS_URL=${{ steps.build_args.outputs.ws_url }}
|
||||
# Ordered deploy (core#2226): the build job pushes the immutable
|
||||
# per-commit tag + the build-tracking staging-latest + the legacy
|
||||
# back-compat :sha-<sha> tag. It does NOT push :latest — :latest is
|
||||
# the prod-blessed tag and is only re-pointed by promote-canvas after
|
||||
# green main CI, so an unpromoted/red build can never become :latest.
|
||||
tags: |
|
||||
${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.staging_sha }}
|
||||
${{ env.IMAGE_NAME }}:staging-latest
|
||||
${{ env.IMAGE_NAME }}:latest
|
||||
${{ env.IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
|
||||
# Gitea artifact-cache reachability is best-effort on the operator
|
||||
# runner network. Do not let cache export fail an image that already
|
||||
@@ -224,107 +185,3 @@ jobs:
|
||||
org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.description=Molecule AI canvas (Next.js 15 + React Flow)
|
||||
|
||||
# bp-exempt: post-merge canvas promote side-effect; merge is gated by CI /
|
||||
# all-required and this job waits for green push CI on the SHA before acting.
|
||||
promote-canvas:
|
||||
name: Promote canvas :latest to CI-green build
|
||||
needs: build-and-push
|
||||
# Only on a real main push — workflow_dispatch / non-main never promotes.
|
||||
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
|
||||
# Side-effect deploy only; the image publish above is the durable artifact.
|
||||
# mc#1982: do NOT renew this mask silently — it mirrors deploy-production's
|
||||
# contract (a flaky promote must not red the ship lane), tracked in core#2226.
|
||||
continue-on-error: true
|
||||
runs-on: publish
|
||||
timeout-minutes: 60
|
||||
env:
|
||||
# Same green-main-CI gate the platform deploy-production waits on, so
|
||||
# platform + canvas advance :latest off the identical signal/SHA.
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
GITEA_TOKEN: ${{ secrets.PROD_AUTO_DEPLOY_CONTROL_TOKEN || secrets.AUTO_SYNC_TOKEN }}
|
||||
CI_STATUS_TIMEOUT_SECONDS: "3600"
|
||||
# Re-uses the platform's disable kill-switch: when prod auto-deploy is
|
||||
# paused, the canvas :latest promote pauses too (correct — an unpromoted
|
||||
# build must not become :latest while the fleet is frozen).
|
||||
PROD_AUTO_DEPLOY_DISABLED: ${{ vars.PROD_AUTO_DEPLOY_DISABLED || secrets.PROD_AUTO_DEPLOY_DISABLED || '' }}
|
||||
steps:
|
||||
# The publish runner's default HOME (/home/hongming) is not writable, so
|
||||
# docker credential saves fail and halt the promote (#2193 on the platform
|
||||
# side). Point HOME + DOCKER_CONFIG at the writable job temp dir.
|
||||
- name: Prepare writable HOME + Docker config
|
||||
run: |
|
||||
set -euo pipefail
|
||||
H="$RUNNER_TEMP/canvas-promote-home"
|
||||
mkdir -p "$H/.docker"
|
||||
echo "HOME=$H" >> "$GITHUB_ENV"
|
||||
echo "DOCKER_CONFIG=$H/.docker" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Resolve promote gate
|
||||
id: gate
|
||||
env:
|
||||
PROD_AUTO_DEPLOY_DISABLED: ${{ env.PROD_AUTO_DEPLOY_DISABLED }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -n "${PROD_AUTO_DEPLOY_DISABLED:-}" ]; then
|
||||
case "$(printf '%s' "$PROD_AUTO_DEPLOY_DISABLED" | tr '[:upper:]' '[:lower:]')" in
|
||||
1|true|yes|on|disabled|disable)
|
||||
echo "enabled=false" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Canvas :latest promote skipped: PROD_AUTO_DEPLOY_DISABLED=$PROD_AUTO_DEPLOY_DISABLED"
|
||||
{
|
||||
echo "## Canvas :latest promote skipped"
|
||||
echo ""
|
||||
echo "Reason: \`PROD_AUTO_DEPLOY_DISABLED=$PROD_AUTO_DEPLOY_DISABLED\`. The CI-green build is published as \`:staging-${GITHUB_SHA::7}\`; \`:latest\` was left unchanged."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 0 ;;
|
||||
esac
|
||||
fi
|
||||
if [ -z "${GITEA_TOKEN:-}" ]; then
|
||||
echo "::error::AUTO_SYNC_TOKEN/PROD_AUTO_DEPLOY_CONTROL_TOKEN is required so the canvas promote can wait for green CI."
|
||||
exit 1
|
||||
fi
|
||||
echo "enabled=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Wait for green main CI on this SHA
|
||||
if: ${{ steps.gate.outputs.enabled == 'true' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Same SSOT wait the platform deploy uses: blocks until the required
|
||||
# push contexts (CI / all-required (push) + Secret scan) go green on
|
||||
# THIS sha, and fails closed if any required context terminally fails.
|
||||
python3 .gitea/scripts/prod-auto-deploy.py wait-ci
|
||||
|
||||
- name: Promote canvas :latest to the CI-green image
|
||||
if: ${{ steps.gate.outputs.enabled == 'true' }}
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
STAGING_SHA_TAG: ${{ needs.build-and-push.outputs.staging_sha }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Fail-safe: if the build job's output didn't propagate, recompute the
|
||||
# immutable per-commit tag from the SHA so we never promote a guess.
|
||||
SHA_TAG="${STAGING_SHA_TAG:-staging-${GITHUB_SHA::7}}"
|
||||
ECR_REGISTRY="${IMAGE_NAME%%/*}"
|
||||
aws ecr get-login-password --region us-east-2 | \
|
||||
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
|
||||
|
||||
# Digest-level re-tag (no pull/rebuild): :latest becomes byte-identical
|
||||
# to the verified :staging-<sha> for this commit.
|
||||
docker buildx imagetools create \
|
||||
--tag "${IMAGE_NAME}:latest" \
|
||||
"${IMAGE_NAME}:${SHA_TAG}"
|
||||
|
||||
{
|
||||
echo "## Canvas :latest promoted"
|
||||
echo ""
|
||||
echo "Re-pointed \`molecule-ai/canvas:latest\` → \`${SHA_TAG}\` (by digest)."
|
||||
echo ":latest now tracks the CI-green canvas build for commit \`${GITHUB_SHA::7}\`."
|
||||
echo ""
|
||||
echo "Tenants/hosts that \`docker compose pull canvas\` now get the same build the platform deploy rolled for this SHA."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
@@ -16,24 +16,14 @@ name: publish-workspace-server-image
|
||||
#
|
||||
# Image tags produced:
|
||||
# :staging-<sha> — per-commit digest, stable for canary verify
|
||||
# :staging-latest — tracks most recent BUILD on this branch (set by the
|
||||
# build job, last-writer-wins, NOT prod-gated)
|
||||
# :latest — tracks the most recent PROD-PROMOTED build. Re-pointed by the
|
||||
# deploy-production job ONLY after green main CI + canary +
|
||||
# fleet rollout + /buildinfo verification pass. So :latest ==
|
||||
# "current prod image", never the raw build. (Added 2026-06-03
|
||||
# after a stale :latest — last moved 2026-05-10 — reverted a
|
||||
# production tenant on a no-arg redeploy.)
|
||||
# :staging-latest — tracks most recent build on this branch
|
||||
#
|
||||
# Production auto-deploy:
|
||||
# After both platform and tenant images are pushed, deploy-production waits
|
||||
# for strict required push contexts on the same SHA to go green, then
|
||||
# calls the production CP redeploy-fleet endpoint with target_tag=
|
||||
# staging-<sha>. On success (rollout + buildinfo verified) it re-points
|
||||
# :latest to the same SHA. Set repo variable or secret
|
||||
# PROD_AUTO_DEPLOY_DISABLED=true to stop production rollout while keeping
|
||||
# image publishing enabled — in which case :latest is NOT advanced either
|
||||
# (correct: an unpromoted build must not become :latest).
|
||||
# staging-<sha>. Set repo variable or secret PROD_AUTO_DEPLOY_DISABLED=true
|
||||
# to stop production rollout while keeping image publishing enabled.
|
||||
#
|
||||
# Primary ECR target: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*
|
||||
# Optional staging tenant mirror target:
|
||||
@@ -262,25 +252,7 @@ jobs:
|
||||
PROD_AUTO_DEPLOY_BATCH_SIZE: ${{ vars.PROD_AUTO_DEPLOY_BATCH_SIZE || '3' }}
|
||||
PROD_AUTO_DEPLOY_DRY_RUN: ${{ vars.PROD_AUTO_DEPLOY_DRY_RUN || '' }}
|
||||
PROD_ALLOW_NON_PROD_CP_URL: ${{ vars.PROD_ALLOW_NON_PROD_CP_URL || '' }}
|
||||
# #2213: per-tenant /buildinfo settle budget. A freshly-swapped tenant can
|
||||
# keep serving the old image at the edge for a short drain window; the
|
||||
# verify step polls each tenant up to this budget before declaring it stale.
|
||||
PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS: ${{ vars.PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS || '240' }}
|
||||
PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS: ${{ vars.PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS || '20' }}
|
||||
steps:
|
||||
# The publish runner's default HOME (/home/hongming) is not writable, so
|
||||
# git/docker credential saves fail (`Error saving credentials: mkdir
|
||||
# /home/hongming: permission denied`) and halt the production rollout
|
||||
# (#2193). Point HOME + DOCKER_CONFIG at the writable job temp dir —
|
||||
# mirrors build-and-push's "Prepare writable Docker config" fix above.
|
||||
- name: Prepare writable HOME + Docker config
|
||||
run: |
|
||||
set -euo pipefail
|
||||
H="$RUNNER_TEMP/auto-deploy-home"
|
||||
mkdir -p "$H/.docker"
|
||||
echo "HOME=$H" >> "$GITHUB_ENV"
|
||||
echo "DOCKER_CONFIG=$H/.docker" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -325,50 +297,8 @@ jobs:
|
||||
set -euo pipefail
|
||||
python3 .gitea/scripts/prod-auto-deploy.py wait-ci
|
||||
|
||||
# Superseded-job guard — BEFORE any production side effect (#2213).
|
||||
#
|
||||
# This workflow has no `concurrency:` (see header: Gitea 1.22.6 cancels
|
||||
# queued prod deploys). So two close main pushes run BOTH deploy-production
|
||||
# jobs. The verify step already skips its strict /buildinfo check when this
|
||||
# job is superseded (#2194) — but that guard was AFTER the redeploy and the
|
||||
# :latest promote, so an OLDER job that started late still:
|
||||
# 1. rolled the whole fleet BACKWARD to its older tag (canary hongming
|
||||
# was reverted from the newer SHA — the #2213 red), then
|
||||
# 2. promoted :latest backward to the older image,
|
||||
# and only THEN skipped verify and exited green. A superseded job must do
|
||||
# NEITHER. We re-check the branch head here, immediately before the rollout,
|
||||
# and skip every side effect when a newer commit already owns main.
|
||||
#
|
||||
# exit 0 + non-empty stdout => superseded (newer head printed); the redeploy
|
||||
# and promote steps are gated off via this output. exit 10 => this job is
|
||||
# still the latest, proceed to roll the fleet. Fail-safe: a head that can't
|
||||
# be read returns NOT-superseded (exit 10), so a genuine deploy is never
|
||||
# silently skipped. (Re-checked again at verify time to catch a newer job
|
||||
# that lands DURING this rollout.)
|
||||
- name: Check superseded before production side effects
|
||||
id: supersede
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
set +e
|
||||
NEWER_HEAD="$(python3 .gitea/scripts/prod-auto-deploy.py check-superseded)"
|
||||
SUPERSEDED_EXIT=$?
|
||||
set -e
|
||||
if [ "$SUPERSEDED_EXIT" -eq 0 ] && [ -n "$NEWER_HEAD" ]; then
|
||||
echo "superseded=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Superseded before rollout: main head is now ${NEWER_HEAD:0:7} (this job deploys ${GITHUB_SHA:0:7}). Skipping redeploy + :latest promote so an older job never rolls the fleet backward."
|
||||
{
|
||||
echo "## Production auto-deploy skipped — superseded before rollout"
|
||||
echo ""
|
||||
echo "This deploy job's SHA \`${GITHUB_SHA:0:7}\` is no longer the head of \`main\` (now \`${NEWER_HEAD:0:7}\`)."
|
||||
echo "A newer deploy job owns the fleet; rolling it backward to this older build would revert tenants and \`:latest\`. No side effects performed."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "superseded=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Call production CP redeploy-fleet
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
python3 .gitea/scripts/prod-auto-deploy.py assert-enabled
|
||||
@@ -427,66 +357,18 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Verify reachable tenants report this SHA
|
||||
# Skip when superseded BEFORE rollout: the redeploy step did not run, so
|
||||
# there is no redeploy-fleet response to verify against and the newer job
|
||||
# owns verification (#2213). The in-step guard below still catches the
|
||||
# case where a newer job lands DURING this job's rollout.
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' }}
|
||||
env:
|
||||
TENANT_DOMAIN: moleculesai.app
|
||||
run: |
|
||||
set -euo pipefail
|
||||
RESP="$RUNNER_TEMP/prod-redeploy-response.json"
|
||||
|
||||
# Superseded-job guard. This workflow has no `concurrency:` (header
|
||||
# explains why: Gitea 1.22.6 cancels queued prod deploys). So two
|
||||
# close main pushes run BOTH deploy-production jobs. The newer one
|
||||
# rolls the fleet to its (newer) build first; this older job's strict
|
||||
# equality check below would then see tenants on the NEWER SHA and
|
||||
# false-red "$slug is stale" even though the fleet is AHEAD, not
|
||||
# behind (git SHAs aren't ordered; /buildinfo exposes only git_sha).
|
||||
#
|
||||
# If main's current head is no longer THIS job's SHA, a newer commit
|
||||
# has landed and this deploy is superseded — the newest job's verify
|
||||
# is authoritative. Skip strict verify and succeed. exit 0 => newer
|
||||
# head printed (superseded); exit 10 => still the latest, proceed to
|
||||
# the strict verify so a genuinely-behind tenant still fails loudly.
|
||||
set +e
|
||||
NEWER_HEAD="$(python3 .gitea/scripts/prod-auto-deploy.py check-superseded)"
|
||||
SUPERSEDED_EXIT=$?
|
||||
set -e
|
||||
if [ "$SUPERSEDED_EXIT" -eq 0 ] && [ -n "$NEWER_HEAD" ]; then
|
||||
echo "::notice::Superseded deploy: main head is now ${NEWER_HEAD:0:7} (this job deployed ${GITHUB_SHA:0:7}). The fleet is at or ahead of this build; the newer deploy job's verify is authoritative. Skipping strict SHA verify."
|
||||
{
|
||||
echo ""
|
||||
echo "### Buildinfo verification skipped — superseded deploy"
|
||||
echo ""
|
||||
echo "This deploy job's SHA \`${GITHUB_SHA:0:7}\` is no longer the head of \`main\` (now \`${NEWER_HEAD:0:7}\`)."
|
||||
echo "A newer deploy job is rolling the fleet forward; its verify is authoritative."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
mapfile -t SLUGS < <(jq -r '.results[]? | .slug' "$RESP")
|
||||
if [ ${#SLUGS[@]} -eq 0 ]; then
|
||||
echo "::error::No tenants returned from redeploy-fleet; refusing to mark production deploy verified."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Per-tenant settle/retry budget (#2213). A tenant whose container the
|
||||
# CP just swapped can keep serving the OLD image at the edge for a short
|
||||
# window while the old container drains — /buildinfo returns HTTP 200
|
||||
# with the previous SHA, which `curl --retry` does NOT retry (it only
|
||||
# retries connection/5xx failures, not a stale-but-200 body). Without a
|
||||
# settle window a still-rolling tenant false-reds "stale" on the very
|
||||
# first poll. So poll each tenant's /buildinfo until it reports the
|
||||
# target SHA or the budget is exhausted; only THEN declare it stale or
|
||||
# unreachable. This never masks a genuinely stuck tenant — a tenant that
|
||||
# never reaches the target within the budget still fails loud (and the
|
||||
# superseded-job revert class is already blocked before rollout above).
|
||||
SETTLE_BUDGET_SECONDS="${PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS:-240}"
|
||||
SETTLE_INTERVAL_SECONDS="${PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS:-20}"
|
||||
|
||||
STALE_COUNT=0
|
||||
UNREACHABLE_COUNT=0
|
||||
UNHEALTHY_COUNT=0
|
||||
@@ -498,36 +380,18 @@ jobs:
|
||||
continue
|
||||
fi
|
||||
url="https://${slug}.${TENANT_DOMAIN}/buildinfo"
|
||||
deadline=$(( $(date +%s) + SETTLE_BUDGET_SECONDS ))
|
||||
actual=""
|
||||
last_actual=""
|
||||
on_target=false
|
||||
while :; do
|
||||
body="$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$url" || true)"
|
||||
actual="$(echo "$body" | jq -r '.git_sha // ""' 2>/dev/null || echo "")"
|
||||
[ -n "$actual" ] && last_actual="$actual"
|
||||
if [ "$actual" = "$GITHUB_SHA" ]; then
|
||||
on_target=true
|
||||
break
|
||||
fi
|
||||
now=$(date +%s)
|
||||
if [ "$now" -ge "$deadline" ]; then
|
||||
break
|
||||
fi
|
||||
# Still rolling (stale 200) or transiently unreachable — wait and
|
||||
# re-poll within the settle budget rather than failing on first read.
|
||||
remaining=$(( deadline - now ))
|
||||
echo "$slug: waiting for target SHA (have '${actual:0:7}', want ${GITHUB_SHA:0:7}; ${remaining}s left)"
|
||||
sleep "$SETTLE_INTERVAL_SECONDS"
|
||||
done
|
||||
if [ "$on_target" = true ]; then
|
||||
echo "$slug: ${actual:0:7}"
|
||||
elif [ -z "$last_actual" ]; then
|
||||
echo "::error::$slug did not return /buildinfo after deploy (waited ${SETTLE_BUDGET_SECONDS}s)."
|
||||
body="$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$url" || true)"
|
||||
actual="$(echo "$body" | jq -r '.git_sha // ""' 2>/dev/null || echo "")"
|
||||
if [ -z "$actual" ]; then
|
||||
echo "::error::$slug did not return /buildinfo after deploy."
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
else
|
||||
echo "::error::$slug is stale: actual=${last_actual:0:7}, expected=${GITHUB_SHA:0:7} (waited ${SETTLE_BUDGET_SECONDS}s)"
|
||||
continue
|
||||
fi
|
||||
if [ "$actual" != "$GITHUB_SHA" ]; then
|
||||
echo "::error::$slug is stale: actual=${actual:0:7}, expected=${GITHUB_SHA:0:7}"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
else
|
||||
echo "$slug: ${actual:0:7}"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -545,69 +409,3 @@ jobs:
|
||||
if [ "$STALE_COUNT" -gt 0 ] || [ "$UNHEALTHY_COUNT" -gt 0 ] || [ "$UNREACHABLE_COUNT" -gt 0 ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Re-point :latest to the just-promoted image — ONLY after the
|
||||
# production rollout + buildinfo verification above have passed.
|
||||
#
|
||||
# WHY HERE (promote point), not at build time:
|
||||
# The platform-tenant ECR `:latest` tag was last moved 2026-05-10
|
||||
# and went 3.5 weeks stale because the build step only pushes
|
||||
# :staging-<sha> + :staging-latest and never re-points :latest. A
|
||||
# no-arg POST /cp/admin/tenants/:slug/redeploy (whose default tag
|
||||
# fell through to "latest") then pulled the 3.5-week-old image and
|
||||
# REVERTED the tenant (incident: molecule-adk-demo, 2026-06-03).
|
||||
#
|
||||
# The defense-in-depth half of this fix changes that redeploy
|
||||
# default to :staging-latest, but :latest itself must also be
|
||||
# kept meaningful. We make :latest track the PROD-BLESSED build,
|
||||
# not the raw build: by living at the end of deploy-production —
|
||||
# after `wait-ci` (green main CI), the canary-first batched fleet
|
||||
# rollout, AND the /buildinfo SHA verification — :latest only ever
|
||||
# advances to a SHA that is actually green and confirmed running
|
||||
# across the live fleet. So `:latest` == "current prod image",
|
||||
# and any consumer that pulls :latest (legacy callers, manual
|
||||
# `docker pull`, a redeploy that somehow still resolves "latest")
|
||||
# gets the blessed image instead of whatever happened to build.
|
||||
#
|
||||
# Re-tag is digest-level (imagetools create), so no rebuild and
|
||||
# :latest is byte-identical to :staging-<sha> for this commit.
|
||||
# Gate on supersede: a superseded older job must NOT move :latest backward
|
||||
# to its older image (#2213 — 275383 promoted :latest → the older
|
||||
# staging-7a72516 after a newer job had already shipped). :latest must only
|
||||
# ever advance under the job that owns main's head.
|
||||
- name: Promote :latest to the verified prod image
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
|
||||
env:
|
||||
TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
|
||||
STAGING_TENANT_IMAGE_NAME: ${{ env.STAGING_TENANT_IMAGE_NAME }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
run: |
|
||||
set -euo pipefail
|
||||
SHA_TAG="staging-${GITHUB_SHA::7}"
|
||||
PROD_ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
|
||||
STAGING_ECR_REGISTRY="${STAGING_TENANT_IMAGE_NAME%%/*}"
|
||||
|
||||
aws ecr get-login-password --region us-east-2 | \
|
||||
docker login --username AWS --password-stdin "${PROD_ECR_REGISTRY}"
|
||||
aws ecr get-login-password --region us-east-2 | \
|
||||
docker login --username AWS --password-stdin "${STAGING_ECR_REGISTRY}"
|
||||
|
||||
# imagetools create copies the source manifest to the new tag by
|
||||
# digest (no pull/rebuild). :latest now points at the exact image
|
||||
# that just passed the prod gate.
|
||||
docker buildx imagetools create \
|
||||
--tag "${TENANT_IMAGE_NAME}:latest" \
|
||||
"${TENANT_IMAGE_NAME}:${SHA_TAG}"
|
||||
docker buildx imagetools create \
|
||||
--tag "${STAGING_TENANT_IMAGE_NAME}:latest" \
|
||||
"${STAGING_TENANT_IMAGE_NAME}:${SHA_TAG}"
|
||||
|
||||
{
|
||||
echo ""
|
||||
echo "### :latest promoted"
|
||||
echo ""
|
||||
echo "Re-pointed \`platform-tenant:latest\` → \`${SHA_TAG}\` (prod + staging ECR)."
|
||||
echo ":latest now tracks the prod-blessed, fleet-verified image."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
@@ -60,7 +60,6 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# bp-required: pending #718 — soak-then-promote, not in BP yet.
|
||||
compare:
|
||||
name: Compare synced providers.yaml against controlplane canonical
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -67,7 +67,6 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# bp-required: pending #718 — soak-then-promote, not in BP yet.
|
||||
verify:
|
||||
name: Regenerate providers artifact and fail on drift
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -60,26 +60,11 @@ test.describe("MobileChat", () => {
|
||||
|
||||
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
// Reload and deterministically wait for the chat-history GET that
|
||||
// rehydrates the transcript to come back 2xx, rather than racing a
|
||||
// fixed-timeout render assertion against an in-flight fetch. The
|
||||
// server now persists the a2a_receive row SYNCHRONOUSLY before the
|
||||
// send's 200 (workspace-server logA2ASuccess), so the row is
|
||||
// guaranteed present by the time this GET runs — the wait is for
|
||||
// hydration latency, not for a still-racing write.
|
||||
const historyResponse = page.waitForResponse(
|
||||
(resp) =>
|
||||
resp.url().includes("/chat-history") &&
|
||||
resp.request().method() === "GET" &&
|
||||
resp.status() === 200,
|
||||
{ timeout: 15_000 },
|
||||
);
|
||||
await page.reload();
|
||||
await page.waitForSelector("[data-testid='chat-panel']", { timeout: 10_000 });
|
||||
await historyResponse;
|
||||
|
||||
await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible();
|
||||
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible();
|
||||
await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible({ timeout: 5_000 });
|
||||
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 5_000 });
|
||||
});
|
||||
|
||||
test("composer auto-grows with multi-line text", async ({ page }) => {
|
||||
|
||||
@@ -241,14 +241,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
name: "E2E Canvas Test",
|
||||
runtime: "hermes",
|
||||
tier: 2,
|
||||
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
|
||||
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
|
||||
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
|
||||
// platform). The old `gpt-4o` was never a registered hermes model and
|
||||
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
|
||||
// defaults closed to platform_managed (see the boot-shape note below),
|
||||
// so a platform-namespaced model id is the registry-correct choice.
|
||||
model: "moonshot/kimi-k2.6",
|
||||
model: "gpt-4o",
|
||||
}),
|
||||
});
|
||||
if (ws.status >= 400 || !ws.body?.id) {
|
||||
@@ -257,38 +250,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
const workspaceId = ws.body.id as string;
|
||||
console.log(`[staging-setup] Workspace created: ${workspaceId}`);
|
||||
|
||||
// 6. Wait for workspace RENDERABLE.
|
||||
//
|
||||
// This harness exists to verify the canvas *tab UI* renders (staging-
|
||||
// tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard
|
||||
// crash / no "Failed to load" toast). It does NOT exercise the agent —
|
||||
// no LLM call is made, the spec even mocks /cp/auth/me and 401→200. All
|
||||
// it needs is a workspace ROW that the canvas lists so the node renders
|
||||
// and the side-panel tabs open. A fully-`online` agent is NOT required.
|
||||
//
|
||||
// That distinction became load-bearing on 2026-06-03: workspace-server
|
||||
// #2162 (fix(provision): platform-managed workspace must fail-closed when
|
||||
// CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
|
||||
// with MISSING_PLATFORM_PROXY when MOLECULE_LLM_BASE_URL /
|
||||
// MOLECULE_LLM_USAGE_TOKEN are not present in the tenant's env. The
|
||||
// canvas E2E creates a bare hermes/moonshot platform workspace, which defaults
|
||||
// closed to platform_managed (workspace_provision.go:~1009), and the
|
||||
// staging tenant does not carry the CP proxy env — so the agent never
|
||||
// starts. Pre-#2162 this same workspace booted credential-less (the bug
|
||||
// #2162 fixed) and the tabs rendered fine; #2162 is a correct production
|
||||
// safety fix, but it surfaced here as `status:"failed", uptime_seconds:0,
|
||||
// last_sample_error:null` — the pre-start credential-abort shape — and the
|
||||
// old hard-throw turned a UI-irrelevant boot skip into a main-red
|
||||
// (core#2199). The agent boot stage is simply not what this test gates.
|
||||
//
|
||||
// So: online is the happy path. A `failed` row that is the PRE-START
|
||||
// credential-abort shape (the agent process never ran: uptime_seconds==0
|
||||
// AND no last_sample_error) is treated as RENDERABLE — the row exists,
|
||||
// the node + tabs render, proceed. We do NOT mask a real boot regression:
|
||||
// any `failed` carrying a last_sample_error, OR a non-zero uptime (the
|
||||
// agent started then crashed — image pull, panic, PYTHONPATH, etc.),
|
||||
// still hard-throws. Genuine *infra* provision failure is already caught
|
||||
// loud one step earlier at the org level (instance_status === "failed").
|
||||
// 6. Wait for workspace online
|
||||
await waitFor<boolean>(
|
||||
async () => {
|
||||
const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
|
||||
@@ -297,24 +259,6 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
if (r.status !== 200) return null;
|
||||
if (r.body?.status === "online") return true;
|
||||
if (r.body?.status === "failed") {
|
||||
const uptime = Number(r.body?.uptime_seconds ?? 0);
|
||||
const sampleErr = r.body?.last_sample_error;
|
||||
const preStartCredentialAbort = uptime === 0 && !sampleErr;
|
||||
if (preStartCredentialAbort) {
|
||||
// Agent never started (no LLM cred on this staging tenant — the
|
||||
// expected #2162 platform-proxy gap). The workspace row still
|
||||
// renders, which is all the tab-UI test needs. Proceed, but log
|
||||
// loudly so a real "agent never booted because of something else"
|
||||
// is not silently normalized.
|
||||
console.warn(
|
||||
`[staging-setup] workspace ${workspaceId} is 'failed' with the pre-start ` +
|
||||
`credential-abort shape (uptime_seconds=0, no last_sample_error) — agent did ` +
|
||||
`not boot (expected on staging without CP LLM proxy env, post workspace-server ` +
|
||||
`#2162). The tab-UI test does not exercise the agent; proceeding with the ` +
|
||||
`workspace row, which renders regardless. full body: ${JSON.stringify(r.body)}`,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
// last_sample_error is often empty when the failure happens before
|
||||
// the agent emits a sample (e.g. boot crash, image pull error,
|
||||
// missing PYTHONPATH, OpenAI quota at startup). Dumping the full
|
||||
@@ -322,8 +266,8 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
// needs without a second probe. Otherwise this propagates as a
|
||||
// bare "Workspace failed: " — the exact useless message that
|
||||
// sent #2632 to the issue tracker.
|
||||
const detail = sampleErr
|
||||
? sampleErr
|
||||
const detail = r.body.last_sample_error
|
||||
? r.body.last_sample_error
|
||||
: `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
|
||||
throw new Error(`Workspace failed: ${detail}`);
|
||||
}
|
||||
@@ -333,7 +277,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
10_000,
|
||||
"workspace online",
|
||||
);
|
||||
console.log(`[staging-setup] Workspace renderable`);
|
||||
console.log(`[staging-setup] Workspace online`);
|
||||
|
||||
// 7. Hand state off to tests + teardown — overwrite the slug-only
|
||||
// bootstrap state with the full state spec tests need.
|
||||
|
||||
@@ -8,12 +8,9 @@ import { ExternalConnectModal, type ExternalConnectionInfo } from "./ExternalCon
|
||||
import {
|
||||
ProviderModelSelector,
|
||||
buildProviderCatalog,
|
||||
buildProviderCatalogFromRegistry,
|
||||
findProviderForModel,
|
||||
type SelectorModel,
|
||||
type SelectorValue,
|
||||
type RegistryProvider,
|
||||
type RegistryModel,
|
||||
} from "./ProviderModelSelector";
|
||||
|
||||
interface WorkspaceOption {
|
||||
@@ -35,16 +32,6 @@ interface TemplateSpec {
|
||||
model?: string;
|
||||
models?: SelectorModel[];
|
||||
providers?: string[];
|
||||
// internal#718 P3 registry-served fields (additive; absent on older
|
||||
// backends and for non-registry runtimes). When registry_backed is true the
|
||||
// provider→model catalog is built from registry_providers/registry_models so
|
||||
// each model's DERIVED provider (e.g. moonshot/kimi-k2.6 → "platform") drives
|
||||
// the dropdown bucket and the create payload's llm_provider — instead of the
|
||||
// legacy inferVendor heuristic that slash-splits the id into "moonshot".
|
||||
// Mirrors ConfigTab's RuntimeOption loader (RFC#340 Fix C).
|
||||
registry_backed?: boolean;
|
||||
registry_providers?: RegistryProvider[];
|
||||
registry_models?: RegistryModel[];
|
||||
}
|
||||
|
||||
const DEFAULT_RUNTIME = "claude-code";
|
||||
@@ -181,53 +168,15 @@ export function CreateWorkspaceButton() {
|
||||
}),
|
||||
[runtime, templateSpecs],
|
||||
);
|
||||
// The /templates row backing the LLM picker: an explicitly-selected
|
||||
// workspace template wins, else the base runtime template row.
|
||||
const llmSourceSpec = useMemo<TemplateSpec | null>(
|
||||
() => selectedTemplateSpec ?? selectedRuntimeTemplateSpec,
|
||||
const llmModels = useMemo(
|
||||
() => {
|
||||
const sourceSpec = selectedTemplateSpec ?? selectedRuntimeTemplateSpec;
|
||||
if (!sourceSpec?.models?.length) return [];
|
||||
return sourceSpec.models;
|
||||
},
|
||||
[selectedRuntimeTemplateSpec, selectedTemplateSpec],
|
||||
);
|
||||
// internal#718 P3 / RFC#340 Fix C: a runtime is registry-backed when the
|
||||
// /templates row says so AND it served a non-empty registry_models set.
|
||||
// Mirrors ConfigTab's `registryBacked` derivation exactly.
|
||||
const registryBacked = useMemo(
|
||||
() =>
|
||||
llmSourceSpec?.registry_backed === true &&
|
||||
(llmSourceSpec.registry_models?.length ?? 0) > 0,
|
||||
[llmSourceSpec],
|
||||
);
|
||||
// Models fed to the selector dropdown. For a registry-backed runtime use the
|
||||
// registry-served native set, carrying each model's DERIVED provider so the
|
||||
// selector buckets it correctly (moonshot/kimi-k2.6 → "platform", not the
|
||||
// inferVendor "moonshot"). Otherwise fall back to the template-served
|
||||
// models[] + the legacy heuristic — same fallback ConfigTab keeps.
|
||||
const llmModels = useMemo<SelectorModel[]>(
|
||||
() => {
|
||||
if (registryBacked) {
|
||||
return (llmSourceSpec?.registry_models ?? []).map((m) => ({
|
||||
id: m.id,
|
||||
name: m.name,
|
||||
...(m.provider ? { provider: m.provider } : {}),
|
||||
}));
|
||||
}
|
||||
return llmSourceSpec?.models?.length ? llmSourceSpec.models : [];
|
||||
},
|
||||
[registryBacked, llmSourceSpec],
|
||||
);
|
||||
// Registry-backed path: build the catalog from registry_providers/
|
||||
// registry_models so dropdown labels + billing + the derived provider come
|
||||
// from the provider-registry SSOT (restores the "Platform" bucket). Legacy
|
||||
// path: re-infer from models[] via buildProviderCatalog (inferVendor).
|
||||
const llmCatalog = useMemo(
|
||||
() =>
|
||||
registryBacked
|
||||
? buildProviderCatalogFromRegistry(
|
||||
llmSourceSpec?.registry_providers ?? [],
|
||||
llmSourceSpec?.registry_models ?? [],
|
||||
)
|
||||
: buildProviderCatalog(llmModels),
|
||||
[registryBacked, llmSourceSpec, llmModels],
|
||||
);
|
||||
const llmCatalog = useMemo(() => buildProviderCatalog(llmModels), [llmModels]);
|
||||
const selectedLLMProvider = useMemo(
|
||||
() => llmCatalog.find((p) => p.id === llmSelection.providerId) ?? llmCatalog[0],
|
||||
[llmCatalog, llmSelection.providerId],
|
||||
@@ -235,7 +184,7 @@ export function CreateWorkspaceButton() {
|
||||
|
||||
useEffect(() => {
|
||||
if (llmCatalog.length === 0) return;
|
||||
const sourceDefault = llmSourceSpec?.model?.trim();
|
||||
const sourceDefault = (selectedTemplateSpec ?? selectedRuntimeTemplateSpec)?.model?.trim();
|
||||
const platformProvider = llmCatalog.find((p) => p.vendor === "platform");
|
||||
const matched = sourceDefault ? findProviderForModel(llmCatalog, sourceDefault) : null;
|
||||
const next = platformProvider ?? matched ?? llmCatalog[0];
|
||||
@@ -248,7 +197,7 @@ export function CreateWorkspaceButton() {
|
||||
envVars: next.envVars,
|
||||
});
|
||||
setLLMSecret("");
|
||||
}, [llmCatalog, llmSourceSpec]);
|
||||
}, [llmCatalog, selectedRuntimeTemplateSpec, selectedTemplateSpec]);
|
||||
|
||||
// Reset form and load workspaces whenever dialog opens
|
||||
useEffect(() => {
|
||||
@@ -512,7 +461,6 @@ export function CreateWorkspaceButton() {
|
||||
</div>
|
||||
<ProviderModelSelector
|
||||
models={llmModels}
|
||||
catalog={registryBacked ? llmCatalog : undefined}
|
||||
value={llmSelection}
|
||||
onChange={(next) => {
|
||||
setLLMSelection(next);
|
||||
|
||||
@@ -454,128 +454,6 @@ describe("CreateWorkspaceDialog — dynamic runtime provider picker", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Registry-backed provider catalog (RFC#340 Fix C)
|
||||
//
|
||||
// Regression guard for the mis-bucketing bug: when a registry-backed
|
||||
// claude-code template serves `moonshot/kimi-k2.6` whose DERIVED provider is
|
||||
// `platform`, the dialog must build the dropdown from registry_providers/
|
||||
// registry_models (buildProviderCatalogFromRegistry) — NOT the legacy
|
||||
// inferVendor heuristic which slash-splits the id into "moonshot". The
|
||||
// distinguishing trait of this fixture: the plain `models[]` array does NOT
|
||||
// carry an explicit `provider` field, so the LEGACY path would bucket the
|
||||
// model under "moonshot" and send llm_provider:"moonshot". Only the
|
||||
// registry-backed path yields the Platform bucket + llm_provider:"platform".
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// claude-code template whose plain models[] is UN-annotated (no explicit
|
||||
// provider). The derived-provider annotation lives ONLY in registry_models.
|
||||
const REGISTRY_TEMPLATE = {
|
||||
id: "claude-code-default",
|
||||
name: "Claude Code Agent",
|
||||
runtime: "claude-code",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
// Legacy fields — note: NO explicit provider on the platform model, so the
|
||||
// legacy inferVendor path would slash-split it into "moonshot".
|
||||
providers: ["platform", "minimax", "anthropic"],
|
||||
models: [
|
||||
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", required_env: [] },
|
||||
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
|
||||
{ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", required_env: ["ANTHROPIC_API_KEY"] },
|
||||
],
|
||||
// Registry-served SSOT (internal#718 P3). DeriveProvider resolved
|
||||
// moonshot/kimi-k2.6 → "platform"; MiniMax-M2.7 → "minimax".
|
||||
registry_backed: true,
|
||||
registry_providers: [
|
||||
{ name: "platform", display_name: "Platform", auth_env: [], billing_mode: "platform_managed" },
|
||||
{ name: "minimax", display_name: "MiniMax", auth_env: ["MINIMAX_API_KEY"], billing_mode: "byok" },
|
||||
{ name: "anthropic", display_name: "Anthropic API", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "byok" },
|
||||
],
|
||||
registry_models: [
|
||||
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", billing_mode: "platform_managed" },
|
||||
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", provider: "minimax", billing_mode: "byok" },
|
||||
{ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", provider: "anthropic", billing_mode: "byok" },
|
||||
],
|
||||
};
|
||||
|
||||
describe("CreateWorkspaceDialog — registry-backed provider catalog (RFC#340 Fix C)", () => {
|
||||
beforeEach(() => {
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
if (url === "/templates") {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return [REGISTRY_TEMPLATE] as any;
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return SAMPLE_WORKSPACES as any;
|
||||
});
|
||||
});
|
||||
|
||||
it("shows the Platform provider bucket for the registry-backed claude-code runtime", async () => {
|
||||
await openDialog();
|
||||
const providerSelect = await waitFor(() => {
|
||||
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
|
||||
expect(sel).toBeTruthy();
|
||||
return sel;
|
||||
});
|
||||
const labels = Array.from(providerSelect.options).map((o) => o.text.trim());
|
||||
// Registry display_name "Platform" appears — NOT "moonshot" from the
|
||||
// legacy slash-split heuristic.
|
||||
expect(labels).toContain("Platform");
|
||||
expect(labels).not.toContain("moonshot");
|
||||
// Bucket id is the registry-keyed id, vendor is the bare provider name.
|
||||
const values = Array.from(providerSelect.options).map((o) => o.value);
|
||||
expect(values).toContain("registry|platform");
|
||||
});
|
||||
|
||||
it("sends llm_provider: platform (not moonshot) for moonshot/kimi-k2.6", async () => {
|
||||
await openDialog();
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
|
||||
target: { value: "Kimi Agent" },
|
||||
});
|
||||
// Wait for the registry default to settle on the Platform bucket + model.
|
||||
await waitFor(() => {
|
||||
const modelSelect = document.querySelector("[data-testid='model-select']") as HTMLSelectElement;
|
||||
expect(modelSelect?.value).toBe("moonshot/kimi-k2.6");
|
||||
});
|
||||
|
||||
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
|
||||
fireEvent.click(createBtn!);
|
||||
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.model).toBe("moonshot/kimi-k2.6");
|
||||
expect(body.llm_provider).toBe("platform");
|
||||
// Platform is auth-env-free → no BYOK secret.
|
||||
expect(body.secrets).toBeUndefined();
|
||||
});
|
||||
|
||||
it("buckets MiniMax-M2.7 under its derived provider and sends llm_provider: minimax", async () => {
|
||||
await openDialog();
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
|
||||
target: { value: "MiniMax Agent" },
|
||||
});
|
||||
await waitFor(() => {
|
||||
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
|
||||
expect(Array.from(sel.options).map((o) => o.value)).toContain("registry|minimax");
|
||||
});
|
||||
fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
|
||||
target: { value: "registry|minimax" },
|
||||
});
|
||||
fireEvent.change(document.getElementById("llm-secret-input") as HTMLInputElement, {
|
||||
target: { value: "sk-minimax-test" },
|
||||
});
|
||||
|
||||
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
|
||||
fireEvent.click(createBtn!);
|
||||
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.model).toBe("MiniMax-M2.7");
|
||||
expect(body.llm_provider).toBe("minimax");
|
||||
expect(body.secrets).toEqual({ MINIMAX_API_KEY: "sk-minimax-test" });
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// budget_limit field tests (#541)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import { api } from "@/lib/api";
|
||||
import type RFB from "@novnc/novnc";
|
||||
|
||||
@@ -33,11 +33,6 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
const [controlBusy, setControlBusy] = useState(false);
|
||||
const [sessionUrl, setSessionUrl] = useState<string | null>(null);
|
||||
const requestGeneration = useRef(0);
|
||||
// Freshest signed session URL (token bound to the lease's expires_at). The
|
||||
// renewal timer keeps this current WITHOUT swapping the live stream's
|
||||
// sessionUrl (which would needlessly reconnect the desktop); the stream uses
|
||||
// it only when it has to reconnect after an unclean drop.
|
||||
const latestSessionUrlRef = useRef<string | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const generation = requestGeneration.current + 1;
|
||||
@@ -46,7 +41,6 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
setStatus(null);
|
||||
setControl(null);
|
||||
setSessionUrl(null);
|
||||
latestSessionUrlRef.current = null;
|
||||
setError(null);
|
||||
setControlError(null);
|
||||
setControlBusy(false);
|
||||
@@ -75,41 +69,6 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
};
|
||||
}, [workspaceId]);
|
||||
|
||||
// Acquire (or re-acquire) the display-control lease as the current holder.
|
||||
// Re-acquiring extends the 300s server-side lock AND returns a freshly-signed
|
||||
// session URL (token bound to the new expires_at). Used both to renew the
|
||||
// lease on a timer and to mint a non-stale token for each reconnect — a
|
||||
// cached URL can be past its ~300s expiry, which would make a reconnect 401.
|
||||
const reacquireSession = useCallback(async (): Promise<string | null> => {
|
||||
const generation = requestGeneration.current;
|
||||
try {
|
||||
const next = await api.post<DisplayControlStatus>(
|
||||
`/workspaces/${workspaceId}/display/control/acquire`,
|
||||
{ controller: "user", ttl_seconds: 300 },
|
||||
);
|
||||
if (requestGeneration.current !== generation) return null;
|
||||
setControl(next);
|
||||
if (next.session_url) latestSessionUrlRef.current = next.session_url;
|
||||
return next.session_url ?? null;
|
||||
} catch {
|
||||
// Transient failure, or another holder took over: the live stream keeps
|
||||
// running on its existing connection; a reconnect re-evaluates control.
|
||||
return null;
|
||||
}
|
||||
}, [workspaceId]);
|
||||
|
||||
// Renew the lease while we hold it. The lock is a 300s lease with no
|
||||
// server-side auto-renewal, so without this the control (and the session
|
||||
// token) silently expire mid-session — the user appears "kicked" every ~5
|
||||
// minutes. We renew well inside the TTL and do not touch the live stream.
|
||||
useEffect(() => {
|
||||
if (!sessionUrl) return;
|
||||
const timer = setInterval(() => {
|
||||
void reacquireSession();
|
||||
}, 120_000);
|
||||
return () => clearInterval(timer);
|
||||
}, [sessionUrl, reacquireSession]);
|
||||
|
||||
const acquireControl = async () => {
|
||||
const generation = requestGeneration.current;
|
||||
const controlPath = `/workspaces/${workspaceId}/display/control`;
|
||||
@@ -123,7 +82,6 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControl(next);
|
||||
setSessionUrl(next.session_url || null);
|
||||
latestSessionUrlRef.current = next.session_url || null;
|
||||
} catch (err) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControlError("Failed to take control");
|
||||
@@ -150,7 +108,6 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControl(next);
|
||||
setSessionUrl(null);
|
||||
latestSessionUrlRef.current = null;
|
||||
} catch (err) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControlError("Failed to release control");
|
||||
@@ -278,11 +235,7 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
/>
|
||||
</div>
|
||||
{sessionUrl ? (
|
||||
<DesktopStream
|
||||
sessionUrl={sessionUrl}
|
||||
latestSessionUrlRef={latestSessionUrlRef}
|
||||
reacquireSession={reacquireSession}
|
||||
/>
|
||||
<DesktopStream sessionUrl={sessionUrl} />
|
||||
) : (
|
||||
<div className="flex flex-1 items-center justify-center p-8 text-center">
|
||||
<div>
|
||||
@@ -358,15 +311,7 @@ function DisplayControlBar({
|
||||
);
|
||||
}
|
||||
|
||||
function DesktopStream({
|
||||
sessionUrl,
|
||||
latestSessionUrlRef,
|
||||
reacquireSession,
|
||||
}: {
|
||||
sessionUrl: string;
|
||||
latestSessionUrlRef: { current: string | null };
|
||||
reacquireSession: () => Promise<string | null>;
|
||||
}) {
|
||||
function DesktopStream({ sessionUrl }: { sessionUrl: string }) {
|
||||
const containerRef = useRef<HTMLDivElement | null>(null);
|
||||
const rfbRef = useRef<RFB | null>(null);
|
||||
const [streamError, setStreamError] = useState<string | null>(null);
|
||||
@@ -384,37 +329,20 @@ function DesktopStream({
|
||||
clipboardTimer = setTimeout(() => setClipboardStatus(null), 2500);
|
||||
};
|
||||
|
||||
let attempts = 0;
|
||||
let retryTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
const maxAttempts = 10;
|
||||
|
||||
async function connect(reacquire = false) {
|
||||
async function connect() {
|
||||
setStreamError(null);
|
||||
try {
|
||||
// On a reconnect, mint a fresh lease + token first — the original token
|
||||
// is only ~300s, so a cached URL can be expired and would 401. The
|
||||
// initial connect already holds a fresh token from acquireControl.
|
||||
if (reacquire) await reacquireSession();
|
||||
const mod = await import("@novnc/novnc");
|
||||
if (cancelled || !containerRef.current) return;
|
||||
const stream = displayWebSocketConnection(latestSessionUrlRef.current || sessionUrl);
|
||||
const stream = displayWebSocketConnection(sessionUrl);
|
||||
rfb = new mod.default(containerRef.current, stream.url, {
|
||||
wsProtocols: ["binary", `molecule-display-token.${stream.token}`],
|
||||
});
|
||||
rfbRef.current = rfb;
|
||||
rfb.scaleViewport = true;
|
||||
// Do NOT request a server-side resize: the workspace display runs a
|
||||
// fixed Xorg modeline and x11vnc rejects SetDesktopSize ("Resize is
|
||||
// administratively prohibited"), which spams the console on every
|
||||
// (re)connect. scaleViewport already fits the fixed framebuffer to the
|
||||
// container client-side, so we don't need the server to resize.
|
||||
rfb.resizeSession = false;
|
||||
rfb.resizeSession = true;
|
||||
rfb.focusOnClick = true;
|
||||
rfb.focus({ preventScroll: true });
|
||||
rfb.addEventListener("connect", () => {
|
||||
attempts = 0;
|
||||
if (!cancelled) setStreamError(null);
|
||||
});
|
||||
rfb.addEventListener("clipboard", (event: Event) => {
|
||||
const text = (event as CustomEvent<{ text?: string }>).detail?.text ?? "";
|
||||
if (!text) return;
|
||||
@@ -425,20 +353,7 @@ function DesktopStream({
|
||||
});
|
||||
rfb.addEventListener("disconnect", (event: Event) => {
|
||||
const detail = (event as CustomEvent<{ clean?: boolean }>).detail;
|
||||
rfbRef.current = null;
|
||||
if (cancelled || detail?.clean) return;
|
||||
// Auto-reconnect after an unclean drop (idle/network blip, brief
|
||||
// agent hiccup); bounded backoff so a genuinely-dead session still
|
||||
// surfaces an error instead of looping forever.
|
||||
if (attempts < maxAttempts) {
|
||||
attempts += 1;
|
||||
setStreamError(`Reconnecting to desktop… (attempt ${attempts})`);
|
||||
retryTimer = setTimeout(() => {
|
||||
if (!cancelled) void connect(true);
|
||||
}, Math.min(1000 * attempts, 5000));
|
||||
} else {
|
||||
setStreamError("Desktop stream disconnected.");
|
||||
}
|
||||
if (!cancelled && !detail?.clean) setStreamError("Desktop stream disconnected.");
|
||||
});
|
||||
} catch {
|
||||
if (!cancelled) setStreamError("Desktop stream could not be opened.");
|
||||
@@ -448,12 +363,11 @@ function DesktopStream({
|
||||
connect();
|
||||
return () => {
|
||||
cancelled = true;
|
||||
if (retryTimer) clearTimeout(retryTimer);
|
||||
if (clipboardTimer) clearTimeout(clipboardTimer);
|
||||
rfbRef.current = null;
|
||||
rfb?.disconnect();
|
||||
};
|
||||
}, [sessionUrl, reacquireSession, latestSessionUrlRef]);
|
||||
}, [sessionUrl]);
|
||||
|
||||
useEffect(() => {
|
||||
const onPaste = (event: ClipboardEvent) => {
|
||||
|
||||
@@ -2,13 +2,12 @@
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { cleanup, fireEvent, render, screen, waitFor } from "@testing-library/react";
|
||||
|
||||
const { mockGet, mockPost, mockRFBConstructor, mockRFBClipboardPasteFrom, mockRFBFocus, rfbInstances } = vi.hoisted(() => ({
|
||||
const { mockGet, mockPost, mockRFBConstructor, mockRFBClipboardPasteFrom, mockRFBFocus } = vi.hoisted(() => ({
|
||||
mockGet: vi.fn(),
|
||||
mockPost: vi.fn(),
|
||||
mockRFBConstructor: vi.fn(),
|
||||
mockRFBClipboardPasteFrom: vi.fn(),
|
||||
mockRFBFocus: vi.fn(),
|
||||
rfbInstances: [] as EventTarget[],
|
||||
}));
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
@@ -32,7 +31,6 @@ vi.mock("@novnc/novnc", () => ({
|
||||
this.url = url;
|
||||
this.options = options;
|
||||
mockRFBConstructor(target, url, options);
|
||||
rfbInstances.push(this);
|
||||
}
|
||||
clipboardPasteFrom(text: string) {
|
||||
mockRFBClipboardPasteFrom(text);
|
||||
@@ -54,7 +52,6 @@ describe("DisplayTab", () => {
|
||||
mockRFBConstructor.mockReset();
|
||||
mockRFBClipboardPasteFrom.mockReset();
|
||||
mockRFBFocus.mockReset();
|
||||
rfbInstances.length = 0;
|
||||
});
|
||||
|
||||
it("renders unavailable state for non-display workspaces", async () => {
|
||||
@@ -403,62 +400,6 @@ describe("DisplayTab", () => {
|
||||
});
|
||||
expect(screen.getByRole("button", { name: "Take control" })).toBeTruthy();
|
||||
});
|
||||
|
||||
it("auto-reconnects the desktop stream after an unclean disconnect but not a clean one", async () => {
|
||||
mockGet
|
||||
.mockResolvedValueOnce({
|
||||
available: true,
|
||||
mode: "desktop-control",
|
||||
protocol: "novnc",
|
||||
width: 1920,
|
||||
height: 1080,
|
||||
})
|
||||
.mockResolvedValueOnce({ controller: "none" });
|
||||
// Initial acquire returns token "signed"; the reconnect re-acquire mints a
|
||||
// FRESH token "signed2" (the lock/token is only ~300s — reconnecting with a
|
||||
// cached, possibly-expired token would 401 and never recover).
|
||||
mockPost
|
||||
.mockResolvedValueOnce({
|
||||
controller: "user",
|
||||
controlled_by: "admin-token",
|
||||
expires_at: "2026-05-23T08:48:27Z",
|
||||
session_url: "/workspaces/ws-display/display/session/websockify#token=signed",
|
||||
})
|
||||
.mockResolvedValue({
|
||||
controller: "user",
|
||||
controlled_by: "admin-token",
|
||||
expires_at: "2026-05-23T08:53:27Z",
|
||||
session_url: "/workspaces/ws-display/display/session/websockify#token=signed2",
|
||||
});
|
||||
|
||||
render(<DisplayTab workspaceId="ws-display" />);
|
||||
await waitFor(() => {
|
||||
expect(screen.getByRole("button", { name: "Take control" })).toBeTruthy();
|
||||
});
|
||||
fireEvent.click(screen.getByRole("button", { name: "Take control" }));
|
||||
await waitFor(() => {
|
||||
expect(rfbInstances.length).toBe(1);
|
||||
});
|
||||
expect(mockRFBConstructor.mock.calls[0][2].wsProtocols).toContain("molecule-display-token.signed");
|
||||
|
||||
// An idle/network drop closes the websocket uncleanly. The client must
|
||||
// re-acquire a fresh token and reconnect instead of giving up — this is the
|
||||
// "disconnects every ~5 min and stays dead" report.
|
||||
rfbInstances[0].dispatchEvent(new CustomEvent("disconnect", { detail: { clean: false } }));
|
||||
await waitFor(
|
||||
() => {
|
||||
expect(rfbInstances.length).toBe(2);
|
||||
},
|
||||
{ timeout: 3000 },
|
||||
);
|
||||
// Reconnect dialed with the FRESH token, not the stale original.
|
||||
expect(mockRFBConstructor.mock.calls[1][2].wsProtocols).toContain("molecule-display-token.signed2");
|
||||
|
||||
// A clean disconnect (the user released control) must NOT reconnect.
|
||||
rfbInstances[1].dispatchEvent(new CustomEvent("disconnect", { detail: { clean: true } }));
|
||||
await new Promise((resolve) => setTimeout(resolve, 1100));
|
||||
expect(rfbInstances.length).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
function deferred<T>() {
|
||||
|
||||
+8
-21
@@ -159,28 +159,15 @@ services:
|
||||
|
||||
# --- Canvas ---
|
||||
canvas:
|
||||
# The publish-canvas-image CI workflow runs an ORDERED deploy (core#2226):
|
||||
# build → push :staging-<sha> + :staging-latest → (after green main CI)
|
||||
# re-point :latest to the verified :staging-<sha> by digest. So both tags
|
||||
# below resolve to a CI-green, reproducible build, never a raw/red one.
|
||||
#
|
||||
# Reproducible deploy: pin CANVAS_IMAGE_TAG to the immutable per-commit tag
|
||||
# the ordered deploy produced, e.g.
|
||||
# CANVAS_IMAGE_TAG=staging-<sha> docker compose pull canvas && docker compose up -d canvas
|
||||
# This makes a tenant/host deploy reproducible (resolves the standing
|
||||
# `TODO: pin canvas ECR image digest`). Unset it and the default `latest`
|
||||
# is the prod-blessed tag the ordered deploy keeps pointed at the last
|
||||
# green build — still deterministic vs. the old raw `:latest`.
|
||||
#
|
||||
# To pin by content digest instead of tag (fully immutable):
|
||||
# aws ecr describe-images --repository-name molecule-ai/canvas \
|
||||
# --image-tags staging-<sha> --region us-east-2 \
|
||||
# --query 'imageDetails[0].imageDigest' --output text
|
||||
# then set CANVAS_IMAGE_TAG=staging-<sha>@<digest> (compose passes it through).
|
||||
#
|
||||
# The publish-canvas-image CI workflow pushes a fresh image to GHCR on
|
||||
# every canvas/** merge to main. To update the running container:
|
||||
# docker compose pull canvas && docker compose up -d canvas
|
||||
# First-time local setup or testing unreleased changes — build from source:
|
||||
# docker compose build canvas && docker compose up -d canvas
|
||||
# Note: ECR images require AWS auth — `aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin 153263036946.dkr.ecr.us-east-2.amazonaws.com` before pull.
|
||||
# Local dev keeps working via the `build:` context below (docker compose build canvas).
|
||||
image: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas:${CANVAS_IMAGE_TAG:-latest}
|
||||
# Digest-pin requires: aws ecr describe-images --repository-name molecule-ai/canvas --image-tags latest --query 'imageDetails[0].imageDigest'
|
||||
# TODO: pin canvas ECR image digest once AWS creds are available in CI.
|
||||
image: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas:latest
|
||||
build:
|
||||
context: ./canvas
|
||||
dockerfile: Dockerfile
|
||||
|
||||
@@ -34,6 +34,8 @@
|
||||
],
|
||||
"org_templates": [
|
||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
||||
{"name": "free-beats-all", "repo": "molecule-ai/molecule-ai-org-template-free-beats-all", "ref": "main"},
|
||||
{"name": "medo-smoke", "repo": "molecule-ai/molecule-ai-org-template-medo-smoke", "ref": "main"},
|
||||
{"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
|
||||
{"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "main"}
|
||||
]
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
# Developer SOP — PR review gate auto-fire and stale-head handling
|
||||
|
||||
> Last updated: 2026-06-03 (cp#2159 follow-up)
|
||||
>
|
||||
> Applies to: all core-PR authors and reviewers on `molecule-core` and sibling
|
||||
> repos using the `qa-review` + `security-review` branch-protection gates.
|
||||
|
||||
---
|
||||
|
||||
## 1. Gitea PR-head workflow-selection rule
|
||||
|
||||
**Rule:** For `pull_request_target` and `pull_request_review` events, Gitea
|
||||
loads the workflow definition from the **PR's HEAD branch**, not from the
|
||||
base (`main`) branch.
|
||||
|
||||
This is different from GitHub Actions, where `pull_request_target` always
|
||||
loads workflows from the base branch. Gitea's behaviour means:
|
||||
|
||||
- A PR that was opened **before** the `pull_request_review` trigger was added
|
||||
to `qa-review.yml` / `security-review.yml` will **NOT** auto-fire on review,
|
||||
because its HEAD still contains the old workflow YAML (no trigger).
|
||||
|
||||
- A PR that was opened **after** the trigger was added (or that has been
|
||||
rebased onto a commit containing the trigger) **WILL** auto-fire, because its
|
||||
HEAD contains the new workflow YAML.
|
||||
|
||||
### Ops implication
|
||||
|
||||
| PR head contains `pull_request_review` trigger? | Behaviour on APPROVED review |
|
||||
|---|---|
|
||||
| **Yes** (cut from current main, or rebased) | Workflows auto-queue, evaluate, and POST the `(pull_request_target)` context automatically. No slash-command needed. |
|
||||
| **No** (stale head, opened before #2157) | Nothing fires. Use `/qa-recheck` + `/security-recheck` slash-commands in a PR comment, OR rebase onto current main. |
|
||||
|
||||
---
|
||||
|
||||
## 2. Standard core-PR flow (post-#2157)
|
||||
|
||||
```
|
||||
1. Author opens PR from a branch based on current main
|
||||
→ qa-review + security-review workflows run on pull_request_target
|
||||
→ status contexts post (initial eval, usually red until reviews land)
|
||||
|
||||
2. Reviewers submit real APPROVED reviews
|
||||
→ If PR head has the trigger: workflows AUTO-FIRE on pull_request_review
|
||||
→ Contexts flip green (or stay red if reviewer is not in team)
|
||||
|
||||
3. [Optional] If contexts did not flip (stale head, event lost, etc.):
|
||||
→ Anyone can comment `/qa-recheck` or `/security-recheck`
|
||||
→ sop-checklist.yml refires the evaluator (read-only, idempotent)
|
||||
|
||||
4. Both qa-review + security-review contexts are green
|
||||
→ Plain Do:merge (no force-merge needed)
|
||||
```
|
||||
|
||||
### Key point
|
||||
|
||||
The `/qa-recheck` and `/security-recheck` commands are a **backstop**, not the
|
||||
primary path. PRs cut from current main should auto-fire without manual
|
||||
intervention.
|
||||
|
||||
---
|
||||
|
||||
## 3. Diagnosing a stale head
|
||||
|
||||
If a PR has real team-member APPROVED reviews but the qa/security contexts
|
||||
remain red and no workflow run appears on the PR's "Actions" tab for the
|
||||
review event, the PR head is likely stale.
|
||||
|
||||
### Quick check
|
||||
|
||||
```bash
|
||||
# From the PR page, look at the head commit SHA, then:
|
||||
curl -sS "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/contents/.gitea/workflows/qa-review.yml?ref=<HEAD_SHA>" \
|
||||
| jq -r '.content' | base64 -d | grep -c 'pull_request_review'
|
||||
# 0 → stale head (no trigger in that version of the workflow)
|
||||
# >0 → trigger present; auto-fire SHOULD work (if it didn't, file a tracker)
|
||||
```
|
||||
|
||||
### Automated diagnostic
|
||||
|
||||
The test suite includes `test_gate_stale_head_diagnostic.py`, which reports
|
||||
"auto-fire impossible for this PR" when the head lacks the trigger. Run it
|
||||
in CI or locally with:
|
||||
|
||||
```bash
|
||||
PR_NUMBER=123 python -m pytest .gitea/scripts/tests/test_gate_stale_head_diagnostic.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Rebasing vs. slash-refire
|
||||
|
||||
| Approach | When to use | Trade-off |
|
||||
|---|---|---|
|
||||
| **Rebase onto current main** | PR is genuinely stale (head lacks trigger OR head is far behind main) | Clean history, gets all recent fixes, but requires force-push and re-approval if the branch was protected |
|
||||
| **`/qa-recheck` + `/security-recheck`** | PR head is recent but the review event was missed, or you want to avoid rebase churn | Quick, no force-push, but does NOT fix a missing trigger in the head |
|
||||
|
||||
**Do not** use slash-refire as a substitute for rebasing a stale head. If the
|
||||
workflow YAML in the PR head does not contain `pull_request_review`, no amount
|
||||
of rechecking will make auto-fire work.
|
||||
|
||||
---
|
||||
|
||||
## 5. Live-fire verification
|
||||
|
||||
The `test_gate_auto_fire_live.py` regression test exercises the full runtime
|
||||
path: it submits an APPROVED review to a test PR and polls for the
|
||||
`(pull_request_target)` status contexts. It is skipped when no API token is
|
||||
available, and is intended to catch runtime non-fire that static structural
|
||||
tests (e.g. `test_gate_review_auto_fire.py`) cannot detect.
|
||||
|
||||
Run manually with:
|
||||
|
||||
```bash
|
||||
export GITEA_HOST=git.moleculesai.app
|
||||
export GITEA_TOKEN=<your-token>
|
||||
export REPO=molecule-ai/molecule-core
|
||||
export LIVEFIRE_PR_NUMBER=<test-pr-number>
|
||||
python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
|
||||
the `pull_request_review` trigger, NOT a workflow code defect)
|
||||
- #765 — static structural regression test for gate configuration
|
||||
- #2157 — merged trigger addition (`pull_request_review` types: [submitted])
|
||||
- #2020 — milestone confirming gate infrastructure is stable
|
||||
- RFC#324 — qa-review + security-review design
|
||||
@@ -23,61 +23,23 @@
|
||||
# their provider entries, otherwise the workspace boots
|
||||
# reachable but the first A2A call hits the wrong auth path.
|
||||
#
|
||||
# PLATFORM-MANAGED path (E2E_LLM_PATH=platform) — the moonshot/kimi
|
||||
# NOT_CONFIGURED regression (RFC#340 Fix A #2187):
|
||||
#
|
||||
# The branches above all exercise BYOK: a tenant key (MINIMAX/ANTHROPIC/
|
||||
# OPENAI) is injected as a workspace secret and the model id resolves to that
|
||||
# vendor's *BYOK* provider entry. That path NEVER exercises the platform arm —
|
||||
# the exact arm that booted "moonshot/kimi-k2.6" into NOT_CONFIGURED in prod,
|
||||
# because the generated config.yaml lacked the derived `provider: platform`.
|
||||
#
|
||||
# E2E_LLM_PATH=platform selects a platform-managed model id (slash-namespaced,
|
||||
# no tenant key — Molecule owns billing via the CP LLM proxy). The default is
|
||||
# "moonshot/kimi-k2.6", the headline incident combo. Override the specific
|
||||
# platform model with E2E_MODEL_SLUG. The provision branch in
|
||||
# test_staging_full_saas.sh sends NO secrets for this path (platform-managed
|
||||
# needs none), so the workspace must boot online purely on the proxy env the
|
||||
# control plane injects + the manifest-derived `provider: platform` that Fix A
|
||||
# stamps. That is the REAL boot-path assertion the deterministic unit test
|
||||
# (workspace_provision_platform_boot_test.go) cannot make.
|
||||
#
|
||||
# When E2E_MODEL_SLUG is set, it overrides this dispatch entirely — useful when
|
||||
# an operator dispatches the workflow to test a specific slug (or a specific
|
||||
# platform model id).
|
||||
# When E2E_MODEL_SLUG is set, it overrides this dispatch — useful when an
|
||||
# operator dispatches the workflow to test a specific slug.
|
||||
#
|
||||
# Unit tested by tests/e2e/test_model_slug.sh — every branch must stay
|
||||
# pinned because regressions silently mask as "Could not resolve
|
||||
# authentication method" + the synth-E2E gate goes red without naming
|
||||
# the slug-format mismatch.
|
||||
|
||||
# Default platform-managed model for the platform-boot regression path. The
|
||||
# exact id that booted NOT_CONFIGURED in prod. Must stay a member of the
|
||||
# claude-code `platform` arm in workspace-server/internal/providers/providers.yaml
|
||||
# (the deterministic suite TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel
|
||||
# enforces every member of that arm derives provider=platform). Resolved INSIDE
|
||||
# pick_model_slug via ${E2E_DEFAULT_PLATFORM_MODEL:-...} so callers can override
|
||||
# it (or unset it) without tripping `set -u`.
|
||||
E2E_DEFAULT_PLATFORM_MODEL_FALLBACK="moonshot/kimi-k2.6"
|
||||
|
||||
# Usage: pick_model_slug <runtime>
|
||||
# stdout: the slug string
|
||||
# E2E_MODEL_SLUG (env): if set + non-empty, used as-is (operator override)
|
||||
# E2E_LLM_PATH=platform (env): select the platform-managed model id
|
||||
# (E2E_DEFAULT_PLATFORM_MODEL) instead of a BYOK slug. Takes precedence over
|
||||
# the per-key BYOK branches; E2E_MODEL_SLUG still wins over everything.
|
||||
pick_model_slug() {
|
||||
local runtime="${1:-}"
|
||||
if [ -n "${E2E_MODEL_SLUG:-}" ]; then
|
||||
printf '%s' "$E2E_MODEL_SLUG"
|
||||
return 0
|
||||
fi
|
||||
# Platform-managed path: the slash-namespaced platform model, no tenant key.
|
||||
# Exercises the arm the moonshot/kimi NOT_CONFIGURED bug shipped on.
|
||||
if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
|
||||
printf '%s' "${E2E_DEFAULT_PLATFORM_MODEL:-$E2E_DEFAULT_PLATFORM_MODEL_FALLBACK}"
|
||||
return 0
|
||||
fi
|
||||
case "$runtime" in
|
||||
hermes) printf 'openai/gpt-4o' ;;
|
||||
claude-code)
|
||||
|
||||
@@ -65,28 +65,6 @@ assert_eq "claude-code + both keys → MiniMax priority" "$got" "Mini
|
||||
run_test "unknown runtime → slash-form fallback" gemini "openai/gpt-4o"
|
||||
run_test "empty runtime → slash-form fallback" "" "openai/gpt-4o"
|
||||
|
||||
# ── Platform-managed path (E2E_LLM_PATH=platform) ──
|
||||
# The moonshot/kimi NOT_CONFIGURED regression path (RFC#340 Fix A #2187).
|
||||
# Selects the slash-namespaced platform model (default moonshot/kimi-k2.6),
|
||||
# takes precedence over the per-key BYOK branches, and is itself overridden by
|
||||
# E2E_MODEL_SLUG. These pins guard the harness's ability to drive the platform
|
||||
# arm — the one the prod bug shipped on.
|
||||
echo
|
||||
echo "Test: pick_model_slug — platform-managed path (E2E_LLM_PATH=platform)"
|
||||
echo
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform pick_model_slug claude-code)
|
||||
assert_eq "claude-code + platform path → headline kimi model" "$got" "moonshot/kimi-k2.6"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_DEFAULT_PLATFORM_MODEL; E2E_LLM_PATH=platform E2E_MINIMAX_API_KEY="mx-stray" pick_model_slug claude-code)
|
||||
assert_eq "platform path beats a stray BYOK key (no mask)" "$got" "moonshot/kimi-k2.6"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG; E2E_LLM_PATH=platform E2E_DEFAULT_PLATFORM_MODEL="minimax/MiniMax-M3" pick_model_slug claude-code)
|
||||
assert_eq "platform path honours E2E_DEFAULT_PLATFORM_MODEL" "$got" "minimax/MiniMax-M3"
|
||||
|
||||
got=$(unset E2E_DEFAULT_PLATFORM_MODEL; E2E_MODEL_SLUG="anthropic/claude-opus-4-7" E2E_LLM_PATH=platform pick_model_slug claude-code)
|
||||
assert_eq "E2E_MODEL_SLUG still wins over platform path" "$got" "anthropic/claude-opus-4-7"
|
||||
|
||||
# ── Override via E2E_MODEL_SLUG ──
|
||||
# When the operator sets E2E_MODEL_SLUG, the per-runtime dispatch is
|
||||
# bypassed. Used during workflow_dispatch to A/B specific slugs.
|
||||
|
||||
@@ -234,30 +234,9 @@ elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_OPENAI_API_KEY'];print(json.dumps({'OPENAI_API_KEY':k,'OPENAI_BASE_URL':'https://api.openai.com/v1','MODEL_PROVIDER':'openai:gpt-4o','HERMES_INFERENCE_PROVIDER':'custom','HERMES_CUSTOM_BASE_URL':'https://api.openai.com/v1','HERMES_CUSTOM_API_KEY':k,'HERMES_CUSTOM_API_MODE':'chat_completions'}))")
|
||||
fi
|
||||
|
||||
# Workspace-create now enforces the MODEL_REQUIRED contract: there is NO
|
||||
# platform-side default model for a runtime (feedback_workspace_model_required_
|
||||
# no_platform_default). Every create MUST carry an explicit `model`, or the CP
|
||||
# rejects it with MODEL_REQUIRED before this gate's peer-visibility assertion
|
||||
# can run. We pick a PLATFORM-MANAGED id (Molecule owns billing — no tenant key
|
||||
# needed; this gate only needs the workspace to boot + list peers, not heavy
|
||||
# LLM work), validated against the controlplane providers SSOT
|
||||
# (internal/providers/providers.yaml runtimes.<rt>.providers[platform].models):
|
||||
# claude-code → anthropic/claude-sonnet-4-6 (platform claude model)
|
||||
# hermes/openclaw → moonshot/kimi-k2.6 (their only platform family)
|
||||
# E2E_MODEL_SLUG overrides for operator-dispatched runs.
|
||||
pv_platform_model_for_runtime() {
|
||||
if [ -n "${E2E_MODEL_SLUG:-}" ]; then printf '%s' "$E2E_MODEL_SLUG"; return 0; fi
|
||||
case "$1" in
|
||||
claude-code) printf 'anthropic/claude-sonnet-4-6' ;;
|
||||
hermes|openclaw) printf 'moonshot/kimi-k2.6' ;;
|
||||
*) printf 'moonshot/kimi-k2.6' ;;
|
||||
esac
|
||||
}
|
||||
|
||||
log "4/6 provisioning parent (claude-code) + one sibling per runtime under test..."
|
||||
PARENT_MODEL=$(pv_platform_model_for_runtime claude-code)
|
||||
P_RESP=$(tenant_call POST /workspaces \
|
||||
-d "{\"name\":\"pv-parent\",\"runtime\":\"claude-code\",\"model\":\"$PARENT_MODEL\",\"tier\":3,\"secrets\":$SECRETS_JSON}")
|
||||
-d "{\"name\":\"pv-parent\",\"runtime\":\"claude-code\",\"tier\":3,\"secrets\":$SECRETS_JSON}")
|
||||
PARENT_ID=$(echo "$P_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
[ -n "$PARENT_ID" ] || fail "parent create failed: $(echo "$P_RESP" | head -c 300)"
|
||||
log " PARENT_ID=$PARENT_ID"
|
||||
@@ -266,9 +245,8 @@ log " PARENT_ID=$PARENT_ID"
|
||||
declare -A WS_IDS WS_TOKENS
|
||||
ALL_WS_IDS="$PARENT_ID"
|
||||
for rt in $PV_RUNTIMES; do
|
||||
RT_MODEL=$(pv_platform_model_for_runtime "$rt")
|
||||
R=$(tenant_call POST /workspaces \
|
||||
-d "{\"name\":\"pv-$rt\",\"runtime\":\"$rt\",\"model\":\"$RT_MODEL\",\"tier\":2,\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
|
||||
-d "{\"name\":\"pv-$rt\",\"runtime\":\"$rt\",\"tier\":2,\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
|
||||
WID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
WTOK=$(echo "$R" | extract_auth_token)
|
||||
[ -n "$WID" ] || fail "$rt workspace create failed: $(printf '%s' "$R" | head -c 300)"
|
||||
|
||||
@@ -476,19 +476,7 @@ wait_workspaces_online_routable() {
|
||||
# All empty → '{}' (workspace will fail at first turn with an
|
||||
# expected, actionable auth error rather than masking the test).
|
||||
SECRETS_JSON='{}'
|
||||
# Platform-managed path (E2E_LLM_PATH=platform) — the moonshot/kimi
|
||||
# NOT_CONFIGURED regression (RFC#340 Fix A #2187). Molecule owns billing via the
|
||||
# CP LLM proxy, so the workspace needs NO tenant key: provision with empty
|
||||
# secrets and let the workspace boot purely on (a) the proxy env the control
|
||||
# plane injects + (b) the manifest-derived `provider: platform` Fix A stamps into
|
||||
# the generated config.yaml. This is the path that booted NOT_CONFIGURED in prod
|
||||
# precisely because the BYOK branches below never exercise it. We deliberately
|
||||
# skip the key-injection branches so a stray E2E_*_API_KEY in the runner env
|
||||
# cannot silently convert this into a BYOK run and mask the regression.
|
||||
if [ "${E2E_LLM_PATH:-}" = "platform" ]; then
|
||||
log " LLM path: PLATFORM-MANAGED (no tenant key; proxy + Fix A provider stamp)"
|
||||
SECRETS_JSON='{}'
|
||||
elif [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_MINIMAX_API_KEY']
|
||||
@@ -870,24 +858,6 @@ fi
|
||||
if echo "$AGENT_TEXT" | grep -qiE "exceeded your current quota|insufficient_quota"; then
|
||||
fail "A2A — PROVIDER QUOTA EXHAUSTED (NOT a platform regression). Operator action: top up MOLECULE_STAGING_OPENAI_API_KEY billing or rotate to a higher-quota org at Settings → Secrets and Variables → Actions. Tracked in #2578. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
# Empty-completion class — the agent runtime reached the LLM and got a
|
||||
# 2xx back, but the assistant turn carried NO text part (empty content,
|
||||
# or tool_calls/reasoning-only with no surfaced text), so the runtime
|
||||
# returns the literal "Error: message contained no text content." as its
|
||||
# reply text. Steps 0-7 passing means the platform is healthy (CP up,
|
||||
# tenant provisioned, workspace online + routable, A2A delivery e2e); the
|
||||
# break is the configured completion BACKEND returning an empty turn — a
|
||||
# model/provider-side regression, NOT a workspace-server or harness bug,
|
||||
# and NOT NOT_CONFIGURED (that fails earlier, at boot). Name it explicitly
|
||||
# so the canary alert points at the model, not the platform: a generic
|
||||
# "error-shaped response" misdirects triage to workspace-server. Observed
|
||||
# 2026-06-03/04 across every staging canary on MODEL_SLUG=MiniMax-M2 (the
|
||||
# canary default since #2710) — 100% on the parent's first cold turn,
|
||||
# identical on main's scheduled synthetic E2E and on PRs (so it is an
|
||||
# environmental backend regression, never PR-introduced).
|
||||
if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then
|
||||
fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (MiniMax-M2 since #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
# Generic catch-all — falls through if none of the known regressions hit.
|
||||
if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
|
||||
fail "A2A returned an error-shaped response: $AGENT_TEXT"
|
||||
@@ -1041,7 +1011,7 @@ print(json.dumps({
|
||||
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
|
||||
'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}],
|
||||
},
|
||||
'configuration': {'max_tokens': 32}
|
||||
'configuration': {'max_tokens': 4}
|
||||
}
|
||||
}))
|
||||
")
|
||||
|
||||
@@ -26,12 +26,11 @@ import (
|
||||
// the update cycle — no ssh, no re-provision, no ops toil.
|
||||
//
|
||||
// Contract (paired with cp-side GET /cp/tenants/config):
|
||||
//
|
||||
// Request: GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
|
||||
// Authorization: Bearer <ADMIN_TOKEN>
|
||||
// X-Molecule-Org-Id: <MOLECULE_ORG_ID>
|
||||
// Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
|
||||
// 401 on bearer mismatch or unknown org
|
||||
// Request: GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
|
||||
// Authorization: Bearer <ADMIN_TOKEN>
|
||||
// X-Molecule-Org-Id: <MOLECULE_ORG_ID>
|
||||
// Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
|
||||
// 401 on bearer mismatch or unknown org
|
||||
//
|
||||
// Best-effort: any failure logs and returns — main() keeps booting.
|
||||
// Self-hosted deploys without MOLECULE_ORG_ID or ADMIN_TOKEN set
|
||||
@@ -106,53 +105,3 @@ func refreshEnvFromCP() error {
|
||||
log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
|
||||
return nil
|
||||
}
|
||||
|
||||
// requiredLLMEnvVars is the set of LLM proxy env vars a managed SaaS
|
||||
// tenant must have populated after refreshEnvFromCP. cp#469 (tenant
|
||||
// proxy-env delivery) — guaranteed CP-delivered creds reach the
|
||||
// tenant process env on boot. Per Researcher Task #37 / Spec 2 and
|
||||
// Task #46 (watch-fail-first test).
|
||||
//
|
||||
// Key set byte-matched against Researcher's verified emission in
|
||||
// controlplane tenant_config.go:140-144 (Researcher REQUEST_CHANGES
|
||||
// iterate body, 3987f59c). The four keys below ARE the LLM-proxy
|
||||
// subset of the 8 CP-emitted keys; OPENAI_BASE_URL / OPENAI_API_KEY /
|
||||
// ANTHROPIC_BASE_URL / ANTHROPIC_API_KEY are out of scope for cp#469
|
||||
// (different feature surfaces — direct-to-provider fallbacks, not
|
||||
// the proxy). v2 fix: MOLECULE_LLM_USAGE_TOKEN, MOLECULE_LLM_USAGE_URL,
|
||||
// MOLECULE_LLM_BASE_URL, MOLECULE_LLM_ANTHROPIC_BASE_URL — note the
|
||||
// 4th key is namespaced MOLECULE_LLM_ANTHROPIC_BASE_URL, NOT bare
|
||||
// ANTHROPIC_BASE_URL. Bare ANTHROPIC_BASE_URL is a separate CP-emitted
|
||||
// key for direct-provider use, not the LLM proxy.
|
||||
var requiredLLMEnvVars = []string{
|
||||
"MOLECULE_LLM_USAGE_TOKEN",
|
||||
"MOLECULE_LLM_USAGE_URL", // CRITICAL fix v2: was MOLECULE_LLM_URL in v1
|
||||
"MOLECULE_LLM_BASE_URL",
|
||||
"MOLECULE_LLM_ANTHROPIC_BASE_URL", // CRITICAL fix v3: was ANTHROPIC_BASE_URL in v2 (different key!)
|
||||
}
|
||||
|
||||
// assertManagedTenantHasLLMEnv verifies that, when running as a
|
||||
// managed SaaS tenant (MOLECULE_ORG_ID + ADMIN_TOKEN both set), all
|
||||
// required LLM proxy env vars are populated after refreshEnvFromCP.
|
||||
//
|
||||
// Self-hosted (no orgID/adminToken) is exempt — dev must not be
|
||||
// blocked here. Managed tenants with missing LLM keys fail with
|
||||
// MISSING_CP_LLM_ENV so they do not silently boot with broken proxy
|
||||
// creds. Caller in main.go decides whether to log and continue or
|
||||
// log.Fatalf depending on deployment context.
|
||||
func assertManagedTenantHasLLMEnv() error {
|
||||
if os.Getenv("MOLECULE_ORG_ID") == "" || os.Getenv("ADMIN_TOKEN") == "" {
|
||||
// Self-hosted dev / not yet provisioned — not a managed tenant.
|
||||
return nil
|
||||
}
|
||||
var missing []string
|
||||
for _, k := range requiredLLMEnvVars {
|
||||
if os.Getenv(k) == "" {
|
||||
missing = append(missing, k)
|
||||
}
|
||||
}
|
||||
if len(missing) > 0 {
|
||||
return fmt.Errorf("MISSING_CP_LLM_ENV: required LLM proxy keys not set after refreshEnvFromCP: %v", missing)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -60,138 +59,6 @@ func TestRefreshEnvFromCP_AppliesCPResponse(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: watch-fail-first
|
||||
// per Researcher Task #46. When running as a managed tenant
|
||||
// (MOLECULE_ORG_ID + ADMIN_TOKEN set), missing LLM proxy env vars
|
||||
// after refreshEnvFromCP MUST surface as MISSING_CP_LLM_ENV, not be
|
||||
// silently accepted. Without this guard, a CP that loses its LLM
|
||||
// creds (e.g. during an incident) would let a tenant boot and then
|
||||
// fail later at first LLM call — worse than a loud refusal here.
|
||||
func TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
// Stub CP returns a CP response WITHOUT any of the required
|
||||
// LLM keys — simulates the failure mode where the CP side
|
||||
// dropped or never had the LLM creds for this org.
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
fmt.Fprint(w, `{"MOLECULE_CP_SHARED_SECRET":"x","MOLECULE_CP_URL":"https://api.moleculesai.app"}`)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
t.Setenv("MOLECULE_ORG_ID", "org-managed-1")
|
||||
t.Setenv("ADMIN_TOKEN", "admin-tok")
|
||||
t.Setenv("MOLECULE_CP_URL", srv.URL)
|
||||
// Clear all LLM keys to simulate the boot-without-LLM-env failure mode.
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
|
||||
|
||||
// refreshEnvFromCP itself should succeed — CP is reachable, returned 200.
|
||||
if err := refreshEnvFromCP(); err != nil {
|
||||
t.Fatalf("refreshEnvFromCP: %v", err)
|
||||
}
|
||||
// The boot assertion must catch the missing LLM keys.
|
||||
err := assertManagedTenantHasLLMEnv()
|
||||
if err == nil {
|
||||
t.Fatal("expected MISSING_CP_LLM_ENV error for managed tenant without LLM keys, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
|
||||
t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshEnvFromCP_ManagedTenantHappyPath: when the CP returns
|
||||
// all 4 LLM-proxy keys, the gate must PASS — no MISSING_CP_LLM_ENV
|
||||
// for a properly-configured managed tenant. Watch-fail counterpart
|
||||
// to TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: if THIS test
|
||||
// ever fires MISSING_CP_LLM_ENV on the byte-correct key set, the
|
||||
// requiredLLMEnvVars list has drifted from the CP emission again.
|
||||
// Per Researcher REQUEST_CHANGES TEST ADEQUACY note.
|
||||
func TestRefreshEnvFromCP_ManagedTenantHappyPath(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
// Return ALL 4 LLM-proxy keys — names byte-matched to
|
||||
// tenant_config.go:140-144 CP emission.
|
||||
fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com","MOLECULE_LLM_ANTHROPIC_BASE_URL":"https://llm.example.com/anthropic"}`)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
t.Setenv("MOLECULE_ORG_ID", "org-managed-happy")
|
||||
t.Setenv("ADMIN_TOKEN", "admin-tok")
|
||||
t.Setenv("MOLECULE_CP_URL", srv.URL)
|
||||
// Pre-clear so we can verify the refresh actually populated them.
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
|
||||
|
||||
if err := refreshEnvFromCP(); err != nil {
|
||||
t.Fatalf("refreshEnvFromCP: %v", err)
|
||||
}
|
||||
// Sanity: refresh actually applied the keys.
|
||||
if got := os.Getenv("MOLECULE_LLM_USAGE_TOKEN"); got != "tok-1" {
|
||||
t.Errorf("refresh did not apply USAGE_TOKEN: got %q", got)
|
||||
}
|
||||
// The boot assertion must pass — no MISSING_CP_LLM_ENV.
|
||||
if err := assertManagedTenantHasLLMEnv(); err != nil {
|
||||
t.Errorf("managed happy path must not MISSING_CP_LLM_ENV, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshEnvFromCP_ManagedTenantPartialEnv: when the CP returns
|
||||
// 3 of 4 LLM-proxy keys (one missing), the gate must STILL catch it
|
||||
// and the error must name the missing key. Per Researcher
|
||||
// REQUEST_CHANGES TEST ADEQUACY note — partial-env coverage is
|
||||
// critical because the production failure mode is usually "one
|
||||
// key dropped" not "all keys dropped".
|
||||
func TestRefreshEnvFromCP_ManagedTenantPartialEnv(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
// 3 of 4 — MOLECULE_LLM_ANTHROPIC_BASE_URL is missing.
|
||||
fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com"}`)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
t.Setenv("MOLECULE_ORG_ID", "org-managed-partial")
|
||||
t.Setenv("ADMIN_TOKEN", "admin-tok")
|
||||
t.Setenv("MOLECULE_CP_URL", srv.URL)
|
||||
// Pre-clear all 4 so the 3 that come back from CP are the only
|
||||
// ones set; the 4th (MOLECULE_LLM_ANTHROPIC_BASE_URL) stays empty.
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
|
||||
|
||||
if err := refreshEnvFromCP(); err != nil {
|
||||
t.Fatalf("refreshEnvFromCP: %v", err)
|
||||
}
|
||||
err := assertManagedTenantHasLLMEnv()
|
||||
if err == nil {
|
||||
t.Fatal("expected MISSING_CP_LLM_ENV for partial env (3 of 4 keys), got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
|
||||
t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
|
||||
}
|
||||
if !strings.Contains(err.Error(), "MOLECULE_LLM_ANTHROPIC_BASE_URL") {
|
||||
t.Errorf("expected error to name the missing key MOLECULE_LLM_ANTHROPIC_BASE_URL, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop: self-hosted
|
||||
// (no orgID/adminToken) must NOT block on missing LLM keys — dev
|
||||
// ergonomics matter and the assertion's contract is "managed only".
|
||||
func TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ORG_ID", "")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
|
||||
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_BASE_URL", "")
|
||||
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
|
||||
if err := assertManagedTenantHasLLMEnv(); err != nil {
|
||||
t.Errorf("self-hosted (not managed) must not block, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot: network errors must
|
||||
// return non-nil BUT main.go treats that as warn-and-continue. We assert
|
||||
// the function returns an error (not a panic) so the caller can log.
|
||||
|
||||
@@ -82,16 +82,6 @@ func main() {
|
||||
log.Printf("CP env refresh: %v (continuing with baked-in env)", err)
|
||||
}
|
||||
|
||||
// Managed-tenant boot assertion (cp#469 — tenant proxy-env delivery).
|
||||
// If we're a managed SaaS tenant (orgID + adminToken set), all required
|
||||
// LLM proxy env vars must be present after refresh. Missing keys block
|
||||
// the tenant from booting with broken LLM creds — silent-fail is worse
|
||||
// than a loud refusal. Self-hosted (no orgID/adminToken) short-circuits
|
||||
// inside the assertion, so this never fires for dev.
|
||||
if err := assertManagedTenantHasLLMEnv(); err != nil {
|
||||
log.Fatalf("Managed tenant boot assertion: %v", err)
|
||||
}
|
||||
|
||||
// Secrets encryption. In MOLECULE_ENV=prod, boot refuses to start
|
||||
// without a valid SECRETS_ENCRYPTION_KEY (fail-secure — Top-5 #5).
|
||||
// In any other environment, missing keys just log a warning and
|
||||
@@ -369,6 +359,7 @@ func main() {
|
||||
// (WorkspaceHandler.BootstrapFailed) wires its own capture inline.
|
||||
registry.BootFailureRescueHook = handlers.BootFailureRescueHook
|
||||
|
||||
|
||||
// Provision-timeout sweep — flips workspaces that have been stuck in
|
||||
// status='provisioning' past the timeout window to 'failed' and emits
|
||||
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
|
||||
|
||||
@@ -149,11 +149,9 @@ func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaste
|
||||
models.StatusFailed, msg, wsID); dbErr != nil {
|
||||
log.Printf("bundle import: failed to mark workspace %s as failed: %v", wsID, dbErr)
|
||||
}
|
||||
if bcErr := broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
|
||||
broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
|
||||
"error": msg,
|
||||
}); bcErr != nil {
|
||||
log.Printf("bundle import: failed to broadcast provision failed for %s: %v", wsID, bcErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func nilIfEmpty(s string) interface{} {
|
||||
|
||||
@@ -407,14 +407,12 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound
|
||||
|
||||
// Broadcast event
|
||||
if m.broadcaster != nil {
|
||||
if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
|
||||
m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
|
||||
"channel_id": ch.ID,
|
||||
"channel_type": ch.ChannelType,
|
||||
"username": msg.Username,
|
||||
"direction": "inbound",
|
||||
}); err != nil {
|
||||
log.Printf("Channels: failed to broadcast inbound event: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -455,13 +453,11 @@ func (m *Manager) SendOutbound(ctx context.Context, channelID string, text strin
|
||||
}
|
||||
|
||||
if m.broadcaster != nil {
|
||||
if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
|
||||
m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
|
||||
"channel_id": ch.ID,
|
||||
"channel_type": ch.ChannelType,
|
||||
"direction": "outbound",
|
||||
}); err != nil {
|
||||
log.Printf("Channels: failed to broadcast outbound event: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
@@ -517,9 +517,7 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
|
||||
|
||||
// Acknowledge the button press (removes loading spinner)
|
||||
ackCfg := tgbotapi.NewCallback(cb.ID, "Received")
|
||||
if _, err := bot.Send(ackCfg); err != nil {
|
||||
log.Printf("telegram: failed to send callback ack: %v", err)
|
||||
}
|
||||
bot.Send(ackCfg)
|
||||
|
||||
// Update the message to show what was clicked
|
||||
decision := "approved"
|
||||
@@ -531,9 +529,7 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
|
||||
cb.Message.MessageID,
|
||||
cb.Message.Text+"\n\n✅ CEO "+decision,
|
||||
)
|
||||
if _, err := bot.Send(editMsg); err != nil {
|
||||
log.Printf("telegram: failed to send edit message: %v", err)
|
||||
}
|
||||
bot.Send(editMsg)
|
||||
|
||||
// Route the decision as an inbound message to the agent
|
||||
inbound := &InboundMessage{
|
||||
|
||||
@@ -383,48 +383,23 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle
|
||||
}
|
||||
summary := a2aMethod + " → " + wsNameForLog
|
||||
toolTrace := extractToolTrace(respBody)
|
||||
|
||||
// DATA-LOSS FIX (internal#470 / #1347 push-mode sibling): this
|
||||
// a2a_receive row is the ONLY durable record of a push-mode chat
|
||||
// round-trip — request_body carries the user's message, response_body
|
||||
// carries the agent's reply, and chat-history hydration
|
||||
// (messagestore.PostgresMessageStore) reads BOTH back to rebuild the
|
||||
// transcript on canvas reopen / reload. It MUST be written
|
||||
// SYNCHRONOUSLY, before proxyA2ARequest returns and ProxyA2A flushes
|
||||
// the 200 to the canvas — otherwise the canvas sees the reply
|
||||
// acknowledged (and rendered optimistically) while the row is still
|
||||
// racing in a detached goroutine, and a reload (or a workspace-server
|
||||
// restart / deploy / OOM) between the 200 and the goroutine's commit
|
||||
// loses the message permanently on reopen.
|
||||
//
|
||||
// This mirrors the discipline already applied to the poll-mode ingest
|
||||
// path (logA2AReceiveQueued / persistUserMessageAtIngest); the
|
||||
// push-mode counterpart was left async, which the E2E Chat
|
||||
// "history persists across reload" test surfaced as an intermittent
|
||||
// red (the reload out-raced the INSERT).
|
||||
//
|
||||
// - context.WithoutCancel: a client disconnect on chat-exit (which
|
||||
// cancels the inbound request ctx) MUST NOT abort this write.
|
||||
// - SYNCHRONOUS (no goAsync): the row must be durable before the 200.
|
||||
// - Best-effort: LogActivity logs+swallows INSERT errors internally,
|
||||
// so a DB hiccup never blocks or fails the user's send — behaviour
|
||||
// for that one request is never worse than the pre-fix async path.
|
||||
// - The post-commit ACTIVITY_LOGGED broadcast still fires inside
|
||||
// LogActivity; the durable row is the truth the canvas re-reads.
|
||||
logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
|
||||
defer cancel()
|
||||
LogActivity(logCtx, h.broadcaster, ActivityParams{
|
||||
WorkspaceID: workspaceID,
|
||||
ActivityType: "a2a_receive",
|
||||
SourceID: nilIfEmpty(callerID),
|
||||
TargetID: &workspaceID,
|
||||
Method: &a2aMethod,
|
||||
Summary: &summary,
|
||||
RequestBody: json.RawMessage(body),
|
||||
ResponseBody: json.RawMessage(respBody),
|
||||
ToolTrace: toolTrace,
|
||||
DurationMs: &durationMs,
|
||||
Status: logStatus,
|
||||
parent := ctx
|
||||
h.goAsync(func() {
|
||||
logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
|
||||
defer cancel()
|
||||
LogActivity(logCtx, h.broadcaster, ActivityParams{
|
||||
WorkspaceID: workspaceID,
|
||||
ActivityType: "a2a_receive",
|
||||
SourceID: nilIfEmpty(callerID),
|
||||
TargetID: &workspaceID,
|
||||
Method: &a2aMethod,
|
||||
Summary: &summary,
|
||||
RequestBody: json.RawMessage(body),
|
||||
ResponseBody: json.RawMessage(respBody),
|
||||
ToolTrace: toolTrace,
|
||||
DurationMs: &durationMs,
|
||||
Status: logStatus,
|
||||
})
|
||||
})
|
||||
|
||||
if callerID == "" && statusCode < 400 {
|
||||
|
||||
@@ -246,20 +246,6 @@ func MarkQueueItemFailed(ctx context.Context, id, errMsg string) {
|
||||
}
|
||||
}
|
||||
|
||||
// QueueDepth returns the number of currently-queued (not dispatched/completed)
|
||||
// items for a workspace. Used by the busy-return response body so callers
|
||||
// can see how many ahead of them.
|
||||
func QueueDepth(ctx context.Context, workspaceID string) int {
|
||||
var n int
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM a2a_queue WHERE workspace_id = $1 AND status = 'queued'`,
|
||||
workspaceID,
|
||||
).Scan(&n); err != nil {
|
||||
log.Printf("A2AQueue: QueueDepth query failed for workspace %s: %v", workspaceID, err)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// DropStaleQueueItems marks queued items older than maxAge as 'dropped' with a
|
||||
// system-generated reason so PM agents stop processing stale post-incident noise.
|
||||
// Called with a workspaceID to scope cleanup to one workspace, or empty to sweep
|
||||
|
||||
@@ -60,10 +60,10 @@ func sanitizeErrorDetailForBroadcast(s string) string {
|
||||
}
|
||||
|
||||
type ActivityHandler struct {
|
||||
broadcaster events.EventEmitter
|
||||
broadcaster *events.Broadcaster
|
||||
}
|
||||
|
||||
func NewActivityHandler(b events.EventEmitter) *ActivityHandler {
|
||||
func NewActivityHandler(b *events.Broadcaster) *ActivityHandler {
|
||||
return &ActivityHandler{broadcaster: b}
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -54,29 +54,23 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
|
||||
"approval_id": approvalID,
|
||||
"action": body.Action,
|
||||
"reason": body.Reason,
|
||||
"task_id": body.TaskID,
|
||||
}); err != nil {
|
||||
log.Printf("approvals: failed to broadcast approval requested: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
// Auto-escalate to parent
|
||||
var parentID *string
|
||||
if err := db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID); err != nil {
|
||||
log.Printf("approvals: failed to lookup parent for escalation: %v", err)
|
||||
}
|
||||
db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID)
|
||||
if parentID != nil {
|
||||
if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
|
||||
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
|
||||
"approval_id": approvalID,
|
||||
"from_workspace_id": workspaceID,
|
||||
"action": body.Action,
|
||||
"reason": body.Reason,
|
||||
}); err != nil {
|
||||
log.Printf("approvals: failed to broadcast approval escalated: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
c.JSON(http.StatusCreated, gin.H{"approval_id": approvalID, "status": "pending"})
|
||||
@@ -227,13 +221,11 @@ func (h *ApprovalsHandler) Decide(c *gin.Context) {
|
||||
eventType = "APPROVAL_DENIED"
|
||||
}
|
||||
|
||||
if err := h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
|
||||
h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
|
||||
"approval_id": approvalID,
|
||||
"decision": body.Decision,
|
||||
"decided_by": decidedBy,
|
||||
}); err != nil {
|
||||
log.Printf("approvals: failed to broadcast approval decision: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
c.JSON(http.StatusOK, gin.H{"status": body.Decision, "approval_id": approvalID})
|
||||
}
|
||||
|
||||
@@ -102,10 +102,10 @@ func pushDelegationResultToInbox(ctx context.Context, sourceID, delegationID, st
|
||||
// and the A2A request runs in the background.
|
||||
type DelegationHandler struct {
|
||||
workspace *WorkspaceHandler
|
||||
broadcaster events.EventEmitter
|
||||
broadcaster *events.Broadcaster
|
||||
}
|
||||
|
||||
func NewDelegationHandler(wh *WorkspaceHandler, b events.EventEmitter) *DelegationHandler {
|
||||
func NewDelegationHandler(wh *WorkspaceHandler, b *events.Broadcaster) *DelegationHandler {
|
||||
return &DelegationHandler{workspace: wh, broadcaster: b}
|
||||
}
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ package handlers
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -56,7 +57,10 @@ import (
|
||||
// directly rather than going through the package global.
|
||||
func integrationDB(t *testing.T) *sql.DB {
|
||||
t.Helper()
|
||||
url := requireIntegrationDBURL(t)
|
||||
url := os.Getenv("INTEGRATION_DB_URL")
|
||||
if url == "" {
|
||||
t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
|
||||
}
|
||||
conn, err := sql.Open("postgres", url)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
//go:build integration
|
||||
// +build integration
|
||||
|
||||
// integration_helper_test.go — shared preflight for handler Postgres
|
||||
// integration tests. Extracted so the fail-open/skip logic is in ONE place
|
||||
// and can be tightened without editing every integration test file.
|
||||
//
|
||||
// See delegation_ledger_integration_test.go for the docker-postgres setup
|
||||
// incantation used by local devs.
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// requireIntegrationDBURL returns $INTEGRATION_DB_URL.
|
||||
//
|
||||
// In CI (CI, GITHUB_ACTIONS, or GITEA_ACTIONS env var is non-empty), an
|
||||
// empty URL is a fatal error — it means the workflow failed to export the
|
||||
// variable (postgres container did not start, bridge IP resolution failed,
|
||||
// or a regression in the workflow YAML). t.Fatalf keeps the test red so the
|
||||
// failure is visible; t.Skip would silently pass and mask the defect.
|
||||
//
|
||||
// Locally (none of the three CI markers set), an empty URL skips the test
|
||||
// so devs can run `go test ./...` without booting a Postgres container.
|
||||
func requireIntegrationDBURL(t *testing.T) string {
|
||||
t.Helper()
|
||||
url := os.Getenv("INTEGRATION_DB_URL")
|
||||
if url == "" {
|
||||
if os.Getenv("CI") != "" ||
|
||||
os.Getenv("GITHUB_ACTIONS") != "" ||
|
||||
os.Getenv("GITEA_ACTIONS") != "" {
|
||||
t.Fatalf("INTEGRATION_DB_URL required in CI handler integration tests — check workflow env export")
|
||||
}
|
||||
t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
|
||||
}
|
||||
return url
|
||||
}
|
||||
@@ -17,7 +17,6 @@ package handlers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -56,95 +55,3 @@ func validateRegisteredModelForRuntime(runtime, model string) (bool, string) {
|
||||
"model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)",
|
||||
model, runtime)
|
||||
}
|
||||
|
||||
// validateDerivedProviderInRegistry (issue #2172) is the provider-side companion
|
||||
// to validateRegisteredModelForRuntime. The model-side check asks "is this
|
||||
// (runtime, model) in the registry?"; the provider-side check asks "is the
|
||||
// provider this model DERIVES to — the same one the adapter will resolve at
|
||||
// boot — a known provider in providers.yaml?"
|
||||
//
|
||||
// Live trigger (adk-demo Assistant, 2026-06-03): workspace config
|
||||
// `model=moonshot/kimi-k2.6` (claude-code) → adapter derives `provider=moonshot`
|
||||
// → `ValueError: provider=moonshot not in providers registry` at BOOT. The
|
||||
// save was accepted (no validation at the API boundary), and the failure only
|
||||
// surfaced when the agent tried to register. CI never saw it. The drift gate
|
||||
// (RFC#580) validates TEMPLATES against the registry, NOT per-workspace
|
||||
// configs; the existing model-side check rejects a model the runtime doesn't
|
||||
// own but says nothing about the DERIVED provider's registry membership.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// (true, "") — pass: model is empty (MODEL_REQUIRED owns it), the
|
||||
// runtime is not in the registry (fail-open for
|
||||
// federated / non-first-party runtimes — mirror of the
|
||||
// model-side check's federation contract), the registry
|
||||
// failed to load (build-time gate owns it), OR the
|
||||
// derived provider name is a known provider in the
|
||||
// registry's `providers:` list.
|
||||
// (false, reason) — reject: a known (runtime, model) pair derives to a
|
||||
// provider name absent from the providers list. This is
|
||||
// the structural class the adk-demo boot failure belongs
|
||||
// to — the registry's `runtimes:` block references a
|
||||
// provider not declared in `providers:`, which by
|
||||
// construction is a registry-data bug. Catching it at
|
||||
// config-SAVE keeps it out of the agent-boot path.
|
||||
//
|
||||
// Defense-in-depth: by construction, a model in a runtime's native provider set
|
||||
// has a provider that IS in the catalog (the runtime ref names a provider from
|
||||
// the providers list). So the rejection path is primarily a registry-consistency
|
||||
// guard. The real value is the FAIL-LOUD semantics — any future drift between
|
||||
// `providers:` and `runtimes:` fails the create call with a clear pointer to
|
||||
// the missing provider, instead of silently wedging the agent at boot.
|
||||
func validateDerivedProviderInRegistry(runtime, model string) (bool, string) {
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
return true, "" // MODEL_REQUIRED owns this.
|
||||
}
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
// Registry unavailable (build-time defect the gates catch). Fail open —
|
||||
// do not block create on a registry-load failure.
|
||||
return true, ""
|
||||
}
|
||||
// DeriveProvider is fail-closed for unknown runtimes. Mirror the
|
||||
// model-side check's federation contract: a runtime the registry does
|
||||
// NOT know (langgraph / external / kimi / mock / federated) is allowed
|
||||
// to pass through. DeriveProvider's `unknown runtime` error IS that
|
||||
// signal — treat it as fail-open, identical to ModelsForRuntime's
|
||||
// not-found behavior above.
|
||||
p, err := m.DeriveProvider(runtime, model, nil)
|
||||
if err != nil {
|
||||
// Either the runtime is unknown (fail-open by contract) OR the model
|
||||
// is not native to the runtime (the model-side validator already
|
||||
// rejected this — DeriveProvider's error here means
|
||||
// validateRegisteredModelForRuntime should have caught it. Don't
|
||||
// double-reject: pass through and let the model-side response own
|
||||
// the message).
|
||||
return true, ""
|
||||
}
|
||||
// Defense-in-depth: confirm the DERIVED provider is a known entry in the
|
||||
// providers list. By construction it should be (DeriveProvider only
|
||||
// returns a Provider that was looked up by name from `providers:`), but
|
||||
// a future federation merge could introduce a runtime ref pointing at a
|
||||
// contributed provider absent from the core catalog. Reject loudly here
|
||||
// rather than letting the save reach the agent-boot path and wedge with
|
||||
// "provider=X not in providers registry" (the original adk-demo class).
|
||||
for _, candidate := range m.Providers {
|
||||
if candidate.Name == p.Name {
|
||||
return true, ""
|
||||
}
|
||||
}
|
||||
// Build a sorted, comma-separated list of valid provider names so the
|
||||
// operator/caller sees the actionable list (the boot-time error message
|
||||
// the adk-demo class produced does NOT include this — the fix is to
|
||||
// surface it at the API boundary, where the caller can fix the request
|
||||
// without a stuck workspace + operator page).
|
||||
valid := make([]string, 0, len(m.Providers))
|
||||
for _, c := range m.Providers {
|
||||
valid = append(valid, c.Name)
|
||||
}
|
||||
sort.Strings(valid)
|
||||
return false, fmt.Sprintf(
|
||||
"derived provider %q (for model %q on runtime %q) is not in the providers registry; pick a model whose derived provider is one of: %s",
|
||||
p.Name, model, runtime, strings.Join(valid, ", "))
|
||||
}
|
||||
|
||||
@@ -6,17 +6,8 @@ package handlers
|
||||
// fail OPEN (allow) for a runtime the registry doesn't know yet (federation /
|
||||
// langgraph/etc. not in the first-party registry) so the existing knownRuntimes
|
||||
// gate stays authoritative there.
|
||||
//
|
||||
// TestValidateDerivedProviderInRegistry (issue #2172) is the provider-side
|
||||
// companion: once the model-side check passes, confirm the DERIVED provider
|
||||
// (the one the adapter will resolve at boot) is a known provider in
|
||||
// providers.yaml. Catches the adk-demo "provider=X not in providers registry"
|
||||
// class at config-SAVE time instead of letting it wedge the agent at boot.
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
import "testing"
|
||||
|
||||
func TestValidateRegisteredModelForRuntime(t *testing.T) {
|
||||
type tc struct {
|
||||
@@ -89,163 +80,3 @@ func TestValidateRegisteredModelForRuntime(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateDerivedProviderInRegistry(t *testing.T) {
|
||||
type tc struct {
|
||||
name string
|
||||
runtime string
|
||||
model string
|
||||
wantOK bool
|
||||
// wantReasonContains: a substring the rejection reason must include
|
||||
// (skipped for OK cases). Pins the actionable list / derivation pointer
|
||||
// so the caller knows which provider was missing and what the valid
|
||||
// set looks like — this is the fix that distinguishes the new gate
|
||||
// from the boot-time "provider=X not in providers registry" string
|
||||
// it replaces.
|
||||
wantReasonContains string
|
||||
}
|
||||
cases := []tc{
|
||||
// PASS — every native (runtime, model) in the catalog derives to a
|
||||
// provider that IS in the providers list. These are the live corpus
|
||||
// entries; the test pins the registry-consistency invariant.
|
||||
{
|
||||
name: "claude_code_anthropic_api_native",
|
||||
runtime: "claude-code",
|
||||
model: "claude-sonnet-4-6",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "claude_code_kimi_coding_native",
|
||||
runtime: "claude-code",
|
||||
model: "kimi-for-coding",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "claude_code_minimax_native",
|
||||
runtime: "claude-code",
|
||||
model: "MiniMax-M2.7",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "claude_code_platform_namespaced",
|
||||
runtime: "claude-code",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "codex_openai_subscription_default_arm",
|
||||
runtime: "codex",
|
||||
model: "gpt-5.5",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "codex_platform_namespaced",
|
||||
runtime: "codex",
|
||||
model: "openai/gpt-5.4-mini",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "hermes_kimi_coding",
|
||||
runtime: "hermes",
|
||||
model: "kimi-coding/kimi-k2",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "hermes_platform_namespaced",
|
||||
runtime: "hermes",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "openclaw_kimi_coding",
|
||||
runtime: "openclaw",
|
||||
model: "moonshot:kimi-k2.6",
|
||||
wantOK: true,
|
||||
},
|
||||
// FAIL — model-side validator catches this, but the provider-side
|
||||
// gate is called AFTER it in Create and inherits the fail-open
|
||||
// contract for "model is not native to runtime" (DeriveProvider
|
||||
// errors → allow, letting the model-side response own the message).
|
||||
// This is the deliberate "don't double-reject" decision.
|
||||
{
|
||||
name: "unregistered_model_pass_through_to_model_side",
|
||||
runtime: "claude-code",
|
||||
model: "totally-made-up-model-xyz",
|
||||
wantOK: true, // pass-through: model-side validator owns the rejection
|
||||
},
|
||||
// Federation contract — mirror of the model-side test above.
|
||||
{
|
||||
name: "langgraph_runtime_failopen",
|
||||
runtime: "langgraph",
|
||||
model: "anything-goes",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "external_runtime_failopen",
|
||||
runtime: "external",
|
||||
model: "whatever",
|
||||
wantOK: true,
|
||||
},
|
||||
// Empty model — MODEL_REQUIRED owns it; allow.
|
||||
{
|
||||
name: "empty_model_allowed_other_gate_owns_it",
|
||||
runtime: "claude-code",
|
||||
model: "",
|
||||
wantOK: true,
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
ok, why := validateDerivedProviderInRegistry(c.runtime, c.model)
|
||||
if ok != c.wantOK {
|
||||
t.Errorf("validateDerivedProviderInRegistry(%q,%q) ok=%v want %v (reason=%q)",
|
||||
c.runtime, c.model, ok, c.wantOK, why)
|
||||
}
|
||||
if !c.wantOK && c.wantReasonContains != "" && !strings.Contains(why, c.wantReasonContains) {
|
||||
t.Errorf("rejection reason missing %q: got %q", c.wantReasonContains, why)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider walks every
|
||||
// (runtime, model) pair in the registry's native model sets and asserts each
|
||||
// one derives to a provider that IS in the providers list. This is the
|
||||
// static regression gate the issue calls for ("a CI test fails if any shipped
|
||||
// demo/template config references an unregistered provider") — generalized
|
||||
// to the catalog as a whole: if anyone edits providers.yaml such that a
|
||||
// `runtimes:` block names a provider absent from `providers:`, this test
|
||||
// fires before the bad config can reach a customer workspace.
|
||||
//
|
||||
// By construction this invariant should always hold (DeriveProvider only
|
||||
// returns a Provider that was looked up by name from `providers:`), so the
|
||||
// test primarily guards against future federation merges that introduce a
|
||||
// runtime ref pointing at a contributed provider absent from the core
|
||||
// catalog — exactly the failure shape the adk-demo Assistant wedge
|
||||
// belongs to.
|
||||
func TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider(t *testing.T) {
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
t.Skipf("providerRegistry unavailable in test env (err=%v); skipping consistency walk", err)
|
||||
}
|
||||
providerNames := make(map[string]struct{}, len(m.Providers))
|
||||
for _, p := range m.Providers {
|
||||
providerNames[p.Name] = struct{}{}
|
||||
}
|
||||
for runtimeName, runtime := range m.Runtimes {
|
||||
for _, ref := range runtime.Providers {
|
||||
for _, modelID := range ref.Models {
|
||||
p, err := m.DeriveProvider(runtimeName, modelID, nil)
|
||||
if err != nil {
|
||||
t.Errorf("catalog invariant broken: runtime=%q model=%q failed DeriveProvider: %v",
|
||||
runtimeName, modelID, err)
|
||||
continue
|
||||
}
|
||||
if _, ok := providerNames[p.Name]; !ok {
|
||||
t.Errorf("catalog invariant broken: runtime=%q model=%q derives to provider %q which is not in the providers list (refs=%q)",
|
||||
runtimeName, modelID, p.Name, ref.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@ package handlers
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -62,7 +63,10 @@ import (
|
||||
// but kept separate so each table's wipe step is local to its tests.
|
||||
func integrationDB_PendingUploads(t *testing.T) *sql.DB {
|
||||
t.Helper()
|
||||
url := requireIntegrationDBURL(t)
|
||||
url := os.Getenv("INTEGRATION_DB_URL")
|
||||
if url == "" {
|
||||
t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
|
||||
}
|
||||
conn, err := sql.Open("postgres", url)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
)
|
||||
|
||||
// Proper-SSOT (task #65): required_env is DERIVED from the resolved provider's
|
||||
// serving classification (IsPlatform), not hand-authored — platform injects
|
||||
// creds server-side (none required), BYOK requires its auth_env.
|
||||
func TestRequiredEnvForRegistryProvider(t *testing.T) {
|
||||
if got := requiredEnvForRegistryProvider(providers.Provider{Name: providers.PlatformProviderName}); got != nil {
|
||||
t.Errorf("platform provider requiredEnv = %v; want nil (creds injected server-side)", got)
|
||||
}
|
||||
byok := providers.Provider{Name: "google", AuthEnv: []string{"GEMINI_API_KEY", "GOOGLE_API_KEY"}}
|
||||
got := requiredEnvForRegistryProvider(byok)
|
||||
if len(got) != 2 || got[0] != "GEMINI_API_KEY" {
|
||||
t.Errorf("byok requiredEnv = %v; want its auth_env", got)
|
||||
}
|
||||
}
|
||||
@@ -176,10 +176,6 @@ func TestResolveAgentURLForRestartSignal_CacheMiss(t *testing.T) {
|
||||
// TestGracefulPreRestart_Success verifies that when the workspace returns 200,
|
||||
// the signal is logged as acknowledged without error.
|
||||
func TestGracefulPreRestart_Success(t *testing.T) {
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: "http://fake-agent.example/agent",
|
||||
}
|
||||
_ = setupTestDB(t)
|
||||
|
||||
// httptest server simulating the workspace container's /signals/restart_pending
|
||||
@@ -209,15 +205,18 @@ func TestGracefulPreRestart_Success(t *testing.T) {
|
||||
})
|
||||
}))
|
||||
defer srv.Close()
|
||||
hWrapper.testURL = srv.URL + "/agent"
|
||||
|
||||
// Pre-populate Redis cache with the test server URL
|
||||
_ = setupTestRedisWithURL(t, srv.URL)
|
||||
|
||||
// gracefulPreRestart runs in a goroutine; wait for it before db.DB is restored.
|
||||
// Must be registered AFTER setupTestDB (LIFO: async wait → db.DB restore).
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
// Use a wrapper so gracefulPreRestart runs through the embedded handler.
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: srv.URL + "/agent",
|
||||
}
|
||||
|
||||
// gracefulPreRestart runs in a goroutine with its own timeout.
|
||||
// We give it time to complete before the test ends.
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-ack-789")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
}
|
||||
@@ -225,22 +224,19 @@ func TestGracefulPreRestart_Success(t *testing.T) {
|
||||
// TestGracefulPreRestart_NotImplemented verifies that when the workspace returns
|
||||
// 404 (old SDK version), the platform proceeds gracefully (log + no error).
|
||||
func TestGracefulPreRestart_NotImplemented(t *testing.T) {
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: "http://fake-agent.example/agent",
|
||||
}
|
||||
_ = setupTestDB(t)
|
||||
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer srv.Close()
|
||||
hWrapper.testURL = srv.URL + "/agent"
|
||||
|
||||
_ = setupTestRedisWithURL(t, srv.URL)
|
||||
|
||||
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: srv.URL + "/agent",
|
||||
}
|
||||
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-noimpl-999")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
@@ -250,18 +246,15 @@ func TestGracefulPreRestart_NotImplemented(t *testing.T) {
|
||||
// TestGracefulPreRestart_ConnectionRefused verifies that when the workspace
|
||||
// is unreachable, the platform proceeds gracefully without error.
|
||||
func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
|
||||
_ = setupTestDB(t)
|
||||
|
||||
mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") // nothing listening on 19999
|
||||
_ = mr
|
||||
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
testURL: "http://localhost:19999/agent",
|
||||
}
|
||||
_ = setupTestDB(t)
|
||||
|
||||
// Nothing listening on 19999 — deliberate connection failure.
|
||||
mr := setupTestRedisWithURL(t, "http://localhost:19999/agent")
|
||||
_ = mr
|
||||
|
||||
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-unreachable-000")
|
||||
time.Sleep(200 * time.Millisecond)
|
||||
@@ -271,17 +264,13 @@ func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
|
||||
// TestGracefulPreRestart_URLResolutionError verifies that when URL resolution
|
||||
// fails, the platform proceeds gracefully without blocking the restart.
|
||||
func TestGracefulPreRestart_URLResolutionError(t *testing.T) {
|
||||
_ = setupTestDB(t)
|
||||
_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
|
||||
|
||||
hWrapper := &resolveURLTestWrapper{
|
||||
WorkspaceHandler: newHandlerWithTestDeps(t),
|
||||
errToReturn: context.DeadlineExceeded,
|
||||
}
|
||||
_ = setupTestDB(t)
|
||||
_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
|
||||
|
||||
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
|
||||
// This ensures goroutines (which access both DB and Redis) are drained before
|
||||
// any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps
|
||||
// so the handler holds the correct Redis client reference.
|
||||
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
|
||||
|
||||
hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111")
|
||||
|
||||
@@ -710,44 +710,6 @@ func (h *SecretsHandler) SetModel(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// issue #2172: validate the model against the registry before persisting.
|
||||
// Empty model clears the override — skip validation (MODEL_REQUIRED owns
|
||||
// the empty case at create time; clearing is always allowed).
|
||||
if body.Model != "" {
|
||||
var runtime string
|
||||
if err := db.DB.QueryRowContext(ctx,
|
||||
`SELECT runtime FROM workspaces WHERE id = $1`, workspaceID,
|
||||
).Scan(&runtime); err != nil {
|
||||
if err == sql.ErrNoRows {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": "workspace not found"})
|
||||
return
|
||||
}
|
||||
log.Printf("SetModel: runtime lookup failed for %s: %v", workspaceID, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to read workspace runtime"})
|
||||
return
|
||||
}
|
||||
if ok, why := validateRegisteredModelForRuntime(runtime, body.Model); !ok {
|
||||
log.Printf("SetModel: 422 UNREGISTERED_MODEL_FOR_RUNTIME (runtime=%q model=%q): %s", runtime, body.Model, why)
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{
|
||||
"error": why,
|
||||
"runtime": runtime,
|
||||
"model": body.Model,
|
||||
"code": "UNREGISTERED_MODEL_FOR_RUNTIME",
|
||||
})
|
||||
return
|
||||
}
|
||||
if ok, why := validateDerivedProviderInRegistry(runtime, body.Model); !ok {
|
||||
log.Printf("SetModel: 422 DERIVED_PROVIDER_NOT_IN_REGISTRY (runtime=%q model=%q): %s", runtime, body.Model, why)
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{
|
||||
"error": why,
|
||||
"runtime": runtime,
|
||||
"model": body.Model,
|
||||
"code": "DERIVED_PROVIDER_NOT_IN_REGISTRY",
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if err := setModelSecret(ctx, workspaceID, body.Model); err != nil {
|
||||
log.Printf("SetModel error: %v", err)
|
||||
if body.Model == "" {
|
||||
|
||||
@@ -546,11 +546,6 @@ func TestSecretsSetModel_Upsert(t *testing.T) {
|
||||
restartCalled := make(chan string, 1)
|
||||
handler := NewSecretsHandler(func(id string) { restartCalled <- id })
|
||||
|
||||
// Runtime lookup (issue #2172) — model is non-empty so validation fires.
|
||||
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000001").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
|
||||
|
||||
// Pin the literal 'MODEL' key in the SQL so a regression to the
|
||||
// pre-2026-05-19 'MODEL_PROVIDER' column name shows up here.
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'MODEL'`).
|
||||
@@ -628,99 +623,6 @@ func TestSecretsSetModel_InvalidID(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsSetModel_UnregisteredModel_422 guards that a model not in the
|
||||
// runtime's native set is rejected at save (issue #2172 continuation).
|
||||
func TestSecretsSetModel_UnregisteredModel_422(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000003").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000003"}}
|
||||
c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000003/model",
|
||||
strings.NewReader(`{"model":"totally-made-up-model-xyz"}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.SetModel(c)
|
||||
|
||||
if w.Code != http.StatusUnprocessableEntity {
|
||||
t.Fatalf("expected 422, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
body := w.Body.String()
|
||||
if !strings.Contains(body, "UNREGISTERED_MODEL_FOR_RUNTIME") {
|
||||
t.Errorf("expected code UNREGISTERED_MODEL_FOR_RUNTIME in body, got: %s", body)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsSetModel_UnknownRuntimeFailOpen_200 verifies the federation
|
||||
// contract: a runtime absent from the registry (langgraph) passes through
|
||||
// without validation so non-first-party runtimes are not blocked.
|
||||
func TestSecretsSetModel_UnknownRuntimeFailOpen_200(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000004").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("langgraph"))
|
||||
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'MODEL'`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000004", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000004"}}
|
||||
c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000004/model",
|
||||
strings.NewReader(`{"model":"any-arbitrary-model"}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.SetModel(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsSetModel_WorkspaceNotFound_404 verifies 404 when the runtime
|
||||
// lookup finds no workspace row.
|
||||
func TestSecretsSetModel_WorkspaceNotFound_404(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewSecretsHandler(nil)
|
||||
|
||||
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000005").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "00000000-0000-0000-0000-000000000005"}}
|
||||
c.Request = httptest.NewRequest("PUT", "/workspaces/00000000-0000-0000-0000-000000000005/model",
|
||||
strings.NewReader(`{"model":"claude-sonnet-4-6"}`))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
handler.SetModel(c)
|
||||
|
||||
if w.Code != http.StatusNotFound {
|
||||
t.Fatalf("expected 404, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecretsModel_RoundTrip_KeyIsMODELNotMODEL_PROVIDER pins the
|
||||
// 2026-05-19 rename: writes via SetModel land under workspace_secrets
|
||||
// key='MODEL', and reads via GetModel hit the same key. A regression
|
||||
@@ -734,10 +636,6 @@ func TestSecretsModel_RoundTrip_KeyIsMODELNotMODEL_PROVIDER(t *testing.T) {
|
||||
handler := NewSecretsHandler(func(string) {})
|
||||
|
||||
// 1. SetModel — must hit key='MODEL' in the INSERT.
|
||||
// Runtime lookup (issue #2172) — model is non-empty so validation fires.
|
||||
mock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000099").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("codex"))
|
||||
mock.ExpectExec(`INSERT INTO workspace_secrets[\s\S]*'MODEL'[\s\S]*ON CONFLICT`).
|
||||
WithArgs("00000000-0000-0000-0000-000000000099", sqlmock.AnyArg(), sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
|
||||
@@ -44,20 +44,6 @@ func billingModeForRegistryProvider(p providers.Provider) string {
|
||||
return LLMBillingModeBYOK
|
||||
}
|
||||
|
||||
// requiredEnvForRegistryProvider derives the env vars the USER must supply for
|
||||
// a model owned by the resolved provider — the proper-SSOT single fact behind
|
||||
// the canvas "Missing API Keys" preflight (task #65). The closed platform
|
||||
// provider injects credentials server-side (the metered proxy) -> nothing
|
||||
// required; BYOK providers require their auth_env. Derived from IsPlatform +
|
||||
// AuthEnv so a template can no longer hand-author a required_env that drifts
|
||||
// from the registry's serving classification.
|
||||
func requiredEnvForRegistryProvider(p providers.Provider) []string {
|
||||
if p.IsPlatform() {
|
||||
return nil
|
||||
}
|
||||
return p.AuthEnv
|
||||
}
|
||||
|
||||
// enrichFromRegistry populates the registry-served fields on a templateSummary
|
||||
// when its runtime is known to the provider registry. It is a no-op (leaves
|
||||
// RegistryBacked=false and the registry slices nil) for a runtime the registry
|
||||
@@ -112,7 +98,6 @@ func enrichFromRegistry(summary *templateSummary, runtime string) {
|
||||
if derived, derr := m.DeriveProvider(runtime, id, nil); derr == nil {
|
||||
ms.Provider = derived.Name
|
||||
ms.BillingMode = billingModeForRegistryProvider(derived)
|
||||
ms.RequiredEnv = requiredEnvForRegistryProvider(derived)
|
||||
}
|
||||
// If DeriveProvider errors (ambiguous/overlap — a manifest defect the
|
||||
// loader's tests pin against), still serve the id without a provider
|
||||
|
||||
@@ -474,32 +474,6 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
|
||||
})
|
||||
return
|
||||
}
|
||||
// issue #2172 (provider-side companion): once the (runtime, model)
|
||||
// pair is known to be registered, confirm the DERIVED provider
|
||||
// (the one the adapter will resolve at boot) is a known provider
|
||||
// in the providers.yaml catalog. Live trigger (adk-demo Assistant,
|
||||
// 2026-06-03): the model-side check passed for a registry-resident
|
||||
// model whose derived provider name was NOT in the providers list,
|
||||
// so the save was accepted and the agent wedged at boot with
|
||||
// "provider=X not in providers registry". This check is a
|
||||
// defense-in-depth registry-consistency guard: by construction a
|
||||
// model in a runtime's native set derives to a provider that IS in
|
||||
// the catalog, so the rejection path is primarily a registry-data
|
||||
// invariant — any future drift between `providers:` and `runtimes:`
|
||||
// fails the create with a clear pointer to the missing provider
|
||||
// rather than silently wedging the agent. Fails open for runtimes
|
||||
// the registry doesn't know (langgraph/external/kimi/mock/federated
|
||||
// — the federation contract the model-side check also honors).
|
||||
if ok, why := validateDerivedProviderInRegistry(payload.Runtime, payload.Model); !ok {
|
||||
log.Printf("Create: 422 DERIVED_PROVIDER_NOT_IN_REGISTRY (runtime=%q model=%q): %s [issue #2172 hard-reject]", payload.Runtime, payload.Model, why)
|
||||
c.JSON(http.StatusUnprocessableEntity, gin.H{
|
||||
"error": why,
|
||||
"runtime": payload.Runtime,
|
||||
"model": payload.Model,
|
||||
"code": "DERIVED_PROVIDER_NOT_IN_REGISTRY",
|
||||
})
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
ctx := c.Request.Context()
|
||||
|
||||
@@ -56,20 +56,7 @@ func PatchAbilities(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Atomic update: when both fields are supplied, apply them in one SQL
|
||||
// statement so the request is all-or-nothing (#2131). A partial mutation
|
||||
// (e.g. broadcast_enabled updated but talk_to_user_enabled failing) would
|
||||
// leave the workspace in an ambiguous capability state.
|
||||
if body.BroadcastEnabled != nil && body.TalkToUserEnabled != nil {
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET broadcast_enabled = $2, talk_to_user_enabled = $3, updated_at = now() WHERE id = $1`,
|
||||
id, *body.BroadcastEnabled, *body.TalkToUserEnabled,
|
||||
); err != nil {
|
||||
log.Printf("PatchAbilities both-fields for %s: %v", id, err)
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
|
||||
return
|
||||
}
|
||||
} else if body.BroadcastEnabled != nil {
|
||||
if body.BroadcastEnabled != nil {
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET broadcast_enabled = $2, updated_at = now() WHERE id = $1`,
|
||||
id, *body.BroadcastEnabled,
|
||||
@@ -78,7 +65,9 @@ func PatchAbilities(c *gin.Context) {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
|
||||
return
|
||||
}
|
||||
} else if body.TalkToUserEnabled != nil {
|
||||
}
|
||||
|
||||
if body.TalkToUserEnabled != nil {
|
||||
if _, err := db.DB.ExecContext(ctx,
|
||||
`UPDATE workspaces SET talk_to_user_enabled = $2, updated_at = now() WHERE id = $1`,
|
||||
id, *body.TalkToUserEnabled,
|
||||
|
||||
@@ -130,8 +130,11 @@ func TestPatchAbilities_BothFields(t *testing.T) {
|
||||
mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
|
||||
WithArgs(wsUUID1, true, true).
|
||||
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
|
||||
WithArgs(wsUUID1, true).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectExec(`UPDATE workspaces SET talk_to_user_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
|
||||
WithArgs(wsUUID1, true).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
|
||||
@@ -179,25 +182,19 @@ func TestPatchAbilities_TalkToUserUpdateError(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestPatchAbilities_BothFields_UpdateError — regression for #2131. When
|
||||
// both fields are supplied the handler uses a single combined UPDATE. A
|
||||
// failure of that UPDATE must leave the workspace unchanged (atomic).
|
||||
func TestPatchAbilities_BothFields_UpdateError(t *testing.T) {
|
||||
func TestPatchAbilities_BothFields_BroadcastFails(t *testing.T) {
|
||||
mock, cleanup := withMockDB(t)
|
||||
defer cleanup()
|
||||
|
||||
mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
|
||||
WithArgs(wsUUID1).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
|
||||
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
|
||||
WithArgs(wsUUID1, true, true).
|
||||
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
|
||||
WithArgs(wsUUID1, true).
|
||||
WillReturnError(errors.New("disk full"))
|
||||
|
||||
w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
|
||||
if w.Code != http.StatusInternalServerError {
|
||||
t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
// Because only one UPDATE is issued, there is no partial-mutation
|
||||
// path to assert against; sqlmock implicitly verifies no second
|
||||
// exec occurred.
|
||||
}
|
||||
|
||||
@@ -95,14 +95,6 @@ func TestIntegration_BroadcastOrgRoot_NonRootSenderResolvesToRoot(t *testing.T)
|
||||
}
|
||||
})
|
||||
|
||||
// Pre-test hygiene: if a prior run crashed or was killed, its rows may
|
||||
// still be in the shared integration DB. Remove them before inserting so
|
||||
// the unique index workspaces_parent_name_uniq does not conflict.
|
||||
if _, err := conn.ExecContext(ctx,
|
||||
`DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%"); err != nil {
|
||||
t.Logf("pre-test cleanup (non-fatal): %v", err)
|
||||
}
|
||||
|
||||
rootID := uuid.New().String()
|
||||
midID := uuid.New().String()
|
||||
leafID := uuid.New().String()
|
||||
|
||||
@@ -41,6 +41,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -58,7 +59,10 @@ import (
|
||||
// only those.
|
||||
func integrationDB_WorkspaceCreateName(t *testing.T) *sql.DB {
|
||||
t.Helper()
|
||||
url := requireIntegrationDBURL(t)
|
||||
url := os.Getenv("INTEGRATION_DB_URL")
|
||||
if url == "" {
|
||||
t.Skip("INTEGRATION_DB_URL not set; skipping (see file header)")
|
||||
}
|
||||
conn, err := sql.Open("postgres", url)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
|
||||
@@ -613,32 +613,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
|
||||
if model == "" {
|
||||
log.Printf("ensureDefaultConfig: workspace %s reached provisioning with empty model — Create handler should have rejected this; rendering empty model: \"\" in config.yaml (workspace will boot not_configured)", workspaceID)
|
||||
}
|
||||
|
||||
// Derive the provider from the providers manifest and stamp it into the
|
||||
// generated config BEFORE claude-code model normalization strips the
|
||||
// slash-prefix. DeriveProvider needs the FULL, un-normalized model id
|
||||
// (e.g. "moonshot/kimi-k2.6") for the exact-id match that resolves the
|
||||
// canvas claude-code case to provider=platform — normalizing to
|
||||
// "kimi-k2.6" first would lose that match.
|
||||
//
|
||||
// Why this exists (RFC#340 Fix A): a canvas-created claude-code workspace
|
||||
// with model "moonshot/kimi-k2.6" booted NOT_CONFIGURED — the adapter
|
||||
// derived provider="moonshot" (slash-split of the model id) which is not
|
||||
// in the providers registry. CP bakes `provider: platform` via heredoc,
|
||||
// but the cp#329 config-bundle fetch overwrites /configs/config.yaml with
|
||||
// THIS (previously providerless) bundle version, so molecule-runtime
|
||||
// config.py re-derived the wrong provider. Stamping the manifest-derived
|
||||
// provider here (mirroring CP's buildModelProviderYAML shape) makes the
|
||||
// config the adapter reads carry the canonical provider.
|
||||
//
|
||||
// Reuses the SAME manifest path the config-SAVE validators use
|
||||
// (providerRegistry() + Manifest.DeriveProvider; see
|
||||
// model_registry_validation.go). On a derive MISS (unknown/unregistered
|
||||
// model, or registry unavailable) provider is left empty and the field is
|
||||
// omitted below — preserving today's behavior; never fail provisioning on
|
||||
// a derive miss.
|
||||
derivedProvider := deriveDefaultConfigProvider(runtime, model)
|
||||
|
||||
if runtime == "claude-code" {
|
||||
model = normalizeClaudeCodeModel(model)
|
||||
}
|
||||
@@ -666,14 +640,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
|
||||
// Model always at top level — config.py reads raw["model"] for all runtimes.
|
||||
configYAML += fmt.Sprintf("model: %s\n", quoteModel)
|
||||
|
||||
// Stamp the manifest-derived provider at top level (mirroring CP's
|
||||
// buildModelProviderYAML). Omitted entirely on a derive miss so the prior
|
||||
// behavior — no `provider:` key, runtime re-derives — is preserved for
|
||||
// unregistered models (requirement #3).
|
||||
if derivedProvider != "" {
|
||||
configYAML += fmt.Sprintf("provider: '%s'\n", yamlEscapeSingleQuotedProvider(derivedProvider))
|
||||
}
|
||||
|
||||
// Add runtime_config. required_env is intentionally omitted — the
|
||||
// platform injects secrets at container-start time via the secrets API,
|
||||
// and preflight already validates that the env vars are present before
|
||||
@@ -683,10 +649,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
|
||||
if runtime == "claude-code" {
|
||||
configYAML += fmt.Sprintf(" model: %s\n", quoteModel)
|
||||
}
|
||||
// Mirror the top-level provider under runtime_config (CP writes both).
|
||||
if derivedProvider != "" {
|
||||
configYAML += fmt.Sprintf(" provider: '%s'\n", yamlEscapeSingleQuotedProvider(derivedProvider))
|
||||
}
|
||||
configYAML += " timeout: 0\n"
|
||||
|
||||
files["config.yaml"] = []byte(configYAML)
|
||||
@@ -695,48 +657,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
|
||||
return files
|
||||
}
|
||||
|
||||
// deriveDefaultConfigProvider resolves the provider name the adapter should
|
||||
// see for (runtime, model) using the SAME providers manifest the config-SAVE
|
||||
// validators use (providerRegistry() + Manifest.DeriveProvider; see
|
||||
// model_registry_validation.go). It is intentionally fail-OPEN: any miss
|
||||
// (empty model, registry unavailable, unknown runtime, or a model the runtime
|
||||
// does not own) returns "" so the caller omits the `provider:` field and the
|
||||
// generated config keeps its pre-fix shape. It NEVER fails provisioning.
|
||||
//
|
||||
// `model` must be the FULL, un-normalized id (e.g. "moonshot/kimi-k2.6") so
|
||||
// DeriveProvider's exact-id match resolves the canvas claude-code case to
|
||||
// provider=platform. The availableAuthEnv arg is nil here — config-generation
|
||||
// has no per-workspace auth context yet (secrets are injected at container
|
||||
// start), matching the validators' nil call.
|
||||
func deriveDefaultConfigProvider(runtime, model string) string {
|
||||
if strings.TrimSpace(model) == "" {
|
||||
return ""
|
||||
}
|
||||
m, err := providerRegistry()
|
||||
if err != nil || m == nil {
|
||||
// Registry unavailable (a build-time defect the gen/sync gates catch).
|
||||
// Fail open — do not stamp a provider, do not block provisioning.
|
||||
return ""
|
||||
}
|
||||
p, err := m.DeriveProvider(runtime, model, nil)
|
||||
if err != nil {
|
||||
// Unknown runtime (federation / non-first-party) or a model the
|
||||
// runtime does not own. Either way, omit the provider and let the
|
||||
// runtime fall back to its prior derivation — preserving today's
|
||||
// behavior for unregistered models.
|
||||
return ""
|
||||
}
|
||||
return p.Name
|
||||
}
|
||||
|
||||
// yamlEscapeSingleQuotedProvider escapes a value for a YAML single-quoted
|
||||
// scalar, mirroring CP's buildModelProviderYAML (a literal single quote is
|
||||
// doubled). Provider names are registry-controlled identifiers, so this is a
|
||||
// defense-in-depth measure rather than a hot path.
|
||||
func yamlEscapeSingleQuotedProvider(v string) string {
|
||||
return strings.ReplaceAll(v, "'", "''")
|
||||
}
|
||||
|
||||
func normalizeClaudeCodeModel(model string) string {
|
||||
model = strings.TrimSpace(model)
|
||||
if before, after, ok := strings.Cut(model, "/"); ok && before != "" && after != "" {
|
||||
|
||||
@@ -1,196 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// workspace_provision_platform_boot_test.go — the deterministic, SSOT-driven
|
||||
// regression suite for the class of bug behind the moonshot/kimi
|
||||
// "canvas-created claude-code workspace boots NOT_CONFIGURED" production
|
||||
// incident (RFC#340 Fix A #2187, canvas Fix C #2188).
|
||||
//
|
||||
// THE BUG (what shipped to prod):
|
||||
// A claude-code workspace created via the canvas with provider=Platform +
|
||||
// model="moonshot/kimi-k2.6" booted NOT_CONFIGURED. Unit tests passed; the
|
||||
// REAL boot path was broken. ensureDefaultConfig generated a config.yaml that
|
||||
// carried NO derived `provider:` key, so the cp#329 config-bundle the adapter
|
||||
// actually reads left molecule-runtime config.py to slash-split the model id
|
||||
// "moonshot/kimi-k2.6" -> provider="moonshot", which is NOT in the providers
|
||||
// registry -> NOT_CONFIGURED.
|
||||
//
|
||||
// THE FIX A INVARIANT (this file pins it, and pins it for the WHOLE class):
|
||||
// ensureDefaultConfig MUST stamp the manifest-derived provider into the
|
||||
// generated config.yaml — at BOTH the top level and under runtime_config —
|
||||
// for every (runtime, model) the providers SSOT maps to a platform provider.
|
||||
// The single-combo pin (TestEnsureDefaultConfig_StampsDerivedProvider in
|
||||
// workspace_provision_test.go) proves the headline case. THIS file closes the
|
||||
// gap that single pin leaves: it is PARAMETRIZED OVER THE SSOT, so when a NEW
|
||||
// platform model is added to providers.yaml for claude-code (or any runtime
|
||||
// with a platform arm), the new id is automatically covered — a future
|
||||
// platform model that fails to derive `provider: platform` fails THIS test at
|
||||
// build time, before it can ship a NOT_CONFIGURED boot.
|
||||
//
|
||||
// WHY SSOT-DRIVEN AND NOT A HAND-MAINTAINED LIST:
|
||||
// The original bug was a divergence between "what the canvas offers"
|
||||
// (providers.yaml platform arm) and "what the config generator stamps". A
|
||||
// hardcoded test model list would itself drift from the SSOT and re-open the
|
||||
// same divergence gap. By enumerating the platform model set directly from the
|
||||
// loaded providers.Manifest (the SAME manifest ensureDefaultConfig's
|
||||
// deriveDefaultConfigProvider resolves against), this test cannot fall behind
|
||||
// the offered set: add a platform model, get a test case for free; the test
|
||||
// only passes if the generator actually stamps it.
|
||||
//
|
||||
// SCOPE: deterministic, no live infra. The REAL-boot complement (provision a
|
||||
// staging workspace and assert status=online + a completion returns 200 for the
|
||||
// SAME combo) is the bash staging harness — see
|
||||
// tests/e2e/test_staging_full_saas.sh (E2E_LLM_PATH=platform) and the
|
||||
// e2e-staging-platform-boot job in .gitea/workflows/e2e-staging-saas.yml. That
|
||||
// asserts the REAL artifact (booted status / completion); THIS asserts the
|
||||
// deterministic config-generation invariant the real boot depends on.
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// platformModelsForRuntime returns the exact model ids the providers SSOT lists
|
||||
// under runtime rt's `platform` native provider arm — the set the canvas offers
|
||||
// as provider=Platform and the set ensureDefaultConfig MUST stamp
|
||||
// `provider: platform` for. Reads the SAME embedded manifest the config
|
||||
// generator derives against (providers.LoadManifest), so it can never drift from
|
||||
// the offered set. Returns nil when the runtime has no platform arm.
|
||||
func platformModelsForRuntime(t *testing.T, rt string) []string {
|
||||
t.Helper()
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
t.Fatalf("LoadManifest: %v", err)
|
||||
}
|
||||
native, ok := m.Runtimes[rt]
|
||||
if !ok {
|
||||
t.Fatalf("providers SSOT has no runtimes entry for %q", rt)
|
||||
}
|
||||
for _, ref := range native.Providers {
|
||||
if ref.Name == "platform" {
|
||||
return ref.Models
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel is the
|
||||
// class-level regression for the moonshot/kimi NOT_CONFIGURED incident. For
|
||||
// EVERY model the providers SSOT offers under claude-code's platform arm, it
|
||||
// asserts the generated config.yaml carries the manifest-derived provider at
|
||||
// both the top level and under runtime_config. This is the Fix A invariant,
|
||||
// parametrized over the SSOT so a newly-offered platform model cannot ship
|
||||
// without the stamp (the exact divergence — offered-but-not-stamped — that
|
||||
// booted "moonshot/kimi-k2.6" into NOT_CONFIGURED).
|
||||
func TestEnsureDefaultConfig_StampsProviderForEverySSOTPlatformModel(t *testing.T) {
|
||||
const runtime = "claude-code"
|
||||
platformModels := platformModelsForRuntime(t, runtime)
|
||||
if len(platformModels) == 0 {
|
||||
t.Fatalf("providers SSOT lists no platform models for runtime %q — the regression matrix would be empty; the SSOT shape changed (this test is the canary)", runtime)
|
||||
}
|
||||
// Headline sentinel: the exact id that booted NOT_CONFIGURED in prod MUST be
|
||||
// in the enumerated set. If a refactor drops it from the platform arm, this
|
||||
// test must still cover it explicitly — fail loud rather than silently
|
||||
// shrinking the matrix.
|
||||
if !containsString(platformModels, "moonshot/kimi-k2.6") {
|
||||
t.Fatalf("the headline incident model \"moonshot/kimi-k2.6\" is no longer in the claude-code platform SSOT set (%v) — regression coverage for the original bug would be lost", platformModels)
|
||||
}
|
||||
|
||||
for _, model := range platformModels {
|
||||
model := model
|
||||
t.Run(model, func(t *testing.T) {
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-platform-boot", models.CreateWorkspacePayload{
|
||||
Name: "Platform Boot Agent",
|
||||
Tier: 2,
|
||||
Runtime: runtime,
|
||||
Model: model,
|
||||
})
|
||||
|
||||
raw, ok := files["config.yaml"]
|
||||
if !ok {
|
||||
t.Fatalf("expected config.yaml in generated files for model %q", model)
|
||||
}
|
||||
|
||||
var parsed struct {
|
||||
Model string `yaml:"model"`
|
||||
Provider string `yaml:"provider"`
|
||||
RuntimeConfig struct {
|
||||
Model string `yaml:"model"`
|
||||
Provider string `yaml:"provider"`
|
||||
} `yaml:"runtime_config"`
|
||||
}
|
||||
if err := yaml.Unmarshal(raw, &parsed); err != nil {
|
||||
t.Fatalf("generated YAML invalid for model %q: %v\n%s", model, err, raw)
|
||||
}
|
||||
|
||||
// The load-bearing invariant: BOTH the top-level and the
|
||||
// runtime_config provider must be exactly "platform". An empty or
|
||||
// vendor-namespace ("moonshot") value here is the prod NOT_CONFIGURED
|
||||
// boot — the adapter would slash-split the model id and look up an
|
||||
// unregistered provider.
|
||||
if parsed.Provider != "platform" {
|
||||
t.Errorf("model %q: top-level provider = %q, want \"platform\" (Fix A invariant — empty/vendor value is the NOT_CONFIGURED boot)\n%s", model, parsed.Provider, raw)
|
||||
}
|
||||
if parsed.RuntimeConfig.Provider != "platform" {
|
||||
t.Errorf("model %q: runtime_config.provider = %q, want \"platform\"\n%s", model, parsed.RuntimeConfig.Provider, raw)
|
||||
}
|
||||
// Sanity: the config must still render a non-empty model (a config
|
||||
// with provider but no model is equally undeployable).
|
||||
if parsed.Model == "" {
|
||||
t.Errorf("model %q: generated config has empty top-level model\n%s", model, raw)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestPlatformModelDeriveProvider_SSOTConsistency is the upstream half of the
|
||||
// same invariant, one layer below ensureDefaultConfig: it asserts the providers
|
||||
// manifest's DeriveProvider — the resolver deriveDefaultConfigProvider calls —
|
||||
// maps every SSOT-offered claude-code platform model to a provider whose Name is
|
||||
// "platform". If DeriveProvider itself regressed (e.g. a model_prefix_match
|
||||
// change made "moonshot/kimi-k2.6" resolve to the bare "moonshot" entry again),
|
||||
// this fails closer to the root cause than the config-shape test above, making
|
||||
// the diagnosis unambiguous: SSOT/derive regression vs config-emission
|
||||
// regression.
|
||||
func TestPlatformModelDeriveProvider_SSOTConsistency(t *testing.T) {
|
||||
const runtime = "claude-code"
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
t.Fatalf("LoadManifest: %v", err)
|
||||
}
|
||||
platformModels := platformModelsForRuntime(t, runtime)
|
||||
if len(platformModels) == 0 {
|
||||
t.Fatalf("no platform models for %q in SSOT", runtime)
|
||||
}
|
||||
for _, model := range platformModels {
|
||||
model := model
|
||||
t.Run(model, func(t *testing.T) {
|
||||
// nil availableAuthEnv mirrors deriveDefaultConfigProvider's call at
|
||||
// config-generation time (no per-workspace auth context yet).
|
||||
p, err := m.DeriveProvider(runtime, model, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("DeriveProvider(%q, %q): unexpected error %v — an SSOT-offered platform model MUST derive", runtime, model, err)
|
||||
}
|
||||
if p.Name != "platform" {
|
||||
t.Errorf("DeriveProvider(%q, %q).Name = %q, want \"platform\" (this is the exact slash-split-to-vendor regression that booted NOT_CONFIGURED)", runtime, model, p.Name)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// containsString is a tiny local membership helper. Kept here (not a shared
|
||||
// test util) so this regression file is self-contained and can be read top to
|
||||
// bottom without chasing helpers across the package.
|
||||
func containsString(xs []string, want string) bool {
|
||||
for _, x := range xs {
|
||||
if x == want {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -363,74 +363,6 @@ runtime_config:
|
||||
}
|
||||
}
|
||||
|
||||
// TestEnsureDefaultConfig_StampsDerivedProvider pins RFC#340 Fix A: a
|
||||
// canvas-created claude-code workspace with model "moonshot/kimi-k2.6" must
|
||||
// have the manifest-derived provider stamped into config.yaml at BOTH the top
|
||||
// level and under runtime_config, so the cp#329 config-bundle the adapter
|
||||
// reads no longer leaves the runtime to slash-split "moonshot/..." → an
|
||||
// unregistered provider="moonshot" (the original NOT_CONFIGURED boot). The
|
||||
// canonical manifest exact-id-matches "moonshot/kimi-k2.6" to provider=platform.
|
||||
func TestEnsureDefaultConfig_StampsDerivedProvider(t *testing.T) {
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-moonshot", models.CreateWorkspacePayload{
|
||||
Name: "Kimi Agent",
|
||||
Tier: 2,
|
||||
Runtime: "claude-code",
|
||||
Model: "moonshot/kimi-k2.6",
|
||||
})
|
||||
|
||||
var parsed struct {
|
||||
Model string `yaml:"model"`
|
||||
Provider string `yaml:"provider"`
|
||||
RuntimeConfig struct {
|
||||
Model string `yaml:"model"`
|
||||
Provider string `yaml:"provider"`
|
||||
} `yaml:"runtime_config"`
|
||||
}
|
||||
if err := yaml.Unmarshal(files["config.yaml"], &parsed); err != nil {
|
||||
t.Fatalf("generated YAML invalid: %v\n%s", err, files["config.yaml"])
|
||||
}
|
||||
if parsed.Provider != "platform" {
|
||||
t.Errorf("top-level provider = %q, want platform\n%s", parsed.Provider, files["config.yaml"])
|
||||
}
|
||||
if parsed.RuntimeConfig.Provider != "platform" {
|
||||
t.Errorf("runtime_config.provider = %q, want platform\n%s", parsed.RuntimeConfig.Provider, files["config.yaml"])
|
||||
}
|
||||
// The claude-code model normalization still strips the slash prefix.
|
||||
if parsed.Model != "kimi-k2.6" {
|
||||
t.Errorf("top-level model = %q, want kimi-k2.6\n%s", parsed.Model, files["config.yaml"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestEnsureDefaultConfig_DeriveMissOmitsProvider pins requirement #3: a model
|
||||
// the providers manifest does NOT recognize for the runtime (a derive miss)
|
||||
// must NOT write any `provider:` key — neither top-level nor under
|
||||
// runtime_config — preserving the pre-fix behavior (no empty `provider:`,
|
||||
// provisioning never fails on a miss). "gpt-4o" is not a registered
|
||||
// claude-code model, so DeriveProvider errors and the field is omitted.
|
||||
func TestEnsureDefaultConfig_DeriveMissOmitsProvider(t *testing.T) {
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
files := handler.ensureDefaultConfig("ws-derivemiss", models.CreateWorkspacePayload{
|
||||
Name: "Unregistered Agent",
|
||||
Tier: 1,
|
||||
Runtime: "claude-code",
|
||||
Model: "gpt-4o",
|
||||
})
|
||||
|
||||
content := string(files["config.yaml"])
|
||||
if strings.Contains(content, "provider:") {
|
||||
t.Errorf("derive miss must NOT write any provider: key, got:\n%s", content)
|
||||
}
|
||||
// Sanity: a derive miss must still produce a valid, model-bearing config.
|
||||
if !strings.Contains(content, `model: "gpt-4o"`) {
|
||||
t.Errorf("derive miss should still render the model, got:\n%s", content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnsureDefaultConfig_CustomModel(t *testing.T) {
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
@@ -876,9 +876,8 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
|
||||
h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
|
||||
// sendRestartContext is a one-way notification to the new container; safe
|
||||
// to fire async — the next restart cycle won't depend on it completing.
|
||||
// Tracked via h.goAsync so tests can wait for it via h.asyncWG before
|
||||
// closing the sqlmock. Without this, untracked goroutines hit the restored
|
||||
// mock and cause "was not expected" errors in parallel CI execution (mc#1264).
|
||||
// Tracked via goAsync so the test harness can drain it before the
|
||||
// global db.DB swap (sendRestartContext reads db.DB).
|
||||
h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) })
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ const SchemaVersion = 1
|
||||
// Fingerprint is a stable content hash of the generated projection (schema
|
||||
// version + provider catalog + runtime native sets). It changes iff the
|
||||
// registry DATA changes (comment-only YAML edits do not churn it).
|
||||
const Fingerprint = "a491f5ff8a17ef59"
|
||||
const Fingerprint = "8f733b112695b926"
|
||||
|
||||
// GenProvider is the generated projection of one provider catalog entry —
|
||||
// the subset a downstream consumer needs to derive + display a provider.
|
||||
@@ -50,7 +50,7 @@ var Providers = []GenProvider{
|
||||
{Name: "openai-api", DisplayName: "OpenAI API", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^openai-api[:/]", IsPlatform: false, UpstreamVendor: "openai"},
|
||||
{Name: "moonshot", DisplayName: "Moonshot (Kimi)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOONSHOT_API_KEY", "KIMI_API_KEY"}, ModelPrefixMatch: "^moonshot[:/-]", IsPlatform: false, UpstreamVendor: "moonshot"},
|
||||
{Name: "minimax", DisplayName: "MiniMax", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^minimax-m", IsPlatform: false, UpstreamVendor: "minimax"},
|
||||
{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
|
||||
{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
|
||||
{Name: "xiaomi-mimo", DisplayName: "Xiaomi MiMo", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^mimo-", IsPlatform: false},
|
||||
{Name: "zai", DisplayName: "Z.ai (GLM)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GLM_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^glm-", IsPlatform: false},
|
||||
{Name: "kimi-coding", DisplayName: "Moonshot Kimi (coding-tuned)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^kimi-", IsPlatform: false},
|
||||
@@ -89,9 +89,8 @@ var Runtimes = map[string][]GenRuntimeRef{
|
||||
{Name: "platform", Models: []string{"openai/gpt-5.4", "openai/gpt-5.4-mini"}},
|
||||
},
|
||||
"google-adk": {
|
||||
{Name: "platform", Models: []string{"platform:gemini-2.5-pro", "platform:gemini-2.5-flash"}},
|
||||
{Name: "google", Models: []string{"gemini-2.5-pro", "gemini-2.5-flash"}},
|
||||
{Name: "vertex", Models: []string{"vertex:gemini-2.5-pro"}},
|
||||
{Name: "google", Models: []string{"gemini-2.5-pro"}},
|
||||
},
|
||||
"hermes": {
|
||||
{Name: "kimi-coding", Models: []string{"kimi-coding/kimi-k2"}},
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
package providers
|
||||
|
||||
import "testing"
|
||||
|
||||
// Proper-SSOT (task #65): google-adk keyless Gemini resolves to the closed
|
||||
// platform provider -> IsPlatform=true; BYOK AI Studio -> google. The
|
||||
// platform: select ids are registered so workspace-create accepts them
|
||||
// (was 422 UNREGISTERED_MODEL_FOR_RUNTIME).
|
||||
func TestGoogleADK_PlatformGeminiResolvesToPlatform(t *testing.T) {
|
||||
m, err := LoadManifest()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, id := range []string{"platform:gemini-2.5-pro", "platform:gemini-2.5-flash"} {
|
||||
p, err := m.DeriveProvider("google-adk", id, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: %v", id, err)
|
||||
}
|
||||
if p.Name != PlatformProviderName || !p.IsPlatform() {
|
||||
t.Errorf("%s -> %q IsPlatform=%v; want platform", id, p.Name, p.IsPlatform())
|
||||
}
|
||||
}
|
||||
p, err := m.DeriveProvider("google-adk", "gemini-2.5-pro", nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if p.IsPlatform() || p.Name != "google" {
|
||||
t.Errorf("gemini-2.5-pro -> %q IsPlatform=%v; want google byok", p.Name, p.IsPlatform())
|
||||
}
|
||||
models, _ := m.ModelsForRuntime("google-adk")
|
||||
want := map[string]bool{"platform:gemini-2.5-pro": false, "platform:gemini-2.5-flash": false}
|
||||
for _, id := range models {
|
||||
if _, ok := want[id]; ok {
|
||||
want[id] = true
|
||||
}
|
||||
}
|
||||
for id, ok := range want {
|
||||
if !ok {
|
||||
t.Errorf("%s not registered for google-adk — create would 422", id)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -292,7 +292,7 @@ providers:
|
||||
# PR-1 simplification when only claude-code referenced platform.
|
||||
base_url_template: "https://api.moleculesai.app/api/v1/internal/llm/openai/v1"
|
||||
base_url_anthropic: "https://api.moleculesai.app/api/v1/internal/llm/anthropic/v1"
|
||||
auth_env: [MOLECULE_LLM_USAGE_TOKEN]
|
||||
auth_env: [ANTHROPIC_API_KEY, MOLECULE_LLM_USAGE_TOKEN]
|
||||
auth_token_env: ANTHROPIC_API_KEY
|
||||
# Adapter routes kimi- / moonshot/ through platform by default. No bare
|
||||
# vendor prefix of its own; it multiplexes other vendors' slugs. Match
|
||||
@@ -412,24 +412,14 @@ providers:
|
||||
model_prefix_match: "^gemini-"
|
||||
model_aliases: []
|
||||
|
||||
# Google Vertex AI — served via the Molecule CP LLM proxy (NOT on-box ADC).
|
||||
# google-adk routes ALL Gemini through the proxy, which mints a Vertex token
|
||||
# server-side over Workload Identity Federation (AWS -> GCP STS -> SA; see
|
||||
# internal/vertexauth + llm_proxy.go google/vertex case) and meters usage to
|
||||
# org credits. The former on-box ADC delivery (a NON-SECRET external_account
|
||||
# cred-config written to /configs/gcp-adc.json via GOOGLE_APPLICATION_CREDENTIALS,
|
||||
# injected by the provisioner) was REMOVED: a tenant has EC2 root and could
|
||||
# have used that credential to call Vertex DIRECTLY, bypassing metering
|
||||
# (keyless-Vertex billing leak — task #64 / RFC internal#763; provisioner
|
||||
# force-off + template routes vertex: through the proxy). The `vertex:`
|
||||
# namespace + this entry remain for proxy routing/billing of Vertex-upstream
|
||||
# requests, distinct from the API-key `google` vendor's ^gemini-
|
||||
# (TestNoAmbiguousModelMatch).
|
||||
#
|
||||
# NOTE: display_name ("keyless ADC") and auth_env (GOOGLE_APPLICATION_CREDENTIALS)
|
||||
# are now VESTIGIAL — no consumer reads auth_env post-leak-fix, but it must stay
|
||||
# non-empty (providers.go validate). Left as-is to keep this a comment-only,
|
||||
# regen-free change; retiring them is a registry-regen follow-up.
|
||||
# Google Vertex AI — KEYLESS arm (mirrors the anthropic-oauth / anthropic-api
|
||||
# and openai-subscription / openai-api split: same vendor, distinct auth).
|
||||
# google-adk serves Gemini via Vertex using Application Default Credentials
|
||||
# over Workload Identity Federation (AWS EC2 role -> GCP STS -> SA), injected
|
||||
# by the provisioner (cp#416 + envs.yaml vertex block) as a NON-SECRET
|
||||
# external_account cred-config at GOOGLE_APPLICATION_CREDENTIALS. No API key.
|
||||
# Distinct `vertex:` model namespace keeps it unambiguous vs the API-key
|
||||
# `google` vendor's ^gemini- (TestNoAmbiguousModelMatch).
|
||||
- name: vertex
|
||||
display_name: "Google Vertex AI (keyless ADC)"
|
||||
vendor_logo: "google"
|
||||
@@ -854,24 +844,11 @@ runtimes:
|
||||
# this runtimes entry declares the selectable model set.
|
||||
google-adk:
|
||||
providers:
|
||||
# Platform-managed (keyless, metered) Gemini via the Molecule LLM proxy ->
|
||||
# Vertex AI (server-side WIF mint; NO credential on the tenant box — the
|
||||
# keyless-Vertex leak fix). Resolves to the closed `platform` provider ->
|
||||
# IsPlatform=true -> platform_managed billing + required_env=[] (derived).
|
||||
# The org-compliant default. The runtime translates the `platform:` select
|
||||
# id to the bare wire id the proxy routes to Vertex.
|
||||
- name: platform
|
||||
models:
|
||||
- platform:gemini-2.5-pro
|
||||
- platform:gemini-2.5-flash
|
||||
# API-key BYOK arm: AI Studio (the tenant's OWN GOOGLE_API_KEY).
|
||||
- name: google
|
||||
models:
|
||||
- gemini-2.5-pro
|
||||
- gemini-2.5-flash
|
||||
# DEPRECATED transitional: vertex: ids stay registered until templates
|
||||
# move to platform: (superseded by the platform arm above). Remove in a
|
||||
# cleanup once no template references vertex:gemini-*.
|
||||
# Keyless Vertex (org-compliant default): Gemini via Vertex AI + ADC/WIF.
|
||||
- name: vertex
|
||||
models:
|
||||
- vertex:gemini-2.5-pro
|
||||
- vertex:gemini-2.5-pro
|
||||
# API-key BYOK arm: AI Studio GEMINI_API_KEY/GOOGLE_API_KEY.
|
||||
- name: google
|
||||
models:
|
||||
- gemini-2.5-pro
|
||||
@@ -29,7 +29,7 @@ import (
|
||||
// canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as
|
||||
// synced from molecule-controlplane. Bumped deliberately on each re-sync (see
|
||||
// file doc). Cross-checked live by the sync-providers-yaml CI workflow.
|
||||
const canonicalProvidersYAMLSHA256 = "021ae082c2bbbbb61c406cae03205ac6b7fff160ae5976cfc64de3de676d02b2"
|
||||
const canonicalProvidersYAMLSHA256 = "dec73199e26cee2d395a0acece99771618d3879dc5ca724ba57cb5b38079c6ce"
|
||||
|
||||
func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) {
|
||||
sum := sha256.Sum256(embeddedYAML)
|
||||
|
||||
Reference in New Issue
Block a user