Compare commits

..

1 Commits

Author SHA1 Message Date
Molecule AI Dev Engineer A (Kimi) f12c38b3f6 chore(dead-code): remove unused QueueDepth function
QueueDepth was added for Phase 2/3 busy-return response visibility but
was never wired to a caller. The inline depth query in EnqueueA2A serves
today's enqueue response, making this function dead code.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-03 07:23:28 +00:00
44 changed files with 136 additions and 3141 deletions
@@ -466,40 +466,12 @@ def fetch_log(target_url: str) -> str | None:
def grep_fail_markers(log_text: str) -> list[str]:
"""Return up to 5 sample matching lines for any FAIL_PATTERNS hit.
Empty list = clean log.
Heuristic: skip lines where the marker appears inside script source
(e.g. ``echo "::error::..."`` in a ``::group::Run`` block) rather
than actual execution output. The Gitea Actions log prints the raw
script before executing it; ``echo "::error::"`` lines in that
display are false positives.
"""
Empty list = clean log."""
matches: list[str] = []
in_run_group = False
group_depth = 0
for line in log_text.splitlines():
stripped = line.strip()
# Track Gitea Actions group markers so we can skip the
# ``::group::Run`` script-source display blocks.
if stripped.startswith("::group::Run"):
in_run_group = True
group_depth = 1
continue
if stripped == "::endgroup::":
if in_run_group:
in_run_group = False
group_depth = 0
continue
if in_run_group:
continue
for pat in FAIL_PATTERNS:
if pat in line:
# Additional false-positive guard: ``echo "::error::"``
# is script source, not a runtime error emission.
if pat == "::error::":
prefix = line[: line.index(pat)].strip()
if prefix.endswith('echo') or prefix.endswith("echo '") or prefix.endswith('echo "'):
break
# Truncate to keep error output bounded.
matches.append(line.strip()[:240])
break
if len(matches) >= 5:
-83
View File
@@ -364,71 +364,6 @@ def _api_json_optional(url: str, token: str) -> tuple[int, dict | None]:
return exc.code, None
def current_branch_head(env: dict[str, str]) -> str | None:
"""Return the SHA at the tip of the deploy branch (main) per Gitea, or None.
Used to detect a *superseded* deploy job (see `superseded_by`). Fail-safe:
any read error / missing token returns None so the caller treats the job as
NOT superseded and the strict /buildinfo verify still runs. We never let an
unreadable head silently green a deploy.
"""
token = env.get("GITEA_TOKEN", "").strip()
if not token:
return None
host = env.get("GITEA_HOST", "git.moleculesai.app")
repo = env.get("GITHUB_REPOSITORY", "molecule-ai/molecule-core")
# Deploy lane is on: push:main; the branch is always main here, but read it
# from the ref name when present so a future branch rename doesn't break us.
branch = env.get("GITHUB_REF_NAME", "").strip() or "main"
url = f"https://{host}/api/v1/repos/{repo}/branches/{quote(branch, safe='')}"
status, body = _api_json_optional(url, token)
if status != 200 or not isinstance(body, dict):
return None
commit = body.get("commit")
if isinstance(commit, dict):
head = commit.get("id") or commit.get("sha")
if isinstance(head, str) and head.strip():
return head.strip()
return None
def superseded_by(env: dict[str, str]) -> str | None:
"""Return the newer head SHA if THIS deploy job has been superseded, else None.
This workflow runs with no `concurrency:` (intentional — Gitea 1.22.6 cancels
queued runs, which is unacceptable for a prod deploy). When two main pushes
land close together, BOTH deploy-production jobs run. The newer push rolls the
fleet forward first; the OLDER job's strict /buildinfo verify then sees tenants
on the NEWER SHA and false-reds with "$slug is stale" — even though the fleet
is AHEAD, not behind. Git SHAs aren't ordered, so the verify can't tell ahead
from behind on its own (and /buildinfo exposes only git_sha, no build time).
Resolve it at the source of truth for ordering — the branch ref: if main's
current head is a DIFFERENT SHA than the one this job is deploying, a newer
commit has landed and this job is superseded; the newest job's verify is the
authoritative one. We return that head SHA so the caller can log it and exit
success early, skipping the strict-equality verify for this stale job.
Fail-safe: returns None (NOT superseded) when the head can't be read or equals
our SHA, so a genuinely-behind tenant under the LATEST deploy job still fails
the strict verify loudly. This never suppresses a real-stale signal — it only
excuses a job that is no longer the latest from asserting exact equality.
"""
sha = env.get("GITHUB_SHA", "").strip()
if not sha:
return None
head = current_branch_head(env)
if not head:
return None
# SHA lengths can differ (short vs full); compare on the shorter prefix.
n = min(len(head), len(sha))
if head[:n].lower() == sha[:n].lower():
return None
return head
def live_disable_flag(env: dict[str, str]) -> str:
"""Return a live disable value from Gitea variables when readable.
@@ -507,14 +442,6 @@ def main() -> int:
sub.add_parser("plan", help="print production deploy plan as JSON")
sub.add_parser("assert-enabled", help="fail if production deploy is currently disabled")
sub.add_parser("wait-ci", help="block until required CI context is green")
sub.add_parser(
"check-superseded",
help=(
"exit 0 if a newer commit has landed on the deploy branch (this job "
"is superseded; prints the newer head SHA), exit 10 if this job is "
"still the latest"
),
)
rollout_parser = sub.add_parser("rollout", help="execute canary-first scoped production rollout")
rollout_parser.add_argument("--plan", required=True, help="path to prod-auto-deploy plan JSON")
rollout_parser.add_argument("--response", required=True, help="path to write aggregate response JSON")
@@ -530,16 +457,6 @@ def main() -> int:
if args.command == "wait-ci":
wait_for_ci_context(dict(os.environ))
return 0
if args.command == "check-superseded":
newer = superseded_by(dict(os.environ))
if newer:
print(newer)
return 0
# Exit 10 (not 0, not 1): "this job is still the latest". The
# workflow treats only exit 0 as superseded; 10 means proceed to
# the strict verify. A non-zero code here is informational, not a
# failure — the workflow step swallows it.
return 10
if args.command == "rollout":
rollout_from_plan_file(args.plan, args.response, dict(os.environ))
return 0
@@ -1,198 +0,0 @@
"""Live-fire regression test for #2159 — gate auto-fire runtime verification.
Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML
is structurally correct. This test validates the *runtime* path: submitting an
APPROVED review to a PR whose head contains the current gate workflows causes
Gitea Actions to queue the qa-review + security-review workflows and POST the
branch-protection-required (pull_request_target) contexts within a reasonable
window.
Skipped when Gitea API credentials are not available. Intended for:
- manual developer verification
- CI jobs provisioned with a service-account token
Environment:
GITEA_HOST — default: git.moleculesai.app
GITEA_TOKEN — token with read:repository + write:issues (for review POST)
REPO — default: molecule-ai/molecule-core
LIVEFIRE_PR_NUMBER — optional; if omitted the test tries to find a
suitable open PR automatically, or skips.
LIVEFIRE_TIMEOUT_SEC — default: 120
"""
import base64
import json
import os
import time
import urllib.error
import urllib.request
from pathlib import Path
import pytest
import yaml
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "")
LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120"))
REQUIRED_CONTEXTS = [
"qa-review / approved (pull_request_target)",
"security-review / approved (pull_request_target)",
]
skip_no_token = pytest.mark.skipif(
not GITEA_TOKEN,
reason="GITEA_TOKEN not set — live-fire test requires API credentials",
)
def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
url = f"https://{GITEA_HOST}/api/v1{path}"
headers = {
"Authorization": f"token {GITEA_TOKEN}",
"Content-Type": "application/json",
}
data = json.dumps(body).encode() if body else None
req = urllib.request.Request(url, data=data, headers=headers, method=method)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
raw = resp.read()
code = resp.status
except urllib.error.HTTPError as exc:
raw = exc.read()
code = exc.code
payload = json.loads(raw) if raw else {}
return code, payload
def _get_pr(number: int) -> dict:
code, pr = _api("GET", f"/repos/{REPO}/pulls/{number}")
if code != 200:
pytest.fail(f"GET /pulls/{number} returned HTTP {code}: {pr}")
return pr
def _list_open_prs() -> list[dict]:
code, prs = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50")
if code != 200:
pytest.fail(f"GET /pulls?state=open returned HTTP {code}: {prs}")
return prs
def _pr_has_trigger_in_head(pr: dict) -> bool:
"""Return True if the PR head contains pull_request_review in both workflows."""
head_sha = pr["head"]["sha"]
for wf_name in ("qa-review.yml", "security-review.yml"):
path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}"
code, payload = _api("GET", path)
if code != 200:
return False
raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
wf = yaml.safe_load(raw)
on = wf.get(True) or wf.get("on") or {}
if isinstance(on, str):
if on != "pull_request_review":
return False
elif "pull_request_review" not in on:
return False
return True
def _find_suitable_pr() -> dict:
if LIVEFIRE_PR_NUMBER:
pr = _get_pr(int(LIVEFIRE_PR_NUMBER))
if pr.get("state") != "open":
pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open")
return pr
prs = _list_open_prs()
for pr in prs:
if _pr_has_trigger_in_head(pr):
return pr
pytest.skip("No open PR found whose head contains the pull_request_review trigger")
def _submit_approved_review(pr_number: int) -> dict:
code, review = _api(
"POST",
f"/repos/{REPO}/pulls/{pr_number}/reviews",
{"body": "Live-fire test APPROVED review", "event": "APPROVED"},
)
# 200 = created, 422 = review already exists (idempotent enough for our purposes)
if code not in (200, 201, 422):
pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}")
return review
def _get_status_updated_at(sha: str) -> dict[str, str]:
"""Return mapping context -> updated_at for required contexts on this SHA."""
code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
if code != 200:
return {}
result: dict[str, str] = {}
for st in statuses:
ctx = st.get("context", "")
if ctx in REQUIRED_CONTEXTS:
result[ctx] = st.get("updated_at", st.get("created_at", ""))
return result
def _poll_fresh_statuses(
sha: str,
prior_updated_at: dict[str, str],
timeout_sec: int = LIVEFIRE_TIMEOUT_SEC,
) -> dict[str, str]:
"""Poll until required contexts appear with updated_at fresher than prior."""
deadline = time.monotonic() + timeout_sec
found: dict[str, str] = {}
while time.monotonic() < deadline:
code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
if code == 200:
for st in statuses:
ctx = st.get("context", "")
if ctx in REQUIRED_CONTEXTS:
updated_at = st.get("updated_at", st.get("created_at", ""))
# Fresh if the context was absent before, OR its timestamp changed.
if ctx not in prior_updated_at or updated_at != prior_updated_at[ctx]:
found[ctx] = st.get("state", st.get("status", ""))
if all(ctx in found for ctx in REQUIRED_CONTEXTS):
return found
time.sleep(5)
return found
@skip_no_token
class TestGateAutoFireLive:
def test_auto_fire_posts_required_contexts(self):
"""Submit APPROVED review; assert BP-required contexts appear fresh within timeout."""
pr = _find_suitable_pr()
pr_number = pr["number"]
head_sha = pr["head"]["sha"]
# Capture pre-existing status timestamps so we can prove FRESH contexts
# were posted after the review submission (not stale from a prior run).
prior_updated_at = _get_status_updated_at(head_sha)
_submit_approved_review(pr_number)
found = _poll_fresh_statuses(head_sha, prior_updated_at)
missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found]
if missing:
pytest.fail(
f"After {LIVEFIRE_TIMEOUT_SEC}s, fresh contexts still missing: {missing}. "
f"Found: {found}. Prior timestamps: {prior_updated_at}. "
f"PR #{pr_number} head={head_sha}. "
f"This indicates the pull_request_review trigger did not fire at runtime."
)
# The contexts appeared fresh — that's the proof of auto-fire.
# We do NOT assert success vs failure; the evaluator decides that.
# The point of #2159 is that the workflows QUEUE and POST at all.
for ctx, state in found.items():
assert state in ("pending", "success", "failure"), (
f"Unexpected state {state!r} for {ctx}"
)
@@ -1,145 +0,0 @@
"""Stale-head diagnostic test for #2159.
Deterministically reports whether a PR's HEAD contains the pull_request_review
trigger in qa-review.yml and security-review.yml. If the trigger is absent,
auto-fire on APPROVED review is impossible for that PR.
This is used as a self-diagnostic for future stale-PR situations (PRs opened
before #2157 merged, or branches cut from old bases).
Environment:
GITEA_HOST — default: git.moleculesai.app
GITEA_TOKEN — token with read:repository scope (optional; falls back to local files)
REPO — default: molecule-ai/molecule-core
PR_NUMBER — required when running against a real PR
"""
import base64
import json
import os
import urllib.error
import urllib.request
from pathlib import Path
import pytest
import yaml
GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
PR_NUMBER = os.environ.get("PR_NUMBER", "")
ROOT = Path(__file__).resolve().parents[2]
def _api(method: str, path: str) -> tuple[int, dict]:
url = f"https://{GITEA_HOST}/api/v1{path}"
headers = {"Authorization": f"token {GITEA_TOKEN}"}
req = urllib.request.Request(url, headers=headers, method=method)
try:
with urllib.request.urlopen(req, timeout=30) as resp:
return resp.status, json.loads(resp.read())
except urllib.error.HTTPError as exc:
body = exc.read()
return exc.code, json.loads(body) if body else {}
def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict:
path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}"
code, payload = _api("GET", path)
if code != 200:
pytest.fail(
f"GET {path} returned HTTP {code}: {payload}. "
f"Cannot determine whether PR head contains the trigger."
)
raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
return yaml.safe_load(raw)
def _fetch_workflow_local(workflow_name: str) -> dict:
p = ROOT / "workflows" / workflow_name
if not p.exists():
pytest.fail(f"Local workflow file not found: {p}")
return yaml.safe_load(p.read_text())
def _has_pull_request_review_trigger(wf: dict) -> bool:
on = wf.get(True) or wf.get("on") or {}
if isinstance(on, list):
return "pull_request_review" in on
if isinstance(on, dict):
return "pull_request_review" in on
if isinstance(on, str):
return on == "pull_request_review"
return False
def _diagnose_pr(pr_number: int) -> dict[str, bool]:
code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}")
if code != 200:
pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}")
head_ref = pr["head"]["ref"]
head_sha = pr["head"]["sha"]
results: dict[str, bool] = {}
for wf_name in ("qa-review.yml", "security-review.yml"):
wf = _fetch_workflow_from_ref(wf_name, head_sha)
results[wf_name] = _has_pull_request_review_trigger(wf)
return {
"pr_number": pr_number,
"head_ref": head_ref,
"head_sha": head_sha,
"triggers": results,
"auto_fire_possible": all(results.values()),
}
def _diagnose_local() -> dict[str, bool]:
results: dict[str, bool] = {}
for wf_name in ("qa-review.yml", "security-review.yml"):
wf = _fetch_workflow_local(wf_name)
results[wf_name] = _has_pull_request_review_trigger(wf)
return {
"pr_number": None,
"head_ref": "local-checkout",
"head_sha": None,
"triggers": results,
"auto_fire_possible": all(results.values()),
}
class TestStaleHeadDiagnostic:
"""Test deterministically reports 'auto-fire impossible for this PR' when
the PR head lacks the pull_request_review trigger.
"""
def test_local_checkout_has_pull_request_review_trigger(self):
"""Local files (the ones in this checkout) must contain the trigger.
This is the baseline: if the checkout itself is stale, every PR cut
from it will also be stale.
"""
diag = _diagnose_local()
missing = [n for n, ok in diag["triggers"].items() if not ok]
if missing:
pytest.fail(
f"Local checkout is missing pull_request_review trigger in: {missing}. "
f"This branch cannot produce PRs that auto-fire."
)
@pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set")
@pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set")
def test_pr_head_has_pull_request_review_trigger(self):
"""When PR_NUMBER is given, assert the PR head contains the trigger."""
diag = _diagnose_pr(int(PR_NUMBER))
if not diag["auto_fire_possible"]:
missing = [n for n, ok in diag["triggers"].items() if not ok]
pytest.fail(
f"Auto-fire impossible for PR #{diag['pr_number']}. "
f"Head ref={diag['head_ref']} sha={diag['head_sha']}. "
f"Missing trigger in: {missing}. "
f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main."
)
@@ -486,88 +486,3 @@ def test_scoped_rollout_dry_run_does_not_assert_coverage():
sleep=lambda _s: None,
)
assert aggregate["ok"] is True
# --- Superseded-deploy guard (false-stale fix) -----------------------------
#
# Scenario this fixes: no `concurrency:` on the prod-deploy workflow means two
# close main pushes run BOTH deploy-production jobs. eb31bcf (Fix A) and 286338
# (Fix C) merge back-to-back; the 286338 job rolls the fleet to staging-2863380
# first; the OLDER eb31bcf job's strict verify then sees tenants on 2863380 and
# false-reds "stale" though the fleet is AHEAD. superseded_by detects that main's
# head is no longer eb31bcf and lets the older job succeed without weakening the
# behind-tenant signal for whichever job IS the latest.
def test_superseded_by_returns_newer_head_when_main_moved_ahead(monkeypatch):
# eb31bcf job: main head is now 2863380 -> superseded, return the newer head.
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
newer = prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"})
assert newer == "2863380fullhash"
def test_superseded_by_none_when_this_job_is_still_head(monkeypatch):
# 2863380 job (the latest): head == our SHA -> NOT superseded -> strict verify
# runs, so a genuinely-behind tenant still fails loudly.
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
def test_superseded_by_matches_on_short_vs_full_sha_prefix(monkeypatch):
# GITHUB_SHA is full; Gitea may return a different-length id. Equal prefixes
# must NOT count as superseded (avoid false-skipping the real latest job).
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380")
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380FULLHASH")
assert prod.superseded_by({"GITHUB_SHA": "2863380fullhash"}) is None
def test_superseded_by_fail_safe_returns_none_when_head_unreadable(monkeypatch):
# Fail-safe: unreadable head (no token / API error) must NOT be treated as
# superseded, so the strict verify still runs and never silently greens.
monkeypatch.setattr(prod, "current_branch_head", lambda _env: None)
assert prod.superseded_by({"GITHUB_SHA": "eb31bcffullhash"}) is None
def test_superseded_by_none_without_github_sha(monkeypatch):
monkeypatch.setattr(prod, "current_branch_head", lambda _env: "2863380fullhash")
assert prod.superseded_by({}) is None
def test_current_branch_head_parses_gitea_branch_commit_id(monkeypatch):
captured = {}
def fake_optional(url, _token):
captured["url"] = url
return 200, {"name": "main", "commit": {"id": "2863380fullhash"}}
monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
head = prod.current_branch_head(
{"GITEA_TOKEN": "secret", "GITHUB_REPOSITORY": "molecule-ai/molecule-core"}
)
assert head == "2863380fullhash"
assert captured["url"].endswith("/repos/molecule-ai/molecule-core/branches/main")
def test_current_branch_head_uses_ref_name_branch(monkeypatch):
captured = {}
def fake_optional(url, _token):
captured["url"] = url
return 200, {"commit": {"sha": "deadbeef"}}
monkeypatch.setattr(prod, "_api_json_optional", fake_optional)
head = prod.current_branch_head(
{"GITEA_TOKEN": "secret", "GITHUB_REF_NAME": "release"}
)
assert head == "deadbeef"
assert captured["url"].endswith("/branches/release")
def test_current_branch_head_none_without_token():
assert prod.current_branch_head({}) is None
def test_current_branch_head_none_on_non_200(monkeypatch):
monkeypatch.setattr(prod, "_api_json_optional", lambda _u, _t: (500, None))
assert prod.current_branch_head({"GITEA_TOKEN": "secret"}) is None
+4 -6
View File
@@ -123,9 +123,8 @@ jobs:
# integration). See internal#512 for the class defect.
runs-on: docker-host
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
outputs:
api: ${{ steps.decide.outputs.api }}
steps:
@@ -161,9 +160,8 @@ jobs:
# detect-changes for the full rationale.
runs-on: docker-host
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
timeout-minutes: 15
env:
# Unique per-run container names so concurrent runs on the host-
+8 -30
View File
@@ -278,38 +278,16 @@ jobs:
export NEXT_PUBLIC_WS_URL="ws://127.0.0.1:${PLATFORM_PORT}/ws"
npx next dev --turbopack -p "${CANVAS_PORT}" > canvas.log 2>&1 &
echo $! > canvas.pid
# Readiness must wait for the actual chat route to *compile*, not
# just for the dev server to bind the port. `next dev --turbopack`
# accepts the TCP connection well before it has compiled a route
# on first request, so a bare `curl /` can 200 (or hang) while the
# page the tests load is still building. We therefore probe the
# real route the specs navigate to (`/?m=chat`) and require a 2xx,
# which only happens once Turbopack has finished the first
# compile. The previous 30s budget was also too tight for a cold
# Turbopack first-compile on a loaded operator-host runner — the
# `Canvas did not start in 30s` flake. Raise to 120s (job
# timeout-minutes is 15, so this is comfortably bounded) and probe
# every 2s.
READY=""
for i in $(seq 1 60); do
# Tempfile-routed -w + set +e/-e prevents curl-exit-code
# pollution of the captured status (lint-curl-status-capture.yml).
set +e
curl -s -o /dev/null -w '%{http_code}' "http://localhost:${CANVAS_PORT}/?m=chat" > /tmp/canvas-ready.code
set -e
CODE=$(cat /tmp/canvas-ready.code 2>/dev/null || echo "000")
if [ "$CODE" -ge 200 ] && [ "$CODE" -lt 400 ]; then
echo "Canvas (chat route compiled) up after ~$((i*2))s (HTTP ${CODE})"
READY=1
break
for i in $(seq 1 30); do
if curl -sf "http://localhost:${CANVAS_PORT}" > /dev/null 2>&1; then
echo "Canvas up after ${i}s"
exit 0
fi
sleep 2
sleep 1
done
if [ -z "$READY" ]; then
echo "::error::Canvas chat route did not compile in 120s (last HTTP ${CODE})"
cat canvas.log || true
exit 1
fi
echo "::error::Canvas did not start in 30s"
cat canvas.log || true
exit 1
- name: Run Playwright E2E tests
if: needs.detect-changes.outputs.chat == 'true'
@@ -88,9 +88,8 @@ jobs:
# surprises and keeps the routing rule discoverable in one place.
runs-on: docker-host
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
outputs:
handlers: ${{ steps.filter.outputs.handlers }}
steps:
@@ -120,9 +119,8 @@ jobs:
# exists). See detect-changes for the full routing rationale.
runs-on: docker-host
# mc#1982 Phase 3 (RFC §1): surface broken workflows without blocking.
# mc#1982: mask removed. If regressions appear, root-fix the underlying
# test — do NOT renew the mask silently.
continue-on-error: false
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
continue-on-error: true
env:
# Unique name per run so concurrent jobs don't collide on the
# bridge network. ${RUN_ID}-${RUN_ATTEMPT} is unique even across
@@ -49,56 +49,37 @@ jobs:
GITHUB_SERVER_URL: https://git.moleculesai.app
steps:
- name: Identify runner
id: identify
continue-on-error: true
run: |
set -eu
echo "arch=$(uname -m)"
echo "kernel=$(uname -sr)"
echo "shell=$BASH_VERSION"
# Sanity: must actually be arm64. If amd64 sneaks in here,
# the job skips gracefully rather than hard-failing, because
# a mislabelled runner is an ops concern, not a code defect.
# Pilot lane must not make main red (#2146).
# fail fast — that means the label routing is wrong.
case "$(uname -m)" in
aarch64|arm64)
echo "arm64 confirmed"
echo "arm64=true" >> "$GITHUB_OUTPUT"
;;
*)
echo "ERROR: expected arm64, got $(uname -m) — label routing may be wrong"
echo "arm64=false" >> "$GITHUB_OUTPUT"
exit 1
;;
aarch64|arm64) echo "arm64 confirmed" ;;
*) echo "ERROR: expected arm64, got $(uname -m)"; exit 1 ;;
esac
- name: Checkout
if: steps.identify.outputs.arm64 == 'true'
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Install shellcheck (arm64)
if: steps.identify.outputs.arm64 == 'true'
continue-on-error: true
run: |
set -eu
if command -v shellcheck >/dev/null 2>&1; then
echo "shellcheck already present: $(shellcheck --version | head -1)"
else
# Prefer apt if the runner base ships it; else download the
# correct platform binary (darwin vs linux).
# Prefer apt if the runner base ships it; else download arm64 binary.
if command -v apt-get >/dev/null 2>&1; then
sudo apt-get update -qq
sudo apt-get install -y --no-install-recommends shellcheck
else
SC_VER=v0.10.0
if [ "$(uname -s)" = "Darwin" ]; then
SC_PKG="shellcheck-${SC_VER}.darwin.aarch64.tar.xz"
else
SC_PKG="shellcheck-${SC_VER}.linux.aarch64.tar.xz"
fi
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/${SC_PKG}" \
curl -fsSL "https://github.com/koalaman/shellcheck/releases/download/${SC_VER}/shellcheck-${SC_VER}.linux.aarch64.tar.xz" \
| tar -xJf - --strip-components=1
sudo mv shellcheck /usr/local/bin/
fi
@@ -106,15 +87,14 @@ jobs:
shellcheck --version | head -2
- name: Run shellcheck on .gitea/scripts/*.sh
if: steps.identify.outputs.arm64 == 'true'
continue-on-error: true
run: |
set -eu
# Only the scripts we control under .gitea/scripts. Pilot
# scope is intentionally narrow — broaden in a follow-up
# once the lane is proven.
if ! command -v shellcheck >/dev/null 2>&1 || ! shellcheck --version >/dev/null 2>&1; then
echo "WARN: shellcheck not functional — skipping (pilot mode)"
if ! command -v shellcheck >/dev/null 2>&1; then
echo "WARN: shellcheck binary not found — skipping (pilot mode)"
exit 0
fi
# NOTE: macOS ships Bash 3.2 (Apple license), no `mapfile`
@@ -16,24 +16,14 @@ name: publish-workspace-server-image
#
# Image tags produced:
# :staging-<sha> — per-commit digest, stable for canary verify
# :staging-latest — tracks most recent BUILD on this branch (set by the
# build job, last-writer-wins, NOT prod-gated)
# :latest — tracks the most recent PROD-PROMOTED build. Re-pointed by the
# deploy-production job ONLY after green main CI + canary +
# fleet rollout + /buildinfo verification pass. So :latest ==
# "current prod image", never the raw build. (Added 2026-06-03
# after a stale :latest — last moved 2026-05-10 — reverted a
# production tenant on a no-arg redeploy.)
# :staging-latest — tracks most recent build on this branch
#
# Production auto-deploy:
# After both platform and tenant images are pushed, deploy-production waits
# for strict required push contexts on the same SHA to go green, then
# calls the production CP redeploy-fleet endpoint with target_tag=
# staging-<sha>. On success (rollout + buildinfo verified) it re-points
# :latest to the same SHA. Set repo variable or secret
# PROD_AUTO_DEPLOY_DISABLED=true to stop production rollout while keeping
# image publishing enabled — in which case :latest is NOT advanced either
# (correct: an unpromoted build must not become :latest).
# staging-<sha>. Set repo variable or secret PROD_AUTO_DEPLOY_DISABLED=true
# to stop production rollout while keeping image publishing enabled.
#
# Primary ECR target: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/*
# Optional staging tenant mirror target:
@@ -263,19 +253,6 @@ jobs:
PROD_AUTO_DEPLOY_DRY_RUN: ${{ vars.PROD_AUTO_DEPLOY_DRY_RUN || '' }}
PROD_ALLOW_NON_PROD_CP_URL: ${{ vars.PROD_ALLOW_NON_PROD_CP_URL || '' }}
steps:
# The publish runner's default HOME (/home/hongming) is not writable, so
# git/docker credential saves fail (`Error saving credentials: mkdir
# /home/hongming: permission denied`) and halt the production rollout
# (#2193). Point HOME + DOCKER_CONFIG at the writable job temp dir —
# mirrors build-and-push's "Prepare writable Docker config" fix above.
- name: Prepare writable HOME + Docker config
run: |
set -euo pipefail
H="$RUNNER_TEMP/auto-deploy-home"
mkdir -p "$H/.docker"
echo "HOME=$H" >> "$GITHUB_ENV"
echo "DOCKER_CONFIG=$H/.docker" >> "$GITHUB_ENV"
- name: Checkout
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
@@ -386,36 +363,6 @@ jobs:
run: |
set -euo pipefail
RESP="$RUNNER_TEMP/prod-redeploy-response.json"
# Superseded-job guard. This workflow has no `concurrency:` (header
# explains why: Gitea 1.22.6 cancels queued prod deploys). So two
# close main pushes run BOTH deploy-production jobs. The newer one
# rolls the fleet to its (newer) build first; this older job's strict
# equality check below would then see tenants on the NEWER SHA and
# false-red "$slug is stale" even though the fleet is AHEAD, not
# behind (git SHAs aren't ordered; /buildinfo exposes only git_sha).
#
# If main's current head is no longer THIS job's SHA, a newer commit
# has landed and this deploy is superseded — the newest job's verify
# is authoritative. Skip strict verify and succeed. exit 0 => newer
# head printed (superseded); exit 10 => still the latest, proceed to
# the strict verify so a genuinely-behind tenant still fails loudly.
set +e
NEWER_HEAD="$(python3 .gitea/scripts/prod-auto-deploy.py check-superseded)"
SUPERSEDED_EXIT=$?
set -e
if [ "$SUPERSEDED_EXIT" -eq 0 ] && [ -n "$NEWER_HEAD" ]; then
echo "::notice::Superseded deploy: main head is now ${NEWER_HEAD:0:7} (this job deployed ${GITHUB_SHA:0:7}). The fleet is at or ahead of this build; the newer deploy job's verify is authoritative. Skipping strict SHA verify."
{
echo ""
echo "### Buildinfo verification skipped — superseded deploy"
echo ""
echo "This deploy job's SHA \`${GITHUB_SHA:0:7}\` is no longer the head of \`main\` (now \`${NEWER_HEAD:0:7}\`)."
echo "A newer deploy job is rolling the fleet forward; its verify is authoritative."
} >> "$GITHUB_STEP_SUMMARY"
exit 0
fi
mapfile -t SLUGS < <(jq -r '.results[]? | .slug' "$RESP")
if [ ${#SLUGS[@]} -eq 0 ]; then
echo "::error::No tenants returned from redeploy-fleet; refusing to mark production deploy verified."
@@ -462,65 +409,3 @@ jobs:
if [ "$STALE_COUNT" -gt 0 ] || [ "$UNHEALTHY_COUNT" -gt 0 ] || [ "$UNREACHABLE_COUNT" -gt 0 ]; then
exit 1
fi
# Re-point :latest to the just-promoted image — ONLY after the
# production rollout + buildinfo verification above have passed.
#
# WHY HERE (promote point), not at build time:
# The platform-tenant ECR `:latest` tag was last moved 2026-05-10
# and went 3.5 weeks stale because the build step only pushes
# :staging-<sha> + :staging-latest and never re-points :latest. A
# no-arg POST /cp/admin/tenants/:slug/redeploy (whose default tag
# fell through to "latest") then pulled the 3.5-week-old image and
# REVERTED the tenant (incident: molecule-adk-demo, 2026-06-03).
#
# The defense-in-depth half of this fix changes that redeploy
# default to :staging-latest, but :latest itself must also be
# kept meaningful. We make :latest track the PROD-BLESSED build,
# not the raw build: by living at the end of deploy-production —
# after `wait-ci` (green main CI), the canary-first batched fleet
# rollout, AND the /buildinfo SHA verification — :latest only ever
# advances to a SHA that is actually green and confirmed running
# across the live fleet. So `:latest` == "current prod image",
# and any consumer that pulls :latest (legacy callers, manual
# `docker pull`, a redeploy that somehow still resolves "latest")
# gets the blessed image instead of whatever happened to build.
#
# Re-tag is digest-level (imagetools create), so no rebuild and
# :latest is byte-identical to :staging-<sha> for this commit.
- name: Promote :latest to the verified prod image
if: ${{ steps.plan.outputs.enabled == 'true' }}
env:
TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
STAGING_TENANT_IMAGE_NAME: ${{ env.STAGING_TENANT_IMAGE_NAME }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-2
run: |
set -euo pipefail
SHA_TAG="staging-${GITHUB_SHA::7}"
PROD_ECR_REGISTRY="${TENANT_IMAGE_NAME%%/*}"
STAGING_ECR_REGISTRY="${STAGING_TENANT_IMAGE_NAME%%/*}"
aws ecr get-login-password --region us-east-2 | \
docker login --username AWS --password-stdin "${PROD_ECR_REGISTRY}"
aws ecr get-login-password --region us-east-2 | \
docker login --username AWS --password-stdin "${STAGING_ECR_REGISTRY}"
# imagetools create copies the source manifest to the new tag by
# digest (no pull/rebuild). :latest now points at the exact image
# that just passed the prod gate.
docker buildx imagetools create \
--tag "${TENANT_IMAGE_NAME}:latest" \
"${TENANT_IMAGE_NAME}:${SHA_TAG}"
docker buildx imagetools create \
--tag "${STAGING_TENANT_IMAGE_NAME}:latest" \
"${STAGING_TENANT_IMAGE_NAME}:${SHA_TAG}"
{
echo ""
echo "### :latest promoted"
echo ""
echo "Re-pointed \`platform-tenant:latest\` → \`${SHA_TAG}\` (prod + staging ECR)."
echo ":latest now tracks the prod-blessed, fleet-verified image."
} >> "$GITHUB_STEP_SUMMARY"
+2 -17
View File
@@ -60,26 +60,11 @@ test.describe("MobileChat", () => {
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 15_000 });
// Reload and deterministically wait for the chat-history GET that
// rehydrates the transcript to come back 2xx, rather than racing a
// fixed-timeout render assertion against an in-flight fetch. The
// server now persists the a2a_receive row SYNCHRONOUSLY before the
// send's 200 (workspace-server logA2ASuccess), so the row is
// guaranteed present by the time this GET runs — the wait is for
// hydration latency, not for a still-racing write.
const historyResponse = page.waitForResponse(
(resp) =>
resp.url().includes("/chat-history") &&
resp.request().method() === "GET" &&
resp.status() === 200,
{ timeout: 15_000 },
);
await page.reload();
await page.waitForSelector("[data-testid='chat-panel']", { timeout: 10_000 });
await historyResponse;
await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible();
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible();
await expect(page.getByText("Mobile persistence", { exact: true })).toBeVisible({ timeout: 5_000 });
await expect(page.getByText("Echo: Mobile persistence")).toBeVisible({ timeout: 5_000 });
});
test("composer auto-grows with multi-line text", async ({ page }) => {
@@ -8,12 +8,9 @@ import { ExternalConnectModal, type ExternalConnectionInfo } from "./ExternalCon
import {
ProviderModelSelector,
buildProviderCatalog,
buildProviderCatalogFromRegistry,
findProviderForModel,
type SelectorModel,
type SelectorValue,
type RegistryProvider,
type RegistryModel,
} from "./ProviderModelSelector";
interface WorkspaceOption {
@@ -35,16 +32,6 @@ interface TemplateSpec {
model?: string;
models?: SelectorModel[];
providers?: string[];
// internal#718 P3 registry-served fields (additive; absent on older
// backends and for non-registry runtimes). When registry_backed is true the
// provider→model catalog is built from registry_providers/registry_models so
// each model's DERIVED provider (e.g. moonshot/kimi-k2.6 → "platform") drives
// the dropdown bucket and the create payload's llm_provider — instead of the
// legacy inferVendor heuristic that slash-splits the id into "moonshot".
// Mirrors ConfigTab's RuntimeOption loader (RFC#340 Fix C).
registry_backed?: boolean;
registry_providers?: RegistryProvider[];
registry_models?: RegistryModel[];
}
const DEFAULT_RUNTIME = "claude-code";
@@ -181,53 +168,15 @@ export function CreateWorkspaceButton() {
}),
[runtime, templateSpecs],
);
// The /templates row backing the LLM picker: an explicitly-selected
// workspace template wins, else the base runtime template row.
const llmSourceSpec = useMemo<TemplateSpec | null>(
() => selectedTemplateSpec ?? selectedRuntimeTemplateSpec,
const llmModels = useMemo(
() => {
const sourceSpec = selectedTemplateSpec ?? selectedRuntimeTemplateSpec;
if (!sourceSpec?.models?.length) return [];
return sourceSpec.models;
},
[selectedRuntimeTemplateSpec, selectedTemplateSpec],
);
// internal#718 P3 / RFC#340 Fix C: a runtime is registry-backed when the
// /templates row says so AND it served a non-empty registry_models set.
// Mirrors ConfigTab's `registryBacked` derivation exactly.
const registryBacked = useMemo(
() =>
llmSourceSpec?.registry_backed === true &&
(llmSourceSpec.registry_models?.length ?? 0) > 0,
[llmSourceSpec],
);
// Models fed to the selector dropdown. For a registry-backed runtime use the
// registry-served native set, carrying each model's DERIVED provider so the
// selector buckets it correctly (moonshot/kimi-k2.6 → "platform", not the
// inferVendor "moonshot"). Otherwise fall back to the template-served
// models[] + the legacy heuristic — same fallback ConfigTab keeps.
const llmModels = useMemo<SelectorModel[]>(
() => {
if (registryBacked) {
return (llmSourceSpec?.registry_models ?? []).map((m) => ({
id: m.id,
name: m.name,
...(m.provider ? { provider: m.provider } : {}),
}));
}
return llmSourceSpec?.models?.length ? llmSourceSpec.models : [];
},
[registryBacked, llmSourceSpec],
);
// Registry-backed path: build the catalog from registry_providers/
// registry_models so dropdown labels + billing + the derived provider come
// from the provider-registry SSOT (restores the "Platform" bucket). Legacy
// path: re-infer from models[] via buildProviderCatalog (inferVendor).
const llmCatalog = useMemo(
() =>
registryBacked
? buildProviderCatalogFromRegistry(
llmSourceSpec?.registry_providers ?? [],
llmSourceSpec?.registry_models ?? [],
)
: buildProviderCatalog(llmModels),
[registryBacked, llmSourceSpec, llmModels],
);
const llmCatalog = useMemo(() => buildProviderCatalog(llmModels), [llmModels]);
const selectedLLMProvider = useMemo(
() => llmCatalog.find((p) => p.id === llmSelection.providerId) ?? llmCatalog[0],
[llmCatalog, llmSelection.providerId],
@@ -235,7 +184,7 @@ export function CreateWorkspaceButton() {
useEffect(() => {
if (llmCatalog.length === 0) return;
const sourceDefault = llmSourceSpec?.model?.trim();
const sourceDefault = (selectedTemplateSpec ?? selectedRuntimeTemplateSpec)?.model?.trim();
const platformProvider = llmCatalog.find((p) => p.vendor === "platform");
const matched = sourceDefault ? findProviderForModel(llmCatalog, sourceDefault) : null;
const next = platformProvider ?? matched ?? llmCatalog[0];
@@ -248,7 +197,7 @@ export function CreateWorkspaceButton() {
envVars: next.envVars,
});
setLLMSecret("");
}, [llmCatalog, llmSourceSpec]);
}, [llmCatalog, selectedRuntimeTemplateSpec, selectedTemplateSpec]);
// Reset form and load workspaces whenever dialog opens
useEffect(() => {
@@ -512,7 +461,6 @@ export function CreateWorkspaceButton() {
</div>
<ProviderModelSelector
models={llmModels}
catalog={registryBacked ? llmCatalog : undefined}
value={llmSelection}
onChange={(next) => {
setLLMSelection(next);
@@ -454,128 +454,6 @@ describe("CreateWorkspaceDialog — dynamic runtime provider picker", () => {
});
});
// ---------------------------------------------------------------------------
// Registry-backed provider catalog (RFC#340 Fix C)
//
// Regression guard for the mis-bucketing bug: when a registry-backed
// claude-code template serves `moonshot/kimi-k2.6` whose DERIVED provider is
// `platform`, the dialog must build the dropdown from registry_providers/
// registry_models (buildProviderCatalogFromRegistry) — NOT the legacy
// inferVendor heuristic which slash-splits the id into "moonshot". The
// distinguishing trait of this fixture: the plain `models[]` array does NOT
// carry an explicit `provider` field, so the LEGACY path would bucket the
// model under "moonshot" and send llm_provider:"moonshot". Only the
// registry-backed path yields the Platform bucket + llm_provider:"platform".
// ---------------------------------------------------------------------------
// claude-code template whose plain models[] is UN-annotated (no explicit
// provider). The derived-provider annotation lives ONLY in registry_models.
const REGISTRY_TEMPLATE = {
id: "claude-code-default",
name: "Claude Code Agent",
runtime: "claude-code",
model: "moonshot/kimi-k2.6",
// Legacy fields — note: NO explicit provider on the platform model, so the
// legacy inferVendor path would slash-split it into "moonshot".
providers: ["platform", "minimax", "anthropic"],
models: [
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", required_env: [] },
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
{ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", required_env: ["ANTHROPIC_API_KEY"] },
],
// Registry-served SSOT (internal#718 P3). DeriveProvider resolved
// moonshot/kimi-k2.6 → "platform"; MiniMax-M2.7 → "minimax".
registry_backed: true,
registry_providers: [
{ name: "platform", display_name: "Platform", auth_env: [], billing_mode: "platform_managed" },
{ name: "minimax", display_name: "MiniMax", auth_env: ["MINIMAX_API_KEY"], billing_mode: "byok" },
{ name: "anthropic", display_name: "Anthropic API", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "byok" },
],
registry_models: [
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", billing_mode: "platform_managed" },
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", provider: "minimax", billing_mode: "byok" },
{ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", provider: "anthropic", billing_mode: "byok" },
],
};
describe("CreateWorkspaceDialog — registry-backed provider catalog (RFC#340 Fix C)", () => {
beforeEach(() => {
mockGet.mockImplementation(async (url: string) => {
if (url === "/templates") {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return [REGISTRY_TEMPLATE] as any;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return SAMPLE_WORKSPACES as any;
});
});
it("shows the Platform provider bucket for the registry-backed claude-code runtime", async () => {
await openDialog();
const providerSelect = await waitFor(() => {
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
expect(sel).toBeTruthy();
return sel;
});
const labels = Array.from(providerSelect.options).map((o) => o.text.trim());
// Registry display_name "Platform" appears — NOT "moonshot" from the
// legacy slash-split heuristic.
expect(labels).toContain("Platform");
expect(labels).not.toContain("moonshot");
// Bucket id is the registry-keyed id, vendor is the bare provider name.
const values = Array.from(providerSelect.options).map((o) => o.value);
expect(values).toContain("registry|platform");
});
it("sends llm_provider: platform (not moonshot) for moonshot/kimi-k2.6", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "Kimi Agent" },
});
// Wait for the registry default to settle on the Platform bucket + model.
await waitFor(() => {
const modelSelect = document.querySelector("[data-testid='model-select']") as HTMLSelectElement;
expect(modelSelect?.value).toBe("moonshot/kimi-k2.6");
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.model).toBe("moonshot/kimi-k2.6");
expect(body.llm_provider).toBe("platform");
// Platform is auth-env-free → no BYOK secret.
expect(body.secrets).toBeUndefined();
});
it("buckets MiniMax-M2.7 under its derived provider and sends llm_provider: minimax", async () => {
await openDialog();
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
target: { value: "MiniMax Agent" },
});
await waitFor(() => {
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
expect(Array.from(sel.options).map((o) => o.value)).toContain("registry|minimax");
});
fireEvent.change(document.querySelector("[data-testid='provider-select']") as HTMLSelectElement, {
target: { value: "registry|minimax" },
});
fireEvent.change(document.getElementById("llm-secret-input") as HTMLInputElement, {
target: { value: "sk-minimax-test" },
});
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
fireEvent.click(createBtn!);
await waitFor(() => expect(mockPost).toHaveBeenCalled());
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
expect(body.model).toBe("MiniMax-M2.7");
expect(body.llm_provider).toBe("minimax");
expect(body.secrets).toEqual({ MINIMAX_API_KEY: "sk-minimax-test" });
});
});
// ---------------------------------------------------------------------------
// budget_limit field tests (#541)
// ---------------------------------------------------------------------------
+2
View File
@@ -34,6 +34,8 @@
],
"org_templates": [
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
{"name": "free-beats-all", "repo": "molecule-ai/molecule-ai-org-template-free-beats-all", "ref": "main"},
{"name": "medo-smoke", "repo": "molecule-ai/molecule-ai-org-template-medo-smoke", "ref": "main"},
{"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"},
{"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "main"}
]
-131
View File
@@ -1,131 +0,0 @@
# Developer SOP — PR review gate auto-fire and stale-head handling
> Last updated: 2026-06-03 (cp#2159 follow-up)
>
> Applies to: all core-PR authors and reviewers on `molecule-core` and sibling
> repos using the `qa-review` + `security-review` branch-protection gates.
---
## 1. Gitea PR-head workflow-selection rule
**Rule:** For `pull_request_target` and `pull_request_review` events, Gitea
loads the workflow definition from the **PR's HEAD branch**, not from the
base (`main`) branch.
This is different from GitHub Actions, where `pull_request_target` always
loads workflows from the base branch. Gitea's behaviour means:
- A PR that was opened **before** the `pull_request_review` trigger was added
to `qa-review.yml` / `security-review.yml` will **NOT** auto-fire on review,
because its HEAD still contains the old workflow YAML (no trigger).
- A PR that was opened **after** the trigger was added (or that has been
rebased onto a commit containing the trigger) **WILL** auto-fire, because its
HEAD contains the new workflow YAML.
### Ops implication
| PR head contains `pull_request_review` trigger? | Behaviour on APPROVED review |
|---|---|
| **Yes** (cut from current main, or rebased) | Workflows auto-queue, evaluate, and POST the `(pull_request_target)` context automatically. No slash-command needed. |
| **No** (stale head, opened before #2157) | Nothing fires. Use `/qa-recheck` + `/security-recheck` slash-commands in a PR comment, OR rebase onto current main. |
---
## 2. Standard core-PR flow (post-#2157)
```
1. Author opens PR from a branch based on current main
→ qa-review + security-review workflows run on pull_request_target
→ status contexts post (initial eval, usually red until reviews land)
2. Reviewers submit real APPROVED reviews
→ If PR head has the trigger: workflows AUTO-FIRE on pull_request_review
→ Contexts flip green (or stay red if reviewer is not in team)
3. [Optional] If contexts did not flip (stale head, event lost, etc.):
→ Anyone can comment `/qa-recheck` or `/security-recheck`
→ sop-checklist.yml refires the evaluator (read-only, idempotent)
4. Both qa-review + security-review contexts are green
→ Plain Do:merge (no force-merge needed)
```
### Key point
The `/qa-recheck` and `/security-recheck` commands are a **backstop**, not the
primary path. PRs cut from current main should auto-fire without manual
intervention.
---
## 3. Diagnosing a stale head
If a PR has real team-member APPROVED reviews but the qa/security contexts
remain red and no workflow run appears on the PR's "Actions" tab for the
review event, the PR head is likely stale.
### Quick check
```bash
# From the PR page, look at the head commit SHA, then:
curl -sS "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/contents/.gitea/workflows/qa-review.yml?ref=<HEAD_SHA>" \
| jq -r '.content' | base64 -d | grep -c 'pull_request_review'
# 0 → stale head (no trigger in that version of the workflow)
# >0 → trigger present; auto-fire SHOULD work (if it didn't, file a tracker)
```
### Automated diagnostic
The test suite includes `test_gate_stale_head_diagnostic.py`, which reports
"auto-fire impossible for this PR" when the head lacks the trigger. Run it
in CI or locally with:
```bash
PR_NUMBER=123 python -m pytest .gitea/scripts/tests/test_gate_stale_head_diagnostic.py -v
```
---
## 4. Rebasing vs. slash-refire
| Approach | When to use | Trade-off |
|---|---|---|
| **Rebase onto current main** | PR is genuinely stale (head lacks trigger OR head is far behind main) | Clean history, gets all recent fixes, but requires force-push and re-approval if the branch was protected |
| **`/qa-recheck` + `/security-recheck`** | PR head is recent but the review event was missed, or you want to avoid rebase churn | Quick, no force-push, but does NOT fix a missing trigger in the head |
**Do not** use slash-refire as a substitute for rebasing a stale head. If the
workflow YAML in the PR head does not contain `pull_request_review`, no amount
of rechecking will make auto-fire work.
---
## 5. Live-fire verification
The `test_gate_auto_fire_live.py` regression test exercises the full runtime
path: it submits an APPROVED review to a test PR and polls for the
`(pull_request_target)` status contexts. It is skipped when no API token is
available, and is intended to catch runtime non-fire that static structural
tests (e.g. `test_gate_review_auto_fire.py`) cannot detect.
Run manually with:
```bash
export GITEA_HOST=git.moleculesai.app
export GITEA_TOKEN=<your-token>
export REPO=molecule-ai/molecule-core
export LIVEFIRE_PR_NUMBER=<test-pr-number>
python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v
```
---
## References
- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
the `pull_request_review` trigger, NOT a workflow code defect)
- #765 — static structural regression test for gate configuration
- #2157 — merged trigger addition (`pull_request_review` types: [submitted])
- #2020 — milestone confirming gate infrastructure is stable
- RFC#324 — qa-review + security-review design
-18
View File
@@ -858,24 +858,6 @@ fi
if echo "$AGENT_TEXT" | grep -qiE "exceeded your current quota|insufficient_quota"; then
fail "A2A — PROVIDER QUOTA EXHAUSTED (NOT a platform regression). Operator action: top up MOLECULE_STAGING_OPENAI_API_KEY billing or rotate to a higher-quota org at Settings → Secrets and Variables → Actions. Tracked in #2578. Raw: $AGENT_TEXT"
fi
# Empty-completion class — the agent runtime reached the LLM and got a
# 2xx back, but the assistant turn carried NO text part (empty content,
# or tool_calls/reasoning-only with no surfaced text), so the runtime
# returns the literal "Error: message contained no text content." as its
# reply text. Steps 0-7 passing means the platform is healthy (CP up,
# tenant provisioned, workspace online + routable, A2A delivery e2e); the
# break is the configured completion BACKEND returning an empty turn — a
# model/provider-side regression, NOT a workspace-server or harness bug,
# and NOT NOT_CONFIGURED (that fails earlier, at boot). Name it explicitly
# so the canary alert points at the model, not the platform: a generic
# "error-shaped response" misdirects triage to workspace-server. Observed
# 2026-06-03/04 across every staging canary on MODEL_SLUG=MiniMax-M2 (the
# canary default since #2710) — 100% on the parent's first cold turn,
# identical on main's scheduled synthetic E2E and on PRs (so it is an
# environmental backend regression, never PR-introduced).
if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then
fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (MiniMax-M2 since #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
fi
# Generic catch-all — falls through if none of the known regressions hit.
if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
fail "A2A returned an error-shaped response: $AGENT_TEXT"
+5 -56
View File
@@ -26,12 +26,11 @@ import (
// the update cycle — no ssh, no re-provision, no ops toil.
//
// Contract (paired with cp-side GET /cp/tenants/config):
//
// Request: GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
// Authorization: Bearer <ADMIN_TOKEN>
// X-Molecule-Org-Id: <MOLECULE_ORG_ID>
// Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
// 401 on bearer mismatch or unknown org
// Request: GET {MOLECULE_CP_URL or https://api.moleculesai.app}/cp/tenants/config
// Authorization: Bearer <ADMIN_TOKEN>
// X-Molecule-Org-Id: <MOLECULE_ORG_ID>
// Response: 200 {"MOLECULE_CP_SHARED_SECRET":"…","MOLECULE_CP_URL":"…", …}
// 401 on bearer mismatch or unknown org
//
// Best-effort: any failure logs and returns — main() keeps booting.
// Self-hosted deploys without MOLECULE_ORG_ID or ADMIN_TOKEN set
@@ -106,53 +105,3 @@ func refreshEnvFromCP() error {
log.Printf("CP env refresh: applied %d values from %s/cp/tenants/config", applied, base)
return nil
}
// requiredLLMEnvVars is the set of LLM proxy env vars a managed SaaS
// tenant must have populated after refreshEnvFromCP. cp#469 (tenant
// proxy-env delivery) — guaranteed CP-delivered creds reach the
// tenant process env on boot. Per Researcher Task #37 / Spec 2 and
// Task #46 (watch-fail-first test).
//
// Key set byte-matched against Researcher's verified emission in
// controlplane tenant_config.go:140-144 (Researcher REQUEST_CHANGES
// iterate body, 3987f59c). The four keys below ARE the LLM-proxy
// subset of the 8 CP-emitted keys; OPENAI_BASE_URL / OPENAI_API_KEY /
// ANTHROPIC_BASE_URL / ANTHROPIC_API_KEY are out of scope for cp#469
// (different feature surfaces — direct-to-provider fallbacks, not
// the proxy). v2 fix: MOLECULE_LLM_USAGE_TOKEN, MOLECULE_LLM_USAGE_URL,
// MOLECULE_LLM_BASE_URL, MOLECULE_LLM_ANTHROPIC_BASE_URL — note the
// 4th key is namespaced MOLECULE_LLM_ANTHROPIC_BASE_URL, NOT bare
// ANTHROPIC_BASE_URL. Bare ANTHROPIC_BASE_URL is a separate CP-emitted
// key for direct-provider use, not the LLM proxy.
var requiredLLMEnvVars = []string{
"MOLECULE_LLM_USAGE_TOKEN",
"MOLECULE_LLM_USAGE_URL", // CRITICAL fix v2: was MOLECULE_LLM_URL in v1
"MOLECULE_LLM_BASE_URL",
"MOLECULE_LLM_ANTHROPIC_BASE_URL", // CRITICAL fix v3: was ANTHROPIC_BASE_URL in v2 (different key!)
}
// assertManagedTenantHasLLMEnv verifies that, when running as a
// managed SaaS tenant (MOLECULE_ORG_ID + ADMIN_TOKEN both set), all
// required LLM proxy env vars are populated after refreshEnvFromCP.
//
// Self-hosted (no orgID/adminToken) is exempt — dev must not be
// blocked here. Managed tenants with missing LLM keys fail with
// MISSING_CP_LLM_ENV so they do not silently boot with broken proxy
// creds. Caller in main.go decides whether to log and continue or
// log.Fatalf depending on deployment context.
func assertManagedTenantHasLLMEnv() error {
if os.Getenv("MOLECULE_ORG_ID") == "" || os.Getenv("ADMIN_TOKEN") == "" {
// Self-hosted dev / not yet provisioned — not a managed tenant.
return nil
}
var missing []string
for _, k := range requiredLLMEnvVars {
if os.Getenv(k) == "" {
missing = append(missing, k)
}
}
if len(missing) > 0 {
return fmt.Errorf("MISSING_CP_LLM_ENV: required LLM proxy keys not set after refreshEnvFromCP: %v", missing)
}
return nil
}
@@ -5,7 +5,6 @@ import (
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
)
@@ -60,138 +59,6 @@ func TestRefreshEnvFromCP_AppliesCPResponse(t *testing.T) {
}
}
// TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: watch-fail-first
// per Researcher Task #46. When running as a managed tenant
// (MOLECULE_ORG_ID + ADMIN_TOKEN set), missing LLM proxy env vars
// after refreshEnvFromCP MUST surface as MISSING_CP_LLM_ENV, not be
// silently accepted. Without this guard, a CP that loses its LLM
// creds (e.g. during an incident) would let a tenant boot and then
// fail later at first LLM call — worse than a loud refusal here.
func TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Stub CP returns a CP response WITHOUT any of the required
// LLM keys — simulates the failure mode where the CP side
// dropped or never had the LLM creds for this org.
w.Header().Set("Content-Type", "application/json")
fmt.Fprint(w, `{"MOLECULE_CP_SHARED_SECRET":"x","MOLECULE_CP_URL":"https://api.moleculesai.app"}`)
}))
defer srv.Close()
t.Setenv("MOLECULE_ORG_ID", "org-managed-1")
t.Setenv("ADMIN_TOKEN", "admin-tok")
t.Setenv("MOLECULE_CP_URL", srv.URL)
// Clear all LLM keys to simulate the boot-without-LLM-env failure mode.
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
t.Setenv("MOLECULE_LLM_BASE_URL", "")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
// refreshEnvFromCP itself should succeed — CP is reachable, returned 200.
if err := refreshEnvFromCP(); err != nil {
t.Fatalf("refreshEnvFromCP: %v", err)
}
// The boot assertion must catch the missing LLM keys.
err := assertManagedTenantHasLLMEnv()
if err == nil {
t.Fatal("expected MISSING_CP_LLM_ENV error for managed tenant without LLM keys, got nil")
}
if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
}
}
// TestRefreshEnvFromCP_ManagedTenantHappyPath: when the CP returns
// all 4 LLM-proxy keys, the gate must PASS — no MISSING_CP_LLM_ENV
// for a properly-configured managed tenant. Watch-fail counterpart
// to TestRefreshEnvFromCP_ManagedTenantRequiresLLMKeys: if THIS test
// ever fires MISSING_CP_LLM_ENV on the byte-correct key set, the
// requiredLLMEnvVars list has drifted from the CP emission again.
// Per Researcher REQUEST_CHANGES TEST ADEQUACY note.
func TestRefreshEnvFromCP_ManagedTenantHappyPath(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
// Return ALL 4 LLM-proxy keys — names byte-matched to
// tenant_config.go:140-144 CP emission.
fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com","MOLECULE_LLM_ANTHROPIC_BASE_URL":"https://llm.example.com/anthropic"}`)
}))
defer srv.Close()
t.Setenv("MOLECULE_ORG_ID", "org-managed-happy")
t.Setenv("ADMIN_TOKEN", "admin-tok")
t.Setenv("MOLECULE_CP_URL", srv.URL)
// Pre-clear so we can verify the refresh actually populated them.
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
t.Setenv("MOLECULE_LLM_BASE_URL", "")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
if err := refreshEnvFromCP(); err != nil {
t.Fatalf("refreshEnvFromCP: %v", err)
}
// Sanity: refresh actually applied the keys.
if got := os.Getenv("MOLECULE_LLM_USAGE_TOKEN"); got != "tok-1" {
t.Errorf("refresh did not apply USAGE_TOKEN: got %q", got)
}
// The boot assertion must pass — no MISSING_CP_LLM_ENV.
if err := assertManagedTenantHasLLMEnv(); err != nil {
t.Errorf("managed happy path must not MISSING_CP_LLM_ENV, got: %v", err)
}
}
// TestRefreshEnvFromCP_ManagedTenantPartialEnv: when the CP returns
// 3 of 4 LLM-proxy keys (one missing), the gate must STILL catch it
// and the error must name the missing key. Per Researcher
// REQUEST_CHANGES TEST ADEQUACY note — partial-env coverage is
// critical because the production failure mode is usually "one
// key dropped" not "all keys dropped".
func TestRefreshEnvFromCP_ManagedTenantPartialEnv(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
// 3 of 4 — MOLECULE_LLM_ANTHROPIC_BASE_URL is missing.
fmt.Fprint(w, `{"MOLECULE_LLM_USAGE_TOKEN":"tok-1","MOLECULE_LLM_USAGE_URL":"https://llm.example.com/usage","MOLECULE_LLM_BASE_URL":"https://llm.example.com"}`)
}))
defer srv.Close()
t.Setenv("MOLECULE_ORG_ID", "org-managed-partial")
t.Setenv("ADMIN_TOKEN", "admin-tok")
t.Setenv("MOLECULE_CP_URL", srv.URL)
// Pre-clear all 4 so the 3 that come back from CP are the only
// ones set; the 4th (MOLECULE_LLM_ANTHROPIC_BASE_URL) stays empty.
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
t.Setenv("MOLECULE_LLM_BASE_URL", "")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
if err := refreshEnvFromCP(); err != nil {
t.Fatalf("refreshEnvFromCP: %v", err)
}
err := assertManagedTenantHasLLMEnv()
if err == nil {
t.Fatal("expected MISSING_CP_LLM_ENV for partial env (3 of 4 keys), got nil")
}
if !strings.Contains(err.Error(), "MISSING_CP_LLM_ENV") {
t.Errorf("expected error to contain MISSING_CP_LLM_ENV, got: %v", err)
}
if !strings.Contains(err.Error(), "MOLECULE_LLM_ANTHROPIC_BASE_URL") {
t.Errorf("expected error to name the missing key MOLECULE_LLM_ANTHROPIC_BASE_URL, got: %v", err)
}
}
// TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop: self-hosted
// (no orgID/adminToken) must NOT block on missing LLM keys — dev
// ergonomics matter and the assertion's contract is "managed only".
func TestAssertManagedTenantHasLLMEnv_NotManagedIsNoop(t *testing.T) {
t.Setenv("MOLECULE_ORG_ID", "")
t.Setenv("ADMIN_TOKEN", "")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "")
t.Setenv("MOLECULE_LLM_USAGE_URL", "")
t.Setenv("MOLECULE_LLM_BASE_URL", "")
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "")
if err := assertManagedTenantHasLLMEnv(); err != nil {
t.Errorf("self-hosted (not managed) must not block, got: %v", err)
}
}
// TestRefreshEnvFromCP_CPUnreachableDoesNotFailBoot: network errors must
// return non-nil BUT main.go treats that as warn-and-continue. We assert
// the function returns an error (not a panic) so the caller can log.
+1 -10
View File
@@ -82,16 +82,6 @@ func main() {
log.Printf("CP env refresh: %v (continuing with baked-in env)", err)
}
// Managed-tenant boot assertion (cp#469 — tenant proxy-env delivery).
// If we're a managed SaaS tenant (orgID + adminToken set), all required
// LLM proxy env vars must be present after refresh. Missing keys block
// the tenant from booting with broken LLM creds — silent-fail is worse
// than a loud refusal. Self-hosted (no orgID/adminToken) short-circuits
// inside the assertion, so this never fires for dev.
if err := assertManagedTenantHasLLMEnv(); err != nil {
log.Fatalf("Managed tenant boot assertion: %v", err)
}
// Secrets encryption. In MOLECULE_ENV=prod, boot refuses to start
// without a valid SECRETS_ENCRYPTION_KEY (fail-secure — Top-5 #5).
// In any other environment, missing keys just log a warning and
@@ -369,6 +359,7 @@ func main() {
// (WorkspaceHandler.BootstrapFailed) wires its own capture inline.
registry.BootFailureRescueHook = handlers.BootFailureRescueHook
// Provision-timeout sweep — flips workspaces that have been stuck in
// status='provisioning' past the timeout window to 'failed' and emits
// WORKSPACE_PROVISION_TIMEOUT. Without this the UI banner is cosmetic
+2 -4
View File
@@ -149,11 +149,9 @@ func markFailed(ctx context.Context, wsID string, broadcaster *events.Broadcaste
models.StatusFailed, msg, wsID); dbErr != nil {
log.Printf("bundle import: failed to mark workspace %s as failed: %v", wsID, dbErr)
}
if bcErr := broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
broadcaster.RecordAndBroadcast(ctx, string(events.EventWorkspaceProvisionFailed), wsID, map[string]interface{}{
"error": msg,
}); bcErr != nil {
log.Printf("bundle import: failed to broadcast provision failed for %s: %v", wsID, bcErr)
}
})
}
func nilIfEmpty(s string) interface{} {
@@ -407,14 +407,12 @@ func (m *Manager) HandleInbound(ctx context.Context, ch ChannelRow, msg *Inbound
// Broadcast event
if m.broadcaster != nil {
if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
"channel_id": ch.ID,
"channel_type": ch.ChannelType,
"username": msg.Username,
"direction": "inbound",
}); err != nil {
log.Printf("Channels: failed to broadcast inbound event: %v", err)
}
})
}
return nil
@@ -455,13 +453,11 @@ func (m *Manager) SendOutbound(ctx context.Context, channelID string, text strin
}
if m.broadcaster != nil {
if err := m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
m.broadcaster.RecordAndBroadcast(ctx, string(events.EventChannelMessage), ch.WorkspaceID, map[string]interface{}{
"channel_id": ch.ID,
"channel_type": ch.ChannelType,
"direction": "outbound",
}); err != nil {
log.Printf("Channels: failed to broadcast outbound event: %v", err)
}
})
}
return nil
@@ -517,9 +517,7 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
// Acknowledge the button press (removes loading spinner)
ackCfg := tgbotapi.NewCallback(cb.ID, "Received")
if _, err := bot.Send(ackCfg); err != nil {
log.Printf("telegram: failed to send callback ack: %v", err)
}
bot.Send(ackCfg)
// Update the message to show what was clicked
decision := "approved"
@@ -531,9 +529,7 @@ func (t *TelegramAdapter) StartPolling(ctx context.Context, config map[string]in
cb.Message.MessageID,
cb.Message.Text+"\n\n✅ CEO "+decision,
)
if _, err := bot.Send(editMsg); err != nil {
log.Printf("telegram: failed to send edit message: %v", err)
}
bot.Send(editMsg)
// Route the decision as an inbound message to the agent
inbound := &InboundMessage{
@@ -383,48 +383,23 @@ func (h *WorkspaceHandler) logA2ASuccess(ctx context.Context, workspaceID, calle
}
summary := a2aMethod + " → " + wsNameForLog
toolTrace := extractToolTrace(respBody)
// DATA-LOSS FIX (internal#470 / #1347 push-mode sibling): this
// a2a_receive row is the ONLY durable record of a push-mode chat
// round-trip — request_body carries the user's message, response_body
// carries the agent's reply, and chat-history hydration
// (messagestore.PostgresMessageStore) reads BOTH back to rebuild the
// transcript on canvas reopen / reload. It MUST be written
// SYNCHRONOUSLY, before proxyA2ARequest returns and ProxyA2A flushes
// the 200 to the canvas — otherwise the canvas sees the reply
// acknowledged (and rendered optimistically) while the row is still
// racing in a detached goroutine, and a reload (or a workspace-server
// restart / deploy / OOM) between the 200 and the goroutine's commit
// loses the message permanently on reopen.
//
// This mirrors the discipline already applied to the poll-mode ingest
// path (logA2AReceiveQueued / persistUserMessageAtIngest); the
// push-mode counterpart was left async, which the E2E Chat
// "history persists across reload" test surfaced as an intermittent
// red (the reload out-raced the INSERT).
//
// - context.WithoutCancel: a client disconnect on chat-exit (which
// cancels the inbound request ctx) MUST NOT abort this write.
// - SYNCHRONOUS (no goAsync): the row must be durable before the 200.
// - Best-effort: LogActivity logs+swallows INSERT errors internally,
// so a DB hiccup never blocks or fails the user's send — behaviour
// for that one request is never worse than the pre-fix async path.
// - The post-commit ACTIVITY_LOGGED broadcast still fires inside
// LogActivity; the durable row is the truth the canvas re-reads.
logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
defer cancel()
LogActivity(logCtx, h.broadcaster, ActivityParams{
WorkspaceID: workspaceID,
ActivityType: "a2a_receive",
SourceID: nilIfEmpty(callerID),
TargetID: &workspaceID,
Method: &a2aMethod,
Summary: &summary,
RequestBody: json.RawMessage(body),
ResponseBody: json.RawMessage(respBody),
ToolTrace: toolTrace,
DurationMs: &durationMs,
Status: logStatus,
parent := ctx
h.goAsync(func() {
logCtx, cancel := context.WithTimeout(context.WithoutCancel(parent), 30*time.Second)
defer cancel()
LogActivity(logCtx, h.broadcaster, ActivityParams{
WorkspaceID: workspaceID,
ActivityType: "a2a_receive",
SourceID: nilIfEmpty(callerID),
TargetID: &workspaceID,
Method: &a2aMethod,
Summary: &summary,
RequestBody: json.RawMessage(body),
ResponseBody: json.RawMessage(respBody),
ToolTrace: toolTrace,
DurationMs: &durationMs,
Status: logStatus,
})
})
if callerID == "" && statusCode < 400 {
@@ -246,20 +246,6 @@ func MarkQueueItemFailed(ctx context.Context, id, errMsg string) {
}
}
// QueueDepth returns the number of currently-queued (not dispatched/completed)
// items for a workspace. Used by the busy-return response body so callers
// can see how many ahead of them.
func QueueDepth(ctx context.Context, workspaceID string) int {
var n int
if err := db.DB.QueryRowContext(ctx,
`SELECT COUNT(*) FROM a2a_queue WHERE workspace_id = $1 AND status = 'queued'`,
workspaceID,
).Scan(&n); err != nil {
log.Printf("A2AQueue: QueueDepth query failed for workspace %s: %v", workspaceID, err)
}
return n
}
// DropStaleQueueItems marks queued items older than maxAge as 'dropped' with a
// system-generated reason so PM agents stop processing stale post-incident noise.
// Called with a workspaceID to scope cleanup to one workspace, or empty to sweep
@@ -60,10 +60,10 @@ func sanitizeErrorDetailForBroadcast(s string) string {
}
type ActivityHandler struct {
broadcaster events.EventEmitter
broadcaster *events.Broadcaster
}
func NewActivityHandler(b events.EventEmitter) *ActivityHandler {
func NewActivityHandler(b *events.Broadcaster) *ActivityHandler {
return &ActivityHandler{broadcaster: b}
}
File diff suppressed because it is too large Load Diff
@@ -54,29 +54,23 @@ func (h *ApprovalsHandler) Create(c *gin.Context) {
return
}
if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalRequested), workspaceID, map[string]interface{}{
"approval_id": approvalID,
"action": body.Action,
"reason": body.Reason,
"task_id": body.TaskID,
}); err != nil {
log.Printf("approvals: failed to broadcast approval requested: %v", err)
}
})
// Auto-escalate to parent
var parentID *string
if err := db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID); err != nil {
log.Printf("approvals: failed to lookup parent for escalation: %v", err)
}
db.DB.QueryRowContext(ctx, `SELECT parent_id FROM workspaces WHERE id = $1`, workspaceID).Scan(&parentID)
if parentID != nil {
if err := h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
h.broadcaster.RecordAndBroadcast(ctx, string(events.EventApprovalEscalated), *parentID, map[string]interface{}{
"approval_id": approvalID,
"from_workspace_id": workspaceID,
"action": body.Action,
"reason": body.Reason,
}); err != nil {
log.Printf("approvals: failed to broadcast approval escalated: %v", err)
}
})
}
c.JSON(http.StatusCreated, gin.H{"approval_id": approvalID, "status": "pending"})
@@ -227,13 +221,11 @@ func (h *ApprovalsHandler) Decide(c *gin.Context) {
eventType = "APPROVAL_DENIED"
}
if err := h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
h.broadcaster.RecordAndBroadcast(ctx, eventType, workspaceID, map[string]interface{}{
"approval_id": approvalID,
"decision": body.Decision,
"decided_by": decidedBy,
}); err != nil {
log.Printf("approvals: failed to broadcast approval decision: %v", err)
}
})
c.JSON(http.StatusOK, gin.H{"status": body.Decision, "approval_id": approvalID})
}
@@ -102,10 +102,10 @@ func pushDelegationResultToInbox(ctx context.Context, sourceID, delegationID, st
// and the A2A request runs in the background.
type DelegationHandler struct {
workspace *WorkspaceHandler
broadcaster events.EventEmitter
broadcaster *events.Broadcaster
}
func NewDelegationHandler(wh *WorkspaceHandler, b events.EventEmitter) *DelegationHandler {
func NewDelegationHandler(wh *WorkspaceHandler, b *events.Broadcaster) *DelegationHandler {
return &DelegationHandler{workspace: wh, broadcaster: b}
}
@@ -17,7 +17,6 @@ package handlers
import (
"fmt"
"sort"
"strings"
)
@@ -56,95 +55,3 @@ func validateRegisteredModelForRuntime(runtime, model string) (bool, string) {
"model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)",
model, runtime)
}
// validateDerivedProviderInRegistry (issue #2172) is the provider-side companion
// to validateRegisteredModelForRuntime. The model-side check asks "is this
// (runtime, model) in the registry?"; the provider-side check asks "is the
// provider this model DERIVES to — the same one the adapter will resolve at
// boot — a known provider in providers.yaml?"
//
// Live trigger (adk-demo Assistant, 2026-06-03): workspace config
// `model=moonshot/kimi-k2.6` (claude-code) → adapter derives `provider=moonshot`
// → `ValueError: provider=moonshot not in providers registry` at BOOT. The
// save was accepted (no validation at the API boundary), and the failure only
// surfaced when the agent tried to register. CI never saw it. The drift gate
// (RFC#580) validates TEMPLATES against the registry, NOT per-workspace
// configs; the existing model-side check rejects a model the runtime doesn't
// own but says nothing about the DERIVED provider's registry membership.
//
// Returns:
//
// (true, "") — pass: model is empty (MODEL_REQUIRED owns it), the
// runtime is not in the registry (fail-open for
// federated / non-first-party runtimes — mirror of the
// model-side check's federation contract), the registry
// failed to load (build-time gate owns it), OR the
// derived provider name is a known provider in the
// registry's `providers:` list.
// (false, reason) — reject: a known (runtime, model) pair derives to a
// provider name absent from the providers list. This is
// the structural class the adk-demo boot failure belongs
// to — the registry's `runtimes:` block references a
// provider not declared in `providers:`, which by
// construction is a registry-data bug. Catching it at
// config-SAVE keeps it out of the agent-boot path.
//
// Defense-in-depth: by construction, a model in a runtime's native provider set
// has a provider that IS in the catalog (the runtime ref names a provider from
// the providers list). So the rejection path is primarily a registry-consistency
// guard. The real value is the FAIL-LOUD semantics — any future drift between
// `providers:` and `runtimes:` fails the create call with a clear pointer to
// the missing provider, instead of silently wedging the agent at boot.
func validateDerivedProviderInRegistry(runtime, model string) (bool, string) {
model = strings.TrimSpace(model)
if model == "" {
return true, "" // MODEL_REQUIRED owns this.
}
m, err := providerRegistry()
if err != nil || m == nil {
// Registry unavailable (build-time defect the gates catch). Fail open —
// do not block create on a registry-load failure.
return true, ""
}
// DeriveProvider is fail-closed for unknown runtimes. Mirror the
// model-side check's federation contract: a runtime the registry does
// NOT know (langgraph / external / kimi / mock / federated) is allowed
// to pass through. DeriveProvider's `unknown runtime` error IS that
// signal — treat it as fail-open, identical to ModelsForRuntime's
// not-found behavior above.
p, err := m.DeriveProvider(runtime, model, nil)
if err != nil {
// Either the runtime is unknown (fail-open by contract) OR the model
// is not native to the runtime (the model-side validator already
// rejected this — DeriveProvider's error here means
// validateRegisteredModelForRuntime should have caught it. Don't
// double-reject: pass through and let the model-side response own
// the message).
return true, ""
}
// Defense-in-depth: confirm the DERIVED provider is a known entry in the
// providers list. By construction it should be (DeriveProvider only
// returns a Provider that was looked up by name from `providers:`), but
// a future federation merge could introduce a runtime ref pointing at a
// contributed provider absent from the core catalog. Reject loudly here
// rather than letting the save reach the agent-boot path and wedge with
// "provider=X not in providers registry" (the original adk-demo class).
for _, candidate := range m.Providers {
if candidate.Name == p.Name {
return true, ""
}
}
// Build a sorted, comma-separated list of valid provider names so the
// operator/caller sees the actionable list (the boot-time error message
// the adk-demo class produced does NOT include this — the fix is to
// surface it at the API boundary, where the caller can fix the request
// without a stuck workspace + operator page).
valid := make([]string, 0, len(m.Providers))
for _, c := range m.Providers {
valid = append(valid, c.Name)
}
sort.Strings(valid)
return false, fmt.Sprintf(
"derived provider %q (for model %q on runtime %q) is not in the providers registry; pick a model whose derived provider is one of: %s",
p.Name, model, runtime, strings.Join(valid, ", "))
}
@@ -6,17 +6,8 @@ package handlers
// fail OPEN (allow) for a runtime the registry doesn't know yet (federation /
// langgraph/etc. not in the first-party registry) so the existing knownRuntimes
// gate stays authoritative there.
//
// TestValidateDerivedProviderInRegistry (issue #2172) is the provider-side
// companion: once the model-side check passes, confirm the DERIVED provider
// (the one the adapter will resolve at boot) is a known provider in
// providers.yaml. Catches the adk-demo "provider=X not in providers registry"
// class at config-SAVE time instead of letting it wedge the agent at boot.
import (
"strings"
"testing"
)
import "testing"
func TestValidateRegisteredModelForRuntime(t *testing.T) {
type tc struct {
@@ -89,163 +80,3 @@ func TestValidateRegisteredModelForRuntime(t *testing.T) {
})
}
}
func TestValidateDerivedProviderInRegistry(t *testing.T) {
type tc struct {
name string
runtime string
model string
wantOK bool
// wantReasonContains: a substring the rejection reason must include
// (skipped for OK cases). Pins the actionable list / derivation pointer
// so the caller knows which provider was missing and what the valid
// set looks like — this is the fix that distinguishes the new gate
// from the boot-time "provider=X not in providers registry" string
// it replaces.
wantReasonContains string
}
cases := []tc{
// PASS — every native (runtime, model) in the catalog derives to a
// provider that IS in the providers list. These are the live corpus
// entries; the test pins the registry-consistency invariant.
{
name: "claude_code_anthropic_api_native",
runtime: "claude-code",
model: "claude-sonnet-4-6",
wantOK: true,
},
{
name: "claude_code_kimi_coding_native",
runtime: "claude-code",
model: "kimi-for-coding",
wantOK: true,
},
{
name: "claude_code_minimax_native",
runtime: "claude-code",
model: "MiniMax-M2.7",
wantOK: true,
},
{
name: "claude_code_platform_namespaced",
runtime: "claude-code",
model: "moonshot/kimi-k2.6",
wantOK: true,
},
{
name: "codex_openai_subscription_default_arm",
runtime: "codex",
model: "gpt-5.5",
wantOK: true,
},
{
name: "codex_platform_namespaced",
runtime: "codex",
model: "openai/gpt-5.4-mini",
wantOK: true,
},
{
name: "hermes_kimi_coding",
runtime: "hermes",
model: "kimi-coding/kimi-k2",
wantOK: true,
},
{
name: "hermes_platform_namespaced",
runtime: "hermes",
model: "moonshot/kimi-k2.6",
wantOK: true,
},
{
name: "openclaw_kimi_coding",
runtime: "openclaw",
model: "moonshot:kimi-k2.6",
wantOK: true,
},
// FAIL — model-side validator catches this, but the provider-side
// gate is called AFTER it in Create and inherits the fail-open
// contract for "model is not native to runtime" (DeriveProvider
// errors → allow, letting the model-side response own the message).
// This is the deliberate "don't double-reject" decision.
{
name: "unregistered_model_pass_through_to_model_side",
runtime: "claude-code",
model: "totally-made-up-model-xyz",
wantOK: true, // pass-through: model-side validator owns the rejection
},
// Federation contract — mirror of the model-side test above.
{
name: "langgraph_runtime_failopen",
runtime: "langgraph",
model: "anything-goes",
wantOK: true,
},
{
name: "external_runtime_failopen",
runtime: "external",
model: "whatever",
wantOK: true,
},
// Empty model — MODEL_REQUIRED owns it; allow.
{
name: "empty_model_allowed_other_gate_owns_it",
runtime: "claude-code",
model: "",
wantOK: true,
},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
ok, why := validateDerivedProviderInRegistry(c.runtime, c.model)
if ok != c.wantOK {
t.Errorf("validateDerivedProviderInRegistry(%q,%q) ok=%v want %v (reason=%q)",
c.runtime, c.model, ok, c.wantOK, why)
}
if !c.wantOK && c.wantReasonContains != "" && !strings.Contains(why, c.wantReasonContains) {
t.Errorf("rejection reason missing %q: got %q", c.wantReasonContains, why)
}
})
}
}
// TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider walks every
// (runtime, model) pair in the registry's native model sets and asserts each
// one derives to a provider that IS in the providers list. This is the
// static regression gate the issue calls for ("a CI test fails if any shipped
// demo/template config references an unregistered provider") — generalized
// to the catalog as a whole: if anyone edits providers.yaml such that a
// `runtimes:` block names a provider absent from `providers:`, this test
// fires before the bad config can reach a customer workspace.
//
// By construction this invariant should always hold (DeriveProvider only
// returns a Provider that was looked up by name from `providers:`), so the
// test primarily guards against future federation merges that introduce a
// runtime ref pointing at a contributed provider absent from the core
// catalog — exactly the failure shape the adk-demo Assistant wedge
// belongs to.
func TestRegistryConsistency_AllNativeModelsDeriveToKnownProvider(t *testing.T) {
m, err := providerRegistry()
if err != nil || m == nil {
t.Skipf("providerRegistry unavailable in test env (err=%v); skipping consistency walk", err)
}
providerNames := make(map[string]struct{}, len(m.Providers))
for _, p := range m.Providers {
providerNames[p.Name] = struct{}{}
}
for runtimeName, runtime := range m.Runtimes {
for _, ref := range runtime.Providers {
for _, modelID := range ref.Models {
p, err := m.DeriveProvider(runtimeName, modelID, nil)
if err != nil {
t.Errorf("catalog invariant broken: runtime=%q model=%q failed DeriveProvider: %v",
runtimeName, modelID, err)
continue
}
if _, ok := providerNames[p.Name]; !ok {
t.Errorf("catalog invariant broken: runtime=%q model=%q derives to provider %q which is not in the providers list (refs=%q)",
runtimeName, modelID, p.Name, ref.Name)
}
}
}
}
}
@@ -1,21 +0,0 @@
package handlers
import (
"testing"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
)
// Proper-SSOT (task #65): required_env is DERIVED from the resolved provider's
// serving classification (IsPlatform), not hand-authored — platform injects
// creds server-side (none required), BYOK requires its auth_env.
func TestRequiredEnvForRegistryProvider(t *testing.T) {
if got := requiredEnvForRegistryProvider(providers.Provider{Name: providers.PlatformProviderName}); got != nil {
t.Errorf("platform provider requiredEnv = %v; want nil (creds injected server-side)", got)
}
byok := providers.Provider{Name: "google", AuthEnv: []string{"GEMINI_API_KEY", "GOOGLE_API_KEY"}}
got := requiredEnvForRegistryProvider(byok)
if len(got) != 2 || got[0] != "GEMINI_API_KEY" {
t.Errorf("byok requiredEnv = %v; want its auth_env", got)
}
}
@@ -176,10 +176,6 @@ func TestResolveAgentURLForRestartSignal_CacheMiss(t *testing.T) {
// TestGracefulPreRestart_Success verifies that when the workspace returns 200,
// the signal is logged as acknowledged without error.
func TestGracefulPreRestart_Success(t *testing.T) {
hWrapper := &resolveURLTestWrapper{
WorkspaceHandler: newHandlerWithTestDeps(t),
testURL: "http://fake-agent.example/agent",
}
_ = setupTestDB(t)
// httptest server simulating the workspace container's /signals/restart_pending
@@ -209,15 +205,18 @@ func TestGracefulPreRestart_Success(t *testing.T) {
})
}))
defer srv.Close()
hWrapper.testURL = srv.URL + "/agent"
// Pre-populate Redis cache with the test server URL
_ = setupTestRedisWithURL(t, srv.URL)
// gracefulPreRestart runs in a goroutine; wait for it before db.DB is restored.
// Must be registered AFTER setupTestDB (LIFO: async wait → db.DB restore).
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
// Use a wrapper so gracefulPreRestart runs through the embedded handler.
hWrapper := &resolveURLTestWrapper{
WorkspaceHandler: newHandlerWithTestDeps(t),
testURL: srv.URL + "/agent",
}
// gracefulPreRestart runs in a goroutine with its own timeout.
// We give it time to complete before the test ends.
hWrapper.gracefulPreRestart(context.Background(), "ws-ack-789")
time.Sleep(200 * time.Millisecond)
}
@@ -225,22 +224,19 @@ func TestGracefulPreRestart_Success(t *testing.T) {
// TestGracefulPreRestart_NotImplemented verifies that when the workspace returns
// 404 (old SDK version), the platform proceeds gracefully (log + no error).
func TestGracefulPreRestart_NotImplemented(t *testing.T) {
hWrapper := &resolveURLTestWrapper{
WorkspaceHandler: newHandlerWithTestDeps(t),
testURL: "http://fake-agent.example/agent",
}
_ = setupTestDB(t)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
}))
defer srv.Close()
hWrapper.testURL = srv.URL + "/agent"
_ = setupTestRedisWithURL(t, srv.URL)
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
hWrapper := &resolveURLTestWrapper{
WorkspaceHandler: newHandlerWithTestDeps(t),
testURL: srv.URL + "/agent",
}
hWrapper.gracefulPreRestart(context.Background(), "ws-noimpl-999")
time.Sleep(200 * time.Millisecond)
@@ -250,18 +246,15 @@ func TestGracefulPreRestart_NotImplemented(t *testing.T) {
// TestGracefulPreRestart_ConnectionRefused verifies that when the workspace
// is unreachable, the platform proceeds gracefully without error.
func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
_ = setupTestDB(t)
mr := setupTestRedisWithURL(t, "http://localhost:19999/agent") // nothing listening on 19999
_ = mr
hWrapper := &resolveURLTestWrapper{
WorkspaceHandler: newHandlerWithTestDeps(t),
testURL: "http://localhost:19999/agent",
}
_ = setupTestDB(t)
// Nothing listening on 19999 — deliberate connection failure.
mr := setupTestRedisWithURL(t, "http://localhost:19999/agent")
_ = mr
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
hWrapper.gracefulPreRestart(context.Background(), "ws-unreachable-000")
time.Sleep(200 * time.Millisecond)
@@ -271,17 +264,13 @@ func TestGracefulPreRestart_ConnectionRefused(t *testing.T) {
// TestGracefulPreRestart_URLResolutionError verifies that when URL resolution
// fails, the platform proceeds gracefully without blocking the restart.
func TestGracefulPreRestart_URLResolutionError(t *testing.T) {
_ = setupTestDB(t)
_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
hWrapper := &resolveURLTestWrapper{
WorkspaceHandler: newHandlerWithTestDeps(t),
errToReturn: context.DeadlineExceeded,
}
_ = setupTestDB(t)
_ = setupTestRedis(t) // empty → URL resolution will fail in resolveAgentURLForRestartSignal
// Must be registered AFTER setupTestDB so LIFO order is: async wait → db.DB restore.
// This ensures goroutines (which access both DB and Redis) are drained before
// any cleanup fires. setupTestRedis comes after newHandlerWithTestDeps
// so the handler holds the correct Redis client reference.
waitForHandlerAsyncBeforeDBCleanup(t, hWrapper.WorkspaceHandler)
hWrapper.gracefulPreRestart(context.Background(), "ws-url-err-111")
@@ -44,20 +44,6 @@ func billingModeForRegistryProvider(p providers.Provider) string {
return LLMBillingModeBYOK
}
// requiredEnvForRegistryProvider derives the env vars the USER must supply for
// a model owned by the resolved provider — the proper-SSOT single fact behind
// the canvas "Missing API Keys" preflight (task #65). The closed platform
// provider injects credentials server-side (the metered proxy) -> nothing
// required; BYOK providers require their auth_env. Derived from IsPlatform +
// AuthEnv so a template can no longer hand-author a required_env that drifts
// from the registry's serving classification.
func requiredEnvForRegistryProvider(p providers.Provider) []string {
if p.IsPlatform() {
return nil
}
return p.AuthEnv
}
// enrichFromRegistry populates the registry-served fields on a templateSummary
// when its runtime is known to the provider registry. It is a no-op (leaves
// RegistryBacked=false and the registry slices nil) for a runtime the registry
@@ -112,7 +98,6 @@ func enrichFromRegistry(summary *templateSummary, runtime string) {
if derived, derr := m.DeriveProvider(runtime, id, nil); derr == nil {
ms.Provider = derived.Name
ms.BillingMode = billingModeForRegistryProvider(derived)
ms.RequiredEnv = requiredEnvForRegistryProvider(derived)
}
// If DeriveProvider errors (ambiguous/overlap — a manifest defect the
// loader's tests pin against), still serve the id without a provider
@@ -474,32 +474,6 @@ func (h *WorkspaceHandler) Create(c *gin.Context) {
})
return
}
// issue #2172 (provider-side companion): once the (runtime, model)
// pair is known to be registered, confirm the DERIVED provider
// (the one the adapter will resolve at boot) is a known provider
// in the providers.yaml catalog. Live trigger (adk-demo Assistant,
// 2026-06-03): the model-side check passed for a registry-resident
// model whose derived provider name was NOT in the providers list,
// so the save was accepted and the agent wedged at boot with
// "provider=X not in providers registry". This check is a
// defense-in-depth registry-consistency guard: by construction a
// model in a runtime's native set derives to a provider that IS in
// the catalog, so the rejection path is primarily a registry-data
// invariant — any future drift between `providers:` and `runtimes:`
// fails the create with a clear pointer to the missing provider
// rather than silently wedging the agent. Fails open for runtimes
// the registry doesn't know (langgraph/external/kimi/mock/federated
// — the federation contract the model-side check also honors).
if ok, why := validateDerivedProviderInRegistry(payload.Runtime, payload.Model); !ok {
log.Printf("Create: 422 DERIVED_PROVIDER_NOT_IN_REGISTRY (runtime=%q model=%q): %s [issue #2172 hard-reject]", payload.Runtime, payload.Model, why)
c.JSON(http.StatusUnprocessableEntity, gin.H{
"error": why,
"runtime": payload.Runtime,
"model": payload.Model,
"code": "DERIVED_PROVIDER_NOT_IN_REGISTRY",
})
return
}
}
ctx := c.Request.Context()
@@ -56,20 +56,7 @@ func PatchAbilities(c *gin.Context) {
return
}
// Atomic update: when both fields are supplied, apply them in one SQL
// statement so the request is all-or-nothing (#2131). A partial mutation
// (e.g. broadcast_enabled updated but talk_to_user_enabled failing) would
// leave the workspace in an ambiguous capability state.
if body.BroadcastEnabled != nil && body.TalkToUserEnabled != nil {
if _, err := db.DB.ExecContext(ctx,
`UPDATE workspaces SET broadcast_enabled = $2, talk_to_user_enabled = $3, updated_at = now() WHERE id = $1`,
id, *body.BroadcastEnabled, *body.TalkToUserEnabled,
); err != nil {
log.Printf("PatchAbilities both-fields for %s: %v", id, err)
c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
return
}
} else if body.BroadcastEnabled != nil {
if body.BroadcastEnabled != nil {
if _, err := db.DB.ExecContext(ctx,
`UPDATE workspaces SET broadcast_enabled = $2, updated_at = now() WHERE id = $1`,
id, *body.BroadcastEnabled,
@@ -78,7 +65,9 @@ func PatchAbilities(c *gin.Context) {
c.JSON(http.StatusInternalServerError, gin.H{"error": "update failed"})
return
}
} else if body.TalkToUserEnabled != nil {
}
if body.TalkToUserEnabled != nil {
if _, err := db.DB.ExecContext(ctx,
`UPDATE workspaces SET talk_to_user_enabled = $2, updated_at = now() WHERE id = $1`,
id, *body.TalkToUserEnabled,
@@ -130,8 +130,11 @@ func TestPatchAbilities_BothFields(t *testing.T) {
mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
WithArgs(wsUUID1).
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
WithArgs(wsUUID1, true, true).
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
WithArgs(wsUUID1, true).
WillReturnResult(sqlmock.NewResult(0, 1))
mock.ExpectExec(`UPDATE workspaces SET talk_to_user_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
WithArgs(wsUUID1, true).
WillReturnResult(sqlmock.NewResult(0, 1))
w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
@@ -179,25 +182,19 @@ func TestPatchAbilities_TalkToUserUpdateError(t *testing.T) {
}
}
// TestPatchAbilities_BothFields_UpdateError — regression for #2131. When
// both fields are supplied the handler uses a single combined UPDATE. A
// failure of that UPDATE must leave the workspace unchanged (atomic).
func TestPatchAbilities_BothFields_UpdateError(t *testing.T) {
func TestPatchAbilities_BothFields_BroadcastFails(t *testing.T) {
mock, cleanup := withMockDB(t)
defer cleanup()
mock.ExpectQuery(`SELECT EXISTS\(SELECT 1 FROM workspaces WHERE id = \$1 AND status != 'removed'\)`).
WithArgs(wsUUID1).
WillReturnRows(sqlmock.NewRows([]string{"exists"}).AddRow(true))
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, talk_to_user_enabled = \$3, updated_at = now\(\) WHERE id = \$1`).
WithArgs(wsUUID1, true, true).
mock.ExpectExec(`UPDATE workspaces SET broadcast_enabled = \$2, updated_at = now\(\) WHERE id = \$1`).
WithArgs(wsUUID1, true).
WillReturnError(errors.New("disk full"))
w := patchAbilitiesReq(t, wsUUID1, `{"broadcast_enabled":true,"talk_to_user_enabled":true}`)
if w.Code != http.StatusInternalServerError {
t.Fatalf("expected 500, got %d: %s", w.Code, w.Body.String())
}
// Because only one UPDATE is issued, there is no partial-mutation
// path to assert against; sqlmock implicitly verifies no second
// exec occurred.
}
@@ -95,14 +95,6 @@ func TestIntegration_BroadcastOrgRoot_NonRootSenderResolvesToRoot(t *testing.T)
}
})
// Pre-test hygiene: if a prior run crashed or was killed, its rows may
// still be in the shared integration DB. Remove them before inserting so
// the unique index workspaces_parent_name_uniq does not conflict.
if _, err := conn.ExecContext(ctx,
`DELETE FROM workspaces WHERE name LIKE $1`, prefix+"%"); err != nil {
t.Logf("pre-test cleanup (non-fatal): %v", err)
}
rootID := uuid.New().String()
midID := uuid.New().String()
leafID := uuid.New().String()
@@ -613,32 +613,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
if model == "" {
log.Printf("ensureDefaultConfig: workspace %s reached provisioning with empty model — Create handler should have rejected this; rendering empty model: \"\" in config.yaml (workspace will boot not_configured)", workspaceID)
}
// Derive the provider from the providers manifest and stamp it into the
// generated config BEFORE claude-code model normalization strips the
// slash-prefix. DeriveProvider needs the FULL, un-normalized model id
// (e.g. "moonshot/kimi-k2.6") for the exact-id match that resolves the
// canvas claude-code case to provider=platform — normalizing to
// "kimi-k2.6" first would lose that match.
//
// Why this exists (RFC#340 Fix A): a canvas-created claude-code workspace
// with model "moonshot/kimi-k2.6" booted NOT_CONFIGURED — the adapter
// derived provider="moonshot" (slash-split of the model id) which is not
// in the providers registry. CP bakes `provider: platform` via heredoc,
// but the cp#329 config-bundle fetch overwrites /configs/config.yaml with
// THIS (previously providerless) bundle version, so molecule-runtime
// config.py re-derived the wrong provider. Stamping the manifest-derived
// provider here (mirroring CP's buildModelProviderYAML shape) makes the
// config the adapter reads carry the canonical provider.
//
// Reuses the SAME manifest path the config-SAVE validators use
// (providerRegistry() + Manifest.DeriveProvider; see
// model_registry_validation.go). On a derive MISS (unknown/unregistered
// model, or registry unavailable) provider is left empty and the field is
// omitted below — preserving today's behavior; never fail provisioning on
// a derive miss.
derivedProvider := deriveDefaultConfigProvider(runtime, model)
if runtime == "claude-code" {
model = normalizeClaudeCodeModel(model)
}
@@ -666,14 +640,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
// Model always at top level — config.py reads raw["model"] for all runtimes.
configYAML += fmt.Sprintf("model: %s\n", quoteModel)
// Stamp the manifest-derived provider at top level (mirroring CP's
// buildModelProviderYAML). Omitted entirely on a derive miss so the prior
// behavior — no `provider:` key, runtime re-derives — is preserved for
// unregistered models (requirement #3).
if derivedProvider != "" {
configYAML += fmt.Sprintf("provider: '%s'\n", yamlEscapeSingleQuotedProvider(derivedProvider))
}
// Add runtime_config. required_env is intentionally omitted — the
// platform injects secrets at container-start time via the secrets API,
// and preflight already validates that the env vars are present before
@@ -683,10 +649,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
if runtime == "claude-code" {
configYAML += fmt.Sprintf(" model: %s\n", quoteModel)
}
// Mirror the top-level provider under runtime_config (CP writes both).
if derivedProvider != "" {
configYAML += fmt.Sprintf(" provider: '%s'\n", yamlEscapeSingleQuotedProvider(derivedProvider))
}
configYAML += " timeout: 0\n"
files["config.yaml"] = []byte(configYAML)
@@ -695,48 +657,6 @@ func (h *WorkspaceHandler) ensureDefaultConfig(workspaceID string, payload model
return files
}
// deriveDefaultConfigProvider resolves the provider name the adapter should
// see for (runtime, model) using the SAME providers manifest the config-SAVE
// validators use (providerRegistry() + Manifest.DeriveProvider; see
// model_registry_validation.go). It is intentionally fail-OPEN: any miss
// (empty model, registry unavailable, unknown runtime, or a model the runtime
// does not own) returns "" so the caller omits the `provider:` field and the
// generated config keeps its pre-fix shape. It NEVER fails provisioning.
//
// `model` must be the FULL, un-normalized id (e.g. "moonshot/kimi-k2.6") so
// DeriveProvider's exact-id match resolves the canvas claude-code case to
// provider=platform. The availableAuthEnv arg is nil here — config-generation
// has no per-workspace auth context yet (secrets are injected at container
// start), matching the validators' nil call.
func deriveDefaultConfigProvider(runtime, model string) string {
if strings.TrimSpace(model) == "" {
return ""
}
m, err := providerRegistry()
if err != nil || m == nil {
// Registry unavailable (a build-time defect the gen/sync gates catch).
// Fail open — do not stamp a provider, do not block provisioning.
return ""
}
p, err := m.DeriveProvider(runtime, model, nil)
if err != nil {
// Unknown runtime (federation / non-first-party) or a model the
// runtime does not own. Either way, omit the provider and let the
// runtime fall back to its prior derivation — preserving today's
// behavior for unregistered models.
return ""
}
return p.Name
}
// yamlEscapeSingleQuotedProvider escapes a value for a YAML single-quoted
// scalar, mirroring CP's buildModelProviderYAML (a literal single quote is
// doubled). Provider names are registry-controlled identifiers, so this is a
// defense-in-depth measure rather than a hot path.
func yamlEscapeSingleQuotedProvider(v string) string {
return strings.ReplaceAll(v, "'", "''")
}
func normalizeClaudeCodeModel(model string) string {
model = strings.TrimSpace(model)
if before, after, ok := strings.Cut(model, "/"); ok && before != "" && after != "" {
@@ -363,74 +363,6 @@ runtime_config:
}
}
// TestEnsureDefaultConfig_StampsDerivedProvider pins RFC#340 Fix A: a
// canvas-created claude-code workspace with model "moonshot/kimi-k2.6" must
// have the manifest-derived provider stamped into config.yaml at BOTH the top
// level and under runtime_config, so the cp#329 config-bundle the adapter
// reads no longer leaves the runtime to slash-split "moonshot/..." → an
// unregistered provider="moonshot" (the original NOT_CONFIGURED boot). The
// canonical manifest exact-id-matches "moonshot/kimi-k2.6" to provider=platform.
func TestEnsureDefaultConfig_StampsDerivedProvider(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
files := handler.ensureDefaultConfig("ws-moonshot", models.CreateWorkspacePayload{
Name: "Kimi Agent",
Tier: 2,
Runtime: "claude-code",
Model: "moonshot/kimi-k2.6",
})
var parsed struct {
Model string `yaml:"model"`
Provider string `yaml:"provider"`
RuntimeConfig struct {
Model string `yaml:"model"`
Provider string `yaml:"provider"`
} `yaml:"runtime_config"`
}
if err := yaml.Unmarshal(files["config.yaml"], &parsed); err != nil {
t.Fatalf("generated YAML invalid: %v\n%s", err, files["config.yaml"])
}
if parsed.Provider != "platform" {
t.Errorf("top-level provider = %q, want platform\n%s", parsed.Provider, files["config.yaml"])
}
if parsed.RuntimeConfig.Provider != "platform" {
t.Errorf("runtime_config.provider = %q, want platform\n%s", parsed.RuntimeConfig.Provider, files["config.yaml"])
}
// The claude-code model normalization still strips the slash prefix.
if parsed.Model != "kimi-k2.6" {
t.Errorf("top-level model = %q, want kimi-k2.6\n%s", parsed.Model, files["config.yaml"])
}
}
// TestEnsureDefaultConfig_DeriveMissOmitsProvider pins requirement #3: a model
// the providers manifest does NOT recognize for the runtime (a derive miss)
// must NOT write any `provider:` key — neither top-level nor under
// runtime_config — preserving the pre-fix behavior (no empty `provider:`,
// provisioning never fails on a miss). "gpt-4o" is not a registered
// claude-code model, so DeriveProvider errors and the field is omitted.
func TestEnsureDefaultConfig_DeriveMissOmitsProvider(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
files := handler.ensureDefaultConfig("ws-derivemiss", models.CreateWorkspacePayload{
Name: "Unregistered Agent",
Tier: 1,
Runtime: "claude-code",
Model: "gpt-4o",
})
content := string(files["config.yaml"])
if strings.Contains(content, "provider:") {
t.Errorf("derive miss must NOT write any provider: key, got:\n%s", content)
}
// Sanity: a derive miss must still produce a valid, model-bearing config.
if !strings.Contains(content, `model: "gpt-4o"`) {
t.Errorf("derive miss should still render the model, got:\n%s", content)
}
}
func TestEnsureDefaultConfig_CustomModel(t *testing.T) {
broadcaster := newTestBroadcaster()
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
@@ -876,9 +876,8 @@ func (h *WorkspaceHandler) runRestartCycle(workspaceID string) {
h.provisionWorkspaceAutoSync(workspaceID, "", nil, payload)
// sendRestartContext is a one-way notification to the new container; safe
// to fire async — the next restart cycle won't depend on it completing.
// Tracked via h.goAsync so tests can wait for it via h.asyncWG before
// closing the sqlmock. Without this, untracked goroutines hit the restored
// mock and cause "was not expected" errors in parallel CI execution (mc#1264).
// Tracked via goAsync so the test harness can drain it before the
// global db.DB swap (sendRestartContext reads db.DB).
h.goAsync(func() { h.sendRestartContext(workspaceID, restartData) })
}
@@ -16,7 +16,7 @@ const SchemaVersion = 1
// Fingerprint is a stable content hash of the generated projection (schema
// version + provider catalog + runtime native sets). It changes iff the
// registry DATA changes (comment-only YAML edits do not churn it).
const Fingerprint = "a491f5ff8a17ef59"
const Fingerprint = "8f733b112695b926"
// GenProvider is the generated projection of one provider catalog entry —
// the subset a downstream consumer needs to derive + display a provider.
@@ -50,7 +50,7 @@ var Providers = []GenProvider{
{Name: "openai-api", DisplayName: "OpenAI API", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^openai-api[:/]", IsPlatform: false, UpstreamVendor: "openai"},
{Name: "moonshot", DisplayName: "Moonshot (Kimi)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOONSHOT_API_KEY", "KIMI_API_KEY"}, ModelPrefixMatch: "^moonshot[:/-]", IsPlatform: false, UpstreamVendor: "moonshot"},
{Name: "minimax", DisplayName: "MiniMax", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^minimax-m", IsPlatform: false, UpstreamVendor: "minimax"},
{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_API_KEY", "MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
{Name: "xiaomi-mimo", DisplayName: "Xiaomi MiMo", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^mimo-", IsPlatform: false},
{Name: "zai", DisplayName: "Z.ai (GLM)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GLM_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^glm-", IsPlatform: false},
{Name: "kimi-coding", DisplayName: "Moonshot Kimi (coding-tuned)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^kimi-", IsPlatform: false},
@@ -89,9 +89,8 @@ var Runtimes = map[string][]GenRuntimeRef{
{Name: "platform", Models: []string{"openai/gpt-5.4", "openai/gpt-5.4-mini"}},
},
"google-adk": {
{Name: "platform", Models: []string{"platform:gemini-2.5-pro", "platform:gemini-2.5-flash"}},
{Name: "google", Models: []string{"gemini-2.5-pro", "gemini-2.5-flash"}},
{Name: "vertex", Models: []string{"vertex:gemini-2.5-pro"}},
{Name: "google", Models: []string{"gemini-2.5-pro"}},
},
"hermes": {
{Name: "kimi-coding", Models: []string{"kimi-coding/kimi-k2"}},
@@ -1,42 +0,0 @@
package providers
import "testing"
// Proper-SSOT (task #65): google-adk keyless Gemini resolves to the closed
// platform provider -> IsPlatform=true; BYOK AI Studio -> google. The
// platform: select ids are registered so workspace-create accepts them
// (was 422 UNREGISTERED_MODEL_FOR_RUNTIME).
func TestGoogleADK_PlatformGeminiResolvesToPlatform(t *testing.T) {
m, err := LoadManifest()
if err != nil {
t.Fatal(err)
}
for _, id := range []string{"platform:gemini-2.5-pro", "platform:gemini-2.5-flash"} {
p, err := m.DeriveProvider("google-adk", id, nil)
if err != nil {
t.Fatalf("%s: %v", id, err)
}
if p.Name != PlatformProviderName || !p.IsPlatform() {
t.Errorf("%s -> %q IsPlatform=%v; want platform", id, p.Name, p.IsPlatform())
}
}
p, err := m.DeriveProvider("google-adk", "gemini-2.5-pro", nil)
if err != nil {
t.Fatal(err)
}
if p.IsPlatform() || p.Name != "google" {
t.Errorf("gemini-2.5-pro -> %q IsPlatform=%v; want google byok", p.Name, p.IsPlatform())
}
models, _ := m.ModelsForRuntime("google-adk")
want := map[string]bool{"platform:gemini-2.5-pro": false, "platform:gemini-2.5-flash": false}
for _, id := range models {
if _, ok := want[id]; ok {
want[id] = true
}
}
for id, ok := range want {
if !ok {
t.Errorf("%s not registered for google-adk — create would 422", id)
}
}
}
@@ -292,7 +292,7 @@ providers:
# PR-1 simplification when only claude-code referenced platform.
base_url_template: "https://api.moleculesai.app/api/v1/internal/llm/openai/v1"
base_url_anthropic: "https://api.moleculesai.app/api/v1/internal/llm/anthropic/v1"
auth_env: [MOLECULE_LLM_USAGE_TOKEN]
auth_env: [ANTHROPIC_API_KEY, MOLECULE_LLM_USAGE_TOKEN]
auth_token_env: ANTHROPIC_API_KEY
# Adapter routes kimi- / moonshot/ through platform by default. No bare
# vendor prefix of its own; it multiplexes other vendors' slugs. Match
@@ -412,24 +412,14 @@ providers:
model_prefix_match: "^gemini-"
model_aliases: []
# Google Vertex AI — served via the Molecule CP LLM proxy (NOT on-box ADC).
# google-adk routes ALL Gemini through the proxy, which mints a Vertex token
# server-side over Workload Identity Federation (AWS -> GCP STS -> SA; see
# internal/vertexauth + llm_proxy.go google/vertex case) and meters usage to
# org credits. The former on-box ADC delivery (a NON-SECRET external_account
# cred-config written to /configs/gcp-adc.json via GOOGLE_APPLICATION_CREDENTIALS,
# injected by the provisioner) was REMOVED: a tenant has EC2 root and could
# have used that credential to call Vertex DIRECTLY, bypassing metering
# (keyless-Vertex billing leak — task #64 / RFC internal#763; provisioner
# force-off + template routes vertex: through the proxy). The `vertex:`
# namespace + this entry remain for proxy routing/billing of Vertex-upstream
# requests, distinct from the API-key `google` vendor's ^gemini-
# (TestNoAmbiguousModelMatch).
#
# NOTE: display_name ("keyless ADC") and auth_env (GOOGLE_APPLICATION_CREDENTIALS)
# are now VESTIGIAL — no consumer reads auth_env post-leak-fix, but it must stay
# non-empty (providers.go validate). Left as-is to keep this a comment-only,
# regen-free change; retiring them is a registry-regen follow-up.
# Google Vertex AI — KEYLESS arm (mirrors the anthropic-oauth / anthropic-api
# and openai-subscription / openai-api split: same vendor, distinct auth).
# google-adk serves Gemini via Vertex using Application Default Credentials
# over Workload Identity Federation (AWS EC2 role -> GCP STS -> SA), injected
# by the provisioner (cp#416 + envs.yaml vertex block) as a NON-SECRET
# external_account cred-config at GOOGLE_APPLICATION_CREDENTIALS. No API key.
# Distinct `vertex:` model namespace keeps it unambiguous vs the API-key
# `google` vendor's ^gemini- (TestNoAmbiguousModelMatch).
- name: vertex
display_name: "Google Vertex AI (keyless ADC)"
vendor_logo: "google"
@@ -854,24 +844,11 @@ runtimes:
# this runtimes entry declares the selectable model set.
google-adk:
providers:
# Platform-managed (keyless, metered) Gemini via the Molecule LLM proxy ->
# Vertex AI (server-side WIF mint; NO credential on the tenant box — the
# keyless-Vertex leak fix). Resolves to the closed `platform` provider ->
# IsPlatform=true -> platform_managed billing + required_env=[] (derived).
# The org-compliant default. The runtime translates the `platform:` select
# id to the bare wire id the proxy routes to Vertex.
- name: platform
models:
- platform:gemini-2.5-pro
- platform:gemini-2.5-flash
# API-key BYOK arm: AI Studio (the tenant's OWN GOOGLE_API_KEY).
- name: google
models:
- gemini-2.5-pro
- gemini-2.5-flash
# DEPRECATED transitional: vertex: ids stay registered until templates
# move to platform: (superseded by the platform arm above). Remove in a
# cleanup once no template references vertex:gemini-*.
# Keyless Vertex (org-compliant default): Gemini via Vertex AI + ADC/WIF.
- name: vertex
models:
- vertex:gemini-2.5-pro
- vertex:gemini-2.5-pro
# API-key BYOK arm: AI Studio GEMINI_API_KEY/GOOGLE_API_KEY.
- name: google
models:
- gemini-2.5-pro
@@ -29,7 +29,7 @@ import (
// canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as
// synced from molecule-controlplane. Bumped deliberately on each re-sync (see
// file doc). Cross-checked live by the sync-providers-yaml CI workflow.
const canonicalProvidersYAMLSHA256 = "021ae082c2bbbbb61c406cae03205ac6b7fff160ae5976cfc64de3de676d02b2"
const canonicalProvidersYAMLSHA256 = "dec73199e26cee2d395a0acece99771618d3879dc5ca724ba57cb5b38079c6ce"
func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) {
sum := sha256.Sum256(embeddedYAML)