Compare commits
65 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 48b6011e17 | |||
| cc99d3fff4 | |||
| 71010e618a | |||
| 53ec08cbdb | |||
| d34d09db01 | |||
| d7484f7df4 | |||
| 1d88a6ed0e | |||
| 1818d03014 | |||
| 8812285932 | |||
| 48aebdfcc4 | |||
| 2304d84b46 | |||
| 484a257067 | |||
| 197409f10d | |||
| 6a44d8b175 | |||
| 79162509d0 | |||
| 9b19759ceb | |||
| acdb368a4f | |||
| 5f6b9b242e | |||
| 8517b8e776 | |||
| d0edc74dc0 | |||
| ddc4e8190c | |||
| aa9ea5f99f | |||
| a409d9032a | |||
| 911d9ce3c8 | |||
| 21268f0fe4 | |||
| 9fbb546815 | |||
| 53cd36dae2 | |||
| bfd1e560b5 | |||
| 724e2f0fcc | |||
| bebce13202 | |||
| d6958d89df | |||
| e8cf93b0e5 | |||
| a9ded9240f | |||
| 28519c6dbe | |||
| 1cb19df712 | |||
| 41b842cc98 | |||
| 99607e0f58 | |||
| 1019a8e2c6 | |||
| 556ec2fe4f | |||
| eaf1f5438e | |||
| 6e68244437 | |||
| 0016a39f5c | |||
| 376e6ab89a | |||
| 8186baf902 | |||
| 4f26ed69c3 | |||
| aa7bc922d7 | |||
| 3d9e15e1c6 | |||
| 274f5be98d | |||
| 185ff47fed | |||
| 293bfb6abe | |||
| 57fe54ccc3 | |||
| 5a41dc330c | |||
| e13031a951 | |||
| 3b19919a95 | |||
| e9de8af66c | |||
| 88ee252c28 | |||
| b55ee4705c | |||
| f2f25657b3 | |||
| 7c455027d9 | |||
| 78d6cb9d4b | |||
| 8bd00bb94a | |||
| b55f1be46c | |||
| dac6e3046b | |||
| 038159a610 | |||
| 3fccfad3ae |
@@ -8,7 +8,8 @@ pair diverges.
|
||||
Sources:
|
||||
A. `.gitea/workflows/ci.yml` jobs (CI source — the actual job set)
|
||||
B. `status_check_contexts` in branch_protections (the merge gate)
|
||||
C. `REQUIRED_CHECKS` env in audit-force-merge.yml (the audit env)
|
||||
C. `REQUIRED_CHECKS_JSON` (preferred) or `REQUIRED_CHECKS` (legacy)
|
||||
env in audit-force-merge.yml (the audit env)
|
||||
|
||||
Three failure classes:
|
||||
F1 Job in (A) is not under the sentinel's `needs:` — sentinel
|
||||
@@ -250,13 +251,21 @@ def sentinel_needs(ci_doc: dict) -> set[str]:
|
||||
return set(needs)
|
||||
|
||||
|
||||
def required_checks_env(audit_doc: dict) -> set[str]:
|
||||
"""Pull the REQUIRED_CHECKS env value from audit-force-merge.yml.
|
||||
def required_checks_env(audit_doc: dict, branch: str) -> set[str]:
|
||||
"""Pull the required-checks env value from audit-force-merge.yml.
|
||||
|
||||
Walks the YAML AST per `feedback_behavior_based_ast_gates`: we do
|
||||
NOT grep for `REQUIRED_CHECKS:` — that breaks under reformatting,
|
||||
NOT grep for env keys — that breaks under reformatting,
|
||||
multi-job workflows, or a future move of the env to a different
|
||||
step. Instead, look inside every job's every step's `env:` map."""
|
||||
found: list[str] = []
|
||||
step. Instead, look inside every job's every step's `env:` map.
|
||||
|
||||
Supports two variants:
|
||||
- REQUIRED_CHECKS_JSON (preferred): JSON dict keyed by branch name.
|
||||
We extract the array for the target branch.
|
||||
- REQUIRED_CHECKS (legacy): newline-separated list of context names.
|
||||
"""
|
||||
found_json: list[str] = []
|
||||
found_legacy: list[str] = []
|
||||
jobs = audit_doc.get("jobs", {})
|
||||
if not isinstance(jobs, dict):
|
||||
sys.stderr.write(f"::warning::{AUDIT_WORKFLOW_PATH} has no jobs: mapping\n")
|
||||
@@ -268,27 +277,67 @@ def required_checks_env(audit_doc: dict) -> set[str]:
|
||||
if not isinstance(step, dict):
|
||||
continue
|
||||
step_env = step.get("env") or {}
|
||||
if isinstance(step_env, dict) and "REQUIRED_CHECKS" in step_env:
|
||||
v = step_env["REQUIRED_CHECKS"]
|
||||
if isinstance(v, str):
|
||||
found.append(v)
|
||||
if not found:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS env not found in any step of "
|
||||
f"{AUDIT_WORKFLOW_PATH}\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
if len(found) > 1:
|
||||
# Defensive: refuse to guess which one is canonical.
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS env present in {len(found)} steps; ambiguous\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
raw = found[0]
|
||||
# YAML block-scalars (`|`) leave a trailing newline + blanks; trim
|
||||
# consistently with audit-force-merge.sh's parser so both sides
|
||||
# produce identical sets.
|
||||
return {line.strip() for line in raw.splitlines() if line.strip()}
|
||||
if isinstance(step_env, dict):
|
||||
if "REQUIRED_CHECKS_JSON" in step_env:
|
||||
v = step_env["REQUIRED_CHECKS_JSON"]
|
||||
if isinstance(v, str):
|
||||
found_json.append(v)
|
||||
if "REQUIRED_CHECKS" in step_env:
|
||||
v = step_env["REQUIRED_CHECKS"]
|
||||
if isinstance(v, str):
|
||||
found_legacy.append(v)
|
||||
|
||||
# JSON variant takes precedence.
|
||||
if found_json:
|
||||
if len(found_json) > 1:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON env present in {len(found_json)} steps; ambiguous\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
try:
|
||||
parsed = json.loads(found_json[0])
|
||||
except json.JSONDecodeError as e:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON is not valid JSON: {e}\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
if not isinstance(parsed, dict):
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON parsed to {type(parsed).__name__}, expected dict\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
branch_checks = parsed.get(branch)
|
||||
if branch_checks is None:
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON has no entry for branch '{branch}'\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
if not isinstance(branch_checks, list):
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS_JSON['{branch}'] is {type(branch_checks).__name__}, expected list\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
return {str(item).strip() for item in branch_checks if str(item).strip()}
|
||||
|
||||
# Legacy variant fallback.
|
||||
if found_legacy:
|
||||
if len(found_legacy) > 1:
|
||||
# Defensive: refuse to guess which one is canonical.
|
||||
sys.stderr.write(
|
||||
f"::error::REQUIRED_CHECKS env present in {len(found_legacy)} steps; ambiguous\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
raw = found_legacy[0]
|
||||
# YAML block-scalars (`|`) leave a trailing newline + blanks; trim
|
||||
# consistently with audit-force-merge.sh's parser so both sides
|
||||
# produce identical sets.
|
||||
return {line.strip() for line in raw.splitlines() if line.strip()}
|
||||
|
||||
sys.stderr.write(
|
||||
f"::error::Neither REQUIRED_CHECKS_JSON nor REQUIRED_CHECKS env found in any step of "
|
||||
f"{AUDIT_WORKFLOW_PATH}\n"
|
||||
)
|
||||
sys.exit(3)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
@@ -330,7 +379,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
|
||||
jobs = ci_job_names(ci_doc)
|
||||
jobs_all = ci_jobs_all(ci_doc)
|
||||
needs = sentinel_needs(ci_doc)
|
||||
env_set = required_checks_env(audit_doc)
|
||||
env_set = required_checks_env(audit_doc, branch)
|
||||
|
||||
# Protection
|
||||
# api() raises ApiError on non-2xx. Transient 5xx should fail loud.
|
||||
@@ -524,7 +573,7 @@ def render_body(branch: str, findings: list[str], debug: dict) -> str:
|
||||
"- **F2**: rename the protection context to match an emitter, "
|
||||
"or remove it from `status_check_contexts` "
|
||||
"(PATCH `/api/v1/repos/{owner}/{repo}/branch_protections/{branch}`).",
|
||||
"- **F3a / F3b**: bring `REQUIRED_CHECKS` env in "
|
||||
"- **F3a / F3b**: bring `REQUIRED_CHECKS_JSON` (or `REQUIRED_CHECKS` legacy) env in "
|
||||
"`.gitea/workflows/audit-force-merge.yml` into set-equality with "
|
||||
"`status_check_contexts` (single PR, both files).",
|
||||
"",
|
||||
|
||||
@@ -26,6 +26,10 @@ PROFILES: dict[str, dict[str, str]] = {
|
||||
"handlers": (
|
||||
r"^workspace-server/internal/handlers/"
|
||||
r"|^workspace-server/internal/wsauth/"
|
||||
# #2149: the scheduler real-PG integration tests run in this same
|
||||
# workflow (they reuse its migrated Postgres), so changes to the
|
||||
# scheduler package must trigger the job too.
|
||||
r"|^workspace-server/internal/scheduler/"
|
||||
r"|^workspace-server/migrations/"
|
||||
r"|^\.gitea/workflows/handlers-postgres-integration\.yml$"
|
||||
),
|
||||
@@ -174,3 +178,4 @@ def main(argv: list[str]) -> int:
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv[1:]))
|
||||
|
||||
|
||||
@@ -1228,10 +1228,13 @@ def main(argv: list[str] | None = None) -> int:
|
||||
)
|
||||
|
||||
na_desc = ", ".join(sorted(na_descs)) if na_descs else "(none)"
|
||||
na_status_state = "success" if na_descs else "pending"
|
||||
# internal#818: na-declarations is an informational context, not a merge
|
||||
# gate. An empty declaration list is a terminal success state — pending
|
||||
# here poisons the PR combined status.
|
||||
na_status_state = "success"
|
||||
# review-check.sh reads the description to discover which gates are N/A.
|
||||
# Include the gate names so it can grep for them.
|
||||
na_description = f"N/A: {na_desc}" if na_descs else "N/A: (none)"
|
||||
na_description = f"N/A: {na_desc}"
|
||||
|
||||
if not args.dry_run:
|
||||
client.post_status(
|
||||
|
||||
@@ -114,6 +114,19 @@ if [ -z "$WHOAMI" ]; then
|
||||
fi
|
||||
echo "::notice::token resolves to user: $WHOAMI"
|
||||
|
||||
# 0.5 Read PR head SHA so we can reject stale approvals after head moves
|
||||
# (internal#816). Reviews carry the commit_id they were submitted against.
|
||||
HEAD_SHA=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/pulls/${PR_NUMBER}" | jq -r '.head.sha // ""') || true
|
||||
if [ -z "$HEAD_SHA" ]; then
|
||||
echo "::error::Failed to fetch PR head SHA — token may be invalid."
|
||||
if [ "${SOP_FAIL_OPEN:-}" = "1" ]; then
|
||||
echo "::warning::SOP_FAIL_OPEN=1 — exiting 0 so CI does not block."
|
||||
exit 0
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
debug "pr-head-sha=$HEAD_SHA"
|
||||
|
||||
# 1. Read tier label. || true ensures set -euo pipefail does not abort the
|
||||
# script if curl or jq fails (e.g. 401 from empty token).
|
||||
LABELS=$(curl -sS -H "$AUTH" "${API}/repos/${OWNER}/${NAME}/issues/${PR_NUMBER}/labels" | jq -r '.[].name') || true
|
||||
@@ -265,7 +278,7 @@ if [ $_REVIEWS_EXIT -ne 0 ] || [ -z "$REVIEWS" ]; then
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
APPROVERS=$(echo "$REVIEWS" | jq -r '[.[] | select(.state=="APPROVED") | .user.login] | unique | .[]') || true
|
||||
APPROVERS=$(echo "$REVIEWS" | jq -r --arg head_sha "$HEAD_SHA" '[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]') || true
|
||||
if [ -z "$APPROVERS" ]; then
|
||||
echo "::error::No approving reviews on this PR. Set SOP_DEBUG=1 and re-run for diagnostics."
|
||||
exit 1
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import importlib.util
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
@@ -36,6 +37,76 @@ def _make_audit_doc(required_checks: list[str]) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def _make_audit_doc_json(required_checks_json: dict) -> dict:
|
||||
return {
|
||||
"jobs": {
|
||||
"audit": {
|
||||
"steps": [
|
||||
{"env": {"REQUIRED_CHECKS_JSON": json.dumps(required_checks_json)}}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# required_checks_env — dual-variant parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_required_checks_env_prefers_json_over_legacy():
|
||||
doc = {
|
||||
"jobs": {
|
||||
"audit": {
|
||||
"steps": [
|
||||
{
|
||||
"env": {
|
||||
"REQUIRED_CHECKS_JSON": json.dumps(
|
||||
{"main": ["ctx-a"], "staging": ["ctx-b"]}
|
||||
),
|
||||
"REQUIRED_CHECKS": "ctx-legacy\nctx-old",
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
assert drift.required_checks_env(doc, "main") == {"ctx-a"}
|
||||
assert drift.required_checks_env(doc, "staging") == {"ctx-b"}
|
||||
|
||||
|
||||
def test_required_checks_env_falls_back_to_legacy():
|
||||
doc = _make_audit_doc(["legacy-ctx"])
|
||||
assert drift.required_checks_env(doc, "main") == {"legacy-ctx"}
|
||||
|
||||
|
||||
def test_required_checks_env_json_missing_branch_fails():
|
||||
doc = _make_audit_doc_json({"staging": ["ctx-b"]})
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
def test_required_checks_env_json_malformed_fails():
|
||||
doc = {
|
||||
"jobs": {
|
||||
"audit": {
|
||||
"steps": [
|
||||
{"env": {"REQUIRED_CHECKS_JSON": "not-json"}}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
try:
|
||||
drift.required_checks_env(doc, "main")
|
||||
except SystemExit as exc:
|
||||
assert exc.code == 3
|
||||
else:
|
||||
raise AssertionError("expected SystemExit(3)")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# sentinel_needs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -571,3 +571,44 @@ def test_current_branch_head_none_without_token():
|
||||
def test_current_branch_head_none_on_non_200(monkeypatch):
|
||||
monkeypatch.setattr(prod, "_api_json_optional", lambda _u, _t: (500, None))
|
||||
assert prod.current_branch_head({"GITEA_TOKEN": "secret"}) is None
|
||||
|
||||
|
||||
# --- #2213: superseded check must fire BEFORE production side effects ----------
|
||||
#
|
||||
# Real incident shape: two main pushes land ~2 min apart. The OLDER deploy job
|
||||
# (GITHUB_SHA=7a72516, target staging-7a72516) started LATE — main head was
|
||||
# already 7f25373. The #2194 guard only protected the *verify* step, so the
|
||||
# older job still:
|
||||
# 1. rolled the canary (hongming) BACKWARD to staging-7a72516 (the #2213 red,
|
||||
# seen as the newer job's verify reading hongming on the old SHA), then
|
||||
# 2. promoted :latest backward to the older image,
|
||||
# before finally skipping verify. The workflow now calls this same superseded
|
||||
# check BEFORE the redeploy + promote steps and gates both off when it fires.
|
||||
# These tests pin the contract that check-superseded relies on for the exact
|
||||
# incident shape.
|
||||
|
||||
|
||||
def test_superseded_by_fires_for_older_job_when_newer_already_head(monkeypatch):
|
||||
# Older job (7a72516) re-checks the head just before rollout and finds the
|
||||
# newer merge (7f25373) already owns main -> superseded -> skip side effects.
|
||||
monkeypatch.setattr(
|
||||
prod, "current_branch_head", lambda _env: "7f25373309eca54a36f08c371ff783c3a47c3f8d"
|
||||
)
|
||||
newer = prod.superseded_by(
|
||||
{"GITHUB_SHA": "7a72516f7e7ba1a710c4f393fef08be8d22e1866"}
|
||||
)
|
||||
assert newer == "7f25373309eca54a36f08c371ff783c3a47c3f8d"
|
||||
|
||||
|
||||
def test_superseded_by_none_for_latest_job_so_it_still_rolls(monkeypatch):
|
||||
# The newer job (7f25373) IS the head -> NOT superseded -> it proceeds to
|
||||
# roll the fleet and verify, so a genuinely-behind tenant still fails loud.
|
||||
monkeypatch.setattr(
|
||||
prod, "current_branch_head", lambda _env: "7f25373309eca54a36f08c371ff783c3a47c3f8d"
|
||||
)
|
||||
assert (
|
||||
prod.superseded_by(
|
||||
{"GITHUB_SHA": "7f25373309eca54a36f08c371ff783c3a47c3f8d"}
|
||||
)
|
||||
is None
|
||||
)
|
||||
|
||||
@@ -1299,3 +1299,108 @@ class TestGetCIStatus(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
sop.get_ci_status(client, "o", "r", "sha1"), "unknown"
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# internal#818 — na-declarations status must be terminal success
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestNaDeclarationsStatusTerminal(unittest.TestCase):
|
||||
"""Regression for internal#818: the na-declarations context is
|
||||
informational, not a merge gate. An empty N/A declaration list must
|
||||
post `success` (not `pending`) so it does not poison the PR combined
|
||||
status."""
|
||||
|
||||
def _run_with_fake_client(self, fake_client_class):
|
||||
"""Swap GiteaClient temporarily and invoke main() with a fake token."""
|
||||
orig_client = sop.GiteaClient
|
||||
orig_token = os.environ.get("GITEA_TOKEN")
|
||||
try:
|
||||
sop.GiteaClient = fake_client_class
|
||||
os.environ["GITEA_TOKEN"] = "fake-token"
|
||||
return sop.main([
|
||||
"--owner", "o", "--repo", "r", "--pr", "1",
|
||||
"--config", CONFIG_PATH,
|
||||
"--gitea-host", "git.example.com",
|
||||
])
|
||||
finally:
|
||||
sop.GiteaClient = orig_client
|
||||
if orig_token is None:
|
||||
os.environ.pop("GITEA_TOKEN", None)
|
||||
else:
|
||||
os.environ["GITEA_TOKEN"] = orig_token
|
||||
|
||||
def test_empty_na_descriptions_posts_success(self):
|
||||
posted = []
|
||||
|
||||
class FakeClient(sop.GiteaClient):
|
||||
def get_pr(self, owner, repo, pr):
|
||||
return {
|
||||
"state": "open",
|
||||
"user": {"login": "alice"},
|
||||
"head": {"sha": "abc123"},
|
||||
"labels": [],
|
||||
}
|
||||
|
||||
def get_issue_comments(self, owner, repo, issue, max_comments=None):
|
||||
return []
|
||||
|
||||
def resolve_team_id(self, org, team_name):
|
||||
return None
|
||||
|
||||
def is_team_member(self, team_id, login):
|
||||
return False
|
||||
|
||||
def post_status(self, owner, repo, sha, state, context,
|
||||
description, target_url=""):
|
||||
posted.append({
|
||||
"state": state,
|
||||
"context": context,
|
||||
"description": description,
|
||||
})
|
||||
|
||||
rc = self._run_with_fake_client(FakeClient)
|
||||
self.assertEqual(rc, 0)
|
||||
na_posts = [p for p in posted if "na-declarations" in p["context"]]
|
||||
self.assertEqual(len(na_posts), 1, f"expected one na-declarations post, got {posted}")
|
||||
self.assertEqual(na_posts[0]["state"], "success")
|
||||
self.assertEqual(na_posts[0]["description"], "N/A: (none)")
|
||||
|
||||
def test_populated_na_descriptions_posts_success(self):
|
||||
posted = []
|
||||
|
||||
class FakeClient(sop.GiteaClient):
|
||||
def get_pr(self, owner, repo, pr):
|
||||
return {
|
||||
"state": "open",
|
||||
"user": {"login": "alice"},
|
||||
"head": {"sha": "abc123"},
|
||||
"labels": [],
|
||||
}
|
||||
|
||||
def get_issue_comments(self, owner, repo, issue, max_comments=None):
|
||||
return [
|
||||
{"user": {"login": "bob"}, "body": "/sop-n/a qa-review N/A: docs-only"},
|
||||
]
|
||||
|
||||
def resolve_team_id(self, org, team_name):
|
||||
return 1
|
||||
|
||||
def is_team_member(self, team_id, login):
|
||||
return True
|
||||
|
||||
def post_status(self, owner, repo, sha, state, context,
|
||||
description, target_url=""):
|
||||
posted.append({
|
||||
"state": state,
|
||||
"context": context,
|
||||
"description": description,
|
||||
})
|
||||
|
||||
rc = self._run_with_fake_client(FakeClient)
|
||||
self.assertEqual(rc, 0)
|
||||
na_posts = [p for p in posted if "na-declarations" in p["context"]]
|
||||
self.assertEqual(len(na_posts), 1)
|
||||
self.assertEqual(na_posts[0]["state"], "success")
|
||||
self.assertIn("qa-review", na_posts[0]["description"])
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env bash
|
||||
# Regression test for internal#816 — sop-tier-check must ignore APPROVED
|
||||
# reviews that were submitted against an old PR head SHA.
|
||||
#
|
||||
# Bug: the script collected approvers with
|
||||
# jq '[.[] | select(.state=="APPROVED") | .user.login]'
|
||||
# without filtering on .commit_id == HEAD_SHA. After a PR head moved,
|
||||
# stale approvals looked valid to the tier gate.
|
||||
#
|
||||
# Fix: the jq filter now includes
|
||||
# select(.state=="APPROVED" and .commit_id == $head_sha)
|
||||
# where $head_sha is the current PR head fetched from the API.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# jq may not be on PATH in all environments (e.g. dev containers).
|
||||
PATH="/tmp/bin:$PATH"
|
||||
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert_eq() {
|
||||
local label="$1"
|
||||
local expected="$2"
|
||||
local got="$3"
|
||||
if [ "$expected" = "$got" ]; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label"
|
||||
echo " expected: <$expected>"
|
||||
echo " got: <$got>"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# Sample reviews matching the shape from Gitea API
|
||||
REVIEWS_JSON='[
|
||||
{"state":"APPROVED","commit_id":"abc123","user":{"login":"bob"}},
|
||||
{"state":"APPROVED","commit_id":"old456","user":{"login":"alice"}},
|
||||
{"state":"COMMENT","commit_id":"abc123","user":{"login":"carol"}},
|
||||
{"state":"APPROVED","commit_id":"abc123","user":{"login":"dave"}},
|
||||
{"state":"REQUEST_CHANGES","commit_id":"abc123","user":{"login":"eve"}}
|
||||
]'
|
||||
|
||||
echo "test: jq filter keeps only APPROVED on current head"
|
||||
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "abc123" \
|
||||
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
|
||||
assert_eq "current-head approvers" "bob dave" "$(echo "$GOT" | tr '\n' ' ' | sed 's/ $//')"
|
||||
|
||||
echo "test: jq filter with all-stale reviews yields empty"
|
||||
GOT=$(echo "$REVIEWS_JSON" | jq -r --arg head_sha "new789" \
|
||||
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
|
||||
assert_eq "all-stale yields empty" "" "$GOT"
|
||||
|
||||
echo "test: jq filter handles null commit_id gracefully"
|
||||
NULL_JSON='[{"state":"APPROVED","commit_id":null,"user":{"login":"mallory"}}]'
|
||||
GOT=$(echo "$NULL_JSON" | jq -r --arg head_sha "abc123" \
|
||||
'[.[] | select(.state=="APPROVED" and .commit_id == $head_sha) | .user.login] | unique | .[]')
|
||||
assert_eq "null commit_id excluded" "" "$GOT"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -96,6 +96,7 @@ env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# bp-exempt: advisory arm64 pilot, non-gating by design (internal#418).
|
||||
fast-checks:
|
||||
name: fast-checks
|
||||
# AND-set: only the Mac arm64 runner advertises macos-self-hosted.
|
||||
|
||||
+37
-38
@@ -25,10 +25,9 @@
|
||||
# sufficient for `actions/checkout` against this same repo.
|
||||
#
|
||||
# 4. Docs — no docs/scripts reference github.com URLs that need swapping.
|
||||
# The canvas-deploy-reminder step writes a `ghcr.io/...` image
|
||||
# reference into the step summary text — that's documentation prose
|
||||
# pointing at the ECR-mirrored canvas image and stays unchanged for
|
||||
# this port (a separate cleanup if ghcr→ECR sweep is in scope).
|
||||
# The canvas-deploy-status step (core#2226, formerly canvas-deploy-reminder)
|
||||
# writes the canvas ordered-deploy status into the step summary; it points
|
||||
# at the ECR canvas image and the publish workflow, no ghcr.io prose.
|
||||
#
|
||||
# Cross-links:
|
||||
# - RFC: internal#219 (CI/CD hard-gate hardening)
|
||||
@@ -389,61 +388,61 @@ jobs:
|
||||
|
||||
# mc#959 root-fix (sre)
|
||||
|
||||
canvas-deploy-reminder:
|
||||
name: Canvas Deploy Reminder
|
||||
canvas-deploy-status:
|
||||
# core#2226: replaces the old advisory "Canvas Deploy Reminder". The canvas
|
||||
# image now has a real ORDERED auto-deploy (publish-canvas-image.yml:
|
||||
# build → push :staging-<sha> → wait green main CI → promote :latest by
|
||||
# digest), and docker-compose pins via CANVAS_IMAGE_TAG. There is no longer
|
||||
# a manual "go run docker compose pull by hand" step to remind operators
|
||||
# about — so this job just records, on a canvas-touching main push, that the
|
||||
# ordered deploy is handling it (and where to watch), instead of prescribing
|
||||
# a manual action that determinism made obsolete.
|
||||
name: Canvas Deploy Status
|
||||
runs-on: docker-host
|
||||
# mc#1982 root-fix: added job-level `if:` so ci-required-drift.py's
|
||||
# ci_job_names() detects this as github.ref-gated and skips it from F1.
|
||||
# The step-level exit 0 handles the "not main push" case; the job-level
|
||||
# `if:` makes the gating explicit so the drift script sees it.
|
||||
# Runs on both main and staging pushes; step exits 0 when not applicable.
|
||||
# Job-level `if:` so ci-required-drift.py's ci_job_names() detects this as
|
||||
# github.ref-gated and skips it from the required-context F1 set (mc#1982).
|
||||
# Step-level exit 0 handles the "not a canvas main push" case.
|
||||
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/staging' }}
|
||||
needs: [changes, canvas-build]
|
||||
steps:
|
||||
- name: Write deploy reminder to step summary
|
||||
- name: Record canvas ordered-deploy status
|
||||
env:
|
||||
COMMIT_SHA: ${{ github.sha }}
|
||||
CANVAS_CHANGED: ${{ needs.changes.outputs.canvas }}
|
||||
EVENT_NAME: ${{ github.event_name }}
|
||||
REF_NAME: ${{ github.ref }}
|
||||
# github.server_url resolves via the workflow-level env override
|
||||
# to the Gitea instance, so the RUN_URL points at the Gitea run
|
||||
# page (not github.com). See feedback_act_runner_github_server_url.
|
||||
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
# github.server_url resolves via the workflow-level env override to the
|
||||
# Gitea instance, so RUN_URL points at the Gitea run page (not github.com).
|
||||
RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ "$CANVAS_CHANGED" != "true" ] || [ "$EVENT_NAME" != "push" ] || [ "$REF_NAME" != "refs/heads/main" ]; then
|
||||
echo "Canvas deploy reminder not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
|
||||
echo "Canvas deploy status not applicable for event=$EVENT_NAME ref=$REF_NAME canvas_changed=$CANVAS_CHANGED."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Write body to a temp file — avoids backtick escaping in shell.
|
||||
cat > /tmp/deploy-reminder.md << 'BODY'
|
||||
## Canvas build passed — deploy required
|
||||
cat > /tmp/deploy-status.md << 'BODY'
|
||||
## Canvas ordered deploy in progress — no manual action required
|
||||
|
||||
The `publish-canvas-image` workflow is now building a fresh Docker image
|
||||
(`ghcr.io/molecule-ai/canvas:latest`) in the background.
|
||||
This canvas-touching main push triggers `publish-canvas-image`, which now
|
||||
runs an ORDERED, CI-gated deploy (core#2226) — the same shape as the
|
||||
platform's deploy-production:
|
||||
|
||||
Once it completes (~3–5 min), apply on the host machine with:
|
||||
```bash
|
||||
cd <runner-workspace>
|
||||
git pull origin main
|
||||
docker compose pull canvas && docker compose up -d canvas
|
||||
```
|
||||
1. Build → push `molecule-ai/canvas:staging-<sha>` + `:staging-latest`.
|
||||
2. Wait for green main CI on this SHA.
|
||||
3. Promote `:latest` to the verified `:staging-<sha>` by digest.
|
||||
|
||||
If you need to rebuild from local source instead (e.g. testing unreleased
|
||||
changes or a new `NEXT_PUBLIC_*` URL), use:
|
||||
```bash
|
||||
docker compose build canvas && docker compose up -d canvas
|
||||
```
|
||||
Tenants/hosts pin via `CANVAS_IMAGE_TAG` (default `latest` = the last
|
||||
CI-green build), so a deploy is reproducible — no hand-run
|
||||
`docker compose pull` needed. Watch the run in the canvas publish workflow.
|
||||
BODY
|
||||
printf '\n> Posted automatically by CI · commit `%s` · [build log](%s)\n' \
|
||||
"$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-reminder.md
|
||||
printf '\n> Posted automatically by CI · commit `%s` · [publish workflow](%s)\n' \
|
||||
"$COMMIT_SHA" "$RUN_URL" >> /tmp/deploy-status.md
|
||||
|
||||
# Gitea has no commit-comments API; write to GITHUB_STEP_SUMMARY,
|
||||
# which both GitHub Actions and Gitea Actions render as the
|
||||
# workflow run's summary page. (#75 / PR-D)
|
||||
cat /tmp/deploy-reminder.md >> "$GITHUB_STEP_SUMMARY"
|
||||
# Gitea has no commit-comments API; write to GITHUB_STEP_SUMMARY, which
|
||||
# both GitHub and Gitea Actions render as the run's summary page.
|
||||
cat /tmp/deploy-status.md >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
# Python Lint & Test — required check, always runs.
|
||||
# Runtime Python moved to molecule-ai-workspace-runtime. Keep this context as
|
||||
|
||||
@@ -327,7 +327,12 @@ jobs:
|
||||
# start-redis steps point at this run's per-run host ports.
|
||||
./platform-server > platform.log 2>&1 &
|
||||
echo $! > platform.pid
|
||||
- name: Wait for /health
|
||||
- name: Wait for /health (with migration completion gate)
|
||||
# Issue #2205: 30 one-second probes is insufficient when the migration
|
||||
# chain is still running; /health can flip true before migrations
|
||||
# finish, so subsequent steps that touch the DB fail. Hybrid fix:
|
||||
# bump timeout to 300s AND gate exit on the same workspaces-table
|
||||
# existence check the downstream "Assert migrations applied" uses.
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
# Readiness signal: the platform binds /health only AFTER the full
|
||||
@@ -343,13 +348,21 @@ jobs:
|
||||
# background platform-server process has exited (e.g. a broken
|
||||
# migration crashed it), we stop and fail loudly at once instead of
|
||||
# waiting out the whole budget.
|
||||
DEADLINE_SECS=180 # cold-start + full migration chain headroom
|
||||
#
|
||||
# Issue #2205: /health can flip true before migrations finish on a
|
||||
# growing chain, so we gate exit on the workspaces-table existence
|
||||
# check the downstream "Assert migrations applied" uses.
|
||||
DEADLINE_SECS=300 # cold-start + full migration chain headroom
|
||||
PLATFORM_PID="$(cat workspace-server/platform.pid 2>/dev/null || true)"
|
||||
start=$(date +%s)
|
||||
while :; do
|
||||
if curl -sf "$BASE/health" > /dev/null; then
|
||||
echo "Platform healthy after $(( $(date +%s) - start ))s"
|
||||
exit 0
|
||||
tables=$(docker exec "$PG_CONTAINER" psql -U dev -d molecule -tAc \
|
||||
"SELECT count(*) FROM information_schema.tables WHERE table_schema='public' AND table_name='workspaces'" 2>/dev/null || echo "0")
|
||||
if [ "$tables" = "1" ]; then
|
||||
echo "Platform healthy + migrations applied after $(( $(date +%s) - start ))s"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
# Fast-fail: if the platform process died, /health will never come.
|
||||
if [ -n "$PLATFORM_PID" ] && ! kill -0 "$PLATFORM_PID" 2>/dev/null; then
|
||||
@@ -358,12 +371,13 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
if [ "$(( $(date +%s) - start ))" -ge "$DEADLINE_SECS" ]; then
|
||||
echo "::error::Platform did not become healthy within ${DEADLINE_SECS}s — see log below"
|
||||
echo "::error::Platform did not become healthy with migrations applied within ${DEADLINE_SECS}s — see log below"
|
||||
cat workspace-server/platform.log || true
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
|
||||
- name: Assert migrations applied
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
|
||||
@@ -126,6 +126,7 @@ jobs:
|
||||
# push/dispatch/cron only (30+ min). This is NOT a fake-green mask of
|
||||
# the real assertion — it validates the driving script's bash syntax
|
||||
# and inline-python so a broken test script fails at PR time.
|
||||
# bp-required: pending #1296 — PR emitter, not yet required (tracked in #1296).
|
||||
pr-validate:
|
||||
name: E2E Peer Visibility
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -167,16 +167,30 @@ jobs:
|
||||
- if: needs.detect-changes.outputs.canvas == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Verify admin token present
|
||||
# Skip-if-absent (core#2225), mirroring the serving-e2e gate's
|
||||
# skip-if-secret-unset contract: a MISSING CI secret is an operator
|
||||
# CONFIG gap, not a code regression, so it must not paint this E2E
|
||||
# red. When CP_STAGING_ADMIN_API_TOKEN is unset we emit a LOUD
|
||||
# ::warning:: + ::notice:: and skip the real provision/test steps (the
|
||||
# job still completes green). When the secret IS present we run the
|
||||
# full suite exactly as before. Operators: set
|
||||
# CP_STAGING_ADMIN_API_TOKEN as a repo/org Actions secret on
|
||||
# molecule-core to actually exercise this E2E.
|
||||
- name: Check admin token (skip-if-absent)
|
||||
id: token_check
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
echo "::error::Missing CP_STAGING_ADMIN_API_TOKEN"
|
||||
exit 2
|
||||
echo "::warning::CP_STAGING_ADMIN_API_TOKEN is not set on this runner — SKIPPING the staging canvas E2E (cannot auth to staging CP). This is an operator config gap, not a code failure; set the secret on molecule-core (repo or org Actions secrets) to run it. See core#2225."
|
||||
echo "::notice::E2E Staging Canvas skipped: CP_STAGING_ADMIN_API_TOKEN absent."
|
||||
echo "present=false" >> "$GITHUB_OUTPUT"
|
||||
else
|
||||
echo "CP_STAGING_ADMIN_API_TOKEN present ✓ — running staging canvas E2E."
|
||||
echo "present=true" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Set up Node
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: '20'
|
||||
@@ -184,11 +198,11 @@ jobs:
|
||||
cache-dependency-path: canvas/package-lock.json
|
||||
|
||||
- name: Install canvas deps
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
run: npm ci
|
||||
|
||||
- name: Install Playwright browsers
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
PREBAKED_PLAYWRIGHT=/ms-playwright
|
||||
@@ -200,7 +214,7 @@ jobs:
|
||||
npx playwright install --with-deps chromium
|
||||
|
||||
- name: Run staging canvas E2E
|
||||
if: needs.detect-changes.outputs.canvas == 'true'
|
||||
if: needs.detect-changes.outputs.canvas == 'true' && steps.token_check.outputs.present == 'true'
|
||||
run: npx playwright test --config=playwright.staging.config.ts
|
||||
|
||||
- name: Upload Playwright report on failure
|
||||
|
||||
@@ -0,0 +1,196 @@
|
||||
name: E2E Staging Reconciler (heals terminated EC2)
|
||||
|
||||
# Live staging proof for the core#2261 instance-state reconciler
|
||||
# (workspace-server/internal/registry/cp_instance_reconciler.go). The
|
||||
# real-infra complement to the deterministic unit tests: provisions a real
|
||||
# staging workspace, TERMINATES its EC2, and asserts the reconciler flips it
|
||||
# off 'online' (PRIMARY gate) and auto-reprovisions on a new instance_id
|
||||
# (SECONDARY, best-effort). See
|
||||
# tests/e2e/test_reconciler_heals_terminated_instance.sh for the assertion
|
||||
# contract + timeouts.
|
||||
#
|
||||
# Modeled on e2e-staging-saas.yml. Same secrets + same Gitea-port caveats:
|
||||
# - Dropped workflow_dispatch.inputs (Gitea 1.22.6 parser rejects them).
|
||||
# - Dropped merge_group / environment (no Gitea equivalent).
|
||||
# - Workflow-level env.GITHUB_SERVER_URL pinned per
|
||||
# feedback_act_runner_github_server_url.
|
||||
#
|
||||
# NOT a required check (yet). This is a brand-new live E2E that provisions +
|
||||
# terminates real EC2 (costs money, shares the cp#245 cold-boot flake
|
||||
# surface). A new live e2e must NOT hard-gate every merge until it has a
|
||||
# green track record. continue-on-error: true surfaces failures without
|
||||
# blocking. PROMOTE to branch-required (flip continue-on-error → false AND
|
||||
# add "E2E Staging Reconciler" to branch protection) once it has run green on
|
||||
# main for several consecutive days — same de-flake discipline the
|
||||
# platform-boot job in e2e-staging-saas.yml documents.
|
||||
|
||||
on:
|
||||
# Run when the reconciler itself, the script, or the libs it depends on
|
||||
# change — so a reconciler regression is caught on the PR that introduces
|
||||
# it (paths filter), plus a daily schedule to catch infra/AMI drift.
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'workspace-server/internal/registry/cp_instance_reconciler.go'
|
||||
- 'tests/e2e/test_reconciler_heals_terminated_instance.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- '.gitea/workflows/e2e-staging-reconciler.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'workspace-server/internal/registry/cp_instance_reconciler.go'
|
||||
- 'tests/e2e/test_reconciler_heals_terminated_instance.sh'
|
||||
- 'tests/e2e/lib/aws_leak_check.sh'
|
||||
- 'tests/e2e/lib/model_slug.sh'
|
||||
- '.gitea/workflows/e2e-staging-reconciler.yml'
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# 08:00 UTC daily — offset from e2e-staging-saas (07:00) so the two live
|
||||
# harnesses don't fight over staging's per-hour org-creation quota.
|
||||
- cron: '0 8 * * *'
|
||||
|
||||
# Serialize against itself: staging has a finite per-hour org-creation quota,
|
||||
# and a cancelled run mid-teardown leaks EC2. cancel-in-progress: false
|
||||
# mirrors e2e-staging-saas.yml.
|
||||
concurrency:
|
||||
group: e2e-staging-reconciler
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# PR-validation path: always posts success so a workflow-only / script-only
|
||||
# PR has a status check (this workflow's real job only fires on the paths
|
||||
# filter). Mirrors the pr-validate job in e2e-staging-saas.yml.
|
||||
pr-validate:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
continue-on-error: true
|
||||
- name: YAML validation (best-effort)
|
||||
run: |
|
||||
echo "e2e-staging-reconciler.yml — PR validation: workflow YAML is valid."
|
||||
echo "Live E2E step runs only when the reconciler / script / libs change."
|
||||
continue-on-error: true
|
||||
|
||||
e2e-staging-reconciler:
|
||||
name: E2E Staging Reconciler
|
||||
runs-on: ubuntu-latest
|
||||
# NOT required yet — surface failures without blocking merges. Flip to
|
||||
# false + add to branch protection once green on main for a de-flake
|
||||
# window (see the header note). mc#1982: do not renew this mask silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 60
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
env:
|
||||
MOLECULE_CP_URL: https://staging-api.moleculesai.app
|
||||
# Single admin-bearer secret drives provision + tenant-token retrieval +
|
||||
# teardown (= Railway staging CP_ADMIN_API_TOKEN). Same secret name the
|
||||
# saas workflow canonicalised to under internal#322.
|
||||
MOLECULE_ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
# Leak-check is REQUIRED here: this test deliberately terminates an EC2,
|
||||
# so teardown MUST positively confirm no slug-tagged box survives.
|
||||
E2E_AWS_LEAK_CHECK: required
|
||||
E2E_AWS_TERMINATE_LEAKS: '1'
|
||||
# claude-code + MiniMax is the cheapest boot-to-online path (same as the
|
||||
# saas job). The reconciler test never makes a completion, but the key is
|
||||
# wired so the first boot reaches online on the same path the saas
|
||||
# harness uses. First non-empty wins in the script's priority chain.
|
||||
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
E2E_ANTHROPIC_API_KEY: ${{ secrets.MOLECULE_STAGING_ANTHROPIC_API_KEY }}
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
E2E_RUNTIME: claude-code
|
||||
E2E_MODEL_SLUG: MiniMax-M2
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Verify required secrets present
|
||||
run: |
|
||||
if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
|
||||
exit 2
|
||||
fi
|
||||
for var in AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY; do
|
||||
if [ -z "${!var:-}" ]; then
|
||||
echo "::error::$var not set — this test terminates an EC2 and verifies no leak; AWS creds are mandatory"
|
||||
exit 2
|
||||
fi
|
||||
done
|
||||
echo "Required secrets present ✓"
|
||||
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$MOLECULE_CP_URL/health")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::Staging CP unhealthy (got HTTP $code). Skipping — not a reconciler bug."
|
||||
exit 1
|
||||
fi
|
||||
echo "Staging CP healthy ✓"
|
||||
|
||||
- name: Run reconciler heal E2E
|
||||
id: e2e
|
||||
run: bash tests/e2e/test_reconciler_heals_terminated_instance.sh
|
||||
|
||||
# Belt-and-braces teardown: the script installs its own EXIT trap, but if
|
||||
# the runner is cancelled the trap may not fire. This always() step
|
||||
# double-deletes any e2e-rec-* org from THIS run. The admin DELETE is
|
||||
# idempotent so double-invoking is safe.
|
||||
- name: Teardown safety net (runs on cancel/failure)
|
||||
if: always()
|
||||
env:
|
||||
ADMIN_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
run: |
|
||||
set +e
|
||||
orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
|
||||
| python3 -c "
|
||||
import json, sys, os, datetime
|
||||
run_id = os.environ.get('GITHUB_RUN_ID', '')
|
||||
d = json.load(sys.stdin)
|
||||
today = datetime.date.today()
|
||||
yesterday = today - datetime.timedelta(days=1)
|
||||
dates = (today.strftime('%Y%m%d'), yesterday.strftime('%Y%m%d'))
|
||||
# Slug shape: e2e-rec-YYYYMMDD-<run_id>-<attempt>-...
|
||||
if run_id:
|
||||
prefixes = tuple(f'e2e-rec-{d}-{run_id}-' for d in dates)
|
||||
else:
|
||||
prefixes = tuple(f'e2e-rec-{d}-' for d in dates)
|
||||
candidates = [o['slug'] for o in d.get('orgs', [])
|
||||
if any(o.get('slug','').startswith(p) for p in prefixes)
|
||||
and o.get('instance_status') not in ('purged',)]
|
||||
print('\n'.join(candidates))
|
||||
" 2>/dev/null)
|
||||
leaks=()
|
||||
for slug in $orgs; do
|
||||
echo "Safety-net teardown: $slug"
|
||||
set +e
|
||||
curl -sS -o /tmp/rec-cleanup.out -w "%{http_code}" \
|
||||
-X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
|
||||
-H "Authorization: Bearer $ADMIN_TOKEN" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"confirm\":\"$slug\"}" >/tmp/rec-cleanup.code
|
||||
set -e
|
||||
code=$(cat /tmp/rec-cleanup.code 2>/dev/null || echo "000")
|
||||
if [ "$code" = "200" ] || [ "$code" = "204" ]; then
|
||||
echo "[teardown] deleted $slug (HTTP $code)"
|
||||
else
|
||||
echo "::warning::reconciler teardown for $slug returned HTTP $code — sweep-stale-e2e-orgs will catch it within ~45 min. Body: $(head -c 300 /tmp/rec-cleanup.out 2>/dev/null)"
|
||||
leaks+=("$slug")
|
||||
fi
|
||||
done
|
||||
if [ ${#leaks[@]} -gt 0 ]; then
|
||||
echo "::warning::reconciler teardown left ${#leaks[@]} leak(s): ${leaks[*]}"
|
||||
fi
|
||||
exit 0
|
||||
@@ -243,7 +243,8 @@ jobs:
|
||||
# MUST exist for the integration tests to be meaningful. Hard-
|
||||
# fail if any didn't land — that would be a real regression we
|
||||
# want loud.
|
||||
for tbl in delegations workspaces activity_logs pending_uploads; do
|
||||
# workspace_schedules added for the #2149 scheduler integration tests.
|
||||
for tbl in delegations workspaces activity_logs pending_uploads workspace_schedules; do
|
||||
if ! psql -h "${PG_HOST}" -U postgres -d molecule -tA \
|
||||
-c "SELECT 1 FROM information_schema.tables WHERE table_name = '$tbl'" \
|
||||
| grep -q 1; then
|
||||
@@ -274,6 +275,16 @@ jobs:
|
||||
# workflow runs don't fight over a host-net 5432 port.
|
||||
go test -tags=integration -timeout 5m -v ./internal/handlers/ -run "^TestIntegration_"
|
||||
|
||||
- if: needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Run scheduler integration tests (#2149)
|
||||
run: |
|
||||
# #2149: real-PG regression coverage for the scheduler firing loop
|
||||
# (tick → A2A fire → write-back of last_run_at/next_run_at/run_count/
|
||||
# activity_logs jsonb incl. invalid-UTF-8 sanitization + sweepPhantomBusy).
|
||||
# Reuses the same migrated Postgres (workspace_schedules / activity_logs
|
||||
# / workspaces all landed by the migration replay step above).
|
||||
go test -tags=integration -timeout 5m -v ./internal/scheduler/ -run "^TestIntegration_"
|
||||
|
||||
- if: failure() && needs.detect-changes.outputs.handlers == 'true'
|
||||
name: Diagnostic dump on failure
|
||||
env:
|
||||
|
||||
@@ -14,10 +14,37 @@ name: publish-canvas-image
|
||||
# authenticate to ghcr.io.
|
||||
#
|
||||
|
||||
# Builds and pushes the canvas Docker image to ECR whenever a commit lands
|
||||
# on main that touches canvas code. Previously canvas changes were visible in
|
||||
# CI (npm run build passed) but the live container was never updated —
|
||||
# operators had to manually run `docker compose build canvas` each time.
|
||||
# Builds, pushes, and (ordered) deploys the standalone canvas Docker image to
|
||||
# ECR whenever a commit lands on main that touches canvas code.
|
||||
#
|
||||
# Ordered deploy (core#2226) — mirrors publish-workspace-server-image.yml so the
|
||||
# standalone `molecule-ai/canvas` image is deterministic + verifiable, not a
|
||||
# side effect of the platform fleet pulling a mutable `:latest`:
|
||||
#
|
||||
# build-and-push: build → push :staging-<sha> + :staging-latest + :sha-<sha>
|
||||
# (does NOT move :latest — an unpromoted build must never
|
||||
# become the prod-blessed tag).
|
||||
# promote-canvas: waits for green main CI on this SHA, then re-points
|
||||
# :latest to the verified :staging-<sha> by digest
|
||||
# (imagetools create — no rebuild). So `:latest` == the
|
||||
# current prod-blessed canvas, byte-identical to staging-<sha>.
|
||||
#
|
||||
# Tag scheme produced (parallels platform-tenant):
|
||||
# :staging-<sha> — per-commit immutable digest, what docker-compose pins to.
|
||||
# :staging-latest — most recent BUILD on main (last-writer-wins, NOT gated).
|
||||
# :sha-<sha> — kept for back-compat with any consumer pinning the old tag.
|
||||
# :latest — most recent CI-GREEN build. Only moved by promote-canvas.
|
||||
#
|
||||
# WHY this is the canvas analogue of the platform's deploy-production, not a
|
||||
# literal copy: the standalone canvas co-deploys with the platform on the same
|
||||
# host via the root docker-compose.yml (`docker compose pull && up -d`). Gating
|
||||
# the canvas `:latest` promotion on the SAME green-main-CI signal the platform
|
||||
# deploy waits on makes platform + canvas roll together by the same SHA. The
|
||||
# canvas has no per-tenant fleet of its own and no /buildinfo endpoint, so there
|
||||
# is no fleet-rollout / per-tenant verify step to mirror here — CI-green +
|
||||
# digest-pin + immutable :staging-<sha> is the determinism contract. (A future
|
||||
# canvas /buildinfo would let this assert the served SHA like the platform does;
|
||||
# tracked in core#2226.)
|
||||
#
|
||||
# Mirror of publish-platform-image.yml, adapted for the Next.js canvas layer.
|
||||
# See that workflow for inline notes on macOS Keychain isolation and QEMU.
|
||||
@@ -30,6 +57,7 @@ on:
|
||||
# platform-only / docs-only / MCP-only merges.
|
||||
- 'canvas/**'
|
||||
- '.gitea/workflows/publish-canvas-image.yml'
|
||||
workflow_dispatch:
|
||||
# NOTE (Gitea port): the original GitHub workflow had a
|
||||
# `workflow_dispatch:` manual trigger for the
|
||||
# non-canvas-merge-but-need-fresh-image scenario. Dropped in the
|
||||
@@ -69,6 +97,10 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
outputs:
|
||||
# Exposed so promote-canvas re-points :latest to the EXACT per-commit tag
|
||||
# this build produced (digest-level), never a re-resolved mutable tag.
|
||||
staging_sha: ${{ steps.tags.outputs.staging_sha }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -140,6 +172,7 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
echo "sha=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||
echo "staging_sha=staging-${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Resolve build args
|
||||
id: build_args
|
||||
@@ -175,8 +208,19 @@ jobs:
|
||||
build-args: |
|
||||
NEXT_PUBLIC_PLATFORM_URL=${{ steps.build_args.outputs.platform_url }}
|
||||
NEXT_PUBLIC_WS_URL=${{ steps.build_args.outputs.ws_url }}
|
||||
# Bake the merge SHA into the image so /api/buildinfo reports the
|
||||
# served canvas SHA (core#2235). Mirrors how the platform image
|
||||
# surfaces GIT_SHA at /buildinfo. Full 40-char SHA (not the
|
||||
# 7-char tag) so the fleet redeploy verification can match exactly.
|
||||
BUILD_SHA=${{ github.sha }}
|
||||
# Ordered deploy (core#2226): the build job pushes the immutable
|
||||
# per-commit tag + the build-tracking staging-latest + the legacy
|
||||
# back-compat :sha-<sha> tag. It does NOT push :latest — :latest is
|
||||
# the prod-blessed tag and is only re-pointed by promote-canvas after
|
||||
# green main CI, so an unpromoted/red build can never become :latest.
|
||||
tags: |
|
||||
${{ env.IMAGE_NAME }}:latest
|
||||
${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.staging_sha }}
|
||||
${{ env.IMAGE_NAME }}:staging-latest
|
||||
${{ env.IMAGE_NAME }}:sha-${{ steps.tags.outputs.sha }}
|
||||
# Gitea artifact-cache reachability is best-effort on the operator
|
||||
# runner network. Do not let cache export fail an image that already
|
||||
@@ -185,3 +229,107 @@ jobs:
|
||||
org.opencontainers.image.source=https://git.moleculesai.app/${{ github.repository }}
|
||||
org.opencontainers.image.revision=${{ github.sha }}
|
||||
org.opencontainers.image.description=Molecule AI canvas (Next.js 15 + React Flow)
|
||||
|
||||
# bp-exempt: post-merge canvas promote side-effect; merge is gated by CI /
|
||||
# all-required and this job waits for green push CI on the SHA before acting.
|
||||
promote-canvas:
|
||||
name: Promote canvas :latest to CI-green build
|
||||
needs: build-and-push
|
||||
# Only on a real main push — workflow_dispatch / non-main never promotes.
|
||||
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
|
||||
# Side-effect deploy only; the image publish above is the durable artifact.
|
||||
# mc#1982: do NOT renew this mask silently — it mirrors deploy-production's
|
||||
# contract (a flaky promote must not red the ship lane), tracked in core#2226.
|
||||
continue-on-error: true
|
||||
runs-on: publish
|
||||
timeout-minutes: 60
|
||||
env:
|
||||
# Same green-main-CI gate the platform deploy-production waits on, so
|
||||
# platform + canvas advance :latest off the identical signal/SHA.
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
GITEA_TOKEN: ${{ secrets.PROD_AUTO_DEPLOY_CONTROL_TOKEN || secrets.AUTO_SYNC_TOKEN }}
|
||||
CI_STATUS_TIMEOUT_SECONDS: "3600"
|
||||
# Re-uses the platform's disable kill-switch: when prod auto-deploy is
|
||||
# paused, the canvas :latest promote pauses too (correct — an unpromoted
|
||||
# build must not become :latest while the fleet is frozen).
|
||||
PROD_AUTO_DEPLOY_DISABLED: ${{ vars.PROD_AUTO_DEPLOY_DISABLED || secrets.PROD_AUTO_DEPLOY_DISABLED || '' }}
|
||||
steps:
|
||||
# The publish runner's default HOME (/home/hongming) is not writable, so
|
||||
# docker credential saves fail and halt the promote (#2193 on the platform
|
||||
# side). Point HOME + DOCKER_CONFIG at the writable job temp dir.
|
||||
- name: Prepare writable HOME + Docker config
|
||||
run: |
|
||||
set -euo pipefail
|
||||
H="$RUNNER_TEMP/canvas-promote-home"
|
||||
mkdir -p "$H/.docker"
|
||||
echo "HOME=$H" >> "$GITHUB_ENV"
|
||||
echo "DOCKER_CONFIG=$H/.docker" >> "$GITHUB_ENV"
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Resolve promote gate
|
||||
id: gate
|
||||
env:
|
||||
PROD_AUTO_DEPLOY_DISABLED: ${{ env.PROD_AUTO_DEPLOY_DISABLED }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if [ -n "${PROD_AUTO_DEPLOY_DISABLED:-}" ]; then
|
||||
case "$(printf '%s' "$PROD_AUTO_DEPLOY_DISABLED" | tr '[:upper:]' '[:lower:]')" in
|
||||
1|true|yes|on|disabled|disable)
|
||||
echo "enabled=false" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Canvas :latest promote skipped: PROD_AUTO_DEPLOY_DISABLED=$PROD_AUTO_DEPLOY_DISABLED"
|
||||
{
|
||||
echo "## Canvas :latest promote skipped"
|
||||
echo ""
|
||||
echo "Reason: \`PROD_AUTO_DEPLOY_DISABLED=$PROD_AUTO_DEPLOY_DISABLED\`. The CI-green build is published as \`:staging-${GITHUB_SHA::7}\`; \`:latest\` was left unchanged."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
exit 0 ;;
|
||||
esac
|
||||
fi
|
||||
if [ -z "${GITEA_TOKEN:-}" ]; then
|
||||
echo "::error::AUTO_SYNC_TOKEN/PROD_AUTO_DEPLOY_CONTROL_TOKEN is required so the canvas promote can wait for green CI."
|
||||
exit 1
|
||||
fi
|
||||
echo "enabled=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Wait for green main CI on this SHA
|
||||
if: ${{ steps.gate.outputs.enabled == 'true' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Same SSOT wait the platform deploy uses: blocks until the required
|
||||
# push contexts (CI / all-required (push) + Secret scan) go green on
|
||||
# THIS sha, and fails closed if any required context terminally fails.
|
||||
python3 .gitea/scripts/prod-auto-deploy.py wait-ci
|
||||
|
||||
- name: Promote canvas :latest to the CI-green image
|
||||
if: ${{ steps.gate.outputs.enabled == 'true' }}
|
||||
env:
|
||||
IMAGE_NAME: ${{ env.IMAGE_NAME }}
|
||||
STAGING_SHA_TAG: ${{ needs.build-and-push.outputs.staging_sha }}
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_DEFAULT_REGION: us-east-2
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Fail-safe: if the build job's output didn't propagate, recompute the
|
||||
# immutable per-commit tag from the SHA so we never promote a guess.
|
||||
SHA_TAG="${STAGING_SHA_TAG:-staging-${GITHUB_SHA::7}}"
|
||||
ECR_REGISTRY="${IMAGE_NAME%%/*}"
|
||||
aws ecr get-login-password --region us-east-2 | \
|
||||
docker login --username AWS --password-stdin "${ECR_REGISTRY}"
|
||||
|
||||
# Digest-level re-tag (no pull/rebuild): :latest becomes byte-identical
|
||||
# to the verified :staging-<sha> for this commit.
|
||||
docker buildx imagetools create \
|
||||
--tag "${IMAGE_NAME}:latest" \
|
||||
"${IMAGE_NAME}:${SHA_TAG}"
|
||||
|
||||
{
|
||||
echo "## Canvas :latest promoted"
|
||||
echo ""
|
||||
echo "Re-pointed \`molecule-ai/canvas:latest\` → \`${SHA_TAG}\` (by digest)."
|
||||
echo ":latest now tracks the CI-green canvas build for commit \`${GITHUB_SHA::7}\`."
|
||||
echo ""
|
||||
echo "Tenants/hosts that \`docker compose pull canvas\` now get the same build the platform deploy rolled for this SHA."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
|
||||
@@ -115,6 +115,19 @@ jobs:
|
||||
echo "Docker daemon OK"
|
||||
echo "::endgroup::"
|
||||
|
||||
# Pre-flight: verify every repo in manifest.json actually exists.
|
||||
#
|
||||
# Why: deleting a template repo without updating manifest.json breaks
|
||||
# clone-manifest.sh with a generic git 404, which looks like a
|
||||
# transient network error and wastes debug time. We catch it here
|
||||
# with a per-entry ::error:: annotation naming the missing repo
|
||||
# (issue #2192). This is the push-time complement to PR #2186's
|
||||
# PR-time manifest-entry-existence gate.
|
||||
- name: Validate manifest entries exist
|
||||
run: |
|
||||
set -euo pipefail
|
||||
bash scripts/check-manifest-repos-exist.sh manifest.json
|
||||
|
||||
# Pre-clone manifest deps before docker build.
|
||||
#
|
||||
# Why: workspace-template-* repos on Gitea are private. The pre-fix
|
||||
@@ -262,6 +275,11 @@ jobs:
|
||||
PROD_AUTO_DEPLOY_BATCH_SIZE: ${{ vars.PROD_AUTO_DEPLOY_BATCH_SIZE || '3' }}
|
||||
PROD_AUTO_DEPLOY_DRY_RUN: ${{ vars.PROD_AUTO_DEPLOY_DRY_RUN || '' }}
|
||||
PROD_ALLOW_NON_PROD_CP_URL: ${{ vars.PROD_ALLOW_NON_PROD_CP_URL || '' }}
|
||||
# #2213: per-tenant /buildinfo settle budget. A freshly-swapped tenant can
|
||||
# keep serving the old image at the edge for a short drain window; the
|
||||
# verify step polls each tenant up to this budget before declaring it stale.
|
||||
PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS: ${{ vars.PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS || '240' }}
|
||||
PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS: ${{ vars.PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS || '20' }}
|
||||
steps:
|
||||
# The publish runner's default HOME (/home/hongming) is not writable, so
|
||||
# git/docker credential saves fail (`Error saving credentials: mkdir
|
||||
@@ -320,8 +338,50 @@ jobs:
|
||||
set -euo pipefail
|
||||
python3 .gitea/scripts/prod-auto-deploy.py wait-ci
|
||||
|
||||
- name: Call production CP redeploy-fleet
|
||||
# Superseded-job guard — BEFORE any production side effect (#2213).
|
||||
#
|
||||
# This workflow has no `concurrency:` (see header: Gitea 1.22.6 cancels
|
||||
# queued prod deploys). So two close main pushes run BOTH deploy-production
|
||||
# jobs. The verify step already skips its strict /buildinfo check when this
|
||||
# job is superseded (#2194) — but that guard was AFTER the redeploy and the
|
||||
# :latest promote, so an OLDER job that started late still:
|
||||
# 1. rolled the whole fleet BACKWARD to its older tag (canary hongming
|
||||
# was reverted from the newer SHA — the #2213 red), then
|
||||
# 2. promoted :latest backward to the older image,
|
||||
# and only THEN skipped verify and exited green. A superseded job must do
|
||||
# NEITHER. We re-check the branch head here, immediately before the rollout,
|
||||
# and skip every side effect when a newer commit already owns main.
|
||||
#
|
||||
# exit 0 + non-empty stdout => superseded (newer head printed); the redeploy
|
||||
# and promote steps are gated off via this output. exit 10 => this job is
|
||||
# still the latest, proceed to roll the fleet. Fail-safe: a head that can't
|
||||
# be read returns NOT-superseded (exit 10), so a genuine deploy is never
|
||||
# silently skipped. (Re-checked again at verify time to catch a newer job
|
||||
# that lands DURING this rollout.)
|
||||
- name: Check superseded before production side effects
|
||||
id: supersede
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
set +e
|
||||
NEWER_HEAD="$(python3 .gitea/scripts/prod-auto-deploy.py check-superseded)"
|
||||
SUPERSEDED_EXIT=$?
|
||||
set -e
|
||||
if [ "$SUPERSEDED_EXIT" -eq 0 ] && [ -n "$NEWER_HEAD" ]; then
|
||||
echo "superseded=true" >> "$GITHUB_OUTPUT"
|
||||
echo "::notice::Superseded before rollout: main head is now ${NEWER_HEAD:0:7} (this job deploys ${GITHUB_SHA:0:7}). Skipping redeploy + :latest promote so an older job never rolls the fleet backward."
|
||||
{
|
||||
echo "## Production auto-deploy skipped — superseded before rollout"
|
||||
echo ""
|
||||
echo "This deploy job's SHA \`${GITHUB_SHA:0:7}\` is no longer the head of \`main\` (now \`${NEWER_HEAD:0:7}\`)."
|
||||
echo "A newer deploy job owns the fleet; rolling it backward to this older build would revert tenants and \`:latest\`. No side effects performed."
|
||||
} >> "$GITHUB_STEP_SUMMARY"
|
||||
else
|
||||
echo "superseded=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- name: Call production CP redeploy-fleet
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
python3 .gitea/scripts/prod-auto-deploy.py assert-enabled
|
||||
@@ -380,7 +440,11 @@ jobs:
|
||||
fi
|
||||
|
||||
- name: Verify reachable tenants report this SHA
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' }}
|
||||
# Skip when superseded BEFORE rollout: the redeploy step did not run, so
|
||||
# there is no redeploy-fleet response to verify against and the newer job
|
||||
# owns verification (#2213). The in-step guard below still catches the
|
||||
# case where a newer job lands DURING this job's rollout.
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
|
||||
env:
|
||||
TENANT_DOMAIN: moleculesai.app
|
||||
run: |
|
||||
@@ -422,6 +486,20 @@ jobs:
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Per-tenant settle/retry budget (#2213). A tenant whose container the
|
||||
# CP just swapped can keep serving the OLD image at the edge for a short
|
||||
# window while the old container drains — /buildinfo returns HTTP 200
|
||||
# with the previous SHA, which `curl --retry` does NOT retry (it only
|
||||
# retries connection/5xx failures, not a stale-but-200 body). Without a
|
||||
# settle window a still-rolling tenant false-reds "stale" on the very
|
||||
# first poll. So poll each tenant's /buildinfo until it reports the
|
||||
# target SHA or the budget is exhausted; only THEN declare it stale or
|
||||
# unreachable. This never masks a genuinely stuck tenant — a tenant that
|
||||
# never reaches the target within the budget still fails loud (and the
|
||||
# superseded-job revert class is already blocked before rollout above).
|
||||
SETTLE_BUDGET_SECONDS="${PROD_AUTO_DEPLOY_VERIFY_BUDGET_SECONDS:-240}"
|
||||
SETTLE_INTERVAL_SECONDS="${PROD_AUTO_DEPLOY_VERIFY_INTERVAL_SECONDS:-20}"
|
||||
|
||||
STALE_COUNT=0
|
||||
UNREACHABLE_COUNT=0
|
||||
UNHEALTHY_COUNT=0
|
||||
@@ -433,18 +511,36 @@ jobs:
|
||||
continue
|
||||
fi
|
||||
url="https://${slug}.${TENANT_DOMAIN}/buildinfo"
|
||||
body="$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$url" || true)"
|
||||
actual="$(echo "$body" | jq -r '.git_sha // ""' 2>/dev/null || echo "")"
|
||||
if [ -z "$actual" ]; then
|
||||
echo "::error::$slug did not return /buildinfo after deploy."
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
continue
|
||||
fi
|
||||
if [ "$actual" != "$GITHUB_SHA" ]; then
|
||||
echo "::error::$slug is stale: actual=${actual:0:7}, expected=${GITHUB_SHA:0:7}"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
else
|
||||
deadline=$(( $(date +%s) + SETTLE_BUDGET_SECONDS ))
|
||||
actual=""
|
||||
last_actual=""
|
||||
on_target=false
|
||||
while :; do
|
||||
body="$(curl -sS --max-time 30 --retry 3 --retry-delay 5 --retry-connrefused "$url" || true)"
|
||||
actual="$(echo "$body" | jq -r '.git_sha // ""' 2>/dev/null || echo "")"
|
||||
[ -n "$actual" ] && last_actual="$actual"
|
||||
if [ "$actual" = "$GITHUB_SHA" ]; then
|
||||
on_target=true
|
||||
break
|
||||
fi
|
||||
now=$(date +%s)
|
||||
if [ "$now" -ge "$deadline" ]; then
|
||||
break
|
||||
fi
|
||||
# Still rolling (stale 200) or transiently unreachable — wait and
|
||||
# re-poll within the settle budget rather than failing on first read.
|
||||
remaining=$(( deadline - now ))
|
||||
echo "$slug: waiting for target SHA (have '${actual:0:7}', want ${GITHUB_SHA:0:7}; ${remaining}s left)"
|
||||
sleep "$SETTLE_INTERVAL_SECONDS"
|
||||
done
|
||||
if [ "$on_target" = true ]; then
|
||||
echo "$slug: ${actual:0:7}"
|
||||
elif [ -z "$last_actual" ]; then
|
||||
echo "::error::$slug did not return /buildinfo after deploy (waited ${SETTLE_BUDGET_SECONDS}s)."
|
||||
UNREACHABLE_COUNT=$((UNREACHABLE_COUNT + 1))
|
||||
else
|
||||
echo "::error::$slug is stale: actual=${last_actual:0:7}, expected=${GITHUB_SHA:0:7} (waited ${SETTLE_BUDGET_SECONDS}s)"
|
||||
STALE_COUNT=$((STALE_COUNT + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -488,8 +584,12 @@ jobs:
|
||||
#
|
||||
# Re-tag is digest-level (imagetools create), so no rebuild and
|
||||
# :latest is byte-identical to :staging-<sha> for this commit.
|
||||
# Gate on supersede: a superseded older job must NOT move :latest backward
|
||||
# to its older image (#2213 — 275383 promoted :latest → the older
|
||||
# staging-7a72516 after a newer job had already shipped). :latest must only
|
||||
# ever advance under the job that owns main's head.
|
||||
- name: Promote :latest to the verified prod image
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' }}
|
||||
if: ${{ steps.plan.outputs.enabled == 'true' && steps.supersede.outputs.superseded != 'true' }}
|
||||
env:
|
||||
TENANT_IMAGE_NAME: ${{ env.TENANT_IMAGE_NAME }}
|
||||
STAGING_TENANT_IMAGE_NAME: ${{ env.STAGING_TENANT_IMAGE_NAME }}
|
||||
|
||||
@@ -60,6 +60,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# bp-required: pending #718 — soak-then-promote, not in BP yet.
|
||||
compare:
|
||||
name: Compare synced providers.yaml against controlplane canonical
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -67,6 +67,7 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# bp-required: pending #718 — soak-then-promote, not in BP yet.
|
||||
verify:
|
||||
name: Regenerate providers artifact and fail on drift
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -24,6 +24,17 @@ COPY --from=builder /app/public ./public
|
||||
EXPOSE 3000
|
||||
ENV PORT=3000
|
||||
ENV HOSTNAME="0.0.0.0"
|
||||
# Git SHA the image was built from, surfaced at /api/buildinfo so canvas
|
||||
# deploys are verifiable by the served SHA the same way workspace-server's
|
||||
# /buildinfo is (core#2235). Wired from `${{ github.sha }}` in
|
||||
# publish-canvas-image.yml. Server-only (not NEXT_PUBLIC_) — the route
|
||||
# handler reads it at runtime on the standalone Node server, so it stays
|
||||
# out of the client bundle. Set on the final stage (not the builder) so it
|
||||
# lives in the runtime env that force-dynamic reads per request. Default
|
||||
# "dev" matches the route + workspace-server sentinel: an unwired build
|
||||
# fails the SHA comparison closed instead of looking deployed.
|
||||
ARG BUILD_SHA=dev
|
||||
ENV BUILD_SHA=$BUILD_SHA
|
||||
# Non-root runtime — use addgroup/adduser without fixed GID/UID to avoid conflicts with base image
|
||||
RUN addgroup canvas 2>/dev/null || true && adduser -G canvas -s /bin/sh -D canvas 2>/dev/null || true
|
||||
USER canvas
|
||||
|
||||
@@ -241,7 +241,14 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
name: "E2E Canvas Test",
|
||||
runtime: "hermes",
|
||||
tier: 2,
|
||||
model: "gpt-4o",
|
||||
// Provider-registry SSOT (internal#718) registers ONLY Kimi models for
|
||||
// the hermes runtime — `moonshot/kimi-k2.6` is the platform-managed
|
||||
// entry (workspace-server/internal/providers/providers.yaml, hermes ->
|
||||
// platform). The old `gpt-4o` was never a registered hermes model and
|
||||
// now 422s UNREGISTERED_MODEL_FOR_RUNTIME (core#2225). This workspace
|
||||
// defaults closed to platform_managed (see the boot-shape note below),
|
||||
// so a platform-namespaced model id is the registry-correct choice.
|
||||
model: "moonshot/kimi-k2.6",
|
||||
}),
|
||||
});
|
||||
if (ws.status >= 400 || !ws.body?.id) {
|
||||
@@ -264,7 +271,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
// CP proxy env absent) made a platform_managed workspace ABORT AT BOOT
|
||||
// with MISSING_PLATFORM_PROXY when MOLECULE_LLM_BASE_URL /
|
||||
// MOLECULE_LLM_USAGE_TOKEN are not present in the tenant's env. The
|
||||
// canvas E2E creates a bare hermes/gpt-4o workspace, which defaults
|
||||
// canvas E2E creates a bare hermes/moonshot platform workspace, which defaults
|
||||
// closed to platform_managed (workspace_provision.go:~1009), and the
|
||||
// staging tenant does not carry the CP proxy env — so the agent never
|
||||
// starts. Pre-#2162 this same workspace booted credential-less (the bug
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
/**
|
||||
* Canvas /api/buildinfo — version-display endpoint mirroring
|
||||
* workspace-server's /buildinfo. Lets `curl <url>/api/buildinfo`
|
||||
* confirm which git SHA is live on a canvas deployment.
|
||||
* confirm which git SHA is live on a canvas deployment (core#2235).
|
||||
*/
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { GET } from "../route";
|
||||
|
||||
const ENV_KEYS = ["VERCEL_GIT_COMMIT_SHA", "VERCEL_GIT_COMMIT_REF", "VERCEL_ENV"];
|
||||
const ENV_KEYS = [
|
||||
"BUILD_SHA",
|
||||
"VERCEL_GIT_COMMIT_SHA",
|
||||
"VERCEL_GIT_COMMIT_REF",
|
||||
"VERCEL_ENV",
|
||||
];
|
||||
|
||||
describe("GET /api/buildinfo", () => {
|
||||
let saved: Record<string, string | undefined>;
|
||||
@@ -23,13 +28,24 @@ describe("GET /api/buildinfo", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("returns dev sentinel when Vercel env vars are unset", async () => {
|
||||
it("returns dev sentinel when no SHA source is set", async () => {
|
||||
const res = await GET();
|
||||
const body = await res.json();
|
||||
expect(body).toEqual({ git_sha: "dev", git_ref: "", vercel_env: "local" });
|
||||
});
|
||||
|
||||
it("reports the SHA Vercel injected at build time", async () => {
|
||||
it("reports BUILD_SHA baked into the Docker image (fleet deploy path)", async () => {
|
||||
// BUILD_SHA is the authoritative source for the ECR-image fleet deploy,
|
||||
// which never runs on Vercel. It must win even when a Vercel var is also
|
||||
// present in the environment.
|
||||
process.env.BUILD_SHA = "deadbeefcafe";
|
||||
process.env.VERCEL_GIT_COMMIT_SHA = "should-not-win";
|
||||
const res = await GET();
|
||||
const body = await res.json();
|
||||
expect(body.git_sha).toBe("deadbeefcafe");
|
||||
});
|
||||
|
||||
it("falls back to the SHA Vercel injected when BUILD_SHA is unset", async () => {
|
||||
process.env.VERCEL_GIT_COMMIT_SHA = "abc1234567890";
|
||||
process.env.VERCEL_GIT_COMMIT_REF = "main";
|
||||
process.env.VERCEL_ENV = "production";
|
||||
|
||||
@@ -1,17 +1,36 @@
|
||||
import { NextResponse } from "next/server";
|
||||
|
||||
// Mirror of workspace-server's GET /buildinfo (PR #2398). Lets a developer
|
||||
// confirm which git SHA is live on a canvas deployment with the same
|
||||
// `curl <url>/buildinfo` flow they use against tenant workspaces.
|
||||
// or the fleet redeploy workflow confirm which git SHA is live on a canvas
|
||||
// deployment with the same `curl <url>/api/buildinfo` flow used against
|
||||
// tenant workspaces (core#2235; cross-ref core#2226).
|
||||
//
|
||||
// Vercel injects VERCEL_GIT_COMMIT_SHA / _REF / VERCEL_ENV at build time
|
||||
// from the deploying commit; outside Vercel (local `next dev`, harness)
|
||||
// these are unset and the endpoint reports `git_sha: "dev"`. Same sentinel
|
||||
// the workspace-server uses pre-ldflags-injection so both surfaces speak
|
||||
// the same vocabulary.
|
||||
// SHA source, in priority order:
|
||||
// 1. BUILD_SHA — server-only env baked into the canvas Docker image at
|
||||
// build time (Dockerfile `ARG BUILD_SHA` → `ENV BUILD_SHA`, wired
|
||||
// from `${{ github.sha }}` in publish-canvas-image.yml). This is the
|
||||
// authoritative source for the fleet's ECR-image deploy path, which
|
||||
// does NOT run on Vercel. Read server-side here (App Router route
|
||||
// handler runs on the standalone Node server, `output: "standalone"`),
|
||||
// so it is intentionally NOT a NEXT_PUBLIC_ var — keeping it out of
|
||||
// the client bundle.
|
||||
// 2. VERCEL_GIT_COMMIT_SHA — Vercel injects this at build time when the
|
||||
// canvas is deployed via Vercel rather than the Docker image.
|
||||
// 3. "dev" — local `next dev` / test harness, where neither is set. Same
|
||||
// sentinel workspace-server uses pre-ldflags-injection, so both
|
||||
// surfaces speak the same vocabulary and an unconfigured deploy
|
||||
// fails the SHA comparison closed instead of round-tripping "".
|
||||
//
|
||||
// force-dynamic so the response is evaluated at request time against the
|
||||
// runtime env of the standalone server (where ENV BUILD_SHA lives), not
|
||||
// frozen into a static asset at `next build`.
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
export async function GET() {
|
||||
const sha =
|
||||
process.env.BUILD_SHA ?? process.env.VERCEL_GIT_COMMIT_SHA ?? "dev";
|
||||
return NextResponse.json({
|
||||
git_sha: process.env.VERCEL_GIT_COMMIT_SHA ?? "dev",
|
||||
git_sha: sha,
|
||||
git_ref: process.env.VERCEL_GIT_COMMIT_REF ?? "",
|
||||
vercel_env: process.env.VERCEL_ENV ?? "local",
|
||||
});
|
||||
|
||||
@@ -10,6 +10,7 @@ import {
|
||||
buildProviderCatalog,
|
||||
buildProviderCatalogFromRegistry,
|
||||
findProviderForModel,
|
||||
isPlatformManagedProvider,
|
||||
type SelectorModel,
|
||||
type SelectorValue,
|
||||
type RegistryProvider,
|
||||
@@ -290,7 +291,15 @@ export function CreateWorkspaceButton() {
|
||||
setError("Model is required");
|
||||
return;
|
||||
}
|
||||
if (!isExternal && selectedLLMProvider?.envVars.length && !llmSecret.trim()) {
|
||||
// Platform-managed providers need NO user credential — the platform injects
|
||||
// its own usage token (MOLECULE_LLM_USAGE_TOKEN = tenant admin_token) at
|
||||
// provision time. Only BYOK providers require a user-supplied key. (#2245)
|
||||
if (
|
||||
!isExternal &&
|
||||
!isPlatformManagedProvider(selectedLLMProvider) &&
|
||||
selectedLLMProvider?.envVars.length &&
|
||||
!llmSecret.trim()
|
||||
) {
|
||||
setError("Provider credential is required");
|
||||
return;
|
||||
}
|
||||
@@ -325,7 +334,11 @@ export function CreateWorkspaceButton() {
|
||||
? {
|
||||
model: llmSelection.model.trim(),
|
||||
llm_provider: nativeProvider.vendor,
|
||||
...(nativeProvider.envVars.length > 0
|
||||
// Only BYOK providers carry a user secret. For platform-managed
|
||||
// the token is provisioner-injected; sending an (empty) secret
|
||||
// here would clobber it — so omit it entirely. (#2245)
|
||||
...(nativeProvider.envVars.length > 0 &&
|
||||
!isPlatformManagedProvider(nativeProvider)
|
||||
? { secrets: { [nativeProvider.envVars[0]]: llmSecret.trim() } }
|
||||
: {}),
|
||||
}
|
||||
@@ -521,20 +534,26 @@ export function CreateWorkspaceButton() {
|
||||
idPrefix="create-workspace-llm"
|
||||
variant="stack"
|
||||
/>
|
||||
{selectedLLMProvider.envVars.length > 0 && (
|
||||
<div>
|
||||
<label htmlFor="llm-secret-input" className="text-[11px] text-ink-mid block mb-1">
|
||||
{selectedLLMProvider.envVars[0]}
|
||||
</label>
|
||||
<input
|
||||
id="llm-secret-input"
|
||||
type="password"
|
||||
value={llmSecret}
|
||||
onChange={(e) => setLLMSecret(e.target.value)}
|
||||
autoComplete="off"
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors font-mono"
|
||||
/>
|
||||
{isPlatformManagedProvider(selectedLLMProvider) ? (
|
||||
<div className="text-[11px] text-ink-soft">
|
||||
Platform-managed — no API key required.
|
||||
</div>
|
||||
) : (
|
||||
selectedLLMProvider.envVars.length > 0 && (
|
||||
<div>
|
||||
<label htmlFor="llm-secret-input" className="text-[11px] text-ink-mid block mb-1">
|
||||
{selectedLLMProvider.envVars[0]}
|
||||
</label>
|
||||
<input
|
||||
id="llm-secret-input"
|
||||
type="password"
|
||||
value={llmSecret}
|
||||
onChange={(e) => setLLMSecret(e.target.value)}
|
||||
autoComplete="off"
|
||||
className="w-full bg-surface-card/60 border border-line/50 rounded-lg px-3 py-2 text-sm text-ink placeholder-ink-soft focus:outline-none focus:border-accent/60 focus:ring-1 focus:ring-accent/20 transition-colors font-mono"
|
||||
/>
|
||||
</div>
|
||||
)
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -55,6 +55,21 @@ export interface ProviderEntry {
|
||||
billingMode?: "platform_managed" | "byok";
|
||||
}
|
||||
|
||||
/**
 * Reports whether a provider is "platform-managed".
 *
 * For a platform-managed provider the Molecule platform proxies the LLM call
 * and injects its own usage credential: the tenant admin_token, which the CP
 * provisioner surfaces to the workspace as MOLECULE_LLM_USAGE_TOKEN
 * (controlplane ec2.go: `MOLECULE_LLM_USAGE_TOKEN="$ADMIN_TOKEN"`). That
 * credential is internal plumbing, so the user supplies NO key.
 *
 * A provider counts as platform-managed when either holds:
 *   - vendor === "platform" (the platform proxy provider, which declares
 *     MOLECULE_LLM_USAGE_TOKEN in its AuthEnv), or
 *   - billingMode === "platform_managed" (registry-backed, internal#718 P3).
 *
 * @param p Provider to classify; only `vendor` and `billingMode` are read.
 *          May be omitted, `null`, or `undefined`.
 * @returns `true` for a platform-managed provider; `false` for BYOK
 *          providers (which DO require a user-supplied credential) and for a
 *          missing provider.
 */
export function isPlatformManagedProvider(
  p?: Pick<ProviderEntry, "vendor" | "billingMode"> | null,
): boolean {
  // No provider selected yet: nothing can be platform-managed.
  if (!p) {
    return false;
  }
  // The platform proxy vendor is always platform-managed.
  if (p.vendor === "platform") {
    return true;
  }
  // Otherwise defer to the registry-declared billing mode.
  return p.billingMode === "platform_managed";
}
|
||||
|
||||
/** RegistryProvider mirrors one entry of GET /templates `registry_providers`
|
||||
* (workspace-server registryProviderView): the registry's native provider for
|
||||
* a runtime, with its display label, auth-env NAMES, and billing mode. This is
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
|
||||
import { render, screen, fireEvent, waitFor, cleanup } from "@testing-library/react";
|
||||
import { CreateWorkspaceButton } from "../CreateWorkspaceDialog";
|
||||
import { isPlatformManagedProvider } from "../ProviderModelSelector";
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
api: {
|
||||
@@ -65,6 +66,34 @@ const SAMPLE_TEMPLATES = [
|
||||
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: [] },
|
||||
],
|
||||
},
|
||||
// #2245 fixtures. The real registry `platform` provider declares
|
||||
// MOLECULE_LLM_USAGE_TOKEN in its auth_env — the default mock above masks the
|
||||
// bug by using required_env:[]. This template gives the platform provider a
|
||||
// non-empty auth env (matching production) so the credential-suppression
|
||||
// logic is actually exercised.
|
||||
{
|
||||
id: "platform-managed-test",
|
||||
name: "Platform Managed Test",
|
||||
runtime: "claude-code",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
providers: ["platform", "minimax"],
|
||||
models: [
|
||||
{ id: "moonshot/kimi-k2.6", name: "Kimi K2.6", provider: "platform", required_env: ["MOLECULE_LLM_USAGE_TOKEN"] },
|
||||
{ id: "MiniMax-M2.7", name: "MiniMax M2.7", required_env: ["MINIMAX_API_KEY"] },
|
||||
],
|
||||
},
|
||||
// BYOK-only template (no platform provider) — the credential requirement
|
||||
// MUST still hold for these (no-regression guard).
|
||||
{
|
||||
id: "byok-only-test",
|
||||
name: "BYOK Only Test",
|
||||
runtime: "claude-code",
|
||||
model: "openai/gpt-4o",
|
||||
providers: ["openai"],
|
||||
models: [
|
||||
{ id: "openai/gpt-4o", name: "GPT-4o", required_env: ["OPENAI_API_KEY"] },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
beforeEach(() => {
|
||||
@@ -498,6 +527,25 @@ const REGISTRY_TEMPLATE = {
|
||||
],
|
||||
};
|
||||
|
||||
// Registry-backed platform provider WITH a non-empty auth_env — this matches
|
||||
// the PRODUCTION provider view, which ships the raw AuthEnv
|
||||
// ([MOLECULE_LLM_USAGE_TOKEN]). REGISTRY_TEMPLATE above uses auth_env:[] so it
|
||||
// never exercises suppression; this one drives the
|
||||
// billingMode==="platform_managed" branch end-to-end through buildProviderCatalogFromRegistry. (#2245)
|
||||
const REGISTRY_TEMPLATE_PLATFORM_AUTHENV = {
|
||||
...REGISTRY_TEMPLATE,
|
||||
registry_providers: [
|
||||
{
|
||||
name: "platform",
|
||||
display_name: "Platform",
|
||||
auth_env: ["MOLECULE_LLM_USAGE_TOKEN"],
|
||||
billing_mode: "platform_managed",
|
||||
},
|
||||
{ name: "minimax", display_name: "MiniMax", auth_env: ["MINIMAX_API_KEY"], billing_mode: "byok" },
|
||||
{ name: "anthropic", display_name: "Anthropic API", auth_env: ["ANTHROPIC_API_KEY"], billing_mode: "byok" },
|
||||
],
|
||||
};
|
||||
|
||||
describe("CreateWorkspaceDialog — registry-backed provider catalog (RFC#340 Fix C)", () => {
|
||||
beforeEach(() => {
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
@@ -574,6 +622,41 @@ describe("CreateWorkspaceDialog — registry-backed provider catalog (RFC#340 Fi
|
||||
expect(body.llm_provider).toBe("minimax");
|
||||
expect(body.secrets).toEqual({ MINIMAX_API_KEY: "sk-minimax-test" });
|
||||
});
|
||||
|
||||
it("suppresses the credential for a registry-backed platform provider that declares an auth_env — billingMode path (#2245)", async () => {
|
||||
// Override the default REGISTRY_TEMPLATE (auth_env:[]) with the production-
|
||||
// shaped one whose platform provider declares MOLECULE_LLM_USAGE_TOKEN.
|
||||
mockGet.mockImplementation(async (url: string) => {
|
||||
if (url === "/templates") {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return [REGISTRY_TEMPLATE_PLATFORM_AUTHENV] as any;
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
return SAMPLE_WORKSPACES as any;
|
||||
});
|
||||
await openDialog();
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
|
||||
target: { value: "Registry Platform Agent" },
|
||||
});
|
||||
// Platform is the default bucket; even with a non-empty auth_env the key
|
||||
// field must NOT render (suppressed via billingMode==="platform_managed").
|
||||
await waitFor(() => {
|
||||
const sel = document.querySelector("[data-testid='provider-select']") as HTMLSelectElement;
|
||||
expect(sel?.value).toBe("registry|platform");
|
||||
});
|
||||
expect(screen.getByText("Platform-managed — no API key required.")).toBeTruthy();
|
||||
expect(document.getElementById("llm-secret-input")).toBeNull();
|
||||
|
||||
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
|
||||
fireEvent.click(createBtn!);
|
||||
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
expect(screen.queryByText("Provider credential is required")).toBeNull();
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.llm_provider).toBe("platform");
|
||||
// The provisioner-injected MOLECULE_LLM_USAGE_TOKEN must NOT be clobbered.
|
||||
expect(body.secrets).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -657,3 +740,70 @@ describe("CreateWorkspaceDialog — budget_limit field", () => {
|
||||
expect(budgetInput.value).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
describe("CreateWorkspaceDialog — platform-managed credential suppression (#2245)", () => {
|
||||
describe("isPlatformManagedProvider", () => {
|
||||
it("is true for the platform proxy vendor", () => {
|
||||
expect(isPlatformManagedProvider({ vendor: "platform" })).toBe(true);
|
||||
});
|
||||
it("is true for a registry billingMode of platform_managed", () => {
|
||||
expect(
|
||||
isPlatformManagedProvider({ vendor: "minimax", billingMode: "platform_managed" }),
|
||||
).toBe(true);
|
||||
});
|
||||
it("is false for a BYOK provider", () => {
|
||||
expect(isPlatformManagedProvider({ vendor: "anthropic", billingMode: "byok" })).toBe(false);
|
||||
expect(isPlatformManagedProvider({ vendor: "minimax" })).toBe(false);
|
||||
});
|
||||
it("is false for null/undefined", () => {
|
||||
expect(isPlatformManagedProvider(null)).toBe(false);
|
||||
expect(isPlatformManagedProvider(undefined)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
it("platform-managed provider with a declared auth env requires NO credential, hides the key field, and sends NO secret", async () => {
|
||||
await openDialog();
|
||||
await setTemplate("platform-managed-test");
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
|
||||
target: { value: "Platform Agent" },
|
||||
});
|
||||
|
||||
// The credential input must NOT render for platform-managed; a "no key
|
||||
// required" note appears instead.
|
||||
await waitFor(() =>
|
||||
expect(screen.getByText("Platform-managed — no API key required.")).toBeTruthy(),
|
||||
);
|
||||
expect(screen.queryByLabelText("MOLECULE_LLM_USAGE_TOKEN")).toBeNull();
|
||||
|
||||
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
|
||||
fireEvent.click(createBtn!);
|
||||
|
||||
await waitFor(() => expect(mockPost).toHaveBeenCalled());
|
||||
// No validation error, and the provisioner-injected token is NOT clobbered
|
||||
// by an empty secret.
|
||||
expect(screen.queryByText("Provider credential is required")).toBeNull();
|
||||
const body = mockPost.mock.calls[0][1] as Record<string, unknown>;
|
||||
expect(body.llm_provider).toBe("platform");
|
||||
expect(body.secrets).toBeUndefined();
|
||||
});
|
||||
|
||||
it("BYOK provider still requires a credential and renders the key field (no-regression)", async () => {
|
||||
await openDialog();
|
||||
await setTemplate("byok-only-test");
|
||||
fireEvent.change(screen.getByPlaceholderText("e.g. SEO Agent"), {
|
||||
target: { value: "BYOK Agent" },
|
||||
});
|
||||
|
||||
// The credential field IS rendered for BYOK...
|
||||
await waitFor(() => expect(screen.getByLabelText("OPENAI_API_KEY")).toBeTruthy());
|
||||
|
||||
const createBtn = screen.getAllByRole("button").find((b) => b.textContent === "Create");
|
||||
fireEvent.click(createBtn!);
|
||||
|
||||
// ...and create stays blocked until it's filled.
|
||||
await waitFor(() =>
|
||||
expect(screen.getByText("Provider credential is required")).toBeTruthy(),
|
||||
);
|
||||
expect(mockPost).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import { useCallback, useEffect, useRef, useState } from "react";
|
||||
import { api } from "@/lib/api";
|
||||
import type RFB from "@novnc/novnc";
|
||||
|
||||
@@ -33,6 +33,11 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
const [controlBusy, setControlBusy] = useState(false);
|
||||
const [sessionUrl, setSessionUrl] = useState<string | null>(null);
|
||||
const requestGeneration = useRef(0);
|
||||
// Freshest signed session URL (token bound to the lease's expires_at). The
|
||||
// renewal timer keeps this current WITHOUT swapping the live stream's
|
||||
// sessionUrl (which would needlessly reconnect the desktop); the stream uses
|
||||
// it only when it has to reconnect after an unclean drop.
|
||||
const latestSessionUrlRef = useRef<string | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
const generation = requestGeneration.current + 1;
|
||||
@@ -41,6 +46,7 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
setStatus(null);
|
||||
setControl(null);
|
||||
setSessionUrl(null);
|
||||
latestSessionUrlRef.current = null;
|
||||
setError(null);
|
||||
setControlError(null);
|
||||
setControlBusy(false);
|
||||
@@ -69,6 +75,41 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
};
|
||||
}, [workspaceId]);
|
||||
|
||||
// (Re-)acquire the display-control lease on behalf of the current holder.
// Each acquire extends the 300s server-side lock AND yields a freshly-signed
// session URL whose token is bound to the new expires_at. Two callers rely on
// this: the renewal timer, and every stream reconnect — a cached URL may be
// past its ~300s expiry, and reconnecting with it would 401.
//
// Resolves to the new session URL, or null when the response carried none,
// the request failed, or the workspace changed while the request was in
// flight (stale generation).
const reacquireSession = useCallback(async (): Promise<string | null> => {
  // Snapshot the generation so a response for a previous workspace is dropped.
  const issuedFor = requestGeneration.current;
  try {
    const lease = await api.post<DisplayControlStatus>(
      `/workspaces/${workspaceId}/display/control/acquire`,
      { controller: "user", ttl_seconds: 300 },
    );
    const superseded = requestGeneration.current !== issuedFor;
    if (superseded) {
      return null;
    }
    setControl(lease);
    const freshUrl = lease.session_url;
    // Only overwrite the cached URL when the server actually returned one.
    if (freshUrl) {
      latestSessionUrlRef.current = freshUrl;
    }
    return freshUrl ?? null;
  } catch {
    // Transient failure, or another holder took over: the live stream keeps
    // running on its existing connection; a reconnect re-evaluates control.
    return null;
  }
}, [workspaceId]);
|
||||
|
||||
// Keep the lease alive for as long as we hold it. The lock is a 300s lease
// with no server-side auto-renewal, so without this timer the control (and
// the session token) silently expire mid-session and the user appears
// "kicked" every ~5 minutes. Renewal fires every 120s — well inside the TTL —
// and never touches the live stream.
useEffect(() => {
  // No active session means no lease to renew.
  if (!sessionUrl) return;
  const renewLease = () => {
    void reacquireSession();
  };
  const renewalHandle = setInterval(renewLease, 120_000);
  return () => {
    clearInterval(renewalHandle);
  };
}, [sessionUrl, reacquireSession]);
|
||||
|
||||
const acquireControl = async () => {
|
||||
const generation = requestGeneration.current;
|
||||
const controlPath = `/workspaces/${workspaceId}/display/control`;
|
||||
@@ -82,6 +123,7 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControl(next);
|
||||
setSessionUrl(next.session_url || null);
|
||||
latestSessionUrlRef.current = next.session_url || null;
|
||||
} catch (err) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControlError("Failed to take control");
|
||||
@@ -108,6 +150,7 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControl(next);
|
||||
setSessionUrl(null);
|
||||
latestSessionUrlRef.current = null;
|
||||
} catch (err) {
|
||||
if (requestGeneration.current !== generation) return;
|
||||
setControlError("Failed to release control");
|
||||
@@ -235,7 +278,11 @@ export function DisplayTab({ workspaceId }: Props) {
|
||||
/>
|
||||
</div>
|
||||
{sessionUrl ? (
|
||||
<DesktopStream sessionUrl={sessionUrl} />
|
||||
<DesktopStream
|
||||
sessionUrl={sessionUrl}
|
||||
latestSessionUrlRef={latestSessionUrlRef}
|
||||
reacquireSession={reacquireSession}
|
||||
/>
|
||||
) : (
|
||||
<div className="flex flex-1 items-center justify-center p-8 text-center">
|
||||
<div>
|
||||
@@ -311,7 +358,15 @@ function DisplayControlBar({
|
||||
);
|
||||
}
|
||||
|
||||
function DesktopStream({ sessionUrl }: { sessionUrl: string }) {
|
||||
function DesktopStream({
|
||||
sessionUrl,
|
||||
latestSessionUrlRef,
|
||||
reacquireSession,
|
||||
}: {
|
||||
sessionUrl: string;
|
||||
latestSessionUrlRef: { current: string | null };
|
||||
reacquireSession: () => Promise<string | null>;
|
||||
}) {
|
||||
const containerRef = useRef<HTMLDivElement | null>(null);
|
||||
const rfbRef = useRef<RFB | null>(null);
|
||||
const [streamError, setStreamError] = useState<string | null>(null);
|
||||
@@ -329,20 +384,37 @@ function DesktopStream({ sessionUrl }: { sessionUrl: string }) {
|
||||
clipboardTimer = setTimeout(() => setClipboardStatus(null), 2500);
|
||||
};
|
||||
|
||||
async function connect() {
|
||||
let attempts = 0;
|
||||
let retryTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
const maxAttempts = 10;
|
||||
|
||||
async function connect(reacquire = false) {
|
||||
setStreamError(null);
|
||||
try {
|
||||
// On a reconnect, mint a fresh lease + token first — the original token
|
||||
// is only ~300s, so a cached URL can be expired and would 401. The
|
||||
// initial connect already holds a fresh token from acquireControl.
|
||||
if (reacquire) await reacquireSession();
|
||||
const mod = await import("@novnc/novnc");
|
||||
if (cancelled || !containerRef.current) return;
|
||||
const stream = displayWebSocketConnection(sessionUrl);
|
||||
const stream = displayWebSocketConnection(latestSessionUrlRef.current || sessionUrl);
|
||||
rfb = new mod.default(containerRef.current, stream.url, {
|
||||
wsProtocols: ["binary", `molecule-display-token.${stream.token}`],
|
||||
});
|
||||
rfbRef.current = rfb;
|
||||
rfb.scaleViewport = true;
|
||||
rfb.resizeSession = true;
|
||||
// Do NOT request a server-side resize: the workspace display runs a
|
||||
// fixed Xorg modeline and x11vnc rejects SetDesktopSize ("Resize is
|
||||
// administratively prohibited"), which spams the console on every
|
||||
// (re)connect. scaleViewport already fits the fixed framebuffer to the
|
||||
// container client-side, so we don't need the server to resize.
|
||||
rfb.resizeSession = false;
|
||||
rfb.focusOnClick = true;
|
||||
rfb.focus({ preventScroll: true });
|
||||
rfb.addEventListener("connect", () => {
|
||||
attempts = 0;
|
||||
if (!cancelled) setStreamError(null);
|
||||
});
|
||||
rfb.addEventListener("clipboard", (event: Event) => {
|
||||
const text = (event as CustomEvent<{ text?: string }>).detail?.text ?? "";
|
||||
if (!text) return;
|
||||
@@ -353,7 +425,20 @@ function DesktopStream({ sessionUrl }: { sessionUrl: string }) {
|
||||
});
|
||||
rfb.addEventListener("disconnect", (event: Event) => {
|
||||
const detail = (event as CustomEvent<{ clean?: boolean }>).detail;
|
||||
if (!cancelled && !detail?.clean) setStreamError("Desktop stream disconnected.");
|
||||
rfbRef.current = null;
|
||||
if (cancelled || detail?.clean) return;
|
||||
// Auto-reconnect after an unclean drop (idle/network blip, brief
|
||||
// agent hiccup); bounded backoff so a genuinely-dead session still
|
||||
// surfaces an error instead of looping forever.
|
||||
if (attempts < maxAttempts) {
|
||||
attempts += 1;
|
||||
setStreamError(`Reconnecting to desktop… (attempt ${attempts})`);
|
||||
retryTimer = setTimeout(() => {
|
||||
if (!cancelled) void connect(true);
|
||||
}, Math.min(1000 * attempts, 5000));
|
||||
} else {
|
||||
setStreamError("Desktop stream disconnected.");
|
||||
}
|
||||
});
|
||||
} catch {
|
||||
if (!cancelled) setStreamError("Desktop stream could not be opened.");
|
||||
@@ -363,11 +448,12 @@ function DesktopStream({ sessionUrl }: { sessionUrl: string }) {
|
||||
connect();
|
||||
return () => {
|
||||
cancelled = true;
|
||||
if (retryTimer) clearTimeout(retryTimer);
|
||||
if (clipboardTimer) clearTimeout(clipboardTimer);
|
||||
rfbRef.current = null;
|
||||
rfb?.disconnect();
|
||||
};
|
||||
}, [sessionUrl]);
|
||||
}, [sessionUrl, reacquireSession, latestSessionUrlRef]);
|
||||
|
||||
useEffect(() => {
|
||||
const onPaste = (event: ClipboardEvent) => {
|
||||
|
||||
@@ -2,12 +2,13 @@
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { cleanup, fireEvent, render, screen, waitFor } from "@testing-library/react";
|
||||
|
||||
const { mockGet, mockPost, mockRFBConstructor, mockRFBClipboardPasteFrom, mockRFBFocus } = vi.hoisted(() => ({
|
||||
const { mockGet, mockPost, mockRFBConstructor, mockRFBClipboardPasteFrom, mockRFBFocus, rfbInstances } = vi.hoisted(() => ({
|
||||
mockGet: vi.fn(),
|
||||
mockPost: vi.fn(),
|
||||
mockRFBConstructor: vi.fn(),
|
||||
mockRFBClipboardPasteFrom: vi.fn(),
|
||||
mockRFBFocus: vi.fn(),
|
||||
rfbInstances: [] as EventTarget[],
|
||||
}));
|
||||
|
||||
vi.mock("@/lib/api", () => ({
|
||||
@@ -31,6 +32,7 @@ vi.mock("@novnc/novnc", () => ({
|
||||
this.url = url;
|
||||
this.options = options;
|
||||
mockRFBConstructor(target, url, options);
|
||||
rfbInstances.push(this);
|
||||
}
|
||||
clipboardPasteFrom(text: string) {
|
||||
mockRFBClipboardPasteFrom(text);
|
||||
@@ -52,6 +54,7 @@ describe("DisplayTab", () => {
|
||||
mockRFBConstructor.mockReset();
|
||||
mockRFBClipboardPasteFrom.mockReset();
|
||||
mockRFBFocus.mockReset();
|
||||
rfbInstances.length = 0;
|
||||
});
|
||||
|
||||
it("renders unavailable state for non-display workspaces", async () => {
|
||||
@@ -400,6 +403,62 @@ describe("DisplayTab", () => {
|
||||
});
|
||||
expect(screen.getByRole("button", { name: "Take control" })).toBeTruthy();
|
||||
});
|
||||
|
||||
it("auto-reconnects the desktop stream after an unclean disconnect but not a clean one", async () => {
|
||||
mockGet
|
||||
.mockResolvedValueOnce({
|
||||
available: true,
|
||||
mode: "desktop-control",
|
||||
protocol: "novnc",
|
||||
width: 1920,
|
||||
height: 1080,
|
||||
})
|
||||
.mockResolvedValueOnce({ controller: "none" });
|
||||
// Initial acquire returns token "signed"; the reconnect re-acquire mints a
|
||||
// FRESH token "signed2" (the lock/token is only ~300s — reconnecting with a
|
||||
// cached, possibly-expired token would 401 and never recover).
|
||||
mockPost
|
||||
.mockResolvedValueOnce({
|
||||
controller: "user",
|
||||
controlled_by: "admin-token",
|
||||
expires_at: "2026-05-23T08:48:27Z",
|
||||
session_url: "/workspaces/ws-display/display/session/websockify#token=signed",
|
||||
})
|
||||
.mockResolvedValue({
|
||||
controller: "user",
|
||||
controlled_by: "admin-token",
|
||||
expires_at: "2026-05-23T08:53:27Z",
|
||||
session_url: "/workspaces/ws-display/display/session/websockify#token=signed2",
|
||||
});
|
||||
|
||||
render(<DisplayTab workspaceId="ws-display" />);
|
||||
await waitFor(() => {
|
||||
expect(screen.getByRole("button", { name: "Take control" })).toBeTruthy();
|
||||
});
|
||||
fireEvent.click(screen.getByRole("button", { name: "Take control" }));
|
||||
await waitFor(() => {
|
||||
expect(rfbInstances.length).toBe(1);
|
||||
});
|
||||
expect(mockRFBConstructor.mock.calls[0][2].wsProtocols).toContain("molecule-display-token.signed");
|
||||
|
||||
// An idle/network drop closes the websocket uncleanly. The client must
|
||||
// re-acquire a fresh token and reconnect instead of giving up — this is the
|
||||
// "disconnects every ~5 min and stays dead" report.
|
||||
rfbInstances[0].dispatchEvent(new CustomEvent("disconnect", { detail: { clean: false } }));
|
||||
await waitFor(
|
||||
() => {
|
||||
expect(rfbInstances.length).toBe(2);
|
||||
},
|
||||
{ timeout: 3000 },
|
||||
);
|
||||
// Reconnect dialed with the FRESH token, not the stale original.
|
||||
expect(mockRFBConstructor.mock.calls[1][2].wsProtocols).toContain("molecule-display-token.signed2");
|
||||
|
||||
// A clean disconnect (the user released control) must NOT reconnect.
|
||||
rfbInstances[1].dispatchEvent(new CustomEvent("disconnect", { detail: { clean: true } }));
|
||||
await new Promise((resolve) => setTimeout(resolve, 1100));
|
||||
expect(rfbInstances.length).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
function deferred<T>() {
|
||||
|
||||
+25
-8
@@ -159,15 +159,28 @@ services:
|
||||
|
||||
# --- Canvas ---
|
||||
canvas:
|
||||
# The publish-canvas-image CI workflow pushes a fresh image to GHCR on
|
||||
# every canvas/** merge to main. To update the running container:
|
||||
# docker compose pull canvas && docker compose up -d canvas
|
||||
# First-time local setup or testing unreleased changes — build from source:
|
||||
# docker compose build canvas && docker compose up -d canvas
|
||||
# The publish-canvas-image CI workflow runs an ORDERED deploy (core#2226):
|
||||
# build → push :staging-<sha> + :staging-latest → (after green main CI)
|
||||
# re-point :latest to the verified :staging-<sha> by digest. So both tags
|
||||
# below resolve to a CI-green, reproducible build, never a raw/red one.
|
||||
#
|
||||
# Reproducible deploy: pin CANVAS_IMAGE_TAG to the immutable per-commit tag
|
||||
# the ordered deploy produced, e.g.
|
||||
# CANVAS_IMAGE_TAG=staging-<sha> docker compose pull canvas && docker compose up -d canvas
|
||||
# This makes a tenant/host deploy reproducible (resolves the standing
|
||||
# `TODO: pin canvas ECR image digest`). Unset it and the default `latest`
|
||||
# is the prod-blessed tag the ordered deploy keeps pointed at the last
|
||||
# green build — still deterministic vs. the old raw `:latest`.
|
||||
#
|
||||
# To pin by content digest instead of tag (fully immutable):
|
||||
# aws ecr describe-images --repository-name molecule-ai/canvas \
|
||||
# --image-tags staging-<sha> --region us-east-2 \
|
||||
# --query 'imageDetails[0].imageDigest' --output text
|
||||
# then set CANVAS_IMAGE_TAG=staging-<sha>@<digest> (compose passes it through).
|
||||
#
|
||||
# Note: ECR images require AWS auth — `aws ecr get-login-password --region us-east-2 | docker login --username AWS --password-stdin 153263036946.dkr.ecr.us-east-2.amazonaws.com` before pull.
|
||||
# Digest-pin requires: aws ecr describe-images --repository-name molecule-ai/canvas --image-tags latest --query 'imageDetails[0].imageDigest'
|
||||
# TODO: pin canvas ECR image digest once AWS creds are available in CI.
|
||||
image: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas:latest
|
||||
# Local dev keeps working via the `build:` context below (docker compose build canvas).
|
||||
image: 153263036946.dkr.ecr.us-east-2.amazonaws.com/molecule-ai/canvas:${CANVAS_IMAGE_TAG:-latest}
|
||||
build:
|
||||
context: ./canvas
|
||||
dockerfile: Dockerfile
|
||||
@@ -175,6 +188,10 @@ services:
|
||||
NEXT_PUBLIC_PLATFORM_URL: ${NEXT_PUBLIC_PLATFORM_URL:-http://localhost:${PLATFORM_PUBLISH_PORT:-8080}}
|
||||
NEXT_PUBLIC_WS_URL: ${NEXT_PUBLIC_WS_URL:-ws://localhost:${PLATFORM_PUBLISH_PORT:-8080}/ws}
|
||||
NEXT_PUBLIC_ADMIN_TOKEN: ${ADMIN_TOKEN:-}
|
||||
# SHA surfaced at /api/buildinfo (core#2235). CI passes the real merge
|
||||
# SHA via the publish-canvas-image workflow build-args; local compose
|
||||
# builds default to "dev" (the route's unwired sentinel).
|
||||
BUILD_SHA: ${BUILD_SHA:-dev}
|
||||
depends_on:
|
||||
platform:
|
||||
condition: service_healthy
|
||||
|
||||
+1
-3
@@ -28,9 +28,7 @@
|
||||
{"name": "claude-code-default", "repo": "molecule-ai/molecule-ai-workspace-template-claude-code", "ref": "main"},
|
||||
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
|
||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"}
|
||||
],
|
||||
"org_templates": [
|
||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
||||
|
||||
Executable
+76
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/env bash
|
||||
# check-manifest-repos-exist.sh — fail-fast guard: verify every repo listed in
|
||||
# manifest.json actually exists on Gitea before the expensive clone step runs.
|
||||
#
|
||||
# WHY: deleting an org-template/workspace-template repo that is still listed in
|
||||
# manifest.json breaks clone-manifest.sh with a generic git 404 error. The
|
||||
# failure is deep in the publish-workspace-server-image workflow and looks like
|
||||
# a transient network issue, wasting debug time. This script surfaces the
|
||||
# problem immediately with a per-entry ::error:: annotation naming the missing
|
||||
# repo (issue #2192).
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/check-manifest-repos-exist.sh <manifest.json>
|
||||
#
|
||||
# Exit:
|
||||
# 0 all repos exist
|
||||
#   1  one or more repos did not return HTTP 200 (each one is reported on stderr)
|
||||
# 2 bad usage / missing inputs
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
MANIFEST="${1:-manifest.json}"
|
||||
GITEA_API="${GITEA_API:-https://git.moleculesai.app/api/v1/repos}"
|
||||
|
||||
if [ ! -f "$MANIFEST" ]; then
|
||||
echo "::error::manifest not found: $MANIFEST" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# Strip JSON5-style // comments before parsing (same as clone-manifest.sh)
|
||||
# Print the manifest file named by $MANIFEST with every whole-line `//`
# comment blanked out (the line itself is kept, just emptied), so the output
# can be handed to a strict JSON parser. Trailing `//` text after real content
# on a line is left untouched.
_strip_comments() {
  local comment_line='^[[:space:]]*//.*'
  sed "s|${comment_line}||" "$MANIFEST"
}
|
||||
|
||||
MANIFEST_JSON="$(_strip_comments)"
|
||||
|
||||
MISSING=0
|
||||
TOTAL=0
|
||||
|
||||
# Categories to check — must match clone-manifest.sh categories
|
||||
# Verify that every repo listed under one manifest category answers HTTP 200
# on the Gitea repo API.
#
# Arguments:
#   $1  top-level manifest key whose value is an array of {name, repo} entries
# Reads:
#   MANIFEST_JSON  comment-stripped manifest text
#   GITEA_API      base URL the "<owner>/<repo>" path is appended to
# Updates:
#   TOTAL    incremented once per entry inspected
#   MISSING  incremented once per entry that did not answer 200
# Output:
#   one ::error:: annotation per failing entry, on stderr
check_category() {
  local category="$1"
  local count
  count=$(jq -r ".${category} | length" <<<"$MANIFEST_JSON")

  # http_code is declared local so it does not leak into the caller's scope.
  local name repo http_code
  local i=0
  while [ "$i" -lt "$count" ]; do
    name=$(jq -r ".${category}[$i].name" <<<"$MANIFEST_JSON")
    repo=$(jq -r ".${category}[$i].repo" <<<"$MANIFEST_JSON")
    TOTAL=$((TOTAL + 1))

    # Check repo existence via Gitea API (public endpoint, no auth needed).
    # `|| true` keeps a curl transport failure from tripping `set -e`; in that
    # case the -w format still yields "000".
    http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "${GITEA_API}/${repo}" 2>/dev/null || true)

    case "$http_code" in
      200)
        ;;
      ""|000)
        # No HTTP response at all: a network error or timeout, not evidence
        # that the repo was deleted. Still counted as a failure (fail closed),
        # but reported without the misleading "delete the entry" advice.
        echo "::error::manifest.json ${category} entry '${name}' → repo '${repo}' could not be checked: no HTTP response from ${GITEA_API} (network error or timeout)." >&2
        MISSING=$((MISSING + 1))
        ;;
      *)
        echo "::error::manifest.json ${category} entry '${name}' → repo '${repo}' returned HTTP ${http_code} (expected 200). Delete the manifest entry BEFORE deleting the repo." >&2
        MISSING=$((MISSING + 1))
        ;;
    esac

    i=$((i + 1))
  done
}
|
||||
|
||||
echo "==> Checking manifest repo existence against ${GITEA_API} ..."
|
||||
check_category "plugins"
|
||||
check_category "workspace_templates"
|
||||
check_category "org_templates"
|
||||
|
||||
if [ "$MISSING" -gt 0 ]; then
|
||||
echo "::error::${MISSING}/${TOTAL} manifest entries are missing — fix manifest.json before publishing." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✓ All ${TOTAL} manifest entries resolved (HTTP 200)."
|
||||
exit 0
|
||||
@@ -234,9 +234,30 @@ elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "import json,os;k=os.environ['E2E_OPENAI_API_KEY'];print(json.dumps({'OPENAI_API_KEY':k,'OPENAI_BASE_URL':'https://api.openai.com/v1','MODEL_PROVIDER':'openai:gpt-4o','HERMES_INFERENCE_PROVIDER':'custom','HERMES_CUSTOM_BASE_URL':'https://api.openai.com/v1','HERMES_CUSTOM_API_KEY':k,'HERMES_CUSTOM_API_MODE':'chat_completions'}))")
|
||||
fi
|
||||
|
||||
# Workspace-create now enforces the MODEL_REQUIRED contract: there is NO
|
||||
# platform-side default model for a runtime (feedback_workspace_model_required_
|
||||
# no_platform_default). Every create MUST carry an explicit `model`, or the CP
|
||||
# rejects it with MODEL_REQUIRED before this gate's peer-visibility assertion
|
||||
# can run. We pick a PLATFORM-MANAGED id (Molecule owns billing — no tenant key
|
||||
# needed; this gate only needs the workspace to boot + list peers, not heavy
|
||||
# LLM work), validated against the controlplane providers SSOT
|
||||
# (internal/providers/providers.yaml runtimes.<rt>.providers[platform].models):
|
||||
# claude-code → anthropic/claude-sonnet-4-6 (platform claude model)
|
||||
# hermes/openclaw → moonshot/kimi-k2.6 (their only platform family)
|
||||
# E2E_MODEL_SLUG overrides for operator-dispatched runs.
|
||||
# Print (without a trailing newline) the model id to send when creating a
# workspace for runtime $1.
#
# A non-empty E2E_MODEL_SLUG wins for every runtime. Otherwise claude-code
# gets the anthropic model and every other runtime (hermes and openclaw
# included) gets the moonshot one.
pv_platform_model_for_runtime() {
  local override="${E2E_MODEL_SLUG:-}"
  if [ -n "$override" ]; then
    printf '%s' "$override"
    return 0
  fi

  # $1 is only read once the override has been ruled out.
  local runtime="$1"
  if [ "$runtime" = "claude-code" ]; then
    printf 'anthropic/claude-sonnet-4-6'
  else
    printf 'moonshot/kimi-k2.6'
  fi
}
|
||||
|
||||
log "4/6 provisioning parent (claude-code) + one sibling per runtime under test..."
|
||||
PARENT_MODEL=$(pv_platform_model_for_runtime claude-code)
|
||||
P_RESP=$(tenant_call POST /workspaces \
|
||||
-d "{\"name\":\"pv-parent\",\"runtime\":\"claude-code\",\"tier\":3,\"secrets\":$SECRETS_JSON}")
|
||||
-d "{\"name\":\"pv-parent\",\"runtime\":\"claude-code\",\"model\":\"$PARENT_MODEL\",\"tier\":3,\"secrets\":$SECRETS_JSON}")
|
||||
PARENT_ID=$(echo "$P_RESP" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
[ -n "$PARENT_ID" ] || fail "parent create failed: $(echo "$P_RESP" | head -c 300)"
|
||||
log " PARENT_ID=$PARENT_ID"
|
||||
@@ -245,8 +266,9 @@ log " PARENT_ID=$PARENT_ID"
|
||||
declare -A WS_IDS WS_TOKENS
|
||||
ALL_WS_IDS="$PARENT_ID"
|
||||
for rt in $PV_RUNTIMES; do
|
||||
RT_MODEL=$(pv_platform_model_for_runtime "$rt")
|
||||
R=$(tenant_call POST /workspaces \
|
||||
-d "{\"name\":\"pv-$rt\",\"runtime\":\"$rt\",\"tier\":2,\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
|
||||
-d "{\"name\":\"pv-$rt\",\"runtime\":\"$rt\",\"model\":\"$RT_MODEL\",\"tier\":2,\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
|
||||
WID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null)
|
||||
WTOK=$(echo "$R" | extract_auth_token)
|
||||
[ -n "$WID" ] || fail "$rt workspace create failed: $(printf '%s' "$R" | head -c 300)"
|
||||
|
||||
+493
@@ -0,0 +1,493 @@
|
||||
#!/usr/bin/env bash
|
||||
# Live staging E2E — the CP instance-state reconciler heals a terminated EC2.
|
||||
#
|
||||
# Real-infra complement to the deterministic unit tests for core#2261
|
||||
# (workspace-server/internal/registry/cp_instance_reconciler.go). Those unit
|
||||
# tests pin the reconcile logic against fakes; THIS script proves the loop
|
||||
# actually runs in a real tenant's workspace-server and drives the EXISTING
|
||||
# offline + auto-heal machinery against real AWS.
|
||||
#
|
||||
# Root regression (core#2247): a SaaS workspace whose EC2 is terminated out
|
||||
# from under the platform (manual AWS action, spot reclaim, CP reap) fell
|
||||
# through every existing liveness pass and kept reading status='online'
|
||||
# forever, pointing at a dead instance. The reconciler closes that gap with
|
||||
# CPProvisioner.IsRunning and feeds a clean "not running" into onOffline →
|
||||
# RestartByID (existing-volume reprovision).
|
||||
#
|
||||
# What this test does:
|
||||
# 1. Provision a fresh staging org + ONE workspace (same default
|
||||
# runtime/model as the full-saas harness, so it actually boots).
|
||||
# 2. Poll the tenant API until the workspace is status=online; capture its
|
||||
# instance_id.
|
||||
# 3. KILL it — terminate that exact EC2 via `aws ec2 terminate-instances`.
|
||||
# 4. Assert the reconciler heals it:
|
||||
# PRIMARY (gate) — within ~180s the workspace status LEAVES
|
||||
# 'online' (the reconciler detected the dead
|
||||
# instance via IsRunning and flipped it). This
|
||||
# is the core regression guard: a dead instance
|
||||
# must NOT keep reading 'online'.
|
||||
# SECONDARY (best-effort) — within ~10 min it auto-reprovisions:
|
||||
# status returns to 'online' with a NEW
|
||||
# instance_id (onOffline → RestartByID
|
||||
# existing-volume heal). If reprovision doesn't
|
||||
# finish in the bound we log it clearly but let
|
||||
# the PRIMARY assertion stand as the gate (see
|
||||
# the comment at the secondary block — a future
|
||||
# tightening that promotes this to a hard gate is
|
||||
# deliberately one edit away).
|
||||
# 5. Teardown ALWAYS (EXIT trap): delete the tenant + leak-sweep so no EC2
|
||||
# is orphaned, even on a mid-test failure.
|
||||
#
|
||||
# Auth model + provisioning conventions are copied verbatim from
|
||||
# test_staging_full_saas.sh (single MOLECULE_ADMIN_TOKEN → CP admin; per-
|
||||
# tenant admin token + X-Molecule-Org-Id header for tenant API). The kill
|
||||
# primitive + leak sweep reuse lib/aws_leak_check.sh.
|
||||
#
|
||||
# Required env:
|
||||
# MOLECULE_CP_URL default: https://staging-api.moleculesai.app
|
||||
# MOLECULE_ADMIN_TOKEN CP admin bearer — Railway staging CP_ADMIN_API_TOKEN
|
||||
#
|
||||
# Optional env (mirrors the full-saas harness where they overlap):
|
||||
# E2E_RUNTIME claude-code (default)
|
||||
# E2E_PROVISION_TIMEOUT_SECS default 900 (cold EC2 budget)
|
||||
# E2E_WORKSPACE_ONLINE_TIMEOUT_SECS default 3600 (cold-boot worst-case)
|
||||
# E2E_RECONCILE_OFFLINE_TIMEOUT_SECS default 180 (PRIMARY: leave 'online'.
|
||||
# Reconciler cadence is 60s — 3 cycles +
|
||||
# AWS terminate-visibility slack.)
|
||||
# E2E_REPROVISION_TIMEOUT_SECS default 600 (SECONDARY: back to online
|
||||
# with a NEW instance_id)
|
||||
# E2E_MINIMAX_API_KEY / E2E_ANTHROPIC_API_KEY / E2E_OPENAI_API_KEY
|
||||
# LLM key (same priority chain as
|
||||
# full-saas; needed so the FIRST boot
|
||||
# reaches online). Empty → '{}' (the
|
||||
# workspace still boots online; the LLM
|
||||
# key only matters for a completion,
|
||||
# which this test never makes).
|
||||
# E2E_KEEP_ORG 1 → skip teardown (debugging only)
|
||||
# E2E_RUN_ID Slug suffix; CI: ${GITHUB_RUN_ID}
|
||||
# E2E_AWS_LEAK_CHECK auto (default) | required | off
|
||||
# E2E_AWS_TERMINATE_LEAKS 1 → terminate slug-tagged leaked EC2 at
|
||||
# teardown
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 happy path (PRIMARY assertion held; SECONDARY logged either way)
|
||||
# 1 generic failure (incl. PRIMARY assertion failed = regression)
|
||||
# 2 missing required env
|
||||
# 3 provisioning timed out
|
||||
# 4 teardown left orphan resources
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
CP_URL="${MOLECULE_CP_URL:-https://staging-api.moleculesai.app}"
|
||||
ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — Railway staging CP_ADMIN_API_TOKEN}"
|
||||
RUNTIME="${E2E_RUNTIME:-claude-code}"
|
||||
PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
|
||||
WORKSPACE_ONLINE_TIMEOUT_SECS="${E2E_WORKSPACE_ONLINE_TIMEOUT_SECS:-3600}"
|
||||
# PRIMARY bound: the reconciler ticks every 60s; it needs one cycle to see
|
||||
# the dead instance after AWS makes the terminate visible to DescribeInstances
|
||||
# (typically seconds, but can lag). 180s = ~3 cycles + slack.
|
||||
RECONCILE_OFFLINE_TIMEOUT_SECS="${E2E_RECONCILE_OFFLINE_TIMEOUT_SECS:-180}"
|
||||
# SECONDARY bound: full existing-volume reprovision (new EC2 boot + agent
|
||||
# bootstrap) is a multi-minute cold path.
|
||||
REPROVISION_TIMEOUT_SECS="${E2E_REPROVISION_TIMEOUT_SECS:-600}"
|
||||
RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
|
||||
|
||||
# Slug MUST start with e2e- so sweep-stale-e2e-orgs.yml reaps any orphan this
|
||||
# run leaks (lint_cleanup_traps.sh enforces the e2e-/rt-e2e- prefix for any
|
||||
# staging tenant E2E; we honour it here too even though our filename isn't
|
||||
# *staging*).
|
||||
SLUG="e2e-rec-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
|
||||
SLUG=$(echo "$SLUG" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-' | head -c 32)
|
||||
|
||||
# Write a wall-clock-stamped ([HH:MM:SS]) progress line to stdout.
log() {
  printf '[%s] %s\n' "$(date +%H:%M:%S)" "$*"
}
|
||||
# Write a wall-clock-stamped failure line to stderr, then exit the script
# with status 1.
fail() {
  printf '[%s] ❌ %s\n' "$(date +%H:%M:%S)" "$*" >&2
  exit 1
}
|
||||
# Write a wall-clock-stamped success line to stdout.
ok() {
  printf '[%s] ✅ %s\n' "$(date +%H:%M:%S)" "$*"
}
|
||||
|
||||
# Per-runtime model slug dispatch — shared with the full-saas harness.
|
||||
# shellcheck disable=SC1091
|
||||
# shellcheck source=lib/model_slug.sh
|
||||
source "$(dirname "$0")/lib/model_slug.sh"
|
||||
# AWS kill primitive + leak sweep (e2e_aws_region / e2e_ec2_instances_for_slug /
|
||||
# e2e_terminate_instances / e2e_verify_no_ec2_leaks_for_slug).
|
||||
# shellcheck disable=SC1091
|
||||
# shellcheck source=lib/aws_leak_check.sh
|
||||
source "$(dirname "$0")/lib/aws_leak_check.sh"
|
||||
|
||||
CURL_COMMON=(-sS --fail-with-body --max-time 30)
|
||||
|
||||
# ─── cleanup trap ───────────────────────────────────────────────────────
|
||||
# Identical teardown contract to test_staging_full_saas.sh: delete the
|
||||
# tenant (synchronous GDPR cascade), poll for the org row to disappear, then
|
||||
# assert no slug-tagged EC2 survives. A leaked resource at teardown is a CI
|
||||
# failure (exit 4). The trap is installed UP-FRONT so a mid-test failure
|
||||
# (including a failed PRIMARY assertion) still cleans up.
|
||||
CLEANUP_DONE=0
|
||||
# Trap handler: tear down the tenant identified by $SLUG and verify nothing
# was left behind.
#
# Steps: DELETE the tenant on the CP, poll /cp/admin/orgs for up to 60s until
# no non-purged org row with this slug remains, then run the slug-scoped EC2
# leak check. Exits 4 when an org row or EC2 leak remains, exits 2 when the
# leak check itself returns 2, and exits 1 when the status that triggered the
# trap is outside the script's {0,1,2,3,4} contract. Runs at most once per
# process (CLEANUP_DONE) and does nothing when E2E_KEEP_ORG=1.
cleanup_org() {
  # Snapshot the status that triggered the trap before any other command can
  # overwrite $? — this must remain the very first statement.
  local entry_rc=$?

  # Re-entry guard: a second invocation is a no-op.
  if [ "$CLEANUP_DONE" = "1" ]; then
    return 0
  fi
  CLEANUP_DONE=1

  if [ "${E2E_KEEP_ORG:-0}" = "1" ]; then
    log "E2E_KEEP_ORG=1 — skipping teardown. Manually delete $SLUG when done."
    return 0
  fi

  log "🧹 Tearing down org $SLUG..."

  # The DELETE is a synchronous cascade, so it gets a 120s budget; the later
  # --max-time overrides the 30s one carried in CURL_COMMON.
  local delete_args=(
    "${CURL_COMMON[@]}" --max-time 120 -X DELETE "$CP_URL/cp/admin/tenants/$SLUG"
    -H "Authorization: Bearer $ADMIN_TOKEN"
    -H "Content-Type: application/json"
    -d "{\"confirm\":\"$SLUG\"}"
  )
  if curl "${delete_args[@]}" >/dev/null 2>&1; then
    ok "Teardown request accepted"
  else
    log "Teardown returned non-2xx (may already be gone)"
  fi

  # Counts org rows that still carry this slug and are not yet 'purged'.
  local count_live_orgs="import json,sys; d=json.load(sys.stdin); print(sum(1 for o in d.get('orgs', []) if o.get('slug')=='$SLUG' and o.get('status') != 'purged'))"

  # Any fetch/parse failure falls back to 1, i.e. "assume still present".
  local remaining=1
  local waited=0
  while [ "$waited" -lt 60 ]; do
    remaining=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \
        -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
      | python3 -c "$count_live_orgs" \
        2>/dev/null || echo 1)
    if [ "$remaining" = "0" ]; then
      break
    fi
    sleep 5
    waited=$((waited + 5))
  done

  if [ "$remaining" != "0" ]; then
    echo "⚠️ LEAK: org $SLUG still present post-teardown after ${waited}s (count=$remaining)" >&2
    exit 4
  fi

  # EC2 leak sweep: status 2 from the checker is passed through as exit 2,
  # any other non-zero status is reported as an orphan (exit 4).
  local ec2_rc=0
  e2e_verify_no_ec2_leaks_for_slug "$SLUG" || ec2_rc=$?
  case "$ec2_rc" in
    0) ;;
    2) exit 2 ;;
    *) exit 4 ;;
  esac
  ok "Teardown clean — no orphan org or EC2 resources for $SLUG (${waited}s)"

  # `set -e` propagates the raw status of whatever command failed (e.g. curl's
  # 22 under --fail-with-body); fold anything outside {0,1,2,3,4} into 1.
  [[ "$entry_rc" =~ ^[0-4]$ ]] || exit 1
}
|
||||
# EXIT covers every normal and `set -e` exit path. INT/TERM get a handler that
# exits explicitly: a signal trap that simply returns lets bash resume the
# script after the interrupted command, so whenever cleanup_org returned
# normally (e.g. TERM arriving while $? was 0, or E2E_KEEP_ORG=1) the test
# would carry on polling a tenant it had just torn down. The explicit exit
# then fires the EXIT trap, where cleanup_org is a no-op because of its
# CLEANUP_DONE guard. Exit status 1 keeps the script inside its documented
# {0,1,2,3,4} exit-code contract.
trap cleanup_org EXIT
trap 'cleanup_org; exit 1' INT TERM
|
||||
|
||||
# ─── 0. Preflight ───────────────────────────────────────────────────────
|
||||
log "═══════════════════════════════════════════════════════════════════"
|
||||
log " Staging reconciler-heals-terminated-instance E2E (core#2261)"
|
||||
log " CP: $CP_URL"
|
||||
log " Slug: $SLUG"
|
||||
log " Runtime: $RUNTIME"
|
||||
log " Online timeout: ${WORKSPACE_ONLINE_TIMEOUT_SECS}s"
|
||||
log " PRIMARY (offline): ${RECONCILE_OFFLINE_TIMEOUT_SECS}s"
|
||||
log " SECONDARY (reprov): ${REPROVISION_TIMEOUT_SECS}s"
|
||||
log "═══════════════════════════════════════════════════════════════════"
|
||||
|
||||
log "0/6 Preflight: CP reachable?"
|
||||
curl "${CURL_COMMON[@]}" "$CP_URL/health" >/dev/null || fail "CP health check failed"
|
||||
ok "CP reachable"
|
||||
|
||||
# Issue a CP admin API request with curl.
#
# Arguments:
#   $1    HTTP method
#   $2    path appended to $CP_URL
#   $3..  extra curl arguments, passed through after the default headers
# Sends the CP admin bearer token and a JSON Content-Type header; the
# function's status is curl's exit status.
admin_call() {
  local verb="$1" route="$2"
  shift 2
  local headers=(
    -H "Authorization: Bearer $ADMIN_TOKEN"
    -H "Content-Type: application/json"
  )
  curl "${CURL_COMMON[@]}" -X "$verb" "$CP_URL$route" "${headers[@]}" "$@"
}
|
||||
|
||||
# ─── 1. Create org ──────────────────────────────────────────────────────
|
||||
log "1/6 Creating org $SLUG via /cp/admin/orgs..."
|
||||
CREATE_RESP=$(admin_call POST /cp/admin/orgs \
|
||||
-d "{\"slug\":\"$SLUG\",\"name\":\"E2E $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}")
|
||||
echo "$CREATE_RESP" | python3 -m json.tool >/dev/null || fail "Org create returned non-JSON: $CREATE_RESP"
|
||||
ORG_ID=$(echo "$CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))")
|
||||
[ -z "$ORG_ID" ] && fail "Org create response missing 'id': $CREATE_RESP"
|
||||
ok "Org created (id=$ORG_ID)"
|
||||
|
||||
# ─── 2. Wait for tenant provisioning ────────────────────────────────────
|
||||
log "2/6 Waiting for tenant provisioning (up to ${PROVISION_TIMEOUT_SECS}s)..."
|
||||
DEADLINE=$(( $(date +%s) + PROVISION_TIMEOUT_SECS ))
|
||||
LAST_STATUS=""
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$DEADLINE" ]; then
|
||||
fail "Tenant provisioning timed out after ${PROVISION_TIMEOUT_SECS}s (last: $LAST_STATUS)"
|
||||
fi
|
||||
LIST_JSON=$(admin_call GET /cp/admin/orgs 2>/dev/null || echo '{"orgs":[]}')
|
||||
# /cp/admin/orgs exposes 'instance_status' (org_instances.status), NOT 'status'.
|
||||
STATUS=$(echo "$LIST_JSON" | python3 -c "
|
||||
import json, sys
|
||||
d = json.load(sys.stdin)
|
||||
for o in d.get('orgs', []):
|
||||
if o.get('slug') == '$SLUG':
|
||||
print(o.get('instance_status', ''))
|
||||
sys.exit(0)
|
||||
print('')
|
||||
" 2>/dev/null || echo "")
|
||||
if [ "$STATUS" != "$LAST_STATUS" ]; then
|
||||
log " status → $STATUS"
|
||||
LAST_STATUS="$STATUS"
|
||||
fi
|
||||
case "$STATUS" in
|
||||
running) break ;;
|
||||
failed)
|
||||
log "── DIAGNOSTIC BURST (step 2 — tenant provisioning failed) ──"
|
||||
echo "$LIST_JSON" | python3 -c "
|
||||
import json, sys
|
||||
d = json.load(sys.stdin)
|
||||
for o in d.get('orgs', []):
|
||||
if o.get('slug') == '$SLUG':
|
||||
print(json.dumps(o, indent=2))
|
||||
sys.exit(0)
|
||||
print('(no org row found for slug=$SLUG — DB drift?)')
|
||||
" 2>&1 | sed 's/^/ /'
|
||||
log "── END DIAGNOSTIC ──"
|
||||
# Tenant provisioning failures are a CP-side fault, not a reconciler
|
||||
# regression — exit 3 (provisioning) to keep the signal honest.
|
||||
echo "[$(date +%H:%M:%S)] ❌ Tenant provisioning failed for $SLUG (see diagnostic above)" >&2
|
||||
exit 3
|
||||
;;
|
||||
*) sleep 15 ;;
|
||||
esac
|
||||
done
|
||||
ok "Tenant provisioning complete"
|
||||
|
||||
# Derive tenant domain from CP hostname (same logic as the full-saas harness).
|
||||
CP_HOST=$(echo "$CP_URL" | sed -E 's#^https?://##; s#/.*$##')
|
||||
case "$CP_HOST" in
|
||||
api.*) DERIVED_DOMAIN="${CP_HOST#api.}" ;;
|
||||
staging-api.*) DERIVED_DOMAIN="staging.${CP_HOST#staging-api.}" ;;
|
||||
*) DERIVED_DOMAIN="$CP_HOST" ;;
|
||||
esac
|
||||
TENANT_DOMAIN="${MOLECULE_TENANT_DOMAIN:-$DERIVED_DOMAIN}"
|
||||
TENANT_URL="https://$SLUG.$TENANT_DOMAIN"
|
||||
log " TENANT_URL=$TENANT_URL"
|
||||
|
||||
# ─── 3. Retrieve per-tenant admin token ────────────────────────────────
|
||||
log "3/6 Fetching per-tenant admin token..."
|
||||
TENANT_TOKEN_RESP=$(admin_call GET "/cp/admin/orgs/$SLUG/admin-token")
|
||||
TENANT_TOKEN=$(echo "$TENANT_TOKEN_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('admin_token',''))" 2>/dev/null || echo "")
|
||||
[ -z "$TENANT_TOKEN" ] && fail "Could not retrieve per-tenant admin token for $SLUG"
|
||||
ok "Tenant admin token retrieved (len=${#TENANT_TOKEN})"
|
||||
|
||||
# Wait for tenant TLS / DNS propagation before any tenant API call.
|
||||
log " Waiting for tenant TLS / DNS propagation..."
|
||||
TLS_DEADLINE=$(( $(date +%s) + 15 * 60 ))
|
||||
while true; do
|
||||
if curl -sSfk --max-time 5 "$TENANT_URL/health" >/dev/null 2>&1; then
|
||||
break
|
||||
fi
|
||||
if [ "$(date +%s)" -gt "$TLS_DEADLINE" ]; then
|
||||
fail "Tenant URL never responded 2xx on /health within 15m"
|
||||
fi
|
||||
sleep 5
|
||||
done
|
||||
ok "Tenant reachable at $TENANT_URL"
|
||||
|
||||
# Issue a tenant API request with curl.
#
# Arguments:
#   $1    HTTP method
#   $2    path appended to $TENANT_URL
#   $3..  extra curl arguments, passed through after the default headers
# The function's status is curl's exit status.
tenant_call() {
  local verb="$1" route="$2"
  shift 2
  # X-Molecule-Org-Id is REQUIRED — the tenant guard 404s anything without it
  # (it does NOT 403, to hide tenant existence from org scanners).
  local headers=(
    -H "Authorization: Bearer $TENANT_TOKEN"
    -H "X-Molecule-Org-Id: $ORG_ID"
  )
  curl "${CURL_COMMON[@]}" -X "$verb" "$TENANT_URL$route" "${headers[@]}" "$@"
}
|
||||
|
||||
# Helper: read a single field off GET /workspaces/<id>. Echoes '' on any
|
||||
# error so callers can poll without `set -e` aborting on a transient blip.
|
||||
# Print one top-level field of GET /workspaces/<id>.
#
# Arguments:
#   $1  workspace id
#   $2  field name to read from the JSON response
# A missing or falsy field prints an empty line. Any request or parse failure
# is swallowed (an empty line is printed and the status is 0) so polling
# loops survive transient errors under `set -e`.
ws_field() {
  local ws_id="$1"
  local key="$2"
  local extract="import json,sys; print(json.load(sys.stdin).get('$key') or '')"
  tenant_call GET "/workspaces/$ws_id" 2>/dev/null \
    | python3 -c "$extract" 2>/dev/null \
    || echo ""
}
|
||||
|
||||
# ─── 4. Provision ONE workspace ─────────────────────────────────────────
|
||||
# Same secrets-injection priority chain as the full-saas harness so the
|
||||
# FIRST boot reaches online. We never make a completion in this test (the
|
||||
# whole exercise is instance-state, not the LLM), so an absent key is
|
||||
# tolerable — but wiring the same keys keeps boot behaviour identical to the
|
||||
# sibling and avoids a config path that only this test would exercise.
|
||||
SECRETS_JSON='{}'
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "import json,os; print(json.dumps({'MINIMAX_API_KEY': os.environ['E2E_MINIMAX_API_KEY']}))")
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "import json,os; print(json.dumps({'ANTHROPIC_API_KEY': os.environ['E2E_ANTHROPIC_API_KEY']}))")
|
||||
elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_OPENAI_API_KEY']
|
||||
print(json.dumps({
|
||||
'OPENAI_API_KEY': k,
|
||||
'OPENAI_BASE_URL': 'https://api.openai.com/v1',
|
||||
'MODEL_PROVIDER': 'openai:gpt-4o',
|
||||
'HERMES_INFERENCE_PROVIDER': 'custom',
|
||||
'HERMES_CUSTOM_BASE_URL': 'https://api.openai.com/v1',
|
||||
'HERMES_CUSTOM_API_KEY': k,
|
||||
'HERMES_CUSTOM_API_MODE': 'chat_completions',
|
||||
}))
|
||||
")
|
||||
fi
|
||||
|
||||
MODEL_SLUG=$(pick_model_slug "$RUNTIME")
|
||||
log " MODEL_SLUG=$MODEL_SLUG"
|
||||
|
||||
log "4/6 Provisioning workspace (runtime=$RUNTIME)..."
|
||||
WS_RESP=$(tenant_call POST /workspaces \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"E2E Reconciler\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"secrets\":$SECRETS_JSON}")
|
||||
# Pull the new workspace id out of the create response. The extraction is
# deliberately non-fatal (.get + `|| true`): a response that is not JSON, or
# a JSON object without 'id', yields an empty WS_ID instead of a Python
# traceback and a non-zero command substitution. If the script runs under
# `set -e`, that non-zero status would abort it before the diagnostic below
# could print the offending response body.
WS_ID=$(echo "$WS_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id') or '')" 2>/dev/null || true)
if [ -z "$WS_ID" ]; then
  fail "Workspace create response missing 'id': $WS_RESP"
fi
|
||||
log " WS_ID=$WS_ID"
|
||||
|
||||
# Wait for the workspace to reach status=online and capture its instance_id.
|
||||
log " Waiting for workspace to reach status=online (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min)..."
|
||||
ONLINE_DEADLINE=$(( $(date +%s) + WORKSPACE_ONLINE_TIMEOUT_SECS ))
|
||||
ORIGINAL_INSTANCE_ID=""
|
||||
WS_LAST_STATUS=""
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$ONLINE_DEADLINE" ]; then
|
||||
WS_LAST_ERR=$(ws_field "$WS_ID" "last_sample_error")
|
||||
fail "Workspace $WS_ID never reached status=online within ${WORKSPACE_ONLINE_TIMEOUT_SECS}s (last status=$WS_LAST_STATUS, err=$WS_LAST_ERR)"
|
||||
fi
|
||||
WS_STATUS=$(ws_field "$WS_ID" "status")
|
||||
if [ "$WS_STATUS" != "$WS_LAST_STATUS" ]; then
|
||||
log " $WS_ID → $WS_STATUS"
|
||||
WS_LAST_STATUS="$WS_STATUS"
|
||||
fi
|
||||
if [ "$WS_STATUS" = "online" ]; then
|
||||
ORIGINAL_INSTANCE_ID=$(ws_field "$WS_ID" "instance_id")
|
||||
if [ -n "$ORIGINAL_INSTANCE_ID" ]; then
|
||||
break
|
||||
fi
|
||||
# online but instance_id not surfaced yet — keep polling briefly.
|
||||
log " $WS_ID online but instance_id not populated yet — waiting"
|
||||
fi
|
||||
# 'failed' is transient on cold boot (bootstrap-watcher deadline vs heartbeat
|
||||
# recovery, cp#245). Keep polling; only the deadline hard-fails.
|
||||
sleep 10
|
||||
done
|
||||
ok "Workspace online (instance_id=$ORIGINAL_INSTANCE_ID)"
|
||||
|
||||
# ─── 5. Kill the EC2 ────────────────────────────────────────────────────
|
||||
# Terminate the EXACT instance the workspace reported. Prefer the captured
|
||||
# instance_id (precise — kills only this workspace's box); fall back to the
|
||||
# slug-tag describe if the API didn't surface an id (shouldn't happen — we
|
||||
# only break out of the online-wait once instance_id is non-empty).
|
||||
log "5/6 KILLING the workspace EC2 to simulate an out-of-band termination..."
|
||||
if ! e2e_aws_creds_available; then
|
||||
fail "AWS CLI/creds unavailable — cannot terminate the EC2 to exercise the reconciler. Set AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY (the CI workflow wires these)."
|
||||
fi
|
||||
AWS_REGION_RESOLVED=$(e2e_aws_region)
|
||||
if [ -n "$ORIGINAL_INSTANCE_ID" ]; then
|
||||
log " Terminating $ORIGINAL_INSTANCE_ID in $AWS_REGION_RESOLVED (aws ec2 terminate-instances)..."
|
||||
aws ec2 terminate-instances --region "$AWS_REGION_RESOLVED" --instance-ids "$ORIGINAL_INSTANCE_ID" >/dev/null \
|
||||
|| fail "aws ec2 terminate-instances failed for $ORIGINAL_INSTANCE_ID"
|
||||
KILLED_IDS="$ORIGINAL_INSTANCE_ID"
|
||||
else
|
||||
# Fallback path — find by slug tag and terminate.
|
||||
log " instance_id was empty — falling back to slug-tag describe ($SLUG)..."
|
||||
ROWS=$(e2e_ec2_instances_for_slug "$SLUG" 2>/dev/null || echo "")
|
||||
KILLED_IDS=$(echo "$ROWS" | awk 'NF {print $1}' | sort -u | tr '\n' ' ')
|
||||
[ -n "$KILLED_IDS" ] || fail "No slug-tagged EC2 found for $SLUG — nothing to terminate"
|
||||
log " Terminating $KILLED_IDS in $AWS_REGION_RESOLVED..."
|
||||
e2e_terminate_instances "$KILLED_IDS" || fail "terminate-instances failed for $KILLED_IDS"
|
||||
fi
|
||||
ok "Terminated EC2: $KILLED_IDS — reconciler should now detect the dead instance"
|
||||
|
||||
# ─── 6a. PRIMARY assertion — workspace leaves 'online' ─────────────────
|
||||
# This is THE regression gate for core#2261/#2247. The reconciler runs every
|
||||
# 60s in the tenant's workspace-server; when CPProvisioner.IsRunning returns a
|
||||
# clean "not running" for the terminated EC2, onOffline flips the row off
|
||||
# 'online'. A dead instance that keeps reading 'online' is exactly the bug.
|
||||
log "6a/6 PRIMARY: asserting workspace leaves 'online' within ${RECONCILE_OFFLINE_TIMEOUT_SECS}s (reconciler heal-detection)..."
|
||||
OFFLINE_DEADLINE=$(( $(date +%s) + RECONCILE_OFFLINE_TIMEOUT_SECS ))
|
||||
LEFT_ONLINE=0
|
||||
REC_LAST_STATUS=""
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$OFFLINE_DEADLINE" ]; then
|
||||
break
|
||||
fi
|
||||
REC_STATUS=$(ws_field "$WS_ID" "status")
|
||||
if [ "$REC_STATUS" != "$REC_LAST_STATUS" ]; then
|
||||
log " $WS_ID status → ${REC_STATUS:-<empty>}"
|
||||
REC_LAST_STATUS="$REC_STATUS"
|
||||
fi
|
||||
# Any non-online status (offline/provisioning/awaiting_agent/restarting/…)
|
||||
# proves the reconciler acted. We deliberately don't pin the exact target
|
||||
# status: onOffline flips offline AND kicks RestartByID, so the row may race
|
||||
# straight into a provisioning/restarting state — all of which are "no longer
|
||||
# falsely online".
|
||||
if [ -n "$REC_STATUS" ] && [ "$REC_STATUS" != "online" ]; then
|
||||
LEFT_ONLINE=1
|
||||
ok "PRIMARY held — workspace left 'online' (now '$REC_STATUS') after EC2 termination"
|
||||
break
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
|
||||
if [ "$LEFT_ONLINE" != "1" ]; then
|
||||
fail "PRIMARY FAILED (core#2261 regression): workspace $WS_ID still reads status=online ${RECONCILE_OFFLINE_TIMEOUT_SECS}s after its EC2 ($KILLED_IDS) was terminated. The reconciler did NOT detect the dead instance — a terminated EC2 is masquerading as a healthy workspace."
|
||||
fi
|
||||
|
||||
# ─── 6b. SECONDARY assertion — auto-reprovision (best-effort) ──────────
|
||||
# The onOffline → RestartByID existing-volume heal should bring the workspace
|
||||
# back to 'online' on a NEW instance_id. This is best-effort: a full EC2 cold
|
||||
# reprovision is a multi-minute path that shares the same boot-flake surface
|
||||
# as the initial provision. If it doesn't finish within the bound we LOG it
|
||||
# clearly but DO NOT fail — the PRIMARY assertion above is the gate.
|
||||
#
|
||||
# FUTURE TIGHTENING (deliberately one edit away): once this reprovision path
|
||||
# is proven reliable on staging, promote the `log "SECONDARY ..."` soft-miss
|
||||
# below to a `fail ...` so a stuck reprovision becomes a hard gate.
|
||||
log "6b/6 SECONDARY (best-effort): asserting auto-reprovision to online with a NEW instance_id within ${REPROVISION_TIMEOUT_SECS}s..."
|
||||
REPROV_DEADLINE=$(( $(date +%s) + REPROVISION_TIMEOUT_SECS ))
|
||||
REPROV_OK=0
|
||||
REPROV_LAST_STATUS=""
|
||||
NEW_INSTANCE_ID=""
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$REPROV_DEADLINE" ]; then
|
||||
break
|
||||
fi
|
||||
RP_STATUS=$(ws_field "$WS_ID" "status")
|
||||
if [ "$RP_STATUS" != "$REPROV_LAST_STATUS" ]; then
|
||||
log " $WS_ID status → ${RP_STATUS:-<empty>}"
|
||||
REPROV_LAST_STATUS="$RP_STATUS"
|
||||
fi
|
||||
if [ "$RP_STATUS" = "online" ]; then
|
||||
NEW_INSTANCE_ID=$(ws_field "$WS_ID" "instance_id")
|
||||
if [ -n "$NEW_INSTANCE_ID" ] && [ "$NEW_INSTANCE_ID" != "$ORIGINAL_INSTANCE_ID" ]; then
|
||||
REPROV_OK=1
|
||||
break
|
||||
fi
|
||||
# online again but instance_id either not surfaced yet or still the old
|
||||
# (terminated) id — keep polling until the reprovision swaps it.
|
||||
fi
|
||||
sleep 15
|
||||
done
|
||||
|
||||
if [ "$REPROV_OK" = "1" ]; then
|
||||
ok "SECONDARY held — auto-reprovisioned to online on NEW instance_id=$NEW_INSTANCE_ID (was $ORIGINAL_INSTANCE_ID)"
|
||||
else
|
||||
# Soft-miss — see FUTURE TIGHTENING note above. PRIMARY is the gate.
|
||||
log "⚠️ SECONDARY not satisfied within ${REPROVISION_TIMEOUT_SECS}s (status=${REPROV_LAST_STATUS:-<empty>}, instance_id=${NEW_INSTANCE_ID:-<none>}, original=$ORIGINAL_INSTANCE_ID). NOT failing — the PRIMARY heal-detection assertion is the gate; reprovision is a slower, flakier cold path. Promote this to a hard fail once it's proven reliable."
|
||||
fi
|
||||
|
||||
ok "Reconciler live E2E PASSED — PRIMARY heal-detection held (SECONDARY: $([ "$REPROV_OK" = "1" ] && echo "held" || echo "soft-miss, logged"))"
|
||||
# Teardown runs via the EXIT trap.
|
||||
@@ -1041,7 +1041,7 @@ print(json.dumps({
|
||||
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
|
||||
'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}],
|
||||
},
|
||||
'configuration': {'max_tokens': 4}
|
||||
'configuration': {'max_tokens': 32}
|
||||
}
|
||||
}))
|
||||
")
|
||||
|
||||
@@ -18,6 +18,7 @@ No network. No live Gitea calls.
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import json
|
||||
import os
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
@@ -117,6 +118,31 @@ def _write_audit_yaml(tmp_path: Path, required_checks: list[str]) -> Path:
|
||||
return p
|
||||
|
||||
|
||||
def _write_audit_yaml_json(tmp_path: Path, required_checks_json: dict) -> Path:
|
||||
"""Write a synthetic audit-force-merge.yml with REQUIRED_CHECKS_JSON env."""
|
||||
block = json.dumps(required_checks_json, indent=2)
|
||||
text = textwrap.dedent(
|
||||
f"""\
|
||||
name: audit-force-merge
|
||||
on:
|
||||
schedule:
|
||||
- cron: '*/30 * * * *'
|
||||
jobs:
|
||||
audit:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Run audit
|
||||
env:
|
||||
REQUIRED_CHECKS_JSON: |
|
||||
{block.replace(chr(10), chr(10) + ' ')}
|
||||
run: bash .gitea/scripts/audit-force-merge.sh
|
||||
"""
|
||||
)
|
||||
p = tmp_path / "audit-force-merge.yml"
|
||||
p.write_text(text, encoding="utf-8")
|
||||
return p
|
||||
|
||||
|
||||
def _make_stub_api(responses: dict):
|
||||
"""Build a fake `api()` callable.
|
||||
|
||||
@@ -363,6 +389,107 @@ def test_happy_path_no_drift(drift_module, tmp_path, monkeypatch):
|
||||
assert findings == [], findings
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# REQUIRED_CHECKS_JSON variant drift tests
|
||||
# --------------------------------------------------------------------------
|
||||
def test_f3a_env_wider_than_protection_json_variant(drift_module, tmp_path, monkeypatch):
    """F3a (JSON env): the audit env lists a context branch protection lacks.

    REQUIRED_CHECKS_JSON names both the real `build` context and a `ghost`
    context; branch protection only requires `build`. detect_drift must
    report an F3a finding that mentions the ghost context.
    """
    build_ctx = "ci / build (pull_request)"
    ghost_ctx = "ci / ghost (pull_request)"

    ci_path = _write_ci_yaml(
        tmp_path,
        jobs={"build": {"runs-on": "ubuntu-latest"}},
        sentinel_needs=["build"],
    )
    audit_path = _write_audit_yaml_json(tmp_path, {"main": [build_ctx, ghost_ctx]})
    _patch_paths(drift_module, monkeypatch, ci_path, audit_path)

    # Branch protection is narrower than the env: only the build context.
    protection_body = {"status_check_contexts": [build_ctx]}
    canned = {
        ("GET", "/repos/owner/repo/branch_protections/main"): (200, protection_body),
    }
    monkeypatch.setattr(drift_module, "api", _make_stub_api(canned))

    findings, _ = drift_module.detect_drift("main")
    ghost_hits = [f for f in findings if "F3a" in f and "ghost" in f]
    assert ghost_hits, findings
|
||||
|
||||
|
||||
def test_f3b_protection_wider_than_env_json_variant(drift_module, tmp_path, monkeypatch):
    """F3b (JSON env): branch protection requires a context the env omits.

    Protection requires both `build` and `test`; REQUIRED_CHECKS_JSON only
    lists `build`. detect_drift must report an F3b finding naming the
    missing `test` context.
    """
    build_ctx = "ci / build (pull_request)"
    test_ctx = "ci / test (pull_request)"

    ci_jobs = {
        "build": {"runs-on": "ubuntu-latest"},
        "test": {"runs-on": "ubuntu-latest"},
    }
    ci_path = _write_ci_yaml(tmp_path, jobs=ci_jobs, sentinel_needs=["build", "test"])
    audit_path = _write_audit_yaml_json(tmp_path, {"main": [build_ctx]})
    _patch_paths(drift_module, monkeypatch, ci_path, audit_path)

    # Branch protection is wider than the env: it also gates on the test job.
    protection_body = {"status_check_contexts": [build_ctx, test_ctx]}
    canned = {
        ("GET", "/repos/owner/repo/branch_protections/main"): (200, protection_body),
    }
    monkeypatch.setattr(drift_module, "api", _make_stub_api(canned))

    findings, _ = drift_module.detect_drift("main")
    missing_hits = [
        f for f in findings if "F3b" in f and "ci / test (pull_request)" in f
    ]
    assert missing_hits, findings
|
||||
|
||||
|
||||
def test_happy_path_no_drift_json_variant(drift_module, tmp_path, monkeypatch):
    """Happy path (JSON env): CI jobs, env and protection all agree.

    The same three contexts appear in REQUIRED_CHECKS_JSON and in branch
    protection, and every CI job is listed under the sentinel's needs, so
    detect_drift must return no findings.
    """
    job_names = ("build", "test")
    aligned_contexts = (
        "ci / build (pull_request)",
        "ci / test (pull_request)",
        "ci / all-required (pull_request)",
    )

    ci_path = _write_ci_yaml(
        tmp_path,
        jobs={name: {"runs-on": "ubuntu-latest"} for name in job_names},
        sentinel_needs=list(job_names),
    )
    # Separate list objects for env and protection, as two independent sources.
    audit_path = _write_audit_yaml_json(tmp_path, {"main": list(aligned_contexts)})
    _patch_paths(drift_module, monkeypatch, ci_path, audit_path)

    protection_body = {"status_check_contexts": list(aligned_contexts)}
    canned = {
        ("GET", "/repos/owner/repo/branch_protections/main"): (200, protection_body),
    }
    monkeypatch.setattr(drift_module, "api", _make_stub_api(canned))

    findings, _ = drift_module.detect_drift("main")
    assert findings == [], findings
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# MUST-FIX 1: find_open_issue must raise on transient HTTP errors
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
@@ -337,6 +337,25 @@ func main() {
|
||||
})
|
||||
}
|
||||
|
||||
// CP-mode instance-state reconciler — authoritative EC2-liveness pass
|
||||
// for SaaS workspaces (core#2261). Every other liveness sweep keys off
|
||||
// a PROXY (Redis TTL, agent heartbeat, local Docker, or
|
||||
// runtime='external'); a SaaS claude-code workspace whose EC2 was
|
||||
// terminated/stopped falls through ALL of them and stays status='online'
|
||||
// pointing at a dead instance_id forever (root cause: core#2247). This
|
||||
// loop asks the ONE authoritative question the others lack —
|
||||
// cpProv.IsRunning (CP DescribeInstances-equivalent) — for each online
|
||||
// SaaS row, and on a CLEAN "not running" feeds it into the SAME
|
||||
// onWorkspaceOffline closure the other sweeps use (status flip +
|
||||
// RestartByID reprovision, existing volume). Fail-safe: IsRunning is
|
||||
// (true, err) on any transient error, so a CP blip never flips a healthy
|
||||
// workspace.
|
||||
if cpProv != nil {
|
||||
go supervised.RunWithRecover(ctx, "cp-instance-reconciler", func(c context.Context) {
|
||||
registry.StartCPInstanceReconciler(c, cpProv, onWorkspaceOffline, 60*time.Second)
|
||||
})
|
||||
}
|
||||
|
||||
// Pending-uploads GC sweep — deletes acked rows past their retention
|
||||
// window plus unacked rows past expires_at. Without this the
|
||||
// pending_uploads table grows unbounded; even with the 24h hard TTL,
|
||||
|
||||
@@ -152,7 +152,7 @@ func extractAttachmentsFromMessageParts(body map[string]interface{}) []map[strin
|
||||
if kind == "" {
|
||||
kind, _ = part["type"].(string)
|
||||
}
|
||||
if kind != "file" && kind != "image" && kind != "audio" {
|
||||
if kind != "file" && kind != "image" && kind != "audio" && kind != "video" {
|
||||
continue
|
||||
}
|
||||
// The file sub-object holds uri/mime_type/name. The a2a-sdk v1
|
||||
@@ -380,12 +380,18 @@ func (h *ActivityHandler) List(c *gin.Context) {
|
||||
// "row not found" — both indicate the cursor is no longer usable for
|
||||
// this caller, no information leak.
|
||||
var cursorTime time.Time
|
||||
var cursorSeq int64
|
||||
usingCursor := false
|
||||
if sinceID != "" {
|
||||
// Resolve BOTH ordering-key components of the cursor row. The feed is
|
||||
// ordered by (created_at, seq), so the strictly-after filter below must
|
||||
// compare the full tuple — comparing created_at alone silently drops a
|
||||
// row written in the SAME microsecond as the cursor row (the boundary
|
||||
// skip the since_id E2E intermittently tripped over).
|
||||
err := db.DB.QueryRowContext(c.Request.Context(),
|
||||
`SELECT created_at FROM activity_logs WHERE id = $1 AND workspace_id = $2`,
|
||||
`SELECT created_at, seq FROM activity_logs WHERE id = $1 AND workspace_id = $2`,
|
||||
sinceID, workspaceID,
|
||||
).Scan(&cursorTime)
|
||||
).Scan(&cursorTime, &cursorSeq)
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
c.JSON(http.StatusGone, gin.H{
|
||||
"error": "since_id cursor not found (row may have been pruned or belongs to a different workspace); omit since_id to reset",
|
||||
@@ -492,10 +498,20 @@ func (h *ActivityHandler) List(c *gin.Context) {
|
||||
argIdx++
|
||||
}
|
||||
if usingCursor {
|
||||
// Strictly after — never replay the cursor row itself.
|
||||
query += fmt.Sprintf(" AND "+actCol+"created_at > $%d", argIdx)
|
||||
args = append(args, cursorTime)
|
||||
argIdx++
|
||||
// Strictly after the cursor on the FULL ordering key (created_at, seq).
|
||||
// Tuple comparison: a row is "after" the cursor if its created_at is
|
||||
// later, OR it shares the cursor's created_at but has a higher seq.
|
||||
// This (a) never replays the cursor row itself and (b) — unlike a bare
|
||||
// `created_at > cursor` — never drops a row written in the same
|
||||
// microsecond as the cursor row. Expressed as the expanded boolean
|
||||
// rather than a row-value `(created_at, seq) > ($t, $s)` so it composes
|
||||
// with the actCol qualifier prefix and the existing placeholder/arg
|
||||
// builder cleanly.
|
||||
query += fmt.Sprintf(
|
||||
" AND ("+actCol+"created_at > $%d OR ("+actCol+"created_at = $%d AND "+actCol+"seq > $%d))",
|
||||
argIdx, argIdx, argIdx+1)
|
||||
args = append(args, cursorTime, cursorSeq)
|
||||
argIdx += 2
|
||||
}
|
||||
|
||||
// Polling clients (since_id) need oldest-first within the new window so
|
||||
@@ -503,9 +519,13 @@ func (h *ActivityHandler) List(c *gin.Context) {
|
||||
// since_id) keeps DESC — that's the canvas/UI shape and changing it
|
||||
// would surprise existing callers.
|
||||
if usingCursor {
|
||||
query += fmt.Sprintf(" ORDER BY "+actCol+"created_at ASC LIMIT $%d", argIdx)
|
||||
// (created_at, seq) ASC — seq is the deterministic tiebreaker for rows
|
||||
// sharing a microsecond-collided created_at. Replays in recorded order.
|
||||
query += fmt.Sprintf(" ORDER BY "+actCol+"created_at ASC, "+actCol+"seq ASC LIMIT $%d", argIdx)
|
||||
} else {
|
||||
query += fmt.Sprintf(" ORDER BY "+actCol+"created_at DESC LIMIT $%d", argIdx)
|
||||
// (created_at, seq) DESC — same tiebreaker, newest-first for the
|
||||
// canvas/recent-feed shape.
|
||||
query += fmt.Sprintf(" ORDER BY "+actCol+"created_at DESC, "+actCol+"seq DESC LIMIT $%d", argIdx)
|
||||
}
|
||||
args = append(args, limit)
|
||||
|
||||
@@ -680,7 +700,8 @@ func buildSessionSearchQuery(workspaceID, query string, limit int) (string, []in
|
||||
COALESCE(status, '') AS status,
|
||||
request_body,
|
||||
response_body,
|
||||
created_at
|
||||
created_at,
|
||||
seq
|
||||
FROM activity_logs
|
||||
WHERE workspace_id = $1
|
||||
)
|
||||
@@ -702,7 +723,13 @@ func buildSessionSearchQuery(workspaceID, query string, limit int) (string, []in
|
||||
args = append(args, "%"+query+"%")
|
||||
}
|
||||
|
||||
sqlQuery += ` ORDER BY created_at DESC LIMIT $` + strconv.Itoa(len(args)+1)
|
||||
// Deterministic order: created_at alone is not unique (same-microsecond
|
||||
// rows), so tie-break on the monotonic seq — same fix as the since_id feed
|
||||
// (§ No flakes: no unstable sorts, even on an unused surface). `seq` is
|
||||
// projected through the session_items CTE above so this outer ORDER BY can
|
||||
// reference it — the outer SELECT can only sort on the CTE's output columns,
|
||||
// not on activity_logs directly.
|
||||
sqlQuery += ` ORDER BY created_at DESC, seq DESC LIMIT $` + strconv.Itoa(len(args)+1)
|
||||
args = append(args, limit)
|
||||
return sqlQuery, args
|
||||
}
|
||||
|
||||
@@ -118,6 +118,23 @@ func TestExtractAttachmentsFromRequestBody_ImageAndAudio(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractAttachmentsFromRequestBody_VideoPart(t *testing.T) {
|
||||
// Video parts are accepted in message-parts envelope (issue #2222).
|
||||
body := []byte(`{"jsonrpc":"2.0","method":"message/send","params":{"message":{"parts":[
|
||||
{"kind":"video","file":{"uri":"workspace:clip.mp4","mime_type":"video/mp4","name":"clip.mp4"}}
|
||||
]}}}`)
|
||||
atts := extractAttachmentsFromRequestBody(body)
|
||||
if len(atts) != 1 {
|
||||
t.Fatalf("want 1 attachment, got %d", len(atts))
|
||||
}
|
||||
if atts[0]["kind"] != "video" {
|
||||
t.Errorf("kind: want video, got %v", atts[0]["kind"])
|
||||
}
|
||||
if atts[0]["uri"] != "workspace:clip.mp4" {
|
||||
t.Errorf("uri mismatch: %v", atts[0]["uri"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractAttachmentsFromRequestBody_LegacyV0TypeDiscriminator(t *testing.T) {
|
||||
// Legacy v0 shape: type=file (not kind), inlined fields (no nested .file)
|
||||
body := []byte(`{"jsonrpc":"2.0","method":"message/send","params":{"message":{"parts":[
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
//go:build integration
|
||||
// +build integration
|
||||
|
||||
// activity_seq_backfill_integration_test.go — REAL Postgres proof of the
|
||||
// invariant the 20260604000000_activity_logs_seq.up.sql migration guarantees:
|
||||
// every activity_logs row carries a NON-NULL `seq`, both for rows that existed
|
||||
// before the migration ran (assigned during the ALTER TABLE rewrite) and for
|
||||
// rows created afterward via the normal INSERT path (assigned by the IDENTITY
|
||||
// default). This is the coverage CR2 (#2339 review) correctly flagged as
|
||||
// missing on PR #2258.
|
||||
//
|
||||
// WHY THIS IS A SEPARATE TEST from activity_since_id_ordering_integration_test.go:
|
||||
// that test pins the *ordering* contract (same-microsecond rows come back in a
|
||||
// deterministic (created_at, seq) order). THIS test pins the *backfill* contract
|
||||
// — that `seq` is never NULL — and the consequence the reviewer doubted: a
|
||||
// pre-existing/backfilled row is usable as a since_id cursor because its seq is
|
||||
// non-null, so the tuple cursor `(created_at, seq)` the handler builds is well
|
||||
// defined for it.
|
||||
//
|
||||
// EMPIRICAL BASIS (PostgreSQL 16.13, the prod PG version):
|
||||
// - `ALTER TABLE activity_logs ADD COLUMN seq BIGINT GENERATED BY DEFAULT AS
|
||||
// IDENTITY` rewrites the table and assigns seq to EXISTING rows in physical
|
||||
// table-scan order — they are NON-NULL, not left NULL as the review claimed.
|
||||
// - The identity sequence then advances ABOVE max(seq), so the next INSERT
|
||||
// that omits seq gets max+1 with no collision.
|
||||
// Run against any Postgres 15/16 the integration harness boots — the property
|
||||
// holds on both.
|
||||
//
|
||||
// Run with (same harness as activity_delegation_a2a_integration_test.go):
|
||||
//
|
||||
// docker run --rm -d --name pg-integration \
|
||||
// -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
|
||||
// -p 55432:5432 postgres:15-alpine
|
||||
// sleep 4
|
||||
// # apply migrations (incl. 20260604000000_activity_logs_seq.up.sql) then:
|
||||
// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
|
||||
// go test -tags=integration ./internal/handlers/ -run Integration_ActivityLogs_Seq
|
||||
//
|
||||
// WATCH-IT-FAIL: if `seq` were left nullable / un-backfilled (the failure mode
|
||||
// the reviewer hypothesized), the NULL-count assertion in _NoNull trips, and
|
||||
// the since_id-on-a-backfilled-row case in _SinceIDOnBackfilledRow trips because
|
||||
// the handler cannot read a non-null seq for the cursor row. With the migration
|
||||
// as written both are green every run.
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// TestIntegration_ActivityLogs_SeqBackfill_NoNull pins the core migration
|
||||
// invariant: AFTER migrations have run, NO activity_logs row may have a NULL
|
||||
// seq — neither rows that the seedActivityRowAt path inserts (IDENTITY default)
|
||||
// nor any row the schema carries. It also proves the IDENTITY sequence keeps
|
||||
// producing distinct, non-null seq for fresh inserts (no collision, no NULL).
|
||||
//
|
||||
// This is the assertion that would FAIL if the ALTER had left existing rows
|
||||
// with NULL seq (the reviewer's claim) — table-scan backfill makes it pass.
|
||||
func TestIntegration_ActivityLogs_SeqBackfill_NoNull(t *testing.T) {
|
||||
conn := integrationDB_ActivityDelegationA2A(t)
|
||||
_ = conn
|
||||
wsID := seedWorkspace(t, conn, "test-2151-seq-backfill-nonull")
|
||||
|
||||
// Insert several rows via the normal path. seq is left to the IDENTITY
|
||||
// default — exactly how production writes activity_logs.
|
||||
t0 := time.Date(2026, 6, 4, 9, 0, 0, 0, time.UTC)
|
||||
const n = 5
|
||||
ids := make([]string, 0, n)
|
||||
for i := 0; i < n; i++ {
|
||||
ids = append(ids, seedActivityRowAt(t, wsID, "backfill-row", t0.Add(time.Duration(i)*time.Second)))
|
||||
}
|
||||
|
||||
// (a) No row in this workspace may have a NULL seq. If the column were
|
||||
// un-backfilled / nullable this is > 0 and the test fails.
|
||||
var nullCount int
|
||||
if err := db.DB.QueryRowContext(context.Background(),
|
||||
`SELECT COUNT(*) FROM activity_logs WHERE workspace_id = $1 AND seq IS NULL`,
|
||||
wsID,
|
||||
).Scan(&nullCount); err != nil {
|
||||
t.Fatalf("null-seq count query: %v", err)
|
||||
}
|
||||
if nullCount != 0 {
|
||||
t.Fatalf("found %d activity_logs rows with NULL seq — migration did NOT backfill/assign seq", nullCount)
|
||||
}
|
||||
|
||||
// Belt-and-suspenders: the GLOBAL invariant (no NULL seq anywhere in the
|
||||
// table) is what the migration actually guarantees. Assert it too, so a
|
||||
// regression that nulls seq for rows written by some other path is caught.
|
||||
var globalNull int
|
||||
if err := db.DB.QueryRowContext(context.Background(),
|
||||
`SELECT COUNT(*) FROM activity_logs WHERE seq IS NULL`,
|
||||
).Scan(&globalNull); err != nil {
|
||||
t.Fatalf("global null-seq count query: %v", err)
|
||||
}
|
||||
if globalNull != 0 {
|
||||
t.Fatalf("found %d activity_logs rows table-wide with NULL seq — seq must be non-null for every row", globalNull)
|
||||
}
|
||||
|
||||
// (b) The IDENTITY sequence yields DISTINCT, monotonic, non-null seq for
|
||||
// the rows we just inserted (proves the normal insert path gets a real seq,
|
||||
// and that the sequence advanced past any backfilled max instead of
|
||||
// colliding). We read them back in insert order and require strictly
|
||||
// increasing, all-non-null seq.
|
||||
rows, err := db.DB.QueryContext(context.Background(),
|
||||
`SELECT seq FROM activity_logs WHERE workspace_id = $1 ORDER BY created_at ASC, seq ASC`,
|
||||
wsID,
|
||||
)
|
||||
if err != nil {
|
||||
t.Fatalf("read-back seq query: %v", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
var seqs []int64
|
||||
for rows.Next() {
|
||||
var s *int64 // pointer so a NULL would scan as nil rather than 0
|
||||
if err := rows.Scan(&s); err != nil {
|
||||
t.Fatalf("scan seq: %v", err)
|
||||
}
|
||||
if s == nil {
|
||||
t.Fatal("a freshly-inserted activity_logs row has NULL seq — IDENTITY default did not fire")
|
||||
}
|
||||
seqs = append(seqs, *s)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
t.Fatalf("rows err: %v", err)
|
||||
}
|
||||
if len(seqs) != n {
|
||||
t.Fatalf("expected %d rows, read back %d", n, len(seqs))
|
||||
}
|
||||
for i := 1; i < len(seqs); i++ {
|
||||
if seqs[i] <= seqs[i-1] {
|
||||
t.Fatalf("seq not strictly increasing in insert order: %v (IDENTITY collision / reuse)", seqs)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_ActivityLogs_SeqBackfill_SinceIDOnBackfilledRow pins the
|
||||
// consequence the reviewer doubted: a row whose seq came from the migration /
|
||||
// IDENTITY (i.e. NOT explicitly set by the caller) is usable as a since_id
|
||||
// cursor, and a SECOND row sharing its exact created_at microsecond is returned
|
||||
// (not dropped). This proves the handler's (created_at, seq) tuple cursor
|
||||
// resolves a same-timestamp boundary that a created_at-only cursor would drop,
|
||||
// AND that the cursor row's seq is non-null (else the handler could not build
|
||||
// the tuple at all).
|
||||
//
|
||||
// Distinct from _BoundaryRowSameMicrosecondNotSkipped in the ordering test:
|
||||
// here the explicit angle under test is "the cursor row's seq is a
|
||||
// migration/IDENTITY-assigned (backfilled-style) value, non-null, and the
|
||||
// handler uses it" — i.e. the backfill behavior is what makes the boundary
|
||||
// resolution work, pinned head-on.
|
||||
func TestIntegration_ActivityLogs_SeqBackfill_SinceIDOnBackfilledRow(t *testing.T) {
|
||||
conn := integrationDB_ActivityDelegationA2A(t)
|
||||
_ = conn
|
||||
wsID := seedWorkspace(t, conn, "test-2151-seq-backfill-sinceid")
|
||||
|
||||
tSame := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
|
||||
// Cursor row: seq comes purely from the IDENTITY default (never set by
|
||||
// the caller) — the same assignment mechanism the migration uses to
|
||||
// backfill pre-existing rows. The "next" row shares the exact created_at
|
||||
// microsecond and is inserted afterward, so it gets a strictly higher seq.
|
||||
cursorID := seedActivityRowAt(t, wsID, "sinceid-cursor", tSame)
|
||||
nextID := seedActivityRowAt(t, wsID, "sinceid-next-same-us", tSame)
|
||||
|
||||
// Prove the precondition the reviewer doubted: the cursor row's seq is
|
||||
// NON-NULL, so the handler can read it to build the (created_at, seq)
|
||||
// tuple. If it were NULL the handler's cursor lookup would yield a NULL
|
||||
// seq and the strictly-after tuple comparison would mis-behave.
|
||||
var cursorSeq *int64
|
||||
if err := db.DB.QueryRowContext(context.Background(),
|
||||
`SELECT seq FROM activity_logs WHERE id = $1`, cursorID,
|
||||
).Scan(&cursorSeq); err != nil {
|
||||
t.Fatalf("read cursor seq: %v", err)
|
||||
}
|
||||
if cursorSeq == nil {
|
||||
t.Fatal("cursor row has NULL seq — a since_id cursor on a backfilled-style row would be unusable")
|
||||
}
|
||||
|
||||
h := NewActivityHandler(nil)
|
||||
c, w := newTestGinContext()
|
||||
c.Params = gin.Params{{Key: "id", Value: wsID}}
|
||||
q := c.Request.URL.Query()
|
||||
q.Set("since_id", cursorID)
|
||||
q.Set("type", "a2a_receive")
|
||||
q.Set("limit", "10")
|
||||
c.Request.URL.RawQuery = q.Encode()
|
||||
|
||||
h.List(c)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("List returned %d, want 200: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp []map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
// Exactly the one same-microsecond row after the cursor — present (not
|
||||
// dropped by a strict created_at-only filter) and the cursor itself
|
||||
// excluded (strictly-after on the full tuple).
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("same-microsecond row after backfilled-style cursor dropped: expected 1 row, got %d: %+v",
|
||||
len(resp), resp)
|
||||
}
|
||||
if got, _ := resp[0]["id"].(string); got != nextID {
|
||||
t.Fatalf("expected boundary row id %s, got %s", nextID, got)
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,162 @@
|
||||
//go:build integration
|
||||
// +build integration
|
||||
|
||||
// activity_since_id_ordering_integration_test.go — REAL Postgres proof that
|
||||
// the poll-mode since_id activity feed (#2339) is DETERMINISTICALLY ordered
|
||||
// even when multiple rows collide on the same created_at microsecond.
|
||||
//
|
||||
// This is the test that the original bug report mis-labeled a "flake".
|
||||
// sqlmock cannot catch it: sqlmock returns rows in the order the test stuffs
|
||||
// them, so it can never reveal a non-deterministic ORDER BY. Only a real
|
||||
// planner over real same-created_at rows exposes it.
|
||||
//
|
||||
// Run with (same harness as activity_delegation_a2a_integration_test.go):
|
||||
//
|
||||
// docker run --rm -d --name pg-integration \
|
||||
// -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
|
||||
// -p 55432:5432 postgres:15-alpine
|
||||
// sleep 4
|
||||
// # apply migrations (incl. 20260604000000_activity_logs_seq.up.sql) then:
|
||||
// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
|
||||
// go test -tags=integration ./internal/handlers/ -run Integration_SinceID
|
||||
//
|
||||
// WATCH-IT-FAIL: against the pre-fix handler (ORDER BY created_at only, no
|
||||
// seq tiebreaker, and `created_at > cursor` strict) this test is unstable —
|
||||
// the equal-created_at rows come back in arbitrary planner order so the
|
||||
// ordered-id assertion fails intermittently, and the same-microsecond
|
||||
// boundary row is dropped so the count assertion fails. With the fix
|
||||
// (ORDER BY created_at, seq + tuple cursor) it is green every run.
|
||||
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// seedActivityRowAt inserts one activity_logs row with an explicit created_at
|
||||
// (so the test can force microsecond-equal collisions) and a unique summary;
|
||||
// returns the generated id. seq is left to the IDENTITY default — Postgres
|
||||
// assigns it in INSERT order, which is the deterministic tiebreaker under test.
|
||||
// db.DB has been hot-swapped to the integration connection by
|
||||
// integrationDB_ActivityDelegationA2A(t) in the calling test.
|
||||
func seedActivityRowAt(t *testing.T, wsID, summary string, createdAt time.Time) string {
|
||||
t.Helper()
|
||||
var id string
|
||||
err := db.DB.QueryRowContext(context.Background(), `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, summary, status, created_at)
|
||||
VALUES ($1, 'a2a_receive', $2, 'ok', $3)
|
||||
RETURNING id
|
||||
`, wsID, summary, createdAt).Scan(&id)
|
||||
if err != nil {
|
||||
t.Fatalf("seedActivityRowAt(%q): %v", summary, err)
|
||||
}
|
||||
return id
|
||||
}
|
||||
|
||||
// TestIntegration_SinceID_StableOrderingSameMicrosecond proves the feed is
|
||||
// deterministic when rows share a created_at, AND that the same-microsecond
|
||||
// boundary row immediately after the cursor is NOT dropped.
|
||||
func TestIntegration_SinceID_StableOrderingSameMicrosecond(t *testing.T) {
|
||||
conn := integrationDB_ActivityDelegationA2A(t)
|
||||
_ = conn
|
||||
wsID := seedWorkspace(t, conn, "test-2151-sinceid-ordering")
|
||||
|
||||
// One earlier row to serve as the cursor (the "last processed" row).
|
||||
tCursor := time.Date(2026, 6, 4, 12, 0, 0, 0, time.UTC)
|
||||
cursorID := seedActivityRowAt(t, wsID, "cursor-row", tCursor)
|
||||
|
||||
// Three rows that ALL collide on the exact same created_at microsecond,
|
||||
// inserted in a known order. Pre-fix, ORDER BY created_at alone returns
|
||||
// these in arbitrary planner order.
|
||||
tEqual := time.Date(2026, 6, 4, 12, 0, 1, 0, time.UTC)
|
||||
idA := seedActivityRowAt(t, wsID, "equal-A", tEqual)
|
||||
idB := seedActivityRowAt(t, wsID, "equal-B", tEqual)
|
||||
idCc := seedActivityRowAt(t, wsID, "equal-C", tEqual)
|
||||
wantOrder := []string{idA, idB, idCc}
|
||||
|
||||
// Drive the handler exactly as a polling client would.
|
||||
h := NewActivityHandler(nil)
|
||||
c, w := newTestGinContext()
|
||||
c.Params = gin.Params{{Key: "id", Value: wsID}}
|
||||
q := c.Request.URL.Query()
|
||||
q.Set("since_id", cursorID)
|
||||
q.Set("type", "a2a_receive")
|
||||
q.Set("limit", "10")
|
||||
c.Request.URL.RawQuery = q.Encode()
|
||||
|
||||
h.List(c)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("List returned %d, want 200: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp []map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
|
||||
// All three equal-created_at rows must be present (boundary not dropped)
|
||||
// and the cursor row itself must be excluded (strictly-after).
|
||||
if len(resp) != len(wantOrder) {
|
||||
t.Fatalf("expected %d rows after cursor (the 3 equal-created_at rows), got %d: %+v",
|
||||
len(wantOrder), len(resp), resp)
|
||||
}
|
||||
|
||||
gotOrder := make([]string, len(resp))
|
||||
for i, row := range resp {
|
||||
idVal, _ := row["id"].(string)
|
||||
gotOrder[i] = idVal
|
||||
}
|
||||
for i := range wantOrder {
|
||||
if gotOrder[i] != wantOrder[i] {
|
||||
t.Fatalf("non-deterministic ordering: got id order %v, want %v (seq tiebreaker not applied)",
|
||||
gotOrder, wantOrder)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestIntegration_SinceID_BoundaryRowSameMicrosecondNotSkipped isolates the
|
||||
// cursor-boundary bug: a row written in the SAME microsecond as the cursor
|
||||
// row (but with a higher seq) must still be returned. Pre-fix the strict
|
||||
// `created_at > cursor` filter silently dropped it.
|
||||
func TestIntegration_SinceID_BoundaryRowSameMicrosecondNotSkipped(t *testing.T) {
|
||||
conn := integrationDB_ActivityDelegationA2A(t)
|
||||
_ = conn
|
||||
wsID := seedWorkspace(t, conn, "test-2151-sinceid-boundary")
|
||||
|
||||
tSame := time.Date(2026, 6, 4, 13, 0, 0, 0, time.UTC)
|
||||
// Cursor row and the next row share the exact same created_at; the next
|
||||
// row is inserted afterwards so it gets a higher seq.
|
||||
cursorID := seedActivityRowAt(t, wsID, "boundary-cursor", tSame)
|
||||
nextID := seedActivityRowAt(t, wsID, "boundary-next-same-us", tSame)
|
||||
|
||||
h := NewActivityHandler(nil)
|
||||
c, w := newTestGinContext()
|
||||
c.Params = gin.Params{{Key: "id", Value: wsID}}
|
||||
q := c.Request.URL.Query()
|
||||
q.Set("since_id", cursorID)
|
||||
q.Set("type", "a2a_receive")
|
||||
q.Set("limit", "10")
|
||||
c.Request.URL.RawQuery = q.Encode()
|
||||
|
||||
h.List(c)
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("List returned %d, want 200: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp []map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("unmarshal: %v", err)
|
||||
}
|
||||
if len(resp) != 1 {
|
||||
t.Fatalf("same-microsecond boundary row dropped: expected exactly the 1 next row, got %d rows: %+v",
|
||||
len(resp), resp)
|
||||
}
|
||||
if got, _ := resp[0]["id"].(string); got != nextID {
|
||||
t.Fatalf("expected boundary row id %s, got %s", nextID, got)
|
||||
}
|
||||
}
|
||||
@@ -26,17 +26,21 @@ func TestActivityHandler_SinceID_ReturnsNewerASC(t *testing.T) {
|
||||
|
||||
cursorID := "act-cursor-42"
|
||||
cursorTime := time.Date(2026, 4, 30, 5, 0, 0, 0, time.UTC)
|
||||
cursorSeq := int64(42)
|
||||
|
||||
// Step 1: cursor lookup — must include workspace_id scope so a UUID
|
||||
// from another workspace can't be used.
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
// from another workspace can't be used. Now resolves BOTH ordering-key
|
||||
// components (created_at, seq) so the strictly-after filter can compare
|
||||
// the full tuple.
|
||||
mock.ExpectQuery(`SELECT created_at, seq FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs(cursorID, "ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"created_at"}).AddRow(cursorTime))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"created_at", "seq"}).AddRow(cursorTime, cursorSeq))
|
||||
|
||||
// Step 2: main query with the cursor's created_at as a > filter,
|
||||
// ASC ordering. Args: workspace_id, cursorTime, limit.
|
||||
// Step 2: main query with the cursor's (created_at, seq) as a tuple
|
||||
// strictly-after filter, (created_at, seq) ASC ordering.
|
||||
// Args: workspace_id, cursorTime, cursorSeq, limit.
|
||||
mock.ExpectQuery("SELECT id, workspace_id, activity_type").
|
||||
WithArgs("ws-1", cursorTime, 100).
|
||||
WithArgs("ws-1", cursorTime, cursorSeq, 100).
|
||||
WillReturnRows(newActivityRows())
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
@@ -64,7 +68,7 @@ func TestActivityHandler_SinceID_ReturnsNewerASC(t *testing.T) {
|
||||
func TestActivityHandler_SinceID_CursorNotFound_410(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
mock.ExpectQuery(`SELECT created_at, seq FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs("act-gone", "ws-1").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
@@ -96,7 +100,7 @@ func TestActivityHandler_SinceID_CrossWorkspaceCursor_410(t *testing.T) {
|
||||
|
||||
// Cursor exists in DB but the WHERE workspace_id = $2 filter excludes
|
||||
// it — sqlmock returns no rows, which is what Postgres would do.
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
mock.ExpectQuery(`SELECT created_at, seq FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs("act-other-ws", "ws-1").
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
|
||||
@@ -120,20 +124,23 @@ func TestActivityHandler_SinceID_CrossWorkspaceCursor_410(t *testing.T) {
|
||||
|
||||
// TestActivityHandler_SinceID_CombinedWithSinceSecs: both filters apply
|
||||
// together (AND). Argument order in the main query: workspace_id,
|
||||
// since_secs, cursorTime, limit. Sanity-checks the placeholder index
|
||||
// arithmetic in the query builder.
|
||||
// since_secs, cursorTime, cursorSeq, limit. Sanity-checks the placeholder
|
||||
// index arithmetic in the query builder (the cursor now binds TWO args —
|
||||
// the (created_at, seq) tuple — so since_secs no longer shifts the tail by
|
||||
// one but by two).
|
||||
func TestActivityHandler_SinceID_CombinedWithSinceSecs(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
cursorID := "act-c"
|
||||
cursorTime := time.Date(2026, 4, 30, 4, 0, 0, 0, time.UTC)
|
||||
cursorSeq := int64(7)
|
||||
|
||||
mock.ExpectQuery(`SELECT created_at FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
mock.ExpectQuery(`SELECT created_at, seq FROM activity_logs WHERE id = \$1 AND workspace_id = \$2`).
|
||||
WithArgs(cursorID, "ws-1").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"created_at"}).AddRow(cursorTime))
|
||||
WillReturnRows(sqlmock.NewRows([]string{"created_at", "seq"}).AddRow(cursorTime, cursorSeq))
|
||||
|
||||
mock.ExpectQuery("SELECT id, workspace_id, activity_type").
|
||||
WithArgs("ws-1", 600, cursorTime, 100).
|
||||
WithArgs("ws-1", 600, cursorTime, cursorSeq, 100).
|
||||
WillReturnRows(newActivityRows())
|
||||
|
||||
broadcaster := newTestBroadcaster()
|
||||
|
||||
@@ -126,6 +126,32 @@ var mcpAllTools = []mcpTool{
|
||||
"type": "string",
|
||||
"description": "The task description to send to the target workspace",
|
||||
},
|
||||
"attachments": map[string]interface{}{
|
||||
"type": "array",
|
||||
"description": "Optional files to send with the task. Each item must include uri and name; mimeType and size are optional.",
|
||||
"items": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"uri": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Workspace attachment URI, usually workspace:/absolute/path",
|
||||
},
|
||||
"name": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Display filename",
|
||||
},
|
||||
"mimeType": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Optional MIME type",
|
||||
},
|
||||
"size": map[string]interface{}{
|
||||
"type": "number",
|
||||
"description": "Optional file size in bytes",
|
||||
},
|
||||
},
|
||||
"required": []string{"uri", "name"},
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": []string{"workspace_id", "task"},
|
||||
},
|
||||
@@ -144,6 +170,32 @@ var mcpAllTools = []mcpTool{
|
||||
"type": "string",
|
||||
"description": "The task description to send to the target workspace",
|
||||
},
|
||||
"attachments": map[string]interface{}{
|
||||
"type": "array",
|
||||
"description": "Optional files to send with the task. Each item must include uri and name; mimeType and size are optional.",
|
||||
"items": map[string]interface{}{
|
||||
"type": "object",
|
||||
"properties": map[string]interface{}{
|
||||
"uri": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Workspace attachment URI, usually workspace:/absolute/path",
|
||||
},
|
||||
"name": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Display filename",
|
||||
},
|
||||
"mimeType": map[string]interface{}{
|
||||
"type": "string",
|
||||
"description": "Optional MIME type",
|
||||
},
|
||||
"size": map[string]interface{}{
|
||||
"type": "number",
|
||||
"description": "Optional file size in bytes",
|
||||
},
|
||||
},
|
||||
"required": []string{"uri", "name"},
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": []string{"workspace_id", "task"},
|
||||
},
|
||||
|
||||
@@ -285,6 +285,121 @@ func TestMCPHandler_DelegateTaskAsync_RoutesThroughPlatformA2AProxy(t *testing.T
|
||||
// goroutine returns early and never calls proxyA2ARequest with a nil/empty
|
||||
// body. Before the fix the goroutine logged the error and fell through,
|
||||
// dispatching a malformed A2A request.
|
||||
|
||||
func TestMCPHandler_DelegateTask_WithAttachments(t *testing.T) {
|
||||
h, mock := newMCPHandler(t)
|
||||
callerID := "11111111-1111-1111-1111-111111111111"
|
||||
targetID := "22222222-2222-2222-2222-222222222222"
|
||||
parentID := "33333333-3333-3333-3333-333333333333"
|
||||
|
||||
expectCanCommunicateSiblings(mock, callerID, targetID, parentID)
|
||||
mock.ExpectExec(`(?s)INSERT INTO activity_logs.*'delegation'.*'delegate'`).
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
h.a2aProxy = func(ctx context.Context, workspaceID string, body []byte, proxyCallerID string, logActivity bool) (int, []byte, error) {
|
||||
if workspaceID != targetID || proxyCallerID != callerID {
|
||||
t.Fatalf("unexpected proxy route target=%q caller=%q", workspaceID, proxyCallerID)
|
||||
}
|
||||
bodyStr := string(body)
|
||||
if !strings.Contains(bodyStr, `"text":"review this video"`) {
|
||||
t.Fatalf("A2A body missing task text: %s", bodyStr)
|
||||
}
|
||||
if !strings.Contains(bodyStr, `"kind":"video"`) {
|
||||
t.Fatalf("A2A body missing video attachment kind: %s", bodyStr)
|
||||
}
|
||||
if !strings.Contains(bodyStr, `"uri":"workspace:/tmp/clip.mp4"`) {
|
||||
t.Fatalf("A2A body missing attachment uri: %s", bodyStr)
|
||||
}
|
||||
if !strings.Contains(bodyStr, `"mime_type":"video/mp4"`) {
|
||||
t.Fatalf("A2A body missing attachment mime_type: %s", bodyStr)
|
||||
}
|
||||
return 200, []byte(`{"result":{"message":{"parts":[{"text":"done"}]}}}`), nil
|
||||
}
|
||||
|
||||
out, err := h.toolDelegateTask(context.Background(), callerID, map[string]interface{}{
|
||||
"workspace_id": targetID,
|
||||
"task": "review this video",
|
||||
"attachments": []interface{}{
|
||||
map[string]interface{}{
|
||||
"uri": "workspace:/tmp/clip.mp4",
|
||||
"name": "clip.mp4",
|
||||
"mimeType": "video/mp4",
|
||||
"size": 12345,
|
||||
},
|
||||
},
|
||||
}, mcpCallTimeout)
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task returned error: %v", err)
|
||||
}
|
||||
if out != "done" {
|
||||
t.Fatalf("delegate_task response = %q, want done", out)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMCPHandler_DelegateTaskAsync_WithAttachments(t *testing.T) {
|
||||
h, mock := newMCPHandler(t)
|
||||
callerID := "11111111-1111-1111-1111-111111111111"
|
||||
targetID := "22222222-2222-2222-2222-222222222222"
|
||||
parentID := "33333333-3333-3333-3333-333333333333"
|
||||
|
||||
expectCanCommunicateSiblings(mock, callerID, targetID, parentID)
|
||||
mock.ExpectExec(`(?s)INSERT INTO activity_logs.*'delegation'.*'delegate'`).
|
||||
WithArgs(callerID, callerID, targetID, "Delegating to "+targetID, sqlmock.AnyArg(), "pending").
|
||||
WillReturnResult(sqlmock.NewResult(1, 1))
|
||||
mock.ExpectExec(`UPDATE activity_logs`).
|
||||
WithArgs("dispatched", "", callerID, sqlmock.AnyArg()).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
called := make(chan []byte, 1)
|
||||
h.a2aProxy = func(ctx context.Context, workspaceID string, body []byte, proxyCallerID string, logActivity bool) (int, []byte, error) {
|
||||
if workspaceID != targetID || proxyCallerID != callerID {
|
||||
t.Fatalf("unexpected proxy route target=%q caller=%q", workspaceID, proxyCallerID)
|
||||
}
|
||||
called <- body
|
||||
return 200, []byte(`{"result":{"message":{"parts":[{"text":"accepted"}]}}}`), nil
|
||||
}
|
||||
|
||||
out, err := h.toolDelegateTaskAsync(context.Background(), callerID, map[string]interface{}{
|
||||
"workspace_id": targetID,
|
||||
"task": "async work with image",
|
||||
"attachments": []interface{}{
|
||||
map[string]interface{}{
|
||||
"uri": "workspace:/tmp/screenshot.png",
|
||||
"name": "screenshot.png",
|
||||
"mimeType": "image/png",
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("delegate_task_async returned error: %v", err)
|
||||
}
|
||||
if !strings.Contains(out, `"status":"dispatched"`) {
|
||||
t.Fatalf("delegate_task_async response = %s", out)
|
||||
}
|
||||
waitGlobalAsyncForTest()
|
||||
select {
|
||||
case body := <-called:
|
||||
bodyStr := string(body)
|
||||
if !strings.Contains(bodyStr, `"kind":"image"`) {
|
||||
t.Fatalf("A2A body missing image attachment kind: %s", bodyStr)
|
||||
}
|
||||
if !strings.Contains(bodyStr, `"uri":"workspace:/tmp/screenshot.png"`) {
|
||||
t.Fatalf("A2A body missing attachment uri: %s", bodyStr)
|
||||
}
|
||||
default:
|
||||
t.Fatal("async delegate did not call platform A2A proxy")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
func TestMCPHandler_DelegateTaskAsync_MarshalFailureDoesNotCallProxy(t *testing.T) {
|
||||
h, mock := newMCPHandler(t)
|
||||
callerID := "11111111-1111-1111-1111-111111111111"
|
||||
|
||||
@@ -187,6 +187,28 @@ func (h *MCPHandler) toolGetWorkspaceInfo(ctx context.Context, workspaceID strin
|
||||
return string(b), nil
|
||||
}
|
||||
|
||||
// buildA2AMessageParts constructs the A2A message parts array from a task string
|
||||
// and optional attachments. The text part always comes first; attachment parts
|
||||
// follow in the order provided, with kind derived from MIME type.
|
||||
func buildA2AMessageParts(task string, attachments []AgentMessageAttachment) []map[string]interface{} {
|
||||
parts := []map[string]interface{}{
|
||||
{"type": "text", "text": task},
|
||||
}
|
||||
for _, att := range attachments {
|
||||
kind := kindFromMimeType(att.MimeType)
|
||||
filePart := map[string]interface{}{
|
||||
"kind": kind,
|
||||
"file": map[string]interface{}{
|
||||
"uri": att.URI,
|
||||
"mime_type": att.MimeType,
|
||||
"name": att.Name,
|
||||
},
|
||||
}
|
||||
parts = append(parts, filePart)
|
||||
}
|
||||
return parts
|
||||
}
|
||||
|
||||
func (h *MCPHandler) toolDelegateTask(ctx context.Context, callerID string, args map[string]interface{}, timeout time.Duration) (string, error) {
|
||||
targetID, _ := args["workspace_id"].(string)
|
||||
task, _ := args["task"].(string)
|
||||
@@ -208,6 +230,8 @@ func (h *MCPHandler) toolDelegateTask(ctx context.Context, callerID string, args
|
||||
// Non-fatal: still make the A2A call even if activity log write fails.
|
||||
}
|
||||
|
||||
attachments, _ := parseAgentMessageAttachments(args["attachments"])
|
||||
|
||||
a2aBody, err := json.Marshal(map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": uuid.New().String(),
|
||||
@@ -215,7 +239,7 @@ func (h *MCPHandler) toolDelegateTask(ctx context.Context, callerID string, args
|
||||
"params": map[string]interface{}{
|
||||
"message": map[string]interface{}{
|
||||
"role": "user",
|
||||
"parts": []map[string]interface{}{{"type": "text", "text": task}},
|
||||
"parts": buildA2AMessageParts(task, attachments),
|
||||
"messageId": uuid.New().String(),
|
||||
},
|
||||
},
|
||||
@@ -275,6 +299,8 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
bgCtx, cancel := context.WithTimeout(context.Background(), mcpAsyncCallTimeout)
|
||||
defer cancel()
|
||||
|
||||
attachments, _ := parseAgentMessageAttachments(args["attachments"])
|
||||
|
||||
a2aBody, marshalErr := marshalA2ABody(map[string]interface{}{
|
||||
"jsonrpc": "2.0",
|
||||
"id": delegationID,
|
||||
@@ -282,7 +308,7 @@ func (h *MCPHandler) toolDelegateTaskAsync(ctx context.Context, callerID string,
|
||||
"params": map[string]interface{}{
|
||||
"message": map[string]interface{}{
|
||||
"role": "user",
|
||||
"parts": []map[string]interface{}{{"type": "text", "text": task}},
|
||||
"parts": buildA2AMessageParts(task, attachments),
|
||||
"messageId": uuid.New().String(),
|
||||
},
|
||||
},
|
||||
|
||||
@@ -24,13 +24,23 @@ import (
|
||||
// validateRegisteredModelForRuntime reports whether (runtime, model) is
|
||||
// selectable per the provider registry. Returns:
|
||||
//
|
||||
// (true, "") — allowed: model is registered for this runtime, OR the
|
||||
// runtime is not in the registry (fail-open), OR model=="".
|
||||
// (false, reason) — rejected: the runtime IS registered but the model is not
|
||||
// in its native ModelsForRuntime set.
|
||||
// (true, "") — allowed: model is on the runtime's platform menu
|
||||
// (ModelsForRuntime) OR DeriveProvider(runtime, model)
|
||||
// RESOLVES a native provider (the cp#529 routability-aware
|
||||
// BYOK path), OR the runtime is not in the registry
|
||||
// (fail-open), OR model=="".
|
||||
// (false, reason) — rejected: the runtime IS registered, the model is not on
|
||||
// its platform menu, AND no native provider prefix-owns it
|
||||
// (genuinely unroutable).
|
||||
//
|
||||
// model=="" is allowed here: the MODEL_REQUIRED gate owns the empty-model case,
|
||||
// so this validator must not double-reject it.
|
||||
//
|
||||
// ROUTABILITY-AWARE (cp#529, CTO Option C): the final predicate is an OR —
|
||||
// `model ∈ ModelsForRuntime(runtime)` OR `DeriveProvider(runtime, model, nil)`
|
||||
// resolves. The platform menu carries platform-billed ids; the DeriveProvider
|
||||
// path covers BYOK ids that prefix-match a name-only native arm (no platform
|
||||
// billing). The drift checker in molecule-controlplane mirrors this exact OR.
|
||||
func validateRegisteredModelForRuntime(runtime, model string) (bool, string) {
|
||||
model = strings.TrimSpace(model)
|
||||
if model == "" {
|
||||
@@ -52,6 +62,24 @@ func validateRegisteredModelForRuntime(runtime, model string) (bool, string) {
|
||||
return true, ""
|
||||
}
|
||||
}
|
||||
// ROUTABILITY-AWARE allow path (cp#529, CTO-approved Option C). The model is
|
||||
// NOT on the runtime's platform menu (ModelsForRuntime) — but a model can be
|
||||
// legitimately SELECTABLE without being a platform-menu id: a BYOK id whose
|
||||
// prefix matches one of the runtime's NATIVE provider arms (a name-only arm
|
||||
// added in providers.yaml) resolves to a concrete provider via DeriveProvider
|
||||
// even though it carries no platform billing. Allow it iff DeriveProvider
|
||||
// resolves a provider for (runtime, model). A genuinely-unroutable id (no
|
||||
// native provider prefix-owns it) still falls through to the 422 below.
|
||||
//
|
||||
// BILLING GUARDRAIL: only CONFIRMED-NON-PLATFORM (BYOK) providers are wired as
|
||||
// name-only arms in providers.yaml (never platform/anthropic-*/openai-*/
|
||||
// moonshot/minimax/google/vertex), so a DeriveProvider-resolved id reached by
|
||||
// THIS path can never bill the platform's key for a customer's model. The
|
||||
// platform-menu ids that DO carry platform billing are already allowed by the
|
||||
// exact-membership loop above; this path only ever resolves to a BYOK arm.
|
||||
if _, derr := m.DeriveProvider(runtime, model, nil); derr == nil {
|
||||
return true, ""
|
||||
}
|
||||
return false, fmt.Sprintf(
|
||||
"model %q is not a registered model for runtime %q; pick one of the runtime's registered models (provider-registry SSOT, internal#718)",
|
||||
model, runtime)
|
||||
|
||||
@@ -79,6 +79,50 @@ func TestValidateRegisteredModelForRuntime(t *testing.T) {
|
||||
model: "",
|
||||
wantOK: true,
|
||||
},
|
||||
// ---- cp#529 routability-aware allow path -------------------------------
|
||||
{
|
||||
// BYOK passthrough id: NOT on hermes's platform menu, but the
|
||||
// openrouter name-only native arm prefix-owns it → DeriveProvider
|
||||
// resolves → ALLOWED (no platform billing — openrouter is BYOK).
|
||||
name: "byok_passthrough_routable_now_allowed",
|
||||
runtime: "hermes",
|
||||
model: "openrouter/anthropic/claude-3.5-sonnet",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
// BYOK namespaced vendor id: deepseek's widened ^deepseek[-:/]
|
||||
// matches the vendor/ form on a name-only hermes arm → allowed.
|
||||
name: "byok_namespaced_vendor_routable_now_allowed",
|
||||
runtime: "hermes",
|
||||
model: "deepseek/deepseek-chat",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
// claude-code bare GLM- BYOK id: zai name-only arm + (?i)^(glm-|…)
|
||||
// matches → DeriveProvider resolves → allowed.
|
||||
name: "claude_code_bare_glm_byok_routable_now_allowed",
|
||||
runtime: "claude-code",
|
||||
model: "GLM-4.6",
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
// Genuinely UNROUTABLE id: no native hermes arm prefix-owns bare
|
||||
// gpt-4o (the platform-shared openai vendor is NOT wired into hermes
|
||||
// — billing guardrail), so DeriveProvider errors → still 422.
|
||||
name: "genuinely_unroutable_still_rejected",
|
||||
runtime: "hermes",
|
||||
model: "gpt-4o",
|
||||
wantOK: false,
|
||||
},
|
||||
{
|
||||
// A namespaced vendor id NOW routable on hermes via the dedicated
|
||||
// byok-openai provider (cp#529 BYOK-vendor arms): routes with the
|
||||
// tenant's OPENAI_API_KEY → BYOK billing, never the platform key.
|
||||
name: "byok_openai_namespaced_routable_now_allowed",
|
||||
runtime: "hermes",
|
||||
model: "openai/gpt-4o",
|
||||
wantOK: true,
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
@@ -109,58 +153,58 @@ func TestValidateDerivedProviderInRegistry(t *testing.T) {
|
||||
// provider that IS in the providers list. These are the live corpus
|
||||
// entries; the test pins the registry-consistency invariant.
|
||||
{
|
||||
name: "claude_code_anthropic_api_native",
|
||||
name: "claude_code_anthropic_api_native",
|
||||
runtime: "claude-code",
|
||||
model: "claude-sonnet-4-6",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "claude_code_kimi_coding_native",
|
||||
name: "claude_code_kimi_coding_native",
|
||||
runtime: "claude-code",
|
||||
model: "kimi-for-coding",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "claude_code_minimax_native",
|
||||
name: "claude_code_minimax_native",
|
||||
runtime: "claude-code",
|
||||
model: "MiniMax-M2.7",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "claude_code_platform_namespaced",
|
||||
name: "claude_code_platform_namespaced",
|
||||
runtime: "claude-code",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "codex_openai_subscription_default_arm",
|
||||
name: "codex_openai_subscription_default_arm",
|
||||
runtime: "codex",
|
||||
model: "gpt-5.5",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "codex_platform_namespaced",
|
||||
name: "codex_platform_namespaced",
|
||||
runtime: "codex",
|
||||
model: "openai/gpt-5.4-mini",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "hermes_kimi_coding",
|
||||
name: "hermes_kimi_coding",
|
||||
runtime: "hermes",
|
||||
model: "kimi-coding/kimi-k2",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "hermes_platform_namespaced",
|
||||
name: "hermes_platform_namespaced",
|
||||
runtime: "hermes",
|
||||
model: "moonshot/kimi-k2.6",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "openclaw_kimi_coding",
|
||||
name: "openclaw_kimi_coding",
|
||||
runtime: "openclaw",
|
||||
model: "moonshot:kimi-k2.6",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
// FAIL — model-side validator catches this, but the provider-side
|
||||
// gate is called AFTER it in Create and inherits the fail-open
|
||||
@@ -168,30 +212,30 @@ func TestValidateDerivedProviderInRegistry(t *testing.T) {
|
||||
// errors → allow, letting the model-side response own the message).
|
||||
// This is the deliberate "don't double-reject" decision.
|
||||
{
|
||||
name: "unregistered_model_pass_through_to_model_side",
|
||||
name: "unregistered_model_pass_through_to_model_side",
|
||||
runtime: "claude-code",
|
||||
model: "totally-made-up-model-xyz",
|
||||
wantOK: true, // pass-through: model-side validator owns the rejection
|
||||
wantOK: true, // pass-through: model-side validator owns the rejection
|
||||
},
|
||||
// Federation contract — mirror of the model-side test above.
|
||||
{
|
||||
name: "langgraph_runtime_failopen",
|
||||
name: "langgraph_runtime_failopen",
|
||||
runtime: "langgraph",
|
||||
model: "anything-goes",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
{
|
||||
name: "external_runtime_failopen",
|
||||
name: "external_runtime_failopen",
|
||||
runtime: "external",
|
||||
model: "whatever",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
// Empty model — MODEL_REQUIRED owns it; allow.
|
||||
{
|
||||
name: "empty_model_allowed_other_gate_owns_it",
|
||||
name: "empty_model_allowed_other_gate_owns_it",
|
||||
runtime: "claude-code",
|
||||
model: "",
|
||||
wantOK: true,
|
||||
wantOK: true,
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
|
||||
@@ -161,7 +161,7 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
|
||||
// 1. Strip plugin's rule/fragment markers from CLAUDE.md (mirrors
|
||||
// AgentskillsAdaptor.uninstall lines 184-188). Best-effort: if
|
||||
// the user edited CLAUDE.md, our marker stays untouched.
|
||||
h.stripPluginMarkersFromMemory(ctx, containerName, pluginName)
|
||||
h.stripPluginMarkersFromMemory(ctx, workspaceID, containerName, pluginName)
|
||||
|
||||
// 2. Remove copied skill dirs declared in the plugin's plugin.yaml.
|
||||
for _, skill := range skillNames {
|
||||
@@ -171,9 +171,11 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
|
||||
log.Printf("Plugin uninstall: skipping invalid skill name %q in %s: %v", skill, pluginName, err)
|
||||
continue
|
||||
}
|
||||
_, _ = h.execAsRoot(ctx, containerName, []string{
|
||||
if _, rmErr := h.execAsRoot(ctx, containerName, []string{
|
||||
"rm", "-rf", "/configs/skills/" + skill,
|
||||
})
|
||||
}); rmErr != nil {
|
||||
log.Printf("Plugin uninstall: failed to remove skill %s from %s: %v", skill, workspaceID, rmErr)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Delete the plugin directory itself (as root to handle file ownership).
|
||||
|
||||
@@ -393,7 +393,7 @@ func (h *PluginsHandler) readPluginSkillsFromContainer(ctx context.Context, cont
|
||||
// `# Plugin: <name> /` — mirrors AgentskillsAdaptor.uninstall's stripping
|
||||
// logic so install/uninstall are symmetric. Best-effort: silent on read or
|
||||
// write failure, since the rest of uninstall must still succeed.
|
||||
func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, containerName, pluginName string) {
|
||||
func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, workspaceID, containerName, pluginName string) {
|
||||
// Use sed via bash -c for atomic in-place delete: drop the marker line
|
||||
// and the blank line that follows it (install adds a leading blank line
|
||||
// before the marker via append_to_memory). Three sed passes mirror the
|
||||
@@ -417,7 +417,9 @@ func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, conta
|
||||
`awk 'BEGIN{skip=0; blanks=0} /^%s/{skip=1; blanks=0; next} skip==1 && /^[[:space:]]*$/{blanks++; if(blanks>=2){skip=0; print; next} next} /^# Plugin: /{if(skip==1)skip=0} skip==1{next} {print}' /configs/CLAUDE.md > /tmp/claude.new && mv /tmp/claude.new /configs/CLAUDE.md`,
|
||||
regexpEscapeForAwk(marker),
|
||||
)
|
||||
_, _ = h.execAsRoot(ctx, containerName, []string{"bash", "-c", script})
|
||||
if _, awkErr := h.execAsRoot(ctx, containerName, []string{"bash", "-c", script}); awkErr != nil {
|
||||
log.Printf("Plugin uninstall: failed to strip markers from CLAUDE.md for %s in %s: %v", pluginName, workspaceID, awkErr)
|
||||
}
|
||||
}
|
||||
|
||||
// regexpEscapeForAwk escapes characters that have special meaning inside an
|
||||
|
||||
@@ -0,0 +1,331 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/gorilla/websocket"
|
||||
)
|
||||
|
||||
// rfbGreeting is the first frame a real websockify/RFB backend writes on
|
||||
// connect. The fake backend below sends these exact bytes so the positive
|
||||
// test can prove the upstream's first binary frame survives the reverse
|
||||
// proxy chain (the "WS 1006" regression surface from core#2247 was the
|
||||
// upgrade/handshake silently failing before any RFB byte reached the
|
||||
// browser).
|
||||
var rfbGreeting = []byte("RFB 003.008\n")
|
||||
|
||||
// newFakeWebsockifyBackend stands up an httptest.NewServer that upgrades the
|
||||
// websocket, writes the RFB greeting as a binary frame, then echoes every
|
||||
// frame it receives back to the client. No EC2, noVNC, or SSH involved — it
|
||||
// is the stand-in for the on-instance :6080 websockify listener that
|
||||
// realDisplayForward would normally tunnel to.
|
||||
func newFakeWebsockifyBackend(t *testing.T) *httptest.Server {
|
||||
t.Helper()
|
||||
upgrader := websocket.Upgrader{
|
||||
// The proxy rewrites Sec-WebSocket-Protocol to "binary"; accept any
|
||||
// origin/subprotocol so the fake backend never rejects the handshake.
|
||||
CheckOrigin: func(*http.Request) bool { return true },
|
||||
Subprotocols: []string{"binary"},
|
||||
HandshakeTimeout: 5 * time.Second,
|
||||
EnableCompression: false,
|
||||
}
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
conn, err := upgrader.Upgrade(w, r, nil)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer conn.Close()
|
||||
if err := conn.WriteMessage(websocket.BinaryMessage, rfbGreeting); err != nil {
|
||||
return
|
||||
}
|
||||
for {
|
||||
mt, msg, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if err := conn.WriteMessage(mt, msg); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
return srv
|
||||
}
|
||||
|
||||
// wireDisplayForwardToBackend overrides the injectable displayForward package
|
||||
// var so DisplaySession proxies to the fake backend instead of opening an EIC
|
||||
// SSH tunnel. Restored via t.Cleanup. The *url.URL passed to fn is the http://
|
||||
// backend address (the reverse proxy upgrades it to ws:// natively under
|
||||
// Go 1.25's ReverseProxy WebSocket support).
|
||||
func wireDisplayForwardToBackend(t *testing.T, backendURL string) {
|
||||
t.Helper()
|
||||
target, err := url.Parse(backendURL)
|
||||
if err != nil {
|
||||
t.Fatalf("parse backend URL %q: %v", backendURL, err)
|
||||
}
|
||||
prev := displayForward
|
||||
displayForward = func(_ context.Context, _ string, fn func(target *url.URL) error) error {
|
||||
return fn(target)
|
||||
}
|
||||
t.Cleanup(func() { displayForward = prev })
|
||||
}
|
||||
|
||||
// newDisplaySessionTestServer mounts DisplaySession on a gin router behind an
|
||||
// httptest.NewServer so a real websocket client can dial the route end-to-end.
|
||||
// It returns the test server; callers derive the ws:// URL from srv.URL.
|
||||
func newDisplaySessionTestServer(t *testing.T, handler *WorkspaceHandler) *httptest.Server {
|
||||
t.Helper()
|
||||
r := gin.New()
|
||||
// Mirror the production registration in internal/router/router.go:
|
||||
// GET /workspaces/:id/display/session/*proxyPath -> wh.DisplaySession
|
||||
r.GET("/workspaces/:id/display/session/*proxyPath", handler.DisplaySession)
|
||||
srv := httptest.NewServer(r)
|
||||
t.Cleanup(srv.Close)
|
||||
return srv
|
||||
}
|
||||
|
||||
const (
|
||||
displayProxyWorkspaceID = "ws-display"
|
||||
displayProxyInstanceID = "i-0fakedeadbeef00001"
|
||||
displayProxyControlledBy = "admin-token"
|
||||
)
|
||||
|
||||
// expectDisplaySessionTargetRow mocks loadWorkspaceDisplaySessionTarget's
|
||||
// workspaces SELECT. mode "desktop-control" + a non-empty instance_id is the
|
||||
// "display enabled, tunnel available" shape. (Note: the compute validator
|
||||
// accepts modes none/desktop-control/gpu-desktop-control and protocols
|
||||
// dcv/novnc — "novnc" is a *protocol*, not a mode, so the enabled rows use
|
||||
// mode=desktop-control,protocol=novnc.)
|
||||
func expectDisplaySessionTargetRow(mock sqlmock.Sqlmock, computeJSON, instanceID string) {
|
||||
mock.ExpectQuery(`SELECT COALESCE\(compute, '\{\}'::jsonb\), COALESCE\(instance_id, ''\) FROM workspaces WHERE id = \$1`).
|
||||
WithArgs(displayProxyWorkspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"compute", "instance_id"}).AddRow(computeJSON, instanceID))
|
||||
}
|
||||
|
||||
// expectActiveDisplayControlRow mocks loadActiveDisplayControl's locks SELECT
|
||||
// returning an active lock owned by controlledBy expiring at expiresAt.
|
||||
func expectActiveDisplayControlRow(mock sqlmock.Sqlmock, controlledBy string, expiresAt time.Time) {
|
||||
mock.ExpectQuery(`SELECT controller, controlled_by, expires_at FROM workspace_display_control_locks WHERE workspace_id = \$1 AND expires_at > now\(\)`).
|
||||
WithArgs(displayProxyWorkspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"controller", "controlled_by", "expires_at"}).
|
||||
AddRow("user", controlledBy, expiresAt))
|
||||
}
|
||||
|
||||
const enabledComputeJSON = `{"display":{"mode":"desktop-control","protocol":"novnc","width":1280,"height":800}}`
|
||||
|
||||
// dialDisplaySession dials the websockify route on the given test server with
|
||||
// the supplied Sec-WebSocket-Protocol values. It returns the conn (nil on
|
||||
// failure), the HTTP response, and the dial error.
|
||||
func dialDisplaySession(t *testing.T, srv *httptest.Server, subprotocols []string) (*websocket.Conn, *http.Response, error) {
|
||||
t.Helper()
|
||||
wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") + "/workspaces/" + displayProxyWorkspaceID + "/display/session/websockify"
|
||||
dialer := websocket.Dialer{
|
||||
HandshakeTimeout: 5 * time.Second,
|
||||
Subprotocols: subprotocols,
|
||||
}
|
||||
return dialer.Dial(wsURL, nil)
|
||||
}
|
||||
|
||||
// TestDisplaySessionProxy_Positive proves the full take-control WS-proxy path
|
||||
// without any network/EC2: a valid signed token + active lock + enabled
|
||||
// display upgrades successfully (HTTP 101), the backend's RFB greeting arrives
|
||||
// through the proxy, and a client->server byte round-trips back (bidirectional
|
||||
// proxy chain). This is the direct regression guard for the "WS 1006" failure
|
||||
// class in core#2247.
|
||||
func TestDisplaySessionProxy_Positive(t *testing.T) {
|
||||
t.Setenv("DISPLAY_SESSION_SIGNING_SECRET", "test-secret")
|
||||
mock := setupTestDB(t)
|
||||
backend := newFakeWebsockifyBackend(t)
|
||||
wireDisplayForwardToBackend(t, backend.URL)
|
||||
|
||||
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
srv := newDisplaySessionTestServer(t, handler)
|
||||
|
||||
expiresAt := time.Now().Add(5 * time.Minute)
|
||||
expectDisplaySessionTargetRow(mock, enabledComputeJSON, displayProxyInstanceID)
|
||||
expectActiveDisplayControlRow(mock, displayProxyControlledBy, expiresAt)
|
||||
|
||||
token := signDisplaySessionToken(displayProxyWorkspaceID, displayProxyControlledBy, expiresAt)
|
||||
if token == "" {
|
||||
t.Fatal("signDisplaySessionToken returned empty token")
|
||||
}
|
||||
|
||||
conn, resp, err := dialDisplaySession(t, srv, []string{"binary", displaySessionTokenProtocolPrefix + token})
|
||||
if err != nil {
|
||||
body := ""
|
||||
if resp != nil {
|
||||
body = resp.Status
|
||||
}
|
||||
t.Fatalf("websocket dial failed: %v (resp=%s)", err, body)
|
||||
}
|
||||
t.Cleanup(func() { conn.Close() })
|
||||
if resp.StatusCode != http.StatusSwitchingProtocols {
|
||||
t.Fatalf("expected 101 Switching Protocols, got %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// 1. The backend's RFB greeting must arrive through the proxy.
|
||||
conn.SetReadDeadline(time.Now().Add(5 * time.Second))
|
||||
mt, msg, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
t.Fatalf("read greeting through proxy failed: %v", err)
|
||||
}
|
||||
if mt != websocket.BinaryMessage || string(msg) != string(rfbGreeting) {
|
||||
t.Fatalf("greeting = %q (type %d), want %q binary", msg, mt, rfbGreeting)
|
||||
}
|
||||
|
||||
// 2. A client->server byte must echo back (bidirectional chain).
|
||||
probe := []byte{0x13, 0x37, 0x00, 0xff}
|
||||
if err := conn.WriteMessage(websocket.BinaryMessage, probe); err != nil {
|
||||
t.Fatalf("write probe through proxy failed: %v", err)
|
||||
}
|
||||
conn.SetReadDeadline(time.Now().Add(5 * time.Second))
|
||||
_, echo, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
t.Fatalf("read echo through proxy failed: %v", err)
|
||||
}
|
||||
if string(echo) != string(probe) {
|
||||
t.Fatalf("echo = %q, want %q", echo, probe)
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDisplaySessionProxy_Rejections is table-driven over the failure surface.
|
||||
// Each case asserts the WS upgrade does NOT happen (dial errors / no 101) and
|
||||
// the right HTTP status is returned, WITHOUT ever reaching the fake backend.
|
||||
func TestDisplaySessionProxy_Rejections(t *testing.T) {
|
||||
t.Setenv("DISPLAY_SESSION_SIGNING_SECRET", "test-secret")
|
||||
pastExpiry := time.Now().Add(-5 * time.Minute)
|
||||
futureExpiry := time.Now().Add(5 * time.Minute)
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
// expect wires the sqlmock rows that the handler will actually read
|
||||
// for this case (the locks SELECT is only reached for token cases).
|
||||
expect func(mock sqlmock.Sqlmock)
|
||||
// subprotocols sent on the dial (token header, if any).
|
||||
subprotocols []string
|
||||
// proxyPath overrides the default "/websockify" route segment.
|
||||
proxyPath string
|
||||
wantStatus int
|
||||
}{
|
||||
{
|
||||
name: "missing token -> 403",
|
||||
expect: func(m sqlmock.Sqlmock) {
|
||||
expectDisplaySessionTargetRow(m, enabledComputeJSON, displayProxyInstanceID)
|
||||
expectActiveDisplayControlRow(m, displayProxyControlledBy, futureExpiry)
|
||||
},
|
||||
subprotocols: []string{"binary"},
|
||||
wantStatus: http.StatusForbidden,
|
||||
},
|
||||
{
|
||||
name: "tampered token -> 403",
|
||||
expect: func(m sqlmock.Sqlmock) {
|
||||
expectDisplaySessionTargetRow(m, enabledComputeJSON, displayProxyInstanceID)
|
||||
expectActiveDisplayControlRow(m, displayProxyControlledBy, futureExpiry)
|
||||
},
|
||||
subprotocols: []string{"binary", displaySessionTokenProtocolPrefix + "garbage.not-a-valid-mac"},
|
||||
wantStatus: http.StatusForbidden,
|
||||
},
|
||||
{
|
||||
name: "expired lock -> 403",
|
||||
expect: func(m sqlmock.Sqlmock) {
|
||||
expectDisplaySessionTargetRow(m, enabledComputeJSON, displayProxyInstanceID)
|
||||
// Active-lock query filters expires_at > now(), so an
|
||||
// expired lock returns no rows -> found=false -> 403.
|
||||
m.ExpectQuery(`SELECT controller, controlled_by, expires_at FROM workspace_display_control_locks WHERE workspace_id = \$1 AND expires_at > now\(\)`).
|
||||
WithArgs(displayProxyWorkspaceID).
|
||||
WillReturnError(sql.ErrNoRows)
|
||||
},
|
||||
// Token signed against the past expiry would also fail validation
|
||||
// even if a stale lock row were returned.
|
||||
subprotocols: []string{"binary", displaySessionTokenProtocolPrefix +
|
||||
signDisplaySessionToken(displayProxyWorkspaceID, displayProxyControlledBy, pastExpiry)},
|
||||
wantStatus: http.StatusForbidden,
|
||||
},
|
||||
{
|
||||
name: "display mode none -> 404",
|
||||
expect: func(m sqlmock.Sqlmock) {
|
||||
expectDisplaySessionTargetRow(m, `{"display":{"mode":"none"}}`, displayProxyInstanceID)
|
||||
},
|
||||
subprotocols: []string{"binary"},
|
||||
wantStatus: http.StatusNotFound,
|
||||
},
|
||||
{
|
||||
name: "empty instance_id -> 503",
|
||||
expect: func(m sqlmock.Sqlmock) {
|
||||
expectDisplaySessionTargetRow(m, enabledComputeJSON, "")
|
||||
},
|
||||
subprotocols: []string{"binary"},
|
||||
wantStatus: http.StatusServiceUnavailable,
|
||||
},
|
||||
{
|
||||
name: "wrong proxyPath -> 404",
|
||||
expect: func(m sqlmock.Sqlmock) {
|
||||
expectDisplaySessionTargetRow(m, enabledComputeJSON, displayProxyInstanceID)
|
||||
},
|
||||
subprotocols: []string{"binary"},
|
||||
proxyPath: "/frames",
|
||||
wantStatus: http.StatusNotFound,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
// A backend that records whether it is ever reached — proves these
|
||||
// rejections happen strictly before any proxy dial.
|
||||
reached := false
|
||||
backend := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) {
|
||||
reached = true
|
||||
}))
|
||||
t.Cleanup(backend.Close)
|
||||
wireDisplayForwardToBackend(t, backend.URL)
|
||||
|
||||
handler := NewWorkspaceHandler(newTestBroadcaster(), nil, "http://localhost:8080", t.TempDir())
|
||||
srv := newDisplaySessionTestServer(t, handler)
|
||||
tc.expect(mock)
|
||||
|
||||
proxyPath := tc.proxyPath
|
||||
if proxyPath == "" {
|
||||
proxyPath = "/websockify"
|
||||
}
|
||||
wsURL := "ws" + strings.TrimPrefix(srv.URL, "http") +
|
||||
"/workspaces/" + displayProxyWorkspaceID + "/display/session" + proxyPath
|
||||
dialer := websocket.Dialer{HandshakeTimeout: 5 * time.Second, Subprotocols: tc.subprotocols}
|
||||
conn, resp, err := dialer.Dial(wsURL, nil)
|
||||
if conn != nil {
|
||||
conn.Close()
|
||||
}
|
||||
if err == nil {
|
||||
t.Fatalf("expected WS upgrade to fail, but dial succeeded")
|
||||
}
|
||||
if resp == nil {
|
||||
t.Fatalf("expected an HTTP response on rejected upgrade, got nil (err=%v)", err)
|
||||
}
|
||||
if resp.StatusCode != tc.wantStatus {
|
||||
t.Fatalf("status = %d, want %d", resp.StatusCode, tc.wantStatus)
|
||||
}
|
||||
if resp.StatusCode == http.StatusSwitchingProtocols {
|
||||
t.Fatalf("upgrade unexpectedly succeeded (101)")
|
||||
}
|
||||
if reached {
|
||||
t.Fatalf("rejection leaked to the upstream backend")
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -99,10 +99,16 @@ func TestDeriveProvider_UnregisteredErrors(t *testing.T) {
|
||||
runtime string
|
||||
model string
|
||||
}{
|
||||
// gpt-* is OpenAI — not in claude-code's native set.
|
||||
// gpt-* is OpenAI — not in claude-code's native set (no openai arm;
|
||||
// the platform-shared openai vendor is never wired into a BYOK runtime).
|
||||
{"claude-code", "gpt-5.5"},
|
||||
// deepseek is a catalog provider but in NO runtime's native set.
|
||||
{"claude-code", "deepseek-v4-pro"},
|
||||
// qwen-* is alibaba — a catalog provider NOT wired into claude-code
|
||||
// (cp#529 wires alibaba only into hermes; claude-code's name-only BYOK
|
||||
// arms are zai/deepseek/xiaomi-mimo). So it stays unregistered here.
|
||||
// (NB: deepseek-* IS now routable on claude-code via the deepseek
|
||||
// name-only arm — see the routability tests — so it is no longer a valid
|
||||
// "unregistered" example; qwen replaces it.)
|
||||
{"claude-code", "qwen-max"},
|
||||
// codex is OpenAI-only — a kimi id is unregistered for it.
|
||||
{"codex", "kimi-for-coding"},
|
||||
// a slug no provider in the manifest matches at all.
|
||||
|
||||
@@ -16,7 +16,7 @@ const SchemaVersion = 1
|
||||
// Fingerprint is a stable content hash of the generated projection (schema
|
||||
// version + provider catalog + runtime native sets). It changes iff the
|
||||
// registry DATA changes (comment-only YAML edits do not churn it).
|
||||
const Fingerprint = "a491f5ff8a17ef59"
|
||||
const Fingerprint = "ec6b93409e7b9cf8"
|
||||
|
||||
// GenProvider is the generated projection of one provider catalog entry —
|
||||
// the subset a downstream consumer needs to derive + display a provider.
|
||||
@@ -51,26 +51,31 @@ var Providers = []GenProvider{
|
||||
{Name: "moonshot", DisplayName: "Moonshot (Kimi)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOONSHOT_API_KEY", "KIMI_API_KEY"}, ModelPrefixMatch: "^moonshot[:/-]", IsPlatform: false, UpstreamVendor: "moonshot"},
|
||||
{Name: "minimax", DisplayName: "MiniMax", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^minimax-m", IsPlatform: false, UpstreamVendor: "minimax"},
|
||||
{Name: "platform", DisplayName: "Platform", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MOLECULE_LLM_USAGE_TOKEN"}, ModelPrefixMatch: "^platform/", IsPlatform: true},
|
||||
{Name: "xiaomi-mimo", DisplayName: "Xiaomi MiMo", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^mimo-", IsPlatform: false},
|
||||
{Name: "zai", DisplayName: "Z.ai (GLM)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GLM_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^glm-", IsPlatform: false},
|
||||
{Name: "xiaomi-mimo", DisplayName: "Xiaomi MiMo", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^(mimo-|xiaomi[:/])", IsPlatform: false},
|
||||
{Name: "zai", DisplayName: "Z.ai (GLM)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GLM_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "(?i)^(glm-|zai[:/])", IsPlatform: false},
|
||||
{Name: "kimi-coding", DisplayName: "Moonshot Kimi (coding-tuned)", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KIMI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^kimi-", IsPlatform: false},
|
||||
{Name: "deepseek", DisplayName: "DeepSeek", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DEEPSEEK_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^deepseek-", IsPlatform: false},
|
||||
{Name: "deepseek", DisplayName: "DeepSeek", Protocol: "anthropic", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DEEPSEEK_API_KEY", "ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^deepseek[-:/]", IsPlatform: false},
|
||||
{Name: "google", DisplayName: "Google Gemini", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GEMINI_API_KEY", "GOOGLE_API_KEY"}, ModelPrefixMatch: "^gemini-", IsPlatform: false},
|
||||
{Name: "vertex", DisplayName: "Google Vertex AI (keyless ADC)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GOOGLE_APPLICATION_CREDENTIALS"}, ModelPrefixMatch: "^vertex:", IsPlatform: false},
|
||||
{Name: "alibaba", DisplayName: "Alibaba Qwen (DashScope)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DASHSCOPE_API_KEY", "ALIBABA_API_KEY"}, ModelPrefixMatch: "^qwen-", IsPlatform: false},
|
||||
{Name: "nousresearch", DisplayName: "Nous Research (Hermes)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NOUSRESEARCH_API_KEY"}, ModelPrefixMatch: "^nousresearch/", IsPlatform: false},
|
||||
{Name: "openrouter", DisplayName: "OpenRouter (any model)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENROUTER_API_KEY"}, ModelPrefixMatch: "^openrouter/", IsPlatform: false},
|
||||
{Name: "huggingface", DisplayName: "Hugging Face Inference", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"HUGGINGFACE_API_KEY", "HF_TOKEN"}, ModelPrefixMatch: "^huggingface/", IsPlatform: false},
|
||||
{Name: "ai-gateway", DisplayName: "Vercel AI Gateway", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"AI_GATEWAY_API_KEY"}, ModelPrefixMatch: "^ai-gateway/", IsPlatform: false},
|
||||
{Name: "opencode-zen", DisplayName: "OpenCode Zen", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_ZEN_API_KEY"}, ModelPrefixMatch: "^opencode-zen/", IsPlatform: false},
|
||||
{Name: "opencode-go", DisplayName: "OpenCode Go", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_GO_API_KEY"}, ModelPrefixMatch: "^opencode-go/", IsPlatform: false},
|
||||
{Name: "kilocode", DisplayName: "Kilo Code", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KILOCODE_API_KEY"}, ModelPrefixMatch: "^kilocode/", IsPlatform: false},
|
||||
{Name: "minimax-cn", DisplayName: "MiniMax China", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^minimax-cn/", IsPlatform: false},
|
||||
{Name: "ollama-cloud", DisplayName: "Ollama Cloud", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OLLAMA_CLOUD_API_KEY"}, ModelPrefixMatch: "^ollama-cloud/", IsPlatform: false},
|
||||
{Name: "alibaba", DisplayName: "Alibaba Qwen (DashScope)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"DASHSCOPE_API_KEY", "ALIBABA_API_KEY"}, ModelPrefixMatch: "(?i)^(qwen|alibaba[:/])", IsPlatform: false},
|
||||
{Name: "nousresearch", DisplayName: "Nous Research (Hermes)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NOUSRESEARCH_API_KEY"}, ModelPrefixMatch: "^nousresearch[:/]", IsPlatform: false},
|
||||
{Name: "openrouter", DisplayName: "OpenRouter (any model)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENROUTER_API_KEY"}, ModelPrefixMatch: "^openrouter[:/]", IsPlatform: false},
|
||||
{Name: "huggingface", DisplayName: "Hugging Face Inference", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"HUGGINGFACE_API_KEY", "HF_TOKEN"}, ModelPrefixMatch: "^huggingface[:/]", IsPlatform: false},
|
||||
{Name: "ai-gateway", DisplayName: "Vercel AI Gateway", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"AI_GATEWAY_API_KEY"}, ModelPrefixMatch: "^ai-gateway[:/]", IsPlatform: false},
|
||||
{Name: "opencode-zen", DisplayName: "OpenCode Zen", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_ZEN_API_KEY"}, ModelPrefixMatch: "^opencode-zen[:/]", IsPlatform: false},
|
||||
{Name: "opencode-go", DisplayName: "OpenCode Go", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OPENCODE_GO_API_KEY"}, ModelPrefixMatch: "^opencode-go[:/]", IsPlatform: false},
|
||||
{Name: "kilocode", DisplayName: "Kilo Code", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"KILOCODE_API_KEY"}, ModelPrefixMatch: "^kilocode[:/]", IsPlatform: false},
|
||||
{Name: "minimax-cn", DisplayName: "MiniMax China", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY", "ANTHROPIC_AUTH_TOKEN"}, ModelPrefixMatch: "^minimax-cn[:/]", IsPlatform: false},
|
||||
{Name: "ollama-cloud", DisplayName: "Ollama Cloud", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OLLAMA_CLOUD_API_KEY"}, ModelPrefixMatch: "^ollama-cloud[:/]", IsPlatform: false},
|
||||
{Name: "ollama", DisplayName: "Ollama (self-hosted)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"OLLAMA_HOST"}, ModelPrefixMatch: "^ollama/", IsPlatform: false},
|
||||
{Name: "nvidia", DisplayName: "NVIDIA NIM", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NVIDIA_API_KEY"}, ModelPrefixMatch: "^nvidia/", IsPlatform: false},
|
||||
{Name: "arcee", DisplayName: "Arcee", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ARCEE_API_KEY"}, ModelPrefixMatch: "^arcee/", IsPlatform: false},
|
||||
{Name: "custom", DisplayName: "Custom OpenAI-compat endpoint", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"CUSTOM_API_KEY", "OPENAI_API_KEY"}, ModelPrefixMatch: "^custom/", IsPlatform: false},
|
||||
{Name: "nvidia", DisplayName: "NVIDIA NIM", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"NVIDIA_API_KEY"}, ModelPrefixMatch: "^nvidia[:/]", IsPlatform: false},
|
||||
{Name: "arcee", DisplayName: "Arcee", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"ARCEE_API_KEY"}, ModelPrefixMatch: "^arcee[:/]", IsPlatform: false},
|
||||
{Name: "custom", DisplayName: "Custom OpenAI-compat endpoint", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"CUSTOM_API_KEY", "OPENAI_API_KEY"}, ModelPrefixMatch: "^custom[:/]", IsPlatform: false},
|
||||
{Name: "byok-anthropic", DisplayName: "Anthropic (BYOK)", Protocol: "anthropic", AuthMode: "anthropic_api", AuthEnv: []string{"ANTHROPIC_API_KEY"}, ModelPrefixMatch: "^anthropic/", IsPlatform: false},
|
||||
{Name: "byok-openai", DisplayName: "OpenAI (BYOK)", Protocol: "openai", AuthMode: "anthropic_api", AuthEnv: []string{"OPENAI_API_KEY"}, ModelPrefixMatch: "^openai[:/]", IsPlatform: false},
|
||||
{Name: "byok-gemini", DisplayName: "Google Gemini (BYOK)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GEMINI_API_KEY", "GOOGLE_API_KEY"}, ModelPrefixMatch: "^gemini/", IsPlatform: false},
|
||||
{Name: "byok-minimax", DisplayName: "MiniMax (BYOK)", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"MINIMAX_API_KEY"}, ModelPrefixMatch: "(?i)^(minimax[:/]|codex-minimax-)", IsPlatform: false},
|
||||
{Name: "groq", DisplayName: "Groq", Protocol: "openai", AuthMode: "third_party_anthropic_compat", AuthEnv: []string{"GROQ_API_KEY"}, ModelPrefixMatch: "^groq:", IsPlatform: false},
|
||||
}
|
||||
|
||||
// Runtimes maps each runtime to its native provider+model set, runtime names
|
||||
@@ -82,23 +87,51 @@ var Runtimes = map[string][]GenRuntimeRef{
|
||||
{Name: "kimi-coding", Models: []string{"kimi-for-coding", "kimi-k2.5", "kimi-k2", "moonshot:kimi-k2.6", "moonshot:kimi-k2.5"}},
|
||||
{Name: "minimax", Models: []string{"MiniMax-M2", "MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M3", "minimax:MiniMax-M2", "minimax:MiniMax-M2.7", "minimax:MiniMax-M2.7-highspeed", "minimax:MiniMax-M3"}},
|
||||
{Name: "platform", Models: []string{"anthropic/claude-opus-4-7", "anthropic/claude-sonnet-4-6", "moonshot/kimi-k2.6", "moonshot/kimi-k2.5", "minimax/MiniMax-M2.7", "minimax/MiniMax-M2.7-highspeed", "minimax/MiniMax-M3"}},
|
||||
{Name: "zai", Models: []string{}},
|
||||
{Name: "deepseek", Models: []string{}},
|
||||
{Name: "xiaomi-mimo", Models: []string{}},
|
||||
},
|
||||
"codex": {
|
||||
{Name: "openai-subscription", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
|
||||
{Name: "openai-api", Models: []string{"gpt-5.5", "gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex", "gpt-5.3-codex-spark", "gpt-5.2"}},
|
||||
{Name: "platform", Models: []string{"openai/gpt-5.4", "openai/gpt-5.4-mini"}},
|
||||
{Name: "byok-minimax", Models: []string{}},
|
||||
},
|
||||
"google-adk": {
|
||||
{Name: "platform", Models: []string{"platform:gemini-2.5-pro", "platform:gemini-2.5-flash"}},
|
||||
{Name: "google", Models: []string{"gemini-2.5-pro", "gemini-2.5-flash"}},
|
||||
{Name: "vertex", Models: []string{"vertex:gemini-2.5-pro"}},
|
||||
},
|
||||
"hermes": {
|
||||
{Name: "kimi-coding", Models: []string{"kimi-coding/kimi-k2"}},
|
||||
{Name: "platform", Models: []string{"moonshot/kimi-k2.6", "moonshot/kimi-k2.5"}},
|
||||
{Name: "openrouter", Models: []string{}},
|
||||
{Name: "huggingface", Models: []string{}},
|
||||
{Name: "ai-gateway", Models: []string{}},
|
||||
{Name: "opencode-zen", Models: []string{}},
|
||||
{Name: "opencode-go", Models: []string{}},
|
||||
{Name: "kilocode", Models: []string{}},
|
||||
{Name: "custom", Models: []string{}},
|
||||
{Name: "nvidia", Models: []string{}},
|
||||
{Name: "arcee", Models: []string{}},
|
||||
{Name: "ollama-cloud", Models: []string{}},
|
||||
{Name: "minimax-cn", Models: []string{}},
|
||||
{Name: "nousresearch", Models: []string{}},
|
||||
{Name: "deepseek", Models: []string{}},
|
||||
{Name: "zai", Models: []string{}},
|
||||
{Name: "xiaomi-mimo", Models: []string{}},
|
||||
{Name: "alibaba", Models: []string{}},
|
||||
{Name: "byok-anthropic", Models: []string{}},
|
||||
{Name: "byok-gemini", Models: []string{}},
|
||||
{Name: "byok-openai", Models: []string{}},
|
||||
{Name: "byok-minimax", Models: []string{}},
|
||||
},
|
||||
"openclaw": {
|
||||
{Name: "kimi-coding", Models: []string{"moonshot:kimi-k2.6", "moonshot:kimi-k2.5"}},
|
||||
{Name: "platform", Models: []string{"moonshot/kimi-k2.6", "moonshot/kimi-k2.5"}},
|
||||
{Name: "openrouter", Models: []string{}},
|
||||
{Name: "custom", Models: []string{}},
|
||||
{Name: "byok-openai", Models: []string{}},
|
||||
{Name: "byok-minimax", Models: []string{}},
|
||||
{Name: "groq", Models: []string{}},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -257,9 +257,20 @@ func parseManifest(raw []byte) (*Manifest, error) {
|
||||
return nil, fmt.Errorf("providers: runtime %q references provider %q twice", rt, ref.Name)
|
||||
}
|
||||
refSeen[ref.Name] = struct{}{}
|
||||
if len(ref.Models) == 0 {
|
||||
return nil, fmt.Errorf("providers: runtime %q provider %q has no model ids", rt, ref.Name)
|
||||
}
|
||||
// A NAME-ONLY arm (zero model ids) is permitted (cp#529): it adds
|
||||
// NOTHING to the runtime's platform menu (ModelsForRuntime only
|
||||
// iterates ref.Models, so an empty Models contributes no selectable
|
||||
// id — additive, zero platform-menu change) yet wires the provider
|
||||
// into the runtime's NATIVE prefix-routing set, so a BYOK id the
|
||||
// provider's model_prefix_match matches becomes routable via
|
||||
// DeriveProvider step-4. This is the mechanism the cp#529
|
||||
// routability-aware enforcer keys off: a name-only BYOK arm makes a
|
||||
// passthrough id (openrouter/…, deepseek-…, etc.) resolve to a
|
||||
// concrete provider without ever appearing on the platform menu.
|
||||
// BILLING GUARDRAIL: only CONFIRMED-NON-PLATFORM (BYOK) providers
|
||||
// are wired as name-only arms — never `platform`/anthropic-*/
|
||||
// openai-*/moonshot/minimax/google/vertex — so a name-only arm can
|
||||
// never route a customer model through the platform's key.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -317,7 +317,7 @@ providers:
|
||||
# Adapter prefix "mimo-"; canvas /^mimo-/i. proxy routing TBD (PR-3).
|
||||
# NOTE: canvas has a duplicate "xiaomi" VENDOR_LABELS key aliasing the
|
||||
# same vendor — collapsed into this one entry.
|
||||
model_prefix_match: "^mimo-"
|
||||
model_prefix_match: "(?i)^(mimo-|xiaomi[:/])"
|
||||
model_aliases: []
|
||||
|
||||
# ===========================================================================
|
||||
@@ -334,7 +334,7 @@ providers:
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# Adapter prefix "glm-" (lowercased match catches GLM-4.6); canvas /^GLM-/i.
|
||||
# canvas-only + adapter-only today; proxy routing TBD (PR-3).
|
||||
model_prefix_match: "(?i)^glm-"
|
||||
model_prefix_match: "(?i)^(glm-|zai[:/])"
|
||||
model_aliases: []
|
||||
|
||||
# ===========================================================================
|
||||
@@ -385,7 +385,7 @@ providers:
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# Adapter prefix "deepseek-"; canvas /^deepseek-/i. adapter+canvas only;
|
||||
# proxy routing TBD (PR-3).
|
||||
model_prefix_match: "^deepseek-"
|
||||
model_prefix_match: "^deepseek[-:/]"
|
||||
model_aliases: []
|
||||
|
||||
# ===========================================================================
|
||||
@@ -452,7 +452,7 @@ providers:
|
||||
auth_env: [DASHSCOPE_API_KEY, ALIBABA_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD. canvas /^qwen-/i.
|
||||
model_prefix_match: "^qwen-"
|
||||
model_prefix_match: "(?i)^(qwen|alibaba[:/])"
|
||||
model_aliases: []
|
||||
|
||||
- name: nousresearch
|
||||
@@ -466,7 +466,7 @@ providers:
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD. Slash-prefix id
|
||||
# (e.g. nousresearch/hermes-4-70b).
|
||||
model_prefix_match: "^nousresearch/"
|
||||
model_prefix_match: "^nousresearch[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: openrouter
|
||||
@@ -479,7 +479,7 @@ providers:
|
||||
auth_env: [OPENROUTER_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD. Wildcard: openrouter/<model>.
|
||||
model_prefix_match: "^openrouter/"
|
||||
model_prefix_match: "^openrouter[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: huggingface
|
||||
@@ -492,7 +492,7 @@ providers:
|
||||
auth_env: [HUGGINGFACE_API_KEY, HF_TOKEN]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD. Wildcard: huggingface/<model>.
|
||||
model_prefix_match: "^huggingface/"
|
||||
model_prefix_match: "^huggingface[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: ai-gateway
|
||||
@@ -505,7 +505,7 @@ providers:
|
||||
auth_env: [AI_GATEWAY_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD.
|
||||
model_prefix_match: "^ai-gateway/"
|
||||
model_prefix_match: "^ai-gateway[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: opencode-zen
|
||||
@@ -518,7 +518,7 @@ providers:
|
||||
auth_env: [OPENCODE_ZEN_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD.
|
||||
model_prefix_match: "^opencode-zen/"
|
||||
model_prefix_match: "^opencode-zen[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: opencode-go
|
||||
@@ -531,7 +531,7 @@ providers:
|
||||
auth_env: [OPENCODE_GO_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD.
|
||||
model_prefix_match: "^opencode-go/"
|
||||
model_prefix_match: "^opencode-go[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: kilocode
|
||||
@@ -544,7 +544,7 @@ providers:
|
||||
auth_env: [KILOCODE_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD.
|
||||
model_prefix_match: "^kilocode/"
|
||||
model_prefix_match: "^kilocode[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: minimax-cn
|
||||
@@ -559,7 +559,7 @@ providers:
|
||||
# canvas-only today; proxy routing TBD. China endpoint sibling of `minimax`
|
||||
# (api.minimaxi.com). Matched only by the explicit namespaced prefix (`minimax-cn/` or `minimax-cn:`) so it does
|
||||
# NOT collide with `minimax`'s (?i)^minimax- in the overlap guard.
|
||||
model_prefix_match: "^minimax-cn/"
|
||||
model_prefix_match: "^minimax-cn[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: ollama-cloud
|
||||
@@ -572,7 +572,7 @@ providers:
|
||||
auth_env: [OLLAMA_CLOUD_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD.
|
||||
model_prefix_match: "^ollama-cloud/"
|
||||
model_prefix_match: "^ollama-cloud[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: ollama
|
||||
@@ -598,7 +598,7 @@ providers:
|
||||
auth_env: [NVIDIA_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD.
|
||||
model_prefix_match: "^nvidia/"
|
||||
model_prefix_match: "^nvidia[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: arcee
|
||||
@@ -611,7 +611,7 @@ providers:
|
||||
auth_env: [ARCEE_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD.
|
||||
model_prefix_match: "^arcee/"
|
||||
model_prefix_match: "^arcee[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: custom
|
||||
@@ -624,7 +624,109 @@ providers:
|
||||
auth_env: [CUSTOM_API_KEY, OPENAI_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# canvas-only today; proxy routing TBD. Wildcard free-text: custom/<model>.
|
||||
model_prefix_match: "^custom/"
|
||||
model_prefix_match: "^custom[:/]"
|
||||
model_aliases: []
|
||||
|
||||
# ===========================================================================
|
||||
# DEDICATED BYOK-VENDOR providers (cp#529). These exist so the NAMESPACED
|
||||
# BYOK ids the hermes/openclaw/codex templates offer for the SHARED upstream
|
||||
# vendors (anthropic, openai, gemini, minimax, groq) become routable with the
|
||||
# TENANT's OWN vendor key — WITHOUT routing them through the platform-shared
|
||||
# `platform` provider (which would bill the platform's key: a money bug).
|
||||
#
|
||||
# Each is NON-PLATFORM (name != "platform") -> IsPlatform()==false -> BYOK
|
||||
# billing: the workspace env supplies the vendor key, never the platform key.
|
||||
#
|
||||
# COLLISION-FREE BY CONSTRUCTION: every matcher is NAMESPACED (anchored on the
|
||||
# `vendor/` slash form or `vendor:` colon form) so it is DISJOINT from the
|
||||
# platform vendors' BARE matchers (anthropic-api `^claude`, openai-subscription
|
||||
# `^gpt-`, openai-api `^openai-api[:/]`, minimax `(?i)^minimax-m`,
|
||||
# google `^gemini-`, minimax-cn `^minimax-cn[:/]`). DeriveProvider's overlap
|
||||
# guard (no slug may match two native providers) stays green — verified for all
|
||||
# 20 residual ids (cp#529).
|
||||
#
|
||||
# These siblings of the platform/upstream vendor entries point at the SAME
|
||||
# PUBLIC upstream base URLs, but carry NO upstream_vendor (they are BYOK
|
||||
# passthroughs, not proxy upstream targets — the proxy never dials a tenant's
|
||||
# own key) and use the namespaced matchers above instead of the bare proxy
|
||||
# prefixes.
|
||||
# ===========================================================================
|
||||
- name: byok-anthropic
|
||||
display_name: "Anthropic (BYOK)"
|
||||
vendor_logo: "anthropic"
|
||||
protocol: anthropic
|
||||
auth_mode: anthropic_api
|
||||
base_url_template: "https://api.anthropic.com/v1"
|
||||
base_url_anthropic: "https://api.anthropic.com/v1"
|
||||
auth_env: [ANTHROPIC_API_KEY]
|
||||
auth_token_env: ANTHROPIC_API_KEY
|
||||
# Namespaced BYOK form `anthropic/<model>` (hermes). DISJOINT from
|
||||
# anthropic-api's bare `^claude` and anthropic-oauth's alias set.
|
||||
model_prefix_match: "^anthropic/"
|
||||
model_aliases: []
|
||||
|
||||
- name: byok-openai
|
||||
display_name: "OpenAI (BYOK)"
|
||||
vendor_logo: "openai"
|
||||
protocol: openai
|
||||
auth_mode: anthropic_api # openai-protocol; auth is a bearer API key.
|
||||
base_url_template: "https://api.openai.com/v1"
|
||||
base_url_anthropic: null
|
||||
auth_env: [OPENAI_API_KEY]
|
||||
auth_token_env: OPENAI_API_KEY
|
||||
# Namespaced BYOK forms `openai/<model>` (hermes) + `openai:<model>`
|
||||
# (openclaw). DISJOINT from openai-subscription's bare `^gpt-` and
|
||||
# openai-api's `^openai-api[:/]` (the dash after `openai` keeps the two
|
||||
# apart: `openai:` / `openai/` never start with `openai-api`).
|
||||
model_prefix_match: "^openai[:/]"
|
||||
model_aliases: []
|
||||
|
||||
- name: byok-gemini
|
||||
display_name: "Google Gemini (BYOK)"
|
||||
vendor_logo: "google"
|
||||
protocol: openai
|
||||
auth_mode: third_party_anthropic_compat
|
||||
base_url_template: "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||
base_url_anthropic: null
|
||||
auth_env: [GEMINI_API_KEY, GOOGLE_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# Namespaced BYOK form `gemini/<model>` (hermes). DISJOINT from the `google`
|
||||
# vendor's bare `^gemini-` and `vertex`'s `^vertex:`.
|
||||
model_prefix_match: "^gemini/"
|
||||
model_aliases: []
|
||||
|
||||
- name: byok-minimax
|
||||
display_name: "MiniMax (BYOK)"
|
||||
vendor_logo: "minimax"
|
||||
protocol: openai
|
||||
auth_mode: third_party_anthropic_compat
|
||||
base_url_template: "https://api.minimax.io/v1"
|
||||
base_url_anthropic: null
|
||||
auth_env: [MINIMAX_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# Namespaced BYOK forms `minimax:<model>` (openclaw) + `minimax/<model>`
|
||||
# (hermes), PLUS the codex-runtime alias `codex-minimax-m2.7` (the codex
|
||||
# template's `minimax-token-plan` route — same upstream api.minimax.io,
|
||||
# tenant MINIMAX_API_KEY). The `codex-minimax-` leg is NARROWLY anchored so
|
||||
# it resolves that one codex id WITHOUT a broad matcher: it is DISJOINT from
|
||||
# `minimax` (?i)^minimax-m (which needs `minimax-m`, not `codex-`) and from
|
||||
# `minimax-cn` ^minimax-cn[:/]. Verified collision-free for all 20 residual
|
||||
# ids + codex-minimax-m2.7 (cp#529).
|
||||
model_prefix_match: "(?i)^(minimax[:/]|codex-minimax-)"
|
||||
model_aliases: []
|
||||
|
||||
- name: groq
|
||||
display_name: "Groq"
|
||||
vendor_logo: "groq"
|
||||
protocol: openai
|
||||
auth_mode: third_party_anthropic_compat
|
||||
base_url_template: "https://api.groq.com/openai/v1"
|
||||
base_url_anthropic: null
|
||||
auth_env: [GROQ_API_KEY]
|
||||
auth_token_env: ANTHROPIC_AUTH_TOKEN
|
||||
# Namespaced BYOK form `groq:<model>` (openclaw). No other provider matches
|
||||
# the `groq:` prefix.
|
||||
model_prefix_match: "^groq:"
|
||||
model_aliases: []
|
||||
|
||||
# =============================================================================
|
||||
@@ -762,6 +864,16 @@ runtimes:
|
||||
- minimax/MiniMax-M2.7
|
||||
- minimax/MiniMax-M2.7-highspeed
|
||||
- minimax/MiniMax-M3
|
||||
# NAME-ONLY BYOK arms (cp#529): zero model ids → add NOTHING to the
|
||||
# platform menu (ModelsForRuntime), but wire these CONFIRMED-NON-PLATFORM
|
||||
# providers into claude-code's NATIVE prefix-routing set so the bare BYOK
|
||||
# ids the claude-code template offers (GLM-*, deepseek-*, mimo-*) resolve
|
||||
# via DeriveProvider. BILLING-SAFE: zai/deepseek/xiaomi-mimo are tenant-key
|
||||
# (BYOK) providers — never platform-shared — so routing through them bills
|
||||
# the tenant's own key, never the platform's.
|
||||
- name: zai
|
||||
- name: deepseek
|
||||
- name: xiaomi-mimo
|
||||
|
||||
# hermes: native Kimi only (kimi-coding gateway). hermes-agent owns its own
|
||||
# broad provider matrix, but the CTO native matrix for the Molecule
|
||||
@@ -777,6 +889,38 @@ runtimes:
|
||||
models:
|
||||
- moonshot/kimi-k2.6
|
||||
- moonshot/kimi-k2.5
|
||||
# NAME-ONLY BYOK arms (cp#529): zero model ids → no addition to the
|
||||
# platform menu, but wire hermes's CONFIRMED-NON-PLATFORM passthrough +
|
||||
# bare-vendor providers into its NATIVE prefix-routing set so the BYOK
|
||||
# ids the hermes template offers (openrouter/…, huggingface/…, deepseek/…,
|
||||
# zai:…, etc.) resolve via DeriveProvider. ALL tenant-key (BYOK).
|
||||
- name: openrouter
|
||||
- name: huggingface
|
||||
- name: ai-gateway
|
||||
- name: opencode-zen
|
||||
- name: opencode-go
|
||||
- name: kilocode
|
||||
- name: custom
|
||||
- name: nvidia
|
||||
- name: arcee
|
||||
- name: ollama-cloud
|
||||
- name: minimax-cn
|
||||
- name: nousresearch
|
||||
- name: deepseek
|
||||
- name: zai
|
||||
- name: xiaomi-mimo
|
||||
- name: alibaba
|
||||
# DEDICATED BYOK-VENDOR arms (cp#529): the namespaced ids hermes offers for
|
||||
# the SHARED upstream vendors (anthropic/claude-*, gemini/*, openai/*,
|
||||
# minimax/*) NOW resolve to these tenant-key BYOK-vendor providers — NOT
|
||||
# the platform-shared `platform` provider (which would bill the platform's
|
||||
# key). NAME-ONLY (no models) → no platform-menu change, prefix-routing
|
||||
# only, BYOK-billed. This converts the last 12 hermes residual ids from
|
||||
# cp#529 drift to routable.
|
||||
- name: byok-anthropic
|
||||
- name: byok-gemini
|
||||
- name: byok-openai
|
||||
- name: byok-minimax
|
||||
|
||||
# codex: OpenAI — BYOK split across TWO native providers
|
||||
# (openai-subscription + openai-api), mirroring claude-code's anthropic
|
||||
@@ -828,6 +972,14 @@ runtimes:
|
||||
models:
|
||||
- openai/gpt-5.4
|
||||
- openai/gpt-5.4-mini
|
||||
# NAME-ONLY BYOK arm (cp#529): the codex template offers a BYOK MiniMax
|
||||
# token-plan model `codex-minimax-m2.7` (its `minimax-token-plan` provider:
|
||||
# base_url api.minimax.io, tenant MINIMAX_API_KEY, model_id_override
|
||||
# codex-MiniMax-M2.7). It resolves to byok-minimax via the narrowly-anchored
|
||||
# `codex-minimax-` leg of byok-minimax's matcher (same upstream, tenant key)
|
||||
# — NOT a broad matcher. NAME-ONLY → no platform-menu change, BYOK-billed.
|
||||
# Converts the last codex residual id from cp#529 drift to routable.
|
||||
- name: byok-minimax
|
||||
|
||||
# openclaw: native Kimi only. openclaw's moonshot: model prefix + a
|
||||
# KIMI_API_KEY (sk-kimi-*) routes to api.kimi.com/coding (kimi-for-coding),
|
||||
@@ -846,6 +998,21 @@ runtimes:
|
||||
models:
|
||||
- moonshot/kimi-k2.6
|
||||
- moonshot/kimi-k2.5
|
||||
# NAME-ONLY BYOK arms (cp#529): zero model ids → no platform-menu change,
|
||||
# but wire openclaw's CONFIRMED-NON-PLATFORM passthroughs into its NATIVE
|
||||
# prefix-routing set so the BYOK colon/slash ids the openclaw template
|
||||
# offers (openrouter:…, custom:…) resolve via DeriveProvider. BYOK only.
|
||||
- name: openrouter
|
||||
- name: custom
|
||||
# DEDICATED BYOK-VENDOR arms (cp#529): openclaw's default model is
|
||||
# `minimax:MiniMax-M2.7`, plus it offers `openai:*` and `groq:*` BYOK ids.
|
||||
# These NOW resolve to the tenant-key BYOK-vendor providers (NOT the
|
||||
# platform key). NAME-ONLY → prefix-routing only, BYOK-billed. This converts
|
||||
# the last 7 openclaw residual ids from cp#529 drift to routable AND makes
|
||||
# the runtime's DEFAULT model (minimax:MiniMax-M2.7) resolve.
|
||||
- name: byok-openai
|
||||
- name: byok-minimax
|
||||
- name: groq
|
||||
|
||||
|
||||
# google-adk: Gemini via Vertex AI, keyless ADC (Workload Identity
|
||||
@@ -869,9 +1036,3 @@ runtimes:
|
||||
models:
|
||||
- gemini-2.5-pro
|
||||
- gemini-2.5-flash
|
||||
# DEPRECATED transitional: vertex: ids stay registered until templates
|
||||
# move to platform: (superseded by the platform arm above). Remove in a
|
||||
# cleanup once no template references vertex:gemini-*.
|
||||
- name: vertex
|
||||
models:
|
||||
- vertex:gemini-2.5-pro
|
||||
@@ -249,11 +249,17 @@ func TestGoogleADKRuntimeRegistered(t *testing.T) {
|
||||
}
|
||||
|
||||
// TestVertexProviderRegistered locks the keyless Vertex provider variant in the
|
||||
// providers.yaml SSOT. google-adk serves Gemini via Vertex AI with ADC/WIF
|
||||
// (no API key); the registry must model that as a first-class "vertex" provider
|
||||
// (auth_env GOOGLE_APPLICATION_CREDENTIALS, ^vertex: namespace) distinct from
|
||||
// the API-key "google" vendor, and the google-adk runtime must offer both arms.
|
||||
// See project_canvas_runtime_dropdown_ssot_fix.
|
||||
// providers.yaml SSOT. google-adk serves platform-managed Gemini via the LLM
|
||||
// proxy -> Vertex AI with server-side WIF (no on-box key); the registry must
|
||||
// still model the keyless "vertex" provider (auth_env GOOGLE_APPLICATION_CREDENTIALS,
|
||||
// ^vertex: namespace) as a first-class entry distinct from the API-key "google"
|
||||
// vendor, so the proxy can still route/bill any Vertex-upstream request that
|
||||
// carries a `vertex:` id. The TRANSITIONAL `vertex:` arm on the google-adk
|
||||
// RUNTIME (the selectable model set) was removed in cp#514 now that templates
|
||||
// default to `platform:`; the runtime offers only the `platform` + API-key
|
||||
// `google` arms. A saved `vertex:gemini-*` model still RESOLVES harmlessly via
|
||||
// this standalone provider (it is just no longer offered as a new selection).
|
||||
// See project_canvas_runtime_dropdown_ssot_fix + cp#514.
|
||||
func TestVertexProviderRegistered(t *testing.T) {
|
||||
ps, err := Load()
|
||||
if err != nil {
|
||||
@@ -298,20 +304,23 @@ func TestVertexProviderRegistered(t *testing.T) {
|
||||
for _, p := range provs {
|
||||
names[p.Name] = true
|
||||
}
|
||||
if !names["vertex"] {
|
||||
t.Errorf("google-adk runtime should offer the keyless vertex arm; got %v", names)
|
||||
// cp#514: the transitional `vertex` arm was dropped from the google-adk
|
||||
// runtime. The runtime keeps the platform-managed default + the API-key
|
||||
// google arm; the standalone `vertex` PROVIDER (asserted above) survives
|
||||
// for ^vertex: resolution but is no longer a selectable runtime arm.
|
||||
if names["vertex"] {
|
||||
t.Errorf("google-adk runtime should NOT offer the transitional vertex arm (removed cp#514); got %v", names)
|
||||
}
|
||||
if !names["platform"] {
|
||||
t.Errorf("google-adk runtime should keep the platform-managed arm; got %v", names)
|
||||
}
|
||||
if !names["google"] {
|
||||
t.Errorf("google-adk runtime should keep the API-key google arm; got %v", names)
|
||||
}
|
||||
models, _ := m.ModelsForRuntime("google-adk")
|
||||
hasVertexModel := false
|
||||
for _, id := range models {
|
||||
if id == "vertex:gemini-2.5-pro" {
|
||||
hasVertexModel = true
|
||||
t.Errorf("google-adk models should NOT include vertex:gemini-2.5-pro (removed cp#514); got %v", models)
|
||||
}
|
||||
}
|
||||
if !hasVertexModel {
|
||||
t.Errorf("google-adk models should include vertex:gemini-2.5-pro; got %v", models)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,19 +17,39 @@ import (
|
||||
// of its native vendors the proxy can serve — kimi for hermes/openclaw,
|
||||
// openai for codex, anthropic+kimi+minimax for claude-code.
|
||||
//
|
||||
// cp#529 adds NAME-ONLY BYOK arms (zero model ids) to claude-code/hermes/
|
||||
// codex/openclaw: they add NOTHING to the platform menu (ModelsForRuntime) but wire
|
||||
// CONFIRMED-NON-PLATFORM providers into the runtime's NATIVE prefix-routing set
|
||||
// so a matching BYOK id resolves via DeriveProvider. ProvidersForRuntime returns
|
||||
// the full native arm set (menu + name-only), so the expected sets below include
|
||||
// them. The platform-shared/denylist providers are NEVER wired into a BYOK arm.
|
||||
//
|
||||
// claude-code -> anthropic (oauth+api), kimi (kimi-coding), minimax, platform
|
||||
// + BYOK name-only: zai, deepseek, xiaomi-mimo
|
||||
// hermes -> kimi (kimi-coding), platform
|
||||
// codex -> openai (subscription + api), platform
|
||||
// openclaw -> kimi (kimi-coding), platform
|
||||
// + BYOK name-only: openrouter, huggingface, ai-gateway,
|
||||
// opencode-zen, opencode-go, kilocode, custom, nvidia, arcee,
|
||||
// ollama-cloud, minimax-cn, nousresearch, deepseek, zai,
|
||||
// xiaomi-mimo, alibaba, byok-anthropic, byok-gemini, byok-openai, byok-minimax
|
||||
// codex -> openai (subscription + api), platform + BYOK name-only: byok-minimax
|
||||
// openclaw -> kimi (kimi-coding), platform + BYOK name-only: openrouter, custom,
// byok-openai, byok-minimax, groq
|
||||
var runtimeNativeProviders = map[string][]string{
|
||||
"claude-code": {"anthropic-api", "anthropic-oauth", "kimi-coding", "minimax", "platform"},
|
||||
"hermes": {"kimi-coding", "platform"},
|
||||
"claude-code": {"anthropic-api", "anthropic-oauth", "kimi-coding", "minimax", "platform", "zai", "deepseek", "xiaomi-mimo"},
|
||||
"hermes": {"kimi-coding", "platform",
|
||||
"openrouter", "huggingface", "ai-gateway", "opencode-zen", "opencode-go",
|
||||
"kilocode", "custom", "nvidia", "arcee", "ollama-cloud", "minimax-cn",
|
||||
"nousresearch", "deepseek", "zai", "xiaomi-mimo", "alibaba",
|
||||
// cp#529 dedicated BYOK-vendor name-only arms (shared-vendor namespaced ids).
|
||||
"byok-anthropic", "byok-gemini", "byok-openai", "byok-minimax"},
|
||||
// codex's OpenAI BYOK is split across the OAuth subscription arm
|
||||
// (openai-subscription) and the direct-key arm (openai-api), mirroring
|
||||
// claude-code's anthropic oauth+api split; platform openai via the proxy
|
||||
// Responses surface.
|
||||
"codex": {"openai-subscription", "openai-api", "platform"},
|
||||
"openclaw": {"kimi-coding", "platform"},
|
||||
// Responses surface. cp#529 adds the byok-minimax name-only arm so the
|
||||
// template's BYOK MiniMax token-plan id (codex-minimax-m2.7) resolves.
|
||||
"codex": {"openai-subscription", "openai-api", "platform", "byok-minimax"},
|
||||
"openclaw": {"kimi-coding", "platform", "openrouter", "custom",
|
||||
// cp#529 dedicated BYOK-vendor name-only arms (openai:/minimax:/groq:).
|
||||
"byok-openai", "byok-minimax", "groq"},
|
||||
}
|
||||
|
||||
func sortedCopy(in []string) []string {
|
||||
@@ -253,6 +273,56 @@ func TestParseManifest_ValidBaseline(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseManifest_NameOnlyArm proves a NAME-ONLY runtime arm (zero model
|
||||
// ids) is PERMITTED (cp#529) and is additive: it contributes nothing to the
|
||||
// runtime's platform menu (ModelsForRuntime) yet wires the provider into the
|
||||
// runtime's NATIVE prefix-routing set so a matching BYOK id resolves via
|
||||
// DeriveProvider. This is the loader half of the cp#529 routability change.
|
||||
// TestParseManifest_NameOnlyArm loads an inline two-provider manifest whose
// codex runtime references `openai` with one model id and `openrouter` with no
// `models` key at all, then checks three things in order: the manifest parses,
// the runtime's model list is unchanged by the name-only ref, and an
// `openrouter/…` id derives to the openrouter provider.
func TestParseManifest_NameOnlyArm(t *testing.T) {
|
||||
const y = `
|
||||
schema_version: 1
|
||||
providers:
|
||||
- name: openai
|
||||
display_name: "OpenAI"
|
||||
protocol: openai
|
||||
auth_mode: anthropic_api
|
||||
auth_env: [OPENAI_API_KEY]
|
||||
model_prefix_match: "^gpt-"
|
||||
- name: openrouter
|
||||
display_name: "OpenRouter"
|
||||
protocol: openai
|
||||
auth_mode: third_party_anthropic_compat
|
||||
auth_env: [OPENROUTER_API_KEY]
|
||||
model_prefix_match: "^openrouter[:/]"
|
||||
runtimes:
|
||||
codex:
|
||||
providers:
|
||||
- name: openai
|
||||
models: [gpt-5.5]
|
||||
- name: openrouter
|
||||
`
|
||||
// Step 1: parsing must succeed even though the openrouter ref lists no models.
m, err := parseManifest([]byte(y))
|
||||
if err != nil {
|
||||
t.Fatalf("parseManifest(name-only arm) error = %v; want nil (name-only arms are permitted)", err)
|
||||
}
|
||||
// Step 2: the name-only arm adds NOTHING to the platform menu — the only
// model id returned for codex is the one declared on the openai ref.
|
||||
models, err := m.ModelsForRuntime("codex")
|
||||
if err != nil {
|
||||
t.Fatalf("ModelsForRuntime(codex) error = %v", err)
|
||||
}
|
||||
if len(models) != 1 || models[0] != "gpt-5.5" {
|
||||
t.Fatalf("ModelsForRuntime(codex) = %v; want [gpt-5.5] (name-only arm must not add a menu id)", models)
|
||||
}
|
||||
// Step 3: …yet a BYOK id matching the name-only arm's prefix
// (`^openrouter[:/]`) now ROUTES to that provider.
|
||||
p, err := m.DeriveProvider("codex", "openrouter/anthropic/claude-3.5-sonnet", nil)
|
||||
if err != nil {
|
||||
t.Fatalf("DeriveProvider(codex, openrouter/…) error = %v; want it to resolve via the name-only arm", err)
|
||||
}
|
||||
if p.Name != "openrouter" {
|
||||
t.Fatalf("DeriveProvider resolved to %q; want openrouter", p.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseManifest_FailDirection is the load-bearing-guard proof: each case
|
||||
// breaks the manifest in one way and asserts the matching error fires. If a
|
||||
// future edit removes a guard, the corresponding case flips red.
|
||||
@@ -287,19 +357,6 @@ runtimes:
|
||||
`,
|
||||
wantErr: "empty native provider set",
|
||||
},
|
||||
{
|
||||
name: "provider ref with no models",
|
||||
yaml: `
|
||||
schema_version: 1
|
||||
providers:
|
||||
- {name: openai, display_name: "OpenAI", protocol: openai, auth_mode: anthropic_api, auth_env: [OPENAI_API_KEY], model_prefix_match: "^gpt-"}
|
||||
runtimes:
|
||||
codex:
|
||||
providers:
|
||||
- {name: openai, models: []}
|
||||
`,
|
||||
wantErr: "no model ids",
|
||||
},
|
||||
{
|
||||
name: "duplicate provider ref",
|
||||
yaml: `
|
||||
|
||||
@@ -29,7 +29,7 @@ import (
|
||||
// canonicalProvidersYAMLSHA256 is the sha256 of the canonical providers.yaml as
|
||||
// synced from molecule-controlplane. Bumped deliberately on each re-sync (see
|
||||
// file doc). Cross-checked live by the sync-providers-yaml CI workflow.
|
||||
const canonicalProvidersYAMLSHA256 = "021ae082c2bbbbb61c406cae03205ac6b7fff160ae5976cfc64de3de676d02b2"
|
||||
const canonicalProvidersYAMLSHA256 = "846ddef11ec423ebf2e96b5da21bd89129dbc3f0a2d14ac086940e005c079387"
|
||||
|
||||
func TestSyncedYAMLMatchesCanonicalSHA(t *testing.T) {
|
||||
sum := sha256.Sum256(embeddedYAML)
|
||||
|
||||
@@ -0,0 +1,178 @@
|
||||
package registry
|
||||
|
||||
// cp_instance_reconciler.go — authoritative EC2-state reconcile for
|
||||
// SaaS workspaces (core#2261).
|
||||
//
|
||||
// Root cause (core#2247): every existing liveness pass keys off a PROXY
|
||||
// for "is this workspace alive?":
|
||||
//
|
||||
// - StartLivenessMonitor — Redis TTL expiry (agent stopped heartbeating).
|
||||
// - StartHealthSweep (Docker pass) — local Docker daemon (prov != nil only).
|
||||
// - StartHealthSweep (remote pass) — last_heartbeat_at freshness for
|
||||
// runtime='external' rows.
|
||||
// - StartCPOrphanSweeper — status='removed' rows with a stray instance_id.
|
||||
//
|
||||
// A SaaS claude-code workspace whose EC2 was terminated/stopped out from
|
||||
// under us (manual AWS action, spot reclaim, CP-side reap, etc.) falls
|
||||
// through ALL of them: it's not 'removed' (so the orphan sweeper skips
|
||||
// it), it's not runtime='external' (so the heartbeat pass skips it), and
|
||||
// on a pure-SaaS front-door prov == nil so the Docker pass never runs.
|
||||
// The registry kept status='online' pointing at a dead instance forever.
|
||||
//
|
||||
// This sweeper closes that gap with the ONE authoritative check the
|
||||
// others lack: CPProvisioner.IsRunning, which ultimately asks the
|
||||
// control-plane "is this EC2 actually running?" (DescribeInstances-
|
||||
// equivalent). When the answer is a CLEAN "no" it feeds the workspace
|
||||
// into the EXISTING offline/auto-heal machinery (onOffline → status flip
|
||||
// + RestartByID reprovision with the existing volume) — no new healing
|
||||
// path, just real ground truth driving the one we already have.
|
||||
//
|
||||
// Guardrails:
|
||||
// - FAIL-SAFE: IsRunning returns (true, err) on any transient DB/transport
|
||||
// error and (false, nil) ONLY when CP genuinely reports the instance
|
||||
// is not running. We act ONLY on (false, nil); any err short-circuits
|
||||
// to "leave it alone" so a CP blip never flips a healthy workspace.
|
||||
// - ONLINE + SaaS ONLY: status='online', instance_id present, and
|
||||
// runtime <> 'external'. Paused/hibernated/removed/provisioning/
|
||||
// awaiting_agent rows are out of scope; external rows are covered by
|
||||
// the remote-heartbeat pass.
|
||||
// - Per-cycle row cap + per-workspace timeout so one slow CP call can't
|
||||
// stall the sweep.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
)
|
||||
|
||||
// InstanceRunningChecker is the one capability the reconciler needs from the
// CP provisioner: answering whether a workspace's instance is running.
// *provisioner.CPProvisioner is the production implementation; tests supply
// fakes.
//
// The reconciler depends on the following FAIL-SAFE contract:
//
//   - (true, err)  — a transient DB/transport failure; the answer is unknown,
//     so the workspace must be left untouched.
//   - (false, nil) — CP positively reports the instance is not running. This
//     is the ONLY result on which the reconciler flips a workspace offline.
type InstanceRunningChecker interface {
	// IsRunning reports whether the instance backing workspaceID is running,
	// following the fail-safe contract described on the interface.
	IsRunning(ctx context.Context, workspaceID string) (bool, error)
}
|
||||
|
||||
// CPInstanceReconcileLimit caps the per-cycle row count so a sustained
|
||||
// CP slowdown can't make a single sweep cycle run unbounded. With a 60s
|
||||
// cadence and a per-workspace timeout below, this bounds worst-case
|
||||
// cycle wall-time and lets subsequent cycles drain any backlog.
|
||||
const CPInstanceReconcileLimit = 200
|
||||
|
||||
// cpInstanceCheckTimeout bounds a single IsRunning call so one slow CP
|
||||
// round-trip can't stall the whole sweep. Each workspace gets its own
|
||||
// timeout context derived from the cycle context.
|
||||
const cpInstanceCheckTimeout = 10 * time.Second
|
||||
|
||||
// StartCPInstanceReconciler runs the authoritative EC2-state reconcile
|
||||
// loop until ctx is cancelled. A nil checker makes the loop a no-op
|
||||
// (matches the nil-tolerant pattern of the sibling CP sweeper).
|
||||
//
|
||||
// Caller is expected to gate on `cpProv != nil` (matching how
|
||||
// StartCPOrphanSweeper is gated at the wiring site in cmd/server/main.go)
|
||||
// — passing a nil *CPProvisioner here would also short-circuit, but the
|
||||
// gate at the call site keeps the call shape symmetric across sweepers.
|
||||
//
|
||||
// interval <= 0 falls back to the default 60s cadence so a misconfigured
|
||||
// caller can't spin a zero-duration ticker (which panics).
|
||||
func StartCPInstanceReconciler(ctx context.Context, checker InstanceRunningChecker, onOffline OfflineHandler, interval time.Duration) {
|
||||
if checker == nil {
|
||||
log.Println("cp-instance-reconciler: checker is nil — reconciler disabled")
|
||||
return
|
||||
}
|
||||
if interval <= 0 {
|
||||
interval = 60 * time.Second
|
||||
}
|
||||
log.Printf("cp-instance-reconciler started — reconciling online SaaS workspaces against real EC2 state every %s", interval)
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
// Kick once at boot so a platform restart starts healing immediately
|
||||
// rather than waiting a full interval.
|
||||
reconcileOnce(ctx, checker, onOffline)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
log.Println("cp-instance-reconciler: shutdown")
|
||||
return
|
||||
case <-ticker.C:
|
||||
reconcileOnce(ctx, checker, onOffline)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reconcileOnce executes one reconcile pass. Defensive against db.DB
|
||||
// being nil so a misconfigured boot doesn't panic.
|
||||
//
|
||||
// Scope: online + SaaS-EC2 workspaces only. runtime='external' rows are
|
||||
// excluded (covered by the remote-heartbeat pass); paused/hibernated/
|
||||
// removed/provisioning/awaiting_agent are excluded by the status filter.
|
||||
func reconcileOnce(ctx context.Context, checker InstanceRunningChecker, onOffline OfflineHandler) {
|
||||
if db.DB == nil {
|
||||
return
|
||||
}
|
||||
|
||||
rows, err := db.DB.QueryContext(ctx, `
|
||||
SELECT id::text
|
||||
FROM workspaces
|
||||
WHERE status = 'online'
|
||||
AND instance_id IS NOT NULL
|
||||
AND instance_id != ''
|
||||
AND COALESCE(runtime, '') <> 'external'
|
||||
ORDER BY updated_at DESC
|
||||
LIMIT $1
|
||||
`, CPInstanceReconcileLimit)
|
||||
if err != nil {
|
||||
log.Printf("cp-instance-reconciler: DB query failed: %v", err)
|
||||
return
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var ids []string
|
||||
for rows.Next() {
|
||||
var id string
|
||||
if scanErr := rows.Scan(&id); scanErr != nil {
|
||||
log.Printf("cp-instance-reconciler: row scan failed: %v", scanErr)
|
||||
continue
|
||||
}
|
||||
ids = append(ids, id)
|
||||
}
|
||||
if iterErr := rows.Err(); iterErr != nil {
|
||||
log.Printf("cp-instance-reconciler: rows iteration failed: %v", iterErr)
|
||||
return
|
||||
}
|
||||
|
||||
for _, id := range ids {
|
||||
// Per-workspace timeout so one slow CP round-trip can't stall
|
||||
// the whole sweep.
|
||||
checkCtx, cancel := context.WithTimeout(ctx, cpInstanceCheckTimeout)
|
||||
running, checkErr := checker.IsRunning(checkCtx, id)
|
||||
cancel()
|
||||
|
||||
if checkErr != nil {
|
||||
// FAIL-SAFE: transient DB/transport error (or a no-backend
|
||||
// signal). IsRunning returns (true, err) on these, so never
|
||||
// flip — leave the row online and retry next cycle.
|
||||
log.Printf("cp-instance-reconciler: IsRunning(%s) errored, leaving online (fail-safe): %v", id, checkErr)
|
||||
continue
|
||||
}
|
||||
if running {
|
||||
continue
|
||||
}
|
||||
|
||||
// CLEAN "not running" — CP authoritatively reports the EC2 is
|
||||
// terminated/stopped/absent. Feed it into the existing offline +
|
||||
// auto-heal machinery: onOffline flips the row offline and
|
||||
// triggers RestartByID, which reprovisions with the existing
|
||||
// volume.
|
||||
log.Printf("cp-instance-reconciler: workspace %s is status=online but its EC2 is not running (terminated/stopped) — flipping offline + triggering reprovision", id)
|
||||
if onOffline != nil {
|
||||
onOffline(ctx, id)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,282 @@
|
||||
package registry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
)
|
||||
|
||||
// fakeRunningChecker is the InstanceRunningChecker test double for the
// instance-reconciler tests. It logs every probed workspace id in calls
// and answers per id, which lets tests model the three CP outcomes:
//
//	(true, nil)  — instance is running.
//	(false, nil) — CLEAN "not running" (terminated/stopped/absent).
//	(true, err)  — transient DB/transport error (FAIL-SAFE path).
type fakeRunningChecker struct {
	mu      sync.Mutex        // guards calls
	running map[string]bool   // id → running answer when no error is injected
	errs    map[string]error  // id → injected error; presence wins over running
	calls   []string          // ids passed to IsRunning, in call order
}

// IsRunning records wsID and returns the configured answer. An id present
// in errs yields (true, err) — the boolean stays true on the error path,
// matching the contract the reconciler's fail-safe relies on. Ids missing
// from running report (false, nil).
func (f *fakeRunningChecker) IsRunning(_ context.Context, wsID string) (bool, error) {
	f.mu.Lock()
	defer f.mu.Unlock()

	f.calls = append(f.calls, wsID)

	injected, hasErr := f.errs[wsID]
	if !hasErr {
		return f.running[wsID], nil
	}
	return true, injected
}
|
||||
|
||||
// recordingOffline is an OfflineHandler that records the workspace IDs
|
||||
// it was invoked with.
|
||||
type recordingOffline struct {
|
||||
mu sync.Mutex
|
||||
calls []string
|
||||
}
|
||||
|
||||
func (r *recordingOffline) handler() OfflineHandler {
|
||||
return func(_ context.Context, wsID string) {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
r.calls = append(r.calls, wsID)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *recordingOffline) got() []string {
|
||||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
out := make([]string, len(r.calls))
|
||||
copy(out, r.calls)
|
||||
return out
|
||||
}
|
||||
|
||||
// expectReconcileQuery registers the reconciler's SELECT, pinning the
|
||||
// scope-critical predicates: status='online', instance_id present, and
|
||||
// runtime <> 'external'. A future widening that drops any of these (e.g.
|
||||
// sweeping paused rows, or external rows the heartbeat pass owns) fails
|
||||
// every test that uses this helper.
|
||||
func expectReconcileQuery(mock sqlmock.Sqlmock, rows *sqlmock.Rows) {
|
||||
mock.ExpectQuery(`(?s)^\s*SELECT id::text\s+FROM workspaces\s+WHERE status = 'online'\s+AND instance_id IS NOT NULL\s+AND instance_id != ''\s+AND COALESCE\(runtime, ''\) <> 'external'\s+ORDER BY updated_at DESC\s+LIMIT \$1`).
|
||||
WithArgs(CPInstanceReconcileLimit).
|
||||
WillReturnRows(rows)
|
||||
}
|
||||
|
||||
// TestReconcileOnce_NotRunning_FlipsOffline — the core bug (core#2247):
|
||||
// an online SaaS workspace whose EC2 is terminated. CP reports a CLEAN
|
||||
// (false, nil); onOffline MUST be called with that id so the existing
|
||||
// auto-heal (status flip + RestartByID reprovision) kicks in.
|
||||
func TestReconcileOnce_NotRunning_FlipsOffline(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
checker := &fakeRunningChecker{running: map[string]bool{"ws-dead": false}}
|
||||
off := &recordingOffline{}
|
||||
|
||||
expectReconcileQuery(mock, sqlmock.NewRows([]string{"id"}).AddRow("ws-dead"))
|
||||
|
||||
reconcileOnce(context.Background(), checker, off.handler())
|
||||
|
||||
if got := off.got(); len(got) != 1 || got[0] != "ws-dead" {
|
||||
t.Fatalf("expected onOffline(ws-dead), got %v", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReconcileOnce_Running_DoesNotFlip — healthy steady state. CP
|
||||
// reports (true, nil); the workspace stays online, onOffline is NOT
|
||||
// called.
|
||||
func TestReconcileOnce_Running_DoesNotFlip(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
checker := &fakeRunningChecker{running: map[string]bool{"ws-alive": true}}
|
||||
off := &recordingOffline{}
|
||||
|
||||
expectReconcileQuery(mock, sqlmock.NewRows([]string{"id"}).AddRow("ws-alive"))
|
||||
|
||||
reconcileOnce(context.Background(), checker, off.handler())
|
||||
|
||||
if got := off.got(); len(got) != 0 {
|
||||
t.Fatalf("running workspace must NOT be flipped offline, got %v", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReconcileOnce_TransientError_DoesNotFlip — FAIL-SAFE contract.
|
||||
// IsRunning returns (true, err) on a transient DB/transport blip; the
|
||||
// reconciler MUST NOT flip the workspace offline. This is the guardrail
|
||||
// that stops a CP outage from cascading every healthy workspace through
|
||||
// reprovision.
|
||||
func TestReconcileOnce_TransientError_DoesNotFlip(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
checker := &fakeRunningChecker{
|
||||
errs: map[string]error{"ws-blip": errors.New("cp provisioner: status: connection reset")},
|
||||
}
|
||||
off := &recordingOffline{}
|
||||
|
||||
expectReconcileQuery(mock, sqlmock.NewRows([]string{"id"}).AddRow("ws-blip"))
|
||||
|
||||
reconcileOnce(context.Background(), checker, off.handler())
|
||||
|
||||
if got := off.got(); len(got) != 0 {
|
||||
t.Fatalf("fail-safe violated: transient IsRunning error must NOT flip offline, got %v", got)
|
||||
}
|
||||
if calls := checker.calls; len(calls) != 1 || calls[0] != "ws-blip" {
|
||||
t.Fatalf("expected IsRunning(ws-blip), got %v", checker.calls)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReconcileOnce_QueryScopeExcludesExternalAndNonOnline — pins the
|
||||
// SELECT predicate. The regex in expectReconcileQuery requires
|
||||
// status='online' AND runtime <> 'external'; if a future edit widens the
|
||||
// scope to include paused/hibernated/removed rows or external rows (owned
|
||||
// by the heartbeat pass), this query no longer matches and sqlmock fails
|
||||
// the test. With the predicate intact, a DB that has only out-of-scope
|
||||
// rows returns empty → no IsRunning, no flip.
|
||||
func TestReconcileOnce_QueryScopeExcludesExternalAndNonOnline(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
checker := &fakeRunningChecker{}
|
||||
off := &recordingOffline{}
|
||||
|
||||
// The predicate filters out external + non-online rows server-side,
|
||||
// modelled as the empty result those filters produce.
|
||||
expectReconcileQuery(mock, sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
reconcileOnce(context.Background(), checker, off.handler())
|
||||
|
||||
if len(checker.calls) != 0 {
|
||||
t.Fatalf("out-of-scope rows must never reach IsRunning, got %v", checker.calls)
|
||||
}
|
||||
if got := off.got(); len(got) != 0 {
|
||||
t.Fatalf("expected no offline flips for out-of-scope rows, got %v", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReconcileOnce_MixedBatch — each row is judged independently: the
|
||||
// dead one flips, the alive one and the transient-error one don't.
|
||||
func TestReconcileOnce_MixedBatch(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
checker := &fakeRunningChecker{
|
||||
running: map[string]bool{"ws-dead": false, "ws-alive": true},
|
||||
errs: map[string]error{"ws-blip": errors.New("503")},
|
||||
}
|
||||
off := &recordingOffline{}
|
||||
|
||||
expectReconcileQuery(mock, sqlmock.NewRows([]string{"id"}).
|
||||
AddRow("ws-dead").
|
||||
AddRow("ws-alive").
|
||||
AddRow("ws-blip"))
|
||||
|
||||
reconcileOnce(context.Background(), checker, off.handler())
|
||||
|
||||
if got := off.got(); len(got) != 1 || got[0] != "ws-dead" {
|
||||
t.Fatalf("expected only ws-dead flipped, got %v", got)
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReconcileOnce_QueryError — DB transient failure. Reconcile returns
|
||||
// without panicking and never probes IsRunning or flips anything.
|
||||
func TestReconcileOnce_QueryError(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
checker := &fakeRunningChecker{}
|
||||
off := &recordingOffline{}
|
||||
|
||||
mock.ExpectQuery(`(?s)^\s*SELECT id::text\s+FROM workspaces`).
|
||||
WithArgs(CPInstanceReconcileLimit).
|
||||
WillReturnError(errors.New("connection refused"))
|
||||
|
||||
reconcileOnce(context.Background(), checker, off.handler())
|
||||
|
||||
if len(checker.calls) != 0 || len(off.got()) != 0 {
|
||||
t.Fatalf("query error must short-circuit; calls=%v offline=%v", checker.calls, off.got())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Fatalf("unmet expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestReconcileOnce_NilDB — defensive against db.DB being nil. Must not
|
||||
// panic, must not probe, must not flip.
|
||||
func TestReconcileOnce_NilDB(t *testing.T) {
|
||||
saved := db.DB
|
||||
db.DB = nil
|
||||
t.Cleanup(func() { db.DB = saved })
|
||||
|
||||
checker := &fakeRunningChecker{}
|
||||
off := &recordingOffline{}
|
||||
reconcileOnce(context.Background(), checker, off.handler())
|
||||
|
||||
if len(checker.calls) != 0 || len(off.got()) != 0 {
|
||||
t.Fatalf("nil db.DB must short-circuit; calls=%v offline=%v", checker.calls, off.got())
|
||||
}
|
||||
}
|
||||
|
||||
// TestStartCPInstanceReconciler_NilCheckerDisabled — boot-safety: a SaaS
|
||||
// CP without cpProv configured must not start the loop (immediate return,
|
||||
// no goroutine leak).
|
||||
func TestStartCPInstanceReconciler_NilCheckerDisabled(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
StartCPInstanceReconciler(ctx, nil, nil, 60*time.Second)
|
||||
close(done)
|
||||
}()
|
||||
select {
|
||||
case <-done:
|
||||
// expected — nil checker short-circuits.
|
||||
case <-time.After(500 * time.Millisecond):
|
||||
t.Fatal("StartCPInstanceReconciler(nil) did not return immediately")
|
||||
}
|
||||
}
|
||||
|
||||
// TestStartCPInstanceReconciler_RunsOnceImmediatelyAndExitsOnCancel —
|
||||
// cadence contract: one sweep at boot (so a restart starts healing
|
||||
// immediately), and the loop terminates on ctx cancel.
|
||||
func TestStartCPInstanceReconciler_RunsOnceImmediatelyAndExitsOnCancel(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
checker := &fakeRunningChecker{}
|
||||
off := &recordingOffline{}
|
||||
|
||||
// Boot sweep query. The 60s ticker won't fire inside the test window;
|
||||
// register a second optional expectation so a stray tick can't fail.
|
||||
expectReconcileQuery(mock, sqlmock.NewRows([]string{"id"}))
|
||||
expectReconcileQuery(mock, sqlmock.NewRows([]string{"id"}))
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
StartCPInstanceReconciler(ctx, checker, off.handler(), 60*time.Second)
|
||||
close(done)
|
||||
}()
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
cancel()
|
||||
select {
|
||||
case <-done:
|
||||
// expected
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatal("StartCPInstanceReconciler did not exit on ctx cancel")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,558 @@
|
||||
//go:build integration
|
||||
// +build integration
|
||||
|
||||
// scheduler_integration_test.go — REAL Postgres integration tests for the
|
||||
// workspace-server cron scheduler firing loop. Regression coverage for
|
||||
// molecule-core issue #2149 (filed under SOP rule internal#765).
|
||||
//
|
||||
// Run with:
|
||||
//
|
||||
// docker run --rm -d --name pg-integration \
|
||||
// -e POSTGRES_PASSWORD=test -e POSTGRES_DB=molecule \
|
||||
// -p 55432:5432 postgres:15-alpine
|
||||
// sleep 4
|
||||
// # apply every migration up.sql / legacy .sql in lexicographic order
|
||||
// for f in $(ls workspace-server/migrations/*.sql | grep -v '\.down\.sql$' | sort); do \
|
||||
// psql "postgres://postgres:test@localhost:55432/molecule?sslmode=disable" -f "$f"; done
|
||||
// cd workspace-server
|
||||
// INTEGRATION_DB_URL="postgres://postgres:test@localhost:55432/molecule?sslmode=disable" \
|
||||
// go test -tags=integration ./internal/scheduler/ -run '^TestIntegration_'
|
||||
//
|
||||
// CI: .gitea/workflows/handlers-postgres-integration.yml runs these on every
|
||||
// PR/push that touches workspace-server/internal/scheduler/ (the
|
||||
// `handlers-postgres` detect-changes profile was extended to include the
|
||||
// scheduler package + this workflow file).
|
||||
//
|
||||
// Why these are NOT the existing sqlmock unit tests (scheduler_test.go)
|
||||
// --------------------------------------------------------------------
|
||||
// The strict-sqlmock unit tests pin which SQL statements fire — fast, no DB.
|
||||
// But sqlmock CANNOT validate:
|
||||
// - the activity_logs `$3::jsonb` cast (#2026 wedge) — sqlmock never parses
|
||||
// the payload, so an invalid-UTF-8 jsonb body that wedges a real INSERT
|
||||
// looks "green" under mock.ExpectExec(`INSERT INTO activity_logs`).
|
||||
// - the ROW STATE after tick()/fireSchedule run: that last_run_at,
|
||||
// next_run_at, run_count, last_status actually landed on the row.
|
||||
// - sweepPhantomBusy's NOT IN (SELECT … activity_logs) subquery semantics
|
||||
// against real rows — it has no unit test at all (#2149).
|
||||
//
|
||||
// A SQL regression here = a fleet-wide silent cron outage (#85 ran 12h before
|
||||
// detection). These tests boot a real Postgres, insert real rows, run the
|
||||
// production tick()/sweepPhantomBusy, and SELECT the rows back to assert the
|
||||
// observable end state — the gap sqlmock structurally cannot cover.
|
||||
//
|
||||
// Watch-fail intent: each test is written to FAIL on a regression of the
|
||||
// behavior under test (e.g. drop the activity_logs INSERT, drop the
|
||||
// write-back UPDATE, drop the UTF-8 sanitize, or break the phantom-busy
|
||||
// subquery) and to PASS against the current-correct scheduler.go.
|
||||
|
||||
package scheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
mdb "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
_ "github.com/lib/pq"
|
||||
)
|
||||
|
||||
// ── test doubles ──────────────────────────────────────────────────────────
|
||||
|
||||
// recordingProxy is an A2AProxy test double. It counts how often it was
// fired, remembers the arguments of the most recent fire, and replies
// with a canned response. Tests use it to assert that tick()/fireSchedule
// actually reached the A2A boundary for the due schedule.
type recordingProxy struct {
	// Canned reply: err wins when set, otherwise (status, body).
	status int
	body   []byte
	err    error

	// Observations from the calls received so far.
	fires       int    // number of ProxyA2ARequest calls
	lastBody    []byte // request body of the latest call
	lastCaller  string // callerID of the latest call
	lastLogFlag bool   // logActivity of the latest call
	lastWSID    string // workspaceID of the latest call
}

// ProxyA2ARequest records the call and returns the canned reply:
// (0, nil, p.err) when an error is configured, else (p.status, p.body,
// nil). The context argument is ignored.
func (p *recordingProxy) ProxyA2ARequest(
	_ context.Context, workspaceID string, body []byte, callerID string, logActivity bool,
) (int, []byte, error) {
	p.fires++
	p.lastWSID, p.lastBody = workspaceID, body
	p.lastCaller, p.lastLogFlag = callerID, logActivity

	if p.err == nil {
		return p.status, p.body, nil
	}
	return 0, nil, p.err
}
|
||||
|
||||
// ── connection + fixture helpers ──────────────────────────────────────────
|
||||
|
||||
// integrationDB returns the configured integration-test connection or skips
|
||||
// the test if INTEGRATION_DB_URL is unset. Hot-swaps the package-level
|
||||
// mdb.DB so the production scheduler helpers (tick, fireSchedule,
|
||||
// sweepPhantomBusy) operate on this connection; restores it via t.Cleanup.
|
||||
//
|
||||
// NOT SAFE FOR t.Parallel(): the package-global swap races across tests.
|
||||
func integrationDB(t *testing.T) *sql.DB {
|
||||
t.Helper()
|
||||
url := os.Getenv("INTEGRATION_DB_URL")
|
||||
if url == "" {
|
||||
t.Skip("INTEGRATION_DB_URL not set; skipping (local devs: see file header)")
|
||||
}
|
||||
conn, err := sql.Open("postgres", url)
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
if err := conn.PingContext(ctx); err != nil {
|
||||
t.Fatalf("ping: %v", err)
|
||||
}
|
||||
// Clean slate. activity_logs + workspace_schedules cascade off workspaces,
|
||||
// but we DELETE explicitly (and in FK order) so a partial prior run can't
|
||||
// leave orphan rows that perturb the next test's assertions.
|
||||
cctx, ccancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer ccancel()
|
||||
for _, q := range []string{
|
||||
`DELETE FROM activity_logs`,
|
||||
`DELETE FROM workspace_schedules`,
|
||||
`DELETE FROM workspaces`,
|
||||
} {
|
||||
if _, err := conn.ExecContext(cctx, q); err != nil {
|
||||
t.Fatalf("cleanup %q: %v", q, err)
|
||||
}
|
||||
}
|
||||
prev := mdb.DB
|
||||
mdb.DB = conn
|
||||
t.Cleanup(func() {
|
||||
mdb.DB = prev
|
||||
conn.Close()
|
||||
})
|
||||
return conn
|
||||
}
|
||||
|
||||
// insertWorkspace creates a workspaces row named name with
// status='online', active_tasks=active and max_concurrent_tasks=1, and
// returns the generated id. Any insert failure aborts the test.
func insertWorkspace(t *testing.T, conn *sql.DB, name string, active int) string {
	t.Helper()

	const stmt = `
		INSERT INTO workspaces (name, status, active_tasks, max_concurrent_tasks)
		VALUES ($1, 'online', $2, 1)
		RETURNING id
	`
	var wsID string
	row := conn.QueryRowContext(context.Background(), stmt, name, active)
	if err := row.Scan(&wsID); err != nil {
		t.Fatalf("insertWorkspace(%s): %v", name, err)
	}
	return wsID
}
|
||||
|
||||
// insertSchedule creates an enabled workspace_schedules row for wsID
// (timezone 'UTC', source 'runtime') whose next_run_at lies one minute in
// the past, so the next tick() treats it as due. It returns the generated
// id; any insert failure aborts the test.
func insertSchedule(t *testing.T, conn *sql.DB, wsID, name, cronExpr, prompt string) string {
	t.Helper()

	const stmt = `
		INSERT INTO workspace_schedules
		(workspace_id, name, cron_expr, timezone, prompt, enabled, next_run_at, source)
		VALUES ($1, $2, $3, 'UTC', $4, true, now() - interval '1 minute', 'runtime')
		RETURNING id
	`
	var schedID string
	row := conn.QueryRowContext(context.Background(), stmt, wsID, name, cronExpr, prompt)
	if err := row.Scan(&schedID); err != nil {
		t.Fatalf("insertSchedule(%s): %v", name, err)
	}
	return schedID
}
|
||||
|
||||
// scheduleState is the subset of a workspace_schedules row the tests
// assert on after a fire, as loaded by readScheduleState.
type scheduleState struct {
	lastRunAt  sql.NullTime // last_run_at; invalid when the column is NULL
	nextRunAt  sql.NullTime // next_run_at; invalid when the column is NULL
	runCount   int          // run_count
	lastStatus string       // last_status; "" when the column is NULL
	lastError  string       // last_error; "" when the column is NULL
}
|
||||
|
||||
func readScheduleState(t *testing.T, conn *sql.DB, id string) scheduleState {
|
||||
t.Helper()
|
||||
var st scheduleState
|
||||
var status, errStr sql.NullString
|
||||
err := conn.QueryRowContext(context.Background(), `
|
||||
SELECT last_run_at, next_run_at, run_count, last_status, last_error
|
||||
FROM workspace_schedules WHERE id = $1
|
||||
`, id).Scan(&st.lastRunAt, &st.nextRunAt, &st.runCount, &status, &errStr)
|
||||
if err != nil {
|
||||
t.Fatalf("readScheduleState(%s): %v", id, err)
|
||||
}
|
||||
st.lastStatus = status.String
|
||||
st.lastError = errStr.String
|
||||
return st
|
||||
}
|
||||
|
||||
// ── TestIntegration_TickFiresAndWritesBack (#2149 core) ───────────────────
|
||||
//
|
||||
// Insert one due schedule, run tick() once, and assert the full firing
|
||||
// loop landed against a REAL Postgres:
|
||||
// - the A2A proxy was invoked exactly once for the schedule's workspace
|
||||
// - the post-fire UPDATE wrote last_run_at (was NULL), advanced next_run_at
|
||||
// into the future, bumped run_count to 1, set last_status='ok'
|
||||
// - a cron_run activity_logs row was inserted with VALID jsonb request_body
|
||||
// (the `$3::jsonb` cast #2026 path) carrying the schedule metadata
|
||||
//
|
||||
// Regression watch-fail: if a refactor drops the write-back UPDATE, the
|
||||
// activity_logs INSERT, or breaks the jsonb cast, this test fails where every
|
||||
// sqlmock unit test stays green.
|
||||
func TestIntegration_TickFiresAndWritesBack(t *testing.T) {
|
||||
conn := integrationDB(t)
|
||||
|
||||
wsID := insertWorkspace(t, conn, "cron-fire-ws", 0)
|
||||
schedID := insertSchedule(t, conn, wsID, "hourly-audit", "0 * * * *", "run the hourly audit")
|
||||
|
||||
proxy := &recordingProxy{
|
||||
status: 200,
|
||||
body: []byte(`{"jsonrpc":"2.0","result":{"kind":"message","parts":[{"kind":"text","text":"done"}]},"id":"1"}`),
|
||||
}
|
||||
s := New(proxy, nil)
|
||||
s.tick(context.Background())
|
||||
|
||||
// 1. A2A boundary reached exactly once for the right workspace.
|
||||
if proxy.fires != 1 {
|
||||
t.Fatalf("proxy fires = %d, want 1 (tick must fire the one due schedule)", proxy.fires)
|
||||
}
|
||||
if proxy.lastWSID != wsID {
|
||||
t.Errorf("proxy fired for workspace %q, want %q", proxy.lastWSID, wsID)
|
||||
}
|
||||
// Empty callerID = canvas-style (bypasses access control); logActivity=true.
|
||||
if proxy.lastCaller != "" {
|
||||
t.Errorf("callerID = %q, want empty (canvas-style scheduler fire)", proxy.lastCaller)
|
||||
}
|
||||
if !proxy.lastLogFlag {
|
||||
t.Error("logActivity flag = false, want true")
|
||||
}
|
||||
|
||||
// 2. Row write-back.
|
||||
st := readScheduleState(t, conn, schedID)
|
||||
if !st.lastRunAt.Valid {
|
||||
t.Error("last_run_at is NULL after fire, want set (write-back UPDATE did not land)")
|
||||
}
|
||||
if !st.nextRunAt.Valid {
|
||||
t.Fatal("next_run_at is NULL after fire, want a future timestamp")
|
||||
}
|
||||
if !st.nextRunAt.Time.After(time.Now()) {
|
||||
t.Errorf("next_run_at = %v, want a time in the future (schedule would tight-loop otherwise)", st.nextRunAt.Time)
|
||||
}
|
||||
if st.runCount != 1 {
|
||||
t.Errorf("run_count = %d, want 1", st.runCount)
|
||||
}
|
||||
if st.lastStatus != "ok" {
|
||||
t.Errorf("last_status = %q, want \"ok\"", st.lastStatus)
|
||||
}
|
||||
|
||||
// 3. activity_logs cron_run row with valid jsonb request_body.
|
||||
var actCount int
|
||||
var summary, status string
|
||||
var reqBody []byte
|
||||
err := conn.QueryRowContext(context.Background(), `
|
||||
SELECT count(*) OVER (), summary, status, request_body
|
||||
FROM activity_logs
|
||||
WHERE workspace_id = $1 AND activity_type = 'cron_run'
|
||||
LIMIT 1
|
||||
`, wsID).Scan(&actCount, &summary, &status, &reqBody)
|
||||
if err == sql.ErrNoRows {
|
||||
t.Fatal("no cron_run activity_logs row inserted after fire (#152/#2026 path missing)")
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("read activity_logs: %v", err)
|
||||
}
|
||||
if actCount != 1 {
|
||||
t.Errorf("cron_run activity_logs rows = %d, want 1", actCount)
|
||||
}
|
||||
if status != "ok" {
|
||||
t.Errorf("activity_logs.status = %q, want \"ok\"", status)
|
||||
}
|
||||
// request_body must be valid jsonb carrying the schedule_id — proves the
|
||||
// `$3::jsonb` cast accepted the payload (the #2026 wedge surface).
|
||||
var sid string
|
||||
if err := conn.QueryRowContext(context.Background(), `
|
||||
SELECT request_body->>'schedule_id'
|
||||
FROM activity_logs WHERE workspace_id = $1 AND activity_type = 'cron_run' LIMIT 1
|
||||
`, wsID).Scan(&sid); err != nil {
|
||||
t.Fatalf("request_body is not queryable jsonb: %v", err)
|
||||
}
|
||||
if sid != schedID {
|
||||
t.Errorf("activity_logs request_body->>'schedule_id' = %q, want %q", sid, schedID)
|
||||
}
|
||||
}
|
||||
|
||||
// ── TestIntegration_InvalidUTF8PromptSanitizedIntoJsonb (#2026 / #2149) ────
|
||||
//
|
||||
// The agent-editable prompt can carry raw invalid-UTF-8 bytes. Postgres jsonb
|
||||
// columns REJECT invalid UTF-8, which (pre-#2026) wedged the activity_logs
|
||||
// INSERT and held the transaction open — stalling the whole scheduler.
|
||||
// fireSchedule now sanitizeUTF8()s every string before the `$3::jsonb` insert.
|
||||
//
|
||||
// Postgres TEXT columns (workspace_schedules.prompt) also reject invalid UTF-8
|
||||
// in a UTF-8 database, so we cannot INSERT the bad bytes through the fixture.
|
||||
// Instead we insert a valid prompt, then call fireSchedule directly with a
|
||||
// scheduleRow whose Prompt field contains the invalid bytes — this simulates
|
||||
// the real regression path (e.g. truncation splitting a multi-byte rune, or
|
||||
// an agent-edited template arriving via a path that bypasses DB validation).
|
||||
//
|
||||
// Assertions:
|
||||
// - the fire still completed (write-back UPDATE landed)
|
||||
// - the cron_run activity_logs row was inserted (the jsonb cast accepted
|
||||
// the SANITIZED payload — the INSERT did not wedge)
|
||||
// - the stored request_body is queryable jsonb (valid UTF-8 on disk)
|
||||
//
|
||||
// Watch-fail: remove the sanitizeUTF8() wrapping around the jsonb payload and
|
||||
// this test fails on a real Postgres (INSERT errors / row absent), while the
|
||||
// sqlmock unit test that only checks "an INSERT fired" stays green.
|
||||
func TestIntegration_InvalidUTF8PromptSanitizedIntoJsonb(t *testing.T) {
|
||||
conn := integrationDB(t)
|
||||
|
||||
wsID := insertWorkspace(t, conn, "utf8-ws", 0)
|
||||
// Insert with valid UTF-8 — Postgres TEXT rejects 0x80/0xff.
|
||||
schedID := insertSchedule(t, conn, wsID, "utf8-job", "0 * * * *", "valid prompt")
|
||||
|
||||
// Prompt with invalid UTF-8: orphan continuation byte + bare 0xff.
|
||||
badPrompt := "audit \x80 report \xff end"
|
||||
row := scheduleRow{
|
||||
ID: schedID,
|
||||
WorkspaceID: wsID,
|
||||
Name: "utf8-job",
|
||||
CronExpr: "0 * * * *",
|
||||
Timezone: "UTC",
|
||||
Prompt: badPrompt,
|
||||
}
|
||||
|
||||
proxy := &recordingProxy{
|
||||
status: 200,
|
||||
body: []byte(`{"result":{"kind":"message","parts":[{"kind":"text","text":"ok"}]}}`),
|
||||
}
|
||||
s := New(proxy, nil)
|
||||
s.fireSchedule(context.Background(), row)
|
||||
|
||||
if proxy.fires != 1 {
|
||||
t.Fatalf("proxy fires = %d, want 1", proxy.fires)
|
||||
}
|
||||
|
||||
// Write-back must have landed despite the bad prompt bytes.
|
||||
st := readScheduleState(t, conn, schedID)
|
||||
if st.runCount != 1 || st.lastStatus != "ok" {
|
||||
t.Errorf("post-fire state run_count=%d last_status=%q, want 1/\"ok\" "+
|
||||
"(invalid-UTF-8 prompt must not block the fire)", st.runCount, st.lastStatus)
|
||||
}
|
||||
|
||||
// The cron_run activity_logs row MUST exist — proving the `$3::jsonb`
|
||||
// INSERT accepted the sanitized payload (did not wedge on invalid UTF-8).
|
||||
var n int
|
||||
if err := conn.QueryRowContext(context.Background(), `
|
||||
SELECT count(*) FROM activity_logs
|
||||
WHERE workspace_id = $1 AND activity_type = 'cron_run'
|
||||
`, wsID).Scan(&n); err != nil {
|
||||
t.Fatalf("count cron_run rows: %v", err)
|
||||
}
|
||||
if n != 1 {
|
||||
t.Fatalf("cron_run activity_logs rows = %d, want 1 — the jsonb INSERT wedged "+
|
||||
"on invalid UTF-8 (the #2026 regression)", n)
|
||||
}
|
||||
|
||||
// The stored prompt inside request_body must be queryable + valid UTF-8.
|
||||
var storedPrompt string
|
||||
if err := conn.QueryRowContext(context.Background(), `
|
||||
SELECT request_body->>'prompt'
|
||||
FROM activity_logs WHERE workspace_id = $1 AND activity_type = 'cron_run' LIMIT 1
|
||||
`, wsID).Scan(&storedPrompt); err != nil {
|
||||
t.Fatalf("request_body->>'prompt' not queryable jsonb: %v", err)
|
||||
}
|
||||
if storedPrompt == "" {
|
||||
t.Error("stored prompt is empty, want the sanitized prompt text")
|
||||
}
|
||||
// Round-trip through Postgres jsonb guarantees valid UTF-8; assert the
|
||||
// replacement character replaced the bad bytes rather than them surviving.
|
||||
for i := 0; i < len(storedPrompt); i++ {
|
||||
if storedPrompt[i] == 0x80 || storedPrompt[i] == 0xff {
|
||||
t.Fatalf("stored prompt still contains raw invalid byte 0x%x at %d", storedPrompt[i], i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── TestIntegration_TickErrorStatusWriteBack (#2149) ──────────────────────
|
||||
//
|
||||
// When the A2A proxy returns a transport error, fireSchedule must still write
|
||||
// back: last_status='error', last_error populated, next_run_at advanced (so
|
||||
// the schedule does not get stuck re-firing), run_count bumped. Verifies the
|
||||
// error path persists to a real row, not just that "an UPDATE fired".
|
||||
func TestIntegration_TickErrorStatusWriteBack(t *testing.T) {
|
||||
conn := integrationDB(t)
|
||||
|
||||
wsID := insertWorkspace(t, conn, "err-ws", 0)
|
||||
schedID := insertSchedule(t, conn, wsID, "err-job", "0 * * * *", "do work")
|
||||
|
||||
proxy := &recordingProxy{err: context.DeadlineExceeded}
|
||||
s := New(proxy, nil)
|
||||
s.tick(context.Background())
|
||||
|
||||
st := readScheduleState(t, conn, schedID)
|
||||
if st.lastStatus != "error" {
|
||||
t.Errorf("last_status = %q, want \"error\"", st.lastStatus)
|
||||
}
|
||||
if st.lastError == "" {
|
||||
t.Error("last_error is empty, want the proxy error text persisted (#152)")
|
||||
}
|
||||
if st.runCount != 1 {
|
||||
t.Errorf("run_count = %d, want 1 (run still counted on error)", st.runCount)
|
||||
}
|
||||
if !st.nextRunAt.Valid || !st.nextRunAt.Time.After(time.Now()) {
|
||||
t.Errorf("next_run_at not advanced to future on error path (= %v) — schedule would tight-loop", st.nextRunAt)
|
||||
}
|
||||
// The error activity_logs row must carry status='error' + error_detail.
|
||||
var status, errDetail string
|
||||
if err := conn.QueryRowContext(context.Background(), `
|
||||
SELECT status, COALESCE(error_detail,'') FROM activity_logs
|
||||
WHERE workspace_id = $1 AND activity_type = 'cron_run' LIMIT 1
|
||||
`, wsID).Scan(&status, &errDetail); err != nil {
|
||||
t.Fatalf("read error activity_logs: %v", err)
|
||||
}
|
||||
if status != "error" {
|
||||
t.Errorf("activity_logs.status = %q, want \"error\"", status)
|
||||
}
|
||||
if errDetail == "" {
|
||||
t.Error("activity_logs.error_detail empty on error fire, want the error message (#152)")
|
||||
}
|
||||
}
|
||||
|
||||
// ── TestIntegration_SweepPhantomBusy (#2149 — no prior test) ──────────────
|
||||
//
|
||||
// sweepPhantomBusy resets active_tasks=0 for workspaces stuck busy with NO
|
||||
// activity_logs row in the last phantomStaleThreshold window, and must LEAVE
|
||||
// ALONE workspaces that have recent activity. The NOT IN (SELECT DISTINCT
|
||||
// workspace_id FROM activity_logs WHERE created_at > now() - interval) subquery
|
||||
// is exactly the kind of set-semantics that sqlmock cannot validate — there is
|
||||
// no unit test for this method at all (#2149).
|
||||
//
|
||||
// Fixture:
|
||||
// - phantomWS: active_tasks=3, NO recent activity_log → must reset to 0
|
||||
// - recentWS: active_tasks=2, activity_log 1 min ago → must stay at 2
|
||||
// - staleWS: active_tasks=1, activity_log 30 min ago → must reset to 0
|
||||
// - removedWS: active_tasks=4, status='removed', no activity → must stay (status guard)
|
||||
// - idleWS: active_tasks=0 → untouched (not >0)
|
||||
//
|
||||
// Watch-fail: break the subquery (e.g. drop the status!='removed' guard, or
|
||||
// invert the NOT IN), and the asserted end-state diverges on a real Postgres.
|
||||
func TestIntegration_SweepPhantomBusy(t *testing.T) {
|
||||
conn := integrationDB(t)
|
||||
|
||||
phantomWS := insertWorkspace(t, conn, "phantom-ws", 3)
|
||||
recentWS := insertWorkspace(t, conn, "recent-ws", 2)
|
||||
staleWS := insertWorkspace(t, conn, "stale-ws", 1)
|
||||
idleWS := insertWorkspace(t, conn, "idle-ws", 0)
|
||||
|
||||
// removedWS: busy but status='removed' — the sweep must skip it.
|
||||
var removedWS string
|
||||
if err := conn.QueryRowContext(context.Background(), `
|
||||
INSERT INTO workspaces (name, status, active_tasks, max_concurrent_tasks)
|
||||
VALUES ('removed-ws', 'removed', 4, 1) RETURNING id
|
||||
`).Scan(&removedWS); err != nil {
|
||||
t.Fatalf("insert removedWS: %v", err)
|
||||
}
|
||||
|
||||
// recentWS has a fresh activity_log (1 min ago → inside the 10-min window).
|
||||
if _, err := conn.ExecContext(context.Background(), `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, status, created_at)
|
||||
VALUES ($1, 'a2a_receive', 'ok', now() - interval '1 minute')
|
||||
`, recentWS); err != nil {
|
||||
t.Fatalf("insert recent activity_log: %v", err)
|
||||
}
|
||||
// staleWS has only an OLD activity_log (30 min ago → outside the window).
|
||||
if _, err := conn.ExecContext(context.Background(), `
|
||||
INSERT INTO activity_logs (workspace_id, activity_type, status, created_at)
|
||||
VALUES ($1, 'a2a_receive', 'ok', now() - interval '30 minutes')
|
||||
`, staleWS); err != nil {
|
||||
t.Fatalf("insert stale activity_log: %v", err)
|
||||
}
|
||||
|
||||
s := New(nil, nil)
|
||||
s.sweepPhantomBusy(context.Background())
|
||||
|
||||
active := func(id string) int {
|
||||
var n int
|
||||
if err := conn.QueryRowContext(context.Background(),
|
||||
`SELECT active_tasks FROM workspaces WHERE id = $1`, id).Scan(&n); err != nil {
|
||||
t.Fatalf("read active_tasks(%s): %v", id, err)
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
if got := active(phantomWS); got != 0 {
|
||||
t.Errorf("phantomWS active_tasks = %d, want 0 (busy + no recent activity → must be swept)", got)
|
||||
}
|
||||
if got := active(staleWS); got != 0 {
|
||||
t.Errorf("staleWS active_tasks = %d, want 0 (only stale activity → must be swept)", got)
|
||||
}
|
||||
if got := active(recentWS); got != 2 {
|
||||
t.Errorf("recentWS active_tasks = %d, want 2 (recent activity → must NOT be swept)", got)
|
||||
}
|
||||
if got := active(removedWS); got != 4 {
|
||||
t.Errorf("removedWS active_tasks = %d, want 4 (status='removed' → sweep must skip it)", got)
|
||||
}
|
||||
if got := active(idleWS); got != 0 {
|
||||
t.Errorf("idleWS active_tasks = %d, want 0 (was never busy)", got)
|
||||
}
|
||||
|
||||
// The swept rows must also have current_task cleared.
|
||||
var ct string
|
||||
if err := conn.QueryRowContext(context.Background(),
|
||||
`SELECT COALESCE(current_task,'') FROM workspaces WHERE id = $1`, phantomWS).Scan(&ct); err != nil {
|
||||
t.Fatalf("read current_task: %v", err)
|
||||
}
|
||||
if ct != "" {
|
||||
t.Errorf("phantomWS current_task = %q, want empty after sweep", ct)
|
||||
}
|
||||
}
|
||||
|
||||
// ── TestIntegration_NativeSchedulerSkipAdvancesNextRunAt (#2149) ──────────
|
||||
//
|
||||
// When a workspace's adapter owns scheduling natively, tick() must SKIP the
|
||||
// fire but still advance next_run_at (so the row doesn't tight-loop on every
|
||||
// poll) — observability (next_run_at) is preserved while the fire is dropped.
|
||||
// Asserts the native-skip UPDATE landed on a real row and the proxy was NOT
|
||||
// invoked. This is the native-skip UPDATE path #2149 calls out — sqlmock can
|
||||
// only assert an UPDATE fired, not that next_run_at moved forward.
|
||||
func TestIntegration_NativeSchedulerSkipAdvancesNextRunAt(t *testing.T) {
|
||||
conn := integrationDB(t)
|
||||
|
||||
wsID := insertWorkspace(t, conn, "native-ws", 0)
|
||||
schedID := insertSchedule(t, conn, wsID, "native-job", "0 * * * *", "native run")
|
||||
|
||||
// Capture the pre-tick next_run_at (it is in the past by construction).
|
||||
before := readScheduleState(t, conn, schedID)
|
||||
if !before.nextRunAt.Valid || before.nextRunAt.Time.After(time.Now()) {
|
||||
t.Fatalf("precondition: next_run_at should start in the past, got %v", before.nextRunAt)
|
||||
}
|
||||
|
||||
proxy := &recordingProxy{status: 200, body: []byte(`{}`)}
|
||||
s := New(proxy, nil)
|
||||
// Every workspace reports native scheduling → fire must be skipped.
|
||||
s.SetNativeSchedulerCheck(func(string) bool { return true })
|
||||
s.tick(context.Background())
|
||||
|
||||
if proxy.fires != 0 {
|
||||
t.Errorf("proxy fires = %d, want 0 (native-scheduler workspace must NOT fire)", proxy.fires)
|
||||
}
|
||||
|
||||
after := readScheduleState(t, conn, schedID)
|
||||
if !after.nextRunAt.Valid || !after.nextRunAt.Time.After(time.Now()) {
|
||||
t.Errorf("next_run_at = %v, want advanced into the future (native-skip UPDATE must still run)", after.nextRunAt)
|
||||
}
|
||||
// Skip path does NOT bump run_count or write last_run_at (no fire happened).
|
||||
if after.runCount != 0 {
|
||||
t.Errorf("run_count = %d, want 0 (skip must not count as a run)", after.runCount)
|
||||
}
|
||||
if after.lastRunAt.Valid {
|
||||
t.Error("last_run_at set on native-skip, want NULL (no fire occurred)")
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
-- Down migration paired with 20260604000000_activity_logs_seq.up.sql.
--
-- Removes, in order, the feed-ordering index and then the monotonic `seq`
-- column it was built on.
--
-- Operator-only: apply by hand through psql. The boot-time runner never
-- applies *.down.sql files (see RunMigrations in internal/db/postgres.go,
-- issue #211).

DROP INDEX IF EXISTS idx_activity_ws_created_seq;

ALTER TABLE activity_logs DROP COLUMN IF EXISTS seq;
|
||||
@@ -0,0 +1,54 @@
|
||||
-- activity_logs.seq: a monotonic tiebreaker that makes the poll-mode since_id
-- activity feed (#2339) deterministically ordered.
--
-- ROOT CAUSE
--   The feed orders by created_at ASC/DESC with NO tiebreaker, and
--   activity_logs.id is a random gen_random_uuid(), so the table has no
--   monotonic column to break ties with. Two rows inserted in the same
--   microsecond (back-to-back A2A logging) share a created_at and come back in
--   arbitrary planner order, which is why the E2E intermittently sees
--   hello-from-e2e-3 before hello-from-e2e-2. That is a missing tiebreaker,
--   not a flake.
--
--   A second, related bug is fixed in the handler: the since_id cursor filtered
--   strictly on `created_at > cursor`, silently dropping a row written in the
--   same microsecond as the cursor row. The composite key created below lets
--   the handler compare the full (created_at, seq) tuple.
--
-- WHAT `seq` IS
--   A GENERATED BY DEFAULT AS IDENTITY BIGINT: a UNIQUE, monotonic-once-assigned
--   tiebreaker. Verified on PostgreSQL 16.13 (the prod version):
--     * Backfill: adding the IDENTITY column to a populated table REWRITES the
--       table and assigns `seq` to every EXISTING row during the ALTER, in
--       PHYSICAL TABLE-SCAN order (NOT NULL — existing rows do get a value).
--       That order is not guaranteed to equal historical insertion order.
--     * The identity sequence then advances ABOVE max(seq), so every later
--       INSERT that omits `seq` gets a fresh value strictly greater than the
--       backfilled max — no collision with the backfilled rows.
--     * GENERATED BY DEFAULT (not ALWAYS), so existing INSERTs that do not name
--       `seq` keep working and a caller may still override it if ever needed.
--
-- WHAT `seq` IS NOT (and why that is fine)
--     * NOT guaranteed gap-free — rolled-back transactions burn sequence values.
--     * NOT a strict commit-order guarantee under concurrency — two concurrent
--       INSERTs may commit in the opposite order to the `seq` values they drew.
--   Neither property is needed. The feed only requires a TOTAL, STABLE
--   tiebreaker, so that (created_at, seq) always sorts any two rows the same
--   way and never ties; a `seq` that is unique and non-null on every row
--   delivers exactly that. Rows sharing a created_at were returned in ARBITRARY
--   order before this migration and have a fixed, repeatable order after it —
--   strictly better, never worse. New traffic is fully deterministic, and the
--   backfill makes historical rows deterministic too.
--
-- IDEMPOTENCE
--   ADD COLUMN IF NOT EXISTS + CREATE INDEX IF NOT EXISTS, so the boot-time
--   runner (and the CI migrate-replay step) can re-apply this file safely.

ALTER TABLE activity_logs
    ADD COLUMN IF NOT EXISTS seq BIGINT GENERATED BY DEFAULT AS IDENTITY;

-- Composite index for the feed query:
--     WHERE workspace_id = $1 AND created_at <cmp> $t ORDER BY created_at, seq
-- The (workspace_id, created_at, seq) key serves both the ASC cursor path and
-- the DESC recent path (Postgres reads the same btree backwards for DESC).
-- It does not duplicate existing indexes:
--     * migration 009's idx_activity_ws_type_time (workspace_id, activity_type,
--       created_at) is type-prefixed and cannot drive a type-agnostic feed scan;
--     * migration 048's indexes are per-peer, on source_id / target_id.
CREATE INDEX IF NOT EXISTS idx_activity_ws_created_seq
    ON activity_logs (workspace_id, created_at, seq);
|
||||
Reference in New Issue
Block a user