Compare commits
116 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bf0db08c7c | |||
| e441def8b3 | |||
| 51f83260df | |||
| 2fa68b1f23 | |||
| 1c07d65561 | |||
| c950dcbd6e | |||
| 79e34175c9 | |||
| e5daf96dab | |||
| 4b56cabe24 | |||
| b057994cac | |||
| be1f38b7b5 | |||
| d4be3e383a | |||
| 7fb66f473d | |||
| be387623c6 | |||
| 61d8fdc9ec | |||
| 032befab27 | |||
| 2b78e29138 | |||
| d49a31ff29 | |||
| 1963356317 | |||
| d61d9af761 | |||
| 74c1c4e7dd | |||
| 37942699d3 | |||
| 9707f124c4 | |||
| c57559c05d | |||
| 0c64f1eaf0 | |||
| 90852601cc | |||
| 2f53bbac6c | |||
| 2f5536fd48 | |||
| 757768aee4 | |||
| 8522b4e368 | |||
| a85d4c8f89 | |||
| 1e0507ad9e | |||
| df32264adf | |||
| 426f693053 | |||
| 70001f0dc9 | |||
| 7e313d1c77 | |||
| ee6e8e10a9 | |||
| 09f8527a90 | |||
| 81aa23574c | |||
| 08f77162c4 | |||
| 944652b13c | |||
| e4a336ac57 | |||
| d2ebca553f | |||
| 41409fb2df | |||
| 13d951b32b | |||
| 2013e88909 | |||
| 9bb903c565 | |||
| 9c661f7020 | |||
| ba8ecdeb48 | |||
| 193a959d01 | |||
| 4b3eb5022a | |||
| 1955fdd0e5 | |||
| 7ca572f220 | |||
| ce5101d926 | |||
| 9555a2c258 | |||
| a329c97363 | |||
| 6d2db3d0cc | |||
| 8135ee4c3a | |||
| 99087a41c4 | |||
| a5211f69e4 | |||
| b7294aa729 | |||
| f1558b548d | |||
| 2efeb6a1bb | |||
| 4b3590e3dc | |||
| 64fdfa6e77 | |||
| 797351bbba | |||
| 0130f293c8 | |||
| 264894da89 | |||
| 2171c47cfa | |||
| ca80894ffc | |||
| f78fef4c97 | |||
| ba78894858 | |||
| 2e31f27304 | |||
| 9efd06034c | |||
| d3f93efabf | |||
| 6f56b1fa30 | |||
| d3d108a636 | |||
| 3747fe2f49 | |||
| c3fd113780 | |||
| e2ae5b1854 | |||
| 467c10526b | |||
| 8fb5dbed59 | |||
| be46aabf78 | |||
| 74a3299a53 | |||
| c351adc46d | |||
| bb82e42901 | |||
| 74fd08144d | |||
| 885cf423cc | |||
| 91ee92795b | |||
| 75d3a3102b | |||
| acde83b602 | |||
| d037e24cb0 | |||
| d1c0a66e14 | |||
| 9d23a7ef9f | |||
| 4752a78d21 | |||
| 6f7fa42b9c | |||
| 6884fff0b2 | |||
| 27f8f4dba2 | |||
| d063ecd186 | |||
| 48b6011e17 | |||
| fc6850196b | |||
| cc99d3fff4 | |||
| 8a5c6cf771 | |||
| f0dec49793 | |||
| 10b7f8a99a | |||
| 32e6427483 | |||
| e7968115ba | |||
| f0b6079a82 | |||
| e6da3b29fb | |||
| 968d77fc1a | |||
| bcd8ce1c80 | |||
| 8547a7d845 | |||
| b1178c968d | |||
| 5e4577cfe7 | |||
| 7d1cf6cc35 | |||
| 25f601f8f8 |
+14
-5
@@ -19,13 +19,22 @@ REDIS_URL=redis://localhost:6379
|
||||
# itself to 3000 in canvas/package.json, so sourcing this file before
|
||||
# `npm run dev` won't accidentally make Next.js try to bind 8080.
|
||||
PORT=8080
|
||||
# ---- Admin credential — REQUIRED to close issue #684 (AdminAuth bearer bypass) ----
|
||||
# ---- Admin credential — REQUIRED in EVERY environment (auth is fail-closed) ----
|
||||
# Auth is fail-CLOSED everywhere now (harden/no-fail-open-auth): there is NO
|
||||
# dev-mode escape hatch. AdminAuth / WorkspaceAuth / discovery all require a
|
||||
# real credential. The canvas authenticates by sending this value as a bearer
|
||||
# (it reads NEXT_PUBLIC_ADMIN_TOKEN — set it to the SAME value).
|
||||
# When ADMIN_TOKEN is set, only this value is accepted on /admin/* and /approvals/* routes.
|
||||
# Without it, any valid workspace bearer token can call admin endpoints (backward compat
|
||||
# fallback, still vulnerable). Set this in every environment, rotate when compromised.
|
||||
# Generate: openssl rand -base64 32
|
||||
# (When unset, a fresh install 401s on admin routes and any valid workspace bearer
|
||||
# is the only deprecated fallback once tokens exist — set ADMIN_TOKEN to close #684.)
|
||||
# Generate: openssl rand -base64 32 (scripts/dev-start.sh provisions a fixed dev value)
|
||||
# Store in fly secrets / deployment env — NEVER commit the actual value here.
|
||||
ADMIN_TOKEN=
|
||||
# NEXT_PUBLIC_ADMIN_TOKEN= # Canvas-side mirror of ADMIN_TOKEN. The canvas
|
||||
# bakes this into its bundle and sends it as the
|
||||
# bearer. MUST equal ADMIN_TOKEN (next.config.ts
|
||||
# warns if the pair is half-set). dev-start.sh
|
||||
# exports it for you.
|
||||
SECRETS_ENCRYPTION_KEY= # 32-byte key (raw or base64). Leave empty for plaintext (dev only).
|
||||
CONFIGS_DIR= # Path to workspace-configs-templates/ (auto-discovered if empty)
|
||||
PLUGINS_DIR= # Path to plugins/ directory (default: /plugins in container)
|
||||
@@ -34,7 +43,7 @@ PLUGINS_DIR= # Path to plugins/ directory (default: /plugins i
|
||||
# MOLECULE_MCP_ALLOW_SEND_MESSAGE= # Set to "true" to include send_message_to_user in the MCP bridge tool list (issue #810). Excluded by default to prevent unintended WebSocket pushes from CLI sessions.
|
||||
# MOLECULE_MCP_URL=http://localhost:8080 # Platform URL for opencode MCP config (opencode.json). Same as PLATFORM_URL; separate var so opencode configs can reference it without ambiguity.
|
||||
# WORKSPACE_DIR= # Optional global host path bind-mounted to /workspace in every container. Per-workspace workspace_dir column overrides this; if neither is set each workspace gets an isolated Docker named volume.
|
||||
MOLECULE_ENV=development # Environment label (development/staging/production). Used for log tagging and for the AdminAuth dev-mode escape hatch (lets the Canvas dashboard keep working after the first workspace is created, when ADMIN_TOKEN is unset). SaaS deployments MUST set MOLECULE_ENV=production.
|
||||
MOLECULE_ENV=development # Environment label (development/staging/production). Used for log tagging and for NON-security local-dev conveniences (loopback HTTP bind, relaxed rate-limit bucket). It is NOT an auth lever — auth is fail-closed in every environment. SaaS deployments MUST set MOLECULE_ENV=production.
|
||||
# MOLECULE_ENABLE_TEST_TOKENS= # Set to 1 to expose GET /admin/workspaces/:id/test-token (mints a fresh bearer token for E2E scripts). The route is auto-enabled when MOLECULE_ENV != production; this flag is the explicit override. Leave unset/0 in prod — the route 404s unless enabled.
|
||||
# MOLECULE_ORG_ID= # SaaS only: org UUID set by control plane on tenant machines. When set, workspace provisioning auto-routes through the control plane API instead of Docker.
|
||||
# CP_PROVISION_URL= # Override control plane URL for workspace provisioning (default: https://api.moleculesai.app). Only needed for testing against a non-production control plane.
|
||||
|
||||
@@ -361,15 +361,17 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
|
||||
"""Returns (findings, debug). Empty findings == no drift.
|
||||
|
||||
Raises:
|
||||
ApiError: propagated from the protection fetch only when the
|
||||
failure is likely a transient Gitea outage (5xx).
|
||||
403/404 from the protection endpoint is treated as
|
||||
"cannot determine drift for this branch" — a token-
|
||||
scope issue (missing repo-admin on DRIFT_BOT_TOKEN) or
|
||||
a repo with no protection set should not turn the
|
||||
hourly cron red. The workflow continues to the next
|
||||
branch; no [ci-drift] issue is filed for a branch
|
||||
whose protection cannot be read.
|
||||
ApiError: propagated (fail-closed) on a transient Gitea outage
|
||||
(5xx) AND on a 401/403 auth failure from the protection
|
||||
endpoint. A 401/403 means DRIFT_BOT_TOKEN cannot read
|
||||
branch protections at all — drift is UNVERIFIABLE, so
|
||||
this HARD gate must fail loud rather than green
|
||||
undetected drift (the regression class it exists to
|
||||
catch). An authenticated 404 (branch genuinely has no
|
||||
protection, e.g. staging pre-rollout) is the one
|
||||
tolerated skip: it returns ([], debug) with a loud
|
||||
::warning:: and the workflow continues to the next
|
||||
branch.
|
||||
"""
|
||||
findings: list[str] = []
|
||||
|
||||
@@ -403,17 +405,38 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
|
||||
m = _re.search(r"HTTP (\d{3})", msg)
|
||||
if m:
|
||||
http_status = int(m.group(1))
|
||||
if http_status in (403, 404):
|
||||
# Token lacks scope OR branch has no protection. Cannot
|
||||
# determine drift — skip this branch. Do NOT exit non-zero;
|
||||
# the issue IS the alarm, not a red workflow.
|
||||
# FAIL-CLOSED contract (was fail-open: 403 AND 404 both returned
|
||||
# [] with no signal — fixed). This is a HARD gate (no
|
||||
# continue-on-error → false) running hourly on a PROTECTED context
|
||||
# (schedule/dispatch on main). We split auth-failure from
|
||||
# genuinely-absent:
|
||||
# 401/403 → AUTH FAILURE: the token cannot read branch
|
||||
# protections at all, so drift CANNOT be determined for ANY
|
||||
# branch. Greening the hourly cron here means jobs↔protection
|
||||
# drift goes silently undetected — exactly the regression class
|
||||
# this sentinel exists to catch. Raise so the workflow fails
|
||||
# loud / fails closed.
|
||||
# 404 → authenticated absent resource: this specific branch has
|
||||
# no protection (e.g. `staging` before its protection rollout).
|
||||
# Genuinely nothing to diff against — skip THIS branch with a
|
||||
# loud ::warning::, continue to the next.
|
||||
if http_status in (401, 403):
|
||||
sys.stderr.write(
|
||||
f"::error::GET {protection_path} returned HTTP {http_status} — "
|
||||
f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 "
|
||||
f"requires it for this endpoint) OR branch has no protection "
|
||||
f"configured. Cannot determine drift for {branch}; "
|
||||
f"skipping. Fix: grant repo-admin to mc-drift-bot or "
|
||||
f"configure protection on {branch}.\n"
|
||||
f"::error::GET {protection_path} returned HTTP "
|
||||
f"{http_status} — DRIFT_BOT_TOKEN cannot read branch "
|
||||
f"protections (needs repo-admin scope). AUTH FAILURE: "
|
||||
f"drift CANNOT be determined, so this HARD gate FAILS "
|
||||
f"CLOSED rather than greening undetected drift. Fix: grant "
|
||||
f"repo-admin to mc-drift-bot (org team `drift-bot`, "
|
||||
f"perm=admin) — fix the token, not the lint.\n"
|
||||
)
|
||||
raise
|
||||
if http_status == 404:
|
||||
sys.stderr.write(
|
||||
f"::warning::GET {protection_path} returned HTTP 404 — "
|
||||
f"branch '{branch}' has no protection configured "
|
||||
f"(authenticated absent resource). Skipping drift check for "
|
||||
f"{branch}; if it SHOULD be protected, configure it.\n"
|
||||
)
|
||||
debug = {
|
||||
"branch": branch,
|
||||
@@ -424,7 +447,7 @@ def detect_drift(branch: str) -> tuple[list[str], dict]:
|
||||
"audit_env_checks": sorted(env_set),
|
||||
}
|
||||
return [], debug
|
||||
# 5xx — propagate (transient outage, fail loud per design).
|
||||
# 5xx / other — propagate (transient outage, fail loud per design).
|
||||
raise
|
||||
if not isinstance(protection, dict):
|
||||
sys.stderr.write(
|
||||
|
||||
@@ -1,16 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
"""gitea-merge-queue — conservative serialized merge bot for Gitea.
|
||||
|
||||
Gitea 1.22.6 has auto-merge (`pull_auto_merge`) but no GitHub-style merge
|
||||
Gitea 1.22.6+ has auto-merge (`pull_auto_merge`) but no GitHub-style merge
|
||||
queue. This script provides the missing serialized policy in user space:
|
||||
|
||||
1. Pick the oldest open PR carrying QUEUE_LABEL.
|
||||
2. Refuse to act unless main is green.
|
||||
1. Scan open same-repo PRs that are NOT opted out (auto-discovery, see below),
|
||||
oldest-first, skipping drafts, until an ACTIONABLE one is found. A non-ready
|
||||
candidate (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals,
|
||||
or red required CI) is SKIPPED so it cannot head-of-line block newer ready
|
||||
PRs; the scan continues to the next candidate.
|
||||
2. Refuse to act unless main's BP-required contexts are green.
|
||||
3. Refuse fork PRs; the queue may only mutate same-repo branches.
|
||||
4. If the PR branch does not contain current main, call Gitea's
|
||||
/pulls/{n}/update endpoint and stop. CI must rerun on the updated head.
|
||||
5. If the updated PR head has all required contexts green, merge with the
|
||||
non-bypass merge actor token.
|
||||
5. Merge ONLY when, on the PR's CURRENT head sha:
|
||||
- >= REQUIRED_APPROVALS distinct GENUINE official APPROVED reviews from
|
||||
the recognised reviewer set (not stale, not dismissed, commit_id ==
|
||||
current head), AND
|
||||
- no open official REQUEST_CHANGES on the current head, AND
|
||||
- every BP-required status context is green, AND
|
||||
- the PR is mergeable.
|
||||
|
||||
Authoritative gates (fail-closed):
|
||||
- The REQUIRED status contexts come from BRANCH PROTECTION
|
||||
(`status_check_contexts`), not a hand-maintained env list. If branch
|
||||
protection cannot be enumerated, the queue HOLDS (does not merge blindly).
|
||||
- NON-required reds (qa-review, security-review, sop-tier, sop-checklist
|
||||
when not branch-required, E2E Chat, Staging SaaS, ci-arm64-advisory, any
|
||||
continue-on-error job) MUST NOT block. They are reported, never gating.
|
||||
- `force_merge=true` is used ONLY when the merge is blocked *solely* by
|
||||
missing-but-non-required governance contexts (required are green + genuine
|
||||
approvals present). It is NEVER used to bypass a failing REQUIRED context
|
||||
or missing approvals.
|
||||
|
||||
Auto-discovery (opt-OUT, label-optional):
|
||||
The queue is SELF-SUSTAINING — a ready PR does NOT need a human (or an agent)
|
||||
to add the `merge-queue` label first. When AUTO_DISCOVER is on (default), the
|
||||
queue enumerates ALL open same-repo PRs and considers any that meets the full
|
||||
merge bar (genuine approvals on current head + BP-required green + mergeable +
|
||||
no open REQUEST_CHANGES). The merge bar above is UNCHANGED; auto-discovery only
|
||||
changes WHICH PRs are considered, not whether they are mergeable.
|
||||
|
||||
This deliberately removes the historical dependency on an agent adding the
|
||||
`merge-queue` label — agent Gitea tokens lack `write:issue` (labels are
|
||||
issue-scoped), so they could never self-label and the queue stalled. The label
|
||||
is now OPTIONAL metadata, not a gate.
|
||||
|
||||
SAFETY is preserved as opt-OUT: any PR carrying an opt-out label
|
||||
(OPT_OUT_LABELS — `merge-queue-hold`, `do-not-auto-merge`, `wip`, `draft` by
|
||||
default) is skipped (never auto-considered, never merged). Draft PRs
|
||||
(draft=true STATE) are also skipped; the literal `draft` LABEL is an
|
||||
additional explicit opt-out a human can apply without converting to a draft.
|
||||
A human who wants to keep a PR out of autonomous merging just adds one of
|
||||
those labels. Setting AUTO_DISCOVER=0 restores the legacy opt-IN behaviour
|
||||
(only PRs already carrying QUEUE_LABEL are considered).
|
||||
|
||||
Head-of-line (HOL) safety has two complementary layers:
|
||||
(a) The queue SCANS THROUGH the FIFO candidate list and skips any non-ready
|
||||
PR (REQUEST_CHANGES, mergeable!=True, insufficient genuine approvals, or
|
||||
red required CI) instead of locking on the oldest and waiting, so a PR
|
||||
that can never become ready without human action does not block newer
|
||||
ready PRs.
|
||||
(b) For the candidate the scan acts on, two permanent failure modes HOLD the
|
||||
PR (apply HOLD_LABEL) and let the scan CONTINUE to the next candidate
|
||||
rather than re-selecting the same wedged PR every tick:
|
||||
- a permanent permission/4xx merge error (403/404/405), and
|
||||
- a persistent branch-update conflict (the /update endpoint returns
|
||||
HTTP 409 because the PR branch cannot be merged with main without a
|
||||
manual rebase). A conflict will not self-resolve, so retrying it
|
||||
every tick would HOL-block every ready PR behind it (issue #2352).
|
||||
|
||||
Status-fetch is fail-closed: if the combined status for a sha cannot be
|
||||
fetched, the PR is skipped this tick (never treated as green).
|
||||
|
||||
The script is intentionally one-PR-per-run. Workflow/cron concurrency should
|
||||
serialize invocations so two green PRs cannot merge against the same main.
|
||||
@@ -40,6 +101,33 @@ WATCH_BRANCH = _env("WATCH_BRANCH", default="main")
|
||||
QUEUE_LABEL = _env("QUEUE_LABEL", default="merge-queue")
|
||||
HOLD_LABEL = _env("HOLD_LABEL", default="merge-queue-hold")
|
||||
UPDATE_STYLE = _env("UPDATE_STYLE", default="merge")
|
||||
# Auto-discovery switch (opt-OUT model). The flag is on unless the
# AUTO_DISCOVER variable is one of the "off" spellings listed below; its default
# value is "1". While on, the queue considers ALL open same-repo PRs that meet
# the merge bar instead of only PRs already carrying QUEUE_LABEL, so it keeps
# running without any human/agent labeling (agent tokens lack write:issue and
# cannot self-label). Set AUTO_DISCOVER=0 to return to the legacy opt-IN
# behaviour in which QUEUE_LABEL is required for a PR to be considered.
AUTO_DISCOVER = _env("AUTO_DISCOVER", default="1").strip().lower() not in (
    "",
    "0",
    "false",
    "no",
    "off",
)
|
||||
# Labels that opt a PR OUT of autonomous merging: a PR carrying ANY of them is
# skipped (never auto-considered, never merged). The list is comma-separated in
# the OPT_OUT_LABELS variable; blank entries are dropped.
#   - HOLD_LABEL is always added (when non-empty) so the existing hold
#     semantics keep working.
#   - `do-not-auto-merge` and `wip` let a human keep a PR out of the auto-merge
#     path without removing it.
#   - `draft` is a literal LABEL here. Gitea draft STATE (draft=true) is already
#     skipped via _issue_is_draft; the label is an extra, explicit opt-out that
#     does not require converting the PR to a draft.
OPT_OUT_LABELS = {
    label
    for label in map(
        str.strip,
        _env(
            "OPT_OUT_LABELS",
            default="do-not-auto-merge,wip,draft",
        ).split(","),
    )
    if label
}.union([HOLD_LABEL] if HOLD_LABEL else [])
|
||||
REQUIRED_CONTEXTS_RAW = _env(
|
||||
"REQUIRED_CONTEXTS",
|
||||
default=(
|
||||
@@ -57,6 +145,24 @@ PUSH_REQUIRED_CONTEXTS_RAW = _env(
|
||||
default="CI / all-required (push)",
|
||||
)
|
||||
|
||||
# Logins recognised as official reviewers (comma-separated in REVIEWER_SET,
# blank entries dropped). A merge needs the required number of DISTINCT genuine
# approvals — not stale, not dismissed, on the current head sha — from accounts
# in this set. The default roster is the agents-team reviewers; founder and
# CTO-agent accounts are deliberately left out so that a human/owner approval
# alone cannot satisfy the queue: it has to be a genuine peer review.
REVIEWER_SET = set(
    filter(
        None,
        map(
            str.strip,
            _env(
                "REVIEWER_SET",
                default="agent-reviewer,agent-researcher,agent-reviewer-cr2",
            ).split(","),
        ),
    )
)
# Fallback approval count, used only when branch protection does not specify
# one (the authoritative value is read from branch protection at runtime). The
# default of 2 mirrors molecule-core branch protection (required_approvals: 2).
REQUIRED_APPROVALS_DEFAULT = int(_env("REQUIRED_APPROVALS", default="2") or "2")
|
||||
|
||||
OWNER, NAME = (REPO.split("/", 1) + [""])[:2] if REPO else ("", "")
|
||||
API = f"https://{GITEA_HOST}/api/v1" if GITEA_HOST else ""
|
||||
|
||||
@@ -67,7 +173,27 @@ class ApiError(RuntimeError):
|
||||
|
||||
class MergePermissionError(ApiError):
    """Merge failed with a permanent permission error (403/404/405).

    The queue should HOLD this PR and move to the next one."""
|
||||
|
||||
|
||||
class BranchUpdateConflictError(ApiError):
    """Updating the PR branch with the base hit a merge-conflict (HTTP 409).

    A true merge-conflict is NOT transient: the branch cannot be auto-updated
    until a human/agent rebases it. The queue should HOLD this PR (apply
    HOLD_LABEL) and advance to the next candidate, exactly like the permission
    path — otherwise the conflicted PR sits at the queue head and is retried
    every tick forever, head-of-line-blocking every ready PR behind it.

    NOTE: distinct from mergeable=None, which is Gitea STILL COMPUTING conflict
    state — that case is handled as a transient WAIT (no hold). This error is
    only raised on an explicit 409 returned by the /update endpoint."""
|
||||
|
||||
|
||||
class BranchProtectionUnavailable(ApiError):
    """Branch protection (the authoritative required-context source) could not
    be enumerated. The queue must HOLD rather than merge with an unverified
    required-context set (fail-closed, no fail-open)."""
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
@@ -75,6 +201,20 @@ class MergeDecision:
|
||||
ready: bool
|
||||
action: str
|
||||
reason: str
|
||||
# When ready is True, force indicates the merge is blocked SOLELY by
|
||||
# missing-but-non-required governance contexts (required are green +
|
||||
# genuine approvals present), so force_merge=true is justified to bypass
|
||||
# ONLY those non-required contexts. Defaults False.
|
||||
force: bool = False
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class BranchProtection:
    """The subset of branch protection the queue depends on."""

    # Status contexts branch protection requires (from `status_check_contexts`);
    # empty when branch protection does not enforce status checks.
    required_contexts: list[str]
    # Number of approvals required before a merge.
    required_approvals: int
    # Truthiness of the payload's `block_on_rejected_reviews` field.
    block_on_rejected_reviews: bool
|
||||
|
||||
|
||||
def _require_runtime_env() -> None:
|
||||
@@ -191,6 +331,117 @@ def required_contexts_green(
|
||||
return not missing_or_bad, missing_or_bad
|
||||
|
||||
|
||||
def parse_branch_protection(body: Any) -> BranchProtection:
    """Extract the queue-relevant fields from a branch_protections payload.

    Args:
        body: the decoded JSON body of a branch-protection response.

    Returns:
        A BranchProtection holding the required status contexts (empty when
        `enable_status_check` is falsy), the required approval count, and the
        truthiness of `block_on_rejected_reviews`.

    Raises:
        BranchProtectionUnavailable: when `body` is not a dict, or when status
            checks are enabled but `status_check_contexts` is not a list or
            contains no non-blank string. This is fail-closed: a
            hand-maintained env list is never used as the authoritative
            required set, because that risks merging while a real required
            context is red or missing.
    """
    if not isinstance(body, dict):
        raise BranchProtectionUnavailable("branch protection response not an object")

    # With status checks not enforced by branch protection there are no
    # required contexts and the queue would gate on approvals only. That is
    # acceptable, but it is surfaced explicitly (empty list) so the caller
    # decides what to do with it.
    contexts: list[str] = []
    if body.get("enable_status_check"):
        contexts_raw = body.get("status_check_contexts")
        if not isinstance(contexts_raw, list):
            raise BranchProtectionUnavailable(
                "enable_status_check is true but status_check_contexts is not a list"
            )
        contexts = [c for c in contexts_raw if isinstance(c, str) and c.strip()]
        if not contexts:
            raise BranchProtectionUnavailable(
                "enable_status_check is true but status_check_contexts is empty"
            )

    approvals = body.get("required_approvals")
    # bool is a subclass of int, so a stray JSON `true` would otherwise be read
    # as "1 approval". A negative count would make the approval gate vacuous.
    # Neither is a usable threshold, so both fall back to the configured
    # default instead of weakening the gate.
    if isinstance(approvals, int) and not isinstance(approvals, bool) and approvals >= 0:
        required_approvals = int(approvals)
    else:
        required_approvals = REQUIRED_APPROVALS_DEFAULT

    return BranchProtection(
        required_contexts=contexts,
        required_approvals=required_approvals,
        block_on_rejected_reviews=bool(body.get("block_on_rejected_reviews")),
    )
|
||||
|
||||
|
||||
def get_branch_protection(branch: str) -> BranchProtection:
    """Load and parse the branch protection of `branch` (fail-closed).

    Any ApiError from the fetch is re-raised as BranchProtectionUnavailable
    (chained to the original), and the payload is validated by
    parse_branch_protection, which raises the same error type for an unusable
    payload.
    """
    endpoint = f"/repos/{OWNER}/{NAME}/branch_protections/{branch}"
    try:
        _status, payload = api("GET", endpoint)
    except ApiError as exc:
        message = f"could not fetch branch protection for {branch}: {exc}"
        raise BranchProtectionUnavailable(message) from exc
    return parse_branch_protection(payload)
|
||||
|
||||
|
||||
def genuine_approvals(
    reviews: list[dict],
    *,
    head_sha: str,
    reviewer_set: set[str],
) -> tuple[set[str], list[str]]:
    """Reduce a PR's reviews to genuine official approvals on the CURRENT head.

    Args:
        reviews: review rows for one PR. A later row overwrites an earlier row
            from the same login, so the list is expected oldest-first; the
            caller is responsible for that ordering.
        head_sha: the PR's current head commit sha. When empty, the commit_id
            comparison is skipped.
        reviewer_set: logins whose reviews count; all other logins are ignored.

    Returns:
        (approvers, request_changes) where:
          - approvers is the set of distinct logins in reviewer_set whose
            LATEST APPROVED/REQUEST_CHANGES row is an official, non-stale,
            non-dismissed APPROVED on the current head, and
          - request_changes is the list of logins in reviewer_set whose latest
            such row is an official, non-stale, non-dismissed REQUEST_CHANGES
            on the current head.

    "Current head" is enforced two ways: a row must be `official` and NOT
    `stale`/`dismissed`, AND when the row carries a commit_id it must equal
    head_sha. A row with no commit_id but stale/dismissed falsy is accepted.
    Because only each reviewer's latest row is kept, a later REQUEST_CHANGES
    supersedes an earlier APPROVED and vice versa.

    Malformed rows — a row that is not a dict, or whose `user` is not an
    object carrying a string `login` — are ignored instead of raising.
    """
    latest_by_user: dict[str, dict] = {}
    for review in reviews:
        if not isinstance(review, dict):
            continue
        author = review.get("user")
        # Guard the shape of `user`: a truthy non-dict value (e.g. a bare
        # string) must be skipped like any other malformed row, not crash.
        user = author.get("login") if isinstance(author, dict) else None
        if not isinstance(user, str) or user not in reviewer_set:
            continue
        state = str(review.get("state") or "").upper()
        if state not in {"APPROVED", "REQUEST_CHANGES"}:
            continue  # ignore COMMENT/PENDING/DISMISSED-state rows
        # Later entries overwrite earlier ones, so the latest row wins.
        latest_by_user[user] = review

    approvers: set[str] = set()
    request_changes: list[str] = []
    for user, review in latest_by_user.items():
        if not review.get("official"):
            continue
        if review.get("stale") or review.get("dismissed"):
            continue
        commit_id = review.get("commit_id")
        if isinstance(commit_id, str) and commit_id and head_sha:
            if commit_id != head_sha:
                continue  # review was on a previous head
        state = str(review.get("state") or "").upper()
        if state == "APPROVED":
            approvers.add(user)
        elif state == "REQUEST_CHANGES":
            request_changes.append(user)
    return approvers, request_changes
|
||||
|
||||
|
||||
def get_pull_reviews(pr_number: int) -> list[dict]:
    """Fetch the review rows of PR `pr_number`.

    Raises:
        ApiError: when the response body is not a list (in addition to
            whatever `api` itself raises).
    """
    # NOTE(review): a single request is issued with no paging parameters —
    # confirm the endpoint returns every review for heavily-reviewed PRs.
    _status, payload = api("GET", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/reviews")
    if isinstance(payload, list):
        return payload
    raise ApiError(f"PR #{pr_number} reviews response not list")
|
||||
|
||||
|
||||
def label_names(issue: dict) -> set[str]:
|
||||
return {
|
||||
label["name"]
|
||||
@@ -219,6 +470,85 @@ def choose_next_queued_issue(
|
||||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
def _issue_is_draft(issue: dict) -> bool:
|
||||
"""True if the issue/PR is a draft.
|
||||
|
||||
The /issues listing exposes draft state under the `pull_request` sub-object
|
||||
(`{"draft": true}`); some Gitea versions also surface a top-level `draft`.
|
||||
Either is honoured. Drafts are never auto-considered for merging.
|
||||
"""
|
||||
pr = issue.get("pull_request")
|
||||
if isinstance(pr, dict) and pr.get("draft") is True:
|
||||
return True
|
||||
return issue.get("draft") is True
|
||||
|
||||
|
||||
def choose_candidate_issues(
    issues: list[dict],
    *,
    queue_label: str,
    opt_out_labels: set[str],
    auto_discover: bool,
) -> list[dict]:
    """Select every open PR eligible for a merge attempt this tick, FIFO-ordered.

    This is the auto-discovery selector. It decides only WHICH PRs are looked
    at; the merge bar itself is applied elsewhere.

    A row is kept when all of the following hold:
      - it has a `pull_request` key (plain issues are dropped);
      - it carries none of `opt_out_labels` (the human escape hatch — an
        opted-out PR is never considered and never merged);
      - it is not a draft (see _issue_is_draft);
      - either `auto_discover` is true, in which case `queue_label` is optional
        metadata and no human/agent labeling is needed, or the PR carries
        `queue_label` (legacy opt-IN mode).

    The survivors are ordered oldest-first by (created_at, number) to keep the
    serialized FIFO order. The FULL list is returned, not just its head, so
    that process_once can scan past a non-ready candidate (for example one
    with REQUEST_CHANGES or mergeable=false, which cannot become ready without
    human action) instead of letting it head-of-line block newer ready PRs.
    """

    def _is_candidate(issue: dict) -> bool:
        if "pull_request" not in issue:
            return False
        names = label_names(issue)
        if not opt_out_labels.isdisjoint(names):
            return False  # opt-out: human kept this PR out of autonomous merging
        if _issue_is_draft(issue):
            return False  # drafts are never auto-merged
        # Legacy opt-IN mode additionally requires the queue label.
        return bool(auto_discover or queue_label in names)

    def _fifo_key(issue: dict) -> tuple:
        return (issue.get("created_at") or "", int(issue["number"]))

    return sorted((issue for issue in issues if _is_candidate(issue)), key=_fifo_key)
|
||||
|
||||
|
||||
def choose_next_candidate_issue(
    issues: list[dict],
    *,
    queue_label: str,
    opt_out_labels: set[str],
    auto_discover: bool,
) -> dict | None:
    """Return the oldest eligible candidate, or None when there is none.

    A thin head-of-list convenience over choose_candidate_issues, kept for
    callers/tests that only need the first entry. process_once works from the
    full list so it can scan past non-ready PRs rather than block on the
    oldest one.
    """
    ordered = choose_candidate_issues(
        issues,
        queue_label=queue_label,
        opt_out_labels=opt_out_labels,
        auto_discover=auto_discover,
    )
    return next(iter(ordered), None)
|
||||
|
||||
|
||||
def pr_contains_base_sha(commits: list[dict], base_sha: str) -> bool:
|
||||
for commit in commits:
|
||||
sha = commit.get("sha") or commit.get("id")
|
||||
@@ -233,36 +563,87 @@ def pr_has_current_base(pr: dict, commits: list[dict], main_sha: str) -> bool:
|
||||
return pr_contains_base_sha(commits, main_sha)
|
||||
|
||||
|
||||
def _non_required_red_present(
    latest: dict[str, dict],
    required_contexts: list[str],
) -> bool:
    """Report whether any context outside `required_contexts` is not "success".

    `latest` maps a context name to its latest status row. Contexts listed in
    `required_contexts` are ignored; for the rest, anything whose
    status_state() is not "success" counts as red.

    These non-required reds are the governance/SOP/advisory checks that Gitea
    may still treat as "missing required context" at merge time even though
    branch protection does not require them. Their presence is what justifies
    force_merge=true: by that point every REQUIRED context has been verified
    green and the approvals are genuine, so force only bypasses these.
    """
    gating = set(required_contexts)
    return any(
        status_state(row) != "success"
        for name, row in latest.items()
        if name not in gating
    )
|
||||
|
||||
|
||||
def evaluate_merge_readiness(
    *,
    main_status: dict,
    pr_status: dict,
    required_contexts: list[str],
    required_approvals: int,
    approvers: set[str],
    request_changes: list[str],
    pr_has_current_base: bool,
    mergeable: bool,
    pr_labels: set[str] | None = None,
) -> MergeDecision:
    """Decide what the queue should do with a single PR.

    The gates below are evaluated in order; the first one that fails
    determines the returned decision:

      1. main's push-required contexts are not green   -> action "pause"
      2. the PR head does not contain current main     -> action "update"
      3. there is an open REQUEST_CHANGES              -> action "wait"
      4. fewer approvers than required_approvals       -> action "wait"
      5. a required PR status context is not green     -> action "wait"
      6. the PR is not mergeable                       -> action "wait"

    When every gate passes the action is "merge". The decision's `force`
    flag is then set only if some NON-required context is non-success, so
    force never bypasses a failing required context or a missing approval.

    Args:
        main_status: status payload for main; its "statuses" list is read.
        pr_status: status payload for the PR head; its "statuses" list is read.
        required_contexts: status contexts that must be green on the PR.
        required_approvals: minimum number of distinct approvers.
        approvers: reviewers counted as approving the current head.
        request_changes: reviewers with an open REQUEST_CHANGES.
        pr_has_current_base: whether the PR head contains current main.
        mergeable: whether the PR is mergeable (no conflicts).
        pr_labels: forwarded to required_contexts_green for the PR check.

    Returns:
        A MergeDecision describing readiness, action, reason and force.
    """
    # 1) Main's push-required contexts must be green. Combined state can be
    # "failure" due to non-blocking jobs (continue-on-error: true) that do
    # not gate merges, so check the explicit required set, not combined.
    main_latest = latest_statuses_by_context(main_status.get("statuses") or [])
    main_ok, main_bad = required_contexts_green(main_latest, push_required_contexts())
    if not main_ok:
        return MergeDecision(False, "pause", "main required contexts not green: " + ", ".join(main_bad))

    # 2) PR head must contain current main.
    if not pr_has_current_base:
        return MergeDecision(False, "update", "PR head does not contain current main")

    # 3) No open official REQUEST_CHANGES on the current head.
    if request_changes:
        return MergeDecision(
            False, "wait",
            "open REQUEST_CHANGES on current head from: " + ", ".join(sorted(request_changes)),
        )

    # 4) Enough distinct genuine official approvals on the current head.
    if len(approvers) < required_approvals:
        return MergeDecision(
            False, "wait",
            f"insufficient genuine approvals on current head: have "
            f"{len(approvers)} ({', '.join(sorted(approvers)) or 'none'}), "
            f"need {required_approvals}",
        )

    # 5) Every BRANCH-PROTECTION-REQUIRED status context must be green. This is
    # the authoritative status gate — NON-required reds (qa-review,
    # security-review, sop-tier/sop-checklist when not BP-required, E2E Chat,
    # Staging SaaS, ci-arm64-advisory, continue-on-error jobs such as
    # publish-runtime-autobump/pr-validate) are NOT consulted here and must
    # not block. The combined state is deliberately ignored for that reason.
    latest = latest_statuses_by_context(pr_status.get("statuses") or [])
    ok, missing_or_bad = required_contexts_green(latest, required_contexts, pr_labels)
    if not ok:
        return MergeDecision(False, "wait", "required contexts not green: " + ", ".join(missing_or_bad))

    # 6) Gitea must consider the PR mergeable (no conflicts). This gate has to
    # run BEFORE any "ready" decision is returned; returning early after step 5
    # would report a conflicting PR as ready.
    if not mergeable:
        return MergeDecision(False, "wait", "PR is not mergeable (conflicts)")

    # Ready. Use force_merge ONLY if the merge would otherwise be blocked by
    # missing-but-non-required governance contexts. Required are green and
    # approvals are genuine, so force only bypasses non-required reds — never a
    # failing required context or missing approval.
    force = _non_required_red_present(latest, required_contexts)
    return MergeDecision(True, "merge", "ready", force=force)
|
||||
|
||||
|
||||
def get_branch_head(branch: str) -> str:
|
||||
@@ -280,6 +661,12 @@ def get_combined_status(sha: str) -> dict:
|
||||
The /status endpoint caps the `statuses` array at 30 entries (Gitea
|
||||
default page size), so we fetch the full list via /statuses with a
|
||||
higher limit. The combined `state` still comes from /status.
|
||||
|
||||
Fail-closed: the PRIMARY /status fetch must succeed. If it raises, the
|
||||
error propagates so the caller skips this PR this tick (we never treat a
|
||||
failed status fetch as green — dev-sop "no fail-open"). Only the SECONDARY
|
||||
/statuses enrichment (which merely extends the per-context list beyond the
|
||||
30-entry cap) is best-effort; if it fails we still have the combined set.
|
||||
"""
|
||||
_, combined = api("GET", f"/repos/{OWNER}/{NAME}/commits/{sha}/status")
|
||||
if not isinstance(combined, dict):
|
||||
@@ -329,6 +716,31 @@ def list_queued_issues() -> list[dict]:
|
||||
return body
|
||||
|
||||
|
||||
def list_candidate_issues(*, auto_discover: bool) -> list[dict]:
    """Fetch the issues that may be considered as merge candidates this tick.

    When auto_discover is true, the repository's issues endpoint is queried
    for open pull requests with no label filter (limit 50), so a ready PR is
    picked up without anyone first adding QUEUE_LABEL. When it is false, the
    legacy label-filtered listing from list_queued_issues is returned
    instead (opt-in). No opt-out or draft filtering is applied here; that is
    left to the candidate-selection step.

    Args:
        auto_discover: True to enumerate all open PRs, False to use the
            label-filtered listing.

    Returns:
        The list of issue dicts returned by the API.

    Raises:
        ApiError: if the auto-discover response body is not a list.
    """
    if auto_discover:
        listing_query = {
            "state": "open",
            "type": "pulls",
            "limit": "50",
        }
        _, payload = api(
            "GET",
            f"/repos/{OWNER}/{NAME}/issues",
            query=listing_query,
        )
        if isinstance(payload, list):
            return payload
        raise ApiError("candidate issues response not list")
    return list_queued_issues()
|
||||
|
||||
|
||||
def get_pull(pr_number: int) -> dict:
|
||||
_, body = api("GET", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}")
|
||||
if not isinstance(body, dict):
|
||||
@@ -354,30 +766,97 @@ def update_pull(pr_number: int, *, dry_run: bool) -> None:
|
||||
print(f"::notice::updating PR #{pr_number} with base branch via style={UPDATE_STYLE}")
|
||||
if dry_run:
|
||||
return
|
||||
try:
|
||||
api(
|
||||
"POST",
|
||||
f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/update",
|
||||
query={"style": UPDATE_STYLE},
|
||||
expect_json=False,
|
||||
)
|
||||
except ApiError as exc:
|
||||
# Gitea returns HTTP 409 when the base cannot be merged into the PR
|
||||
# branch because of a real conflict. The queue cannot auto-resolve a
|
||||
# conflict, so re-raise as BranchUpdateConflictError; process_once HOLDs
|
||||
# the PR and advances (HOL guard) instead of retrying it forever.
|
||||
# Match the HTTP STATUS token ("-> HTTP 409") specifically, not a bare
|
||||
# "409" substring — the PR number or path can itself contain "409"
|
||||
# (e.g. /pulls/1409/update) and must not be misread as a conflict.
|
||||
if "-> HTTP 409" in str(exc):
|
||||
raise BranchUpdateConflictError(str(exc)) from exc
|
||||
raise # re-raise other ApiErrors unchanged
|
||||
|
||||
|
||||
def add_label_by_name(pr_number: int, label_name: str, *, dry_run: bool) -> None:
    """Apply an existing repo label (by name) to a PR/issue.

    Used to HOLD a wedged PR so the queue advances. The label id is resolved
    from the repo label listing (first 100 labels) and then attached to the
    issue with a single POST to the issue-labels endpoint.

    Args:
        pr_number: number of the PR/issue to label.
        label_name: exact name of the label to apply.
        dry_run: when True, only the notice is printed and no API call is made.

    Raises:
        ApiError: if no label named `label_name` is found in the listing (the
            caller decides whether that is fatal), or if an API call fails.
    """
    print(f"::notice::applying label '{label_name}' to PR #{pr_number}")
    if dry_run:
        return

    _, labels = api("GET", f"/repos/{OWNER}/{NAME}/labels", query={"limit": "100"})
    label_id = None
    if isinstance(labels, list):
        for label in labels:
            if isinstance(label, dict) and label.get("name") == label_name:
                label_id = label.get("id")
                break
    if label_id is None:
        raise ApiError(f"label '{label_name}' not found in repo {OWNER}/{NAME}")

    # Attach the label to the issue. This must target the issue-labels
    # endpoint with the resolved id — not the pull "update" endpoint, which
    # would merge the base branch into the PR instead of labelling it.
    api(
        "POST",
        f"/repos/{OWNER}/{NAME}/issues/{pr_number}/labels",
        body={"labels": [label_id]},
    )
|
||||
|
||||
|
||||
def merge_pull(pr_number: int, *, dry_run: bool) -> None:
|
||||
payload = {
|
||||
def hold_pr(pr_number: int, hold_note: str, *, dry_run: bool) -> None:
    """Put a wedged PR on hold and explain why in a comment.

    This is the head-of-line guard: a PR the queue cannot make progress on
    (permanent permission error or unresolvable branch-update conflict) gets
    HOLD_LABEL and an explanatory comment, so a human/agent can fix it
    instead of the queue retrying it every tick. If applying the label
    raises ApiError, the failure is written to stderr and a note asking for
    the label to be added manually is appended to the comment; the comment is
    posted either way so the wedge stays visible.

    Args:
        pr_number: number of the PR to hold.
        hold_note: comment text explaining the hold.
        dry_run: forwarded to add_label_by_name and post_comment.
    """
    comment_text = hold_note
    try:
        add_label_by_name(pr_number, HOLD_LABEL, dry_run=dry_run)
    except ApiError as label_exc:
        sys.stderr.write(
            f"::error::could not apply HOLD_LABEL to PR #{pr_number}: {label_exc}\n"
        )
        manual_hint = (
            f"\n\n(NOTE: could not apply the hold label automatically: "
            f"{label_exc}. Please add `{HOLD_LABEL}` manually.)"
        )
        comment_text = hold_note + manual_hint
    post_comment(pr_number, comment_text, dry_run=dry_run)
|
||||
|
||||
|
||||
def merge_pull(pr_number: int, *, dry_run: bool, force: bool = False) -> None:
|
||||
payload: dict[str, Any] = {
|
||||
"Do": "merge",
|
||||
"MergeTitleField": f"Merge PR #{pr_number} via Gitea merge queue",
|
||||
"MergeMessageField": (
|
||||
"Serialized merge by gitea-merge-queue after current-main, "
|
||||
"SOP, and required CI checks were green."
|
||||
"genuine approvals, and required CI checks were green."
|
||||
),
|
||||
}
|
||||
print(f"::notice::merging PR #{pr_number}")
|
||||
if force:
|
||||
# force_merge bypasses ONLY missing-but-non-required governance
|
||||
# contexts. The caller has already verified required contexts are green
|
||||
# and genuine approvals are present, so this never bypasses a failing
|
||||
# required context or an approval shortfall.
|
||||
payload["force_merge"] = True
|
||||
print(f"::notice::merging PR #{pr_number}{' (force_merge: non-required reds)' if force else ''}")
|
||||
if dry_run:
|
||||
return
|
||||
try:
|
||||
api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
|
||||
except ApiError as exc:
|
||||
# Re-raise permission-like errors so process_once can skip this PR.
|
||||
# Re-raise permission-like errors so process_once can HOLD this PR.
|
||||
# 403 = no push access, 404 = repo/pr not found, 405 = not allowed.
|
||||
msg = str(exc)
|
||||
for code in ("403", "404", "405"):
|
||||
@@ -387,7 +866,25 @@ def merge_pull(pr_number: int, *, dry_run: bool) -> None:
|
||||
|
||||
|
||||
def process_once(*, dry_run: bool = False) -> int:
|
||||
contexts = required_contexts(REQUIRED_CONTEXTS_RAW)
|
||||
# Required status contexts come from BRANCH PROTECTION, not a hand-kept env
|
||||
# list. Fail-closed: if BP cannot be enumerated, HOLD the whole tick rather
|
||||
# than merge against an unverified required set.
|
||||
try:
|
||||
bp = get_branch_protection(WATCH_BRANCH)
|
||||
except BranchProtectionUnavailable as exc:
|
||||
sys.stderr.write(
|
||||
f"::error::queue held: branch protection for {WATCH_BRANCH} "
|
||||
f"unavailable (fail-closed): {exc}\n"
|
||||
)
|
||||
return 0
|
||||
contexts = bp.required_contexts
|
||||
required_approvals = bp.required_approvals
|
||||
print(
|
||||
f"::notice::queue policy from branch protection: "
|
||||
f"required_approvals={required_approvals} "
|
||||
f"required_contexts={contexts or '[none]'}"
|
||||
)
|
||||
|
||||
main_sha = get_branch_head(WATCH_BRANCH)
|
||||
main_status = get_combined_status(main_sha)
|
||||
# Check push-required contexts explicitly instead of combined state.
|
||||
@@ -398,83 +895,199 @@ def process_once(*, dry_run: bool = False) -> int:
|
||||
print(f"::notice::queue paused: {WATCH_BRANCH}@{main_sha[:8]} required contexts not green: {', '.join(main_bad)}")
|
||||
return 0
|
||||
|
||||
issue = choose_next_queued_issue(
|
||||
list_queued_issues(),
|
||||
candidates = choose_candidate_issues(
|
||||
list_candidate_issues(auto_discover=AUTO_DISCOVER),
|
||||
queue_label=QUEUE_LABEL,
|
||||
hold_label=HOLD_LABEL,
|
||||
opt_out_labels=OPT_OUT_LABELS,
|
||||
auto_discover=AUTO_DISCOVER,
|
||||
)
|
||||
if not issue:
|
||||
print("::notice::merge queue empty")
|
||||
if not candidates:
|
||||
print(
|
||||
"::notice::no merge candidates "
|
||||
f"(auto_discover={'on' if AUTO_DISCOVER else 'off'})"
|
||||
)
|
||||
return 0
|
||||
|
||||
# HOL fix: SCAN THROUGH the FIFO candidate list until a PR we can ACT on is
|
||||
# found, instead of locking on the oldest and waiting. A non-ready candidate
|
||||
# (decision.action == "wait": REQUEST_CHANGES, mergeable!=True, insufficient
|
||||
# genuine approvals, or red required CI) is SKIPPED — it must NOT head-of-line
|
||||
# block the newer ready PRs behind it. The merge bar is unchanged: a skipped
|
||||
# PR is never merged, and the first ACTIONABLE candidate (an "update" that
|
||||
# advances a stale branch, or a fully-ready "merge") terminates the scan.
|
||||
#
|
||||
# `update` is treated as actionable, not skippable: a PR whose head merely
|
||||
# lacks current main is in a legitimate in-progress state (updating it +
|
||||
# rerunning CI moves it toward ready), unlike a PR that can never become
|
||||
# ready without a human (RC / conflict), which is a `wait` and gets skipped.
|
||||
for issue in candidates:
|
||||
decision, ctx = _evaluate_candidate(
|
||||
issue,
|
||||
main_sha=main_sha,
|
||||
main_status=main_status,
|
||||
required_contexts=contexts,
|
||||
required_approvals=required_approvals,
|
||||
dry_run=dry_run,
|
||||
)
|
||||
if decision is None:
|
||||
continue # not merge-eligible (not-open / opted-out / fork / wrong base)
|
||||
pr_number = ctx["pr_number"]
|
||||
print(f"::notice::PR #{pr_number} decision={decision.action}: {decision.reason}")
|
||||
if decision.action == "wait":
|
||||
# Non-ready: skip to the next candidate (no HOL block, no merge).
|
||||
continue
|
||||
if decision.action == "update":
|
||||
try:
|
||||
update_pull(pr_number, dry_run=dry_run)
|
||||
except BranchUpdateConflictError as exc:
|
||||
# The branch cannot be updated with main because of a real
|
||||
# conflict (HTTP 409 from /update). This is the #2352 HOL guard:
|
||||
# a conflict will not self-resolve without a human/agent rebase,
|
||||
# so re-attempting the update every tick would head-of-line block
|
||||
# every ready PR behind it. HOLD this PR (apply HOLD_LABEL, which
|
||||
# is an opt-out label so later ticks skip it) and CONTINUE the
|
||||
# scan so a newer ready PR can still merge this tick. Fail-closed:
|
||||
# a held PR is skipped, never merged.
|
||||
sys.stderr.write(
|
||||
f"::error::branch-update conflict for PR #{pr_number}: {exc}\n"
|
||||
)
|
||||
hold_note = (
|
||||
"merge-queue: could not update this branch with "
|
||||
f"`{WATCH_BRANCH}` — the update returned a merge conflict "
|
||||
f"(HTTP 409) that the queue cannot auto-resolve ({exc}). "
|
||||
f"Applied `{HOLD_LABEL}` to unblock the queue (HOL guard). "
|
||||
f"Fix: rebase/merge `{WATCH_BRANCH}` into this branch and "
|
||||
f"resolve the conflicts, then remove `{HOLD_LABEL}` to requeue."
|
||||
)
|
||||
hold_pr(pr_number, hold_note, dry_run=dry_run)
|
||||
continue # held — keep scanning for a mergeable candidate
|
||||
post_comment(
|
||||
pr_number,
|
||||
(
|
||||
f"merge-queue: updated this branch with `{WATCH_BRANCH}` at "
|
||||
f"`{main_sha[:12]}`. Waiting for CI on the refreshed head."
|
||||
),
|
||||
dry_run=dry_run,
|
||||
)
|
||||
return 0
|
||||
if decision.ready:
|
||||
latest_main_sha = get_branch_head(WATCH_BRANCH)
|
||||
if latest_main_sha != main_sha:
|
||||
print(
|
||||
f"::notice::main moved {main_sha[:8]} -> {latest_main_sha[:8]}; "
|
||||
"deferring to next tick"
|
||||
)
|
||||
return 0
|
||||
try:
|
||||
merge_pull(pr_number, dry_run=dry_run, force=decision.force)
|
||||
except MergePermissionError as exc:
|
||||
# Permanent merge failure (HTTP 403/404/405). HOLD this PR by
|
||||
# applying HOLD_LABEL (it becomes an opt-out label, so subsequent
|
||||
# ticks skip it) and CONTINUE scanning so the queue still advances
|
||||
# to the next ready PR this tick rather than stalling.
|
||||
sys.stderr.write(f"::error::merge permission error for PR #{pr_number}: {exc}\n")
|
||||
hold_note = (
|
||||
"merge-queue: merge failed with a permanent permission error "
|
||||
f"({exc}). No available token has Can-merge permission for this "
|
||||
f"PR. Applied `{HOLD_LABEL}` to unblock the queue (HOL guard). "
|
||||
f"Fix: grant Can-merge to the queue token, then remove "
|
||||
f"`{HOLD_LABEL}` to requeue."
|
||||
)
|
||||
try:
|
||||
add_label_by_name(pr_number, HOLD_LABEL, dry_run=dry_run)
|
||||
except ApiError as label_exc:
|
||||
# If we cannot even apply the hold label, fall back to a comment
|
||||
# so the wedge is at least visible; do NOT loop on this PR.
|
||||
sys.stderr.write(
|
||||
f"::error::could not apply HOLD_LABEL to PR #{pr_number}: {label_exc}\n"
|
||||
)
|
||||
hold_note += (
|
||||
f"\n\n(NOTE: could not apply the hold label automatically: "
|
||||
f"{label_exc}. Please add `{HOLD_LABEL}` manually.)"
|
||||
)
|
||||
post_comment(pr_number, hold_note, dry_run=dry_run)
|
||||
continue # held — keep scanning for a mergeable candidate
|
||||
return 0
|
||||
return 0
|
||||
|
||||
|
||||
def _evaluate_candidate(
    issue: dict,
    *,
    main_sha: str,
    main_status: dict,
    required_contexts: list[str],
    required_approvals: int,
    dry_run: bool,
) -> tuple[MergeDecision | None, dict]:
    """Evaluate a single candidate against the full merge bar.

    This function only EVALUATES: it never updates or merges the PR. Acting on
    the returned decision (and logging it) is the caller's job, which lets
    process_once scan several candidates in one tick.

    Args:
        issue: issue dict for the candidate; its "number" is the PR number.
        main_sha: head sha of the watched branch for this tick.
        main_status: status payload for main, passed to the readiness check.
        required_contexts: status contexts that must be green on the PR.
        required_approvals: minimum number of genuine approvals.
        dry_run: forwarded to post_comment for the skip comments.

    Returns:
        (decision, ctx) where ctx is {"pr_number": <int>}. A None decision
        means the PR is not merge-eligible at all (not open / opted-out /
        draft / wrong base / fork) and the caller should skip to the next
        candidate; for wrong-base and fork PRs the explanatory comment is
        posted here before returning.

    Raises:
        ApiError: if the PR payload has no usable head sha. Errors raised by
            the status fetch are not caught (fail-closed): they propagate so
            the PR is never treated as green.
    """
    pr_number = int(issue["number"])
    ctx = {"pr_number": pr_number}
    pr = get_pull(pr_number)

    if pr.get("state") != "open":
        print(f"::notice::PR #{pr_number} is not open; skipping")
        return None, ctx

    # Defensive opt-out/draft re-check on the authoritative pull payload: the
    # /issues listing's label/draft view can lag, but the merge bar must respect
    # the live pull state. (choose_candidate_issues already filtered on the
    # listing; this guards against a stale listing racing a just-added opt-out.)
    if OPT_OUT_LABELS & label_names(pr):
        print(f"::notice::PR #{pr_number} carries an opt-out label; skipping")
        return None, ctx
    if pr.get("draft") is True:
        print(f"::notice::PR #{pr_number} is a draft; skipping")
        return None, ctx

    if pr.get("base", {}).get("ref") != WATCH_BRANCH:
        post_comment(pr_number, f"merge-queue: skipped; base branch is not `{WATCH_BRANCH}`.", dry_run=dry_run)
        return None, ctx
    if pr.get("head", {}).get("repo_id") != pr.get("base", {}).get("repo_id"):
        post_comment(pr_number, "merge-queue: skipped; fork PRs are not supported by the serialized queue.", dry_run=dry_run)
        return None, ctx

    head_sha = pr.get("head", {}).get("sha")
    if not isinstance(head_sha, str) or len(head_sha) < 7:
        raise ApiError(f"PR #{pr_number} missing head sha")

    commits = get_pull_commits(pr_number)
    current_base = pr_has_current_base(pr, commits, main_sha)

    # Fail-closed: a failed status fetch raises here and propagates (the PR is
    # never treated as green).
    pr_status = get_combined_status(head_sha)
    pr_labels = label_names(pr)

    # FAIL-CLOSED: Gitea returns mergeable=None (or omits the field) while it is
    # still COMPUTING conflict state. Only the literal True is decisive proof the
    # PR is conflict-free; None and False both mean "not (yet) mergeable". We must
    # NOT autonomously merge on an unknown — treat anything but True as not-yet-
    # mergeable so evaluate_merge_readiness returns a "wait" decision.
    mergeable = pr.get("mergeable") is True

    reviews = get_pull_reviews(pr_number)
    approvers, request_changes = genuine_approvals(
        reviews, head_sha=head_sha, reviewer_set=REVIEWER_SET
    )

    decision = evaluate_merge_readiness(
        main_status=main_status,
        pr_status=pr_status,
        required_contexts=required_contexts,
        required_approvals=required_approvals,
        approvers=approvers,
        request_changes=request_changes,
        pr_has_current_base=current_base,
        mergeable=mergeable,
        pr_labels=pr_labels,
    )
    return decision, ctx
|
||||
|
||||
|
||||
def main() -> int:
|
||||
|
||||
@@ -40,20 +40,24 @@ Context-format note (Gitea 1.22.6):
|
||||
|
||||
Exit codes:
  0 — no required workflow has a paths/paths-ignore filter (clean) OR
      branch_protections returned an authenticated 404 (branch
      genuinely has no protection; ::warning:: surfaced).
  1 — at least one required workflow has a paths/paths-ignore filter
      (the gate-degrading defect class).
  2 — env contract violation (missing GITEA_TOKEN/HOST/REPO/BRANCH).
  3 — workflows directory missing or workflow YAML unparseable.
  4 — FAIL-CLOSED verification failure: branch_protections 401/403
      auth failure (token can't read BP), 5xx transient (propagated
      ApiError), or unexpected response shape (non-dict body on 2xx).
      This is a HARD gate on a protected context — it MUST NOT green
      when it cannot verify.
|
||||
|
||||
Auth note: `GET /repos/.../branch_protections/{branch}` requires
repo-admin role in Gitea 1.22.6. The workflow-default `GITHUB_TOKEN`
is non-admin; we re-use `DRIFT_BOT_TOKEN` (same persona that powers
ci-required-drift.yml). A 401/403 from a missing-scope token is an
AUTH FAILURE that FAILS CLOSED (exit 4) — fix the token, not the
lint. Only an authenticated 404 (genuinely-absent protection) is a
tolerated graceful skip.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -309,14 +313,36 @@ def run() -> int:
|
||||
msg = str(e)
|
||||
m = re.search(r"HTTP (\d{3})", msg)
|
||||
http_status = int(m.group(1)) if m else None
|
||||
if http_status in (403, 404):
|
||||
# FAIL-CLOSED contract (was fail-open: 403 AND 404 both exit 0 —
|
||||
# fixed). This is a HARD gate (no continue-on-error → false) on a
|
||||
# PROTECTED context: pull_request (same-repo; fork PRs can't carry
|
||||
# DRIFT_BOT_TOKEN) + workflow_dispatch. We split auth-failure from
|
||||
# genuinely-absent:
|
||||
# 401/403 → AUTH FAILURE: the token cannot read branch
|
||||
# protections, so we CANNOT enumerate the required-check set
|
||||
# and CANNOT verify the no-paths-filter invariant. Fail loud /
|
||||
# fail closed (exit 4) — do NOT green an unverifiable gate.
|
||||
# 404 → authenticated absent resource: branch genuinely has no
|
||||
# protection. Nothing to enumerate; tolerated degradation,
|
||||
# surfaced loudly (exit 0 with ::warning::).
|
||||
if http_status in (401, 403):
|
||||
sys.stderr.write(
|
||||
f"::error::GET {protection_path} returned HTTP {http_status} — "
|
||||
f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 "
|
||||
f"requires it for this endpoint) OR branch '{BRANCH}' has "
|
||||
f"no protection configured. Cannot enumerate required "
|
||||
f"checks; skipping lint with exit 0 to avoid red-X on "
|
||||
f"every PR. Fix: grant repo-admin to mc-drift-bot.\n"
|
||||
f"::error::GET {protection_path} returned HTTP "
|
||||
f"{http_status} — DRIFT_BOT_TOKEN cannot read branch "
|
||||
f"protections (needs repo-admin scope). AUTH FAILURE: "
|
||||
f"cannot enumerate required checks, so this lint FAILS "
|
||||
f"CLOSED rather than greening a gate it could not verify. "
|
||||
f"Fix: grant repo-admin to mc-drift-bot (org team "
|
||||
f"`drift-bot`, perm=admin) — fix the token, not the lint.\n"
|
||||
)
|
||||
return 4
|
||||
if http_status == 404:
|
||||
sys.stderr.write(
|
||||
f"::warning::GET {protection_path} returned HTTP 404 — "
|
||||
f"branch '{BRANCH}' has no protection configured "
|
||||
f"(authenticated absent resource). No required contexts to "
|
||||
f"check. If '{BRANCH}' SHOULD be protected, this is a real "
|
||||
f"finding.\n"
|
||||
)
|
||||
return 0
|
||||
raise
|
||||
|
||||
@@ -36,7 +36,8 @@ Daily scheduled run + workflow_dispatch:
|
||||
|
||||
1. GET `branch_protections/{BRANCH}` (needs DRIFT_BOT_TOKEN with
   repo-admin scope; same persona as ci-required-drift.yml).
   FAIL CLOSED on 401/403 (auth failure → exit 2); a genuine
   authenticated 404 (no protection) is a loud ::warning:: skip.
|
||||
|
||||
2. Walk `.gitea/workflows/*.yml` via PyYAML AST. For each workflow,
|
||||
enumerate its emitted contexts: `{workflow.name} / {job.name or
|
||||
@@ -59,10 +60,14 @@ Daily scheduled run + workflow_dispatch:
|
||||
|
||||
Exit codes
----------
0 — clean, OR an authenticated 404 (branch genuinely has no
    protection — surfaces ::warning::, not a fail-open).
1 — at least one BP context has no emitter.
2 — env contract violation, workflows-dir missing, YAML parse
    error, OR a fail-closed verification failure: 401/403 auth
    failure (token can't read BP) or transient/unexpected API
    error. This is a HARD gate on a protected context (schedule/
    dispatch on main) — it MUST NOT green when it cannot verify.
|
||||
|
||||
Env
|
||||
---
|
||||
@@ -394,28 +399,49 @@ def run() -> int:
|
||||
return 2
|
||||
|
||||
# 1. Pull BP.
|
||||
#
|
||||
# FAIL-CLOSED contract (was fail-open with exit 0 — fixed). This lint
|
||||
# is a HARD gate (continue-on-error: false) and only ever runs on a
|
||||
# PROTECTED context: schedule + workflow_dispatch on `main`. There is
|
||||
# NO fork/advisory split here — the DRIFT_BOT_TOKEN secret is always
|
||||
# present and trusted, so an auth failure or transient error is a real
|
||||
# inability-to-verify, not a legitimate degradation. We MUST fail loud
|
||||
# (`::error::` + nonzero) rather than green a gate we could not check.
|
||||
status, bp = api("GET", f"/repos/{repo}/branch_protections/{branch}")
|
||||
if status == "forbidden":
|
||||
sys.stderr.write(
|
||||
f"::error::GET branch_protections/{branch} returned HTTP 403 — "
|
||||
f"DRIFT_BOT_TOKEN lacks repo-admin scope (Gitea 1.22.6 requires "
|
||||
f"it for this endpoint). Skipping lint with exit 0 to avoid "
|
||||
f"red-X on every run. Fix: grant repo-admin to mc-drift-bot. "
|
||||
f"Per Tier 2a contract.\n"
|
||||
f"::error::GET branch_protections/{branch} returned HTTP "
|
||||
f"401/403 — DRIFT_BOT_TOKEN cannot read branch protections "
|
||||
f"(needs repo-admin scope; Gitea requires it for this "
|
||||
f"endpoint). This is an AUTH FAILURE, not an absent resource: "
|
||||
f"the lint CANNOT verify the BP↔emitter invariant, so it FAILS "
|
||||
f"CLOSED instead of greening a gate it could not check. Fix: "
|
||||
f"grant repo-admin to mc-drift-bot (org team `drift-bot`, "
|
||||
f"perm=admin) — fix the token, not the lint.\n"
|
||||
)
|
||||
return 0
|
||||
return 2
|
||||
if status == "not_found":
|
||||
# Genuine 404 WITH a valid token = branch has no protection
|
||||
# configured. On `main` this is itself suspicious (main should
|
||||
# always be protected) but it is a real, authenticated read of an
|
||||
# absent resource — not an auth failure — so we surface it loudly
|
||||
# but do not hard-fail on the genuinely-absent case.
|
||||
print(
|
||||
f"::notice::branch '{branch}' has no protection configured; "
|
||||
f"nothing to lint."
|
||||
f"::warning::branch '{branch}' has no protection configured "
|
||||
f"(authenticated 404); nothing to lint. If '{branch}' SHOULD be "
|
||||
f"protected, this is a real finding — configure branch "
|
||||
f"protection."
|
||||
)
|
||||
return 0
|
||||
if status != "ok" or not isinstance(bp, dict):
|
||||
sys.stderr.write(
|
||||
f"::error::branch_protections/{branch} response unexpected; "
|
||||
f"status={status}. Treating as transient; exit 0.\n"
|
||||
f"::error::branch_protections/{branch} read failed with "
|
||||
f"status={status} (transient/unexpected). The lint CANNOT "
|
||||
f"verify the BP↔emitter invariant on this run; FAILING CLOSED "
|
||||
f"rather than greening unverified. Re-run; if it persists, "
|
||||
f"investigate Gitea API health / token validity.\n"
|
||||
)
|
||||
return 0
|
||||
return 2
|
||||
|
||||
bp_contexts: list[str] = list(bp.get("status_check_contexts") or [])
|
||||
if not bp_contexts:
|
||||
|
||||
@@ -57,10 +57,14 @@ comment unrelated to the new job.
|
||||
Exit codes
----------
0 — no new emissions, all new emissions have valid directives,
    OR an authenticated 404 (branch genuinely has no protection
    to verify against — surfaces ::warning::, not a fail-open).
1 — at least one new emission lacks a directive, or has
    `bp-required: yes` but the context is missing from BP.
2 — env contract violation, YAML parse error, OR a fail-closed
    verification failure: 401/403 auth failure (token can't read
    BP) or transient/unexpected API error. HARD gate on a
    same-repo PR context — MUST NOT green when it cannot verify.
|
||||
|
||||
Env
|
||||
---
|
||||
@@ -420,33 +424,51 @@ def run() -> int:
|
||||
return 0
|
||||
|
||||
# Step 3 — fetch BP context list.
|
||||
#
|
||||
# FAIL-CLOSED contract (was fail-open with exit 0 — fixed). This is a
|
||||
# HARD gate (continue-on-error: false) that runs on `pull_request`
|
||||
# against `main`. On molecule-core, `pull_request` runs are same-repo
|
||||
# (fork PRs cannot carry the DRIFT_BOT_TOKEN secret), so this is a
|
||||
# PROTECTED/trusted context with no legitimate fork-degradation. An
|
||||
# auth failure or transient error means we CANNOT verify a NEW
|
||||
# bp-required emission is actually in BP — so we MUST fail loud rather
|
||||
# than green the gate. (A genuinely-absent 404 read with a valid token
|
||||
# is the one tolerated degradation: there is no BP to check against.)
|
||||
status, bp = api("GET", f"/repos/{repo}/branch_protections/{branch}")
|
||||
bp_contexts: set[str] = set()
|
||||
if status == "forbidden":
|
||||
sys.stderr.write(
|
||||
f"::error::GET branch_protections/{branch} returned HTTP 403 — "
|
||||
f"DRIFT_BOT_TOKEN lacks repo-admin scope. Cannot verify "
|
||||
f"bp-required directives; skipping lint with exit 0 per "
|
||||
f"Tier 2a contract. Fix the token, not the lint.\n"
|
||||
f"::error::GET branch_protections/{branch} returned HTTP "
|
||||
f"401/403 — DRIFT_BOT_TOKEN cannot read branch protections "
|
||||
f"(needs repo-admin scope). This is an AUTH FAILURE: the lint "
|
||||
f"CANNOT verify the bp-required directives on this PR, so it "
|
||||
f"FAILS CLOSED instead of greening unverified. Fix: grant "
|
||||
f"repo-admin to mc-drift-bot (org team `drift-bot`) — fix the "
|
||||
f"token, not the lint.\n"
|
||||
)
|
||||
return 0
|
||||
return 2
|
||||
elif status == "not_found":
|
||||
# Branch has no protection — nothing to verify against; the
|
||||
# bp-required: yes directive can't be satisfied. Treat as
|
||||
# graceful-skip rather than red-X.
|
||||
# Authenticated 404 — branch genuinely has no protection. There is
|
||||
# nothing to verify a `bp-required: yes` directive against, so this
|
||||
# is the one tolerated degradation. Surface loudly (on `main` a
|
||||
# missing protection is itself a real finding) but do not hard-fail.
|
||||
print(
|
||||
f"::notice::branch '{branch}' has no protection; cannot verify "
|
||||
f"bp-required directives. Skipping (exit 0)."
|
||||
f"::warning::branch '{branch}' has no protection (authenticated "
|
||||
f"404); cannot verify bp-required directives. If '{branch}' "
|
||||
f"SHOULD be protected this is a real finding."
|
||||
)
|
||||
return 0
|
||||
elif status == "ok" and isinstance(bp, dict):
|
||||
bp_contexts = set(bp.get("status_check_contexts") or [])
|
||||
else:
|
||||
sys.stderr.write(
|
||||
f"::error::branch_protections/{branch} response unexpected; "
|
||||
f"status={status}. Treating as transient; exit 0.\n"
|
||||
f"::error::branch_protections/{branch} read failed with "
|
||||
f"status={status} (transient/unexpected). CANNOT verify "
|
||||
f"bp-required directives on this PR; FAILING CLOSED rather than "
|
||||
f"greening unverified. Re-run; if persistent, check Gitea API "
|
||||
f"health / token validity.\n"
|
||||
)
|
||||
return 0
|
||||
return 2
|
||||
|
||||
# Step 4 — validate each new emission's directive.
|
||||
violations: list[str] = []
|
||||
|
||||
@@ -174,6 +174,16 @@ def parse_directives(
|
||||
if not parts:
|
||||
continue
|
||||
first = parts[0]
|
||||
# Em-dash (U+2014) is a common visual separator in user-written
|
||||
# notes, e.g. /sop-ack Five-Axis — five-axis-review
|
||||
# If raw_slug contains an em-dash, split on the first one so
|
||||
# the part before becomes the slug and the rest becomes the note.
|
||||
note_from_slug = ""
|
||||
slug_source = raw_slug
|
||||
emdash_idx = raw_slug.find("—")
|
||||
if emdash_idx != -1:
|
||||
slug_source = raw_slug[:emdash_idx].strip()
|
||||
note_from_slug = raw_slug[emdash_idx + 1 :].strip()
|
||||
# If the slug-capture greedily matched multiple words (e.g.
|
||||
# "comprehensive testing"), preserve normalize behavior: join
|
||||
# the WHOLE first-word-token only; trailing words get appended to
|
||||
@@ -186,13 +196,19 @@ def parse_directives(
|
||||
# as slug and "testing extra-note" as note. We defer the
|
||||
# disambiguation to the caller via the returned canonical
|
||||
# slug. For simplicity: try the WHOLE captured string first.
|
||||
canonical = normalize_slug(raw_slug, numeric_aliases)
|
||||
canonical = normalize_slug(slug_source, numeric_aliases)
|
||||
else:
|
||||
canonical = normalize_slug(first, numeric_aliases)
|
||||
canonical = normalize_slug(slug_source, numeric_aliases)
|
||||
note_from_group = (m.group(3) or "").strip()
|
||||
# If we collapsed multi-word slug into kebab and there's a
|
||||
# trailing-text group too, append it.
|
||||
entry = (kind, canonical, note_from_group)
|
||||
# The em-dash (U+2014) is a visual separator; the regex puts it
|
||||
# in group(3) because it is outside the slug character class.
|
||||
# Strip it so "/sop-ack slug — note" yields just "note".
|
||||
if note_from_group.startswith("—"):
|
||||
note_from_group = note_from_group[1:].strip()
|
||||
# Combine note_from_slug (em-dash split) with note_from_group
|
||||
# (trailing text after the slug captured by the regex group).
|
||||
combined_note = (note_from_slug + " " + note_from_group).strip()
|
||||
entry = (kind, canonical, combined_note)
|
||||
if kind == "sop-n/a":
|
||||
na_directives.append(entry)
|
||||
else:
|
||||
|
||||
@@ -290,48 +290,75 @@ debug "approvers: $(echo "$APPROVERS" | tr '\n' ' ')"
|
||||
# Pre/post spaces ensure case patterns *${_t}* match even when the name
|
||||
# is the first or last entry (bash case *word* needs delimiters on both sides).
|
||||
#
|
||||
# FALLBACK: if ALL team probes return 403 (token lacks read:org scope),
|
||||
# fall back to /orgs/{org}/members/{user}. This returns 204 for any org
|
||||
# member — a superset of team membership. Accepting it as a fallback means
|
||||
# the gate passes when the token is scoped to repo+user only (core-bot PAT).
|
||||
# This is safe because: (a) org membership is a prerequisite for every
|
||||
# eligible team; (b) the AND-composition of internal#189 still requires
|
||||
# multiple independent approvers; (c) any token with read:repository can
|
||||
# see the approving reviews, so bypass requires a colluding approver.
|
||||
# FAIL-CLOSED AUTHORIZATION (security: SOP tier gate is an AUTHORIZATION gate).
|
||||
#
|
||||
# This used to fall back to /orgs/{org}/members/{user} whenever every team
|
||||
# probe failed and credit any org member as a member of EVERY queried team.
|
||||
# That was a privilege-escalation: org membership is NOT team membership, so
|
||||
# a 403/visibility/token-scope gap on the team probes silently promoted a
|
||||
# plain org member to satisfy tier:high (ceo). An inability-to-verify became
|
||||
# an authorization GRANT. The fallback is REMOVED — org membership must never
|
||||
# satisfy a team-gated tier.
|
||||
#
|
||||
# A team-membership probe has exactly three meaningful outcomes:
|
||||
# 200 / 204 → the user IS a member of that team (credit it)
|
||||
# 404 → the user is definitively NOT a member (no credit, verified)
|
||||
# anything else (403 / 401 / 5xx / curl failure / non-numeric)
|
||||
# → membership CANNOT be read (cannot-verify)
|
||||
#
|
||||
# Per the dev-sop fail-closed rule (inability-to-verify = failure, never a
|
||||
# pass — and here, never an authorization grant), a cannot-verify outcome on
|
||||
# ANY probe is a HARD infra failure: we publish a loud cannot-verify error and
|
||||
# exit non-zero. We do NOT proceed to evaluate the tier expression on a partial
|
||||
# / unverifiable membership picture, because doing so could let an unverifiable
|
||||
# approver's clause silently fail-or-pass on incomplete data. Fix the token
|
||||
# scope (read:organization) or the runner network — not the gate.
|
||||
declare -A APPROVER_TEAMS
|
||||
_verify_failed="" # accumulates "<user>:<team>(HTTP <code>)" for probes we could not read
|
||||
for U in $APPROVERS; do
|
||||
[ "$U" = "$PR_AUTHOR" ] && debug "skip self-review by $U" && continue
|
||||
_any_team_success="no"
|
||||
for T in "${!TEAM_ID[@]}"; do
|
||||
ID="${TEAM_ID[$T]}"
|
||||
set +e
|
||||
CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/teams/${ID}/members/${U}")
|
||||
debug "probe: $U in team $T (id=$ID) → HTTP $CODE"
|
||||
if [ "$CODE" = "200" ] || [ "$CODE" = "204" ]; then
|
||||
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
|
||||
debug "$U qualifies for team $T"
|
||||
_any_team_success="yes"
|
||||
_curl_exit=$?
|
||||
set -e
|
||||
debug "probe: $U in team $T (id=$ID) → HTTP $CODE (curl exit=$_curl_exit)"
|
||||
if [ "$_curl_exit" -ne 0 ]; then
|
||||
# curl itself failed (DNS, connection refused, timeout) — unreachable.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(curl exit ${_curl_exit})"
|
||||
continue
|
||||
fi
|
||||
done
|
||||
# Fallback: if every team probe returned 403, try org membership.
|
||||
# "??" teams were never resolved to IDs so they never entered the loop.
|
||||
# If the user is an org member, credit them as being in each queried team
|
||||
# (engineers, managers, ceo are all org-level). This is safe because org
|
||||
# membership is a prerequisite for all three, and bypass requires a colluding
|
||||
# approver (same risk as before the AND-composition).
|
||||
if [ "$_any_team_success" = "no" ]; then
|
||||
ORG_CODE=$(curl -sS -o /dev/null -w '%{http_code}' -H "$AUTH" \
|
||||
"${API}/orgs/${OWNER}/members/${U}")
|
||||
debug "probe: $U in org $OWNER (fallback) → HTTP $ORG_CODE"
|
||||
if [ "$ORG_CODE" = "204" ]; then
|
||||
for T in "${!TEAM_ID[@]}"; do
|
||||
case "$CODE" in
|
||||
200|204)
|
||||
APPROVER_TEAMS[$U]="${APPROVER_TEAMS[$U]:- } ${APPROVER_TEAMS[$U]:+ }$T "
|
||||
done
|
||||
debug "$U credited as org member for all queried teams (fallback — token may lack read:org)"
|
||||
fi
|
||||
fi
|
||||
debug "$U qualifies for team $T"
|
||||
;;
|
||||
404)
|
||||
# Definitively not a member of this team — a verified negative.
|
||||
debug "$U is NOT a member of team $T (verified 404)"
|
||||
;;
|
||||
*)
|
||||
# 403/401/5xx/etc — membership is unreadable. Do NOT treat as "not a
|
||||
# member" and do NOT fall back to org membership. This is cannot-verify.
|
||||
_verify_failed="${_verify_failed}${_verify_failed:+, }${U}:${T}(HTTP ${CODE})"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
# Fail-closed: if ANY membership probe could not be read, we cannot make an
|
||||
# authorization decision. Publish a loud cannot-verify / infra-failed status
|
||||
# and exit non-zero. Never grant the tier on unverifiable membership.
|
||||
if [ -n "$_verify_failed" ]; then
|
||||
echo "::error::sop-tier-check CANNOT VERIFY team membership — gate FAILS CLOSED."
|
||||
echo "::error::Unreadable membership probe(s): ${_verify_failed}"
|
||||
echo "::error::A team-membership probe returned 403/401/5xx (or curl failed). The SOP tier gate is an authorization gate; an inability to verify team membership is treated as a FAILURE, never a pass. Org membership is NOT team membership and is never credited as a fallback."
|
||||
echo "::error::Fix: ensure GITEA_TOKEN (SOP_TIER_CHECK_TOKEN) has read:organization scope and the Gitea API is reachable from the runner, then re-run. Do NOT relax this gate."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 7. Evaluate the tier expression.
|
||||
#
|
||||
# legacy OR-gate: use the simplified loop from before internal#189.
|
||||
|
||||
@@ -105,12 +105,26 @@ if [ "${SOP_REFIRE_DISABLE_RATE_LIMIT:-}" != "1" ]; then
|
||||
fi
|
||||
|
||||
# 3. Invoke sop-tier-check.sh with the env it expects.
|
||||
# The canonical workflow intentionally fail-opens the job conclusion
|
||||
# (`bash .gitea/scripts/sop-tier-check.sh || true`) while Gitea branch
|
||||
# protection enforces reviewer approvals separately. Keep the refire path
|
||||
# aligned with that workflow status behavior; otherwise /refire-tier-check can
|
||||
# post a hard failure that the canonical pull_request_target workflow would
|
||||
# not publish.
|
||||
#
|
||||
# FAIL-CLOSED contract (was fail-open — fixed 2026-06-05,
|
||||
# fix/core-ci-fail-closed). The previous shape was:
|
||||
# bash "$SCRIPT" || true
|
||||
# TIER_EXIT=0 # <-- hardcoded success
|
||||
# which discarded the real verdict and ALWAYS POSTed
|
||||
# `state=success` for the REQUIRED context
|
||||
# `sop-tier-check / tier-check (pull_request)`. That meant ANY
|
||||
# collaborator could comment `/refire-tier-check` to forcibly green
|
||||
# the SOP-6 approval gate on the PR head SHA — a fail-open AND a
|
||||
# privilege bypass of branch protection. The canonical
|
||||
# pull_request_target workflow's conclusion publishes the same
|
||||
# context honestly (red on a real violation); the refire MUST mirror
|
||||
# THAT honesty, not a discarded exit code.
|
||||
#
|
||||
# We now capture the script's real exit code under `set +e` and POST
|
||||
# success ONLY when it actually exited 0. sop-tier-check.sh itself
|
||||
# fails closed on infra faults (no SOP_FAIL_OPEN in this refire env),
|
||||
# so a bad token / unreachable API / missing jq → non-zero → we POST
|
||||
# `state=failure`, never a false green.
|
||||
#
|
||||
# SOP_REFIRE_TIER_CHECK_SCRIPT env var lets tests substitute a mock —
|
||||
# sop-tier-check.sh uses bash 4+ associative arrays which trigger a known
|
||||
@@ -125,7 +139,10 @@ if [ ! -f "$SCRIPT" ]; then
|
||||
fi
|
||||
|
||||
# Re-invoke. Pipe stdout/stderr through so the runner log shows the
|
||||
# tier-check decision inline.
|
||||
# tier-check decision inline. Capture the REAL exit code (set +e so a
|
||||
# non-zero verdict doesn't abort this script under set -e) — the POST
|
||||
# below keys off it, so a failed tier-check posts state=failure.
|
||||
set +e
|
||||
GITEA_TOKEN="$GITEA_TOKEN" \
|
||||
GITEA_HOST="$GITEA_HOST" \
|
||||
REPO="$REPO" \
|
||||
@@ -133,8 +150,9 @@ GITEA_TOKEN="$GITEA_TOKEN" \
|
||||
PR_AUTHOR="$PR_AUTHOR" \
|
||||
SOP_DEBUG="${SOP_DEBUG:-0}" \
|
||||
SOP_LEGACY_CHECK="${SOP_LEGACY_CHECK:-0}" \
|
||||
bash "$SCRIPT" || true
|
||||
TIER_EXIT=0
|
||||
bash "$SCRIPT"
|
||||
TIER_EXIT=$?
|
||||
set -e
|
||||
debug "sop-tier-check.sh exit=$TIER_EXIT"
|
||||
|
||||
# 4. POST the resulting status.
|
||||
@@ -170,4 +188,12 @@ if [ "$POST_HTTP" != "200" ] && [ "$POST_HTTP" != "201" ]; then
|
||||
fi
|
||||
|
||||
echo "::notice::sop-tier-refire posted state=$STATE for context=\"$CONTEXT\" on sha=$HEAD_SHA"
|
||||
exit "$TIER_EXIT"
|
||||
# Exit 0: the refire JOB succeeded — it re-evaluated the gate and posted
|
||||
# an HONEST status. The gate VERDICT is carried by the POSTed status
|
||||
# ($STATE), which is what branch protection reads; a failing tier-check
|
||||
# posts state=failure (red on the PR), so there is no fail-open. We do
|
||||
# NOT also exit non-zero on a failing verdict — that would double-signal
|
||||
# the same failure as both a red status AND a red refire job. The
|
||||
# fail-open that mattered (TIER_EXIT hardcoded to 0 → always state=success)
|
||||
# is fixed above by capturing the real exit code.
|
||||
exit 0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -208,6 +208,22 @@ class TestParseDirectives(unittest.TestCase):
|
||||
d = self.parse_ack_revoke("/sop-ack Comprehensive_Testing")
|
||||
self.assertEqual(d[0][1], "comprehensive-testing")
|
||||
|
||||
def test_emdash_separator_parsed_correctly(self):
|
||||
# Em-dash (U+2014) between slug and note is common in practice.
|
||||
# /sop-ack Five-Axis — five-axis-review
|
||||
# → slug = five-axis, note = five-axis-review (the em-dash separator is stripped)
|
||||
d = self.parse_ack_revoke("/sop-ack Five-Axis — five-axis-review")
|
||||
self.assertEqual(len(d), 1)
|
||||
self.assertEqual(d[0][1], "five-axis")
|
||||
self.assertIn("five-axis-review", d[0][2])
|
||||
|
||||
def test_emdash_no_note(self):
|
||||
# Em-dash at end of slug: only slug, no note content
|
||||
d = self.parse_ack_revoke("/sop-ack Five-Axis —")
|
||||
self.assertEqual(len(d), 1)
|
||||
self.assertEqual(d[0][1], "five-axis")
|
||||
self.assertEqual(d[0][2], "") # em-dash is separator-only → empty note
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# section_marker_present
|
||||
|
||||
+272
@@ -0,0 +1,272 @@
|
||||
#!/usr/bin/env bash
|
||||
# Security regression test for the SOP tier-gate AUTHORIZATION bypass.
|
||||
#
|
||||
# Bug (fixed in fix/sop-tier-authz-no-org-fallback):
|
||||
# sop-tier-check.sh probed team membership at /teams/{id}/members/{user}.
|
||||
# If EVERY team probe failed (e.g. 403 — token lacks read:organization, or
|
||||
# any visibility/flakiness gap), it FELL BACK to /orgs/{org}/members/{user}
|
||||
# and credited that org member as a member of EVERY queried team. The
|
||||
# evaluator then treated those synthetic memberships as real, so a plain
|
||||
# NON-CEO org member satisfied tier:high (ceo). A visibility/auth gap became
|
||||
# a real highest-tier authorization PASS — privilege escalation.
|
||||
#
|
||||
# Fix (fail-closed authorization):
|
||||
# - The org-member ⇒ "member of all teams" fallback is REMOVED. Org
|
||||
# membership is never credited as team membership.
|
||||
# - A team probe that returns anything other than 200/204 (member) or 404
|
||||
# (verified non-member) is a CANNOT-VERIFY condition: the gate fails loud
|
||||
# (exit 1) with a cannot-verify status and never grants the tier.
|
||||
#
|
||||
# Method: this is a true end-to-end test. It prepends a fake `curl` to PATH
|
||||
# that serves canned Gitea API responses keyed by URL, then runs the REAL
|
||||
# sop-tier-check.sh. The fake exercises the genuine probe→credit→evaluate
|
||||
# path — no logic is re-implemented in the test.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
THIS_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
SCRIPT_DIR="$(cd "$THIS_DIR/.." && pwd)"
|
||||
SCRIPT="$SCRIPT_DIR/sop-tier-check.sh"
|
||||
|
||||
command -v jq >/dev/null 2>&1 || { echo "::error::jq required but not found"; exit 1; }
|
||||
[ -f "$SCRIPT" ] || { echo "::error::sop-tier-check.sh not found at $SCRIPT — test must fail loudly if the script is absent"; exit 1; }
|
||||
|
||||
# sop-tier-check.sh uses `declare -A` (associative arrays), which require
|
||||
# bash >= 4. CI runners (Ubuntu) ship bash 5; macOS ships 3.2. Resolve a
|
||||
# bash >= 4 to run the script under.
|
||||
pick_bash() {
|
||||
local c
|
||||
for c in bash /opt/homebrew/bin/bash /usr/local/bin/bash /bin/bash; do
|
||||
local p; p="$(command -v "$c" 2>/dev/null || true)"
|
||||
[ -n "$p" ] || continue
|
||||
local maj; maj="$("$p" -c 'echo "${BASH_VERSINFO[0]}"' 2>/dev/null || echo 0)"
|
||||
if [ "${maj:-0}" -ge 4 ]; then echo "$p"; return 0; fi
|
||||
done
|
||||
return 1
|
||||
}
|
||||
BASH4="$(pick_bash)" || { echo "::error::need bash >= 4 to run sop-tier-check.sh (associative arrays); none found"; exit 1; }
|
||||
echo "using bash: $BASH4 ($("$BASH4" -c 'echo $BASH_VERSION'))"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
assert_eq() {
|
||||
local label="$1" expected="$2" got="$3"
|
||||
if [ "$expected" = "$got" ]; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label"
|
||||
echo " expected: <$expected>"
|
||||
echo " got: <$got>"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
assert_contains() {
|
||||
local label="$1" haystack="$2" needle="$3"
|
||||
if printf '%s' "$haystack" | grep -qF -- "$needle"; then
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
else
|
||||
echo " FAIL $label (missing substring: <$needle>)"
|
||||
FAIL=$((FAIL + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
assert_not_contains() {
|
||||
local label="$1" haystack="$2" needle="$3"
|
||||
if printf '%s' "$haystack" | grep -qF -- "$needle"; then
|
||||
echo " FAIL $label (unexpected substring present: <$needle>)"
|
||||
FAIL=$((FAIL + 1))
|
||||
else
|
||||
echo " PASS $label"
|
||||
PASS=$((PASS + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fake-curl harness.
|
||||
#
|
||||
# The real script calls curl in three shapes:
|
||||
# (a) body capture: curl -sS -H AUTH URL -> prints JSON body
|
||||
# (b) http-code: curl -sS -o FILE -w '%{http_code}' -H AUTH URL
|
||||
# (c) http-code only: curl -sS -o /dev/null -w '%{http_code}' -H AUTH URL
|
||||
#
|
||||
# Our fake reads the URL (the last argument matching http*://*), looks up a response in fixture
|
||||
# files under $FIXDIR, and emits body and/or http-code accordingly.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
make_harness() {
|
||||
# $1 = scenario dir to populate with fixtures
|
||||
local FIXDIR="$1"
|
||||
local BIN="$FIXDIR/bin"
|
||||
mkdir -p "$BIN"
|
||||
cat > "$BIN/curl" <<'FAKE'
|
||||
#!/usr/bin/env bash
|
||||
# Fake curl for sop-tier-check authz tests. Looks up canned responses by URL.
|
||||
set -u
|
||||
FIXDIR="${SOP_TEST_FIXDIR:?SOP_TEST_FIXDIR unset}"
|
||||
|
||||
url=""
|
||||
out=""
|
||||
want_code="no"
|
||||
prev=""
|
||||
for a in "$@"; do
|
||||
case "$prev" in
|
||||
-o) out="$a" ;;
|
||||
esac
|
||||
case "$a" in
|
||||
http*://*) url="$a" ;;
|
||||
'%{http_code}') want_code="yes" ;;
|
||||
esac
|
||||
# -w '%{http_code}' arrives as the value of the -w flag
|
||||
if [ "$prev" = "-w" ] && [ "$a" = '%{http_code}' ]; then want_code="yes"; fi
|
||||
prev="$a"
|
||||
done
|
||||
|
||||
# Map URL -> fixture key (a filename-safe slug).
|
||||
# We only need the path after /api/v1.
|
||||
path="${url#*/api/v1}"
|
||||
slug="$(printf '%s' "$path" | tr '/?=&' '____')"
|
||||
|
||||
body_file="$FIXDIR/body${slug}"
|
||||
code_file="$FIXDIR/code${slug}"
|
||||
|
||||
# Emit the body fixture (empty when none exists) to the -o target, or to stdout when no -o was given.
|
||||
body=""
|
||||
if [ -f "$body_file" ]; then body="$(cat "$body_file")"; fi
|
||||
if [ -n "$out" ]; then
|
||||
printf '%s' "$body" > "$out"
|
||||
else
|
||||
printf '%s' "$body"
|
||||
fi
|
||||
|
||||
# Emit http code when requested.
|
||||
if [ "$want_code" = "yes" ]; then
|
||||
if [ -f "$code_file" ]; then
|
||||
printf '%s' "$(cat "$code_file")"
|
||||
else
|
||||
printf '200'
|
||||
fi
|
||||
fi
|
||||
exit 0
|
||||
FAKE
|
||||
chmod +x "$BIN/curl"
|
||||
echo "$BIN"
|
||||
}
|
||||
|
||||
# Common fixtures shared by scenarios. $1 = FIXDIR, $2 = approver login,
|
||||
# $3 = tier label name (e.g. tier:high), $4 = teams JSON.
|
||||
seed_common() {
|
||||
local FIXDIR="$1" approver="$2" tier="$3" teams_json="$4"
|
||||
mkdir -p "$FIXDIR"
|
||||
# /user -> whoami
|
||||
printf '%s' '{"login":"sop-bot"}' > "$FIXDIR/body_user"
|
||||
# PR head sha
|
||||
printf '%s' '{"head":{"sha":"headsha1"}}' \
|
||||
> "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42"
|
||||
# labels
|
||||
printf '%s' "[{\"name\":\"$tier\"}]" \
|
||||
> "$FIXDIR/body_repos_molecule-ai_molecule-core_issues_42_labels"
|
||||
# org teams list
|
||||
printf '%s' "$teams_json" > "$FIXDIR/body_orgs_molecule-ai_teams"
|
||||
printf '%s' '200' > "$FIXDIR/code_orgs_molecule-ai_teams"
|
||||
# reviews: one APPROVED on current head by $approver
|
||||
printf '%s' "[{\"state\":\"APPROVED\",\"commit_id\":\"headsha1\",\"user\":{\"login\":\"$approver\"}}]" \
|
||||
> "$FIXDIR/body_repos_molecule-ai_molecule-core_pulls_42_reviews"
|
||||
}
|
||||
|
||||
run_script() {
|
||||
# $1 = FIXDIR (must contain bin/curl). Prints the script's combined stdout+stderr and returns its exit code (callers read $?).
|
||||
local FIXDIR="$1"
|
||||
local BIN="$FIXDIR/bin"
|
||||
set +e
|
||||
OUT=$(
|
||||
SOP_TEST_FIXDIR="$FIXDIR" \
|
||||
PATH="$BIN:$PATH" \
|
||||
GITEA_TOKEN="faketoken" \
|
||||
GITEA_HOST="git.moleculesai.app" \
|
||||
REPO="molecule-ai/molecule-core" \
|
||||
PR_NUMBER="42" \
|
||||
PR_AUTHOR="pr-author" \
|
||||
SOP_DEBUG="0" \
|
||||
SOP_LEGACY_CHECK="0" \
|
||||
"$BASH4" "$SCRIPT" 2>&1
|
||||
)
|
||||
RC=$?
|
||||
set -e
|
||||
printf '%s' "$OUT"
|
||||
return $RC
|
||||
}
|
||||
|
||||
TEAMS_JSON='[{"name":"ceo","id":10},{"name":"engineers","id":11},{"name":"managers","id":12}]'
|
||||
|
||||
echo "=============================================================="
|
||||
echo "Scenario 1: tier:high, team probe 403 (cannot read), approver"
|
||||
echo " is a plain org member but NOT in ceo team."
|
||||
echo " EXPECT: tier NOT granted (fail-closed cannot-verify)."
|
||||
echo "=============================================================="
|
||||
S1="$(mktemp -d)"
|
||||
make_harness "$S1" >/dev/null
|
||||
seed_common "$S1" "org-only-bob" "tier:high" "$TEAMS_JSON"
|
||||
# Team membership probe for ceo (id=10) returns 403 — cannot read.
|
||||
printf '%s' '403' > "$S1/code_teams_10_members_org-only-bob"
|
||||
# The OLD bug path: org membership probe would 204 and synthetic-credit.
|
||||
printf '%s' '204' > "$S1/code_orgs_molecule-ai_members_org-only-bob"
|
||||
set +e
|
||||
OUT1="$(run_script "$S1")"; RC1=$?
|
||||
set -e
|
||||
echo "$OUT1" | sed 's/^/ /'
|
||||
echo " (exit=$RC1)"
|
||||
assert_eq "S1 exit non-zero (tier NOT granted)" "1" "$([ "$RC1" -ne 0 ] && echo 1 || echo 0)"
|
||||
assert_not_contains "S1 did NOT print PASSED" "$OUT1" "sop-tier-check PASSED"
|
||||
assert_contains "S1 cannot-verify error surfaced" "$OUT1" "CANNOT VERIFY"
|
||||
assert_contains "S1 names the unreadable probe (403)" "$OUT1" "HTTP 403"
|
||||
rm -rf "$S1"
|
||||
|
||||
echo
|
||||
echo "=============================================================="
|
||||
echo "Scenario 2: tier:high, genuine ceo team member (probe 204)."
|
||||
echo " EXPECT: tier GRANTED."
|
||||
echo "=============================================================="
|
||||
S2="$(mktemp -d)"
|
||||
make_harness "$S2" >/dev/null
|
||||
seed_common "$S2" "real-ceo" "tier:high" "$TEAMS_JSON"
|
||||
printf '%s' '204' > "$S2/code_teams_10_members_real-ceo" # ceo team: member
|
||||
set +e
|
||||
OUT2="$(run_script "$S2")"; RC2=$?
|
||||
set -e
|
||||
echo "$OUT2" | sed 's/^/ /'
|
||||
echo " (exit=$RC2)"
|
||||
assert_eq "S2 exit zero (granted)" "0" "$RC2"
|
||||
assert_contains "S2 printed PASSED" "$OUT2" "sop-tier-check PASSED"
|
||||
rm -rf "$S2"
|
||||
|
||||
echo
|
||||
echo "=============================================================="
|
||||
echo "Scenario 3: tier:high, approver is an org member but a VERIFIED"
|
||||
echo " non-member of ceo (team probe 404). Org probe would"
|
||||
echo " 204 — must NEVER be synthetic-credited."
|
||||
echo " EXPECT: tier NOT granted (clause FAIL), no fallback."
|
||||
echo "=============================================================="
|
||||
S3="$(mktemp -d)"
|
||||
make_harness "$S3" >/dev/null
|
||||
seed_common "$S3" "org-member-carol" "tier:high" "$TEAMS_JSON"
|
||||
printf '%s' '404' > "$S3/code_teams_10_members_org-member-carol" # verified NOT in ceo
|
||||
printf '%s' '204' > "$S3/code_orgs_molecule-ai_members_org-member-carol" # org member (must be ignored)
|
||||
set +e
|
||||
OUT3="$(run_script "$S3")"; RC3=$?
|
||||
set -e
|
||||
echo "$OUT3" | sed 's/^/ /'
|
||||
echo " (exit=$RC3)"
|
||||
assert_eq "S3 exit non-zero (tier NOT granted)" "1" "$([ "$RC3" -ne 0 ] && echo 1 || echo 0)"
|
||||
assert_not_contains "S3 did NOT print PASSED" "$OUT3" "sop-tier-check PASSED"
|
||||
assert_contains "S3 reported a real clause FAIL (not cannot-verify)" "$OUT3" "FAILED for tier:high"
|
||||
assert_not_contains "S3 did NOT cannot-verify (404 is a verified negative)" "$OUT3" "CANNOT VERIFY"
|
||||
rm -rf "$S3"
|
||||
|
||||
echo
|
||||
echo "------"
|
||||
echo "PASS=$PASS FAIL=$FAIL"
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -246,21 +246,24 @@ assert_contains "T1 POST context is sop-tier-check / tier-check" \
|
||||
'"context": "sop-tier-check / tier-check (pull_request)"' "$POSTED"
|
||||
assert_contains "T1 description names commenter" "test-runner" "$POSTED"
|
||||
|
||||
# T2: missing tier label → tier-check fails internally, but refire status
|
||||
# matches the canonical workflow's fail-open job conclusion.
|
||||
# T2: missing tier label → tier-check fails internally (mock exits 1).
|
||||
# FAIL-CLOSED contract (fix/core-ci-fail-closed): refire now captures the
|
||||
# REAL exit code and POSTs state=failure — it does NOT forge a green on
|
||||
# the required context. The refire job itself still exits 0 (it succeeded
|
||||
# at posting an honest failure status).
|
||||
run_scenario "T2_no_tier_label" "fail_no_label"
|
||||
RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
|
||||
assert_eq "T2 exit code 0 (canonical fail-open)" "0" "$RC"
|
||||
assert_contains "T2 POSTed state=success" '"state": "success"' "$POSTED"
|
||||
assert_eq "T2 exit code 0 (posted an honest status)" "0" "$RC"
|
||||
assert_contains "T2 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
|
||||
|
||||
# T3: tier:low present but ZERO approving reviews → internal tier check fails,
|
||||
# refire status remains aligned with the canonical workflow.
|
||||
# T3: tier:low present but ZERO approving reviews → internal tier check
|
||||
# fails (mock exits 1). Refire POSTs state=failure, never a false green.
|
||||
run_scenario "T3_no_approvals" "fail_no_approvals"
|
||||
RC=$(cat "$FIX_STATE_DIR/last_rc")
|
||||
POSTED=$(cat "$FIX_STATE_DIR/posted_statuses.jsonl" 2>/dev/null || true)
|
||||
assert_eq "T3 exit code 0 (canonical fail-open)" "0" "$RC"
|
||||
assert_contains "T3 POSTed state=success" '"state": "success"' "$POSTED"
|
||||
assert_eq "T3 exit code 0 (posted an honest status)" "0" "$RC"
|
||||
assert_contains "T3 POSTed state=failure (no forged green)" '"state": "failure"' "$POSTED"
|
||||
|
||||
# T4: closed PR — refire is a no-op (no POST, exit 0)
|
||||
run_scenario "T4_closed" "pass"
|
||||
|
||||
@@ -205,5 +205,5 @@ n/a_gates:
|
||||
required_teams: [security, managers, ceo]
|
||||
description: >-
|
||||
Security review N/A when this change has no security surface
|
||||
(docs-only, pure-frontend, dependency-only). A security/owners
|
||||
(docs-only, pure-frontend, dependency-only). A security/managers/ceo
|
||||
member must post /sop-n/a security-review to activate.
|
||||
|
||||
@@ -34,11 +34,6 @@ jobs:
|
||||
check:
|
||||
name: Block forbidden paths
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking
|
||||
# the PR. Follow-up PR flips this off after surfaced defects are
|
||||
# triaged.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
|
||||
@@ -364,6 +364,25 @@ jobs:
|
||||
# check missed. If a refactor weakens the gate to a shape check,
|
||||
# this step goes red on every PR.
|
||||
bash tests/e2e/test_completion_assert_unit.sh
|
||||
# harden/e2e-staging-saas-failclosed: fail-direction proof for the
|
||||
# E2E_REQUIRE_LIVE fail-closed-on-skip guard in
|
||||
# test_staging_full_saas.sh. Offline (no LLM/network/provisioning):
|
||||
# asserts the guard exits 5 when a live lifecycle did NOT run and
|
||||
# passes when all milestones fired — so a refactor that lets the
|
||||
# staging gate report green without a real provision→online→A2A
|
||||
# cycle goes red on every PR.
|
||||
bash tests/e2e/test_require_live_guard_unit.sh
|
||||
# harden/enforce-ci-gates-core-v2 (PR #2286): fail-direction proof
|
||||
# for the E2E_REQUIRE_LIVE zero-validated gate in
|
||||
# test_priority_runtimes_e2e.sh (the REQUIRED `E2E API Smoke Test`).
|
||||
# Offline (no LLM/network/provisioning): sources that script under
|
||||
# its unit source-guard and drives the REAL evaluate_require_live_gate
|
||||
# — asserts REQUIRE_LIVE=1 + zero validated → RED (the false-green
|
||||
# trap), REQUIRE_LIVE=1 + >=1 validated → GREEN, and REQUIRE_LIVE
|
||||
# unset + zero validated → GREEN (loud skip). CI can't provision a
|
||||
# live arm to prove this, so this unit test IS the regression gate:
|
||||
# a revert of the zero-validated→RED logic goes red on every PR.
|
||||
bash tests/e2e/test_require_live_priority_gate_unit.sh
|
||||
|
||||
- if: ${{ needs.changes.outputs.scripts == 'true' }}
|
||||
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
|
||||
|
||||
@@ -272,6 +272,33 @@ jobs:
|
||||
echo "::error::Redis did not become ready in 15s"
|
||||
docker logs "$REDIS_CONTAINER" || true
|
||||
exit 1
|
||||
- name: Set deterministic admin token for the e2e platform
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: |
|
||||
# AdminAuth (workspace-server/internal/middleware/wsauth_middleware.go:164)
|
||||
# reads ADMIN_TOKEN. Setting it (a) closes isDevModeFailOpen (devmode.go:50
|
||||
# returns false when ADMIN_TOKEN is non-empty), so admin routes require a
|
||||
# bearer, and (b) makes Tier-2b accept a bearer that constant-time-equals
|
||||
# ADMIN_TOKEN. The platform process inherits ADMIN_TOKEN from $GITHUB_ENV.
|
||||
#
|
||||
# MOLECULE_ADMIN_TOKEN is the var the e2e scripts send as the bearer
|
||||
# (tests/e2e/_lib.sh:33 e2e_mint_workspace_token, and the run_mock
|
||||
# org-import curl). Set BOTH to the SAME value so the bearer the test
|
||||
# sends == the secret the platform checks. Deterministic test value;
|
||||
# this platform is ephemeral, single-run, and never reachable off-host.
|
||||
E2E_ADMIN_TOKEN="e2e-api-admin-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
echo "ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_ADMIN_TOKEN=${E2E_ADMIN_TOKEN}" >> "$GITHUB_ENV"
|
||||
echo "Admin token configured for the e2e platform (ADMIN_TOKEN + MOLECULE_ADMIN_TOKEN)."
|
||||
# Channels e2e test seam (core#2332 P1.10). These env-gated overrides
|
||||
# let the LIVE Slack-webhook send path + Telegram discover path target
|
||||
# the local mock upstreams that tests/e2e/test_channels_e2e.sh binds,
|
||||
# so the outbound serialize+POST is provable in CI (was unit-mock-only).
|
||||
# Inert in prod/staging — those deploys never set these. The fixed
|
||||
# loopback ports MUST match the script's E2E_CHANNELS_*_PORT defaults.
|
||||
echo "MOLECULE_CHANNELS_TEST_WEBHOOK_BASE=http://127.0.0.1:18099/" >> "$GITHUB_ENV"
|
||||
echo "MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE=http://127.0.0.1:18098" >> "$GITHUB_ENV"
|
||||
echo "Channels test seam configured (webhook+telegram mock bases on fixed loopback ports)."
|
||||
- name: Build platform
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
working-directory: workspace-server
|
||||
@@ -394,11 +421,65 @@ jobs:
|
||||
- name: Run E2E API tests
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_api.sh
|
||||
- name: Run keyless feature-contract E2E (terminal-diagnose / webhooks / budget / checkpoints / audit / traces / session-search / rescue / llm-billing-mode / resume / hibernate)
|
||||
# Keyless required-lane coverage for feature endpoints that ship without
|
||||
# an LLM key (runtime=external fixture). Each asserts the real HTTP
|
||||
# contract + a meaningful failure mode (401/400/fail-closed) so a
|
||||
# regression goes RED, not silently green. The mock-runtime A2A canned
|
||||
# round-trip is covered by the priority-runtimes `mock` arm, not here.
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_keyless_feature_contracts_e2e.sh
|
||||
- name: Run secrets-dispatch contract test (keyless SECRETS_JSON branch order)
|
||||
# Previously orphaned (no workflow referenced it). Hermetic unit-style
|
||||
# contract over test_staging_full_saas.sh's LLM-key branch precedence —
|
||||
# needs no platform, no bearer, no network. Guards the 2026-05-03
|
||||
# "wrong key shape wins" incident class.
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_secrets_dispatch.sh
|
||||
- name: Run notify-with-attachments E2E
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
run: bash tests/e2e/test_notify_attachments_e2e.sh
|
||||
- name: Run priority-runtimes E2E (claude-code + hermes — skips when keys absent)
|
||||
- name: "Run channels + data-prune E2E (REQUIRE-LIVE: mock upstream proves send+discover, purge proves prune)"
|
||||
# core#2332 P1.10. Stands up a local mock upstream, points the LIVE
|
||||
# Slack-webhook send + Telegram discover paths at it via the
|
||||
# production-inert test seam configured above, and asserts the mock
|
||||
# RECEIVED the serialized payload (send) + round-tripped the bot/chat
|
||||
# (discover). Then exercises the RFC #734 data-prune: DELETE
|
||||
# ?purge=true removes the target's durable child data while a sibling
|
||||
# survives. E2E_REQUIRE_LIVE=1 ⇒ a missing/regressed seam is RED, not a
|
||||
# silent skip. The platform inherits the MOLECULE_CHANNELS_TEST_* bases
|
||||
# from $GITHUB_ENV; the script's mock ports match them (18099/18098).
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
env:
|
||||
E2E_REQUIRE_LIVE: '1'
|
||||
run: bash tests/e2e/test_channels_e2e.sh
|
||||
- name: "Run priority-runtimes E2E (REQUIRE-LIVE: mock validates the runtime plumbing end-to-end)"
|
||||
# E2E_REQUIRE_LIVE=1 is ON: the run MUST validate >=1 runtime end-to-end
|
||||
# or it exits NON-zero (RED). This is now SAFE because the `mock` arm can
|
||||
# actually provision in CI: the only blocker was that POST /org/import and
|
||||
# POST /admin/workspaces/:id/tokens are AdminAuth-gated
|
||||
# (router.go:778 + :427) and this job previously configured NO admin token,
|
||||
# so every admin call 401'd ("admin auth required"). The "Set deterministic
|
||||
# admin token" step above now sets ADMIN_TOKEN on the platform AND exports
|
||||
# the matching MOLECULE_ADMIN_TOKEN the e2e scripts send as the bearer, so
|
||||
# the mock arm can org-import → online → mint token → canned A2A reply →
|
||||
# validated(). That guarantees VALIDATED>=1 on a healthy platform, so the
|
||||
# REQUIRED `E2E API Smoke Test` gate now HONESTLY validates a runtime
|
||||
# end-to-end; if the mock plumbing (DB insert, status flip, A2A proxy,
|
||||
# activity logging, or the admin-auth wiring) genuinely breaks, the gate
|
||||
# goes RED instead of false-green. The zero-validated→RED decision is also
|
||||
# regression-gated WITHOUT provisioning by the bash unit test
|
||||
# tests/e2e/test_require_live_priority_gate_unit.sh (wired into ci.yml's
|
||||
# "Run E2E bash unit tests" job), so a revert of that logic still fails CI.
|
||||
#
|
||||
# MiniMax stays an OPPORTUNISTIC best-effort arm: create is registry-fragile
|
||||
# in CI (422 UNREGISTERED_MODEL_FOR_RUNTIME), so a miss is reported via
|
||||
# bestfail() and never reds the gate — mock carries the required validation,
|
||||
# MiniMax is a bonus real-LLM check when it comes up. ZERO new credentials.
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
env:
|
||||
E2E_REQUIRE_LIVE: '1'
|
||||
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
run: bash tests/e2e/test_priority_runtimes_e2e.sh
|
||||
- name: Install standalone runtime parser from Gitea registry
|
||||
if: needs.detect-changes.outputs.api == 'true'
|
||||
|
||||
@@ -113,6 +113,29 @@ jobs:
|
||||
runs-on: docker-host
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
#
|
||||
# PROMOTION-READINESS (toward required gate — do NOT flip continue-on-error
|
||||
# without CTO sign-off, that's the irreversible call):
|
||||
# NOW FAIL-CLOSED:
|
||||
# - Postgres/Redis/platform/canvas readiness are already bounded
|
||||
# readiness-polls that hard-fail (and dump logs) at their deadline,
|
||||
# not fixed sleeps — preserved.
|
||||
# - passWithNoTests:false + forbidOnly (playwright.config.ts) → a
|
||||
# renamed/moved spec or stray test.only can no longer green the lane.
|
||||
# - REQUIRE-LIVE guard in "Run Playwright E2E tests" → chat==true must
|
||||
# actually execute >=1 test, else exit 1.
|
||||
# - chat-desktop "activity log" test no longer swallows its assertion.
|
||||
# STILL BLOCKS PROMOTION:
|
||||
# - The echo round-trip asserts on rendered "Echo: ..." text but never
|
||||
# asserts the echo runtime actually RECEIVED the A2A request
|
||||
# (fixtures/echo-runtime.ts exposes lastRequest, unused) — an
|
||||
# optimistic client-side render could pass without a real round-trip.
|
||||
# Add a server-received assertion before required.
|
||||
# - The "No-op pass" path (detect-changes chat!=true) is a legitimate
|
||||
# paths-filter skip, but a required gate needs it to be a neutral
|
||||
# check, not a green "success", so a skipped heavy lane can't be
|
||||
# mistaken for a passed one.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 15
|
||||
env:
|
||||
@@ -334,11 +357,32 @@ jobs:
|
||||
- name: Run Playwright E2E tests
|
||||
if: needs.detect-changes.outputs.chat == 'true'
|
||||
working-directory: canvas
|
||||
env:
|
||||
# CI=1 activates forbidOnly in playwright.config.ts (a stray
|
||||
# `test.only` would otherwise green the suite while skipping the
|
||||
# rest). passWithNoTests:false (also in the config) already makes
|
||||
# a zero-match selection exit non-zero.
|
||||
CI: "1"
|
||||
run: |
|
||||
set -euo pipefail
|
||||
export E2E_PLATFORM_URL="http://127.0.0.1:${PLATFORM_PORT}"
|
||||
export E2E_DATABASE_URL="${DATABASE_URL}"
|
||||
export PLAYWRIGHT_BASE_URL="http://localhost:${CANVAS_PORT}"
|
||||
npx playwright test e2e/chat-desktop.spec.ts e2e/chat-mobile.spec.ts
|
||||
|
||||
# REQUIRE-LIVE guard (mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE):
|
||||
# this lane reached here only because detect-changes said chat==true,
|
||||
# so it MUST actually execute the round-trip specs. `pipefail` makes
|
||||
# a real test failure (playwright non-zero) abort here under `set -e`;
|
||||
# passWithNoTests:false makes a zero-match selection non-zero too. The
|
||||
# explicit grep below is belt-and-braces: assert the list reporter
|
||||
# printed a result summary, so a run that prints no summary at all can
|
||||
# never report green. NOTE: the pattern also accepts "N skipped", so an all-skipped run still satisfies this check.
|
||||
npx playwright test e2e/chat-desktop.spec.ts e2e/chat-mobile.spec.ts \
|
||||
--reporter=list 2>&1 | tee /tmp/pw-chat.out
|
||||
if ! grep -qE '[0-9]+ (passed|failed|skipped)' /tmp/pw-chat.out; then
|
||||
echo "::error::E2E Chat REQUIRE-LIVE: chat==true but Playwright reported no executed tests — specs missing or all-skipped, refusing to report green."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Dump platform log on failure
|
||||
if: failure() && needs.detect-changes.outputs.chat == 'true'
|
||||
|
||||
@@ -12,9 +12,30 @@ name: E2E Staging Canvas (Playwright)
|
||||
#
|
||||
|
||||
# Playwright test suite that provisions a fresh staging org per run and
|
||||
# verifies every workspace-panel tab renders without crashing. Complements
|
||||
# e2e-staging-saas.yml (which tests the API shape) by exercising the
|
||||
# actual browser + canvas bundle against live staging.
|
||||
# verifies every workspace-panel tab renders REAL content (not just an
|
||||
# empty/errored container). Complements e2e-staging-saas.yml (which tests
|
||||
# the API shape) by exercising the actual browser + canvas bundle against
|
||||
# live staging.
|
||||
#
|
||||
# PROMOTION-READINESS (toward making this a HARD merge-gate):
|
||||
# NOW RELIABLE (spec hardened — staging-tabs.spec.ts):
|
||||
# - All waits condition-based (toBeVisible/toHaveAttribute/expect.poll);
|
||||
# no fixed waitForTimeout in the spec.
|
||||
# - Tabs asserted on settled REAL content, not "container visible".
|
||||
# - ErrorBoundary + visible error alerts fail non-degraded tabs.
|
||||
# - Tab-list parity-checked vs live DOM; fail-closed on missing tenant.
|
||||
# STILL BLOCKS PROMOTION-TO-REQUIRED (do NOT remove continue-on-error —
|
||||
# CTO-owned, RFC internal#219 §1):
|
||||
# - Infra dependency: real staging EC2 per run (12-20 min cold boot);
|
||||
# AWS/Cloudflare/CP availability would become merge-blockers.
|
||||
# - Shared-zone TLS/DNS/ACME propagation flake surface is upstream of
|
||||
# this repo and outside its control.
|
||||
# - Required-gate correctness needs CP_STAGING_ADMIN_API_TOKEN GUARANTEED
|
||||
# present; today's skip-if-absent (core#2225) is right for non-gating
|
||||
# but would skip-green a required check.
|
||||
# - Single hermes/platform_managed workspace; agent-dependent content
|
||||
# (live chat/traces round-trip) not exercised on staging (#2162).
|
||||
# The full checklist lives at the foot of canvas/e2e/staging-tabs.spec.ts.
|
||||
#
|
||||
# Triggers: push to main, PR touching canvas sources + this workflow only
|
||||
# after the PR enters `merge-queue`, manual dispatch, and scheduled cron to
|
||||
|
||||
@@ -85,6 +85,26 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
#
|
||||
# PROMOTION-READINESS (toward required gate — do NOT flip continue-on-error
|
||||
# without CTO sign-off, that's the irreversible call):
|
||||
# NOW FAIL-CLOSED:
|
||||
# - Missing CP_STAGING_ADMIN_API_TOKEN → hard exit 2 (preflight).
|
||||
# - Staging CP unhealthy → hard exit 1 (preflight, not a workspace bug).
|
||||
# - Harness E2E_REQUIRE_LIVE=1 → exit 5 if a clean exit didn't prove
|
||||
# all four awaiting_agent transitions (no silent skip).
|
||||
# - Sweep transition (step 6) is now a bounded readiness-poll, not a
|
||||
# fixed sleep + one-shot assert → no more sweep-cadence flake.
|
||||
# - register / re-register retry ONLY transient edge 5xx (bounded),
|
||||
# fail closed on 4xx → no more cold-boot-502 flake.
|
||||
# STILL BLOCKS PROMOTION:
|
||||
# - Single shared staging tenant + EC2 quota window: an infra-side
|
||||
# provisioning outage (not a code bug) would turn the gate red.
|
||||
# Needs an infra-class vs code-class signal split before required.
|
||||
# - "CP unhealthy → exit 1" currently looks identical to a real
|
||||
# failure on the run page; required-gate would need it demoted to
|
||||
# a neutral/skip so staging flakiness can't block merges.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 25
|
||||
|
||||
@@ -124,6 +144,15 @@ jobs:
|
||||
|
||||
- name: Run external-runtime E2E
|
||||
id: e2e
|
||||
# E2E_REQUIRE_LIVE=1: the harness fails CLOSED (exit 5) if it ever
|
||||
# reaches a clean exit without proving all four awaiting_agent
|
||||
# transitions. Mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE — a
|
||||
# silent skip / early-return / dropped assertion can no longer
|
||||
# masquerade as green. Token-missing and CP-unhealthy already
|
||||
# hard-fail in the two preflight steps above, so reaching this step
|
||||
# means a real cycle is expected.
|
||||
env:
|
||||
E2E_REQUIRE_LIVE: "1"
|
||||
run: bash tests/e2e/test_staging_external_runtime.sh
|
||||
|
||||
# Mirror the e2e-staging-saas.yml safety net: if the runner is
|
||||
|
||||
@@ -101,18 +101,29 @@ jobs:
|
||||
# so teardown MUST positively confirm no slug-tagged box survives.
|
||||
E2E_AWS_LEAK_CHECK: required
|
||||
E2E_AWS_TERMINATE_LEAKS: '1'
|
||||
# claude-code + MiniMax is the cheapest boot-to-online path (same as the
|
||||
# saas job). The reconciler test never makes a completion, but the key is
|
||||
# wired so the first boot reaches online on the same path the saas
|
||||
# harness uses. First non-empty wins in the script's priority chain.
|
||||
E2E_MINIMAX_API_KEY: ${{ secrets.MOLECULE_STAGING_MINIMAX_API_KEY }}
|
||||
E2E_ANTHROPIC_API_KEY: ${{ secrets.MOLECULE_STAGING_ANTHROPIC_API_KEY }}
|
||||
E2E_OPENAI_API_KEY: ${{ secrets.MOLECULE_STAGING_OPENAI_API_KEY }}
|
||||
E2E_RUNTIME: claude-code
|
||||
# Platform-managed create path (moonshot/kimi-k2.6, no tenant key) — the
|
||||
# combo proven to create cleanly; this test only needs the ws online.
|
||||
#
|
||||
# DELIBERATELY no E2E_MODEL_SLUG and no E2E_*_API_KEY here — mirror the
|
||||
# e2e-staging-platform-boot job in e2e-staging-saas.yml. On
|
||||
# E2E_LLM_PATH=platform the harness sends EMPTY secrets and lets
|
||||
# pick_model_slug return the platform default moonshot/kimi-k2.6 (a member
|
||||
# of the providers.yaml claude-code `platform` arm → provider=platform,
|
||||
# billed by the CP LLM proxy, NO tenant key required).
|
||||
#
|
||||
# The previous wiring set E2E_MODEL_SLUG: MiniMax-M2 (a BARE id in the
|
||||
# providers.yaml `minimax` BYOK arm → provider=minimax, requires
|
||||
# MINIMAX_API_KEY) while sending secrets={} on the platform path. Because
|
||||
# E2E_MODEL_SLUG wins over the E2E_LLM_PATH=platform branch in
|
||||
# pick_model_slug, the workspace got a keyless BYOK-minimax model, could
|
||||
# not resolve a serving path, and booted to status=failed — never online
|
||||
# (run 223233: "MODEL_SLUG=MiniMax-M2" then "→ failed", "never reached
|
||||
# status=online within 900s"). The BYOK key wiring was equally misleading:
|
||||
# the harness ignores E2E_*_API_KEY on E2E_LLM_PATH=platform, so the keys
|
||||
# only made the contradiction harder to spot. Platform-only is correct
|
||||
# here — this test exercises instance-state, never an LLM completion.
|
||||
E2E_LLM_PATH: platform
|
||||
E2E_MODEL_SLUG: MiniMax-M2
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
|
||||
|
||||
@@ -124,7 +124,12 @@ jobs:
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
timeout-minutes: 45
|
||||
# Raised 45→75: step 10b now exercises pause→resume→online +
|
||||
# hibernate→wake→online, each of which RE-PROVISIONS the parent (CP
|
||||
# re-provision + heartbeat recovery, not a fresh EC2 cold start, but still
|
||||
# minutes). The base provision→online→A2A matrix fits in ~35 min; the two
|
||||
# extra lifecycle reprovisions need headroom under WORKSPACE_ONLINE_TIMEOUT.
|
||||
timeout-minutes: 75
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -173,17 +178,31 @@ jobs:
|
||||
# workflow_dispatch flow (no input wired here yet — runtime
|
||||
# override is enough for ad-hoc).
|
||||
#
|
||||
# #2263 deploy-skew: the claude-code default is the COLON-namespaced BYOK
|
||||
# id `minimax:MiniMax-M2.7`, NOT bare `MiniMax-M2`. The deployed staging
|
||||
# ws-server's compiled registry can lag source; validateRegisteredModelForRuntime
|
||||
# 400s the bare form on an older image (the sibling Platform Boot job, on
|
||||
# the SAME image, succeeds with namespaced `moonshot/kimi-k2.6`). The colon
|
||||
# form stays in the BYOK `minimax` arm (providers.yaml:851) so it resolves
|
||||
# provider=minimax (BYOK) and the #1994 byok-not-platform guard still
|
||||
# passes — the slash/platform form `minimax/MiniMax-M2.7` would not.
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'minimax:MiniMax-M2.7' }}
|
||||
# claude-code MiniMax slug must be the BARE registered id `MiniMax-M2.7`.
|
||||
# It is the BYOK-minimax form: registry_gen.go:88 registers it on the
|
||||
# `minimax` arm (resolves provider=minimax via MINIMAX_API_KEY), so the
|
||||
# #1994 byok-not-platform guard still passes. The COLON form
|
||||
# `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code (internal#718;
|
||||
# derive_provider_matrix_test.go:288) — the claude-code adapter can't
|
||||
# strip the `minimax:` prefix, so workspace-create 422s
|
||||
# UNREGISTERED_MODEL_FOR_RUNTIME (real failure: job 295233, main 4b3590e3).
|
||||
# The slash form `minimax/MiniMax-M2.7` is the platform-billed arm and
|
||||
# would trip the byok guard. #2311 fixed the same colon-vs-bare bug in the
|
||||
# pick_model_slug lib (tests/e2e/lib/model_slug.sh), but this env var
|
||||
# OVERRIDES that lib, so the bare fix has to live here too.
|
||||
E2E_MODEL_SLUG: ${{ github.event.inputs.runtime == 'hermes' && 'openai/gpt-4o' || github.event.inputs.runtime == 'codex' && 'openai/gpt-4o' || github.event.inputs.runtime == 'google-adk' && 'google_genai:gemini-2.5-pro' || 'MiniMax-M2.7' }}
|
||||
E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
# Lifecycle transitions (step 10b): pause→resume→online +
|
||||
# hibernate→wake→online on the provisioned parent. `auto` runs them in
|
||||
# full mode (this job). Set `off` to skip the ~2x-reprovision cost on an
|
||||
# ad-hoc dispatch. The timeout-minutes above is sized for this being on.
|
||||
E2E_LIFECYCLE: auto
|
||||
# Fail-closed-on-skip: in CI the harness MUST prove ≥1 full
|
||||
# provision→online→A2A cycle. If it reaches the end having validated
|
||||
# nothing (a future short-circuit / skip path), it exits 5 rather than
|
||||
# reporting a false green. Mirrors CP serving-e2e SERVING_E2E_REQUIRE_LIVE.
|
||||
E2E_REQUIRE_LIVE: '1'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -381,6 +400,10 @@ jobs:
|
||||
E2E_MODE: smoke
|
||||
E2E_RUN_ID: "platform-${{ github.run_id }}-${{ github.run_attempt }}"
|
||||
E2E_KEEP_ORG: ${{ github.event.inputs.keep_org && '1' || '0' }}
|
||||
# Fail-closed-on-skip (see BYOK job). smoke mode still runs steps 2/4/7/8b,
|
||||
# so all four required milestones (provisioned/tenant_online/
|
||||
# workspace_online/a2a_roundtrip) fire — the guard is valid for this lane too.
|
||||
E2E_REQUIRE_LIVE: '1'
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
name: E2E Workspace Lifecycle (staginge2e)
|
||||
|
||||
# core#2332 P1.10 — close the workspace-lifecycle coverage gap.
|
||||
#
|
||||
# soft-restart / pause / resume / hibernate were only unit-tested (httptest in
|
||||
# workspace-server/internal/handlers/*_test.go) and never proven against a real
|
||||
# container. This drives the Go staginge2e suite
|
||||
# (workspace-server/internal/staginge2e/workspace_lifecycle_test.go) which
|
||||
# provisions a REAL throwaway staging tenant, exercises each lifecycle endpoint,
|
||||
# and asserts OBSERVABLE container state (status transitions + serve reachability
|
||||
# + url-cleared-on-stop) — not just HTTP 200.
|
||||
#
|
||||
# ADVISORY-BY-INFRA. It needs a live staging tenant (30+ min cold EC2 path), so
|
||||
# the real run is workflow_dispatch / schedule only — NOT per-PR and NOT a
|
||||
# required check. Promotion to a required branch-protection context is a separate
|
||||
# CTO decision (mirrors the cp internal/staginge2e suite, cp#386, and the
|
||||
# peer-visibility flip-to-required pattern, molecule-core#1296).
|
||||
#
|
||||
# HONEST GATE — NO continue-on-error mask (feedback_fix_root_not_symptom). The
|
||||
# PR job validates that the suite COMPILES under -tags=staging_e2e and SKIPs LOUD
|
||||
# without creds (the suite's contract) — a broken test file fails at PR time. The
|
||||
# real assertion runs on dispatch/cron with staging creds.
|
||||
#
|
||||
# Gitea 1.22.6 / act_runner notes honored: no cross-repo uses (mirrored
|
||||
# actions/checkout SHA), per-SHA concurrency, pinned GITHUB_SERVER_URL.
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/workspace_restart.go'
|
||||
- 'workspace-server/internal/handlers/workspace_crud.go'
|
||||
- 'workspace-server/internal/staginge2e/**'
|
||||
- '.gitea/workflows/e2e-workspace-lifecycle.yml'
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- 'workspace-server/internal/handlers/workspace_restart.go'
|
||||
- 'workspace-server/internal/handlers/workspace_crud.go'
|
||||
- 'workspace-server/internal/staginge2e/**'
|
||||
- '.gitea/workflows/e2e-workspace-lifecycle.yml'
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# 08:00 UTC daily — offset from e2e-staging-saas (07:00) and
|
||||
# e2e-peer-visibility (07:30) so the three don't collide on the staging
|
||||
# org-creation quota.
|
||||
- cron: '0 8 * * *'
|
||||
|
||||
concurrency:
|
||||
# Per-SHA (feedback_concurrency_group_per_sha).
|
||||
group: e2e-workspace-lifecycle-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
jobs:
|
||||
# PR / compile gate: prove the staginge2e suite compiles under the build tag
|
||||
# and skips LOUD without creds. Cheap, honest, non-required. This is NOT a
|
||||
# fake-green mask of the real assertion — it fails if the test file stops
|
||||
# compiling. bp-required: pending CTO decision (see header).
|
||||
lifecycle-compile-skip:
|
||||
name: E2E Workspace Lifecycle (compile+skip)
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
- name: go vet (staging_e2e tag)
|
||||
working-directory: workspace-server
|
||||
run: go vet -tags staging_e2e ./internal/staginge2e/...
|
||||
- name: Compile + skip-run (must SKIP LOUD without STAGING_E2E)
|
||||
working-directory: workspace-server
|
||||
run: |
|
||||
# No STAGING_E2E / creds → the suite MUST skip (not pass-with-zero-
|
||||
# assertions, not fail-open). `go test` exit 0 with a SKIP line is the
|
||||
# contract. -run pins to the one test so this stays fast.
|
||||
out=$(go test -tags staging_e2e ./internal/staginge2e/ -run TestWorkspaceLifecycle -count=1 -v 2>&1)
|
||||
echo "$out"
|
||||
echo "$out" | grep -q "SKIP: TestWorkspaceLifecycle_Staging" \
|
||||
|| { echo "::error::expected a LOUD skip of TestWorkspaceLifecycle_Staging without creds"; exit 1; }
|
||||
|
||||
# Real STAGING gate: provisions a throwaway tenant, drives the lifecycle
|
||||
# endpoints, asserts observable transitions, scoped teardown.
|
||||
# dispatch / schedule only (30+ min cold EC2).
|
||||
lifecycle-staging:
|
||||
name: E2E Workspace Lifecycle (staging)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
|
||||
timeout-minutes: 60
|
||||
env:
|
||||
CP_BASE_URL: https://staging-api.moleculesai.app
|
||||
CP_ADMIN_API_TOKEN: ${{ secrets.CP_STAGING_ADMIN_API_TOKEN }}
|
||||
STAGING_E2E: '1'
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
|
||||
with:
|
||||
go-version: 'stable'
|
||||
cache: true
|
||||
cache-dependency-path: workspace-server/go.sum
|
||||
- name: Verify admin token present
|
||||
run: |
|
||||
if [ -z "$CP_ADMIN_API_TOKEN" ]; then
|
||||
echo "::error::CP_STAGING_ADMIN_API_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
|
||||
exit 2
|
||||
fi
|
||||
echo "Admin token present"
|
||||
- name: CP staging health preflight
|
||||
run: |
|
||||
code=$(curl -sS -o /dev/null -w "%{http_code}" --max-time 10 "$CP_BASE_URL/health")
|
||||
if [ "$code" != "200" ]; then
|
||||
echo "::error::Staging CP unhealthy (HTTP $code) — infra, not a lifecycle bug. Failing loud per feedback_fix_root_not_symptom."
|
||||
exit 1
|
||||
fi
|
||||
echo "Staging CP healthy"
|
||||
- name: Run workspace-lifecycle staginge2e
|
||||
working-directory: workspace-server
|
||||
run: go test -tags staging_e2e ./internal/staginge2e/ -run TestWorkspaceLifecycle_Staging -count=1 -v -timeout 50m
|
||||
# Teardown: the test installs a t.Cleanup admin-DELETE of its own tenant
|
||||
# (runs even on a Fatal). We deliberately do NOT add a broad in-workflow
|
||||
# "sweep all e2e-life-* slugs" net here — that could delete a concurrently
|
||||
# running dispatch's fresh tenant (the slug is not run-id scoped). The
|
||||
# age-guarded `sweep-stale-e2e-orgs` workflow (30-min floor, e2e- prefix)
|
||||
# is the final safety net for a tenant orphaned by a hard runner cancel.
|
||||
@@ -7,10 +7,13 @@ name: gitea-merge-queue
|
||||
# the user-space queue bot, one PR per tick, using the non-bypass merge actor.
|
||||
#
|
||||
# Queue contract:
|
||||
# - add label `merge-queue` to an open same-repo PR
|
||||
# - auto-discovery (default): any open same-repo PR is considered — no
|
||||
# `merge-queue` label required (the label is optional metadata now)
|
||||
# - bot updates stale PR heads with current main, then waits for CI
|
||||
# - bot merges only when current main is green and required PR contexts pass
|
||||
# - add `merge-queue-hold` to pause a queued PR without removing it
|
||||
# - bot merges only when current main is green, genuine approvals are present
|
||||
# on the current head, required PR contexts pass, and the PR is mergeable
|
||||
# - add `merge-queue-hold`, `do-not-auto-merge`, or `wip` to keep a PR OUT of
|
||||
# autonomous merging; draft PRs are also skipped
|
||||
|
||||
on:
|
||||
# Schedule moved to operator-config:
|
||||
@@ -48,10 +51,34 @@ jobs:
|
||||
WATCH_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
QUEUE_LABEL: merge-queue
|
||||
HOLD_LABEL: merge-queue-hold
|
||||
# Auto-discovery (opt-OUT). When on (default), the queue considers ALL
|
||||
# open same-repo PRs that meet the merge bar — it does NOT wait for a
|
||||
# human/agent to add `merge-queue`. Agent Gitea tokens lack
|
||||
# write:issue (labels are issue-scoped) and could never self-label,
|
||||
# which stalled the queue; the label is now OPTIONAL metadata. The
|
||||
# merge bar is UNCHANGED — only candidate selection widens. Set
|
||||
# AUTO_DISCOVER=0 to restore legacy opt-IN (require the merge-queue
|
||||
# label to be considered).
|
||||
AUTO_DISCOVER: "1"
|
||||
# Opt-OUT labels: any of these on a PR keeps it OUT of autonomous
|
||||
# merging (the human escape hatch). HOLD_LABEL is always also honoured.
|
||||
# A human who wants a PR held just adds one of these labels.
|
||||
OPT_OUT_LABELS: do-not-auto-merge,wip
|
||||
UPDATE_STYLE: merge
|
||||
REQUIRED_CONTEXTS: >-
|
||||
CI / all-required (pull_request),
|
||||
sop-checklist / all-items-acked (pull_request)
|
||||
# Recognised official-reviewer set. A merge needs >= required_approvals
|
||||
# DISTINCT genuine official approvals from these accounts on the
|
||||
# CURRENT head sha (not stale/dismissed). The required_approvals count
|
||||
# itself is read from branch protection at runtime.
|
||||
REVIEWER_SET: agent-reviewer,agent-researcher,agent-reviewer-cr2
|
||||
# NOTE: REQUIRED_CONTEXTS is no longer the authoritative PR gate. The
|
||||
# queue now reads the required status contexts from BRANCH PROTECTION
|
||||
# (status_check_contexts) so non-required governance reds (qa-review,
|
||||
# security-review, sop-tier, sop-checklist when not branch-required,
|
||||
# E2E Chat, Staging SaaS, ci-arm64-advisory) cannot block a merge.
|
||||
# If branch protection cannot be enumerated the queue HOLDS
|
||||
# (fail-closed). REQUIRED_APPROVALS below is only a fallback used when
|
||||
# branch protection does not specify required_approvals.
|
||||
REQUIRED_APPROVALS: "2"
|
||||
# Push-side required contexts. Checking CI / all-required (push)
|
||||
# explicitly instead of the combined state avoids false-pause when
|
||||
# non-blocking jobs (continue-on-error: true) have failed — those
|
||||
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
# all violate this lint at first — intentional. Flip to false in a
|
||||
# follow-up once main has been clean for 3 days. mc#1982.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true # mc#1982 Phase 3 mask — 14d forced-renewal cadence
|
||||
continue-on-error: true # internal#837 Phase 3 mask — 14d forced-renewal cadence
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
|
||||
|
||||
@@ -123,7 +123,14 @@ jobs:
|
||||
# with a per-entry ::error:: annotation naming the missing repo
|
||||
# (issue #2192). This is the push-time complement to PR #2186's
|
||||
# PR-time manifest-entry-existence gate.
|
||||
#
|
||||
# Token: workspace-template-* repos are PRIVATE, so the existence check
|
||||
# must authenticate (same AUTO_SYNC_TOKEN as the clone step). Without it
|
||||
# an unauthenticated GET 404s on private repos and false-prunes them
|
||||
# (regression that dropped seo-agent/google-adk from the palette).
|
||||
- name: Validate manifest entries exist
|
||||
env:
|
||||
MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
bash scripts/check-manifest-repos-exist.sh manifest.json
|
||||
|
||||
@@ -33,11 +33,24 @@
|
||||
# 2026-05-17 (internal#189 Phase 1).
|
||||
#
|
||||
# BURN-IN CLOSED 2026-05-17 (internal#189 Phase 1): The 7-day burn-in
|
||||
# window closed. continue-on-error: true has been removed from the
|
||||
# tier-check job; AND-composition is now fully enforced. If you need
|
||||
# to temporarily re-introduce a mask, file a tracker and follow the
|
||||
# mc#1982 protocol (Tier 2e lint requires a current tracker within
|
||||
# 2 lines of any continue-on-error: true).
|
||||
# window closed. As of 2026-06-04 the residual masks left behind by the
|
||||
# burn-in are removed for real (the comment previously claimed this while
|
||||
# the masks still persisted — that was stale):
|
||||
# - continue-on-error: true on the jq-install step (redundant; the step
|
||||
# already exits 0) and on the tier-check step (the burn-in mask).
|
||||
# - the `|| true` after the sop-tier-check.sh invocation, which masked
|
||||
# real tier-gate verdicts.
|
||||
# AND-composition is now fully enforced and the tier-check step can
|
||||
# honestly red CI on a real SOP-6 violation.
|
||||
#
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed): this is a
|
||||
# REQUIRED branch-protected gate on `pull_request_target` (always
|
||||
# same-repo, secrets always present — no fork/advisory split). Failing
|
||||
# open on a token/network/jq fault greened the SOP-6 approval gate
|
||||
# WITHOUT verifying approvals — a fail-open on a required context. The
|
||||
# gate now FAILS CLOSED on infra faults too: fix the token/runner, not
|
||||
# the gate. If you ever need to temporarily re-introduce a mask, file a
|
||||
# tracker and follow the mc#1982 protocol.
|
||||
|
||||
name: sop-tier-check
|
||||
|
||||
@@ -90,10 +103,11 @@ jobs:
|
||||
# GitHub releases may be unreachable from some runner networks
|
||||
# (infra#241 follow-up: GitHub timeout after 3s on 5.78.80.188
|
||||
# runners). The sop-tier-check script has its own fallback as a
|
||||
# third line of defense. continue-on-error: true ensures this step
|
||||
# failing does not block the job.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# third line of defense, and this step's final command
|
||||
# (`jq --version ... || echo`) already exits 0 unconditionally — so
|
||||
# the step cannot fail the job on its own.
|
||||
# continue-on-error REMOVED 2026-06-04 (mc#1982 directive: root-fix
|
||||
# and remove, do not renew). It was redundant masking, not a gate.
|
||||
run: |
|
||||
# apt-get is the primary method — Ubuntu package mirrors are reliably
|
||||
# reachable from runner containers. GitHub releases may be blocked
|
||||
@@ -110,11 +124,11 @@ jobs:
|
||||
jq --version 2>/dev/null || echo "::notice::jq not yet available — script fallback will retry"
|
||||
|
||||
- name: Verify tier label + reviewer team membership
|
||||
# continue-on-error: true at step level — job-level is ignored by Gitea
|
||||
# Actions (quirk #10, internal runbooks). Belt-and-suspenders with
|
||||
# SOP_FAIL_OPEN=1 + || true below.
|
||||
# mc#1982: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
# continue-on-error REMOVED 2026-06-04 (expired internal#189 Phase 1
|
||||
# burn-in, window closed 2026-05-17; mc#1982 directive: root-fix and
|
||||
# remove, do not renew). SOP_FAIL_OPEN REMOVED 2026-06-05
|
||||
# (fix/core-ci-fail-closed): the gate now fails CLOSED on infra
|
||||
# faults too (see the env block below), not just on a real verdict.
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
@@ -123,9 +137,26 @@ jobs:
|
||||
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
|
||||
SOP_DEBUG: '0'
|
||||
SOP_LEGACY_CHECK: '0'
|
||||
# SOP_FAIL_OPEN=1 makes the script always exit 0. The UI enforces
|
||||
# the actual merge gate. Combined with continue-on-error: true
|
||||
# above, this step never fails the job regardless of script exit.
|
||||
SOP_FAIL_OPEN: '1'
|
||||
# SOP_FAIL_OPEN REMOVED 2026-06-05 (fix/core-ci-fail-closed).
|
||||
#
|
||||
# This is the REQUIRED branch-protected gate
|
||||
# `sop-tier-check / tier-check (pull_request)`. It runs on
|
||||
# `pull_request_target`, which ALWAYS executes from the base
|
||||
# branch WITH secrets present — there is NO fork/advisory split
|
||||
# and no legitimate "secrets genuinely absent" degradation here.
|
||||
#
|
||||
# SOP_FAIL_OPEN=1 made the script `exit 0` on an empty/invalid
|
||||
# token, an unreachable Gitea API, or missing jq — i.e. an AUTH
|
||||
# FAILURE or unreachable-dependency would green the SOP-6
|
||||
# approval gate WITHOUT verifying that the required teams
|
||||
# actually approved. That is a fail-open on a required gate: a
|
||||
# mis-wired or under-scoped SOP_TIER_CHECK_TOKEN would let any PR
|
||||
# merge past the approval requirement.
|
||||
#
|
||||
# Removing the env unsets it → `${SOP_FAIL_OPEN:-}` is empty in
|
||||
# sop-tier-check.sh → every guarded `exit 0` branch instead falls
|
||||
# through to `exit 1`. Infra faults (bad token / API down / no
|
||||
# jq) now FAIL CLOSED with a loud `::error::`, exactly like a real
|
||||
# SOP-6 violation. Fix the token/runner, not the gate.
|
||||
run: |
|
||||
bash .gitea/scripts/sop-tier-check.sh || true
|
||||
bash .gitea/scripts/sop-tier-check.sh
|
||||
|
||||
@@ -26,11 +26,14 @@ name: sync-providers-yaml
|
||||
# sentinel does not fire on it.
|
||||
#
|
||||
# AUTH: uses AUTO_SYNC_TOKEN (the existing cross-repo read token used to sync
|
||||
# template/provider content from sibling repos). If the secret is absent the
|
||||
# job emits a clear ::warning:: and exits 0 — the hermetic sha pin in
|
||||
# sync_canonical_test.go is the always-on backstop, so a missing cross-repo
|
||||
# token degrades to "hand-edit still caught, live canonical drift not caught"
|
||||
# rather than a hard red that blocks unrelated PRs.
|
||||
# template/provider content from sibling repos). If the secret is absent:
|
||||
# * Trusted contexts (push to main/staging, schedule, same-repo PR,
|
||||
# workflow_dispatch): hard ::error:: + exit 1 (#2158 — silent
|
||||
# fail-open was masking live canonical drift from the daily schedule).
|
||||
# * Untrusted fork PRs: soft ::warning:: + exit 0 (forks cannot receive
|
||||
# secrets, so a hard-fail here would block every fork PR).
|
||||
# The hermetic sha pin in sync_canonical_test.go is the always-on backstop
|
||||
# for hand-edits of core's synced copy regardless of AUTO_SYNC_TOKEN state.
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -74,10 +77,37 @@ jobs:
|
||||
API_ROOT: ${{ github.server_url }}/api/v1
|
||||
run: |
|
||||
set -euo pipefail
|
||||
# Trusted-context detection (per #2158): AUTO_SYNC_TOKEN absence
|
||||
# is a hard failure on contexts that *should* have the secret
|
||||
# (push to main/staging, schedule, same-repo PRs, workflow_dispatch).
|
||||
# Fork PRs cannot receive secrets, so the soft warning is preserved
|
||||
# for that one untrusted case. The hermetic sha pin in
|
||||
# sync_canonical_test.go remains the always-on backstop for
|
||||
# hand-edits of core's synced copy.
|
||||
case "${{ github.event_name }}" in
|
||||
push|schedule|workflow_dispatch)
|
||||
is_trusted=true
|
||||
;;
|
||||
pull_request)
|
||||
if [ "${{ github.event.pull_request.head.repo.fork }}" = "false" ]; then
|
||||
is_trusted=true
|
||||
else
|
||||
is_trusted=false
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
# Unknown event type — treat as trusted to avoid silent failures
|
||||
# on a future event we haven't enumerated.
|
||||
is_trusted=true
|
||||
;;
|
||||
esac
|
||||
if [ -z "${AUTO_SYNC_TOKEN:-}" ]; then
|
||||
echo "::warning::AUTO_SYNC_TOKEN secret missing — skipping the live cross-repo compare."
|
||||
if [ "$is_trusted" = "true" ]; then
|
||||
echo "::error::AUTO_SYNC_TOKEN secret missing on trusted context (${{ github.event_name }}). Live cross-repo canonical-drift detection cannot run — this would silently mask a controlplane-side providers.yaml change from going red on the daily schedule and on same-repo PRs. Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to restore detection."
|
||||
exit 1
|
||||
fi
|
||||
echo "::warning::AUTO_SYNC_TOKEN secret missing on untrusted fork PR — skipping the live cross-repo compare (forks cannot receive secrets)."
|
||||
echo "The hermetic sha pin (sync_canonical_test.go) still gates hand-edits of core's copy."
|
||||
echo "Provision AUTO_SYNC_TOKEN (read scope on molecule-controlplane) to enable live canonical-drift detection."
|
||||
exit 0
|
||||
fi
|
||||
CANON_URL="${API_ROOT}/repos/molecule-ai/molecule-controlplane/raw/internal/providers/providers.yaml?ref=main"
|
||||
|
||||
@@ -90,7 +90,13 @@ jobs:
|
||||
# checked-in artifact; exit 1 (RED) on any drift. This is the
|
||||
# single source of the gate's verdict — the same code path
|
||||
# `go test ./cmd/gen-providers` exercises.
|
||||
go run ./cmd/gen-providers -check
|
||||
if ! go run ./cmd/gen-providers -check; then
|
||||
echo "::error::workspace-server/internal/providers/gen/registry_gen.go is stale (drifted from providers.yaml)."
|
||||
echo "Regenerate and commit it (run from repo root):"
|
||||
echo " make gen # native (needs a local Go toolchain)"
|
||||
echo " make gen-docker # Docker only — no local Go needed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Belt-and-braces — regenerate in place and assert clean tree
|
||||
run: |
|
||||
@@ -101,7 +107,9 @@ jobs:
|
||||
go generate ./...
|
||||
if ! git diff --quiet -- internal/providers/gen/registry_gen.go; then
|
||||
echo "::error::workspace-server/internal/providers/gen/registry_gen.go drifted from providers.yaml."
|
||||
echo "Run 'go generate ./...' (or 'go run ./cmd/gen-providers') in workspace-server/ and commit the result."
|
||||
echo "Regenerate and commit it. No local Go? Use Docker (run from repo root):"
|
||||
echo " make gen # native (needs a local Go toolchain)"
|
||||
echo " make gen-docker # Docker only — no local Go needed"
|
||||
git --no-pager diff -- internal/providers/gen/registry_gen.go | head -80
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -4,7 +4,27 @@
|
||||
# use this Makefile; CI calls docker compose / go test directly so the
|
||||
# Makefile can evolve without breaking the build.
|
||||
|
||||
.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check
|
||||
.PHONY: help dev up down logs build test e2e-peer-visibility openapi-spec openapi-spec-check gen gen-docker gen-check gen-check-docker
|
||||
|
||||
# ─── Provider-registry SSOT codegen (internal#718) ─────────────────────
|
||||
# The Go module lives in workspace-server/. The checked-in artifact
|
||||
# workspace-server/internal/providers/gen/registry_gen.go is a gofmt'd
|
||||
# projection of providers.yaml, drift-gated by
|
||||
# .gitea/workflows/verify-providers-gen.yml. `make gen-docker` runs the SAME
|
||||
# generator inside the pinned golang image so a toolchain-less env (an agent
|
||||
# without Go) can regenerate without a local Go install (core#2332 follow-up).
|
||||
#
|
||||
# BYTE-EQUIVALENCE: gen-docker is byte-identical to native only while
|
||||
# GO_VERSION below matches the `go` directive in workspace-server/go.mod.
|
||||
# NOTE: the CI verify workflow pins setup-go go-version: 'stable' (not '1.25').
|
||||
# That is a latent hazard: a future Go release could format the artifact
|
||||
# differently in CI than a local 1.25 toolchain does. Pin CI to '1.25' to close
|
||||
# it (tracked alongside this change).
|
||||
GO_VERSION ?= 1.25
|
||||
GO_IMAGE ?= golang:$(GO_VERSION)
|
||||
DOCKER ?= docker
|
||||
# Mount the Go module (workspace-server) read-write; Go's default -mod=readonly
|
||||
# keeps go.mod/go.sum untouched — only the artifact is written in-place.
|
||||
DOCKER_RUN_WS = $(DOCKER) run --rm -v "$(CURDIR)/workspace-server":/src -w /src $(GO_IMAGE)
|
||||
|
||||
help: ## Show this help.
|
||||
@grep -E '^[a-zA-Z0-9_-]+:.*?## ' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-22s\033[0m %s\n", $$1, $$2}'
|
||||
@@ -56,3 +76,16 @@ openapi-spec: ## Regenerate OpenAPI spec from workspace-server handler annotatio
|
||||
openapi-spec-check: openapi-spec ## CI gate — fail if openapi-spec produces a diff vs the committed file.
|
||||
@git diff --exit-code -- workspace-server/docs/openapi/ \
|
||||
|| (echo "openapi-spec is stale — run 'make openapi-spec' and commit the result" && exit 1)
|
||||
|
||||
# ─── Provider-registry codegen targets ────────────────────────────────
|
||||
gen: ## Regenerate the providers registry artifact natively (needs local Go).
|
||||
cd workspace-server && go generate ./...
|
||||
|
||||
gen-docker: ## Same, inside the pinned $(GO_IMAGE) — Docker only, no local Go.
|
||||
$(DOCKER_RUN_WS) go generate ./...
|
||||
|
||||
gen-check: ## Drift gate (native): exit 1 if the artifact is stale.
|
||||
cd workspace-server && go run ./cmd/gen-providers -check
|
||||
|
||||
gen-check-docker: ## Drift gate inside the pinned $(GO_IMAGE) — Docker only.
|
||||
$(DOCKER_RUN_WS) go run ./cmd/gen-providers -check
|
||||
|
||||
@@ -101,10 +101,19 @@ test.describe("Desktop ChatTab", () => {
|
||||
await textarea.fill("Trigger activity");
|
||||
await page.getByRole("button", { name: /Send/ }).first().click();
|
||||
|
||||
// Activity log container should appear during the send flow.
|
||||
await expect(page.locator("[data-testid='activity-log']").first()).toBeVisible({ timeout: 10_000 }).catch(() => {
|
||||
// Activity log may not be present in all layouts.
|
||||
});
|
||||
// FALSE-GREEN FIX: the prior `.catch(() => {})` swallowed the assertion
|
||||
// entirely, so this test passed whether or not the activity log ever
|
||||
// rendered. The activity-log container is optional per layout, so we
|
||||
// gate on its presence in the DOM: if it's not part of this layout,
|
||||
// skip explicitly (a recorded skip, not a silent pass); if it IS
|
||||
// present, it MUST become visible during the send flow — that's the
|
||||
// behaviour this test exists to protect.
|
||||
const activityLog = page.locator("[data-testid='activity-log']").first();
|
||||
if ((await activityLog.count()) === 0) {
|
||||
test.skip(true, "activity-log not part of this layout");
|
||||
return;
|
||||
}
|
||||
await expect(activityLog).toBeVisible({ timeout: 10_000 });
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -0,0 +1,461 @@
|
||||
/**
|
||||
* Staging canvas E2E — desktop take-control RECONNECT + LEASE-RENEWAL path
|
||||
* (core#2332 "P0.7", the e2e gap left by core#2216).
|
||||
*
|
||||
* Sibling to staging-display.spec.ts. That spec proves the happy path
|
||||
* (acquire → noVNC WS upgrade → first framebuffer frame). It does NOT cover
|
||||
* the two behaviours core#2216 added on top of that happy path:
|
||||
*
|
||||
* (A) RECONNECT re-acquires a FRESH token. When the live WS drops uncleanly
|
||||
* (idle/network blip), DisplayTab.tsx:391-446 calls connect(reacquire=true),
|
||||
* which first awaits reacquireSession() (DisplayTab.tsx:83-99 →
|
||||
* POST /display/control/acquire) to mint a NON-stale lease+token before
|
||||
* reopening the socket. Without this, the cached ~300s token can be past
|
||||
* its expiry and the reconnect would 401 — a dead session that LOOKS like
|
||||
* a reconnect. We assert the reconnect path yields a token bound to a NEW
|
||||
* expires_at AND that a NEW WS opened with that fresh token resumes the
|
||||
* framebuffer (a real frame, not a 1006/403).
|
||||
*
|
||||
* (B) The lease SURVIVES past the 300s window via the renewal cadence.
|
||||
* The lock is a 300s lease with NO server-side auto-renewal
|
||||
* (workspace_display_control.go:27 displayControlDefaultTTLSeconds=300;
|
||||
* loadActiveDisplayControl filters `expires_at > now()`). DisplayTab.tsx:105-111
|
||||
* runs a 120_000ms setInterval that re-acquires as the same holder, which
|
||||
* the server's ON-CONFLICT upsert (workspace_display_control.go:116-123,
|
||||
* `controlled_by = EXCLUDED.controlled_by`) treats as a lease EXTENSION:
|
||||
* expires_at moves forward by a fresh 300s each renewal. We do NOT sleep
|
||||
* 300s of wall-clock to prove this — we drive the renewal CALL the timer
|
||||
* fires (reacquireSession === the same POST) and assert it pushes
|
||||
* expires_at strictly past the ORIGINAL lease window, then confirm the
|
||||
* lock is still live (GET /display/control returns the holder) after a
|
||||
* point in time at which the original, un-renewed lease would already be
|
||||
* expired. That is the observable, deterministic proxy for "the 120s
|
||||
* timer keeps the user from being kicked every ~5 min."
|
||||
*
|
||||
* Auth model, gating, and fail-closed philosophy are IDENTICAL to
|
||||
* staging-display.spec.ts — see that file's header for the full rationale
|
||||
* (same-origin-canvas Origin for the WS upgrade; per-tenant admin bearer for
|
||||
* the acquire/GET POSTs; STAGING_DISPLAY_WORKSPACE_ID is the single activation
|
||||
* knob and a standing desktop EC2 is a CTO cost item; any failure once the gate
|
||||
* env is present is a HARD error, never a silent green, no "flaky" disposition).
|
||||
*
|
||||
* Promote-to-required is a CTO call: like its sibling this only runs when a
|
||||
* standing desktop-capable staging workspace exists, so it cannot be a blanket
|
||||
* required context until that workspace is funded and STAGING_DISPLAY_* is wired
|
||||
* into the e2e-staging-canvas workflow.
|
||||
*/
|
||||
|
||||
import { test, expect } from "@playwright/test";
|
||||
|
||||
const STAGING = process.env.CANVAS_E2E_STAGING === "1";
|
||||
|
||||
// The standing desktop-capable workspace id. Absent => skip loud. Same single
|
||||
// activation knob as staging-display.spec.ts; see that file's header.
|
||||
const DISPLAY_WS_ID = process.env.STAGING_DISPLAY_WORKSPACE_ID;
|
||||
|
||||
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
|
||||
test.skip(
|
||||
!DISPLAY_WS_ID,
|
||||
"STAGING_DISPLAY_WORKSPACE_ID not set — no standing desktop-capable staging " +
|
||||
"workspace to exercise the reconnect/renewal path. Set it to a workspace whose " +
|
||||
"compute.display.mode == 'desktop-control' to activate this real-e2e gate. " +
|
||||
"(Standing that workspace up is a CTO cost item — one always-on desktop EC2.)",
|
||||
);
|
||||
|
||||
// WS upgrade + first-frame budgets mirror staging-display.spec.ts:75-76 — the
|
||||
// EIC tunnel + websockify handshake adds real latency; bounded so a dead path
|
||||
// fails LOUD instead of hanging to the suite timeout.
|
||||
const WS_UPGRADE_TIMEOUT_MS = 30_000;
|
||||
const FIRST_FRAME_TIMEOUT_MS = 30_000;
|
||||
|
||||
// The production lease/renewal contract we are asserting against:
|
||||
// - DEFAULT_TTL_SECONDS: the 300s lease the canvas requests
|
||||
// (DisplayTab.tsx:88 ttl_seconds:300; server default
|
||||
// workspace_display_control.go:27).
|
||||
// - RENEWAL_INTERVAL_MS: the cadence the canvas renews on
|
||||
// (DisplayTab.tsx:109 setInterval(..., 120_000)). We don't sleep it; we
|
||||
// assert the renewal CALL pushes the lease forward.
|
||||
const DEFAULT_TTL_SECONDS = 300;
|
||||
const RENEWAL_INTERVAL_MS = 120_000;
|
||||
|
||||
// Open a real noVNC WebSocket from inside the page (so the browser sends
|
||||
// Origin: <tenant> and the same-origin-canvas AdminAuth path accepts the
|
||||
// upgrade — a browser WS can't set Authorization). Returns the outcome of the
|
||||
// upgrade + first-frame, exactly like staging-display.spec.ts's evaluate
|
||||
// block. Reused here for BOTH the initial connect and the post-drop reconnect
|
||||
// so the two are compared on identical wire mechanics.
|
||||
/** Outcome of one WebSocket connect attempt, as reported by openDisplayWs(). */
type WsResult = {
  /** True only when a non-empty first message was received. */
  ok: boolean;
  /** Short label for the step that produced this outcome (e.g. "frame", "upgrade-timeout"). */
  stage: string;
  /** Human-readable explanation, suitable for an assertion message. */
  detail: string;
  /** Size of the first message, when one arrived. */
  frameBytes?: number;
  /** Runtime kind of the first message payload ("ArrayBuffer", "Blob" or "string"). */
  frameKind?: string;
  /** WebSocket close code, when the outcome came from a close event. */
  closeCode?: number;
};
|
||||
|
||||
/**
 * Opens the display WebSocket from inside the page and reports how far the
 * connection got: token parse → construct → upgrade → first message.
 *
 * @param page           Playwright page already navigated to the tenant origin,
 *                       so the browser sends that Origin on the upgrade.
 * @param rawSessionUrl  The `session_url` returned by the acquire call; the
 *                       token is read from its `#token=` fragment.
 * @returns A WsResult. `ok` is true only when a non-empty first message
 *          arrived. The promise always settles: every failure path (bad token,
 *          constructor throw, upgrade timeout/close/error, close before the
 *          first frame, frame timeout) resolves with `ok: false` rather than
 *          rejecting or hanging.
 */
async function openDisplayWs(
  page: import("@playwright/test").Page,
  rawSessionUrl: string,
): Promise<WsResult> {
  return page.evaluate(
    async ({ rawSessionUrl, upgradeTimeoutMs, frameTimeoutMs }) => {
      // Reproduce DisplayTab.tsx:545-552 (displayWebSocketConnection): resolve
      // against the tenant origin, pull token from the #token fragment, strip
      // the fragment, switch http(s)->ws(s). Then connect with the exact
      // subprotocols the canvas uses (DisplayTab.tsx:402).
      const u = new URL(rawSessionUrl, window.location.href);
      const token =
        new URLSearchParams(u.hash.replace(/^#/, "")).get("token") ?? "";
      if (!token) {
        return { ok: false, stage: "token-parse", detail: "no #token in session_url" };
      }
      u.hash = "";
      u.protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
      const wsUrl = u.toString();

      // WsResult is used in type positions only; types are erased before the
      // callback is serialized into the page, so nothing from the test module
      // is captured at runtime.
      return await new Promise<WsResult>((resolve) => {
        let ws: WebSocket;
        try {
          ws = new WebSocket(wsUrl, [`binary`, `molecule-display-token.${token}`]);
        } catch (e) {
          resolve({ ok: false, stage: "construct", detail: String(e) });
          return;
        }
        ws.binaryType = "arraybuffer";

        let upgraded = false;
        let settled = false;
        let upgradeTimer: ReturnType<typeof setTimeout> | null = null;
        let frameTimer: ReturnType<typeof setTimeout> | null = null;

        // Single exit point: first caller wins, later calls are no-ops. Clears
        // both timers so nothing is left pending in the page after we resolve.
        const finish = (r: WsResult) => {
          if (settled) return;
          settled = true;
          if (upgradeTimer !== null) clearTimeout(upgradeTimer);
          if (frameTimer !== null) clearTimeout(frameTimer);
          try {
            ws.close();
          } catch {
            /* ignore */
          }
          resolve(r);
        };

        upgradeTimer = setTimeout(() => {
          finish({
            ok: false,
            stage: "upgrade-timeout",
            detail: `WS did not open within ${upgradeTimeoutMs}ms (readyState=${ws.readyState})`,
          });
        }, upgradeTimeoutMs);

        ws.onopen = () => {
          upgraded = true;
          if (upgradeTimer !== null) clearTimeout(upgradeTimer);
          frameTimer = setTimeout(() => {
            finish({
              ok: false,
              stage: "frame-timeout",
              detail: `WS upgraded but no framebuffer message within ${frameTimeoutMs}ms`,
            });
          }, frameTimeoutMs);
        };

        ws.onmessage = (ev) => {
          let bytes = 0;
          let kind: string = typeof ev.data;
          if (ev.data instanceof ArrayBuffer) {
            bytes = ev.data.byteLength;
            kind = "ArrayBuffer";
          } else if (typeof Blob !== "undefined" && ev.data instanceof Blob) {
            bytes = ev.data.size;
            kind = "Blob";
          } else if (typeof ev.data === "string") {
            bytes = ev.data.length;
            kind = "string";
          }
          finish({
            ok: bytes > 0,
            stage: "frame",
            detail:
              bytes > 0 ? "received framebuffer message" : "first message was empty",
            frameBytes: bytes,
            frameKind: kind,
          });
        };

        ws.onclose = (ev) => {
          if (!upgraded) {
            finish({
              ok: false,
              stage: "upgrade-close",
              detail: `WS closed before upgrade (code=${ev.code}, reason="${ev.reason}") — handshake rejected somewhere in edge → ws-proxy → EIC → websockify → x11vnc`,
              closeCode: ev.code,
            });
            return;
          }
          // Closed after the upgrade. If a frame was already reported this is
          // our own ws.close() and finish() is a no-op. Otherwise no frame can
          // arrive any more, so fail now with the close code instead of
          // waiting out the frame timer.
          finish({
            ok: false,
            stage: "closed-before-frame",
            detail: `WS upgraded but closed before any framebuffer message (code=${ev.code}, reason="${ev.reason}")`,
            closeCode: ev.code,
          });
        };

        ws.onerror = () => {
          // A post-upgrade error is followed by a close event, which onclose
          // reports with its code; only the pre-upgrade case is handled here.
          if (!upgraded) {
            finish({
              ok: false,
              stage: "upgrade-error",
              detail: "WS error before upgrade — proxy chain rejected the handshake",
            });
          }
        };
      });
    },
    {
      rawSessionUrl,
      upgradeTimeoutMs: WS_UPGRADE_TIMEOUT_MS,
      frameTimeoutMs: FIRST_FRAME_TIMEOUT_MS,
    },
  );
}
|
||||
|
||||
// Extract the opaque signed token from a session_url: everything after the
// first "#token=" marker, or "" when the marker is absent. Used to compare
// tokens across acquires — a reconnect MUST mint a new one; an identical token
// would mean the cached, possibly-expired URL was reused.
function tokenOf(sessionUrl: string): string {
  const marker = "#token=";
  const at = sessionUrl.indexOf(marker);
  if (at < 0) {
    return "";
  }
  return sessionUrl.slice(at + marker.length);
}
|
||||
|
||||
test.describe("staging desktop take-control — reconnect + lease renewal (core#2216)", () => {
|
||||
// Shared staging context resolution — identical to staging-display.spec.ts:90-120.
|
||||
function resolveTenant() {
|
||||
const tenantURL =
|
||||
process.env.STAGING_DISPLAY_TENANT_URL || process.env.STAGING_TENANT_URL;
|
||||
const tenantToken =
|
||||
process.env.STAGING_DISPLAY_TENANT_TOKEN || process.env.STAGING_TENANT_TOKEN;
|
||||
const orgID = process.env.STAGING_DISPLAY_ORG_ID || process.env.STAGING_ORG_ID;
|
||||
if (!tenantURL || !tenantToken) {
|
||||
throw new Error(
|
||||
"STAGING_DISPLAY_WORKSPACE_ID is set but no tenant URL/token is available " +
|
||||
"for the reconnect/renewal gate. Set STAGING_DISPLAY_SLUG so staging-setup.ts " +
|
||||
"resolves STAGING_DISPLAY_TENANT_URL / STAGING_DISPLAY_TENANT_TOKEN for the " +
|
||||
"standing desktop org (or ensure the ephemeral STAGING_TENANT_* exports exist).",
|
||||
);
|
||||
}
|
||||
return { tenantURL, tenantToken, orgID };
|
||||
}
|
||||
|
||||
test.beforeEach(async ({ context }) => {
|
||||
const { tenantToken, orgID } = resolveTenant();
|
||||
await context.setExtraHTTPHeaders({
|
||||
Authorization: `Bearer ${tenantToken}`,
|
||||
...(orgID ? { "X-Molecule-Org-Id": orgID } : {}),
|
||||
});
|
||||
});
|
||||
|
||||
test("reconnect re-acquires a FRESH token and the framebuffer resumes", async ({
|
||||
page,
|
||||
}) => {
|
||||
const { tenantURL } = resolveTenant();
|
||||
const workspaceId = DISPLAY_WS_ID as string;
|
||||
|
||||
// Sanity: workspace must be display-available, else the gate is meaningless.
|
||||
const availResp = await page.request.get(
|
||||
`${tenantURL}/workspaces/${workspaceId}/display`,
|
||||
);
|
||||
expect(availResp.status(), `GET /display for ${workspaceId} should be 200`).toBe(200);
|
||||
const avail = await availResp.json();
|
||||
expect(
|
||||
avail.available,
|
||||
`workspace ${workspaceId} is not display-available (reason=${avail.reason}).`,
|
||||
).toBe(true);
|
||||
|
||||
// 1. Initial acquire — the happy-path lease the user starts with.
|
||||
const firstResp = await page.request.post(
|
||||
`${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
|
||||
{ data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
|
||||
);
|
||||
expect(
|
||||
firstResp.status(),
|
||||
`initial acquire should be 200; body: ${await firstResp.text()}`,
|
||||
).toBe(200);
|
||||
const first = await firstResp.json();
|
||||
expect(first.controller, "controller should be 'user'").toBe("user");
|
||||
expect(typeof first.session_url, "acquire missing session_url").toBe("string");
|
||||
const firstUrl: string = first.session_url;
|
||||
expect(firstUrl, "session_url should carry #token=").toContain("#token=");
|
||||
const firstToken = tokenOf(firstUrl);
|
||||
expect(firstToken.length, "first token should be non-empty").toBeGreaterThan(0);
|
||||
|
||||
// Anchor Origin to the tenant so the same-origin-canvas WS upgrade is accepted.
|
||||
await page.goto(tenantURL, { waitUntil: "domcontentloaded" });
|
||||
|
||||
// 2. Establish the live WS on the FIRST token — proves the session is real.
|
||||
const initial = await openDisplayWs(page, firstUrl);
|
||||
expect(
|
||||
initial.ok,
|
||||
`initial connect failed at stage="${initial.stage}": ${initial.detail}` +
|
||||
(initial.closeCode ? ` (close code ${initial.closeCode})` : ""),
|
||||
).toBe(true);
|
||||
expect(initial.stage, `initial connect should reach 'frame'; got '${initial.stage}'`).toBe(
|
||||
"frame",
|
||||
);
|
||||
|
||||
// 3. Simulate an unclean drop. openDisplayWs() already closed its socket
|
||||
// on finish(), so the live stream is gone here — exactly the state
|
||||
// DisplayTab's "disconnect" handler (DisplayTab.tsx:426-442) enters
|
||||
// before it calls connect(reacquire=true).
|
||||
|
||||
// 4. Reconnect path: mint a FRESH lease+token FIRST, the way
|
||||
// connect(reacquire=true) → reacquireSession() does (DisplayTab.tsx:397
|
||||
// / :83-99). This is a re-acquire by the SAME holder, so the server's
|
||||
// ON-CONFLICT upsert extends the lease and returns a new signed URL.
|
||||
const reResp = await page.request.post(
|
||||
`${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
|
||||
{ data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } },
|
||||
);
|
||||
expect(
|
||||
reResp.status(),
|
||||
`reconnect re-acquire should be 200 (same holder extends, not 409); body: ${await reResp.text()}`,
|
||||
).toBe(200);
|
||||
const re = await reResp.json();
|
||||
expect(re.controller, "reconnect controller should still be 'user'").toBe("user");
|
||||
expect(typeof re.session_url, "reconnect acquire missing session_url").toBe("string");
|
||||
const reUrl: string = re.session_url;
|
||||
const reToken = tokenOf(reUrl);
|
||||
expect(reToken.length, "reconnect token should be non-empty").toBeGreaterThan(0);
|
||||
|
||||
// The reconnect token MUST be fresh — bound to the new expires_at. A
|
||||
// reused token would mean the canvas fell back to a cached, soon-expiring
|
||||
// URL, which is precisely the 401-on-reconnect bug core#2216 fixed. The
|
||||
// signed token embeds expires_at.Unix() (workspace_display_control.go:390),
|
||||
// so a later expiry => a different signature => a different token.
|
||||
expect(
|
||||
reToken,
|
||||
"reconnect should mint a FRESH token (bound to the renewed expires_at), " +
|
||||
"not reuse the original ~300s token — a reused token is the core#2216 401 bug.",
|
||||
).not.toBe(firstToken);
|
||||
expect(
|
||||
new Date(re.expires_at).getTime(),
|
||||
"renewed expires_at should be >= the original (lease extended, not shrunk)",
|
||||
).toBeGreaterThanOrEqual(new Date(first.expires_at).getTime());
|
||||
|
||||
// 5. Reopen the WS on the FRESH token and assert the framebuffer RESUMES —
|
||||
// a real frame, not a dead 1006/403 session. This is the crux: the
|
||||
// reconnect produces a LIVE stream, not a stale-token rejection.
|
||||
const reconnected = await openDisplayWs(page, reUrl);
|
||||
expect(
|
||||
reconnected.ok,
|
||||
`RECONNECT failed at stage="${reconnected.stage}": ${reconnected.detail}` +
|
||||
(reconnected.closeCode ? ` (close code ${reconnected.closeCode})` : "") +
|
||||
" — a 1006/403 here means the fresh-token reconnect did NOT re-establish " +
|
||||
"the proxy chain (edge → ws-proxy → EIC → websockify → x11vnc).",
|
||||
).toBe(true);
|
||||
expect(
|
||||
reconnected.stage,
|
||||
`reconnect should reach 'frame' (framebuffer resumed); got '${reconnected.stage}' (${reconnected.detail})`,
|
||||
).toBe("frame");
|
||||
expect(
|
||||
reconnected.frameBytes ?? 0,
|
||||
`resumed framebuffer message should be non-empty (kind=${reconnected.frameKind})`,
|
||||
).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
/**
 * Lease-renewal gate: issuing the same acquire POST a second time, as the
 * current holder, must move expires_at strictly past the deadline of the
 * original lease, and the lock must still report a live "user" holder.
 *
 * The test does not sleep for the renewal interval; it drives the call the
 * renewal timer would make and asserts the effect on the lease.
 */
test("renewal pushes the lease past the original 300s window (no kick at ~5min)", async ({
  page,
}) => {
  const { tenantURL } = resolveTenant();
  const workspaceId = DISPLAY_WS_ID as string;
  const acquireURL = `${tenantURL}/workspaces/${workspaceId}/display/control/acquire`;
  const acquireBody = { data: { controller: "user", ttl_seconds: DEFAULT_TTL_SECONDS } };

  // 1. Acquire the initial lease.
  const firstResp = await page.request.post(acquireURL, acquireBody);
  expect(
    firstResp.status(),
    `initial acquire should be 200; body: ${await firstResp.text()}`,
  ).toBe(200);
  const first = await firstResp.json();
  const firstExpiry = new Date(first.expires_at).getTime();
  expect(Number.isFinite(firstExpiry), "first expires_at should parse").toBe(true);

  // The original lease's hard ceiling: when the un-renewed token/lock dies.
  const originalLeaseDeadlineMs = firstExpiry;

  // 2. Guard the timer configuration, then fire the renewal call itself
  //    (DisplayTab.tsx:107-109 → reacquireSession → this same POST). The
  //    interval must sit inside the TTL so a renew always lands before
  //    expiry; a future change widening it past the TTL fails here.
  expect(
    RENEWAL_INTERVAL_MS,
    "renewal interval must be strictly inside the lease TTL, else the lease " +
      "expires before the timer renews it (user gets kicked).",
  ).toBeLessThan(DEFAULT_TTL_SECONDS * 1000);

  const renewResp = await page.request.post(acquireURL, acquireBody);
  expect(
    renewResp.status(),
    `renewal re-acquire should be 200 (same holder extends); body: ${await renewResp.text()}`,
  ).toBe(200);
  const renew = await renewResp.json();
  const renewedExpiry = new Date(renew.expires_at).getTime();
  // Without this guard a missing/garbled expires_at shows up as an opaque
  // NaN comparison failure in step 3 instead of naming the real problem.
  expect(
    Number.isFinite(renewedExpiry),
    `renewed expires_at should parse (got ${JSON.stringify(renew.expires_at)})`,
  ).toBe(true);

  // 3. The renewal MUST push expires_at strictly PAST the original lease
  //    window: a fresh TTL starting now, requested before the original one
  //    elapsed, ends later than the original deadline.
  expect(
    renewedExpiry,
    "renewal should extend the lease strictly past the original 300s deadline " +
      `(original=${first.expires_at}, renewed=${renew.expires_at}). Equal-or-earlier ` +
      "means the renewal did NOT extend — the 120s timer would not save the session.",
  ).toBeGreaterThan(originalLeaseDeadlineMs);

  // 4. Confirm the lock is still LIVE after renewal. Per the original note,
  //    GET /display/control only returns a holder while expires_at > now()
  //    (loadActiveDisplayControl, workspace_display_control.go:280).
  const ctrlResp = await page.request.get(
    `${tenantURL}/workspaces/${workspaceId}/display/control`,
  );
  expect(ctrlResp.status(), "GET /display/control should be 200").toBe(200);
  const ctrl = await ctrlResp.json();
  expect(
    ctrl.controller,
    "after renewal the lock should still report a live holder (not 'none')",
  ).toBe("user");
  const liveExpiry = new Date(ctrl.expires_at).getTime();
  expect(
    Number.isFinite(liveExpiry),
    `live lock expires_at should parse (got ${JSON.stringify(ctrl.expires_at)})`,
  ).toBe(true);
  // The message states exactly what is asserted: the live lock outlives the
  // ORIGINAL deadline. It is deliberately not an equality check against the
  // renewed value, which this test has no grounds to require.
  expect(
    liveExpiry,
    "the live lock's expires_at should be past the original lease deadline " +
      `(original=${first.expires_at}, live=${ctrl.expires_at}) — the lock must be ` +
      "running on the renewed lease, not the original.",
  ).toBeGreaterThan(originalLeaseDeadlineMs);

  // TODO(core#2332, CTO cost item): the assertions above prove the renewal
  // CALL extends the lease past the original window — the deterministic proxy
  // for "the 120s interval keeps the lease alive past 300s." To additionally
  // prove the lease survives a FULL real-time 300s+ idle WS (the literal
  // wall-clock claim), a long-lived test would hold one WS open >300s while
  // the 120s timer renews underneath and assert the SAME socket never 1006s.
  // That needs >5 min of standing-desktop wall-clock per run and is gated on
  // the standing desktop EC2 being funded; it is NOT exercised here. Promote
  // either form to a REQUIRED context only on CTO sign-off (cost + cadence).
});
|
||||
});
|
||||
@@ -0,0 +1,329 @@
|
||||
/**
|
||||
* Staging canvas E2E — REAL desktop take-control path (core#2261 "Gap 1").
|
||||
*
|
||||
* This is the live-e2e gate that the existing staging-tabs.spec.ts does NOT
|
||||
* provide. staging-tabs only opens the 13 declared workspace-panel tabs
|
||||
* (TAB_IDS at staging-tabs.spec.ts:24-38 — `display` is NOT among them) and
|
||||
* asserts they render without a "Failed to load" toast. It never acquires
|
||||
* display control, never opens the noVNC WebSocket, and never asserts a
|
||||
* framebuffer frame arrives. The companion unit test
|
||||
* canvas/src/components/tabs/__tests__/DisplayTab.test.tsx mocks the RFB
|
||||
* constructor (vi.mock("@novnc/novnc"), see its lines 8/20-39) so NO real
|
||||
* WebSocket is ever opened there either. Result: a broken take-control path
|
||||
* (acquire → noVNC WS upgrade → ws-proxy → EIC → websockify → x11vnc → Xvfb)
|
||||
* ships GREEN. This spec closes that gap by exercising the REAL wire path
|
||||
* end to end against a live, desktop-capable staging workspace.
|
||||
*
|
||||
* What it asserts (the real path, no mocks):
|
||||
* 1. POST /workspaces/<id>/display/control/acquire returns 200 with a
|
||||
* session_url that carries the signed token in its `#token=` fragment
|
||||
* (mirrors workspace_display_control.go:signedDisplaySessionURL).
|
||||
* 2. Opening the noVNC WebSocket at session_url with the subprotocols
|
||||
* ["binary", "molecule-display-token.<token>"] (exactly what the canvas
|
||||
* sends — DisplayTab.tsx:339) UPGRADES (onopen fires, readyState===OPEN,
|
||||
* no immediate 1006 abnormal close). A 1006 / 403 means the handshake
|
||||
* failed somewhere in the proxy chain.
|
||||
* 3. At least one BINARY framebuffer message arrives on that socket — a
|
||||
* real frame off x11vnc, not just a panel mount. RFB sends a
|
||||
* ProtocolVersion banner ("RFB 003.00x\n") as the first server message,
|
||||
* which proves the upstream VNC server is live behind the EIC tunnel.
|
||||
*
|
||||
* Auth model (important): the WS upgrade is gated by workspace-server
|
||||
* middleware.AdminAuth. A browser WebSocket CANNOT set an Authorization
|
||||
* header, so in production the canvas WS upgrade passes AdminAuth via the
|
||||
* same-origin-canvas path (wsauth_middleware.go:isSameOriginCanvas, which
|
||||
* keys off the Origin header the browser sets automatically on a same-origin
|
||||
* WS upgrade). We therefore open the socket from inside the browser page via
|
||||
* page.evaluate AFTER navigating to the tenant origin — so the browser sends
|
||||
* `Origin: https://<slug>.staging.moleculesai.app`, exactly as production
|
||||
* does. The acquire POST (which CAN carry a header) uses the per-tenant admin
|
||||
* bearer set on the context. This is the faithful production handshake, not a
|
||||
* synthetic one.
|
||||
*
|
||||
* Gate / cost: this test only runs when STAGING_DISPLAY_WORKSPACE_ID points
|
||||
* at a STANDING desktop-capable workspace (compute.display.mode ==
|
||||
* "desktop-control"). We deliberately do NOT provision one in the shared
|
||||
* staging-setup.ts: a desktop AMI boots in ~12-15 min and would tax the
|
||||
* existing tabs harness on every run. Standing that workspace up is a cost
|
||||
* item for the CTO (one always-on desktop EC2 on staging). Until that exists,
|
||||
* the test SKIPS loud. When the env IS present, any failure in
|
||||
* provision/acquire/upgrade is a HARD error — fail-closed, never silently
|
||||
* green (no "flaky" disposition: a 1006 names a broken proxy hop).
|
||||
*/
|
||||
|
||||
import { test, expect } from "@playwright/test";
|
||||
|
||||
// Staging-only switch: the suite runs only when CANVAS_E2E_STAGING is "1".
const STAGING = process.env.CANVAS_E2E_STAGING === "1";

// Id of the standing desktop-capable workspace. This is the single knob that
// activates the gate; when it is absent the suite skips with a loud reason.
const DISPLAY_WS_ID = process.env.STAGING_DISPLAY_WORKSPACE_ID;

// Budgets for the WS upgrade and for the first server message. Generous,
// because the EIC tunnel + websockify handshake adds latency on top of the
// edge, but bounded so a dead path fails loudly instead of hanging until the
// suite timeout.
const WS_UPGRADE_TIMEOUT_MS = 30_000;
const FIRST_FRAME_TIMEOUT_MS = 30_000;

// Skip reason shown when no standing desktop workspace is configured.
const NO_DISPLAY_WORKSPACE_REASON = [
  "STAGING_DISPLAY_WORKSPACE_ID not set — no standing desktop-capable staging",
  "workspace to exercise the take-control path. Set it to a workspace whose",
  "compute.display.mode == 'desktop-control' to activate this real-e2e gate.",
  "(Standing that workspace up is a CTO cost item — one always-on desktop EC2.)",
].join(" ");

test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
test.skip(!DISPLAY_WS_ID, NO_DISPLAY_WORKSPACE_REASON);
|
||||
|
||||
/**
 * Live take-control gate. Against a standing desktop-capable workspace it:
 *   0. checks GET /display reports the workspace as display-available,
 *   1. acquires display control and validates the session_url contract,
 *   2. opens the real WebSocket from inside the page (so the browser sets
 *      Origin) with the canvas' subprotocols,
 *   3. requires a non-empty first server message on that socket.
 *
 * Every failure is reported with a `stage` naming where the probe stopped:
 * token-parse, construct, upgrade-timeout, upgrade-close, upgrade-error,
 * frame-timeout, frame-close, or frame (reached, but the message was empty).
 */
test.describe("staging desktop take-control (real noVNC path)", () => {
  test("acquire → WS upgrades → first framebuffer frame arrives", async ({
    page,
    context,
  }) => {
    // Outcome of the in-page WebSocket probe. Declared once and shared by
    // every return path; types are erased before the callback is serialized
    // into the browser.
    type WsProbeResult = {
      ok: boolean;
      stage: string;
      detail: string;
      frameBytes?: number;
      frameKind?: string;
      closeCode?: number;
    };

    // The standing desktop workspace lives in its OWN standing org (the
    // per-run ephemeral org is torn down each run). When STAGING_DISPLAY_SLUG
    // is configured, staging-setup.ts exports that org's tenant URL / admin
    // token / org id under STAGING_DISPLAY_*. Fall back to the ephemeral
    // org's exports only when the display org was not separately configured.
    const tenantURL =
      process.env.STAGING_DISPLAY_TENANT_URL || process.env.STAGING_TENANT_URL;
    const tenantToken =
      process.env.STAGING_DISPLAY_TENANT_TOKEN || process.env.STAGING_TENANT_TOKEN;
    const orgID =
      process.env.STAGING_DISPLAY_ORG_ID || process.env.STAGING_ORG_ID;

    // Fail-closed: once past the skips, the staging context MUST be wired or
    // this is a hard error, never a silent pass.
    if (!tenantURL || !tenantToken) {
      throw new Error(
        "STAGING_DISPLAY_WORKSPACE_ID is set but no tenant URL/token is available " +
          "for the take-control gate. Set STAGING_DISPLAY_SLUG so staging-setup.ts " +
          "resolves STAGING_DISPLAY_TENANT_URL / STAGING_DISPLAY_TENANT_TOKEN for the " +
          "standing desktop org (or ensure the ephemeral STAGING_TENANT_* exports exist).",
      );
    }

    const workspaceId = DISPLAY_WS_ID as string;

    // The per-tenant admin bearer authenticates the HTTP calls below (they
    // can carry a header). The WS upgrade relies on Origin instead.
    await context.setExtraHTTPHeaders({
      Authorization: `Bearer ${tenantToken}`,
      // X-Molecule-Org-Id is sent only when an org id was exported; per the
      // original note it is needed for requests routed through the CP edge
      // and harmless on the same-origin tenant box.
      ...(orgID ? { "X-Molecule-Org-Id": orgID } : {}),
    });

    // 0. Sanity: the workspace must actually be display-enabled. Checking
    //    availability first makes a mis-pointed STAGING_DISPLAY_WORKSPACE_ID
    //    fail with a precise message instead of an opaque acquire error.
    const availResp = await page.request.get(
      `${tenantURL}/workspaces/${workspaceId}/display`,
    );
    expect(
      availResp.status(),
      `GET /display for ${workspaceId} should be 200`,
    ).toBe(200);
    const avail = await availResp.json();
    expect(
      avail.available,
      `workspace ${workspaceId} is not display-available (reason=${avail.reason}). ` +
        "STAGING_DISPLAY_WORKSPACE_ID must point at a workspace with " +
        "compute.display.mode == 'desktop-control' AND a live instance_id.",
    ).toBe(true);

    // 1. Acquire display control. The response is expected to carry
    //    controller and session_url.
    const acquireResp = await page.request.post(
      `${tenantURL}/workspaces/${workspaceId}/display/control/acquire`,
      { data: { controller: "user", ttl_seconds: 300 } },
    );
    expect(
      acquireResp.status(),
      `acquire should be 200; body: ${await acquireResp.text()}`,
    ).toBe(200);
    const acquire = await acquireResp.json();
    expect(acquire.controller, "controller should be 'user'").toBe("user");
    expect(
      typeof acquire.session_url,
      `acquire response missing session_url: ${JSON.stringify(acquire)}`,
    ).toBe("string");

    // The token rides in the URL fragment (#token=...), never as a query
    // param. Pin that contract so a server-side change to the URL shape
    // fails HERE rather than deep inside the socket probe.
    const sessionUrl: string = acquire.session_url;
    expect(
      sessionUrl,
      `session_url should carry the token in a #token= fragment: ${sessionUrl}`,
    ).toContain("#token=");

    // 2. Navigate to the tenant origin purely to anchor the Origin header of
    //    the WebSocket opened below; the canvas bundle need not hydrate.
    await page.goto(tenantURL, { waitUntil: "domcontentloaded" });

    // Resolve session_url against the page origin, pull the token out of the
    // fragment, strip the fragment, switch http(s) -> ws(s), then connect
    // with the subprotocols ["binary", "molecule-display-token.<token>"].
    const result = await page.evaluate(
      async ({ rawSessionUrl, upgradeTimeoutMs, frameTimeoutMs }): Promise<WsProbeResult> => {
        const u = new URL(rawSessionUrl, window.location.href);
        const token =
          new URLSearchParams(u.hash.replace(/^#/, "")).get("token") ?? "";
        if (!token) {
          return { ok: false, stage: "token-parse", detail: "no #token in session_url" };
        }
        u.hash = "";
        u.protocol = window.location.protocol === "https:" ? "wss:" : "ws:";
        const wsUrl = u.toString();

        return await new Promise<WsProbeResult>((resolve) => {
          // Construct the socket BEFORE defining finish(), so finish() never
          // touches an uninitialised binding.
          let socket: WebSocket;
          try {
            socket = new WebSocket(wsUrl, [`binary`, `molecule-display-token.${token}`]);
          } catch (e) {
            resolve({ ok: false, stage: "construct", detail: String(e) });
            return;
          }
          socket.binaryType = "arraybuffer";

          let upgraded = false;
          let settled = false;
          let upgradeTimer: ReturnType<typeof setTimeout> | null = null;
          let frameTimer: ReturnType<typeof setTimeout> | null = null;

          // Settle exactly once: cancel both timers so none fires after the
          // verdict, close the socket, and hand back the result.
          const finish = (r: WsProbeResult) => {
            if (settled) return;
            settled = true;
            if (upgradeTimer) clearTimeout(upgradeTimer);
            if (frameTimer) clearTimeout(frameTimer);
            try {
              socket.close();
            } catch {
              /* ignore */
            }
            resolve(r);
          };

          upgradeTimer = setTimeout(() => {
            finish({
              ok: false,
              stage: "upgrade-timeout",
              detail: `WS did not open within ${upgradeTimeoutMs}ms (readyState=${socket.readyState})`,
            });
          }, upgradeTimeoutMs);

          socket.onopen = () => {
            upgraded = true;
            if (upgradeTimer) clearTimeout(upgradeTimer);
            // Now wait for the first server message; if nothing arrives the
            // tunnel opened but nothing behind it is talking.
            frameTimer = setTimeout(() => {
              finish({
                ok: false,
                stage: "frame-timeout",
                detail: `WS upgraded but no framebuffer message within ${frameTimeoutMs}ms`,
              });
            }, frameTimeoutMs);
          };

          socket.onmessage = (ev) => {
            // Measure the payload whichever representation the browser used.
            let bytes = 0;
            let kind: string = typeof ev.data;
            if (ev.data instanceof ArrayBuffer) {
              bytes = ev.data.byteLength;
              kind = "ArrayBuffer";
            } else if (typeof Blob !== "undefined" && ev.data instanceof Blob) {
              bytes = ev.data.size;
              kind = "Blob";
            } else if (typeof ev.data === "string") {
              bytes = ev.data.length;
              kind = "string";
            }
            finish({
              ok: bytes > 0,
              stage: "frame",
              detail:
                bytes > 0
                  ? "received framebuffer message"
                  : "first message was empty",
              frameBytes: bytes,
              frameKind: kind,
            });
          };

          socket.onclose = (ev) => {
            if (!upgraded) {
              // A close BEFORE open === failed upgrade (1006 abnormal / 403
              // forbidden surface here).
              finish({
                ok: false,
                stage: "upgrade-close",
                detail: `WS closed before upgrade (code=${ev.code}, reason="${ev.reason}") — handshake rejected somewhere in edge → ws-proxy → EIC → websockify → x11vnc`,
                closeCode: ev.code,
              });
              return;
            }
            // Closed after the upgrade. If a verdict was already reached this
            // is our own finish() closing the socket and the call below is a
            // no-op. Otherwise the peer dropped the connection before sending
            // anything: fail NOW with the close code instead of waiting out
            // the frame timeout and reporting a generic "frame-timeout".
            finish({
              ok: false,
              stage: "frame-close",
              detail: `WS upgraded but closed before any framebuffer message (code=${ev.code}, reason="${ev.reason}")`,
              closeCode: ev.code,
            });
          };

          socket.onerror = () => {
            if (!upgraded) {
              finish({
                ok: false,
                stage: "upgrade-error",
                detail: "WS error before upgrade — proxy chain rejected the handshake",
              });
            }
          };
        });
      },
      {
        rawSessionUrl: sessionUrl,
        upgradeTimeoutMs: WS_UPGRADE_TIMEOUT_MS,
        frameTimeoutMs: FIRST_FRAME_TIMEOUT_MS,
      },
    );

    // 3. Assert the real outcome. No "flaky" escape hatch: each failure stage
    //    names the broken hop so a reviewer can act on it directly.
    expect(
      result.ok,
      `take-control failed at stage="${result.stage}": ${result.detail}` +
        (result.closeCode ? ` (close code ${result.closeCode})` : ""),
    ).toBe(true);
    expect(
      result.stage,
      `expected to reach the 'frame' stage; got '${result.stage}' (${result.detail})`,
    ).toBe("frame");
    expect(
      result.frameBytes ?? 0,
      `framebuffer message should be non-empty (kind=${result.frameKind})`,
    ).toBeGreaterThan(0);
  });
});
|
||||
@@ -337,13 +337,99 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
||||
|
||||
// 7. Hand state off to tests + teardown — overwrite the slug-only
|
||||
// bootstrap state with the full state spec tests need.
|
||||
writeFileSync(
|
||||
stateFile,
|
||||
JSON.stringify({ slug, tenantURL, workspaceId, tenantToken }, null, 2),
|
||||
);
|
||||
//
|
||||
// FAIL-CLOSED handoff: every field the spec reads must be non-empty. If
|
||||
// any is missing here, the spec's env-presence guard would throw with a
|
||||
// generic "did setup run?" message that hides WHICH field was lost. Catch
|
||||
// it at the source — a partial provision must hard-fail setup, never hand
|
||||
// off a half-built state that the spec then has to diagnose (or worse,
|
||||
// skip). This is the loud, fail-closed contract: STAGING was requested,
|
||||
// so an incomplete provision is an error, not a skip.
|
||||
const handoff = { slug, tenantURL, workspaceId, tenantToken };
|
||||
const missingFields = Object.entries(handoff)
|
||||
.filter(([, v]) => !v)
|
||||
.map(([k]) => k);
|
||||
if (missingFields.length > 0) {
|
||||
throw new Error(
|
||||
`[staging-setup] provision incomplete — empty handoff field(s): ` +
|
||||
`${missingFields.join(", ")}. Refusing to hand off a partial state ` +
|
||||
`that would surface downstream as an opaque spec failure.`,
|
||||
);
|
||||
}
|
||||
writeFileSync(stateFile, JSON.stringify(handoff, null, 2));
|
||||
process.env.STAGING_SLUG = slug;
|
||||
process.env.STAGING_TENANT_URL = tenantURL;
|
||||
process.env.STAGING_WORKSPACE_ID = workspaceId;
|
||||
process.env.STAGING_TENANT_TOKEN = tenantToken;
|
||||
// The ephemeral org's UUID — exported so specs that route through the CP
|
||||
// edge can send X-Molecule-Org-Id (workspace-server TenantGuard). The tabs
|
||||
// harness hits the tenant box same-origin and doesn't need it, but the
|
||||
// take-control gate (staging-display.spec.ts) does.
|
||||
process.env.STAGING_ORG_ID = orgID;
|
||||
console.log(`[staging-setup] Ready — ${stateFile}`);
|
||||
|
||||
// 8. (core#2261 Gap 1) Resolve the STANDING desktop-capable org, if one is
|
||||
// configured, for the live take-control e2e (staging-display.spec.ts).
|
||||
//
|
||||
// This block is FULLY env-gated and additive: it provisions NOTHING and is
|
||||
// a no-op unless STAGING_DISPLAY_SLUG is set. We deliberately do NOT spin a
|
||||
// desktop workspace inside this shared setup — a desktop AMI boots in
|
||||
// ~12-15 min and would tax every tabs run. Instead an operator stands up
|
||||
// one always-on desktop org once (a CTO cost item) and points
|
||||
// STAGING_DISPLAY_SLUG + STAGING_DISPLAY_WORKSPACE_ID at it. Here we just
|
||||
// resolve that standing org's tenant URL, admin token, and org id so the
|
||||
// display spec can reach it. Fail-closed: if STAGING_DISPLAY_SLUG is set but
|
||||
// we can't resolve its token/id, we THROW — the gate must never silently
|
||||
// fall back to the (non-desktop) ephemeral org and pass.
|
||||
const displaySlug = process.env.STAGING_DISPLAY_SLUG;
|
||||
if (displaySlug) {
|
||||
console.log(`[staging-setup] Resolving standing desktop org: ${displaySlug}`);
|
||||
|
||||
// org id for the standing slug (admin-orgs row carries it + status).
|
||||
const orgsRes = await jsonFetch(`${CP_URL}/cp/admin/orgs`, { headers: adminAuth });
|
||||
if (orgsRes.status !== 200) {
|
||||
throw new Error(
|
||||
`STAGING_DISPLAY_SLUG=${displaySlug} set, but GET /cp/admin/orgs returned ` +
|
||||
`${orgsRes.status} — cannot resolve the standing desktop org for the ` +
|
||||
`take-control gate.`,
|
||||
);
|
||||
}
|
||||
const displayRow = (orgsRes.body?.orgs || []).find(
|
||||
(o: any) => o.slug === displaySlug,
|
||||
);
|
||||
if (!displayRow?.id) {
|
||||
throw new Error(
|
||||
`STAGING_DISPLAY_SLUG=${displaySlug} not found in /cp/admin/orgs — the ` +
|
||||
`standing desktop org for the take-control gate does not exist. Provision ` +
|
||||
`it (one always-on desktop EC2) or unset STAGING_DISPLAY_SLUG/` +
|
||||
`STAGING_DISPLAY_WORKSPACE_ID to skip the gate.`,
|
||||
);
|
||||
}
|
||||
if (displayRow.instance_status !== "running") {
|
||||
throw new Error(
|
||||
`Standing desktop org ${displaySlug} is '${displayRow.instance_status}', ` +
|
||||
`not 'running' — the take-control gate needs a live desktop tenant. ` +
|
||||
`full row: ${JSON.stringify(displayRow)}`,
|
||||
);
|
||||
}
|
||||
|
||||
const displayTokRes = await jsonFetch(
|
||||
`${CP_URL}/cp/admin/orgs/${displaySlug}/admin-token`,
|
||||
{ headers: adminAuth },
|
||||
);
|
||||
if (displayTokRes.status !== 200 || !displayTokRes.body?.admin_token) {
|
||||
throw new Error(
|
||||
`admin-token fetch for standing desktop org ${displaySlug} returned ` +
|
||||
`${displayTokRes.status}: ${JSON.stringify(displayTokRes.body)}`,
|
||||
);
|
||||
}
|
||||
|
||||
process.env.STAGING_DISPLAY_ORG_ID = displayRow.id;
|
||||
process.env.STAGING_DISPLAY_TENANT_URL = `https://${displaySlug}.${TENANT_DOMAIN}`;
|
||||
process.env.STAGING_DISPLAY_TENANT_TOKEN = displayTokRes.body.admin_token;
|
||||
console.log(
|
||||
`[staging-setup] Standing desktop org resolved: ${displaySlug} ` +
|
||||
`(org_id=${displayRow.id}, url=${process.env.STAGING_DISPLAY_TENANT_URL})`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
+305
-33
@@ -1,7 +1,8 @@
|
||||
/**
|
||||
* Staging canvas E2E — opens each of the 13 workspace-panel tabs against a
|
||||
* fresh staging org provisioned in the global setup. Asserts each tab
|
||||
* renders without throwing and captures a screenshot for visual review.
|
||||
* Staging canvas E2E — opens each workspace-panel tab against a fresh
|
||||
* staging org provisioned in the global setup. Asserts each tab renders
|
||||
* REAL content (not an empty container, not an error state) and captures a
|
||||
* screenshot for visual review.
|
||||
*
|
||||
* Auth model: the tenant platform's AdminAuth middleware accepts a bearer
|
||||
* token OR a WorkOS session cookie. Playwright can't mint a WorkOS
|
||||
@@ -10,17 +11,39 @@
|
||||
* Bearer header via context.setExtraHTTPHeaders(). Every browser
|
||||
* request inherits the header.
|
||||
*
|
||||
* Known SaaS gaps — documented in #1369 and allowed to render errored
|
||||
* content without failing the test (the gate is "no hard crash, no
|
||||
* 'Failed to load' toast"):
|
||||
* PROMOTION-READINESS (see § at bottom of file): this suite is being
|
||||
* hardened toward becoming a HARD merge-gate. It currently runs under
|
||||
* `continue-on-error: true` (RFC internal#219 §1, non-gating) — that is a
|
||||
* deliberate, CTO-owned call and is NOT changed here. The hardening makes
|
||||
* every assertion deterministic so that WHEN promotion happens the gate
|
||||
* does not flap. See the PROMOTION-READINESS block at the foot of this
|
||||
* file for what is now reliable and what still blocks promotion.
|
||||
*
|
||||
* Known SaaS gaps — documented in #1369. These tabs legitimately cannot
|
||||
* load real content in SaaS mode and are allowed an in-panel empty/error
|
||||
* state (NOT a hard crash, NOT an ErrorBoundary):
|
||||
* - Files tab: empty (platform can't docker exec into a remote EC2)
|
||||
* - Terminal tab: WS connect fails
|
||||
* - Peers tab: 401 without workspace-scoped token
|
||||
* These are enumerated in KNOWN_DEGRADED_TABS below and asserted with a
|
||||
* weaker (but still non-trivial) contract: the panel renders and does not
|
||||
* crash the app. Every OTHER tab must render real content.
|
||||
*/
|
||||
|
||||
import { test, expect } from "@playwright/test";
|
||||
import { test, expect, type Page } from "@playwright/test";
|
||||
|
||||
// Tab ids as declared in canvas/src/components/SidePanel.tsx TABS.
|
||||
//
|
||||
// NOTE (drift guard): this list is asserted-complete against the live DOM
|
||||
// below (see "tab list parity" step) so it cannot silently drift out of
|
||||
// sync with SidePanel.tsx TABS the way a hand-maintained constant does.
|
||||
// `display` and `container-config` are intentionally EXCLUDED here:
|
||||
// - `display` is owned by the in-flight take-control e2e (PR #2275 /
|
||||
// staging-display.spec.ts); asserting it here would collide.
|
||||
// - `container-config` only renders when selectedNodeId is set AND is
|
||||
// gated on tier; it is covered by container-config-specific specs.
|
||||
// The parity check accounts for these via EXPECTED_EXTRA_TABS so a NEW
|
||||
// tab appearing in SidePanel still trips the guard.
|
||||
const TAB_IDS = [
|
||||
"chat",
|
||||
"activity",
|
||||
@@ -37,12 +60,131 @@ const TAB_IDS = [
|
||||
"audit",
|
||||
] as const;
|
||||
|
||||
// Tabs present in the DOM that this spec intentionally does not drive.
|
||||
// Keeping this explicit means a genuinely-new tab (not one of these) makes
|
||||
// the parity assertion fail LOUD instead of being silently un-tested.
|
||||
const EXPECTED_EXTRA_TABS = ["display", "container-config"] as const;
|
||||
|
||||
// Tabs that are KNOWN to degrade in SaaS mode (#1369). They get the weaker
|
||||
// "renders + no crash" contract instead of the "real content" contract.
|
||||
// Anything NOT in this set must render real content or the test fails.
|
||||
const KNOWN_DEGRADED_TABS = new Set<string>(["terminal", "files"]);
|
||||
|
||||
const STAGING = process.env.CANVAS_E2E_STAGING === "1";
|
||||
|
||||
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
|
||||
// IMPORTANT — fail-closed, not skip-green.
|
||||
//
|
||||
// `test.skip(!STAGING)` is correct ONLY when the operator never asked for a
|
||||
// staging run (CANVAS_E2E_STAGING unset). In that case the workflow's
|
||||
// detect-changes / token-check gates have already decided not to exercise
|
||||
// staging, and skipping is the documented contract.
|
||||
//
|
||||
// But if STAGING *is* requested (CANVAS_E2E_STAGING=1) and global setup did
|
||||
// NOT hand off the tenant state, that is a HARD failure, not a skip — see
|
||||
// the explicit env-presence throw inside the test body. A silent skip there
|
||||
// would let a broken provision ship green, which is exactly the
|
||||
// weak-gate failure this hardening removes (§ No flakes / internal#828).
|
||||
test.skip(!STAGING, "CANVAS_E2E_STAGING not set — staging-only suite, not requested");
|
||||
|
||||
/**
|
||||
* Assert the panel for `tabId` rendered real content.
|
||||
*
|
||||
* Deterministic contract (no fixed waits — every step is condition-based
|
||||
* with Playwright's built-in retry / expect.poll):
|
||||
* 1. The tabpanel container is visible.
|
||||
* 2. The global ErrorBoundary did NOT trip ("Something went wrong").
|
||||
* 3. For non-degraded tabs: no visible [role="alert"] is shown in the panel.
|
||||
* 4. For non-degraded tabs: the panel settles to non-empty,
|
||||
* non-spinner content (so an empty <div/> or a stuck "Loading…"
|
||||
* spinner FAILS instead of passing as it did before).
|
||||
*
* Steps 2 and 3 pass as soon as the polled count reads 0; they do not keep
* watching for an error surface that mounts after the check has passed.
*
* @param page Playwright page that hosts the side panel.
* @param tabId Tab id; the panel is located as `#panel-${tabId}`.
* @returns Resolves when every applicable step passed; a failed expectation
* rejects with the step's message.
|
||||
*/
|
||||
async function assertPanelRendered(page: Page, tabId: string): Promise<void> {
|
||||
const panel = page.locator(`#panel-${tabId}`);
|
||||
|
||||
// (1) Container visible. Built-in retry up to the expect timeout — no
|
||||
// arbitrary waitForTimeout. Mechanism: replaces any reliance on a fixed
|
||||
// settle delay with a real visibility condition.
|
||||
await expect(panel, `panel for ${tabId} never became visible`).toBeVisible({
|
||||
timeout: 10_000,
|
||||
});
|
||||
|
||||
// (2) ErrorBoundary trip = hard crash anywhere in the React subtree.
|
||||
// canvas/src/components/ErrorBoundary.tsx renders "Something went wrong".
|
||||
// The OLD gate only looked for a "Failed to load" toast and would ship
|
||||
// an ErrorBoundary-crashed panel GREEN. Mechanism: assert the crash
|
||||
// surface is absent page-wide via expect.poll. NOTE: the poll passes as
|
||||
// soon as the count reads 0, so a banner mounting later is not caught here.
|
||||
await expect
|
||||
.poll(
|
||||
async () =>
|
||||
page.getByText("Something went wrong", { exact: false }).count(),
|
||||
{
|
||||
message: `tab ${tabId}: ErrorBoundary tripped (Something went wrong)`,
|
||||
timeout: 5_000,
|
||||
},
|
||||
)
|
||||
.toBe(0);
|
||||
|
||||
// (3) No visible error alert inside the panel. Tabs surface load errors
|
||||
// as role="alert" with the real error text (EventsTab/ChannelsTab/
|
||||
// ConfigTab/...). The OLD gate matched ONLY [role=alert]:has-text("Failed
|
||||
// to load") — it missed (a) error messages that don't contain that exact
|
||||
// phrase and (b) error divs that omit role="alert" entirely (e.g.
|
||||
// ActivityTab). We replace it with a broader, but still SaaS-gap-aware,
|
||||
// check: any *visible* [role="alert"] inside the panel, whatever its text.
// NOTE(review): the selector below does not match role-less error divs,
// so gap (b) is still open — confirm whether a second selector is needed.
|
||||
//
|
||||
// Degraded tabs (#1369) are allowed an error state — for those we only
|
||||
// require no app-level crash (covered by step 2). For every other tab a
|
||||
// visible error alert is a real regression.
|
||||
if (!KNOWN_DEGRADED_TABS.has(tabId)) {
|
||||
const visibleAlerts = panel.locator('[role="alert"]:visible');
|
||||
await expect
|
||||
.poll(async () => visibleAlerts.count(), {
|
||||
message:
|
||||
`tab ${tabId}: a visible error alert is shown in the panel ` +
|
||||
`(was a weak "Failed to load"-only check before)`,
|
||||
timeout: 5_000,
|
||||
})
|
||||
.toBe(0);
|
||||
}
|
||||
|
||||
// (4) Real content. The tabpanel CONTAINER always mounts, so the old
|
||||
// toBeVisible() on the container passed even when the child rendered
|
||||
// nothing. Assert the panel's trimmed innerText is non-empty AND not
|
||||
// stuck on a loading spinner. expect.poll retries until the async
|
||||
// fetch+render settles — replacing the implicit "the network finished
|
||||
// by now" timing assumption with an explicit polled condition.
|
||||
//
|
||||
// Degraded tabs may legitimately be empty (Files in SaaS mode), so they
|
||||
// are exempt from the non-empty requirement; step 2 still guards them
|
||||
// against a hard crash.
|
||||
if (!KNOWN_DEGRADED_TABS.has(tabId)) {
|
||||
await expect
|
||||
.poll(
|
||||
async () => {
|
||||
const text = ((await panel.innerText()) || "").trim();
|
||||
// Text that BEGINS with "loading" (any case) is treated as a spinner
// that has not settled; the match is anchored at the start only.
|
||||
const stillLoading = /^(loading\b|loading…|loading\.\.\.)/i.test(
|
||||
text,
|
||||
);
|
||||
return text.length > 0 && !stillLoading;
|
||||
},
|
||||
{
|
||||
message:
|
||||
`tab ${tabId}: panel rendered empty or stuck on a loading ` +
|
||||
`spinner — no real content settled (weak "container visible" ` +
|
||||
`gate would have passed this)`,
|
||||
// Generous: real tabs fetch from the tenant over the network.
|
||||
// Polled, so it returns as soon as content appears.
|
||||
timeout: 20_000,
|
||||
},
|
||||
)
|
||||
.toBe(true);
|
||||
}
|
||||
}
|
||||
|
||||
test.describe("staging canvas tabs", () => {
|
||||
test("each workspace-panel tab renders without error", async ({
|
||||
test("each workspace-panel tab renders real content", async ({
|
||||
page,
|
||||
context,
|
||||
}) => {
|
||||
@@ -50,9 +192,16 @@ test.describe("staging canvas tabs", () => {
|
||||
const tenantToken = process.env.STAGING_TENANT_TOKEN;
|
||||
const workspaceId = process.env.STAGING_WORKSPACE_ID;
|
||||
|
||||
// FAIL-CLOSED (not skip): STAGING was requested but global setup did
|
||||
// not export tenant state. A silent skip here would paint a broken
|
||||
// provision GREEN. This is the loud-fail the hardening mandates.
|
||||
if (!tenantURL || !tenantToken || !workspaceId) {
|
||||
throw new Error(
|
||||
"staging-setup.ts did not export STAGING_TENANT_URL / STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID — did global setup run?",
|
||||
"staging-setup.ts did not export STAGING_TENANT_URL / " +
|
||||
"STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID. CANVAS_E2E_STAGING=1 " +
|
||||
"was set (staging WAS requested) but global setup produced no " +
|
||||
"tenant — this is a provisioning failure, NOT a reason to skip. " +
|
||||
"Check the [staging-setup] log above for the real error.",
|
||||
);
|
||||
}
|
||||
|
||||
@@ -152,11 +301,19 @@ test.describe("staging canvas tabs", () => {
|
||||
// omit the URL, so we'd otherwise be flying blind. Logged to the
|
||||
// test's stdout (visible in the workflow log under the failed step).
|
||||
page.on("requestfailed", (req) => {
|
||||
console.log(`[e2e/requestfailed] ${req.method()} ${req.url()}: ${req.failure()?.errorText ?? "?"}`);
|
||||
console.log(
|
||||
`[e2e/requestfailed] ${req.method()} ${req.url()}: ${
|
||||
req.failure()?.errorText ?? "?"
|
||||
}`,
|
||||
);
|
||||
});
|
||||
page.on("response", (res) => {
|
||||
if (res.status() >= 400) {
|
||||
console.log(`[e2e/response-${res.status()}] ${res.request().method()} ${res.url()}`);
|
||||
console.log(
|
||||
`[e2e/response-${res.status()}] ${res
|
||||
.request()
|
||||
.method()} ${res.url()}`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -173,9 +330,8 @@ test.describe("staging canvas tabs", () => {
|
||||
// hydrated, even with zero workspaces) or the hydration-error
|
||||
// banner — whichever wins first. Previous version of this wait
|
||||
// used `[role="tablist"]`, but that selector only appears AFTER
|
||||
// a workspace node is clicked (which happens below at L100), so
|
||||
// the wait would always time out at 45s before any meaningful
|
||||
// failure surfaced.
|
||||
// a workspace node is clicked, so the wait would always time out
|
||||
// at 45s before any meaningful failure surfaced.
|
||||
await page.waitForSelector(
|
||||
'[aria-label="Molecule AI workspace canvas"], [data-testid="hydration-error"]',
|
||||
{ timeout: 45_000 },
|
||||
@@ -189,10 +345,20 @@ test.describe("staging canvas tabs", () => {
|
||||
"canvas hydration failed — check staging CP + tenant reachability",
|
||||
).toBe(0);
|
||||
|
||||
// The global ErrorBoundary must not have tripped at the app root
|
||||
// either — a crash before the side panel even opens would otherwise
|
||||
// be invisible until a tab assertion happened to notice it.
|
||||
await expect(
|
||||
page.getByText("Something went wrong", { exact: false }),
|
||||
"app-level ErrorBoundary tripped during hydration",
|
||||
).toHaveCount(0);
|
||||
|
||||
// Click the workspace node to open the side panel. Try a data
|
||||
// attribute first, fall back to a generic role-based selector so
|
||||
// the test doesn't break when the node-card markup changes.
|
||||
const byDataAttr = page.locator(`[data-workspace-id="${workspaceId}"]`).first();
|
||||
const byDataAttr = page
|
||||
.locator(`[data-workspace-id="${workspaceId}"]`)
|
||||
.first();
|
||||
if ((await byDataAttr.count()) > 0) {
|
||||
await byDataAttr.click({ timeout: 10_000 });
|
||||
} else {
|
||||
@@ -202,19 +368,56 @@ test.describe("staging canvas tabs", () => {
|
||||
await firstNode.click({ timeout: 10_000 });
|
||||
}
|
||||
|
||||
await page.waitForSelector('[role="tablist"]', { timeout: 15_000 });
|
||||
// The tablist appears once the side panel mounts. Condition-based
|
||||
// wait — no fixed delay.
|
||||
const tablist = page.locator('[role="tablist"]');
|
||||
await expect(
|
||||
tablist,
|
||||
"side panel tablist never appeared after clicking the workspace node",
|
||||
).toBeVisible({ timeout: 15_000 });
|
||||
|
||||
// Tab-list parity guard. The hand-maintained TAB_IDS constant used to
|
||||
// be able to drift silently out of sync with SidePanel.tsx TABS — a
|
||||
// tab could be added to the UI and never get an assertion, shipping
|
||||
// broken-but-untested. Read the actual tab ids from the DOM and assert
|
||||
// every live tab is either driven by this spec (TAB_IDS) or explicitly
|
||||
// excluded (EXPECTED_EXTRA_TABS). A genuinely-new tab fails LOUD.
|
||||
const liveTabIds = (
|
||||
await tablist.locator('[role="tab"][id^="tab-"]').evaluateAll((els) =>
|
||||
els.map((el) => el.id.replace(/^tab-/, "")),
|
||||
)
|
||||
).sort();
|
||||
const accountedFor = new Set<string>([
|
||||
...TAB_IDS,
|
||||
...EXPECTED_EXTRA_TABS,
|
||||
]);
|
||||
const unaccounted = liveTabIds.filter((id) => !accountedFor.has(id));
|
||||
expect(
|
||||
unaccounted,
|
||||
`SidePanel exposes tab(s) this spec neither drives nor excludes: ` +
|
||||
`${unaccounted.join(", ")}. Add them to TAB_IDS (and assert their ` +
|
||||
`content) or to EXPECTED_EXTRA_TABS with a reason.`,
|
||||
).toHaveLength(0);
|
||||
// And the inverse: every TAB_ID we intend to drive must actually exist
|
||||
// in the DOM, so a renamed/removed tab fails here instead of timing out
|
||||
// on a missing #tab-<id> selector with an opaque message.
|
||||
const missing = TAB_IDS.filter((id) => !liveTabIds.includes(id));
|
||||
expect(
|
||||
missing,
|
||||
`TAB_IDS references tab(s) not present in SidePanel: ${missing.join(
|
||||
", ",
|
||||
)} — the spec's tab list has drifted from SidePanel.tsx TABS.`,
|
||||
).toHaveLength(0);
|
||||
|
||||
for (const tabId of TAB_IDS) {
|
||||
await test.step(`tab: ${tabId}`, async () => {
|
||||
const tabButton = page.locator(`#tab-${tabId}`);
|
||||
// The TABS bar is `overflow-x-auto` (SidePanel.tsx:~tabs
|
||||
// wrapper) — tabs after position ~3 are clipped behind the
|
||||
// right-edge fade gradient on smaller viewports. Playwright's
|
||||
// `toBeVisible()` returns false for clipped elements, so a
|
||||
// bare visibility check fails on `skills` and later tabs in
|
||||
// CI. scrollIntoViewIfNeeded brings the button into view
|
||||
// before the visibility check, mirroring what SidePanel's own
|
||||
// keyboard handler does on arrow-key navigation.
|
||||
// The TABS bar is `overflow-x-auto` — tabs past position ~3 are
|
||||
// clipped behind the right-edge fade gradient on smaller
|
||||
// viewports. Playwright's toBeVisible() returns false for clipped
|
||||
// elements, so a bare visibility check fails on later tabs in CI.
|
||||
// scrollIntoViewIfNeeded brings the button into view before the
|
||||
// visibility check.
|
||||
await tabButton.scrollIntoViewIfNeeded({ timeout: 5_000 });
|
||||
await expect(
|
||||
tabButton,
|
||||
@@ -222,18 +425,34 @@ test.describe("staging canvas tabs", () => {
|
||||
).toBeVisible({ timeout: 5_000 });
|
||||
await tabButton.click();
|
||||
|
||||
const panel = page.locator(`#panel-${tabId}`);
|
||||
await expect(panel, `panel for ${tabId} never rendered`).toBeVisible({
|
||||
timeout: 10_000,
|
||||
});
|
||||
// Confirm the click actually activated this tab before asserting
|
||||
// its content — aria-selected flips on the active tab. This closes
|
||||
// a race where a slow click handler left the PREVIOUS tab's panel
|
||||
// mounted and we asserted the wrong panel's content. Built-in
|
||||
// retry, condition-based, no fixed wait.
|
||||
await expect(
|
||||
tabButton,
|
||||
`tab-${tabId} did not become the selected tab after click`,
|
||||
).toHaveAttribute("aria-selected", "true", { timeout: 5_000 });
|
||||
|
||||
// "Failed to load" toast = hard crash. Known SaaS-mode gaps
|
||||
// (Files empty, Terminal disconnected, Peers 401) surface as
|
||||
// in-panel content, not toasts.
|
||||
// Real-content assertion (the core hardening). See
|
||||
// assertPanelRendered: container visible + no ErrorBoundary + no
|
||||
// visible error alert + settled non-empty content for non-degraded
|
||||
// tabs. Replaces the old "panel visible + no Failed-to-load toast"
|
||||
// pair, which shipped empty/errored panels green.
|
||||
await assertPanelRendered(page, tabId);
|
||||
|
||||
// Belt and braces: the original toast check stays. A global
|
||||
// "Failed to load" toast (role=alert outside the panel) is still a
|
||||
// crash signal worth catching even though the in-panel checks above
|
||||
// now do the heavy lifting.
|
||||
const errorToasts = await page
|
||||
.locator('[role="alert"]:has-text("Failed to load")')
|
||||
.count();
|
||||
expect(errorToasts, `tab ${tabId}: "Failed to load" toast`).toBe(0);
|
||||
expect(
|
||||
errorToasts,
|
||||
`tab ${tabId}: a global "Failed to load" toast is showing`,
|
||||
).toBe(0);
|
||||
|
||||
await page.screenshot({
|
||||
path: `test-results/staging-tab-${tabId}.png`,
|
||||
@@ -267,3 +486,56 @@ test.describe("staging canvas tabs", () => {
|
||||
).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
/*
|
||||
* PROMOTION-READINESS — staging canvas E2E → HARD merge-gate
|
||||
* ----------------------------------------------------------
|
||||
* NOW RELIABLE (deterministic; these no longer flap on timing):
|
||||
* - Every wait is condition-based (toBeVisible / toHaveAttribute /
|
||||
* expect.poll). There is NO fixed waitForTimeout / sleep in the spec;
|
||||
* the only setTimeout is the bounded poll-interval inside
|
||||
* staging-setup.ts waitFor(), which has a hard deadline.
|
||||
* - Tabs are asserted on REAL settled content (non-empty, non-spinner),
|
||||
* not just "container is visible" — an empty or stuck-loading panel now
|
||||
* fails instead of shipping green.
|
||||
* - The ErrorBoundary ("Something went wrong") is asserted absent at app
|
||||
* hydration AND per tab — a React subtree crash can no longer pass.
|
||||
* - Visible error alerts inside a panel fail non-degraded tabs (was a
|
||||
* weak [role=alert]:has-text("Failed to load")-only check that missed
|
||||
* both other error phrasings and role-less error divs; the new check
* still selects [role="alert"] only, so role-less divs remain uncaught).
|
||||
* - The driven tab list is parity-checked against the live DOM, so a new
|
||||
* SidePanel tab can't ship un-tested and a removed one fails loud.
|
||||
* - Click→activation is confirmed (aria-selected) before asserting the
|
||||
* panel, removing a wrong-panel race.
|
||||
* - The suite is fail-closed: CANVAS_E2E_STAGING=1 with no tenant state
|
||||
* hard-errors (never skips→green); CANVAS_E2E_STAGING unset cleanly
|
||||
* skips (operator did not request staging).
|
||||
*
|
||||
* STILL BLOCKS PROMOTION-TO-REQUIRED (do NOT flip continue-on-error here —
|
||||
* CTO-owned, RFC internal#219 §1):
|
||||
* - INFRA DEPENDENCY: each run provisions a real staging EC2 tenant
|
||||
* (12-20 min cold boot). Required-gate latency + AWS/Cloudflare/CP
|
||||
* availability become merge-blockers. A staging outage would freeze
|
||||
* main even though the code is fine — unacceptable for a required check
|
||||
* until staging has an SLA or this runs against a warm pre-provisioned
|
||||
* pool.
|
||||
* - SHARED-RESOURCE FLAKE SURFACE: TLS/DNS/ACME propagation on a shared
|
||||
* staging zone (staging-setup TLS_TIMEOUT_MS) is outside this repo's
|
||||
* control. Deterministic here ≠ deterministic upstream.
|
||||
* - SECRET DEPENDENCY: CP_STAGING_ADMIN_API_TOKEN must be present on the
|
||||
* runner. The workflow's skip-if-absent (core#2225) keeps a missing
|
||||
* secret from painting red — correct for non-gating, but a REQUIRED
|
||||
* check must instead guarantee the secret is always present, else it
|
||||
* skip-greens the very thing it is supposed to enforce.
|
||||
* - SINGLE-WORKSPACE COVERAGE: one hermes/platform_managed workspace that
|
||||
* does NOT boot an agent on staging (no CP LLM proxy env, workspace-
|
||||
* server #2162). Tabs render, but agent-dependent content paths (live
|
||||
* chat round-trip, traces from a real run) are not exercised.
|
||||
*
|
||||
* PROMOTION CHECKLIST (when CTO signs off on making this required):
|
||||
* 1. Warm pre-provisioned tenant pool OR a staging SLA bounding boot time.
|
||||
* 2. Guarantee CP_STAGING_ADMIN_API_TOKEN on the gating runner; turn the
|
||||
* skip-if-absent into a hard error for the required path.
|
||||
* 3. Decide whether agent-dependent tabs need a wired LLM proxy on the
|
||||
* staging tenant (covers chat/traces real content) before gating them.
|
||||
*/
|
||||
|
||||
@@ -7,6 +7,14 @@ export default defineConfig({
|
||||
fullyParallel: false,
|
||||
workers: 1,
|
||||
retries: 0,
|
||||
// Fail CLOSED when an explicit spec selection matches zero tests.
|
||||
// Playwright defaults this to true, so `playwright test e2e/chat-*.spec.ts`
|
||||
// would exit 0 (green) if those files were renamed/moved/deleted — a
|
||||
// false-green that would silently gut the e2e-chat gate after a refactor.
|
||||
// forbidOnly likewise stops a stray `test.only` from green-ing the suite
|
||||
// while skipping every other case.
|
||||
passWithNoTests: false,
|
||||
forbidOnly: !!process.env.CI,
|
||||
use: {
|
||||
baseURL: process.env.PLAYWRIGHT_BASE_URL || "http://localhost:3000",
|
||||
headless: true,
|
||||
|
||||
@@ -0,0 +1,225 @@
|
||||
# Fail-closed BYOK billing
|
||||
|
||||
**Status:** Proposal — CTO (王泓铭)-refined 2026-06-05.
|
||||
Owners: hongming (CTO)
|
||||
Base: molecule-core main @ `1955fdd0` (2026-06-04)
|
||||
|
||||
This RFC formalizes the **fail-closed BYOK billing** model: the contract that a
|
||||
workspace which intends to run an LLM on the tenant's own credential
|
||||
(bring-your-own-key) must be **rejected at the create API** if that credential is
|
||||
missing or dead — loudly, comprehensively, and synchronously — never created and
|
||||
then wedged at provision time, and never silently allowed to fall through to a
|
||||
platform-billed default.
|
||||
|
||||
It writes down the four hard requirements, audits the current implementation
|
||||
against them (two are met today, one partial, one missing), and specifies the
|
||||
two gaps to close. The derive-from-model SSOT and the platform proxy boundary are
|
||||
**non-goals** here — this RFC is only about closing the credential-validation
|
||||
holes around an already-correct billing-mode resolver.
|
||||
|
||||
## TL;DR
|
||||
|
||||
```
|
||||
create API request (runtime, model[, billing override])
|
||||
│
|
||||
▼
|
||||
derive provider/mode from providers.yaml registry SSOT ── Req1 MET today
|
||||
(explicit operator-override column = escape hatch)
|
||||
│
|
||||
├─ mode == platform_managed ──────────────► create OK (proxy bills)
|
||||
│
|
||||
└─ mode == BYOK
|
||||
│
|
||||
├─ GAP A: credential PRESENT for the derived provider?
|
||||
│ (no → 422 MISSING_BYOK_CREDENTIAL, synchronous, loud)
|
||||
│
|
||||
├─ GAP B: credential VALID? (cheap authed provider call;
|
||||
│ 401/403 → 422 INVALID_BYOK_CREDENTIAL, loud)
|
||||
│
|
||||
▼
|
||||
create OK → provision (re-checks presence as defense-in-depth)
|
||||
```
|
||||
|
||||
## The model — four hard requirements
|
||||
|
||||
1. **Explicit selection drives the adapter.** Provider/mode is *selected*, never
|
||||
guessed. Today the selection is **derived deterministically** from the chosen
|
||||
model via the `providers.yaml` registry SSOT (`DeriveProvider(runtime, model,
|
||||
availableAuthEnv)`); the per-workspace operator-override column is the explicit
|
||||
escape hatch with top precedence. There is no heuristic fallback to a vendor.
|
||||
|
||||
2. **BYOK requires the credential, validated AT CREATION, fail-closed.** A
|
||||
BYOK workspace with no usable credential for the derived provider must be
|
||||
**REJECTED at the create API** with a clear, comprehensive error (which
|
||||
credential / env var, which provider, what to do). It must NOT be created
|
||||
(201) and then wedged late at provision.
|
||||
|
||||
3. **Preflight-validate the credential is VALID, not just present.** Presence is
|
||||
necessary but not sufficient: a present-but-dead token (revoked, expired,
|
||||
wrong-scope) must be caught by a *cheap authenticated provider call* (a
|
||||
models-list or a 1-token completion) and the workspace rejected on 401/403
|
||||
before it goes live.
|
||||
|
||||
4. **Fail LOUD, never silent.** Any missing / invalid / rejected credential
|
||||
errors loudly: comprehensive server logs (provider, env var, code, workspace)
|
||||
plus a user-visible structured reason. It must NEVER silently fall through to
|
||||
`platform_managed` or to any default that bills the platform for what the
|
||||
tenant declared as BYOK.
|
||||
|
||||
## Current-state audit
|
||||
|
||||
References are `path:line` at base `1955fdd0`. Workspace-server paths are relative
|
||||
to `workspace-server/`; the proxy/charge layer lives in the controlplane repo.
|
||||
|
||||
### Req1 — Explicit selection drives the adapter — **MET**
|
||||
|
||||
- `internal/handlers/llm_billing_mode.go:197-264` — `ResolveLLMBillingModeDerived`:
|
||||
precedence 1 = explicit workspace override column; precedence 2 = derive the
|
||||
provider from `(runtime, model)` via the embedded `providers.yaml` registry
|
||||
(`manifest.DeriveProvider`). A specific non-platform vendor → `byok`; a platform
|
||||
provider → `platform_managed`. No guessing.
|
||||
- `internal/handlers/workspace.go:420-503` — create-time validation already
|
||||
hard-rejects (422) an unregistered `(runtime, model)` pair
|
||||
(`UNREGISTERED_MODEL_FOR_RUNTIME`) and a model whose derived provider is absent
|
||||
from the catalog (`DERIVED_PROVIDER_NOT_IN_REGISTRY`), and requires an explicit
|
||||
model (`MODEL_REQUIRED`). The selection input is validated against the SSOT at
|
||||
the boundary.
|
||||
|
||||
### Req4 — Fail loud, never silent — **MET**
|
||||
|
||||
- Default-closed on ambiguity: `internal/handlers/llm_billing_mode.go:26-39` and
|
||||
`:217-252` — every ambiguous / error / no-id path resolves to
|
||||
`platform_managed` *with the error surfaced* (logged + returned on the
|
||||
resolution struct), never a silent BYOK→platform flip that bills the tenant
|
||||
by surprise.
|
||||
- Proxy is platform-managed-only: controlplane `internal/handlers/llm_proxy.go:94,
|
||||
158,223,664-748` — the platform LLM proxy only serves platform-managed traffic;
|
||||
BYOK never routes through it.
|
||||
- Charge layer never bills the platform for BYOK: controlplane
|
||||
`internal/credits/llm_billing.go:156-233` — BYOK usage is not charged to the
|
||||
platform ledger.
|
||||
|
||||
### Req2 — Credential validated at creation, fail-closed — **PARTIAL**
|
||||
|
||||
- The fail-closed BYOK check EXISTS but only at **provision** time:
|
||||
`internal/handlers/workspace_provision_shared.go:225-232` — if
|
||||
`ResolvedMode == BYOK && !HasUsableLLMCred`, the provisioner aborts with
|
||||
`MISSING_BYOK_CREDENTIAL` (molecule-core#1994).
|
||||
- Gap: a credential-less BYOK **create** returns **201** and only fails later at
|
||||
provision. That violates Req2's "rejected at the create API, not
|
||||
created-then-wedged" — the user gets a workspace row and a delayed, async
|
||||
failure instead of a synchronous 4xx.
|
||||
|
||||
### Req3 — Credential is VALID, not just present — **MISSING**
|
||||
|
||||
- `HasUsableLLMCred` is **presence-only**:
|
||||
`internal/handlers/workspace_provision.go:1138-1145` —
|
||||
`hasAnyPlatformManagedLLMKey` returns true if any auth-env key is a non-empty
|
||||
string. There is **no liveness probe anywhere** — a present-but-revoked token
|
||||
passes every gate and the workspace goes live, then wedges at first real LLM
|
||||
call (the failure Req3 exists to pull forward).
|
||||
|
||||
## Scope of work — the two gaps
|
||||
|
||||
### Gap A (Req2): BYOK credential-presence check at the CREATE boundary
|
||||
|
||||
Add a synchronous presence check inside the create handler
|
||||
(`(h *WorkspaceHandler) Create`, `internal/handlers/workspace.go:242`), after
|
||||
billing-mode resolution and the existing registry validation, **in addition to**
|
||||
the provision-time check (keep that as defense-in-depth — do not remove it).
|
||||
|
||||
- When the resolved mode is `byok`, resolve the derived provider's accepted auth
|
||||
env-var names from the `providers.yaml` registry (`auth_env` list, e.g.
|
||||
`[ANTHROPIC_API_KEY, ANTHROPIC_AUTH_TOKEN]` for `anthropic-api`) and confirm at
|
||||
least one is present (non-empty) for the workspace at any in-scope secret level.
|
||||
- On absence: **422** with a structured body:
|
||||
`code: MISSING_BYOK_CREDENTIAL`, plus `provider`, `missing_env` (the candidate
|
||||
env-var names), `billing_mode: byok`, and a human `error` that names the
|
||||
provider, the missing credential, and the remediation ("set
|
||||
`ANTHROPIC_API_KEY` as a workspace or org secret, then retry create"). Reuse the
|
||||
existing `formatMissingBYOKCredentialError` wording where possible so create and
|
||||
provision speak with one voice.
|
||||
- Log loudly with the same `MISSING_BYOK_CREDENTIAL` code the provisioner uses, so
|
||||
the two checkpoints are greppable as one class.
|
||||
|
||||
### Gap B (Req3): credential LIVENESS preflight
|
||||
|
||||
Add a minimal authenticated probe per provider, driven entirely by the
|
||||
`providers.yaml` SSOT — no hardcoded endpoints.
|
||||
|
||||
- Derive the probe target from the registry entry: `protocol`/`auth_mode`,
|
||||
`base_url_template` or `base_url_anthropic`, and the `auth_env` /
|
||||
`auth_token_env` that carries the secret. Make the cheapest authenticated call
|
||||
the surface offers (models-list where available, else a 1-token completion).
|
||||
- Fail-closed on **401/403**: reject the create with **422**
|
||||
`code: INVALID_BYOK_CREDENTIAL` (provider, env var, upstream status, remediation
|
||||
"the credential was found but the provider rejected it — rotate the key").
|
||||
- **Recommendation: probe at create** for fast feedback, with a **provision-time
|
||||
re-check** (the credential can be revoked between create and provision; the
|
||||
provisioner is the last gate before the workspace is live). The provision
|
||||
re-check upgrades `workspace_provision_shared.go:225-232` from presence-only to
|
||||
presence-and-liveness for BYOK.
|
||||
- The probe **must be cheap and time-bounded** (see Risks).
|
||||
- **OAuth-provider nuance:** registry entries with `auth_mode: oauth` and
|
||||
`base_url: null` (e.g. `anthropic-oauth`, codex chatgpt-subscription) have no
|
||||
HTTP surface the platform dials — the CLI talks to the vendor directly. For
|
||||
these, the liveness probe has no cheap server-side equivalent; scope Gap B's
|
||||
*active* probe to keyed providers with a non-null base URL and fall back to the
|
||||
presence check (Gap A) for OAuth modes. Do not block on inventing an OAuth
|
||||
liveness call in this RFC.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- **Not** changing the derive-from-model SSOT. Selection stays
|
||||
`providers.yaml` → `DeriveProvider`; the operator-override column stays the only
|
||||
escape hatch. No new heuristics.
|
||||
- **Not** routing BYOK through the platform proxy. The proxy stays
|
||||
platform-managed-only; this RFC adds validation around BYOK, it does not move
|
||||
BYOK onto a platform code path.
|
||||
- **Not** re-billing or changing the charge layer. BYOK stays off the platform
|
||||
ledger.
|
||||
- **Not** adding an OAuth-subscription liveness call (deferred — see Gap B
|
||||
nuance).
|
||||
|
||||
## Risks
|
||||
|
||||
- **Preflight latency on create.** An authenticated provider round-trip adds
|
||||
hundreds of ms to a few seconds to create. Mitigate with a hard, short timeout
|
||||
(target ≤ ~3s) and a clear, distinct error on timeout — a probe timeout must
|
||||
NOT be treated as "valid" (fail-closed) but must also be distinguishable from a
|
||||
real 401/403 so transient upstream blips are diagnosable. Consider whether a
|
||||
probe timeout should 422 (strict fail-closed) or surface a soft warning and
|
||||
defer to the provision-time re-check; default to fail-closed at create for the
|
||||
loud-feedback goal, with the provision re-check as the safety net.
|
||||
- **Provider rate-limits.** A models-list / 1-token probe consumes the tenant's
|
||||
quota and can be rate-limited (429). A 429 is NOT an auth failure — treat it as
|
||||
inconclusive (do not reject as `INVALID_BYOK_CREDENTIAL`), log it, and defer to
|
||||
the presence check + provision-time re-check rather than blocking create on a
|
||||
429.
|
||||
- **Provider-side flakiness.** 5xx from the provider is inconclusive, same
|
||||
handling as 429 — never silently pass, never hard-reject on a 5xx; log and
|
||||
defer.
|
||||
|
||||
## Test plan
|
||||
|
||||
1. **Gap A — create-time presence (unit + handler):**
|
||||
- BYOK-deriving `(runtime, model)` with NO credential in any scope → **422
|
||||
`MISSING_BYOK_CREDENTIAL`**, body names provider + missing env; no workspace
|
||||
row created.
|
||||
- Same with the credential present → create proceeds (mode `byok`).
|
||||
- `platform_managed`-deriving model with no tenant key → create proceeds
|
||||
(unchanged; proxy path).
|
||||
2. **Gap B — liveness (unit with a stubbed provider HTTP surface):**
|
||||
- Present-but-401/403 key → **422 `INVALID_BYOK_CREDENTIAL`**.
|
||||
- Valid key → create proceeds.
|
||||
- 429 / 5xx / timeout → inconclusive: create NOT rejected as invalid; logged;
|
||||
provision re-check still runs.
|
||||
- `auth_mode: oauth` + `base_url: null` provider → active probe skipped,
|
||||
presence check governs.
|
||||
3. **Provision defense-in-depth (existing + extended):**
|
||||
- Credential revoked between create and provision → provisioner aborts
|
||||
(presence today; liveness re-check after Gap B).
|
||||
- Existing `MISSING_BYOK_CREDENTIAL` provision-abort test stays green.
|
||||
4. **Req4 regression guard:** assert no path flips a BYOK selection to
|
||||
`platform_managed` silently — an absent/dead BYOK credential always produces a
|
||||
loud 4xx with a code, never a 201 that bills the platform.
|
||||
@@ -114,7 +114,7 @@ Opt-in pattern: when `idle_prompt` is non-empty in `config.yaml`, the workspace
|
||||
|
||||
Three Gin middleware classes gate server-side routes. Full contract in `docs/runbooks/admin-auth.md`.
|
||||
|
||||
- **`middleware.AdminAuth(db.DB)`** — strict bearer-only. Used for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak ops intel. Lazy-bootstrap fail-open when `HasAnyLiveTokenGlobal` returns 0.
|
||||
- **`middleware.AdminAuth(db.DB)`** — strict bearer-only and **fail-closed in every environment** (harden/no-fail-open-auth). Used for any route where a forged request could leak prompts/memory, create/mutate workspaces, or leak ops intel. The former lazy-bootstrap fail-open (pass when `HasAnyLiveTokenGlobal` returns 0) and the dev-mode escape hatch have both been removed — a fresh install must provision `ADMIN_TOKEN` to reach admin routes.
|
||||
- **`middleware.CanvasOrBearer(db.DB)`** — accepts a bearer token OR an Origin matching `CORS_ORIGINS`. Used **only** for cosmetic routes where a forged request has zero data/security impact. Currently only on `PUT /canvas/viewport`. Do not extend this to any route that leaks data or creates resources — see the runbook.
|
||||
- **`middleware.WorkspaceAuth(db.DB)`** — binds a bearer token to `:id`. Workspace A's token cannot hit workspace B's sub-routes. Used for the entire `/workspaces/:id/*` group except the A2A proxy (which has its own `CanCommunicate` layer).
|
||||
|
||||
|
||||
+9
-3
@@ -24,7 +24,7 @@ cd molecule-core
|
||||
|
||||
That single script:
|
||||
|
||||
1. Generates an `ADMIN_TOKEN` into `.env` (first run only — preserved on re-runs)
|
||||
1. Generates an `ADMIN_TOKEN` into `.env` (first run only — preserved on re-runs) and exports the matching `NEXT_PUBLIC_ADMIN_TOKEN` so the canvas authenticates with it. Auth is **fail-closed in every environment** (including local dev) — there is no dev-mode fail-open; the canvas reaches admin/workspace routes only because it sends this bearer.
|
||||
2. Brings up Postgres, Redis, Langfuse, ClickHouse, and Temporal via `infra/scripts/setup.sh`
|
||||
3. Populates the workspace template + plugin registry from `manifest.json`
|
||||
4. Builds and starts the platform on `http://localhost:8080`
|
||||
@@ -62,11 +62,17 @@ If you only want the raw compose flow:
|
||||
docker compose -f docker-compose.infra.yml up -d
|
||||
```
|
||||
|
||||
> **Auth is fail-closed even in local dev.** Pick any local admin token and
|
||||
> set it on *both* sides — the platform (`ADMIN_TOKEN`) and the canvas
|
||||
> (`NEXT_PUBLIC_ADMIN_TOKEN`, same value). Without it the canvas 401s on every
|
||||
> admin/workspace call. (`scripts/dev-start.sh` does this for you; the manual
|
||||
> steps below set it explicitly.)
|
||||
|
||||
### Step 3: Start the platform
|
||||
|
||||
```bash
|
||||
cd workspace-server
|
||||
go run ./cmd/server
|
||||
ADMIN_TOKEN=dev-local-admin-token MOLECULE_ENV=development go run ./cmd/server
|
||||
```
|
||||
|
||||
The control plane listens on `http://localhost:8080`.
|
||||
@@ -78,7 +84,7 @@ In a new terminal:
|
||||
```bash
|
||||
cd canvas
|
||||
npm install
|
||||
npm run dev
|
||||
NEXT_PUBLIC_ADMIN_TOKEN=dev-local-admin-token npm run dev # MUST match ADMIN_TOKEN above
|
||||
```
|
||||
|
||||
Open `http://localhost:3000`.
|
||||
|
||||
@@ -1,5 +1,29 @@
|
||||
# Admin Authentication Runbook
|
||||
|
||||
## Auth is fail-CLOSED in every environment — `ADMIN_TOKEN` is the bootstrap credential
|
||||
|
||||
Per the CTO "nothing should be fail-open" directive, **every** auth path on the
|
||||
workspace-server fails closed — there is no dev-mode / zero-token / DB-outage
|
||||
hatch that grants access. This includes:
|
||||
|
||||
- `AdminAuth` and `WorkspaceAuth` (admin + per-workspace routes),
|
||||
- `CanvasOrBearer` (the cosmetic `PUT /canvas/viewport` route), and
|
||||
- `validateDiscoveryCaller` (`/registry/:id/peers`, `/registry/discover/:id`).
|
||||
|
||||
Consequence for **bootstrap**: a brand-new self-hosted / dev install has **no
|
||||
DB-backed tokens yet**, and there is no longer a fail-open that lets the first
|
||||
request through. The **only** way to reach admin routes (and to mint the first
|
||||
workspace token via `POST /admin/workspaces/:id/tokens`) is to set `ADMIN_TOKEN`
|
||||
in the platform environment and present it as the bearer. This is the "local
|
||||
mimics production" principle: there is no zero-config bootstrap.
|
||||
|
||||
- **Local dev:** `scripts/dev-start.sh` provisions a deterministic
|
||||
`ADMIN_TOKEN` into `.env` (and exports the matching `NEXT_PUBLIC_ADMIN_TOKEN`
|
||||
so the canvas authenticates with it). See `docs/quickstart.md`.
|
||||
- **Self-hosted / SaaS:** set `ADMIN_TOKEN` to a strong random secret
|
||||
(`openssl rand -base64 32`) in the platform env and bake the matching
|
||||
`NEXT_PUBLIC_ADMIN_TOKEN` into the canvas bundle.
|
||||
|
||||
## Required: set `MOLECULE_ENV` in all non-dev environments
|
||||
|
||||
```bash
|
||||
@@ -7,8 +31,10 @@
|
||||
MOLECULE_ENV=production
|
||||
```
|
||||
|
||||
This matches the production tenant default and disables development-only
|
||||
shortcuts. Staging and production smoke tests should use the real user/API
|
||||
This matches the production tenant default. NOTE: `MOLECULE_ENV` no longer gates
|
||||
any auth decision — it only drives NON-security local-dev conveniences (loopback
|
||||
bind, relaxed rate limit). Setting it to `dev`/`development` does **not** relax
|
||||
authentication. Staging and production smoke tests should use the real user/API
|
||||
workflow: create a workspace, then mint a one-time displayed workspace bearer
|
||||
with `POST /admin/workspaces/:id/tokens`.
|
||||
|
||||
@@ -23,5 +49,7 @@ The platform uses `ADMIN_TOKEN` as the bearer credential for admin-gated endpoin
|
||||
| `POST /org/import` | `Authorization: Bearer <ADMIN_TOKEN>` |
|
||||
| `POST /admin/workspaces/:id/tokens` | `Authorization: Bearer <ADMIN_TOKEN>`; plaintext token returned once |
|
||||
|
||||
Missing or invalid `ADMIN_TOKEN` → AdminAuth fails open in dev mode (no token set), or
|
||||
returns 401 in production mode (token set but invalid).
|
||||
Missing or invalid bearer → **401 in every environment** (fail-closed; no
|
||||
dev-mode fail-open). If the auth datastore is unreachable, auth-gated routes
|
||||
return **503** (`platform_unavailable`) — an availability tradeoff that grants no
|
||||
access — rather than allowing the request through.
|
||||
|
||||
+3
-1
@@ -28,7 +28,9 @@
|
||||
{"name": "claude-code-default", "repo": "molecule-ai/molecule-ai-workspace-template-claude-code", "ref": "main"},
|
||||
{"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"},
|
||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"}
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||
{"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"},
|
||||
{"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}
|
||||
],
|
||||
"org_templates": [
|
||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
||||
|
||||
@@ -121,6 +121,92 @@ python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v
|
||||
|
||||
---
|
||||
|
||||
## 6. Fail-closed CI integrity — no fail-open gates (MERGE-BLOCKING)
|
||||
|
||||
**Rule:** No CI workflow, CI script, or test check may **FAIL OPEN** — i.e. it
|
||||
must never report GREEN (exit 0, skip, warn-and-continue, `|| true`, or any
|
||||
"return success") when it could **not actually verify its invariant**. A check
|
||||
that cannot verify MUST **fail loud** (`::error::` annotation **and** a nonzero
|
||||
exit) and **fail closed** (treat inability-to-verify as **FAILURE**, never as a
|
||||
pass). An unverifiable check is a red check, full stop.
|
||||
|
||||
This is the same family of bug as the no-flakes rule (§ *No flakes*): a green
|
||||
that isn't real. A flake is a green/red that flips for an unnamed reason; a
|
||||
fail-open gate is a green that was never earned. Both let unverified code reach
|
||||
`main`, and both are merge-blocking.
|
||||
|
||||
### Applies to
|
||||
|
||||
Required / hard gates on **protected contexts**: pushes to `main`, internal
|
||||
protected branches, and **same-repo** PRs (`pull_request_target`). On these
|
||||
contexts the *cause* of an unverifiable run is **irrelevant** — every one of the
|
||||
following MUST fail closed:
|
||||
|
||||
- auth failure (401 / 403),
|
||||
- missing token or identity,
|
||||
- under-scoped credential,
|
||||
- unreachable dependency (network, Infisical, control-plane, registry),
|
||||
- a required test file that is absent or collects zero tests,
|
||||
- any transient error the check cannot prove was benign.
|
||||
|
||||
"I couldn't check" is reported and scored exactly like "the check failed." A
|
||||
gate that can be silently defanged by removing a secret is not a gate.
|
||||
|
||||
### The one allowed exception — explicit trust-boundary split
|
||||
|
||||
Legitimate degradation is permitted **only** where the secret genuinely cannot
|
||||
exist — e.g. **fork PRs**, which by design have no access to repo secrets. Such
|
||||
degradation is allowed **only** when it is:
|
||||
|
||||
1. gated behind an **explicit** fork / advisory branch in the workflow logic
|
||||
(an intentional trust-boundary split, not an incidental `if: secrets...`),
|
||||
2. **clearly marked advisory** in its name and output, and
|
||||
3. **NOT counted as a passing REQUIRED context** — it may inform, it may not
|
||||
satisfy the gate.
|
||||
|
||||
Silent degradation that satisfies a required gate is **forbidden**. If a fork PR
|
||||
needs the real check, it must run via a maintainer-triggered same-repo path
|
||||
(where the secret exists and the check therefore fails closed), not by quietly
|
||||
passing the required context with no verification.
|
||||
|
||||
### Auth-failure vs. genuine-absence — do not conflate
|
||||
|
||||
Distinguish the two so a real finding is never masked and a masked finding is
|
||||
never mistaken for real:
|
||||
|
||||
- **`403` (or 401) on a protected context → fail closed.** You could not verify;
|
||||
that is a check failure, not a finding about the resource.
|
||||
- **A real `404` from a read made *with a valid, sufficiently-scoped token* →
|
||||
the real finding.** The resource is genuinely absent; report it as such.
|
||||
|
||||
A `403` reported as "resource not found" is itself a fail-open bug.
|
||||
|
||||
### Required practice
|
||||
|
||||
Every gate that depends on a token, an identity, or an external read MUST ship
|
||||
with a test or workflow-lint covering the **absent-identity / unauthorized /
|
||||
missing-file path** that asserts the gate **FAILS** (not skips, not passes).
|
||||
Add or update that coverage in the **same PR** that adds or changes the gate.
|
||||
A gate without a proven failure path is not yet a gate.
|
||||
|
||||
### Violations seen in this codebase (all merge-blocking if reintroduced)
|
||||
|
||||
- **serving-e2e** reporting vacuously GREEN when the Infisical identity is
|
||||
absent (no per-(provider × auth) completion was actually exercised).
|
||||
- **branch-protection / BP-drift lints** returning `0` on a `403` instead of
|
||||
failing closed on the unverifiable response.
|
||||
- **verify-template-models** run without `-strict`, so a drift it could not
|
||||
confirm passed silently.
|
||||
- A **referenced-but-absent pytest file** that collects zero tests and reports
|
||||
green — silent pass with no assertions executed.
|
||||
|
||||
Each of these is a fail-open gate and is a merge blocker until it fails loud and
|
||||
closed on protected contexts. See also the production fail-closed defaults in
|
||||
`runbooks/sop-production-cicd.md` (*Production Defaults*), which apply the same
|
||||
principle to deploy-time gates.
|
||||
|
||||
---
|
||||
|
||||
## References
|
||||
|
||||
- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
|
||||
|
||||
@@ -8,26 +8,39 @@ against the latest `main`.
|
||||
|
||||
## Queue Contract
|
||||
|
||||
Add the `merge-queue` label to an open PR when it is ready to merge.
|
||||
**Auto-discovery (opt-OUT, default).** You do NOT need to label a PR. The bot
|
||||
auto-discovers every open same-repo PR and merges any that meets the bar. The
|
||||
`merge-queue` label is now optional metadata, not a gate. This removed the
|
||||
historical autonomy gap: agent Gitea tokens lack `write:issue` (labels are
|
||||
issue-scoped), so agents could never self-label and ready PRs stalled.
|
||||
|
||||
To keep a PR OUT of autonomous merging, add an opt-OUT label:
|
||||
`merge-queue-hold`, `do-not-auto-merge`, or `wip`. Draft PRs are also skipped.
|
||||
|
||||
The bot processes one PR per tick:
|
||||
|
||||
1. Confirms `main` is green.
|
||||
2. Selects the oldest open PR carrying `merge-queue`.
|
||||
3. Skips PRs with `merge-queue-hold`.
|
||||
4. Rejects fork PRs because the queue may only update same-repo branches.
|
||||
5. If the PR head does not contain current `main`, calls Gitea's
|
||||
1. Confirms `main`'s branch-protection-required push contexts are green.
|
||||
2. Selects the oldest open same-repo PR that is NOT opt-out-labeled and NOT a
|
||||
draft (auto-discovery). With `AUTO_DISCOVER=0` it falls back to legacy
|
||||
opt-IN: only PRs carrying `merge-queue` are considered.
|
||||
3. Rejects fork PRs because the queue may only update same-repo branches.
|
||||
4. If the PR head does not contain current `main`, calls Gitea's
|
||||
`/pulls/{n}/update?style=merge` endpoint and waits for CI on the new head.
|
||||
6. Merges only after the current PR head has required contexts green:
|
||||
- `CI / all-required (pull_request)`
|
||||
- `sop-checklist / all-items-acked (pull_request)`
|
||||
5. Merges only when, on the PR's CURRENT head sha:
|
||||
- `>= required_approvals` distinct genuine official `APPROVED` reviews from
|
||||
the recognised reviewer set (read from branch protection; default 2),
|
||||
- no open official `REQUEST_CHANGES`,
|
||||
- every branch-protection-required status context is green, and
|
||||
- the PR is `mergeable` (Gitea returns `True`; `None`/`False` = wait).
|
||||
|
||||
The workflow is serialized with `concurrency`, so two queued PRs cannot be
|
||||
The merge bar is unchanged by auto-discovery — only WHICH PRs are considered
|
||||
changes. The workflow is serialized with `concurrency`, so two PRs cannot be
|
||||
merged against the same observed `main`.
|
||||
|
||||
## Operator Commands
|
||||
|
||||
Queue a PR:
|
||||
Queue a PR (optional — auto-discovery already considers every ready PR; the
|
||||
label is just visible metadata):
|
||||
|
||||
```bash
|
||||
curl -fsS -X POST \
|
||||
@@ -37,7 +50,8 @@ curl -fsS -X POST \
|
||||
-d '{"labels":["merge-queue"]}'
|
||||
```
|
||||
|
||||
Temporarily hold a queued PR:
|
||||
Keep a PR OUT of autonomous merging (opt-OUT — use `merge-queue-hold`,
|
||||
`do-not-auto-merge`, or `wip`):
|
||||
|
||||
```bash
|
||||
curl -fsS -X POST \
|
||||
@@ -56,9 +70,11 @@ REPO=molecule-ai/molecule-core \
|
||||
WATCH_BRANCH=main \
|
||||
QUEUE_LABEL=merge-queue \
|
||||
HOLD_LABEL=merge-queue-hold \
|
||||
AUTO_DISCOVER=1 \
|
||||
OPT_OUT_LABELS=do-not-auto-merge,wip \
|
||||
REVIEWER_SET=agent-reviewer,agent-researcher,agent-reviewer-cr2 \
|
||||
UPDATE_STYLE=merge \
|
||||
REQUIRED_CONTEXTS='CI / all-required (pull_request),sop-checklist / all-items-acked (pull_request)' \
|
||||
python3 .gitea/scripts/gitea-merge-queue.py
|
||||
python3 .gitea/scripts/gitea-merge-queue.py --dry-run
|
||||
```
|
||||
|
||||
Dry run:
|
||||
|
||||
@@ -35,6 +35,7 @@ Every production CI/CD PR must include concrete answers for:
|
||||
- Verification: how production state is proven after deployment.
|
||||
- Logging: proof that CI logs do not contain raw production runtime, SSM, or secret-adjacent output.
|
||||
- Rollback: the exact command, variable, or workflow to return to a known-good tag/digest.
|
||||
- No fail-open gates: required checks fail loud + closed on protected contexts (no skip/`|| true`/`403`-as-pass). See `runbooks/dev-sop.md` § *Fail-closed CI integrity*.
|
||||
|
||||
## Human Review
|
||||
|
||||
|
||||
@@ -50,8 +50,22 @@ check_category() {
|
||||
repo=$(echo "$MANIFEST_JSON" | jq -r ".${category}[$i].repo")
|
||||
TOTAL=$((TOTAL + 1))
|
||||
|
||||
# Check repo existence via Gitea API (public endpoint, no auth needed)
|
||||
http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "${GITEA_API}/${repo}" 2>/dev/null || true)
|
||||
# Check repo existence via Gitea API. Many manifest repos are PRIVATE
|
||||
# (e.g. the workspace templates), so an *unauthenticated* GET returns
|
||||
# 404 even when the repo exists — indistinguishable from a genuinely
|
||||
# missing repo. We therefore authenticate with the same token
|
||||
# clone-manifest.sh uses (MOLECULE_GITEA_TOKEN). A 404 *with* a valid
|
||||
# token still means the repo is truly missing, which is what we want
|
||||
# to catch. If the token is unset (local dev), fall back to an
|
||||
# unauthenticated request — private repos will then 404, so run the
|
||||
# check in CI where the token is present.
|
||||
if [ -n "${MOLECULE_GITEA_TOKEN:-}" ]; then
|
||||
http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 \
|
||||
-H "Authorization: token ${MOLECULE_GITEA_TOKEN}" \
|
||||
"${GITEA_API}/${repo}" 2>/dev/null || true)
|
||||
else
|
||||
http_code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 "${GITEA_API}/${repo}" 2>/dev/null || true)
|
||||
fi
|
||||
|
||||
if [ "$http_code" != "200" ]; then
|
||||
echo "::error::manifest.json ${category} entry '${name}' → repo '${repo}' returned HTTP ${http_code} (expected 200). Delete the manifest entry BEFORE deleting the repo." >&2
|
||||
|
||||
+51
-22
@@ -46,46 +46,67 @@ cleanup() {
|
||||
trap cleanup EXIT INT TERM
|
||||
|
||||
# ─────────────────────────────────────────────── 1. dev-mode auth posture
|
||||
|
||||
# The AdminAuth middleware closes its fail-open the moment the first
|
||||
# workspace token lands in the DB — at which point /workspaces and
|
||||
# other admin routes 401 unless the caller has either ADMIN_TOKEN or
|
||||
# the dev-mode escape hatch. The canvas at localhost:3000 has no
|
||||
# bearer token to send, so without one of those two paths it can't
|
||||
# call admin endpoints after a workspace exists.
|
||||
#
|
||||
# For local dev the right posture is the dev-mode escape hatch:
|
||||
# SECURITY (harden/no-fail-open-auth): the workspace-server auth chain is
|
||||
# now fail-CLOSED in EVERY environment, dev included. There is NO dev-mode
|
||||
# fail-open escape hatch anymore — AdminAuth / WorkspaceAuth / discovery all
|
||||
# require a real credential. So local dev must AUTHENTICATE, not run open.
|
||||
#
|
||||
# MOLECULE_ENV=development AND ADMIN_TOKEN unset
|
||||
# The clean way to keep the canvas working locally is to provision a
|
||||
# deterministic ADMIN_TOKEN and hand the matching NEXT_PUBLIC_ADMIN_TOKEN to
|
||||
# the canvas bundle. The canvas already attaches `Authorization: Bearer
|
||||
# $NEXT_PUBLIC_ADMIN_TOKEN` on every platform call (canvas/src/lib/api.ts),
|
||||
# and next.config.ts warns if the pair is half-set. We set BOTH here.
|
||||
#
|
||||
# That makes middleware.isDevModeFailOpen() return true and lets the
|
||||
# canvas keep working without a bearer. Setting ADMIN_TOKEN here
|
||||
# would BREAK the canvas (it has no way to read that token in dev).
|
||||
# MOLECULE_ENV=development — dev conveniences (loopback bind, relaxed
|
||||
# rate limit). NOT an auth lever.
|
||||
# ADMIN_TOKEN=<dev value> — server-side bearer AdminAuth/WorkspaceAuth
|
||||
# enforce (Tier-2b). Real credential.
|
||||
# NEXT_PUBLIC_ADMIN_TOKEN — same value, baked into the canvas bundle so
|
||||
# the browser sends the matching bearer.
|
||||
#
|
||||
# For SaaS the platform is provisioned with ADMIN_TOKEN set AND
|
||||
# MOLECULE_ENV=production — either one closes the hatch. So the dev
|
||||
# mode signal here is safe (it's only active when both other knobs
|
||||
# are absent).
|
||||
# For SaaS the platform is provisioned with a random ADMIN_TOKEN + the
|
||||
# canvas image baked with the matching NEXT_PUBLIC_ADMIN_TOKEN, plus
|
||||
# MOLECULE_ENV=production. Same shape, stronger secret.
|
||||
if [ -f "$ENV_FILE" ] && grep -q '^MOLECULE_ENV=' "$ENV_FILE"; then
|
||||
echo "==> Reusing MOLECULE_ENV from existing .env"
|
||||
else
|
||||
echo "==> Setting MOLECULE_ENV=development in .env (dev-mode auth hatch)"
|
||||
echo "==> Setting MOLECULE_ENV=development in .env"
|
||||
{
|
||||
if [ -f "$ENV_FILE" ]; then
|
||||
cat "$ENV_FILE"
|
||||
echo ""
|
||||
fi
|
||||
echo "# Generated by scripts/dev-start.sh on $(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
echo "# Local-dev auth posture: dev-mode fail-open lets the canvas at"
|
||||
echo "# localhost:3000 call admin endpoints without a bearer token."
|
||||
echo "# DO NOT set ADMIN_TOKEN here in dev — it would close the hatch"
|
||||
echo "# and the canvas would 401 on every admin call."
|
||||
echo "# Local-dev conveniences (loopback bind, relaxed rate limit)."
|
||||
echo "# Auth is fail-closed even in dev — see ADMIN_TOKEN below."
|
||||
echo "MOLECULE_ENV=development"
|
||||
} > "$ENV_FILE.tmp"
|
||||
mv "$ENV_FILE.tmp" "$ENV_FILE"
|
||||
echo " Saved to $ENV_FILE"
|
||||
fi
|
||||
|
||||
# Provision a deterministic dev ADMIN_TOKEN (idempotent — preserved across
|
||||
# re-runs). This is the credential the canvas authenticates with locally; it
|
||||
# is NOT a secret (it only guards your own localhost stack), so a fixed,
|
||||
# well-known value is fine and keeps re-runs reproducible.
|
||||
DEV_ADMIN_TOKEN="dev-local-admin-token"
|
||||
if [ -f "$ENV_FILE" ] && grep -q '^ADMIN_TOKEN=' "$ENV_FILE"; then
|
||||
echo "==> Reusing ADMIN_TOKEN from existing .env"
|
||||
else
|
||||
echo "==> Provisioning dev ADMIN_TOKEN in .env (fail-closed auth, authenticated canvas)"
|
||||
{
|
||||
cat "$ENV_FILE"
|
||||
echo ""
|
||||
echo "# Dev ADMIN_TOKEN — the canvas authenticates with this locally."
|
||||
echo "# Auth is fail-closed; without a matching bearer the canvas 401s."
|
||||
echo "# Fixed value is fine: it only guards your localhost stack."
|
||||
echo "ADMIN_TOKEN=$DEV_ADMIN_TOKEN"
|
||||
} > "$ENV_FILE.tmp"
|
||||
mv "$ENV_FILE.tmp" "$ENV_FILE"
|
||||
echo " Saved to $ENV_FILE"
|
||||
fi
|
||||
|
||||
# Source .env so the platform inherits ADMIN_TOKEN (and anything else
|
||||
# the user has added — e.g. ANTHROPIC_API_KEY for skipping the canvas
|
||||
# Secrets UI). `set -a` exports every assignment in the sourced file
|
||||
@@ -95,6 +116,12 @@ set -a
|
||||
. "$ENV_FILE"
|
||||
set +a
|
||||
|
||||
# The canvas reads NEXT_PUBLIC_ADMIN_TOKEN at build/dev time and attaches it
|
||||
# as the bearer on every platform call. Mirror the server-side ADMIN_TOKEN
|
||||
# into it so the matched-pair guard in canvas/next.config.ts is satisfied and
|
||||
# the browser authenticates. Exported for the `npm run dev` child below.
|
||||
export NEXT_PUBLIC_ADMIN_TOKEN="$ADMIN_TOKEN"
|
||||
|
||||
# ─────────────────────────────────────────────── 2. infra + templates
|
||||
|
||||
# Use setup.sh (not raw docker-compose) so the template registry gets
|
||||
@@ -195,7 +222,9 @@ cat <<EOF
|
||||
Molecule AI dev environment ready
|
||||
|
||||
Canvas: http://localhost:3000
|
||||
Platform: http://localhost:8080
|
||||
Platform: http://localhost:8080 (bound to loopback in dev)
|
||||
Auth: fail-closed — canvas authenticates with the dev ADMIN_TOKEN
|
||||
(ADMIN_TOKEN + NEXT_PUBLIC_ADMIN_TOKEN, see .env)
|
||||
Logs: /tmp/molecule-platform.log
|
||||
/tmp/molecule-canvas.log
|
||||
|
||||
|
||||
+49
-3
@@ -17,6 +17,33 @@ e2e_extract_token() {
|
||||
python3 "$(dirname "${BASH_SOURCE[0]}")/_extract_token.py"
|
||||
}
|
||||
|
||||
# Populate a curl-args array with the platform admin bearer, IF one is set.
|
||||
#
|
||||
# AdminAuth (workspace-server/internal/middleware/wsauth_middleware.go:161)
|
||||
# fail-opens ONLY while ADMIN_TOKEN is unset AND no workspace token exists yet
|
||||
# (devmode.go:50). The e2e-api CI job now sets ADMIN_TOKEN on the platform and
|
||||
# exports the matching MOLECULE_ADMIN_TOKEN here, which flips fail-open OFF — so
|
||||
# every admin-gated route (GET/POST/DELETE /workspaces, /events, /bundles,
|
||||
# /org/import, …) now requires the EXACT ADMIN_TOKEN as bearer (Tier-2b rejects
|
||||
# workspace bearers, wsauth_middleware.go:250). Helpers that hit admin routes
|
||||
# (e2e_cleanup_all_workspaces, e2e_delete_workspace's default path) must send it.
|
||||
#
|
||||
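# Guarded if-set so a bootstrap/dev platform with no admin token (fail-open)
|
||||
# still works with zero auth. Mirrors e2e_mint_workspace_token's admin_auth.
|
||||
# NOTE(review): the admin-auth runbook now describes AdminAuth as fail-closed in
|
||||
# every environment; if so, a run with no token exported will 401 on admin
|
||||
# routes and the fail-open rationale above is stale — confirm.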
|
||||
#
|
||||
# Usage:
|
||||
# local admin_auth=(); e2e_admin_auth_args admin_auth
|
||||
# curl -s "$BASE/workspaces" ${admin_auth[@]+"${admin_auth[@]}"}
|
||||
e2e_admin_auth_args() {
|
||||
local _outname="$1"
|
||||
local _bearer="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
|
||||
if [ -n "$_bearer" ]; then
|
||||
eval "$_outname=(-H \"Authorization: Bearer \$_bearer\")"
|
||||
else
|
||||
eval "$_outname=()"
|
||||
fi
|
||||
}
|
||||
|
||||
# Delete every workspace currently on the platform. Use at the top of a
|
||||
# script so count-based assertions are reproducible across runs.
|
||||
# Mint a fresh workspace auth token via the real admin endpoint.
|
||||
@@ -53,19 +80,38 @@ e2e_delete_workspace() {
|
||||
if [ -z "$wid" ]; then
|
||||
return 0
|
||||
fi
|
||||
# DELETE /workspaces/:id is behind AdminAuth (List/Delete are gated at
|
||||
# router.go:165-167); the GET /workspaces/:id used for the name lookup is
|
||||
# public (router.go:155). Callers that already pass a per-workspace bearer (e.g.
|
||||
# test_api.sh's NEW_TOKEN) authenticate themselves; the cleanup-trap callers
|
||||
# in poll-mode/notify/priority pass NO curl args and rely on this fallback to
|
||||
# the platform admin bearer so the DELETE doesn't 401 once ADMIN_TOKEN is set.
|
||||
if [ "${#curl_args[@]}" -eq 0 ]; then
|
||||
e2e_admin_auth_args curl_args
|
||||
fi
|
||||
# ${curl_args[@]+"…"} guard: under `set -u` an empty array expands to an
|
||||
# "unbound variable" error on bash <4.4 (macOS 3.2, some Linux). This form
|
||||
# expands to nothing when the array is empty. Callers from the priority-
|
||||
# runtimes EXIT trap pass no extra curl args, so the array IS empty there —
|
||||
# without the guard the trap aborts non-zero AFTER the gate already passed,
|
||||
# turning a validated run RED. (Same idiom already used for CREATED_WSIDS.)
|
||||
if [ -z "$name" ]; then
|
||||
name=$(curl -s "$BASE/workspaces/$wid" "${curl_args[@]}" | python3 -c "import json,sys
|
||||
name=$(curl -s "$BASE/workspaces/$wid" ${curl_args[@]+"${curl_args[@]}"} | python3 -c "import json,sys
|
||||
try:
|
||||
print(json.load(sys.stdin).get('name',''))
|
||||
except Exception:
|
||||
pass" 2>/dev/null || true)
|
||||
fi
|
||||
curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true" \
|
||||
-H "X-Confirm-Name: $name" "${curl_args[@]}" > /dev/null || true
|
||||
-H "X-Confirm-Name: $name" ${curl_args[@]+"${curl_args[@]}"} > /dev/null || true
|
||||
}
|
||||
|
||||
e2e_cleanup_all_workspaces() {
|
||||
curl -s "$BASE/workspaces" | python3 -c "import json,sys
|
||||
# GET /workspaces (list) is AdminAuth-gated (router.go:165). Send the platform
|
||||
# admin bearer if one is set so the list doesn't 401 → empty → no cleanup.
|
||||
local _admin_auth=()
|
||||
e2e_admin_auth_args _admin_auth
|
||||
curl -s "$BASE/workspaces" ${_admin_auth[@]+"${_admin_auth[@]}"} | python3 -c "import json,sys
|
||||
try:
|
||||
[print(f\"{w.get('id','')}\\t{w.get('name','')}\") for w in json.load(sys.stdin)]
|
||||
except Exception:
|
||||
|
||||
+53
-16
@@ -11,10 +11,10 @@
|
||||
# default + 401, see PR #1714.)
|
||||
#
|
||||
# claude-code → auth-aware:
|
||||
# E2E_MINIMAX_API_KEY → "minimax:MiniMax-M2.7"
|
||||
# (colon-namespaced BYOK id; bare
|
||||
# "MiniMax-M2" 400s on a deploy-skewed
|
||||
# staging registry — #2263)
|
||||
# E2E_MINIMAX_API_KEY → "MiniMax-M2.7"
|
||||
# (BARE registered BYOK id — see the
|
||||
# claude-code dispatch arm below for
|
||||
# why bare, not the colon form)
|
||||
# E2E_ANTHROPIC_API_KEY → "claude-sonnet-4-6"
|
||||
# otherwise → "sonnet"
|
||||
#
|
||||
@@ -83,25 +83,62 @@ pick_model_slug() {
|
||||
fi
|
||||
case "$runtime" in
|
||||
hermes) printf 'openai/gpt-4o' ;;
|
||||
claude-code)
|
||||
# seo-agent is a claude-code-adapter template VARIANT selected by
|
||||
# template name (template="seo-agent"), not a distinct registry runtime
|
||||
# (it is absent from manifest.json + runtime_registry.go). Its config.yaml
|
||||
# declares `runtime: claude-code` and copies the claude-code `providers:`
|
||||
# block (providers.yaml:21 "The same block is copy-pasted into the seo-agent
|
||||
# template"), so its model dispatch is IDENTICAL to claude-code's: the BARE
|
||||
# registered MiniMax BYOK id (the staging-default key path), else direct
|
||||
# Anthropic, else the OAuth `sonnet` alias. Sharing the claude-code branch
|
||||
# keeps the SSOT one place — a seo-agent run is just a claude-code run
|
||||
# behind a productized template skin, and (because the runtime resolves to
|
||||
# claude-code server-side) its model must be a *claude-code-registered* form.
|
||||
claude-code|seo-agent)
|
||||
if [ -n "${E2E_MINIMAX_API_KEY:-}" ]; then
|
||||
# Namespaced (colon) BYOK id, not bare "MiniMax-M2" (#2263 deploy-skew):
|
||||
# bare ids can lag the deployed staging ws-server's compiled registry,
|
||||
# so workspace-create's validateRegisteredModelForRuntime 400s the bare
|
||||
# form on an older image. The colon-namespaced `minimax:MiniMax-M2.7`
|
||||
# resolves the same way the proven-working sibling `moonshot/kimi-k2.6`
|
||||
# does. It stays in the BYOK `minimax` arm (providers.yaml:851), so
|
||||
# DeriveProvider -> provider_selection=minimax (BYOK) and the #1994
|
||||
# byok-not-platform guard (test_staging_full_saas.sh:1000) still passes —
|
||||
# unlike the slash/platform form `minimax/MiniMax-M2.7`, which resolves
|
||||
# to provider=platform and would trip that guard.
|
||||
printf 'minimax:MiniMax-M2.7'
|
||||
# BARE registered BYOK id `MiniMax-M2.7`, NOT the colon form
|
||||
# `minimax:MiniMax-M2.7`. On the claude-code runtime the three MiniMax
|
||||
# spellings have three DISTINCT, intentional outcomes (provider-registry
|
||||
# SSOT, internal#718; pinned by workspace-server/internal/providers/
|
||||
# derive_provider_matrix_test.go, the #2263/#2274 "colon-vs-slash-vs-bare
|
||||
# triple"):
|
||||
# * bare "MiniMax-M2.7" -> provider=minimax (BYOK, MINIMAX_API_KEY)
|
||||
# * slash "minimax/MiniMax-M2.7" -> provider=platform (CP proxy bills)
|
||||
# * colon "minimax:MiniMax-M2.7" -> UNREGISTERED 422 (the claude-code
|
||||
# adapter CANNOT strip the `minimax:` prefix, so the id is not a
|
||||
# registered model for runtime claude-code; create-validation,
|
||||
# internal#718, rejects it)
|
||||
# The bare form is registered in the claude-code `minimax` arm
|
||||
# (registry_gen.go:88 Models=[MiniMax-M2,MiniMax-M2.7,
|
||||
# MiniMax-M2.7-highspeed,MiniMax-M3]) and derives provider=minimax (BYOK
|
||||
# via MINIMAX_API_KEY), so it satisfies the #1994 byok-not-platform guard
|
||||
# (test_staging_full_saas.sh) AND passes create-validation — unlike the
|
||||
# colon form, which 422'd "5/11 Provisioning parent workspace" with
|
||||
# UNREGISTERED_MODEL_FOR_RUNTIME on real staging (job 295075).
|
||||
# NOTE: the colon form IS the correct BYOK-minimax id on openclaw/hermes
|
||||
# (those adapters DO strip `minimax:` — matrix test), but this dispatch
|
||||
# arm only emits for claude-code/seo-agent, where bare is the right form.
|
||||
printf 'MiniMax-M2.7'
|
||||
elif [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
|
||||
printf 'claude-sonnet-4-6'
|
||||
else
|
||||
printf 'sonnet'
|
||||
fi
|
||||
;;
|
||||
# google-adk: Gemini via two distinct provider arms in providers.yaml
|
||||
# runtimes.google-adk:
|
||||
# * platform arm → `platform:gemini-2.5-pro` (keyless Vertex via the CP
|
||||
# LLM proxy + server-side WIF mint; the org-compliant PROD path). This
|
||||
# id is selected via E2E_LLM_PATH=platform above, NOT here.
|
||||
# * google arm (AI Studio BYOK) → bare `gemini-2.5-pro` with the tenant's
|
||||
# own GOOGLE_API_KEY. This is the staging-exercisable path (no WIF
|
||||
# provisioning needed) and is what this branch selects.
|
||||
# The workflow may further override with E2E_MODEL_SLUG=google_genai:gemini-2.5-pro
|
||||
# (the adapter's provider:model spelling) — E2E_MODEL_SLUG wins at the top
|
||||
# of this function, so both forms are supported.
|
||||
google-adk)
|
||||
printf 'gemini-2.5-pro'
|
||||
;;
|
||||
*) printf 'openai/gpt-4o' ;; # safest fallback (matches hermes)
|
||||
esac
|
||||
}
|
||||
|
||||
+61
-41
@@ -15,18 +15,27 @@ SUM_AUTH=()
|
||||
ECHO_URL="https://example.com/echo-agent"
|
||||
SUM_URL="https://example.com/summarizer-agent"
|
||||
|
||||
# AdminAuth-gated calls need a bearer token once any workspace token
|
||||
# exists in the DB. ADMIN_TOKEN is populated after the first workspace
|
||||
# create + real token mint. acurl = "authenticated curl".
|
||||
ADMIN_TOKEN=""
|
||||
# AdminAuth-gated calls (GET/POST/DELETE /workspaces, /events, /bundles)
|
||||
# require the platform admin bearer once ADMIN_TOKEN is set on the server.
|
||||
# Tier-2b (wsauth_middleware.go:250) REJECTS workspace bearer tokens on admin
|
||||
# routes when ADMIN_TOKEN is set, so admin calls MUST send the exact ADMIN_TOKEN
|
||||
# value — which the e2e-api CI job exports here as MOLECULE_ADMIN_TOKEN. acurl =
|
||||
# "admin curl": it always sends the platform admin bearer (if one is set).
|
||||
#
|
||||
# Guarded if-set: a fresh self-hosted/dev platform with no ADMIN_TOKEN fail-opens
|
||||
# (devmode.go:50), so sending no bearer still works there.
|
||||
ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
|
||||
ADMIN_AUTH=()
|
||||
[ -n "$ADMIN_BEARER" ] && ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
|
||||
acurl() {
|
||||
if [ -n "$ADMIN_TOKEN" ]; then
|
||||
curl -s -H "Authorization: Bearer $ADMIN_TOKEN" "$@"
|
||||
else
|
||||
curl -s "$@"
|
||||
fi
|
||||
curl -s ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} "$@"
|
||||
}
|
||||
|
||||
# WORKSPACE_TOKEN holds a per-workspace bearer for the WorkspaceAuth-gated
|
||||
# routes (PATCH /workspaces/:id, /activity, …). It is set after the first
|
||||
# create+mint and is NOT interchangeable with the admin bearer.
|
||||
WORKSPACE_TOKEN=""
|
||||
|
||||
# Pre-test cleanup: remove any workspaces left over from prior runs so
|
||||
# count-based assertions ("empty", "count=2") are reproducible.
|
||||
e2e_cleanup_all_workspaces
|
||||
@@ -57,19 +66,22 @@ check "GET /health" '"status":"ok"' "$R"
|
||||
R=$(acurl "$BASE/workspaces")
|
||||
check "GET /workspaces (empty)" '[]' "$R"
|
||||
|
||||
# Test 3: Create workspace A (AdminAuth fail-open — no tokens exist yet)
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1,"runtime":"external","external":true}')
|
||||
# Test 3: Create workspace A. POST /workspaces is AdminAuth-gated (router.go:166);
|
||||
# send the admin bearer (acurl). On a fail-open dev platform acurl sends nothing
|
||||
# and the create still works.
|
||||
R=$(acurl -X POST "$BASE/workspaces" -H "Content-Type: application/json" -d '{"name":"Echo Agent","tier":1,"runtime":"external","external":true}')
|
||||
check "POST /workspaces (create echo)" '"status":"awaiting_agent"' "$R"
|
||||
ECHO_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['id'])")
|
||||
|
||||
ADMIN_TOKEN=$(echo "$R" | e2e_extract_token)
|
||||
if [ -z "$ADMIN_TOKEN" ]; then
|
||||
ADMIN_TOKEN=$(e2e_mint_workspace_token "$ECHO_ID" 2>/dev/null || echo "")
|
||||
# Per-workspace token for Echo, for the WorkspaceAuth-gated routes below.
|
||||
WORKSPACE_TOKEN=$(echo "$R" | e2e_extract_token)
|
||||
if [ -z "$WORKSPACE_TOKEN" ]; then
|
||||
WORKSPACE_TOKEN=$(e2e_mint_workspace_token "$ECHO_ID" 2>/dev/null || echo "")
|
||||
fi
|
||||
if [ -n "$ADMIN_TOKEN" ]; then
|
||||
echo " (acquired admin token: ${ADMIN_TOKEN:0:8}...)"
|
||||
if [ -n "$WORKSPACE_TOKEN" ]; then
|
||||
echo " (acquired Echo workspace token: ${WORKSPACE_TOKEN:0:8}...)"
|
||||
else
|
||||
echo " WARNING: no admin token acquired — subsequent AdminAuth calls will fail"
|
||||
echo " WARNING: no Echo workspace token acquired — WorkspaceAuth calls will fail"
|
||||
fi
|
||||
|
||||
# Test 4: Create workspace B (needs bearer — tokens now exist in DB)
|
||||
@@ -98,7 +110,7 @@ check "GET /workspaces/:id (agent_card null)" '"agent_card":null' "$R"
|
||||
# Test 7: Register echo — use workspace-specific token (from real admin
|
||||
# endpoint), not the admin token. C18 requires a token issued TO THIS
|
||||
# workspace, not just any valid token.
|
||||
ECHO_WS_TOKEN="$ADMIN_TOKEN"
|
||||
ECHO_WS_TOKEN="$WORKSPACE_TOKEN"
|
||||
[ -n "$ECHO_WS_TOKEN" ] && ECHO_AUTH=(-H "Authorization: Bearer $ECHO_WS_TOKEN")
|
||||
R=$(curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" \
|
||||
"${ECHO_AUTH[@]}" \
|
||||
@@ -159,26 +171,29 @@ R=$(curl -s -X POST "$BASE/registry/check-access" -H "Content-Type: application/
|
||||
-d "{\"caller_id\":\"$ECHO_ID\",\"target_id\":\"$SUM_ID\"}")
|
||||
check "POST /registry/check-access (same-org allowed)" '"allowed":true' "$R"
|
||||
|
||||
# Test 15: PATCH workspace (update position)
|
||||
R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
|
||||
# Test 15: PATCH workspace (update position). PATCH /workspaces/:id is
|
||||
# WorkspaceAuth-gated (router.go:227 — #680 IDOR fix), so it needs Echo's OWN
|
||||
# bearer, NOT the admin bearer (WorkspaceAuth rejects the admin token).
|
||||
R=$(curl -s "${ECHO_AUTH[@]}" -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"x":100,"y":200}')
|
||||
check "PATCH /workspaces/:id (position)" '"status":"updated"' "$R"
|
||||
|
||||
R=$(acurl "$BASE/workspaces/$ECHO_ID")
|
||||
check "Position saved (x=100)" '"x":100' "$R"
|
||||
check "Position saved (y=200)" '"y":200' "$R"
|
||||
|
||||
# Test 16: PATCH workspace (update name)
|
||||
R=$(acurl -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"name":"Echo Agent v2"}')
|
||||
# Test 16: PATCH workspace (update name) — WorkspaceAuth-gated; use Echo's token.
|
||||
R=$(curl -s "${ECHO_AUTH[@]}" -X PATCH "$BASE/workspaces/$ECHO_ID" -H "Content-Type: application/json" -d '{"name":"Echo Agent v2"}')
|
||||
check "PATCH /workspaces/:id (name)" '"status":"updated"' "$R"
|
||||
|
||||
R=$(acurl "$BASE/workspaces/$ECHO_ID")
|
||||
check "Name updated" '"name":"Echo Agent v2"' "$R"
|
||||
|
||||
# Test 17: Events (#165 / PR #167 — now admin-gated, bearer required)
|
||||
R=$(acurl "$BASE/events" -H "Authorization: Bearer $ECHO_TOKEN")
|
||||
# Test 17: Events (#165 / PR #167 — admin-gated; the admin bearer is required,
|
||||
# and Tier-2b rejects a workspace bearer here, so use acurl's admin token alone).
|
||||
R=$(acurl "$BASE/events")
|
||||
check "GET /events (has events)" 'WORKSPACE_ONLINE' "$R"
|
||||
|
||||
R=$(acurl "$BASE/events/$ECHO_ID" -H "Authorization: Bearer $ECHO_TOKEN")
|
||||
R=$(acurl "$BASE/events/$ECHO_ID")
|
||||
check "GET /events/:id (has events for echo)" 'WORKSPACE_ONLINE' "$R"
|
||||
|
||||
# Test 18: Update card
|
||||
@@ -295,7 +310,7 @@ check "active_tasks cleared" '"active_tasks":0' "$R"
|
||||
# endpoint is admin-auth gated and keeps the full record, so operators
|
||||
# can still see task progress from the dashboard without exposing it
|
||||
# over the public per-workspace GET.
|
||||
R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
|
||||
R=$(acurl "$BASE/workspaces")
|
||||
check "current_task in list response" '"current_task"' "$R"
|
||||
|
||||
# Test 21: Delete
|
||||
@@ -306,18 +321,20 @@ check "current_task in list response" '"current_task"' "$R"
|
||||
# Delete the CHILD (Summarizer) here instead: a child delete does NOT cascade
|
||||
# upward, so the parent Echo survives and count=1 holds. The bundle round-trip
|
||||
# below needs Summarizer's exported config, so capture it BEFORE this delete.
|
||||
BUNDLE=$(curl -s "$BASE/bundles/export/$SUM_ID" -H "Authorization: Bearer $SUM_TOKEN")
|
||||
# GET /bundles/export/:id is admin-gated (router.go:741) — use the admin bearer.
|
||||
BUNDLE=$(acurl "$BASE/bundles/export/$SUM_ID")
|
||||
check "GET /bundles/export/:id" '"name":"Summarizer Agent"' "$BUNDLE"
|
||||
ORIG_NAME=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['name'])")
|
||||
ORIG_TIER=$(echo "$BUNDLE" | python3 -c "import sys,json; print(json.load(sys.stdin)['tier'])")
|
||||
|
||||
# DELETE /workspaces/:id is admin-gated (router.go:167). X-Confirm-Name must
|
||||
# still match the workspace name even with admin auth.
|
||||
R=$(acurl -X DELETE "$BASE/workspaces/$SUM_ID?confirm=true" \
|
||||
-H "Authorization: Bearer $SUM_TOKEN" \
|
||||
-H "X-Confirm-Name: Summarizer Agent")
|
||||
check "DELETE /workspaces/:id" '"status":"removed"' "$R"
|
||||
|
||||
# Parent Echo must survive a child delete — list as Echo and expect count=1.
|
||||
R=$(curl -s "$BASE/workspaces" -H "Authorization: Bearer $ECHO_TOKEN")
|
||||
# Parent Echo must survive a child delete — list (admin) and expect count=1.
|
||||
R=$(acurl "$BASE/workspaces")
|
||||
COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
|
||||
check "List after delete (count=1)" "1" "$COUNT"
|
||||
|
||||
@@ -328,21 +345,21 @@ check "List after delete (count=1)" "1" "$COUNT"
|
||||
echo ""
|
||||
echo "--- Bundle Round-Trip Test ---"
|
||||
|
||||
# Delete the remaining parent Echo — use ECHO_TOKEN (per-workspace) for
|
||||
# WorkspaceAuth and ADMIN_TOKEN for the AdminAuth layer.
|
||||
# Delete the remaining parent Echo — DELETE is admin-gated (router.go:167);
|
||||
# the platform admin bearer (acurl) authorizes it. X-Confirm-Name still required.
|
||||
R=$(acurl -X DELETE "$BASE/workspaces/$ECHO_ID?confirm=true" \
|
||||
-H "Authorization: Bearer $ECHO_TOKEN" \
|
||||
-H "X-Confirm-Name: Echo Agent v2")
|
||||
check "Delete before re-import" '"status":"removed"' "$R"
|
||||
|
||||
# After deleting both workspaces, all per-workspace tokens are revoked.
|
||||
# Clear the now-revoked admin bearer so acurl can use fresh-install fail-open.
|
||||
ADMIN_TOKEN=""
|
||||
# Both workspaces are now deleted. The platform-level ADMIN_TOKEN env is still
|
||||
# set, so admin routes still require the admin bearer (fail-open does NOT
|
||||
# re-engage just because the token table emptied) — keep using acurl's bearer.
|
||||
R=$(acurl "$BASE/workspaces")
|
||||
COUNT=$(echo "$R" | python3 -c "import sys,json; print(len(json.load(sys.stdin)))")
|
||||
check "All workspaces deleted (count=0)" "0" "$COUNT"
|
||||
|
||||
# Re-import from the exported bundle (AdminAuth fail-open — no live tokens)
|
||||
# Re-import from the exported bundle. POST /bundles/import is admin-gated
|
||||
# (router.go:742) — acurl sends the admin bearer.
|
||||
R=$(acurl -X POST "$BASE/bundles/import" -H "Content-Type: application/json" -d "$BUNDLE")
|
||||
check "POST /bundles/import" '"status":"provisioning"' "$R"
|
||||
NEW_ID=$(echo "$R" | python3 -c "import sys,json; print(json.load(sys.stdin)['workspace_id'])")
|
||||
@@ -398,12 +415,15 @@ check "Register re-imported workspace" '"status":"registered"' "$R"
|
||||
REG_NEW_TOKEN=$(echo "$R" | e2e_extract_token)
|
||||
[ -n "$REG_NEW_TOKEN" ] && NEW_TOKEN="$REG_NEW_TOKEN"
|
||||
|
||||
# Re-export and verify agent_card survives the round-trip (#165 / PR #167 — admin-gated)
|
||||
REBUNDLE=$(curl -s "$BASE/bundles/export/$NEW_ID" -H "Authorization: Bearer $NEW_TOKEN")
|
||||
# Re-export and verify agent_card survives the round-trip (#165 / PR #167 —
|
||||
# GET /bundles/export/:id is admin-gated; use the admin bearer).
|
||||
REBUNDLE=$(acurl "$BASE/bundles/export/$NEW_ID")
|
||||
check "Re-exported bundle has agent_card" '"agent_card"' "$REBUNDLE"
|
||||
|
||||
# Clean up — use the token just issued to the re-imported workspace
|
||||
e2e_delete_workspace "$NEW_ID" "$ORIG_NAME" -H "Authorization: Bearer $NEW_TOKEN"
|
||||
# Clean up — DELETE /workspaces/:id is admin-gated; pass no per-call auth so
|
||||
# e2e_delete_workspace falls back to the platform admin bearer (a workspace
|
||||
# bearer would be rejected by Tier-2b).
|
||||
e2e_delete_workspace "$NEW_ID" "$ORIG_NAME"
|
||||
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
|
||||
Executable
+468
@@ -0,0 +1,468 @@
|
||||
#!/usr/bin/env bash
|
||||
# GATING E2E for the social-channels outbound + discover + data-prune paths
|
||||
# (core#2332 P1.10). Closes three coverage gaps that were previously only
|
||||
# unit-mocked, so a regression in any of them goes RED in the required
|
||||
# `E2E API Smoke Test` lane instead of slipping through:
|
||||
#
|
||||
# (1) Channel SEND end-to-end. Every adapter's SendMessage was only ever
|
||||
# asserted by unit tests that reconstruct the payload by hand and POST
|
||||
# it themselves (see internal/channels/lark_test.go's "we can't change
|
||||
# the prefix const" comment) — nothing proved that a message submitted
|
||||
# through the LIVE platform API actually serializes and POSTs to a
|
||||
# provider endpoint. Here we stand up a local mock-upstream, point a
|
||||
# Slack Incoming-Webhook channel at it, send via
|
||||
# POST /channels/:id/send, and assert the MOCK RECEIVED the correctly
|
||||
# serialized {"text":"..."} body. Real serialize+POST, real HTTP stack,
|
||||
# no real Slack account.
|
||||
#
|
||||
# (2) Channel DISCOVER (POST /channels/discover). Had no test at all. We
|
||||
# point the Telegram discover path at a mock Bot API that serves
|
||||
# getMe + getUpdates and assert the discovered bot username + chat
|
||||
# round-trip back through the handler.
|
||||
#
|
||||
# (3) Workspace data-prune (RFC #734). The user-requested permanent delete
|
||||
# with ?purge=true prunes a workspace's durable child data (channels,
|
||||
# secrets, config, …). We create prunable data on a target workspace
|
||||
# AND a sibling, purge the target, then assert the target's child rows
|
||||
# are GONE while the sibling's SURVIVE.
|
||||
#
|
||||
# ── Test seam (production-inert) ────────────────────────────────────────
|
||||
# Adapters pin their outbound host to the real vendor (hooks.slack.com /
|
||||
# api.telegram.org). Two env-gated overrides — set ONLY by this lane, never
|
||||
# in any prod/staging deploy — let the live send/discover path target a
|
||||
# local mock so the round-trip is provable in CI:
|
||||
#
|
||||
# MOLECULE_CHANNELS_TEST_WEBHOOK_BASE (Slack webhook accept-prefix)
|
||||
# MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE (Telegram Bot API base)
|
||||
#
|
||||
# These must be present in the PLATFORM process env (the workflow exports
|
||||
# them via $GITHUB_ENV before "Start platform"), pointing at the fixed
|
||||
# loopback ports this script binds its mocks on. If they are absent the
|
||||
# platform rejects the mock URLs; under E2E_REQUIRE_LIVE=1 that is a hard
|
||||
# RED (the seam regressed / the workflow wiring broke), otherwise a LOUD
|
||||
# SKIP for ad-hoc local runs that didn't export them.
|
||||
#
|
||||
# NEVER fail-open: a missing assertion target fails the script.
|
||||
#
|
||||
# Required env (defaults shown):
|
||||
# BASE http://127.0.0.1:8080
|
||||
# MOLECULE_ADMIN_TOKEN (admin bearer; matches the platform's ADMIN_TOKEN)
|
||||
# E2E_CHANNELS_WEBHOOK_PORT 18099 (mock Slack webhook upstream)
|
||||
# E2E_CHANNELS_TELEGRAM_PORT 18098 (mock Telegram Bot API upstream)
|
||||
# E2E_REQUIRE_LIVE 0 (1 = seam-absent is RED, not skip)
|
||||
|
||||
set -uo pipefail
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source "$(dirname "$0")/_lib.sh" # sets BASE default + admin/token helpers
|
||||
|
||||
# Loopback ports the two mock upstreams bind on, and the require-live switch.
TELEGRAM_PORT="${E2E_CHANNELS_TELEGRAM_PORT:-18098}"
WEBHOOK_PORT="${E2E_CHANNELS_WEBHOOK_PORT:-18099}"
REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"

# URL prefixes the PLATFORM is expected to have been started with; a channel
# URL under these being accepted shows the platform env matches this script.
TELEGRAM_BASE="http://127.0.0.1:${TELEGRAM_PORT}"
WEBHOOK_BASE="http://127.0.0.1:${WEBHOOK_PORT}/"

# Assertion counters and per-run state (filled in as the run progresses).
FAIL=0
PASS=0
MOCK_PID=""
WORK_DIR="$(mktemp -d)"
WS_TARGET=""
WS_TARGET_TOK=""
WS_SIBLING=""
WS_SIBLING_TOK=""

# Admin bearer header, present only when a token was provided via env.
ADMIN_AUTH=()
ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
if [ -n "$ADMIN_BEARER" ]; then
  ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
fi
|
||||
|
||||
# pass LABEL — report one passing assertion and bump the PASS counter.
pass() {
  printf 'PASS: %s\n' "$1"
  PASS=$((PASS + 1))
}
|
||||
# fail LABEL [DETAIL] — report one failing assertion (plus an optional detail
# line) and bump the FAIL counter.
fail() {
  printf 'FAIL: %s\n' "$1"
  if [ -n "${2:-}" ]; then
    echo " $2"
  fi
  FAIL=$((FAIL + 1))
}
|
||||
|
||||
# loud_skip records a SKIP and exits according to E2E_REQUIRE_LIVE. NEVER
|
||||
# silently passes — it either hard-fails (require-live) or exits 0 with a
|
||||
# loud banner (ad-hoc local). Mirrors the require-live gate pattern used by
|
||||
# test_priority_runtimes_e2e.sh.
|
||||
loud_skip() {
  # $1: human-readable reason the seam could not be exercised.
  # Always terminates the script after running cleanup: exit 0 with a SKIP
  # banner by default, exit 1 with a failure banner when REQUIRE_LIVE is "1".
  local why="$1"
  local rule="============================================================"

  echo
  echo "$rule"

  if [ "$REQUIRE_LIVE" != "1" ]; then
    # Ad-hoc local run: skip, but say exactly how to enable the seam.
    echo "SKIP (loud): $why"
    echo "Set MOLECULE_CHANNELS_TEST_WEBHOOK_BASE=$WEBHOOK_BASE and"
    echo "MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE=$TELEGRAM_BASE in the"
    echo "PLATFORM env before starting it, then re-run. (CI sets these.)"
    echo "$rule"
    cleanup
    exit 0
  fi

  # Require-live lane: an unavailable seam is a hard failure.
  echo "E2E_REQUIRE_LIVE=1 but channels e2e seam is unavailable:"
  echo " $why"
  echo "This is a HARD FAILURE — the platform was not started with the"
  echo "channels test seam env (MOLECULE_CHANNELS_TEST_WEBHOOK_BASE /"
  echo "MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE) on the fixed loopback"
  echo "ports, or the seam regressed. Fix the workflow wiring or the seam."
  echo "$rule"
  cleanup
  exit 1
}
|
||||
|
||||
# cleanup — best-effort teardown: stop the mock upstream, hard-purge any
# workspaces this run created, and remove the scratch directory. Safe to call
# more than once (it is both called explicitly and installed as a trap).
cleanup() {
  set +e
  if [ -n "$MOCK_PID" ]; then
    kill "$MOCK_PID" 2>/dev/null
    wait "$MOCK_PID" 2>/dev/null
  fi

  # Hard-purge any workspaces we created so repeat runs are deterministic.
  # The confirm name must equal the name used at creation time, which carries
  # the "-$$" suffix; without it the destructive-delete guard never matches.
  local entry wid tok name
  local -a auth
  for entry in "$WS_TARGET|$WS_TARGET_TOK|e2e-chan-target-$$" \
               "$WS_SIBLING|$WS_SIBLING_TOK|e2e-chan-sibling-$$"; do
    wid="${entry%%|*}"; entry="${entry#*|}"
    tok="${entry%%|*}"; name="${entry#*|}"
    [ -z "$wid" ] && continue

    # Prefer the admin bearer (same policy as the purge step of this script);
    # fall back to the workspace's own token only when no admin bearer exists.
    # The ${arr[@]+...} form keeps an empty array safe under `set -u`.
    auth=(${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"})
    if [ "${#auth[@]}" -eq 0 ] && [ -n "$tok" ]; then
      auth=(-H "Authorization: Bearer $tok")
    fi

    curl -s -X DELETE "$BASE/workspaces/$wid?confirm=true&purge=true" \
      -H "X-Confirm-Name: $name" ${auth[@]+"${auth[@]}"} >/dev/null 2>&1
  done

  rm -rf "$WORK_DIR" 2>/dev/null
}
trap cleanup EXIT INT TERM
|
||||
|
||||
# ── mock upstream ───────────────────────────────────────────────────────
|
||||
# One Python process serves BOTH mocks (different ports). It records the
|
||||
# Slack webhook request body to $WORK_DIR/slack_body.json and answers the
|
||||
# Telegram getMe/getUpdates calls with a deterministic bot+chat fixture.
|
||||
start_mock() {
  # Writes the mock server script into WORK_DIR, launches it in the
  # background (recording its pid in MOCK_PID), then blocks until both mock
  # ports answer. Exits 2 after cleanup if they never come up.
  local script="$WORK_DIR/mock.py"

  cat > "$script" <<'PY'
import json
import os
import sys
import threading
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

WORK_DIR = os.environ["MOCK_WORK_DIR"]
WEBHOOK_PORT = int(os.environ["MOCK_WEBHOOK_PORT"])
TELEGRAM_PORT = int(os.environ["MOCK_TELEGRAM_PORT"])

BOT_USERNAME = "e2e_mock_bot"
CHAT_ID = -1009876543210
CHAT_NAME = "E2E Mock Group"


class SlackHandler(BaseHTTPRequestHandler):
    def log_message(self, *a):  # silence
        pass

    def do_POST(self):
        n = int(self.headers.get("Content-Length", "0") or "0")
        body = self.rfile.read(n)
        # Persist EXACTLY what the live Slack send path POSTed so the bash
        # side can assert the serialized payload.
        with open(os.path.join(WORK_DIR, "slack_body.json"), "wb") as f:
            f.write(body)
        with open(os.path.join(WORK_DIR, "slack_meta.json"), "w") as f:
            json.dump({"path": self.path,
                       "content_type": self.headers.get("Content-Type", "")}, f)
        # Real Slack Incoming Webhooks reply 200 "ok".
        self.send_response(200)
        self.end_headers()
        self.wfile.write(b"ok")


class TelegramHandler(BaseHTTPRequestHandler):
    def log_message(self, *a):
        pass

    def _send(self, obj):
        payload = json.dumps(obj).encode()
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _route(self):
        # tgbotapi calls <base>/bot<token>/<method>
        method = self.path.rsplit("/", 1)[-1]
        if method == "getMe":
            return self._send({"ok": True, "result": {
                "id": 4242, "is_bot": True, "first_name": "E2E Mock",
                "username": BOT_USERNAME, "can_read_all_group_messages": True}})
        if method == "setMyCommands":
            return self._send({"ok": True, "result": True})
        if method == "deleteWebhook":
            return self._send({"ok": True, "result": True})
        if method == "getUpdates":
            # One my_chat_member update so the bot "discovers" a group.
            return self._send({"ok": True, "result": [{
                "update_id": 1,
                "my_chat_member": {
                    "chat": {"id": CHAT_ID, "title": CHAT_NAME, "type": "supergroup"},
                    "from": {"id": 1, "is_bot": False, "first_name": "Op"},
                    "date": 0,
                    "old_chat_member": {"user": {"id": 4242, "is_bot": True,
                                                 "first_name": "E2E Mock"},
                                        "status": "left"},
                    "new_chat_member": {"user": {"id": 4242, "is_bot": True,
                                                 "first_name": "E2E Mock"},
                                        "status": "member"},
                }}]})
        # Default OK for any other bot method tgbotapi may probe.
        return self._send({"ok": True, "result": True})

    def do_POST(self):
        n = int(self.headers.get("Content-Length", "0") or "0")
        if n:
            self.rfile.read(n)
        self._route()

    def do_GET(self):
        self._route()


def serve(port, handler):
    ThreadingHTTPServer(("127.0.0.1", port), handler).serve_forever()


t = threading.Thread(target=serve, args=(TELEGRAM_PORT, TelegramHandler), daemon=True)
t.start()
serve(WEBHOOK_PORT, SlackHandler)
PY

  MOCK_WORK_DIR="$WORK_DIR" MOCK_WEBHOOK_PORT="$WEBHOOK_PORT" \
    MOCK_TELEGRAM_PORT="$TELEGRAM_PORT" \
    python3 "$script" &
  MOCK_PID=$!

  # Probe both listeners up to 50 times, 0.1s apart, before giving up.
  local up=0 tries=0
  while [ "$tries" -lt 50 ]; do
    if curl -s -o /dev/null "http://127.0.0.1:${WEBHOOK_PORT}/" \
        && curl -s -o /dev/null "http://127.0.0.1:${TELEGRAM_PORT}/botX/getMe"; then
      up=1
      break
    fi
    sleep 0.1
    tries=$((tries + 1))
  done

  if [ "$up" = "1" ]; then
    return
  fi
  echo "FATAL: mock upstream did not come up on ports $WEBHOOK_PORT/$TELEGRAM_PORT" >&2
  cleanup
  exit 2
}
|
||||
|
||||
# json_field KEY — read a JSON object on stdin and print the value stored
# under KEY, or an empty line when the key is absent. The key is handed to
# Python as an argument rather than spliced into the program text, so a key
# containing quotes cannot alter the code being run.
json_field() {
  python3 -c 'import sys,json; print(json.load(sys.stdin).get(sys.argv[1],""))' "$1"
}
|
||||
|
||||
# create_external_ws NAME
# Creates an external-runtime workspace called NAME and prints
# "<workspace id><TAB><workspace token>" on stdout (the token may be empty).
# On failure it reports on stderr and exits 1.
#
# It deliberately does NOT call cleanup itself: callers run it inside a
# process substitution, where cleanup would execute in a subshell and tear
# down the shared mock, scratch dir and already-created workspaces while the
# parent keeps running. When called directly, the EXIT trap still cleans up.
create_external_ws() {
  local name="$1" resp wid tok

  # ${arr[@]+...} keeps an empty ADMIN_AUTH safe under `set -u`.
  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} \
    -H "Content-Type: application/json" \
    -d "{\"name\":\"$name\",\"runtime\":\"external\",\"external\":true,\"tier\":1}")
  wid=$(printf '%s' "$resp" | json_field id)
  if [ -z "$wid" ]; then
    echo "FATAL: could not create workspace $name: $resp" >&2
    exit 1
  fi

  # Prefer a token returned by the create call; otherwise try to mint one.
  tok=$(printf '%s' "$resp" | e2e_extract_token)
  [ -z "$tok" ] && tok=$(e2e_mint_workspace_token "$wid" 2>/dev/null || true)
  printf '%s\t%s\n' "$wid" "$tok"
}
|
||||
|
||||
# ════════════════════════════════════════════════════════════════════════
|
||||
echo "=== Channels + data-prune E2E (core#2332 P1.10) ==="
echo "BASE=$BASE webhook_mock=$WEBHOOK_BASE telegram_mock=$TELEGRAM_BASE"

# Refuse to run against a platform that is not answering its health probe.
curl -sf "$BASE/health" >/dev/null 2>&1 || {
  echo "FATAL: platform not reachable at $BASE/health" >&2
  exit 2
}

start_mock
|
||||
|
||||
# ── workspaces ──────────────────────────────────────────────────────────
|
||||
IFS=$'\t' read -r WS_TARGET WS_TARGET_TOK < <(create_external_ws "e2e-chan-target-$$")
|
||||
IFS=$'\t' read -r WS_SIBLING WS_SIBLING_TOK < <(create_external_ws "e2e-chan-sibling-$$")
|
||||
echo "target=$WS_TARGET sibling=$WS_SIBLING"
|
||||
|
||||
WS_AUTH=("${ADMIN_AUTH[@]}")
|
||||
[ -n "$WS_TARGET_TOK" ] && WS_AUTH=(-H "Authorization: Bearer $WS_TARGET_TOK")
|
||||
SIB_AUTH=("${ADMIN_AUTH[@]}")
|
||||
[ -n "$WS_SIBLING_TOK" ] && SIB_AUTH=(-H "Authorization: Bearer $WS_SIBLING_TOK")
|
||||
|
||||
# ── (1) SEND end-to-end via a Slack Incoming-Webhook channel ────────────
|
||||
echo
echo "--- (1) channel SEND → mock upstream receives serialized payload ---"

# Register a slack channel on the target whose webhook_url lives under the
# mock's base URL. A platform started without the webhook test-base rejects
# the URL, which is routed to loud_skip below.
SLACK_CFG=$(python3 -c "import json,sys; print(json.dumps({
    'webhook_url': sys.argv[1] + 'services/T000/B000/e2e',
    'chat_id': 'mock-chat'}))" "$WEBHOOK_BASE")
CREATE=$(curl -s -X POST "$BASE/workspaces/$WS_TARGET/channels" "${WS_AUTH[@]}" \
  -H "Content-Type: application/json" \
  -d "{\"channel_type\":\"slack\",\"config\":$SLACK_CFG,\"enabled\":true}")
CH_ID=$(printf '%s' "$CREATE" | json_field id)

if [ -n "$CH_ID" ]; then
  pass "create slack channel pointed at mock upstream (id=$CH_ID)"

  # Send through the live API; the last line of SEND is the HTTP status.
  SEND_TEXT="hello from e2e $$"
  SEND=$(curl -s -w $'\n%{http_code}' -X POST \
    "$BASE/workspaces/$WS_TARGET/channels/$CH_ID/send" "${WS_AUTH[@]}" \
    -H "Content-Type: application/json" \
    -d "{\"text\":\"$SEND_TEXT\"}")
  SEND_CODE=$(printf '%s' "$SEND" | tail -n1)
  if [ "$SEND_CODE" != "200" ]; then
    fail "POST /channels/:id/send" "code=$SEND_CODE body=$(printf '%s' "$SEND" | sed '$d')"
  else
    pass "POST /channels/:id/send returned 200"
  fi

  # Poll (30 x 0.1s) for the body file the mock writes on receipt.
  RECEIVED=""
  for _ in {1..30}; do
    if [ -s "$WORK_DIR/slack_body.json" ]; then
      RECEIVED=1
      break
    fi
    sleep 0.1
  done

  if [ -z "$RECEIVED" ]; then
    fail "mock upstream never received the outbound POST" "send path did not serialize+POST to the configured endpoint"
  else
    pass "mock upstream RECEIVED an outbound POST"
    GOT_TEXT=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('text',''))" \
      "$WORK_DIR/slack_body.json" 2>/dev/null || true)
    if [ "$GOT_TEXT" != "$SEND_TEXT" ]; then
      fail "serialized payload mismatch" "want=[$SEND_TEXT] got=[$GOT_TEXT] raw=$(cat "$WORK_DIR/slack_body.json")"
    else
      pass "mock received correctly-serialized {\"text\":...} payload (text matches end-to-end)"
    fi
  fi
elif [[ "$CREATE" == *"invalid channel config"* ]]; then
  loud_skip "platform rejected mock webhook_url (MOLECULE_CHANNELS_TEST_WEBHOOK_BASE not set on platform): $CREATE"
else
  fail "create slack channel" "$CREATE"
fi
|
||||
|
||||
# ── (2) DISCOVER via the Telegram mock Bot API ──────────────────────────
|
||||
echo
echo "--- (2) POST /channels/discover (telegram) → mock Bot API ---"

# Bot token shaped as <digits>:<30+ token characters>.
DISC_TOKEN="424242:AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
DISC=$(curl -s -w $'\n%{http_code}' -X POST "$BASE/channels/discover" \
  "${ADMIN_AUTH[@]}" -H "Content-Type: application/json" \
  -d "{\"channel_type\":\"telegram\",\"bot_token\":\"$DISC_TOKEN\",\"workspace_id\":\"$WS_TARGET\"}")
# Last line of DISC is the HTTP status; everything before it is the body.
DISC_BODY=$(printf '%s' "$DISC" | sed '$d')
DISC_CODE=$(printf '%s' "$DISC" | tail -n1)

if [ "$DISC_CODE" != "200" ]; then
  if [[ "$DISC_BODY" == *"Cannot reach Telegram"* \
        || "$DISC_BODY" == *"Invalid bot token"* \
        || "$DISC_BODY" == *"Failed to connect"* ]]; then
    # These bodies are taken to mean the platform talked to the real Telegram
    # API instead of the mock, so the round-trip cannot be proven.
    loud_skip "discover hit real Telegram, not the mock (MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE not set on platform): code=$DISC_CODE $DISC_BODY"
  else
    fail "POST /channels/discover" "code=$DISC_CODE body=$DISC_BODY"
  fi
else
  pass "POST /channels/discover returned 200"

  if ! printf '%s' "$DISC_BODY" | grep -qF '"bot_username":"e2e_mock_bot"'; then
    fail "discover bot_username" "$DISC_BODY"
  else
    pass "discover round-tripped the mock bot username"
  fi

  if ! printf '%s' "$DISC_BODY" | grep -qF '"chat_id":"-1009876543210"'; then
    fail "discover chat list" "$DISC_BODY"
  else
    pass "discover round-tripped the mock chat id"
  fi
fi
|
||||
|
||||
# ── (3) Data-prune (RFC #734): purge removes prunable data, sibling survives
|
||||
echo
echo "--- (3) data-prune: purge target's child data, sibling survives ---"

# seed_secret WORKSPACE_ID [curl args...]
# POSTs a fixed probe secret to the workspace's /secrets endpoint. Extra
# arguments (typically auth headers) are passed straight to curl. The
# response is discarded, so the call is best-effort.
seed_secret() {
  local ws_id="$1"
  shift
  curl -s -o /dev/null -X POST "$BASE/workspaces/$ws_id/secrets" "$@" \
    -H "Content-Type: application/json" \
    -d '{"key":"E2E_PRUNE_PROBE","value":"v"}'
}
|
||||
# _channel_rows — read a channel-list response on stdin and print the number
# of rows when it is a JSON array, or -1 for anything else (invalid JSON or a
# JSON object such as an error body). Counting only arrays keeps an error
# object from being mistaken for "1 row" by a plain len().
_channel_rows() {
  python3 -c "import sys,json
try:
    d=json.load(sys.stdin); print(len(d) if isinstance(d,list) else -1)
except Exception:
    print(-1)" 2>/dev/null || echo -1
}

# Seed a secret on the target (its channel already exists from section 1).
seed_secret "$WS_TARGET" "${WS_AUTH[@]}"

# Sibling gets its OWN channel so we can prove its rows survive the target purge.
SIB_SLACK_CFG=$(python3 -c "import json,sys; print(json.dumps({
    'webhook_url': sys.argv[1] + 'services/T111/B111/sib',
    'chat_id': 'sib-chat'}))" "$WEBHOOK_BASE")
SIB_CH=$(curl -s -X POST "$BASE/workspaces/$WS_SIBLING/channels" "${SIB_AUTH[@]}" \
  -H "Content-Type: application/json" \
  -d "{\"channel_type\":\"slack\",\"config\":$SIB_SLACK_CFG,\"enabled\":true}")
SIB_CH_ID=$(printf '%s' "$SIB_CH" | json_field id)

# Pre-purge: confirm both workspaces have >=1 channel row.
TGT_CH_PRE=$(curl -s "$BASE/workspaces/$WS_TARGET/channels" "${WS_AUTH[@]}")
SIB_CH_PRE=$(curl -s "$BASE/workspaces/$WS_SIBLING/channels" "${SIB_AUTH[@]}")
TGT_PRE_N=$(printf '%s' "$TGT_CH_PRE" | _channel_rows)
SIB_PRE_N=$(printf '%s' "$SIB_CH_PRE" | _channel_rows)
if [ "${TGT_PRE_N:-0}" -ge 1 ] && [ "${SIB_PRE_N:-0}" -ge 1 ]; then
  pass "pre-purge: target ($TGT_PRE_N) and sibling ($SIB_PRE_N) both have channel data"
else
  fail "pre-purge seed" "target=$TGT_PRE_N sibling=$SIB_PRE_N (need >=1 each)"
fi

# Permanent delete WITH purge — the RFC #734 prune of durable child data.
# DELETE /workspaces/:id is AdminAuth-gated (router.go:167); Tier-2b rejects a
# workspace bearer when ADMIN_TOKEN is set, so this MUST use the admin bearer
# when there is one. The workspace token is only a fallback for platforms
# running without an admin token.
# X-Confirm-Name must equal the workspace name (the destructive-delete guard).
PURGE_AUTH=(${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"})
[ ${#PURGE_AUTH[@]} -eq 0 ] && [ -n "$WS_TARGET_TOK" ] && PURGE_AUTH=(-H "Authorization: Bearer $WS_TARGET_TOK")
PURGE=$(curl -s -w $'\n%{http_code}' -X DELETE \
  "$BASE/workspaces/$WS_TARGET?confirm=true&purge=true" \
  -H "X-Confirm-Name: e2e-chan-target-$$" ${PURGE_AUTH[@]+"${PURGE_AUTH[@]}"})
PURGE_CODE=$(printf '%s' "$PURGE" | tail -n1)
PURGE_BODY=$(printf '%s' "$PURGE" | sed '$d')
if [ "$PURGE_CODE" = "200" ] && printf '%s' "$PURGE_BODY" | grep -qF '"status":"purged"'; then
  pass "DELETE ?purge=true returned purged"
else
  fail "DELETE ?purge=true" "code=$PURGE_CODE body=$PURGE_BODY"
fi

# Target was purged → its token is revoked; query its channels with admin
# bearer. The purge hard-deletes workspace_channels rows for the target.
TGT_CH_POST=$(curl -s "$BASE/workspaces/$WS_TARGET/channels" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"})
TGT_POST_N=$(printf '%s' "$TGT_CH_POST" | _channel_rows)
if [ "${TGT_POST_N:-1}" = "0" ]; then
  pass "post-purge: target's prunable channel data is GONE (0 rows)"
else
  fail "prune did not remove target channel data" "post-purge target rows=$TGT_POST_N body=$(printf '%s' "$TGT_CH_POST" | head -c 200)"
fi
WS_TARGET="" # purged; don't re-delete in cleanup

# Sibling (NON-prunable relative to the target purge) must be untouched.
# Require a non-empty channel id first: grep -F with an empty pattern matches
# every line and would make the "channel intact" check vacuous.
SIB_CH_POST=$(curl -s "$BASE/workspaces/$WS_SIBLING/channels" "${SIB_AUTH[@]}")
SIB_POST_N=$(printf '%s' "$SIB_CH_POST" | _channel_rows)
if [ -n "$SIB_CH_ID" ] && [ "${SIB_POST_N:-0}" -ge 1 ] \
    && printf '%s' "$SIB_CH_POST" | grep -qF "$SIB_CH_ID"; then
  pass "post-purge: sibling's non-prunable data SURVIVED ($SIB_POST_N rows, channel $SIB_CH_ID intact)"
else
  fail "purge over-reached: sibling data did not survive" "sibling rows=$SIB_POST_N body=$(printf '%s' "$SIB_CH_POST" | head -c 200)"
fi
|
||||
|
||||
# ── verdict ─────────────────────────────────────────────────────────────
|
||||
echo
echo "=== channels+prune e2e: $PASS passed, $FAIL failed ==="

# Any recorded failure makes the run red.
[ "$FAIL" -eq 0 ] || exit 1

# A run that recorded too few passes is treated as red as well, so a section
# that silently produced no assertions cannot yield a green result.
if [ "$PASS" -ge 9 ]; then
  echo "ALL CHANNELS + PRUNE E2E CHECKS PASSED"
else
  echo "FATAL: only $PASS assertions ran — expected >=9 (send + discover + prune). Refusing to report green." >&2
  exit 1
fi
|
||||
+72
-45
@@ -1,24 +1,30 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E regression suite for the local-dev escape hatches added in
|
||||
# fix/quickstart-bugless. These cover the exact user-facing breakages
|
||||
# that dropped out of the partial squash-merge of PR #1871:
|
||||
# E2E regression suite asserting that "dev mode" is fail-CLOSED.
|
||||
#
|
||||
# 1. GET /workspaces returns 200 with no bearer after tokens exist in
|
||||
# the DB — exercises the AdminAuth Tier-1b dev-mode hatch
|
||||
# (middleware/devmode.go::isDevModeFailOpen).
|
||||
# 2. GET /workspaces/:id/activity returns 200 with no bearer — the
|
||||
# same hatch applied to WorkspaceAuth.
|
||||
# 3. POST /workspaces/:id/a2a doesn't 502-SSRF on a loopback workspace
|
||||
# URL — exercises handlers/ssrf.go::devModeAllowsLoopback.
|
||||
# 4. GET /org/templates returns the curated set populated by
|
||||
# clone-manifest.sh — exercises infra/scripts/setup.sh + the
|
||||
# ListTemplates failure logging in handlers/org.go.
|
||||
# History: this file used to assert the local-dev fail-open escape hatches
|
||||
# (GET /workspaces 200 with NO bearer, /workspaces/:id/activity 200 with no
|
||||
# bearer) added in fix/quickstart-bugless. Under the CTO "nothing should be
|
||||
# fail-open" directive (harden/no-fail-open-auth) those hatches were REMOVED:
|
||||
# auth is fail-CLOSED in EVERY environment, local dev included. This suite now
|
||||
# pins the inverse contract — bearer-less admin/workspace requests 401, and the
|
||||
# SAME requests with the dev ADMIN_TOKEN bearer succeed.
|
||||
#
|
||||
# Requires: platform running on :8080 with MOLECULE_ENV=development and
|
||||
# ADMIN_TOKEN unset. Matches the README quickstart env.
|
||||
# What it verifies:
|
||||
# 1. GET /workspaces 401s with NO bearer once tokens exist (was: 200 via the
|
||||
# removed AdminAuth Tier-1b dev-mode hatch); 200 WITH the admin bearer.
|
||||
# 2. GET /workspaces/:id/activity (and /delegations, /approvals/pending) 401
|
||||
# with no bearer (was: 200 via the WorkspaceAuth hatch); 200 WITH bearer.
|
||||
# 3. GET /org/templates returns the curated set populated by clone-manifest.sh
|
||||
# (unauth-readable bootstrap surface — unchanged).
|
||||
#
|
||||
# Requires: platform running on :8080 with MOLECULE_ENV=development AND
|
||||
# ADMIN_TOKEN set (the dev value), with MOLECULE_ADMIN_TOKEN (or
|
||||
# ADMIN_TOKEN) exported here so the suite can present the bearer.
|
||||
# scripts/dev-start.sh provisions ADMIN_TOKEN locally; the e2e-api CI
|
||||
# job sets it on the platform and exports the matching bearer.
|
||||
#
|
||||
# Usage:
|
||||
# bash tests/e2e/test_dev_mode.sh
|
||||
# MOLECULE_ADMIN_TOKEN=dev-local-admin-token bash tests/e2e/test_dev_mode.sh
|
||||
set -euo pipefail
|
||||
|
||||
# shellcheck source=_lib.sh
|
||||
@@ -46,35 +52,44 @@ check_http() {
|
||||
fi
|
||||
}
|
||||
|
||||
echo "=== Dev-mode escape-hatch regression tests ==="
|
||||
echo "=== Dev-mode fail-CLOSED regression tests ==="
|
||||
echo ""
|
||||
|
||||
# Pre-test: ensure MOLECULE_ENV=development and no ADMIN_TOKEN are in the
|
||||
# platform's env. The request path doesn't let us read the platform's
|
||||
# env directly, but we can verify the hatch is active by confirming the
|
||||
# expected behaviour under the conditions the test otherwise sets up.
|
||||
# The platform is fail-closed in every environment now, so the suite MUST have
|
||||
# the admin bearer to drive the authenticated (200) assertions. Without it we
|
||||
# cannot create / clean up workspaces — bail loudly rather than silently skip.
|
||||
ADMIN_BEARER="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
|
||||
if [ -z "$ADMIN_BEARER" ]; then
|
||||
echo "FAIL: MOLECULE_ADMIN_TOKEN/ADMIN_TOKEN not set — auth is fail-closed in"
|
||||
echo " every environment, so this suite needs the dev ADMIN_TOKEN bearer."
|
||||
echo " e.g. MOLECULE_ADMIN_TOKEN=dev-local-admin-token bash $0"
|
||||
exit 1
|
||||
fi
|
||||
ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
|
||||
|
||||
e2e_cleanup_all_workspaces
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Section 1 — AdminAuth dev-mode hatch
|
||||
# Section 1 — AdminAuth is fail-CLOSED (dev-mode hatch removed)
|
||||
# ----------------------------------------------------------------------
|
||||
# Before fix: once any workspace had tokens in the DB, GET /workspaces
|
||||
# closed to unauthenticated callers and the Canvas broke. The hatch
|
||||
# keeps it open specifically in dev mode.
|
||||
|
||||
echo "--- Section 1: AdminAuth dev-mode hatch ---"
|
||||
echo "--- Section 1: AdminAuth fail-closed ---"
|
||||
|
||||
# No bearer → 401 in dev mode (the removed hatch used to return 200).
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces")
|
||||
check_http "GET /workspaces (empty DB)" "200" "$R"
|
||||
check_http "GET /workspaces (no bearer) is fail-CLOSED" "401" "$R"
|
||||
|
||||
# Create a workspace so tokens land in the DB.
|
||||
# With the dev admin bearer → 200.
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces" "${ADMIN_AUTH[@]}")
|
||||
check_http "GET /workspaces (with admin bearer)" "200" "$R"
|
||||
|
||||
# Create a workspace (authenticated) so tokens land in the DB.
|
||||
R=$(curl -s -w "\n%{http_code}" -X POST "$BASE/workspaces" \
|
||||
"${ADMIN_AUTH[@]}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"name":"Dev-Mode-Test","tier":1,"runtime":"external","external":true}')
|
||||
CODE=$(echo "$R" | tail -n1)
|
||||
BODY=$(echo "$R" | sed '$d')
|
||||
check_http "POST /workspaces (create)" "201" "$CODE"
|
||||
check_http "POST /workspaces (create, with admin bearer)" "201" "$CODE"
|
||||
|
||||
WS_ID=$(echo "$BODY" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || true)
|
||||
if [ -z "$WS_ID" ]; then
|
||||
@@ -83,43 +98,55 @@ if [ -z "$WS_ID" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Ensure a real workspace token exists so AdminAuth now sees a live token. On
|
||||
# pre-fix builds the next /workspaces call would 401 — on post-fix it
|
||||
# must stay 200 because MOLECULE_ENV=development + ADMIN_TOKEN unset.
|
||||
# Ensure a real workspace token exists so AdminAuth sees a live token globally.
|
||||
TOKEN=$(echo "$BODY" | e2e_extract_token)
|
||||
if [ -z "$TOKEN" ]; then
|
||||
e2e_mint_workspace_token "$WS_ID" >/dev/null
|
||||
fi
|
||||
|
||||
# With tokens now in the DB, the bearer-less call STILL 401s (no lazy-bootstrap
|
||||
# / dev-mode fall-through), and the authenticated call still 200s.
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces")
|
||||
check_http "GET /workspaces (after token minted, no bearer)" "200" "$R"
|
||||
check_http "GET /workspaces (after token minted, no bearer) is fail-CLOSED" "401" "$R"
|
||||
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/workspaces" "${ADMIN_AUTH[@]}")
|
||||
check_http "GET /workspaces (after token minted, with admin bearer)" "200" "$R"
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Section 2 — WorkspaceAuth dev-mode hatch
|
||||
# Section 2 — WorkspaceAuth is fail-CLOSED (dev-mode hatch removed)
|
||||
# ----------------------------------------------------------------------
|
||||
# Before fix: /workspaces/:id/activity 401'd once tokens existed —
|
||||
# the Canvas side panel's chat history load broke.
|
||||
|
||||
echo ""
|
||||
echo "--- Section 2: WorkspaceAuth dev-mode hatch ---"
|
||||
echo "--- Section 2: WorkspaceAuth fail-closed ---"
|
||||
|
||||
# No bearer → 401 (the removed hatch used to return 200).
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
"$BASE/workspaces/$WS_ID/activity?type=a2a_receive&limit=50")
|
||||
check_http "GET /workspaces/:id/activity (no bearer)" "200" "$R"
|
||||
check_http "GET /workspaces/:id/activity (no bearer) is fail-CLOSED" "401" "$R"
|
||||
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
"$BASE/workspaces/$WS_ID/delegations")
|
||||
check_http "GET /workspaces/:id/delegations (no bearer)" "200" "$R"
|
||||
check_http "GET /workspaces/:id/delegations (no bearer) is fail-CLOSED" "401" "$R"
|
||||
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/approvals/pending")
|
||||
check_http "GET /approvals/pending (no bearer)" "200" "$R"
|
||||
check_http "GET /approvals/pending (no bearer) is fail-CLOSED" "401" "$R"
|
||||
|
||||
# Same requests WITH the admin bearer → 200.
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
"$BASE/workspaces/$WS_ID/activity?type=a2a_receive&limit=50" "${ADMIN_AUTH[@]}")
|
||||
check_http "GET /workspaces/:id/activity (with admin bearer)" "200" "$R"
|
||||
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
"$BASE/workspaces/$WS_ID/delegations" "${ADMIN_AUTH[@]}")
|
||||
check_http "GET /workspaces/:id/delegations (with admin bearer)" "200" "$R"
|
||||
|
||||
R=$(curl -s -o /dev/null -w "%{http_code}" "$BASE/approvals/pending" "${ADMIN_AUTH[@]}")
|
||||
check_http "GET /approvals/pending (with admin bearer)" "200" "$R"
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Section 3 — Template registry populated by setup.sh
|
||||
# ----------------------------------------------------------------------
|
||||
# Before fix: setup.sh didn't run clone-manifest.sh so the template
|
||||
# palette was empty and the molecule-dev in-tree copy was broken.
|
||||
|
||||
# GET /org/templates is an unauthenticated bootstrap surface (the template
|
||||
# palette must render before the user has a credential) — unchanged.
|
||||
echo ""
|
||||
echo "--- Section 3: Template registry ---"
|
||||
|
||||
|
||||
+332
@@ -0,0 +1,332 @@
|
||||
#!/usr/bin/env bash
|
||||
set -uo pipefail
|
||||
#
|
||||
# test_keyless_feature_contracts_e2e.sh — REQUIRED-lane (E2E API Smoke Test)
|
||||
# keyless HTTP-contract coverage for feature endpoints that ship WITHOUT an
|
||||
# LLM key and had NO e2e assertion before (coverage-audit gap list).
|
||||
#
|
||||
# Why a NEW script (not added to test_api.sh): PR #2286 is concurrently
|
||||
# rewriting test_api.sh's auth helpers + _lib.sh (e2e_admin_auth_args) and the
|
||||
# test_priority_runtimes mock arm. Keeping these assertions in a standalone
|
||||
# file avoids a merge conflict with that in-flight PR and keeps the new feature
|
||||
# coverage independently reviewable. The mock-runtime A2A canned round-trip is
|
||||
# OWNED by #2286's `mock` arm (run_mock) — intentionally NOT duplicated here.
|
||||
#
|
||||
# Every endpoint below is exercised against a runtime=external workspace so NO
|
||||
# LLM key is needed. For each we assert the real HTTP contract: the happy path
|
||||
# AND a meaningful failure mode (401 without auth, 400 on bad input, or the
|
||||
# documented fail-closed status) so the test catches REAL regressions, not
|
||||
# just 200s.
|
||||
#
|
||||
# Auth model (matches workspace-server/internal/middleware/wsauth_middleware.go):
|
||||
# * WorkspaceAuth (/workspaces/:id/*) is STRICT once a token exists — a
|
||||
# bearer-less request 401s (devmode fail-open needs MOLECULE_ENV=development AND
|
||||
# ADMIN_TOKEN unset, neither of which the e2e-api job sets).
|
||||
# * AdminAuth routes accept the platform ADMIN_TOKEN (post-#2286) OR, when no
|
||||
# ADMIN_TOKEN is configured, any valid workspace bearer (Tier-3 fallback) —
|
||||
# so the workspace token we mint authenticates admin routes in BOTH the
|
||||
# pre-#2286 (no ADMIN_TOKEN) and post-#2286 (ADMIN_TOKEN set) CI shapes.
|
||||
#
|
||||
# Local-run shape (mirrors the e2e-api job — real PG+Redis+platform):
|
||||
# DATABASE_URL=... REDIS_URL=... ADMIN_TOKEN=... ./platform-server &
|
||||
# BASE=http://127.0.0.1:$PORT bash tests/e2e/test_keyless_feature_contracts_e2e.sh
|
||||
|
||||
source "$(dirname "$0")/_lib.sh" # sets BASE default
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
# pass DESC — print a PASS line for DESC and increment the PASS counter.
pass() {
  printf 'PASS: %s\n' "$1"
  PASS=$((PASS + 1))
}
|
||||
# fail DESC DETAIL — print a FAIL line for DESC followed by an indented DETAIL
# line, then increment the FAIL counter.
fail() {
  printf 'FAIL: %s\n' "$1"
  printf ' %s\n' "$2"
  FAIL=$((FAIL + 1))
}
|
||||
|
||||
# assert_contains DESC EXPECTED_SUBSTRING ACTUAL
# Passes when ACTUAL contains EXPECTED_SUBSTRING as a literal (non-regex)
# string; otherwise records a failure that shows both values.
#
# ACTUAL is fed to grep through a here-string rather than `printf | grep -q`:
# grep -q exits on the first match, and with `set -o pipefail` a still-writing
# printf can die of SIGPIPE and turn a genuine match into a spurious FAIL.
# `--` keeps a needle that begins with '-' from being parsed as a grep option.
assert_contains() {
  local desc="$1" needle="$2" haystack="$3"
  if grep -qF -- "$needle" <<<"$haystack"; then
    pass "$desc"
  else
    fail "$desc" "expected to contain [$needle] — got: $haystack"
  fi
}
|
||||
|
||||
# http_code METHOD URL [curl-args...]
# Sends the request, discards the response body and writes only the HTTP
# status code to stdout. Any extra arguments are passed through to curl.
http_code() {
  local verb="$1"
  local target="$2"
  shift 2
  curl -s -o /dev/null -w "%{http_code}" -X "$verb" "$target" "$@"
}
|
||||
|
||||
# body_and_code METHOD URL [curl-args...]
# Writes the response body, then a newline, then the HTTP status code, so a
# caller can split them with `tail -n1` (code) and `sed '$d'` (body).
# Any extra arguments are passed through to curl.
body_and_code() {
  local verb="$1"
  local target="$2"
  shift 2
  curl -s -w $'\n%{http_code}' -X "$verb" "$target" "$@"
}
|
||||
|
||||
echo "=== Keyless feature HTTP-contract E2E (required lane) ==="
|
||||
echo ""
|
||||
|
||||
# Platform admin bearer when the job set one (#2286 shape). When ADMIN_TOKEN is
|
||||
# configured, AdminAuth's Tier-1 fail-open is OFF even before the first token
|
||||
# exists, so admin-gated create / list / delete must carry it from the start.
|
||||
# Pre-#2286 (no ADMIN_TOKEN) this is empty → fail-open create works bare.
|
||||
# Resolve the platform admin token (MOLECULE_ADMIN_TOKEN wins over ADMIN_TOKEN)
# and build the curl header arguments for it only when one is present; with no
# token the array stays empty.
ENV_ADMIN="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
ENV_ADMIN_AUTH=()
if [ -n "$ENV_ADMIN" ]; then
  ENV_ADMIN_AUTH=(-H "Authorization: Bearer $ENV_ADMIN")
fi
|
||||
|
||||
# Reproducible counts across reruns. e2e_cleanup_all_workspaces hits the
|
||||
# admin-gated list/delete; the platform admin bearer (if set) goes via the
|
||||
# MOLECULE_ADMIN_TOKEN/ADMIN_TOKEN env the helper already reads.
|
||||
e2e_cleanup_all_workspaces
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixture: one external workspace, registered → online. Keyless (external=true
|
||||
# means no container is provisioned and no LLM key is consulted).
|
||||
# ---------------------------------------------------------------------------
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
${ENV_ADMIN_AUTH[@]+"${ENV_ADMIN_AUTH[@]}"} \
|
||||
-d '{"name":"Keyless Fixture","tier":1,"runtime":"external","external":true}')
|
||||
WS_ID=$(printf '%s' "$R" | python3 -c "import sys,json; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
|
||||
if [ -z "$WS_ID" ]; then
|
||||
echo "FATAL: could not create fixture workspace — got: $R" >&2
|
||||
exit 2
|
||||
fi
|
||||
assert_contains "POST /workspaces (external fixture created)" '"status":"awaiting_agent"' "$R"
|
||||
|
||||
# Workspace token: use the one in the create response if it carries one; else mint via the admin endpoint.
|
||||
WS_TOKEN=$(printf '%s' "$R" | e2e_extract_token)
|
||||
if [ -z "$WS_TOKEN" ]; then
|
||||
WS_TOKEN=$(e2e_mint_workspace_token "$WS_ID" 2>/dev/null || echo "")
|
||||
fi
|
||||
if [ -z "$WS_TOKEN" ]; then
|
||||
echo "FATAL: could not obtain workspace token for $WS_ID" >&2
|
||||
exit 2
|
||||
fi
|
||||
AUTH=(-H "Authorization: Bearer $WS_TOKEN")
|
||||
|
||||
# Admin bearer: explicit platform ADMIN_TOKEN if the job set one (#2286 shape),
|
||||
# else the workspace token (AdminAuth Tier-3 accepts it pre-#2286).
|
||||
ADMIN_BEARER="${ENV_ADMIN:-$WS_TOKEN}"
|
||||
ADMIN_AUTH=(-H "Authorization: Bearer $ADMIN_BEARER")
|
||||
|
||||
# Bring the fixture online so lifecycle (hibernate) has a hibernatable state.
|
||||
curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" "${AUTH[@]}" \
|
||||
-d "{\"id\":\"$WS_ID\",\"url\":\"https://example.com/keyless\",\"agent_card\":{\"name\":\"Keyless Fixture\",\"skills\":[{\"id\":\"noop\",\"name\":\"Noop\"}]}}" >/dev/null
|
||||
|
||||
# ===========================================================================
|
||||
# 1. Terminal diagnose — GET /workspaces/:id/terminal/diagnose (wsAuth)
|
||||
# External workspace has no instance_id → diagnoseLocal path → 200 with a
|
||||
# deterministic report (ok=false, first_failure on docker/container). The
|
||||
# /terminal endpoint itself is a WebSocket upgrade (not HTTP-assertable
|
||||
# keyless); diagnose is its pure-HTTP sibling and the real contract surface.
|
||||
# ===========================================================================
|
||||
echo "--- /terminal/diagnose ---"
|
||||
BC=$(body_and_code GET "$BASE/workspaces/$WS_ID/terminal/diagnose" "${AUTH[@]}")
|
||||
DIAG_CODE=$(printf '%s' "$BC" | tail -n1)
|
||||
DIAG_BODY=$(printf '%s' "$BC" | sed '$d')
|
||||
assert_contains "GET /terminal/diagnose (200 report)" "200" "$DIAG_CODE"
|
||||
assert_contains "GET /terminal/diagnose (carries workspace_id)" "\"workspace_id\":\"$WS_ID\"" "$DIAG_BODY"
|
||||
assert_contains "GET /terminal/diagnose (has steps[])" '"steps"' "$DIAG_BODY"
|
||||
# Failure mode: no bearer → 401 (WorkspaceAuth strict once a token exists).
|
||||
assert_contains "GET /terminal/diagnose (no auth → 401)" "401" \
|
||||
"$(http_code GET "$BASE/workspaces/$WS_ID/terminal/diagnose")"
|
||||
|
||||
# ===========================================================================
|
||||
# 2. Webhooks (public) — POST /webhooks/:type
|
||||
# Public, no auth. telegram adapter: empty update body → (nil,nil) → 200
|
||||
# ignored; non-JSON → parse error → 400; unknown type → 404.
|
||||
# ===========================================================================
|
||||
echo "--- /webhooks/:type ---"
|
||||
BC=$(body_and_code POST "$BASE/webhooks/telegram" -H "Content-Type: application/json" -d '{}')
|
||||
WH_CODE=$(printf '%s' "$BC" | tail -n1)
|
||||
WH_BODY=$(printf '%s' "$BC" | sed '$d')
|
||||
assert_contains "POST /webhooks/telegram (non-message update → 200)" "200" "$WH_CODE"
|
||||
assert_contains "POST /webhooks/telegram (status ignored)" '"status":"ignored"' "$WH_BODY"
|
||||
assert_contains "POST /webhooks/telegram (bad JSON → 400)" "400" \
|
||||
"$(http_code POST "$BASE/webhooks/telegram" -H 'Content-Type: application/json' -d 'not-json')"
|
||||
assert_contains "POST /webhooks/<unknown> (→ 404)" "404" \
|
||||
"$(http_code POST "$BASE/webhooks/nope-not-a-channel" -H 'Content-Type: application/json' -d '{}')"
|
||||
|
||||
# ===========================================================================
|
||||
# 3. Budget — GET /workspaces/:id/budget (wsAuth) + PATCH (admin)
|
||||
# GET: fresh workspace → multi-period view, no limits, zero spend.
|
||||
# PATCH: set monthly limit (admin) → reflected; bad input → 400.
|
||||
# ===========================================================================
|
||||
echo "--- /budget ---"
|
||||
BUD=$(curl -s "$BASE/workspaces/$WS_ID/budget" "${AUTH[@]}")
|
||||
assert_contains "GET /budget (has periods map)" '"periods"' "$BUD"
|
||||
assert_contains "GET /budget (monthly_spend 0 on fresh ws)" '"monthly_spend":0' "$BUD"
|
||||
# PATCH is admin-gated (router.go:419). Set a monthly limit and verify echo.
|
||||
PB=$(curl -s -X PATCH "$BASE/workspaces/$WS_ID/budget" -H "Content-Type: application/json" "${ADMIN_AUTH[@]}" \
|
||||
-d '{"budget_limits":{"monthly":2000}}')
|
||||
assert_contains "PATCH /budget (monthly limit set → echoed)" '"budget_limit":2000' "$PB"
|
||||
# Re-read confirms persistence.
|
||||
assert_contains "GET /budget (limit persisted)" '"budget_limit":2000' \
|
||||
"$(curl -s "$BASE/workspaces/$WS_ID/budget" "${AUTH[@]}")"
|
||||
# Failure: empty body → 400 "budget_limits or budget_limit field is required".
|
||||
assert_contains "PATCH /budget (empty body → 400)" "400" \
|
||||
"$(http_code PATCH "$BASE/workspaces/$WS_ID/budget" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{}')"
|
||||
# Failure: unknown period → 400.
|
||||
assert_contains "PATCH /budget (unknown period → 400)" "400" \
|
||||
"$(http_code PATCH "$BASE/workspaces/$WS_ID/budget" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{"budget_limits":{"yearly":1}}')"
|
||||
# Failure: GET without bearer → 401.
|
||||
assert_contains "GET /budget (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/budget")"
|
||||
|
||||
# ===========================================================================
|
||||
# 4. Checkpoints — POST/GET/DELETE /workspaces/:id/checkpoints* (wsAuth)
|
||||
# Fully self-contained CRUD over workflow_checkpoints (#788). Upsert → latest
|
||||
# → list-by-wfid → delete → 404. Failure modes: missing workflow_id → 400,
|
||||
# empty latest → 404.
|
||||
# ===========================================================================
|
||||
echo "--- /checkpoints ---"
|
||||
WFID="kl-wf-$$"
|
||||
CP=$(curl -s -X POST "$BASE/workspaces/$WS_ID/checkpoints" -H "Content-Type: application/json" "${AUTH[@]}" \
|
||||
-d "{\"workflow_id\":\"$WFID\",\"step_name\":\"step-a\",\"step_index\":1,\"payload\":{\"k\":\"v\"}}")
|
||||
assert_contains "POST /checkpoints (upsert → id + workflow_id)" "\"workflow_id\":\"$WFID\"" "$CP"
|
||||
assert_contains "GET /checkpoints/latest (200 newest)" "\"workflow_id\":\"$WFID\"" \
|
||||
"$(curl -s "$BASE/workspaces/$WS_ID/checkpoints/latest" "${AUTH[@]}")"
|
||||
assert_contains "GET /checkpoints/:wfid (lists the step)" '"step_name":"step-a"' \
|
||||
"$(curl -s "$BASE/workspaces/$WS_ID/checkpoints/$WFID" "${AUTH[@]}")"
|
||||
DEL=$(curl -s -X DELETE "$BASE/workspaces/$WS_ID/checkpoints/$WFID" "${AUTH[@]}")
|
||||
assert_contains "DELETE /checkpoints/:wfid (deleted count)" '"deleted":1' "$DEL"
|
||||
assert_contains "GET /checkpoints/:wfid (after delete → 404)" "404" \
|
||||
"$(http_code GET "$BASE/workspaces/$WS_ID/checkpoints/$WFID" "${AUTH[@]}")"
|
||||
# Failure: missing workflow_id → 400 (binding:required).
|
||||
assert_contains "POST /checkpoints (missing workflow_id → 400)" "400" \
|
||||
"$(http_code POST "$BASE/workspaces/$WS_ID/checkpoints" -H 'Content-Type: application/json' "${AUTH[@]}" -d '{"step_name":"x"}')"
|
||||
# Failure: no bearer → 401.
|
||||
assert_contains "POST /checkpoints (no auth → 401)" "401" \
|
||||
"$(http_code POST "$BASE/workspaces/$WS_ID/checkpoints" -H 'Content-Type: application/json' -d '{"workflow_id":"x","step_name":"y"}')"
|
||||
|
||||
# ===========================================================================
|
||||
# 5. Audit — GET /workspaces/:id/audit (wsAuth)
|
||||
# EU AI Act ledger query (#594). Fresh ws → empty events, total 0,
|
||||
# chain_valid null (AUDIT_LEDGER_SALT unset). Failure: bad RFC3339 from → 400.
|
||||
# ===========================================================================
|
||||
echo "--- /audit ---"
|
||||
AUD=$(curl -s "$BASE/workspaces/$WS_ID/audit" "${AUTH[@]}")
|
||||
assert_contains "GET /audit (total 0 on fresh ws)" '"total":0' "$AUD"
|
||||
assert_contains "GET /audit (chain_valid null without salt)" '"chain_valid":null' "$AUD"
|
||||
assert_contains "GET /audit (bad 'from' → 400)" "400" \
|
||||
"$(http_code GET "$BASE/workspaces/$WS_ID/audit?from=not-a-date" "${AUTH[@]}")"
|
||||
assert_contains "GET /audit (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/audit")"
|
||||
|
||||
# ===========================================================================
|
||||
# 6. Traces — GET /workspaces/:id/traces (wsAuth)
|
||||
# Langfuse proxy (#590). No LANGFUSE_* configured → 200 [] (graceful empty),
|
||||
# never a 5xx. Failure: no auth → 401.
|
||||
# ===========================================================================
|
||||
echo "--- /traces ---"
|
||||
BC=$(body_and_code GET "$BASE/workspaces/$WS_ID/traces" "${AUTH[@]}")
|
||||
TR_CODE=$(printf '%s' "$BC" | tail -n1)
|
||||
TR_BODY=$(printf '%s' "$BC" | sed '$d')
|
||||
assert_contains "GET /traces (200 without Langfuse)" "200" "$TR_CODE"
|
||||
assert_contains "GET /traces (empty list)" '[]' "$TR_BODY"
|
||||
assert_contains "GET /traces (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/traces")"
|
||||
|
||||
# ===========================================================================
|
||||
# 7. Session search — GET /workspaces/:id/session-search (wsAuth)
|
||||
# Searches activity_logs. Seed one activity row, then assert q-filter finds
|
||||
# it and a non-matching q returns []. Failure: no auth → 401.
|
||||
# ===========================================================================
|
||||
echo "--- /session-search ---"
|
||||
curl -s -X POST "$BASE/workspaces/$WS_ID/activity" -H "Content-Type: application/json" "${AUTH[@]}" \
|
||||
-d '{"activity_type":"agent_log","method":"inference","summary":"keyless-needle marker"}' >/dev/null
|
||||
assert_contains "GET /session-search?q=keyless-needle (finds row)" 'keyless-needle' \
|
||||
"$(curl -s "$BASE/workspaces/$WS_ID/session-search?q=keyless-needle" "${AUTH[@]}")"
|
||||
assert_contains "GET /session-search?q=<no-match> (empty)" '[]' \
|
||||
"$(curl -s "$BASE/workspaces/$WS_ID/session-search?q=zzz-no-such-token-zzz" "${AUTH[@]}")"
|
||||
assert_contains "GET /session-search (no auth → 401)" "401" \
|
||||
"$(http_code GET "$BASE/workspaces/$WS_ID/session-search?q=x")"
|
||||
|
||||
# ===========================================================================
|
||||
# 8. Rescue — GET /workspaces/:id/rescue (wsAuth)
|
||||
# RFC internal#742. Fail-CLOSED contract: the e2e-api job has no
|
||||
# MOLECULE_ORG_ID, so the handler returns 503 platform_misconfigured rather
|
||||
# than leaking cross-org. That fail-closed behaviour IS the keyless contract
|
||||
# we gate here (a regression that drops the org guard would flip this to a
|
||||
# 200/404 and turn this assertion RED). Failure mode: no auth → 401.
|
||||
# ===========================================================================
|
||||
echo "--- /rescue ---"
|
||||
BC=$(body_and_code GET "$BASE/workspaces/$WS_ID/rescue" "${AUTH[@]}")
|
||||
RES_CODE=$(printf '%s' "$BC" | tail -n1)
|
||||
RES_BODY=$(printf '%s' "$BC" | sed '$d')
|
||||
if [ "$RES_CODE" = "404" ]; then
|
||||
# MOLECULE_ORG_ID was set in this environment → no-bundle path.
|
||||
assert_contains "GET /rescue (no bundle → 404, org configured)" 'no rescue bundle' "$RES_BODY"
|
||||
else
|
||||
# No MOLECULE_ORG_ID (the e2e-api default) → fail-closed 503.
|
||||
assert_contains "GET /rescue (fail-closed 503 without MOLECULE_ORG_ID)" "503" "$RES_CODE"
|
||||
assert_contains "GET /rescue (platform_misconfigured code)" 'platform_misconfigured' "$RES_BODY"
|
||||
fi
|
||||
assert_contains "GET /rescue (no auth → 401)" "401" "$(http_code GET "$BASE/workspaces/$WS_ID/rescue")"
|
||||
|
||||
# ===========================================================================
|
||||
# 9. LLM billing-mode admin toggle — GET/PUT /admin/workspaces/:id/llm-billing-mode
|
||||
# (AdminAuth). Flip to byok → read back override; bad UUID → 400; missing
|
||||
# 'mode' key → 400; unknown mode → 400.
|
||||
# ===========================================================================
|
||||
echo "--- /admin/workspaces/:id/llm-billing-mode ---"
|
||||
assert_contains "GET llm-billing-mode (resolves a mode)" '"resolved_mode"' \
|
||||
"$(curl -s "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" "${ADMIN_AUTH[@]}")"
|
||||
PUTBM=$(curl -s -X PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H "Content-Type: application/json" "${ADMIN_AUTH[@]}" \
|
||||
-d '{"mode":"byok"}')
|
||||
assert_contains "PUT llm-billing-mode byok (override set)" '"workspace_override":"byok"' "$PUTBM"
|
||||
assert_contains "GET llm-billing-mode (byok persisted)" '"workspace_override":"byok"' \
|
||||
"$(curl -s "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" "${ADMIN_AUTH[@]}")"
|
||||
# Clear the override (null) so we don't leave fixture state skewed.
|
||||
curl -s -X PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H "Content-Type: application/json" "${ADMIN_AUTH[@]}" \
|
||||
-d '{"mode":null}' >/dev/null
|
||||
# Failure: malformed UUID → 400.
|
||||
assert_contains "PUT llm-billing-mode (bad UUID → 400)" "400" \
|
||||
"$(http_code PUT "$BASE/admin/workspaces/not-a-uuid/llm-billing-mode" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{"mode":"byok"}')"
|
||||
# Failure: missing 'mode' key → 400.
|
||||
assert_contains "PUT llm-billing-mode (missing mode → 400)" "400" \
|
||||
"$(http_code PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{}')"
|
||||
# Failure: unknown mode string → 400.
|
||||
assert_contains "PUT llm-billing-mode (unknown mode → 400)" "400" \
|
||||
"$(http_code PUT "$BASE/admin/workspaces/$WS_ID/llm-billing-mode" -H 'Content-Type: application/json' "${ADMIN_AUTH[@]}" -d '{"mode":"bogus-mode"}')"
|
||||
|
||||
# ===========================================================================
|
||||
# 10. Lifecycle — Pause → Resume + Hibernate (wsAuth)
|
||||
# Pause works backend-agnostically (StopWorkspaceAuto no-ops on no backend)
|
||||
# → status=paused. Resume re-provisions: 200 provisioning when a provisioner
|
||||
# is wired (the e2e-api host has Docker), or 503 provisioner-not-available
|
||||
# otherwise — both are valid contracts, so accept either. Failure modes:
|
||||
# resume a non-paused ws → 404; hibernate a non-online ws → 404.
|
||||
# ===========================================================================
|
||||
echo "--- lifecycle (resume / hibernate) ---"
|
||||
# Pause the (online) fixture → status paused.
|
||||
PA=$(curl -s -X POST "$BASE/workspaces/$WS_ID/pause" "${AUTH[@]}")
|
||||
assert_contains "POST /pause (online → paused)" '"status":"paused"' "$PA"
|
||||
# Resume the paused fixture — accept 200 provisioning OR 503 (no provisioner).
|
||||
BC=$(body_and_code POST "$BASE/workspaces/$WS_ID/resume" "${AUTH[@]}")
|
||||
RSM_CODE=$(printf '%s' "$BC" | tail -n1)
|
||||
RSM_BODY=$(printf '%s' "$BC" | sed '$d')
|
||||
if [ "$RSM_CODE" = "200" ]; then
|
||||
assert_contains "POST /resume (paused → provisioning)" '"status":"provisioning"' "$RSM_BODY"
|
||||
elif [ "$RSM_CODE" = "503" ]; then
|
||||
assert_contains "POST /resume (no provisioner → 503 contract)" 'provisioner not available' "$RSM_BODY"
|
||||
else
|
||||
fail "POST /resume (expected 200 or 503)" "got HTTP $RSM_CODE — $RSM_BODY"
|
||||
fi
|
||||
# Failure: resume a workspace that is NOT paused → 404.
|
||||
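# (After a successful resume above it is provisioning/online, not paused.
# NOTE(review): if that resume returned 503 the workspace may still be paused —
# confirm the handler still answers 404 here in the no-provisioner case.)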
|
||||
assert_contains "POST /resume (not-paused → 404)" "404" \
|
||||
"$(http_code POST "$BASE/workspaces/$WS_ID/resume" "${AUTH[@]}")"
|
||||
# Hibernate: bring the fixture back online first, then hibernate it.
|
||||
curl -s -X POST "$BASE/registry/register" -H "Content-Type: application/json" "${AUTH[@]}" \
|
||||
-d "{\"id\":\"$WS_ID\",\"url\":\"https://example.com/keyless\",\"agent_card\":{\"name\":\"Keyless Fixture\",\"skills\":[{\"id\":\"noop\",\"name\":\"Noop\"}]}}" >/dev/null
|
||||
HB=$(curl -s -X POST "$BASE/workspaces/$WS_ID/hibernate" "${AUTH[@]}")
|
||||
assert_contains "POST /hibernate (online → hibernated)" '"status":"hibernated"' "$HB"
|
||||
# Failure: hibernate again (now hibernated, not online/degraded) → 404.
|
||||
assert_contains "POST /hibernate (not-hibernatable → 404)" "404" \
|
||||
"$(http_code POST "$BASE/workspaces/$WS_ID/hibernate" "${AUTH[@]}")"
|
||||
# Failure: no bearer → 401.
|
||||
assert_contains "POST /resume (no auth → 401)" "401" "$(http_code POST "$BASE/workspaces/$WS_ID/resume")"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cleanup — delete the fixture (admin-gated DELETE + per-workspace bearer).
|
||||
# ---------------------------------------------------------------------------
|
||||
e2e_delete_workspace "$WS_ID" "Keyless Fixture" "${ADMIN_AUTH[@]}"
|
||||
|
||||
echo ""
|
||||
echo "=== Results: $PASS passed, $FAIL failed ==="
|
||||
[ "$FAIL" -eq 0 ]
|
||||
@@ -48,14 +48,42 @@ run_test "hermes → slash-form (derive-provider.sh contract)" hermes
|
||||
run_test "codex → slash-form fallback" codex "openai/gpt-4o"
|
||||
run_test "claude-code → OAuth/default alias" claude-code "sonnet"
|
||||
|
||||
# BARE registered BYOK id (registry_gen.go:88), NOT colon `minimax:…`. On
|
||||
# claude-code the colon form is intentionally UNREGISTERED (the adapter can't
|
||||
# strip `minimax:`) and 422s create-validation (internal#718, job 295075);
|
||||
# bare resolves to provider=minimax BYOK. Pinned by the matrix test's
|
||||
# colon-vs-slash-vs-bare triple in derive_provider_matrix_test.go.
|
||||
got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug claude-code)
|
||||
assert_eq "claude-code + MiniMax key → MiniMax model" "$got" "minimax:MiniMax-M2.7"
|
||||
assert_eq "claude-code + MiniMax key → bare registered MiniMax model" "$got" "MiniMax-M2.7"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug claude-code)
|
||||
assert_eq "claude-code + Anthropic API key → Anthropic API model" "$got" "claude-sonnet-4-6"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG; E2E_MINIMAX_API_KEY="mx-priority" E2E_ANTHROPIC_API_KEY="sk-ant-loser" pick_model_slug claude-code)
|
||||
assert_eq "claude-code + both keys → MiniMax priority" "$got" "minimax:MiniMax-M2.7"
|
||||
assert_eq "claude-code + both keys → MiniMax priority (bare)" "$got" "MiniMax-M2.7"
|
||||
|
||||
# ── seo-agent (claude-code-adapter template variant) ──
|
||||
# seo-agent shares the claude-code dispatch branch (it reuses the claude-code
|
||||
# adapter + the same copied providers block). Pin that it resolves IDENTICALLY
|
||||
# to claude-code for every key path so a future refactor can't accidentally
|
||||
# fork seo-agent's model selection from claude-code's.
|
||||
run_test "seo-agent → claude-code default alias" seo-agent "sonnet"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_ANTHROPIC_API_KEY; E2E_MINIMAX_API_KEY="mx-test" pick_model_slug seo-agent)
|
||||
assert_eq "seo-agent + MiniMax key → bare MiniMax model (==claude-code)" "$got" "MiniMax-M2.7"
|
||||
|
||||
got=$(unset E2E_MODEL_SLUG E2E_MINIMAX_API_KEY; E2E_ANTHROPIC_API_KEY="sk-ant-test" pick_model_slug seo-agent)
|
||||
assert_eq "seo-agent + Anthropic key → Anthropic model (==claude-code)" "$got" "claude-sonnet-4-6"
|
||||
|
||||
# ── google-adk (Gemini) ──
|
||||
# AI-Studio BYOK arm → bare gemini-2.5-pro (providers.yaml runtimes.google-adk
|
||||
# `google` arm). The platform/Vertex arm is selected via E2E_LLM_PATH=platform
|
||||
# (a platform: id), not this dispatch. Pin the bare form so a drift to the
|
||||
# platform id (which would change billing/route) is caught.
|
||||
run_test "google-adk → AI-Studio bare gemini id" google-adk "gemini-2.5-pro"
|
||||
|
||||
got=$(E2E_MODEL_SLUG="google_genai:gemini-2.5-pro" pick_model_slug google-adk)
|
||||
assert_eq "google-adk + E2E_MODEL_SLUG override (adapter spelling)" "$got" "google_genai:gemini-2.5-pro"
|
||||
|
||||
# ── Fallback for unknown runtime ──
|
||||
# Picks slash-form (hermes-shaped) since hermes is the historical
|
||||
|
||||
@@ -28,6 +28,13 @@ PASS=0
|
||||
FAIL=0
|
||||
WSID=""
|
||||
|
||||
# GET /workspaces (list) and POST /workspaces (create) are AdminAuth-gated
|
||||
# (router.go:165-166). The e2e-api CI job sets ADMIN_TOKEN on the platform
|
||||
# (fail-open OFF) and exports MOLECULE_ADMIN_TOKEN here, so these calls need the
|
||||
# admin bearer. Guarded if-set so a fail-open dev platform still works.
|
||||
ADMIN_AUTH=()
|
||||
e2e_admin_auth_args ADMIN_AUTH
|
||||
|
||||
cleanup() {
|
||||
# Workspace teardown — best-effort, ignore errors so an unrelated CP
|
||||
# outage doesn't shadow a real test failure.
|
||||
@@ -80,7 +87,7 @@ echo "=== Setup ==="
|
||||
# canvas. Find and delete any with this exact name so the test is safe to
|
||||
# re-run from any state. Match by name (not tag) so this also catches
|
||||
# leftovers created by older script versions.
|
||||
PRIOR=$(curl -s "$BASE/workspaces" | python3 -c '
|
||||
PRIOR=$(curl -s "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} | python3 -c '
|
||||
import json, sys
|
||||
try:
|
||||
print(" ".join(w["id"] for w in json.load(sys.stdin) if w.get("name") == "Notify E2E"))
|
||||
@@ -96,7 +103,7 @@ done
|
||||
# feedback_workspace_model_required_no_platform_default_dynamic_credential_intake).
|
||||
# Body has no runtime → defaults to claude-code; pass the matching model
|
||||
# that the workspace-creation contract now requires.
|
||||
R=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
R=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
|
||||
-d '{"name":"Notify E2E","tier":1,"runtime":"external","external":true,"model":"sonnet"}')
|
||||
WSID=$(echo "$R" | python3 -c 'import json,sys;print(json.load(sys.stdin)["id"])' 2>/dev/null || true)
|
||||
[ -n "$WSID" ] || { echo "Failed to create workspace: $R"; exit 1; }
|
||||
|
||||
@@ -300,7 +300,14 @@ rows = json.load(sys.stdin)
|
||||
def text_of(r):
|
||||
body = r.get('request_body') or {}
|
||||
parts = (body.get('params') or {}).get('message', {}).get('parts') or []
|
||||
return ''.join(p.get('text','') for p in parts if p.get('type')=='text')
|
||||
# A2A v0.3 keys the Part discriminator on 'kind'; legacy senders used
|
||||
# 'type'. ProxyA2A.normalizeA2APayload (#2251) rewrites 'type' -> 'kind'
|
||||
# on ingest, so the stored request_body carries 'kind' even when the
|
||||
# caller posted 'type'. Accept EITHER so this parser asserts on the text
|
||||
# payload, not on which discriminator field the server happened to store.
|
||||
def is_text(p):
|
||||
return p.get('kind') == 'text' or p.get('type') == 'text'
|
||||
return ''.join(p.get('text', '') for p in parts if is_text(p))
|
||||
if len(rows) < 2:
|
||||
print('NEED2_GOT_'+str(len(rows)))
|
||||
else:
|
||||
@@ -309,6 +316,29 @@ else:
|
||||
check_eq "since_id feed orders ASC (oldest-new first, newest-new last)" \
|
||||
"hello-from-e2e-2|hello-from-e2e-3" "$ASC_FIRST"
|
||||
|
||||
# Wire-contract gate (#2251): the caller posted parts with the LEGACY "type"
|
||||
# discriminator, but ProxyA2A.normalizeA2APayload rewrites "type" -> "kind"
|
||||
# (A2A v0.3) BEFORE the row is durably logged. Assert the stored request_body
|
||||
# carries "kind" and no longer carries "type", so a regression that drops the
|
||||
# rename — or a feed that stops storing the normalized body — fails loudly here
|
||||
# instead of silently feeding the polling agent an untagged Part. This is the
|
||||
# end-to-end half of the Go unit tests in a2a_proxy_test.go (which assert the
|
||||
# rename in isolation); this proves it survives the durable activity_logs path.
|
||||
DISC=$(echo "$ASC_RESP" | python3 -c "
|
||||
import json, sys
|
||||
rows = json.load(sys.stdin)
|
||||
kinds, types = [], []
|
||||
for r in rows:
|
||||
body = r.get('request_body') or {}
|
||||
parts = (body.get('params') or {}).get('message', {}).get('parts') or []
|
||||
for p in parts:
|
||||
if 'kind' in p: kinds.append(p['kind'])
|
||||
if 'type' in p: types.append(p['type'])
|
||||
print(('kind' if kinds and not types else 'BAD') + ':' + ','.join(kinds) + '/' + ','.join(types))
|
||||
")
|
||||
check_eq "stored Part uses v0.3 'kind' discriminator, never legacy 'type' (#2251)" \
|
||||
"kind:text,text/" "$DISC"
|
||||
|
||||
# ---------- Phase 6: stale cursor returns 410 ----------
|
||||
echo ""
|
||||
echo "--- Phase 6: Stale / unknown cursor returns 410 ---"
|
||||
|
||||
@@ -7,12 +7,14 @@
|
||||
# extraction (and ongoing template work) can't silently break any
|
||||
# runtime.
|
||||
#
|
||||
# Runtimes covered: claude-code, codex, hermes, openclaw.
|
||||
# Runtimes covered: claude-code, codex, hermes, openclaw, google-adk.
|
||||
# claude-code + hermes have unique
|
||||
# provisioning quirks (claude-code OAuth, hermes 15-min cold-boot)
|
||||
# and stay first-class with their own run_<runtime> functions; the
|
||||
# OpenAI-backed runtimes share run_openai_runtime. Each phase skips cleanly
|
||||
# if its prerequisite secret is missing.
|
||||
# OpenAI-backed runtimes share run_openai_runtime. google-adk has its own
|
||||
# run_google_adk (it asserts manifest registration unconditionally, then drives
|
||||
# its AI-Studio BYOK live arm — keyless-Vertex needs platform WIF CI lacks).
|
||||
# Each phase skips cleanly if its prerequisite secret is missing.
|
||||
#
|
||||
# What this proves:
|
||||
# 1. Provisioning + container boot works for each runtime.
|
||||
@@ -24,13 +26,76 @@
|
||||
# Each phase skips cleanly when its prerequisite secret is absent so a
|
||||
# partially-keyed env (e.g. CI without an OpenAI key) doesn't false-fail.
|
||||
#
|
||||
# REQUIRE-LIVE (false-green guard, mirrors CP serving-e2e's
|
||||
# SERVING_E2E_REQUIRE_LIVE semantics)
|
||||
# ------------------------------------------------------------------
|
||||
# Without a guard, an env with NO live secrets makes every phase SKIP,
|
||||
# leaving PASS=0 FAIL=0 — and the historical `[ "$FAIL" -eq 0 ]` gate
|
||||
# exits 0 (GREEN) while validating ZERO runtimes. That made the REQUIRED
|
||||
# `E2E API Smoke Test` merge gate pass without exercising a single
|
||||
# runtime (false-green).
|
||||
#
|
||||
# Fix: a real "validated arm" counter (VALIDATED) tracks runtimes that
|
||||
# actually ran AND produced a non-error A2A reply. With E2E_REQUIRE_LIVE=1:
|
||||
# if zero arms validated, the run exits NON-zero with a loud message.
|
||||
# Without it (E2E_REQUIRE_LIVE unset/0), a fully-skipped run stays a LOUD
|
||||
# skip + exit 0 for dev convenience.
|
||||
#
|
||||
# This zero-validated→RED decision is the load-bearing logic. It is factored
|
||||
# into evaluate_require_live_gate() (a pure function of $FAIL/$VALIDATED/
|
||||
# $E2E_REQUIRE_LIVE, defined before any platform I/O) and is REGRESSION-GATED
|
||||
# on every PR by tests/e2e/test_require_live_priority_gate_unit.sh, which
|
||||
# sources this file (E2E_PRIORITY_UNIT_SOURCE=1), sets the counters, and
|
||||
# asserts the gate's exit code — no platform, no provisioning, no network.
|
||||
# So the false-green can't silently come back: a revert of the guard fails CI.
|
||||
#
|
||||
# CI POSTURE (REQUIRE-LIVE ON — see .gitea/workflows/e2e-api.yml):
|
||||
# The live e2e-api job SETS E2E_REQUIRE_LIVE=1. The `mock` arm is the
|
||||
# CI-provisionable live-completion arm: it org-imports a mock workspace
|
||||
# (→online→canned A2A reply) with NO external secret. The only thing that
|
||||
# previously blocked it in CI was admin auth — POST /org/import and POST
|
||||
# /admin/workspaces/:id/tokens are AdminAuth-gated, and the job set no admin
|
||||
# token, so every admin call 401'd ("admin auth required"). The job now sets
|
||||
# ADMIN_TOKEN on the platform AND exports the matching MOLECULE_ADMIN_TOKEN
|
||||
# the scripts send, so mock validates end-to-end and VALIDATED>=1 holds on a
|
||||
# healthy platform — the REQUIRED `E2E API Smoke Test` gate now HONESTLY
|
||||
# validates a runtime. If the mock plumbing or the admin-auth wiring breaks,
|
||||
# the gate goes RED (not false-green). The zero-validated→RED decision is also
|
||||
# regression-gated WITHOUT provisioning by the bash unit test above, so a
|
||||
# revert of that logic still fails CI.
|
||||
#
|
||||
# LIVE ARMS (run when their prerequisite is present; opportunistic):
|
||||
# - `mock` (run_mock) is the no-key REQUIRE-LIVE backbone: a virtual
|
||||
# workspace (no container, no EC2, no provider) whose org-import path
|
||||
# short-circuits to status='online' with a canned A2A reply. It validates
|
||||
# in CI now that the e2e-api job wires an admin token (org-import + token
|
||||
# mint are AdminAuth-gated), so it is the guaranteed >=1 validation.
|
||||
# - MiniMax (E2E_MINIMAX_API_KEY, from MOLECULE_STAGING_MINIMAX_API_KEY) is
|
||||
# an OPPORTUNISTIC best-effort real-LLM arm: registry-fragile in CI (422
|
||||
# UNREGISTERED_MODEL_FOR_RUNTIME — see run_minimax header), so a miss is
|
||||
# a best-effort MISS via bestfail() and does NOT red the gate.
|
||||
# The CI e2e-api job sets E2E_REQUIRE_LIVE=1: mock guarantees a validation, so
|
||||
# the REQUIRED gate is honest (RED if the mock plumbing/admin-auth breaks). The
|
||||
# zero-validated→RED logic is also regression-gated by the bash unit test above.
|
||||
#
|
||||
# Usage:
|
||||
# # Enforce REQUIRE-LIVE locally (need >=1 arm to actually validate):
|
||||
# E2E_REQUIRE_LIVE=1 E2E_MINIMAX_API_KEY=... \
|
||||
# tests/e2e/test_priority_runtimes_e2e.sh
|
||||
#
|
||||
# # Default (no enforcement): all-skip stays a LOUD skip + exit 0:
|
||||
# tests/e2e/test_priority_runtimes_e2e.sh
|
||||
#
|
||||
# # Other live arms (if their secrets are configured):
|
||||
# CLAUDE_CODE_OAUTH_TOKEN=... E2E_OPENAI_API_KEY=... \
|
||||
# tests/e2e/test_priority_runtimes_e2e.sh
|
||||
#
|
||||
# # Run only one runtime
|
||||
# E2E_RUNTIMES=mock tests/e2e/test_priority_runtimes_e2e.sh
|
||||
# E2E_RUNTIMES=minimax tests/e2e/test_priority_runtimes_e2e.sh
|
||||
# E2E_RUNTIMES=claude-code tests/e2e/test_priority_runtimes_e2e.sh
|
||||
# E2E_RUNTIMES=hermes tests/e2e/test_priority_runtimes_e2e.sh
|
||||
# E2E_RUNTIMES=google-adk tests/e2e/test_priority_runtimes_e2e.sh # registration always; live arm needs E2E_GOOGLE_API_KEY
|
||||
#
|
||||
# Prereqs:
|
||||
# - workspace-server on http://localhost:8080
|
||||
@@ -41,13 +106,81 @@
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
SKIP=0
|
||||
# VALIDATED counts runtimes that ACTUALLY ran end-to-end (provisioned,
|
||||
# reached online, AND returned a non-error A2A reply). Distinct from PASS,
|
||||
# which also counts sub-assertions like activity-log rows. This is the
|
||||
# signal the REQUIRE-LIVE gate keys off: VALIDATED==0 means we proved
|
||||
# nothing about any runtime, regardless of how many sub-asserts "passed".
|
||||
VALIDATED=0
|
||||
CREATED_WSIDS=()
|
||||
|
||||
# evaluate_require_live_gate — the SINGLE source of the final exit decision.
|
||||
# Pure function of $FAIL, $VALIDATED, and $E2E_REQUIRE_LIVE; performs NO I/O
|
||||
# beyond the loud messages. Returns the exit code the script should exit with:
|
||||
# - FAIL>0 → 1 (a real failure is always red)
|
||||
# - VALIDATED==0 + REQUIRE_LIVE → 1 (false-green trap: proved nothing → RED)
|
||||
# - VALIDATED==0 + !REQUIRE_LIVE → 0 (dev-convenience LOUD skip)
|
||||
# - VALIDATED>=1 → 0 (at least one arm validated end-to-end)
|
||||
# It is a function (not inline tail code) so test_require_live_priority_gate_unit.sh
|
||||
# can drive the REAL decision in isolation — set the counters, call this, assert
|
||||
# the return code — with no platform, no provisioning, no network. That makes the
|
||||
# zero-validated→RED logic a CI-gated regression contract: a future revert of it
|
||||
# fails the unit test on every PR. See that unit test for the fail-direction proof.
|
||||
# evaluate_require_live_gate — compute the exit status the script should end with.
#
# Reads (globals / environment):
#   FAIL              count of hard failures recorded so far
#   VALIDATED         count of runtimes recorded as validated end-to-end
#   E2E_REQUIRE_LIVE  "1" or "true" makes a zero-validated run a failure
#
# Returns:
#   1  FAIL is non-zero
#   1  FAIL or VALIDATED is unset, empty, or not a non-negative integer
#   1  VALIDATED is 0 and E2E_REQUIRE_LIVE is "1"/"true"
#   0  VALIDATED is 0 and enforcement is off (loud skip on stderr)
#   0  VALIDATED >= 1 (prints an OK line on stdout)
#
# The only I/O performed is the diagnostic output.
evaluate_require_live_gate() {
  local fail_count="${FAIL:-}"
  local validated_count="${VALIDATED:-}"
  local require_live="${E2E_REQUIRE_LIVE:-0}"

  # Fail closed on malformed counters. `[ "$x" -eq 0 ]` on an empty or
  # non-numeric operand exits with status 2, which `if` treats as false; a
  # broken VALIDATED would then fall through to the "OK" branch and return 0,
  # i.e. exactly the false-green this gate exists to prevent.
  local counter
  for counter in "$fail_count" "$validated_count"; do
    case "$counter" in
      '' | *[!0-9]*)
        echo "::error::evaluate_require_live_gate: FAIL and VALIDATED must be non-negative integers (FAIL='$fail_count' VALIDATED='$validated_count')." >&2
        return 1
        ;;
    esac
  done

  # Any real failure is always red.
  if [ "$fail_count" -ne 0 ]; then
    return 1
  fi

  # At least one runtime validated end-to-end: green.
  if [ "$validated_count" -ne 0 ]; then
    echo "OK: $validated_count runtime(s) validated end-to-end."
    return 0
  fi

  # Nothing validated. In enforced mode (E2E_REQUIRE_LIVE=1|true) that MUST be
  # red so the required `E2E API Smoke Test` gate cannot pass on an all-skip run.
  if [ "$require_live" = "1" ] || [ "$require_live" = "true" ]; then
    echo "::error::E2E_REQUIRE_LIVE is set but ZERO runtimes were validated end-to-end." >&2
    echo " Every runtime SKIPPED — no live secret was present, so this gate" >&2
    echo " validated nothing. Wire at least one live arm via Gitea secrets" >&2
    echo " (E2E_MINIMAX_API_KEY ← MOLECULE_STAGING_MINIMAX_API_KEY is the" >&2
    echo " default CI arm; CLAUDE_CODE_OAUTH_TOKEN / E2E_OPENAI_API_KEY also" >&2
    echo " work) so >=1 runtime actually provisions + replies. Failing RED" >&2
    echo " instead of false-green." >&2
    return 1
  fi

  # Dev convenience: no enforcement requested → loud skip, exit 0.
  echo "SKIPPED: no live secrets present and E2E_REQUIRE_LIVE is not set — validated" >&2
  echo " zero runtimes. This is a dev-convenience pass; CI sets" >&2
  echo " E2E_REQUIRE_LIVE=1 to make zero-validated a hard failure." >&2
  return 0
}
|
||||
|
||||
# Source-guard: when sourced by the unit test (E2E_PRIORITY_UNIT_SOURCE=1) we
|
||||
# stop HERE — the counters + evaluate_require_live_gate are now defined, and we
|
||||
# must NOT fall through to _lib.sh's platform-dependent helpers or the live
|
||||
# pre-sweep curl below (there is no platform in the unit-test environment).
|
||||
if [ "${E2E_PRIORITY_UNIT_SOURCE:-0}" = "1" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
source "$(dirname "$0")/_lib.sh"
|
||||
|
||||
# GET /workspaces (list, router.go:165) and POST /workspaces (create,
|
||||
# router.go:166) are AdminAuth-gated. The e2e-api CI job sets ADMIN_TOKEN on the
|
||||
# platform (fail-open OFF) and exports MOLECULE_ADMIN_TOKEN here, so the
|
||||
# pre-sweep list and every runtime-create must send the admin bearer or they
|
||||
# 401. run_mock uses POST /org/import (also admin-gated) and wires its own admin
|
||||
# auth inline. Guarded if-set so a fail-open dev platform still works.
|
||||
ADMIN_AUTH=()
|
||||
e2e_admin_auth_args ADMIN_AUTH
|
||||
|
||||
cleanup() {
|
||||
# `set -u` + empty array would error on "${CREATED_WSIDS[@]}"; the
|
||||
# ${VAR[@]+"…"} form expands to nothing when the array is unset/empty
|
||||
@@ -58,14 +191,26 @@ cleanup() {
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
pass() { echo " PASS — $1"; PASS=$((PASS + 1)); }
|
||||
fail() { echo " FAIL — $1"; echo " $2"; FAIL=$((FAIL + 1)); }
|
||||
skip() { echo " SKIP — $1"; SKIP=$((SKIP + 1)); }
|
||||
# pass — print a PASS line for the assertion described by $1 and bump PASS.
pass() {
  printf '%s\n' " PASS — $1"
  PASS=$((PASS + 1))
}
|
||||
# fail — print a FAIL line for $1, then the detail text $2, and bump FAIL.
fail() {
  printf '%s\n' " FAIL — $1"
  printf '%s\n' " $2"
  FAIL=$((FAIL + 1))
}
|
||||
# skip — print a SKIP line carrying the reason in $1 and bump SKIP.
skip() {
  printf '%s\n' " SKIP — $1"
  SKIP=$((SKIP + 1))
}
|
||||
# Mark a runtime as having been validated end-to-end (online + non-error
|
||||
# A2A reply). Also emits a PASS line so it shows in the results tally.
|
||||
# validated — $1 is the description printed on the PASS line. Increments PASS
# and VALIDATED by one each.
validated() {
  printf '%s\n' " PASS — $1"
  PASS=$((PASS + 1))
  VALIDATED=$((VALIDATED + 1))
}
|
||||
# bestfail() is for OPPORTUNISTIC (best-effort) arms whose failure must
|
||||
# NOT red the gate. It does NOT increment FAIL — it only logs + bumps
|
||||
# SKIP so the tally stays honest ("we tried, it didn't validate, but it
|
||||
# was never load-bearing"). Used by the MiniMax arm: MiniMax-create is
|
||||
# fragile in CI (registry-skewed model id, BYOK plumbing — see core#2263
|
||||
# and the run_minimax header), so a MiniMax miss is reported but never
|
||||
# fails the REQUIRED gate. The mock arm is the load-bearing validation
|
||||
# that keeps the gate honest; MiniMax is the real-LLM bonus on top.
|
||||
# bestfail — print a BEST-EFFORT MISS line for $1, then the detail text $2.
# Increments SKIP only; FAIL is left untouched.
bestfail() {
  printf '%s\n' " BEST-EFFORT MISS — $1"
  printf '%s\n' " $2"
  SKIP=$((SKIP + 1))
}
|
||||
|
||||
# Pre-sweep any prior runs that left workspaces behind (same defence as
|
||||
# test_notify_attachments_e2e.sh: trap fires on normal exit, but a
|
||||
# SIGPIPE / kill -9 can bypass it).
|
||||
PRIOR=$(curl -s "$BASE/workspaces" | python3 -c '
|
||||
PRIOR=$(curl -s "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} | python3 -c '
|
||||
import json, sys
|
||||
try:
|
||||
print(" ".join(w["id"] for w in json.load(sys.stdin) if w.get("name","").startswith("Priority E2E ")))
|
||||
@@ -188,7 +333,7 @@ print(json.dumps({'CLAUDE_CODE_OAUTH_TOKEN': os.environ['CLAUDE_CODE_OAUTH_TOKEN
|
||||
")
|
||||
local resp wsid
|
||||
# model required (CTO 2026-05-22 SSOT) — pass the deleted DefaultModel("claude-code") value.
|
||||
resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"Priority E2E (claude-code)\",\"runtime\":\"claude-code\",\"model\":\"sonnet\",\"tier\":1,\"secrets\":$secrets}")
|
||||
wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
|
||||
if [ -z "$wsid" ]; then
|
||||
@@ -220,9 +365,9 @@ print(json.dumps({'CLAUDE_CODE_OAUTH_TOKEN': os.environ['CLAUDE_CODE_OAUTH_TOKEN
|
||||
local reply
|
||||
if reply=$(send_test_prompt "$wsid" "$token"); then
|
||||
if echo "$reply" | grep -q "PONG"; then
|
||||
pass "claude-code reply contains PONG"
|
||||
validated "claude-code reply contains PONG"
|
||||
else
|
||||
pass "claude-code reply non-empty (first 80 chars: ${reply:0:80})"
|
||||
validated "claude-code reply non-empty (first 80 chars: ${reply:0:80})"
|
||||
fi
|
||||
assert_activity_logged "claude-code" "$wsid" "$token"
|
||||
else
|
||||
@@ -254,7 +399,7 @@ print(json.dumps({
|
||||
}))
|
||||
")
|
||||
local resp wsid
|
||||
resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"Priority E2E (hermes)\",\"runtime\":\"hermes\",\"tier\":1,\"model\":\"openai/gpt-4o\",\"secrets\":$secrets}")
|
||||
wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
|
||||
if [ -z "$wsid" ]; then
|
||||
@@ -288,9 +433,9 @@ print(json.dumps({
|
||||
local reply
|
||||
if reply=$(send_test_prompt "$wsid" "$token"); then
|
||||
if echo "$reply" | grep -q "PONG"; then
|
||||
pass "hermes reply contains PONG"
|
||||
validated "hermes reply contains PONG"
|
||||
else
|
||||
pass "hermes reply non-empty (first 80 chars: ${reply:0:80})"
|
||||
validated "hermes reply non-empty (first 80 chars: ${reply:0:80})"
|
||||
fi
|
||||
assert_activity_logged "hermes" "$wsid" "$token"
|
||||
else
|
||||
@@ -327,7 +472,7 @@ print(json.dumps({
|
||||
}))
|
||||
")
|
||||
local resp wsid
|
||||
resp=$(curl -s -X POST "$BASE/workspaces" -H "Content-Type: application/json" \
|
||||
resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"Priority E2E ($runtime)\",\"runtime\":\"$runtime\",\"tier\":1,\"model\":\"openai/gpt-4o-mini\",\"secrets\":$secrets}")
|
||||
wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
|
||||
if [ -z "$wsid" ]; then
|
||||
@@ -358,9 +503,9 @@ print(json.dumps({
|
||||
local reply
|
||||
if reply=$(send_test_prompt "$wsid" "$token"); then
|
||||
if echo "$reply" | grep -q "PONG"; then
|
||||
pass "$runtime reply contains PONG"
|
||||
validated "$runtime reply contains PONG"
|
||||
else
|
||||
pass "$runtime reply non-empty (first 80 chars: ${reply:0:80})"
|
||||
validated "$runtime reply non-empty (first 80 chars: ${reply:0:80})"
|
||||
fi
|
||||
assert_activity_logged "$runtime" "$wsid" "$token"
|
||||
else
|
||||
@@ -371,18 +516,387 @@ print(json.dumps({
|
||||
run_codex() { run_openai_runtime "codex" "codex"; }
|
||||
run_openclaw() { run_openai_runtime "openclaw" "openclaw"; }
|
||||
|
||||
WANT="${E2E_RUNTIMES:-claude-code codex hermes openclaw}"
|
||||
####################################################################
|
||||
# google-adk arm — Gemini. REGISTRATION asserted always; LIVE arm is
|
||||
# REQUIRED-when-keyed, LOUD-skip-when-absent (NEVER best-effort/fail-open).
|
||||
####################################################################
|
||||
# google-adk serves Gemini two ways (providers.yaml runtimes.google-adk):
|
||||
# * platform arm → keyless Vertex via the Molecule LLM proxy (server-side
|
||||
# WIF mint, platform_managed billing — the org-default PROD path). It needs
|
||||
# a platform WIF identity that CI does NOT have, so this arm does NOT drive
|
||||
# the keyless-Vertex path (no fail-open arm — we never green a path we can't
|
||||
# actually exercise).
|
||||
# * google arm → AI Studio API-key BYOK (the tenant's OWN GOOGLE/GEMINI
|
||||
# key), bare `gemini-2.5-pro`. This is the CI-/staging-exercisable path and
|
||||
# is what the LIVE portion below drives when E2E_GOOGLE_API_KEY is present.
|
||||
#
|
||||
# Two-part contract (core#2332 P0.1 — google-adk previously had ZERO e2e):
|
||||
# 1. REGISTRATION (always, NO live creds): google-adk MUST be present in the
|
||||
# deployed manifest.json's workspace_templates — that file is the SSOT the
|
||||
# Create-handler's runtime allowlist is derived from (runtime_registry.go::
|
||||
# loadRuntimesFromManifest). If it is absent, a google-adk create 422s
|
||||
# RUNTIME_UNSUPPORTED, so registration is the precondition for ANY serving.
|
||||
# Asserting it offline means even a key-less CI run proves google-adk is
|
||||
# registered (a regression that drops it from the manifest reds the gate).
|
||||
# This does NOT bump VALIDATED — registration is not end-to-end serving.
|
||||
# 2. LIVE (REQUIRED-when-keyed): with E2E_GOOGLE_API_KEY set, provision the
|
||||
# AI-Studio BYOK arm end-to-end (online + non-error A2A reply). A miss here
|
||||
# is a HARD fail() (fail-closed-if-present), exactly like the claude-code /
|
||||
# hermes / openai arms — NOT a best-effort miss. Without the key the live
|
||||
# portion is a LOUD skip() (dev-convenience), same as every keyed arm.
|
||||
# run_google_adk — google-adk arm: manifest registration check, then the
# AI-Studio BYOK live path when a key is available.
#
# Reads:    WORKSPACE_MANIFEST_PATH (optional manifest override),
#           E2E_GOOGLE_API_KEY (gates the live part), BASE, ADMIN_AUTH.
# Writes:   appends the created workspace id to CREATED_WSIDS; counters are
#           updated through pass / fail / skip / validated.
# Calls:    wait_for_status, e2e_extract_token, e2e_mint_workspace_token,
#           send_test_prompt, assert_activity_logged (defined elsewhere).
# Returns:  0 from every early exit; handled problems are recorded with
#           fail() rather than propagated as a non-zero status.
run_google_adk() {
  echo ""
  echo "=== google-adk (Gemini) — registration + AI-Studio BYOK happy path ==="

  # ── Part 1: REGISTRATION (always; no live creds needed) ──────────────────
  # Assert google-adk is in the manifest.json workspace_templates SSOT (the
  # Create-handler allowlist source). WORKSPACE_MANIFEST_PATH override mirrors
  # the server's own env (runtime_registry.go::manifestPath); otherwise resolve
  # the monorepo-root manifest.json relative to this script (tests/e2e/ -> repo
  # root is two levels up).
  local manifest="${WORKSPACE_MANIFEST_PATH:-$(cd "$(dirname "$0")/../.." && pwd)/manifest.json}"
  if [ ! -f "$manifest" ]; then
    fail "google-adk registration" "manifest.json not found at $manifest (cannot verify the runtime allowlist SSOT)"
    return 0
  fi

  # The whole computation sits inside one try block so that a manifest which
  # parses as JSON but has an unexpected shape is reported as ERR:<reason>
  # instead of crashing python3 (a non-zero exit here would abort the script
  # under `set -e` before fail() could record anything).
  local registered
  registered=$(python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        m = json.load(fh)
    names = [t.get("name") for t in m.get("workspace_templates", [])]
    # loadRuntimesFromManifest strips the "-default" vanilla suffix; match the same.
    norm = {n[:-len("-default")] if isinstance(n, str) and n.endswith("-default") else n for n in names}
    verdict = "yes" if "google-adk" in norm else "no:%s" % sorted(n for n in norm if n)
except Exception as e:
    verdict = "ERR:%s" % e
print(verdict)
' "$manifest")
  if [ "$registered" != "yes" ]; then
    fail "google-adk registered in manifest.json workspace_templates" \
      "google-adk absent from the Create-handler runtime allowlist SSOT ($registered) — a create would 422 RUNTIME_UNSUPPORTED"
    return 0
  fi
  pass "google-adk registered in manifest.json workspace_templates (Create-handler allowlist SSOT)"

  # ── Part 2: LIVE arm (REQUIRED-when-keyed, LOUD-skip-when-absent) ─────────
  # AI-Studio BYOK path: the tenant's own GOOGLE_API_KEY/GEMINI_API_KEY. The
  # keyless-Vertex PROD path needs a platform WIF identity CI lacks, so it is
  # NOT exercised here (no fail-open arm). Same env name the staging-full-saas
  # google-adk arm uses (E2E_GOOGLE_API_KEY).
  if [ -z "${E2E_GOOGLE_API_KEY:-}" ]; then
    skip "E2E_GOOGLE_API_KEY not set (google-adk live arm needs an AI-Studio Gemini key; keyless-Vertex needs platform WIF, not available in CI)"
    return 0
  fi

  local secrets
  secrets=$(python3 -c "
import json, os
# The google provider (providers.yaml) reads GEMINI_API_KEY / GOOGLE_API_KEY and
# dials generativelanguage.googleapis.com with the tenant's OWN key. Inject under
# both names the provider accepts so the adapter resolves regardless of order.
k = os.environ['E2E_GOOGLE_API_KEY']
print(json.dumps({'GOOGLE_API_KEY': k, 'GEMINI_API_KEY': k}))
")

  local resp wsid
  # Bare `gemini-2.5-pro` is the registered AI-Studio BYOK id for google-adk
  # (providers.yaml runtimes.google-adk `google` arm). DeriveProvider routes the
  # bare gemini- id to the google vendor (third_party_anthropic_compat, BYOK).
  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E (google-adk)\",\"runtime\":\"google-adk\",\"tier\":1,\"model\":\"gemini-2.5-pro\",\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
    fail "create google-adk workspace" "$resp"
    return 0
  fi
  CREATED_WSIDS+=("$wsid")
  echo " workspace=$wsid"

  # google-adk runtime image cold boot ~30-90s (image already pulled).
  local final
  final=$(wait_for_status "$wsid" "online failed" 240) || true
  if [ "$final" != "online" ]; then
    fail "google-adk workspace reaches online" "final status: $final"
    return 0
  fi
  pass "google-adk workspace reaches online"

  # Token: prefer the one in the create response, else mint one. Both lookups
  # are guarded with `|| true` so that a helper exiting non-zero leaves the
  # token empty and is reported by the fail() below, instead of aborting the
  # whole script under `set -euo pipefail` (run_mock guards its mint the same way).
  local token
  token=$(echo "$resp" | e2e_extract_token) || true
  if [ -z "$token" ]; then
    token=$(e2e_mint_workspace_token "$wsid") || true
  fi
  if [ -z "$token" ]; then
    fail "resolve google-adk workspace token" "no token returned"
    return 0
  fi

  # Any successful reply counts as validated; PONG only changes the label.
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
      validated "google-adk reply contains PONG"
    else
      validated "google-adk reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "google-adk" "$wsid" "$token"
  else
    fail "google-adk reply" "${reply:-<empty or error>}"
  fi
}
|
||||
|
||||
####################################################################
|
||||
# Mock arm — the GUARANTEED, always-available REQUIRE-LIVE backbone.
|
||||
####################################################################
|
||||
# The mock runtime (workspace-server/internal/handlers/mock_runtime.go)
|
||||
# is a virtual workspace: NO container, NO EC2, NO LLM key. The org-import
|
||||
# path (createWorkspaceTree, org_import.go) short-circuits a runtime=mock
|
||||
# workspace straight to status='online' (no provisioner needed), and the
|
||||
# A2A proxy (a2a_proxy.go → handleMockA2A) synthesises a deterministic
|
||||
# canned JSON-RPC reply with logActivity=true (writes the activity_logs
|
||||
# row too). That makes mock the perfect REQUIRE-LIVE backbone: it
|
||||
# exercises the SAME plumbing every real runtime needs to pass —
|
||||
# provision-decision → status=online → A2A round-trip → activity_logs —
|
||||
# without depending on any external provider key or LLM availability. It
|
||||
# is GREEN on a healthy platform and RED only if that plumbing genuinely
|
||||
# breaks (DB insert, status flip, A2A proxy, activity logging). No more
|
||||
# false-green (zero-validated is impossible when mock works), and no more
|
||||
# can't-go-green (mock needs no secret, so it always runs in CI).
|
||||
#
|
||||
# Why org-import (POST /org/import) instead of POST /workspaces:
|
||||
# The mock→online short-circuit lives ONLY in createWorkspaceTree
|
||||
# (org_import.go). The single-workspace Create handler (workspace.go)
|
||||
# has no mock branch — it routes runtime=mock through
|
||||
# provisionWorkspaceAuto, which in CI's local-build mode has no mock
|
||||
# image and would never reach online. Org-import is the supported path
|
||||
# to a live mock workspace, so the arm drives it.
|
||||
#
|
||||
# The canned reply is one of the "On it!" variants (NOT "PONG"), so this
|
||||
# arm validates on the non-empty / non-error branch — that is the real
|
||||
# contract for mock (it proves the plumbing, not an LLM's instruction-
|
||||
# following).
|
||||
run_mock() {
|
||||
echo ""
|
||||
echo "=== mock (no-key plumbing backbone) happy path ==="
|
||||
# No secret gate — mock ALWAYS runs. That is the whole point: it is the
|
||||
# required-validation arm that keeps E2E_REQUIRE_LIVE honest without a key.
|
||||
|
||||
# Inline single-workspace mock org. model is a required field on the
|
||||
# org-import contract (createWorkspaceTree fails-closed without one);
|
||||
# mock never USES the model, so any non-empty value satisfies the
|
||||
# contract. The org-import path does not run the Create handler's
|
||||
# registry model-validation, so "mock" is accepted as-is.
|
||||
# POST /org/import is AdminAuth-gated (router.go:778). When the platform has
|
||||
# ADMIN_TOKEN set (as the e2e-api CI job now does), an unauthenticated import
|
||||
# 401s with {"error":"admin auth required"}. Send the same admin bearer the
|
||||
# mint helper uses (MOLECULE_ADMIN_TOKEN, ADMIN_TOKEN fallback) — guarded so a
|
||||
# bootstrap/dev platform with no admin token (fail-open) still works.
|
||||
local admin_bearer="${MOLECULE_ADMIN_TOKEN:-${ADMIN_TOKEN:-}}"
|
||||
local admin_auth=()
|
||||
[ -n "$admin_bearer" ] && admin_auth=(-H "Authorization: Bearer $admin_bearer")
|
||||
local import_resp wsid
|
||||
import_resp=$(curl -s -X POST "$BASE/org/import" -H "Content-Type: application/json" \
|
||||
${admin_auth[@]+"${admin_auth[@]}"} \
|
||||
-d '{
|
||||
"template": {
|
||||
"name": "Priority E2E Mock Org",
|
||||
"defaults": {"runtime": "mock", "model": "mock", "tier": 1},
|
||||
"workspaces": [
|
||||
{"name": "Priority E2E (mock)", "runtime": "mock", "model": "mock", "tier": 1}
|
||||
]
|
||||
}
|
||||
}')
|
||||
# org-import returns {"org":..., "count":N, "workspaces":[{"id":...,
|
||||
# "name":...,"tier":...}, ...]} (handlers/org.go:898-901). Pull the id of
|
||||
# the single workspace we declared. (Older "results" key fallback kept for
|
||||
# forward/back compat in case the response shape is ever versioned.)
|
||||
wsid=$(echo "$import_resp" | python3 -c '
|
||||
import json, sys
|
||||
try:
|
||||
d = json.load(sys.stdin)
|
||||
except Exception:
|
||||
sys.exit(0)
|
||||
for r in (d.get("workspaces") or d.get("results") or []):
|
||||
if r.get("name") == "Priority E2E (mock)" and r.get("id"):
|
||||
print(r["id"]); break
|
||||
') || true
|
||||
if [ -z "$wsid" ]; then
|
||||
# mock org-import is the REQUIRE-LIVE backbone and is EXPECTED to succeed in
|
||||
# CI now that the e2e-api job wires an admin token (ADMIN_TOKEN on the
|
||||
# platform + MOLECULE_ADMIN_TOKEN sent above). A missing id here is a REAL
|
||||
# break (admin-auth wiring, org-import create, or the mock short-circuit) and
|
||||
# MUST red the gate — so this is a hard fail(), not a best-effort miss. Under
|
||||
# E2E_REQUIRE_LIVE=1 a FAIL also forces a non-zero exit via
|
||||
# evaluate_require_live_gate. Surface the response so the break is visible
|
||||
# (e.g. {"error":"admin auth required"} would mean the token wiring regressed).
|
||||
fail "create mock workspace (org-import)" "$import_resp"
|
||||
return 0
|
||||
fi
|
||||
CREATED_WSIDS+=("$wsid")
|
||||
echo " workspace=$wsid"
|
||||
|
||||
# Mock goes straight to online (no container boot) — a short budget is
|
||||
# plenty; if it is NOT online quickly the mock short-circuit in
|
||||
# createWorkspaceTree is genuinely broken and the gate SHOULD red.
|
||||
local final
|
||||
final=$(wait_for_status "$wsid" "online failed" 60) || true
|
||||
if [ "$final" != "online" ]; then
|
||||
fail "mock workspace reaches online" "final status: $final (mock should go online without provisioning)"
|
||||
return 0
|
||||
fi
|
||||
pass "mock workspace reaches online"
|
||||
|
||||
# Mock workspaces are not created with an inline token; mint one via the
|
||||
# admin endpoint (same fallback every other arm uses).
|
||||
local token
|
||||
token=$(e2e_mint_workspace_token "$wsid") || true
|
||||
if [ -z "$token" ]; then
|
||||
fail "resolve mock workspace token" "no token returned from POST /admin/workspaces/:id/tokens"
|
||||
return 0
|
||||
fi
|
||||
|
||||
# A2A round-trip. The mock proxy returns a canned non-error reply (one
|
||||
# of the "On it!" variants) — NOT "PONG" — so we validate on the
|
||||
# non-empty branch. A non-error, non-empty reply means the A2A proxy
|
||||
# short-circuit + reply-shape contract are intact end-to-end.
|
||||
local reply
|
||||
if reply=$(send_test_prompt "$wsid" "$token"); then
|
||||
validated "mock reply non-empty (canned; first 80 chars: ${reply:0:80})"
|
||||
assert_activity_logged "mock" "$wsid" "$token"
|
||||
else
|
||||
fail "mock reply" "${reply:-<empty or error>} (mock A2A short-circuit should always return a canned reply)"
|
||||
fi
|
||||
}
|
||||
|
||||
####################################################################
|
||||
# MiniMax live arm — OPPORTUNISTIC (best-effort) real-LLM arm.
|
||||
####################################################################
|
||||
# NOTE: this is now a BEST-EFFORT arm, not the REQUIRE-LIVE backbone.
|
||||
# mock (run_mock above) is the guaranteed, no-key validation that keeps
|
||||
# the gate honest. This arm uses the BARE registered BYOK id `MiniMax-M2.7`
|
||||
# (NOT the colon `minimax:MiniMax-M2.7`): on claude-code the colon form is
|
||||
# INTENTIONALLY unregistered — the claude-code adapter cannot strip the
|
||||
# `minimax:` prefix, so DeriveProvider rejects it 422
|
||||
# UNREGISTERED_MODEL_FOR_RUNTIME before any provisioning (provider-registry
|
||||
# SSOT, internal#718; pinned by derive_provider_matrix_test.go's
|
||||
# colon-vs-slash-vs-bare triple, and observed on real staging job 295075).
|
||||
# The bare id is in claude-code's `minimax` arm (registry_gen.go:88
|
||||
# Models=[MiniMax-M2,MiniMax-M2.7,MiniMax-M2.7-highspeed,MiniMax-M3]) and
|
||||
# derives provider=minimax (BYOK via MINIMAX_API_KEY), so create-validation
|
||||
# accepts it. This arm stays BEST-EFFORT (bestfail, non-gating) for transient
|
||||
# MiniMax provisioning / backend issues — mock carries the REQUIRED gate; if
|
||||
# MiniMax DOES come up it validates as a bonus real-LLM check.
|
||||
# Drives the claude-code runtime against MiniMax (BYOK) using the
|
||||
# already-present Gitea secret MOLECULE_STAGING_MINIMAX_API_KEY,
|
||||
# surfaced into the env as E2E_MINIMAX_API_KEY (same name + secret the
|
||||
# staging-smoke / continuous-synth canaries use — see staging-smoke.yml
|
||||
# and continuous-synth-e2e.yml). NO new credential is introduced.
|
||||
#
|
||||
# Provider wiring and model-id details for this (best-effort) arm:
|
||||
# - claude-code's `minimax` provider (providers.yaml / registry_gen.go)
|
||||
# is third_party_anthropic_compat: it reads MINIMAX_API_KEY at boot
|
||||
# and routes ANTHROPIC_BASE_URL → api.minimax.io/anthropic. So the
|
||||
# ONLY tenant secret needed is {"MINIMAX_API_KEY": <key>} — exactly
|
||||
# the SECRETS_JSON branch test_staging_full_saas.sh uses.
|
||||
# - Model id is the BARE `MiniMax-M2.7`, the registered BYOK arm for
|
||||
# claude-code (registry_gen.go:88 Runtimes["claude-code"]["minimax"]
|
||||
# Models). DeriveProvider routes bare → provider=minimax (BYOK). The
|
||||
# colon-namespaced `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code
|
||||
# (the adapter can't strip `minimax:`; internal#718) and 422s create —
|
||||
# it is only the correct BYOK id on openclaw/hermes, which DO strip it.
|
||||
# run_minimax — opportunistic live arm: claude-code runtime on MiniMax (BYOK).
#
# Reads:  E2E_MINIMAX_API_KEY (arm is skipped when unset/empty), BASE, ADMIN_AUTH.
# Writes: appends the created workspace id to CREATED_WSIDS.
#
# Flow: create workspace -> wait for online -> resolve token -> one A2A prompt.
# Every miss is reported through bestfail and followed by `return 0`; none of
# the early-exit paths call fail.
run_minimax() {
  echo ""
  echo "=== minimax (claude-code BYOK) happy path ==="
  if [ -z "${E2E_MINIMAX_API_KEY:-}" ]; then
    skip "E2E_MINIMAX_API_KEY not set (MiniMax live arm needs the MiniMax key)"
    return 0
  fi

  local secrets
  secrets=$(python3 -c "
import json, os
# claude-code's minimax provider (third_party_anthropic_compat) reads
# MINIMAX_API_KEY and points ANTHROPIC_BASE_URL at api.minimax.io/anthropic
# at boot — so the ONLY tenant secret needed is the MiniMax key itself.
print(json.dumps({'MINIMAX_API_KEY': os.environ['E2E_MINIMAX_API_KEY']}))
")

  local resp wsid
  # BARE registered BYOK model id `MiniMax-M2.7` (registry_gen.go:88). The
  # colon form `minimax:MiniMax-M2.7` is UNREGISTERED on claude-code (adapter
  # can't strip `minimax:`; internal#718) and 422s create — bare derives
  # provider=minimax (BYOK via MINIMAX_API_KEY) and passes create-validation.
  resp=$(curl -s -X POST "$BASE/workspaces" ${ADMIN_AUTH[@]+"${ADMIN_AUTH[@]}"} -H "Content-Type: application/json" \
    -d "{\"name\":\"Priority E2E (minimax)\",\"runtime\":\"claude-code\",\"model\":\"MiniMax-M2.7\",\"tier\":1,\"secrets\":$secrets}")
  wsid=$(echo "$resp" | python3 -c 'import json,sys;print(json.load(sys.stdin).get("id",""))') || true
  if [ -z "$wsid" ]; then
    # BEST-EFFORT: real MiniMax create/provision can still miss on transient
    # backend / provisioning issues (the bare model id itself is registered —
    # see header). Do NOT red the gate; mock is the required backbone. Report
    # the create response so any miss is visible.
    bestfail "create minimax workspace (best-effort; mock carries the gate)" "$resp"
    return 0
  fi
  CREATED_WSIDS+=("$wsid")
  echo " workspace=$wsid"

  # claude-code runtime image is already pulled; cold boot ~30-90s. The
  # first MiniMax cold-call can be slow but that's covered by send_test_prompt's
  # --max-time 180.
  local final
  final=$(wait_for_status "$wsid" "online failed" 240) || true
  if [ "$final" != "online" ]; then
    bestfail "minimax workspace reaches online (best-effort)" "final status: $final"
    return 0
  fi
  pass "minimax workspace reaches online"

  # Token resolution is guarded with `|| true`, the same way run_mock guards
  # e2e_mint_workspace_token: a non-zero helper must fall through to the
  # empty-token bestfail below rather than abort the whole run if errexit is
  # active (which would red the gate from a best-effort arm).
  local token
  token=$(echo "$resp" | e2e_extract_token) || true
  if [ -z "$token" ]; then
    token=$(e2e_mint_workspace_token "$wsid") || true
  fi
  if [ -z "$token" ]; then
    bestfail "resolve minimax workspace token (best-effort)" "no token returned"
    return 0
  fi

  # A reply containing PONG and any other non-empty reply both count as a
  # validation; only the label differs.
  local reply
  if reply=$(send_test_prompt "$wsid" "$token"); then
    if echo "$reply" | grep -q "PONG"; then
      validated "minimax reply contains PONG"
    else
      validated "minimax reply non-empty (first 80 chars: ${reply:0:80})"
    fi
    assert_activity_logged "minimax" "$wsid" "$token"
  else
    bestfail "minimax reply (best-effort)" "${reply:-<empty or error>}"
  fi
}
|
||||
|
||||
# `mock` runs FIRST and by default: it is the no-key REQUIRE-LIVE backbone
|
||||
# that guarantees >=1 validation on a healthy platform (see run_mock). The
|
||||
# real-LLM arms (claude-code/codex/hermes/openclaw/minimax/google-adk) run if
|
||||
# their secrets are present and add real-provider coverage on top; minimax is
|
||||
# best-effort (never reds the gate). google-adk ALSO asserts its registration
|
||||
# unconditionally (no key needed), then drives its AI-Studio BYOK live arm as a
|
||||
# REQUIRED-when-keyed (fail-closed-if-present), LOUD-skip-when-absent arm.
|
||||
# Runtime selection: E2E_RUNTIMES is a space-separated list of arm names;
# unset means "every arm, mock first". An unknown name aborts with exit 2.
WANT="${E2E_RUNTIMES:-mock claude-code codex hermes openclaw minimax google-adk}"
for r in $WANT; do
  case "$r" in
    mock)        run_mock ;;
    claude-code) run_claude_code ;;
    codex)       run_codex ;;
    hermes)      run_hermes ;;
    openclaw)    run_openclaw ;;
    minimax)     run_minimax ;;
    google-adk)  run_google_adk ;;
    # Single `all` arm. A second, shorter `all)` pattern earlier in the case
    # would win the match and silently drop mock/minimax/google-adk.
    all)         run_mock; run_claude_code; run_codex; run_hermes; run_openclaw; run_minimax; run_google_adk ;;
    *) echo "unknown runtime in E2E_RUNTIMES: $r" >&2; exit 2 ;;
  esac
done

echo ""
echo "=== Results: $PASS passed, $FAIL failed, $SKIP skipped, $VALIDATED runtime(s) validated end-to-end ==="

# Final exit decision lives in evaluate_require_live_gate (defined at the top of
# this file, before any platform I/O) so the same logic is unit-tested in
# isolation by test_require_live_priority_gate_unit.sh. Mirror its return code
# into the process exit code. No bare `[ "$FAIL" -eq 0 ]` precedes it: under
# errexit that test would end the script before the gate ever ran.
evaluate_require_live_gate
exit $?
|
||||
|
||||
@@ -389,8 +389,24 @@ INSTANCE_ID_GRACE_SECS="${E2E_INSTANCE_ID_GRACE_SECS:-45}"
|
||||
WS_LAST_STATUS=""
|
||||
while true; do
|
||||
if [ "$(date +%s)" -gt "$ONLINE_DEADLINE" ]; then
|
||||
# Boot-failure diagnostic burst (#2310-class): last_sample_error is often
|
||||
# EMPTY for a config-resolution failure (the agent never sampled — it
|
||||
# failed before its first heartbeat), so a bare "err=" tells us nothing
|
||||
# (run 223233). Surface the FULL workspace record + every plausible error
|
||||
# field so the actual reason (e.g. unservable provider, missing key, wrong
|
||||
# model arm) is visible without re-running.
|
||||
WS_LAST_ERR=$(ws_field "$WS_ID" "last_sample_error")
|
||||
fail "Workspace $WS_ID never reached status=online within ${WORKSPACE_ONLINE_TIMEOUT_SECS}s (last status=$WS_LAST_STATUS, err=$WS_LAST_ERR)"
|
||||
log "── DIAGNOSTIC BURST (step 4 — workspace never reached online) ──"
|
||||
log " model=$MODEL_SLUG llm_path=${E2E_LLM_PATH:-platform} secrets=$([ "$SECRETS_JSON" = '{}' ] && echo '(none)' || echo '(set)')"
|
||||
for f in status last_sample_error last_error error provisioning_error instance_id instance_status; do
|
||||
log " ${f}=$(ws_field "$WS_ID" "$f")"
|
||||
done
|
||||
log " full record:"
|
||||
tenant_call GET "/workspaces/$WS_ID" 2>/dev/null \
|
||||
| python3 -m json.tool 2>/dev/null | sed 's/^/ /' \
|
||||
|| log " (could not fetch /workspaces/$WS_ID)"
|
||||
log "── END DIAGNOSTIC ──"
|
||||
fail "Workspace $WS_ID never reached status=online within ${WORKSPACE_ONLINE_TIMEOUT_SECS}s (last status=$WS_LAST_STATUS, err=$WS_LAST_ERR; see diagnostic burst above)"
|
||||
fi
|
||||
WS_STATUS=$(ws_field "$WS_ID" "status")
|
||||
if [ "$WS_STATUS" != "$WS_LAST_STATUS" ]; then
|
||||
|
||||
Executable
+124
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env bash
|
||||
# Fail-direction / load-bearing proof for the E2E_REQUIRE_LIVE
|
||||
# fail-closed-on-skip guard in test_staging_full_saas.sh.
|
||||
#
|
||||
# WHY (harden/e2e-staging-saas-failclosed): the staging SaaS E2E is being
|
||||
# hardened to become a HARD merge-gate. A gate that can reach its final `ok`
|
||||
# WITHOUT having actually exercised a provision→online→A2A cycle is a
|
||||
# false-green — it would let a refactor that short-circuits the lifecycle
|
||||
# (or a skip path that swallows it) report PASS. require_live_or_die() is the
|
||||
# guard; this test proves it FAILS (exit 5) when milestones are missing and
|
||||
# PASSES when all fired — the watch-it-fail counterpart the dev-SOP requires.
|
||||
#
|
||||
# Runs entirely offline (no LLM, no network, no provisioning) — pure shell
|
||||
# logic — so it can run on every PR in the fast lane and locally via `bash`.
|
||||
set -uo pipefail

# Scratch dir for the generated guard-runner stubs. The EXIT trap guarantees
# cleanup even when an assertion exits the test non-zero (lint_cleanup_traps).
# Abort early if the directory cannot be created: with an empty TMPDIR_E2E the
# later `mktemp "$TMPDIR_E2E/stub.XXXXXX"` would target the filesystem root.
TMPDIR_E2E=$(mktemp -d -t require-live-guard-XXXXXX) || {
  echo "FATAL: could not create scratch dir" >&2
  exit 2
}
trap 'rm -rf "$TMPDIR_E2E"' EXIT
# A signal handler that only cleans up lets the script carry on afterwards
# (with its scratch dir gone). Exit instead; the EXIT trap does the cleanup.
trap 'exit 130' INT
trap 'exit 143' TERM

# Assertion counters, updated by assert_rc.
PASS=0
FAIL=0
|
||||
|
||||
# Reproduce the EXACT guard logic from test_staging_full_saas.sh. Kept in
|
||||
# lockstep with the host script: if the host logic changes, this test must
|
||||
# change with it (and a divergence is itself a signal to re-prove the gate).
|
||||
# make_guard_runner — print the guard under test to stdout.
#
# Emits three things as shell source: the REQUIRE_LIVE / LIVE_MILESTONES
# variables, live_milestone() (records a milestone once), and
# require_live_or_die() (exits 5 when REQUIRE_LIVE=1 and a required milestone
# is missing). The heredoc delimiter is quoted so nothing is expanded here.
make_guard_runner() {
  cat <<'GUARD_RUNNER'
REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
LIVE_MILESTONES=""
live_milestone() {
  case " $LIVE_MILESTONES " in
    *" $1 "*) ;;
    *) LIVE_MILESTONES="$LIVE_MILESTONES $1" ;;
  esac
}
require_live_or_die() {
  [ "$REQUIRE_LIVE" = "1" ] || return 0
  local required="provisioned tenant_online workspace_online a2a_roundtrip"
  local m missing=""
  for m in $required; do
    case " $LIVE_MILESTONES " in
      *" $m "*) ;;
      *) missing="$missing $m" ;;
    esac
  done
  if [ -n "$missing" ]; then
    echo "MISSING:${missing}" >&2
    exit 5
  fi
}
GUARD_RUNNER
}
|
||||
|
||||
# run_case <E2E_REQUIRE_LIVE value> <space-separated milestones to stamp>
|
||||
# echoes the observed exit code.
|
||||
# run_case <E2E_REQUIRE_LIVE value> [<space-separated milestones to stamp>]
#
# Writes a throwaway stub script (guard definitions, one live_milestone call
# per milestone, then require_live_or_die), runs it with E2E_REQUIRE_LIVE set
# to the first argument, and echoes the stub's exit code on stdout.
#
# The milestones argument is optional: under `set -u` a bare "$1" after the
# first shift would abort on a missing argument before any `shift || true`
# could tolerate it, so it is read with an explicit empty default.
run_case() {
  local require_live="${1?run_case: E2E_REQUIRE_LIVE value required}"
  local milestones="${2:-}"
  local stub observed m
  stub=$(mktemp "$TMPDIR_E2E/stub.XXXXXX")
  {
    echo "#!/usr/bin/env bash"
    echo "set -uo pipefail"
    make_guard_runner
    # Unquoted on purpose: one live_milestone line per whitespace-split word.
    for m in $milestones; do
      echo "live_milestone $m"
    done
    echo "require_live_or_die"
    echo 'echo REACHED_END'
  } > "$stub"
  E2E_REQUIRE_LIVE="$require_live" bash "$stub" >/dev/null 2>&1
  observed=$?
  rm -f "$stub"
  echo "$observed"
}
|
||||
|
||||
# assert_rc <label> <require_live> <milestones> <expected rc>
#
# Runs one scenario through run_case and compares the echoed exit code with
# the expectation. A match bumps PASS and prints a tick line on stdout; a
# mismatch bumps FAIL and prints a cross line on stderr.
assert_rc() {
  local label="$1" require_live="$2" milestones="$3" expected="$4"
  local observed
  observed=$(run_case "$require_live" "$milestones")
  if [ "$observed" != "$expected" ]; then
    echo " ✗ $label: REQUIRE_LIVE=$require_live milestones='$milestones' expected=$expected OBSERVED=$observed" >&2
    FAIL=$((FAIL+1))
    return
  fi
  echo " ✓ $label: REQUIRE_LIVE=$require_live milestones='$milestones' → rc=$observed"
  PASS=$((PASS+1))
}
|
||||
|
||||
echo "=== E2E_REQUIRE_LIVE fail-closed-on-skip guard proof ==="
echo

# Arguments per case: label, E2E_REQUIRE_LIVE value, stamped milestones, expected rc.

# Guard armed and nothing stamped: must exit 5 (the false-green trap).
assert_rc "require-live, nothing ran → exit 5 (the false-green trap)" 1 "" 5

# Guard armed, lifecycle stops short of the A2A round-trip: still exit 5.
assert_rc "require-live, partial lifecycle → exit 5" 1 "provisioned tenant_online workspace_online" 5

# Guard armed and all four required milestones stamped: rc=0.
assert_rc "require-live, full lifecycle → pass" 1 "provisioned tenant_online workspace_online a2a_roundtrip" 0

# Stamping a milestone twice must not disturb the membership check.
assert_rc "require-live, duplicate stamps still pass" 1 "provisioned provisioned tenant_online workspace_online a2a_roundtrip a2a_roundtrip" 0

# Guard not armed: local/debug runs are never failed by it, whatever was stamped.
assert_rc "no require-live, nothing ran → pass (guard is opt-in)" 0 "" 0
assert_rc "require-live unset-equivalent (0), partial → pass" 0 "provisioned" 0

# A milestone outside the required set is ignored.
assert_rc "require-live, extra milestone tolerated" 1 "provisioned tenant_online workspace_online a2a_roundtrip extra_thing" 0

echo
echo "=== Results: $PASS passed, $FAIL failed ==="
# Last command: its status is the script's exit status.
[ "$FAIL" -eq 0 ]
|
||||
+114
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env bash
|
||||
# Fail-direction / load-bearing proof for the E2E_REQUIRE_LIVE zero-validated
|
||||
# gate in test_priority_runtimes_e2e.sh (the REQUIRED `E2E API Smoke Test`).
|
||||
#
|
||||
# WHY (harden/enforce-ci-gates-core-v2, PR #2286): the priority-runtimes E2E's
|
||||
# only historical exit gate was `[ "$FAIL" -eq 0 ]`. When every runtime SKIPs
|
||||
# because no live secret is present — exactly what the CI step did — PASS=0
|
||||
# FAIL=0 and the script exited 0 (GREEN) while validating ZERO runtimes. The
|
||||
# REQUIRED merge gate was therefore false-green: passing without exercising a
|
||||
# single runtime. The fix adds a VALIDATED counter and makes a zero-validated
|
||||
# run RED when E2E_REQUIRE_LIVE is set.
|
||||
#
|
||||
# That zero-validated→RED decision lives in evaluate_require_live_gate() in
|
||||
# test_priority_runtimes_e2e.sh. CI cannot prove it via a live arm — the CI
|
||||
# substrate can't provision ANY runtime end-to-end (MiniMax 422, mock org-
|
||||
# import create fails, claude-code needs a key CI lacks), so the live e2e-api
|
||||
# job does NOT force E2E_REQUIRE_LIVE (that would red the required gate for
|
||||
# everyone). This UNIT test is the regression coverage instead: it drives the
|
||||
# REAL evaluate_require_live_gate() function — not a copy — in isolation by
|
||||
# sourcing the script with E2E_PRIORITY_UNIT_SOURCE=1 (which stops before any
|
||||
# platform I/O), setting the counters, and asserting the gate's return code.
|
||||
#
|
||||
# Because it exercises the actual function, a future revert of the zero-
|
||||
# validated→RED logic in test_priority_runtimes_e2e.sh fails THIS test on
|
||||
# every PR — so the false-green can't silently come back.
|
||||
#
|
||||
# Runs entirely offline (no LLM, no network, no provisioning) — pure shell
|
||||
# logic — so it runs on every PR in the fast lane and locally via `bash`.
|
||||
set -uo pipefail

# The gate script is looked up next to this test file.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
GATE_SCRIPT="$SCRIPT_DIR/test_priority_runtimes_e2e.sh"

# Nothing to test without the real script: exit 2.
[ -f "$GATE_SCRIPT" ] || {
  echo "FATAL: cannot find $GATE_SCRIPT" >&2
  exit 2
}

# Assertion counters, updated by assert_rc.
PASS=0
FAIL=0
|
||||
|
||||
# run_case <E2E_REQUIRE_LIVE value> <VALIDATED count> <FAIL count>
|
||||
# Sources the REAL test_priority_runtimes_e2e.sh under the unit source-guard
|
||||
# (E2E_PRIORITY_UNIT_SOURCE=1 → it returns right after defining the counters
|
||||
# and evaluate_require_live_gate(), before _lib.sh / the live pre-sweep curl),
|
||||
# sets the counters to the scenario, calls the real gate, and echoes the
|
||||
# return code. Each case runs in a fresh `bash -c` so set -e/-u inside the
|
||||
# sourced script can't leak between cases or kill this harness.
|
||||
# run_case <E2E_REQUIRE_LIVE value> <VALIDATED count> <FAIL count>
#
# Starts a fresh bash that sources $GATE_SCRIPT with E2E_PRIORITY_UNIT_SOURCE=1,
# sets VALIDATED and FAIL to the scenario values, calls
# evaluate_require_live_gate and exits with its status. That status is then
# echoed on stdout for the caller to capture.
run_case() {
  local require_live="$1" validated="$2" failcount="$3"
  local probe rc
  # Single-quoted: every variable below is expanded by the child shell from
  # the environment assignments on the bash invocation.
  probe='
set -uo pipefail
# shellcheck disable=SC1090
source "$GATE_SCRIPT" # returns at the source-guard (no platform I/O)
VALIDATED="$VAL"
FAIL="$FL"
evaluate_require_live_gate >/dev/null 2>&1
exit $?
'
  E2E_PRIORITY_UNIT_SOURCE=1 \
    E2E_REQUIRE_LIVE="$require_live" \
    GATE_SCRIPT="$GATE_SCRIPT" \
    VAL="$validated" \
    FL="$failcount" \
    bash -c "$probe"
  rc=$?
  echo "$rc"
}
|
||||
|
||||
# assert_rc <label> <require_live> <validated> <failcount> <expected rc>
#
# Runs one scenario through run_case and compares the echoed return code with
# the expectation. A match bumps PASS and prints a tick line on stdout; a
# mismatch bumps FAIL and prints a cross line on stderr.
assert_rc() {
  local label="$1" require_live="$2" validated="$3" failcount="$4" expected="$5"
  local observed
  observed=$(run_case "$require_live" "$validated" "$failcount")
  if [ "$observed" != "$expected" ]; then
    echo " ✗ $label: REQUIRE_LIVE=$require_live VALIDATED=$validated FAIL=$failcount expected=$expected OBSERVED=$observed" >&2
    FAIL=$((FAIL + 1))
    return
  fi
  echo " ✓ $label: REQUIRE_LIVE=$require_live VALIDATED=$validated FAIL=$failcount → rc=$observed"
  PASS=$((PASS + 1))
}
|
||||
|
||||
echo "=== E2E_REQUIRE_LIVE priority-runtimes zero-validated gate proof ==="
echo " (drives the REAL evaluate_require_live_gate from $GATE_SCRIPT)"
echo

# Arguments per case: label, E2E_REQUIRE_LIVE, VALIDATED, FAIL, expected rc.

# (a) Gate armed, nothing validated: must be RED. This is the false-green trap.
assert_rc "require-live, zero validated → RED (the false-green trap)" 1 0 0 1

# (b) Gate armed, one or more runtimes validated: GREEN.
assert_rc "require-live, one validated → GREEN" 1 1 0 0
assert_rc "require-live, several validated → GREEN" 1 3 0 0

# (c) Gate not armed, nothing validated: GREEN (loud skip for local runs).
assert_rc "no require-live, zero validated → GREEN (dev-convenience loud skip)" 0 0 0 0

# The string form "true" arms the gate as well.
assert_rc "require-live='true', zero validated → RED" true 0 0 1

# A genuine FAIL is RED whatever REQUIRE_LIVE / VALIDATED say; the
# zero-validated guard neither hides it nor is hidden by it.
assert_rc "real FAIL with validations, no require-live → RED" 0 2 1 1
assert_rc "real FAIL, zero validated, no require-live → RED" 0 0 1 1

echo
echo "=== Results: $PASS passed, $FAIL failed ==="
# Last command: its status is the script's exit status.
[ "$FAIL" -eq 0 ]
|
||||
@@ -26,7 +26,26 @@
|
||||
# the workspace stuck on 'online' indefinitely.)
|
||||
#
|
||||
# Hibernation is intentionally NOT covered here — it has its own timing
# model (idle threshold) and warrants a separate harness. (The
# pause→resume + hibernate→wake transitions for PLATFORM-compute runtimes
# are covered by test_staging_full_saas.sh step 10b.)
|
||||
#
|
||||
# BYO meta-runtime arms (kimi, kimi-cli) — added 2026-06-05:
|
||||
# kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime:
|
||||
# runtime_registry.go:141-147) that go through the SAME external/poll
|
||||
# provisioning path as `external` — create with external:true →
|
||||
# awaiting_agent, register → online — but with their runtime LABEL
|
||||
# PRESERVED (workspace.go:752-770 normalizeExternalRuntime keeps the
|
||||
# specific label, does NOT coerce to generic "external", so the canvas
|
||||
# shows the right runtime). They had ONLY validation/unit coverage and
|
||||
# were NEVER provisioned→online in any e2e. Step 9 adds, for EACH of
|
||||
# {kimi, kimi-cli}: create → assert awaiting_agent + label-preserved →
|
||||
# register(poll) → assert online + label-preserved → A2A → assert the
|
||||
# poll-mode {status:"queued"} envelope (a2a_proxy.go:462-477). The A2A
|
||||
# arm proves the a2a proxy routes a BYO meta-runtime to the poll queue
|
||||
# (200 + queued) rather than 404/500 — the meaningful round-trip for a
|
||||
# workspace with no standing live agent. A real BYO-agent COMPLETION
|
||||
# needs a standing kimi BYO cell (flagged for the CTO in the PR body).
|
||||
#
|
||||
# Required env (mirrors test_staging_full_saas.sh):
|
||||
# MOLECULE_CP_URL default: https://staging-api.moleculesai.app
|
||||
@@ -40,9 +59,25 @@
|
||||
# E2E_INTENTIONAL_FAILURE 1 → break a step on purpose to verify
|
||||
# the EXIT trap still tears down (mirrors
|
||||
# the full-saas harness's safety net).
|
||||
# E2E_REQUIRE_LIVE 1 → fail-closed if the harness exits 0
|
||||
# WITHOUT having driven all four
|
||||
# awaiting_agent transitions. CI sets this
|
||||
# so a future skip / early-return can never
|
||||
# masquerade as a green run. Mirrors CP
|
||||
# serving-e2e SERVING_E2E_REQUIRE_LIVE.
|
||||
# E2E_STALE_POLL_DEADLINE_SECS default 240. Upper bound for the
|
||||
# heartbeat-staleness READINESS poll (step
|
||||
# 6). Replaces the old fixed sleep+one-shot
|
||||
# assert that raced the sweep cadence.
|
||||
# E2E_TRANSIENT_RETRIES default 8. Bounded retries for register /
|
||||
# re-register against transient edge errors
|
||||
# (502/503/504 from Caddy during cold TLS /
|
||||
# agent boot). Mirrors the full-saas
|
||||
# cold-start retry loop — NOT a bare sleep.
|
||||
#
|
||||
# Exit codes: 0 happy, 1 generic, 2 missing env, 3 provision timeout,
# 4 teardown leak, 5 REQUIRE_LIVE violation (exited 0 having validated
# nothing).
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -51,6 +86,13 @@ ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — Railway s
|
||||
PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
|
||||
RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
|
||||
STALE_WAIT_SECS="${E2E_STALE_WAIT_SECS:-180}"
|
||||
# Readiness-poll deadline for the sweep transition (step 6). Must exceed
|
||||
# STALE_WAIT_SECS (the no-heartbeat window) by at least one sweep
|
||||
# interval so a slightly-late sweep tick is polled-for, not misread as a
|
||||
# stuck 'online'. 240 = 180s window + 60s sweep-cadence headroom.
|
||||
STALE_POLL_DEADLINE_SECS="${E2E_STALE_POLL_DEADLINE_SECS:-240}"
|
||||
TRANSIENT_RETRIES="${E2E_TRANSIENT_RETRIES:-8}"
|
||||
REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
|
||||
|
||||
SLUG="e2e-ext-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
|
||||
SLUG=$(echo "$SLUG" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-' | head -c 32)
|
||||
@@ -59,6 +101,66 @@ log() { echo "[$(date +%H:%M:%S)] $*"; }
|
||||
fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
|
||||
ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; }
|
||||
|
||||
# REQUIRE_LIVE bookkeeping: count the four awaiting_agent transitions the
|
||||
# test is contracted to prove. The EXIT trap fails-closed (exit 5) if the
|
||||
# script reaches a clean exit without all four — so a silent skip, an
|
||||
# early `return 0`, or a refactor that drops a step can never show green.
|
||||
TRANSITIONS_VERIFIED=0
|
||||
EXPECTED_TRANSITIONS=4
|
||||
# require_transition <human label>
# Records one proven awaiting_agent transition: increments
# TRANSITIONS_VERIFIED and logs the running count against EXPECTED_TRANSITIONS.
require_transition() {
  local what="$1"
  : $((TRANSITIONS_VERIFIED += 1))
  log " [require-live] transition ${TRANSITIONS_VERIFIED}/${EXPECTED_TRANSITIONS} proven: $what"
}
|
||||
|
||||
# Redact bearer tokens from any HTTP body before logging (mirrors the
|
||||
# full-saas sanitize_http_body so transient-error logs never leak creds).
|
||||
# sanitize_http_body — stdin → stdout filter that masks credentials.
#
# Replaces the value following "Bearer" or "token" (plus whitespace) with the
# literal REDACTED, keeping the keyword itself. The value class covers the
# RFC 6750 b64token alphabet (letters, digits, - . _ ~ + /) and trailing "="
# padding; a narrower class would stop at the first "+", "/" or "=" and leave
# the rest of the credential in the log. `#` is the sed delimiter so that "/"
# can appear inside the bracket expression without escaping.
sanitize_http_body() {
  sed -E 's#(Bearer|token)[[:space:]]+[A-Za-z0-9._~+/=-]+#\1 REDACTED#g'
}
|
||||
|
||||
# Bounded retry-on-transient for POST /registry/register. The tenant edge
|
||||
# (Caddy) returns 502/503/504 with an identifiable body while TLS / the
|
||||
# workspace agent finishes cold-booting — a single shot here was the
|
||||
# un-named flake (a transient edge error misread as a register failure).
|
||||
# This mirrors the full-saas cold-start loop (test_staging_full_saas.sh
|
||||
# ~L780-816): retry ONLY on a transient TRANSPORT class (5xx + body
|
||||
# match), bounded by TRANSIENT_RETRIES, and FAIL CLOSED (non-zero) once
|
||||
# the budget is spent. It deliberately does NOT retry on a 4xx — that's a
|
||||
# real contract bug (e.g. wrong payload field) and must stay red.
|
||||
# Sets REGISTER_RESP (body + trailing "HTTP_CODE=NNN" line) on success;
|
||||
# returns non-zero (caller `fail`s) when the bounded budget is exhausted.
|
||||
# register_with_retry <step label> <request body>
#
# POSTs the body to $TENANT_URL/registry/register up to TRANSIENT_RETRIES
# times. Outcomes per attempt:
#   - HTTP 200: store the response in REGISTER_RESP, return 0.
#   - 502/503/504 whose (sanitized, truncated) body matches a known transient
#     edge message: log it, sleep 10s and retry while attempts remain.
#   - anything else, or the last transient attempt: store the response in
#     REGISTER_RESP and return 1.
# The response carries a trailing "HTTP_CODE=NNN" line appended by curl -w.
register_with_retry() {
  local label="$1" body="$2"
  local attempt code resp safe
  local transient_re='Service Unavailable|Bad Gateway|Gateway Timeout|error code: 502|error code: 504|workspace agent unreachable|connection refused|no healthy upstream'

  for attempt in $(seq 1 "$TRANSIENT_RETRIES"); do
    # errexit is lifted only around curl so a transport error is observed
    # here (as a missing HTTP_CODE line) instead of aborting the script.
    set +e
    resp=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST \
      "$TENANT_URL/registry/register" \
      -H "Authorization: Bearer $WS_AUTH_TOKEN" \
      -H "X-Molecule-Org-Id: $ORG_ID" \
      -H "Content-Type: application/json" \
      -d "$body")
    set -e

    code=$(printf '%s' "$resp" | sed -n 's/^HTTP_CODE=//p' | tail -n1)
    # No HTTP_CODE line at all is treated as code 000.
    : "${code:=000}"

    if [ "$code" = "200" ]; then
      REGISTER_RESP="$resp"
      return 0
    fi

    safe=$(printf '%s' "$resp" | sanitize_http_body | head -c 300)

    # Retry ONLY on a transient transport class; a 4xx is a real bug.
    case "$code" in
      502|503|504)
        if echo "$safe" | grep -Eqi "$transient_re"; then
          log " ${label} transient $code attempt ${attempt}/${TRANSIENT_RETRIES}: $safe"
          if [ "$attempt" -lt "$TRANSIENT_RETRIES" ]; then
            sleep 10
            continue
          fi
        fi
        ;;
    esac

    # Non-transient (4xx, or unrecognized 5xx body), or budget spent: fail closed.
    REGISTER_RESP="$resp"
    return 1
  done
  return 1
}
|
||||
|
||||
CURL_COMMON=(-sS --fail-with-body --max-time 30)
|
||||
|
||||
# ─── cleanup trap (mirrors full-saas) ────────────────────────────────────
|
||||
@@ -98,8 +200,19 @@ cleanup_org() {
|
||||
fi
|
||||
ok "Teardown clean — no orphan resources for $SLUG (${elapsed}s)"
|
||||
|
||||
# REQUIRE_LIVE fail-closed gate. Only meaningful on an OTHERWISE-CLEAN
|
||||
# exit (entry_rc==0): a script that completed all steps but somehow did
|
||||
# not register all four transitions (a skip, an early return, a dropped
|
||||
# assertion in a refactor) must NOT report success. A non-zero entry_rc
|
||||
# already carries its own failure semantics — don't mask it with 5.
|
||||
if [ "$entry_rc" = "0" ] && [ "${REQUIRE_LIVE}" = "1" ] \
|
||||
&& [ "$TRANSITIONS_VERIFIED" -lt "$EXPECTED_TRANSITIONS" ]; then
|
||||
echo "❌ REQUIRE_LIVE: exited 0 but only ${TRANSITIONS_VERIFIED}/${EXPECTED_TRANSITIONS} awaiting_agent transitions were proven — refusing to report green." >&2
|
||||
exit 5
|
||||
fi
|
||||
|
||||
case "$entry_rc" in
|
||||
0|1|2|3|4) ;;
|
||||
0|1|2|3|4|5) ;;
|
||||
*) exit 1 ;;
|
||||
esac
|
||||
}
|
||||
@@ -125,10 +238,17 @@ admin_call() {
|
||||
|
||||
# ─── 1. Create org ──────────────────────────────────────────────────────
|
||||
log "1/8 Creating org $SLUG..."
# admin_call inherits CURL_COMMON's --fail-with-body: a non-2xx makes curl
# exit 22, which under `set -euo pipefail` would abort this bare command
# substitution BEFORE the `fail "... missing 'id'"` handler below can print
# the body. set +e keeps the 22 from tripping `set -e`; curl still wrote the
# body, so CREATE_RESP holds it and the id-check surfaces why.
set +e
CREATE_RESP=$(admin_call POST /cp/admin/orgs \
  -d "{\"slug\":\"$SLUG\",\"name\":\"E2E ext $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}")
set -e
# Single parse + single check. The parse tolerates a non-JSON body (stderr
# dropped, empty id on failure) and the failure message carries the sanitized
# response, so no earlier body-less check may fire ahead of it.
ORG_ID=$(echo "$CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
[ -z "$ORG_ID" ] && fail "Org create response missing 'id': $(printf '%s' "$CREATE_RESP" | sanitize_http_body 2>/dev/null || printf '%s' "$CREATE_RESP")"
ok "Org created (id=$ORG_ID)"
|
||||
|
||||
# ─── 2. Wait for tenant provisioning ────────────────────────────────────
|
||||
@@ -221,8 +341,13 @@ tenant_call() {
|
||||
# on whatever the create handler set first (typically 'provisioning')
|
||||
# because the follow-up UPDATE failed the enum cast.
|
||||
log "4/8 Creating external workspace (no URL — exercises workspace.go:333)..."
|
||||
# tenant_call inherits CURL_COMMON's --fail-with-body: guard the same way as
|
||||
# the org create above so a non-2xx returns the body to the id/status checks
|
||||
# below instead of aborting opaquely on curl exit 22.
|
||||
set +e
|
||||
WS_CREATE_RESP=$(tenant_call POST /workspaces \
|
||||
-d '{"name":"ext-e2e","runtime":"external","external":true}')
|
||||
set -e
|
||||
|
||||
WS_ID=$(echo "$WS_CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))")
|
||||
WS_RESP_STATUS=$(echo "$WS_CREATE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
@@ -235,7 +360,7 @@ try:
|
||||
except Exception:
|
||||
print('')
|
||||
")
|
||||
[ -z "$WS_ID" ] && fail "Workspace create missing id: $WS_CREATE_RESP"
|
||||
[ -z "$WS_ID" ] && fail "Workspace create missing id: $(printf '%s' "$WS_CREATE_RESP" | sanitize_http_body 2>/dev/null || printf '%s' "$WS_CREATE_RESP")"
|
||||
[ "$WS_RESP_STATUS" != "awaiting_agent" ] && fail "Expected response status=awaiting_agent, got $WS_RESP_STATUS"
|
||||
ok "Workspace created (id=$WS_ID, response status=awaiting_agent)"
|
||||
|
||||
@@ -248,6 +373,7 @@ GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
DB_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$DB_STATUS" != "awaiting_agent" ] && fail "DB row status=$DB_STATUS (expected awaiting_agent — migration 046 likely not applied)"
|
||||
ok "DB row stored as awaiting_agent (proof migration 046 applied)"
|
||||
require_transition "create: provisioning → awaiting_agent (DB-verified)"
|
||||
|
||||
# ─── 5. Register the workspace (transitions to online) ──────────────────
|
||||
# Pre-fix this path was actually fine because it writes 'online', a value
|
||||
@@ -277,20 +403,20 @@ log "5/8 Registering workspace via /registry/register..."
|
||||
# url — accepted but not dispatched-to in poll mode, so
|
||||
# example.invalid is a valid sentinel.
|
||||
REGISTER_BODY=$(printf '{"id":"%s","url":"https://example.invalid:443","delivery_mode":"poll","agent_card":{"name":"e2e-ext","skills":[{"id":"echo","name":"Echo"}]}}' "$WS_ID")
|
||||
# Disable --fail-with-body for this one call so a 4xx surfaces the response
|
||||
# body (the bare CURL_COMMON would `set -e`-kill before we could log it).
|
||||
REGISTER_RESP=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST "$TENANT_URL/registry/register" \
|
||||
-H "Authorization: Bearer $WS_AUTH_TOKEN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$REGISTER_BODY") || true
|
||||
log " register response: $(echo "$REGISTER_RESP" | head -c 300)"
|
||||
echo "$REGISTER_RESP" | grep -q "HTTP_CODE=200" || fail "register returned non-200 — see body above"
|
||||
# Bounded retry-on-transient (see register_with_retry). The previous
|
||||
# single-shot here would `fail` on a cold-boot 502 from the tenant edge —
|
||||
# an un-named transient misread as a register break. The helper retries
|
||||
# ONLY that class and fails closed on a real 4xx or an exhausted budget.
|
||||
REGISTER_RESP=""
|
||||
register_with_retry "register" "$REGISTER_BODY" \
|
||||
|| fail "register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
|
||||
log " register response: $(echo "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
|
||||
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
ONLINE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$ONLINE_STATUS" != "online" ] && fail "Expected online after register, got $ONLINE_STATUS"
|
||||
ok "Workspace transitioned to online"
|
||||
require_transition "register: awaiting_agent → online"
|
||||
|
||||
# Confirm the register handler echoed back delivery_mode=poll. We read
|
||||
# this from the register RESPONSE, not the workspace GET response, because
|
||||
@@ -310,38 +436,165 @@ fi
|
||||
# This is the SECOND silent-failure path (registry/healthsweep.go's
|
||||
# sweepStaleRemoteWorkspaces). Pre-migration-046 the heartbeat-staleness
|
||||
# UPDATE silently failed and the workspace stuck on 'online' forever
|
||||
# even though no agent was alive. We wait the full window + a sweep
|
||||
# interval and assert the row transitions back to 'awaiting_agent'.
|
||||
log "6/8 Waiting ${STALE_WAIT_SECS}s for heartbeat-staleness sweep (no heartbeat sent)..."
|
||||
# even though no agent was alive.
|
||||
#
|
||||
# FLAKE FIX (named: sweep-cadence race). The old code did a FIXED
|
||||
# `sleep $STALE_WAIT_SECS` then a SINGLE assert. The staleness sweep is a
|
||||
# periodic tick (REMOTE_LIVENESS_STALE_AFTER + a sweep interval); if the
|
||||
# tick that flips the row lands even one second after the fixed sleep, the
|
||||
# one-shot GET reads 'online' and the test fails — a real transition,
|
||||
# misread as a flake because the assert was racing the sweep cadence.
|
||||
# Replace with: sleep through the mandatory no-heartbeat window ONCE (the
|
||||
# sweep cannot fire before the window elapses, so polling earlier is
|
||||
# pointless), then READINESS-POLL for the awaiting_agent transition up to
|
||||
# STALE_POLL_DEADLINE_SECS, hard-failing with a clear message at the
|
||||
# deadline. Deterministic: a slow-but-working sweep passes; a genuinely
|
||||
# stuck 'online' still fails (now with how long we actually waited).
|
||||
log "6/8 Waiting ${STALE_WAIT_SECS}s no-heartbeat window, then polling for sweep (up to ${STALE_POLL_DEADLINE_SECS}s total)..."
|
||||
[ "$STALE_POLL_DEADLINE_SECS" -le "$STALE_WAIT_SECS" ] && \
|
||||
fail "Misconfigured: STALE_POLL_DEADLINE_SECS ($STALE_POLL_DEADLINE_SECS) must exceed STALE_WAIT_SECS ($STALE_WAIT_SECS) by at least one sweep interval"
|
||||
sleep "$STALE_WAIT_SECS"
|
||||
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
STALE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$STALE_STATUS" != "awaiting_agent" ] && \
|
||||
fail "After ${STALE_WAIT_SECS}s with no heartbeat, expected status=awaiting_agent (sweep transition), got $STALE_STATUS — migration 046 likely not applied OR sweep not running"
|
||||
STALE_DEADLINE=$(( $(date +%s) + (STALE_POLL_DEADLINE_SECS - STALE_WAIT_SECS) ))
|
||||
STALE_STATUS=""
|
||||
while true; do
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
STALE_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$STALE_STATUS" = "awaiting_agent" ] && break
|
||||
if [ "$(date +%s)" -gt "$STALE_DEADLINE" ]; then
|
||||
fail "After ${STALE_POLL_DEADLINE_SECS}s with no heartbeat, status still '$STALE_STATUS' (expected awaiting_agent sweep transition) — migration 046 likely not applied OR sweep not running"
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
ok "Heartbeat-staleness sweep transitioned online → awaiting_agent (proof healthsweep.go fix working)"
|
||||
require_transition "sweep: online → awaiting_agent (no heartbeat)"
|
||||
|
||||
# ─── 7. Re-register and confirm we can come back online ─────────────────
|
||||
# This proves the awaiting_agent state is recoverable (re-registrable),
|
||||
# which is the whole point of using it instead of 'offline'.
|
||||
log "7/8 Re-registering after stale → confirming recovery to online..."
|
||||
# Same payload contract as step 5 (id + agent_card both required). See note
|
||||
# there for why workspace_id would 400.
|
||||
REREG_RESP=$(curl -sS --max-time 30 -w "\nHTTP_CODE=%{http_code}" -X POST "$TENANT_URL/registry/register" \
|
||||
-H "Authorization: Bearer $WS_AUTH_TOKEN" \
|
||||
-H "X-Molecule-Org-Id: $ORG_ID" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$REGISTER_BODY") || true
|
||||
log " re-register response: $(echo "$REREG_RESP" | head -c 300)"
|
||||
echo "$REREG_RESP" | grep -q "HTTP_CODE=200" || fail "re-register returned non-200 — see body above"
|
||||
# there for why workspace_id would 400. Same bounded retry-on-transient.
|
||||
REGISTER_RESP=""
|
||||
register_with_retry "re-register" "$REGISTER_BODY" \
|
||||
|| fail "re-register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
|
||||
log " re-register response: $(echo "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
|
||||
|
||||
GET_RESP=$(tenant_call GET "/workspaces/$WS_ID")
|
||||
RECOVERED_STATUS=$(echo "$GET_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
|
||||
[ "$RECOVERED_STATUS" != "online" ] && \
|
||||
fail "Expected re-register to return workspace to online, got $RECOVERED_STATUS"
|
||||
ok "Re-register succeeded — awaiting_agent → online (operator-recoverable)"
|
||||
require_transition "re-register: awaiting_agent → online (recovery)"
|
||||
|
||||
# ─── 7c. BYO meta-runtime arms: kimi + kimi-cli ─────────────────────────
|
||||
# kimi and kimi-cli are BYO-compute meta-runtimes (isExternalLikeRuntime).
|
||||
# They share the external/poll provisioning path but PRESERVE their runtime
|
||||
# label (workspace.go normalizeExternalRuntime). They had no provision→online
|
||||
# e2e until now. For EACH: create(external:true, runtime=<rt>) → assert
|
||||
# awaiting_agent + label preserved → register(poll) → assert online + label
|
||||
# preserved → A2A → assert the poll-mode {status:"queued"} envelope.
|
||||
#
|
||||
# Why poll-mode {queued} is the A2A assertion (not a real completion): there
|
||||
# is no standing live BYO agent in staging, so the meaningful round-trip is
|
||||
# that the a2a proxy ROUTES a BYO meta-runtime to the poll queue (HTTP 200 +
|
||||
# {status:"queued", delivery_mode:"poll"}, a2a_proxy.go:462-477) instead of
|
||||
# 404/500. A real BYO-agent COMPLETION needs a standing kimi BYO cell — see
|
||||
# the CTO flag in the PR body.
|
||||
# byo_meta_runtime_arm <runtime-label>
#   Runs one provision → online → A2A arm for a BYO meta-runtime:
#     1. create a workspace with external:true and runtime=<label>; assert the
#        create response AND a follow-up GET both report awaiting_agent, and
#        that the GET reports the runtime label unchanged;
#     2. register in poll mode via register_with_retry; assert status=online
#        and that the register response echoes delivery_mode=poll;
#     3. POST an A2A message; assert HTTP 200 with status=queued and
#        delivery_mode=poll.
#   Any failed assertion calls fail (exit 1).
#   Globals read: TENANT_URL, TENANT_TOKEN, ORG_ID. REGISTER_RESP is reset
#   here and read back after register_with_retry. WS_AUTH_TOKEN is pointed at
#   the new workspace for the register call and restored afterwards.
byo_meta_runtime_arm() { # $1 = runtime label (kimi | kimi-cli)
  local rt="$1"
  local resp wid status auth get_resp db_status reg_dm online_status

  log " [$rt] create (external:true, runtime=$rt)..."
  # Guard the create the same way the org/workspace create steps do: a non-2xx
  # makes the underlying curl exit non-zero, which under `set -e` would abort
  # this bare command substitution before the id check below can report the
  # body. With the guard, resp still holds the body and the check explains why.
  set +e
  resp=$(tenant_call POST /workspaces \
    -d "$(printf '{"name":"ext-%s-e2e","runtime":"%s","external":true}' "$rt" "$rt")")
  set -e
  # Tolerant parses: a non-JSON error body yields '' instead of a traceback
  # that would kill the run before the named failure fires.
  wid=$(echo "$resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
  status=$(echo "$resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
  auth=$(echo "$resp" | python3 -c "
import json,sys
try:
    d=json.load(sys.stdin); conn=d.get('connection') or {}
    print(conn.get('auth_token','') or d.get('auth_token',''))
except Exception:
    print('')
")
  # Sanitize the body before echoing it (falls back to the raw body if the
  # sanitizer itself fails), matching the other create-failure messages.
  [ -z "$wid" ] && fail "[$rt] create missing id: $(printf '%s' "$resp" | sanitize_http_body 2>/dev/null || printf '%s' "$resp")"
  [ "$status" = "awaiting_agent" ] || fail "[$rt] create status='$status' (expected awaiting_agent — external/poll path)"
  [ -z "$auth" ] && fail "[$rt] create returned no workspace auth token — register impossible"

  # Assert the runtime LABEL was preserved (NOT coerced to generic 'external').
  get_resp=$(tenant_call GET "/workspaces/$wid")
  db_status=$(echo "$get_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
  local db_runtime
  db_runtime=$(echo "$get_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('runtime',''))")
  [ "$db_status" = "awaiting_agent" ] || fail "[$rt] DB row status=$db_status (expected awaiting_agent)"
  [ "$db_runtime" = "$rt" ] || fail "[$rt] runtime label coerced to '$db_runtime' (expected '$rt' — normalizeExternalRuntime must PRESERVE the BYO meta-runtime label, workspace.go:752-770)"
  ok " [$rt] create → awaiting_agent, runtime label preserved ✓"

  # register(poll) → online. Reuse register_with_retry by setting WS_AUTH_TOKEN
  # (the helper reads it as a global). REGISTER_RESP is set by the helper.
  # Remember the caller's token so this arm does not leave the global pointing
  # at a throwaway workspace once it returns.
  local prev_ws_auth="${WS_AUTH_TOKEN:-}"
  WS_AUTH_TOKEN="$auth"
  local body
  body=$(printf '{"id":"%s","url":"https://example.invalid:443","delivery_mode":"poll","agent_card":{"name":"e2e-%s","skills":[{"id":"echo","name":"Echo"}]}}' "$wid" "$rt")
  REGISTER_RESP=""
  register_with_retry "[$rt] register" "$body" \
    || fail "[$rt] register returned non-200 after bounded retries — body: $(printf '%s' "$REGISTER_RESP" | sanitize_http_body | head -c 300)"
  WS_AUTH_TOKEN="$prev_ws_auth"
  online_status=$(tenant_call GET "/workspaces/$wid" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))")
  [ "$online_status" = "online" ] || fail "[$rt] expected online after register, got $online_status"
  # The first line of REGISTER_RESP is parsed as the JSON body.
  reg_dm=$(echo "$REGISTER_RESP" | head -n1 | python3 -c "import json,sys; print(json.load(sys.stdin).get('delivery_mode',''))" 2>/dev/null || echo "")
  [ "$reg_dm" = "poll" ] || fail "[$rt] register response delivery_mode='$reg_dm' (expected poll)"
  ok " [$rt] register → online (delivery_mode=poll) ✓"

  # A2A → assert poll-mode {status:"queued"} envelope. Bounded retry on the
  # transient cold-edge 5xx class; a 4xx/non-queued 2xx is a real bug.
  local a2a_payload a2a_tmp a2a_code a2a_rc a2a_status attempt
  a2a_payload=$(python3 -c "
import json, uuid
print(json.dumps({
    'jsonrpc':'2.0','method':'message/send','id':'e2e-byo-1',
    'params':{'message':{'role':'user','messageId':f'e2e-{uuid.uuid4().hex[:8]}',
        'parts':[{'kind':'text','text':'BYO meta-runtime poll-route smoke. Respond: OK'}]}}
}))
")
  a2a_tmp=$(mktemp -t byo_a2a.XXXXXX)
  for attempt in $(seq 1 8); do
    : >"$a2a_tmp"   # truncate so a retry never parses a previous attempt's body
    set +e
    a2a_code=$(curl -sS --max-time 60 -X POST "$TENANT_URL/workspaces/$wid/a2a" \
      -H "Authorization: Bearer $TENANT_TOKEN" \
      -H "X-Molecule-Org-Id: $ORG_ID" \
      -H "Content-Type: application/json" \
      -d "$a2a_payload" -o "$a2a_tmp" -w '%{http_code}' 2>/dev/null)
    a2a_rc=$?
    set -e
    a2a_code=${a2a_code:-000}
    if [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ]; then break; fi
    # Only 502/503/504 are retried, and only while attempts remain.
    if echo "$a2a_code" | grep -Eq '^(502|503|504)$' && [ "$attempt" -lt 8 ]; then
      log " [$rt] A2A transient $a2a_code attempt $attempt/8"; sleep 10; continue
    fi
    break
  done
  a2a_status=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('status',''))" "$a2a_tmp" 2>/dev/null || echo "")
  local a2a_dm
  a2a_dm=$(python3 -c "import json,sys; print(json.load(open(sys.argv[1])).get('delivery_mode',''))" "$a2a_tmp" 2>/dev/null || echo "")
  # Remove the temp file before the assertions so a fail() does not leak it.
  rm -f "$a2a_tmp"
  [ "$a2a_rc" = "0" ] && [ "$a2a_code" = "200" ] \
    || fail "[$rt] A2A POST failed (rc=$a2a_rc, http=$a2a_code) — a BYO meta-runtime poll-mode A2A must 200 with a queued envelope, not error"
  [ "$a2a_status" = "queued" ] && [ "$a2a_dm" = "poll" ] \
    || fail "[$rt] A2A returned status='$a2a_status' delivery_mode='$a2a_dm' (expected queued/poll — a2a proxy must route a BYO meta-runtime to the poll queue, a2a_proxy.go:462-477)"
  ok " [$rt] A2A → poll-mode queued envelope ✓ (provision→online→A2A proven for $rt)"
}
|
||||
|
||||
log "7c/8 BYO meta-runtime arms (kimi, kimi-cli) — provision→online→A2A..."
|
||||
byo_meta_runtime_arm "kimi"
|
||||
byo_meta_runtime_arm "kimi-cli"
|
||||
ok "BYO meta-runtime arms passed for kimi + kimi-cli"
|
||||
|
||||
# ─── 8. Done — cleanup runs in the EXIT trap ───────────────────────────
|
||||
# REQUIRE_LIVE belt-and-braces: assert here too (in addition to the EXIT
|
||||
# trap) so the failure surfaces in step order, not only post-teardown.
|
||||
if [ "${REQUIRE_LIVE}" = "1" ] && [ "$TRANSITIONS_VERIFIED" -lt "$EXPECTED_TRANSITIONS" ]; then
|
||||
fail "REQUIRE_LIVE: only ${TRANSITIONS_VERIFIED}/${EXPECTED_TRANSITIONS} transitions proven at end of run"
|
||||
fi
|
||||
log "8/8 All four awaiting_agent transitions verified."
|
||||
log "═══════════════════════════════════════════════════════════════════"
|
||||
ok "External-runtime E2E PASSED on $SLUG"
|
||||
|
||||
@@ -24,6 +24,19 @@
|
||||
#
|
||||
# Optional env:
|
||||
# E2E_RUNTIME hermes (default) | claude-code | codex | openclaw
|
||||
# | seo-agent | google-adk
|
||||
# - seo-agent: a claude-code-adapter template
|
||||
# VARIANT (not a distinct registry runtime).
|
||||
# Selected via the `template` field (config.yaml
|
||||
# resolves runtime=claude-code); reuses the
|
||||
# same MiniMax/claude-code key path. See the
|
||||
# TEMPLATE derivation + SECRETS_JSON block.
|
||||
# - google-adk: Gemini. The AI-Studio-keyed BYOK
|
||||
# path (E2E_GOOGLE_API_KEY) is staging-
|
||||
# exercisable here; the keyless Vertex PROD
|
||||
# path needs WIF (see header note + the CTO
|
||||
# flag in the PR body) and is selected via
|
||||
# E2E_LLM_PATH=platform + a platform: model.
|
||||
# E2E_PROVISION_TIMEOUT_SECS default 900 (15 min cold EC2 budget)
|
||||
# E2E_WORKSPACE_ONLINE_TIMEOUT_SECS default 3600 (60 min — hermes
|
||||
# cold-boot worst-case + slack). Raised from
|
||||
@@ -47,6 +60,27 @@
|
||||
# tear down cleanly (and exit 4 on leak).
|
||||
# Used by a dedicated sanity workflow
|
||||
# that verifies the safety net.
|
||||
# E2E_LIFECYCLE auto (default) | off
|
||||
# When auto + MODE=full, exercises the
|
||||
# pause→resume→online and hibernate→resume(wake)
|
||||
# state transitions on the provisioned parent
|
||||
# (step 10b). These are REAL transitions on the
|
||||
# live tenant (Pause stops the container + sets
|
||||
# status=paused; Resume re-provisions →
|
||||
# provisioning → online; Hibernate stops +
|
||||
# status=hibernated; the next A2A auto-wakes it).
|
||||
# Set `off` for a fast smoke that skips the
|
||||
# ~2x-reprovision cost. In smoke MODE it is
|
||||
# skipped regardless (no parent stability budget).
|
||||
# E2E_REQUIRE_LIVE 1 → fail-closed-on-skip guard (CI sets this).
|
||||
# When set, the run MUST actually complete
|
||||
# ≥1 full provision→online→A2A cycle. A run
|
||||
# that reaches the end without having proven
|
||||
# a real round-trip (e.g. a future refactor
|
||||
# short-circuits a stage, or a skip path
|
||||
# swallows the lifecycle) exits 5 rather than
|
||||
# reporting a false green. Mirrors CP
|
||||
# serving-e2e's SERVING_E2E_REQUIRE_LIVE.
|
||||
#
|
||||
# Exit codes:
|
||||
# 0 happy path
|
||||
@@ -54,6 +88,37 @@
|
||||
# 2 missing required env
|
||||
# 3 provisioning timed out
|
||||
# 4 teardown left orphan resources
|
||||
# 5 E2E_REQUIRE_LIVE set but the run validated no real lifecycle (no
|
||||
# false-green-on-skip)
|
||||
#
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# PROMOTION-READINESS (harden/e2e-staging-saas-failclosed):
|
||||
# This harness is being hardened so `E2E Staging SaaS` + `E2E Staging
|
||||
# Platform Boot` can become HARD merge-gates. continue-on-error is NOT
|
||||
# flipped here — that promotion is the CTO's irreversible branch-protection
|
||||
# call. What this branch makes fail-closed (was false-green / un-named
|
||||
# flake before):
|
||||
# • Provision/online waits are bounded readiness-POLLS, not fixed sleeps;
|
||||
# each hard-fails with a named mechanism + last-seen signal on deadline,
|
||||
# never a silent timeout (cp#245 boot-timeout class).
|
||||
# • Peer-discovery (9b) asserts a real 2xx, not just "not 404" — a 5xx /
|
||||
# 000 / empty no longer reads as "reachable".
|
||||
# • Activity-log (9b) is ASSERTED reachable (2xx + parseable), not
|
||||
# logged-and-ignored behind `|| echo '[]'`.
|
||||
# • Child activity provenance (10) is asserted (was soft-logged).
|
||||
# • E2E_REQUIRE_LIVE=1 (CI) makes the run exit 5 if it reached the end
|
||||
# without proving a real provision→online→A2A round-trip — no
|
||||
# false-green-on-skip.
|
||||
# STILL BLOCKS making it REQUIRED (must clear before the CTO flips
|
||||
# continue-on-error→false in .gitea/workflows/e2e-staging-saas.yml):
|
||||
# • De-flake window: N consecutive green runs on main for BOTH jobs
|
||||
# (platform-boot shares the cp#245 boot surface — #2187 tracks its
|
||||
# flip). This harness removes the harness-side flake mechanisms; the
|
||||
# remaining surface is real-infra (EC2 cold boot, CF DNS) latency,
|
||||
# already bounded by the readiness polls above.
|
||||
# • Branch-protection required-context wiring is a repo-settings change,
|
||||
# not a code change in this PR.
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -90,6 +155,41 @@ log() { echo "[$(date +%H:%M:%S)] $*"; }
|
||||
fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
|
||||
ok() { echo "[$(date +%H:%M:%S)] ✅ $*"; }
|
||||
|
||||
# ─── fail-closed-on-skip live-lifecycle guard ───────────────────────────
|
||||
# E2E_REQUIRE_LIVE=1 (set by CI) asserts this run ACTUALLY exercised a full
|
||||
# provision→online→A2A cycle. Each load-bearing lifecycle stage stamps a
|
||||
# milestone via live_milestone(); at the very end, require_live_or_die()
|
||||
# checks every required milestone fired. Mechanism: without this, a future
|
||||
# refactor that short-circuits a stage — or a skip/early-return path that
|
||||
# swallows the lifecycle — would let the script reach its final `ok` and
|
||||
# report GREEN having validated nothing. Mirrors CP serving-e2e's
|
||||
# SERVING_E2E_REQUIRE_LIVE (skip-if-absent must be LOUD, never silent green).
|
||||
REQUIRE_LIVE="${E2E_REQUIRE_LIVE:-0}"
|
||||
LIVE_MILESTONES=""
|
||||
# live_milestone <name>
#   Records <name> in the space-delimited LIVE_MILESTONES set. Stamping a name
#   that is already present is a no-op, so stages may call this repeatedly.
live_milestone() {
  local stamp="$1"
  # Pad both ends so a name only matches as a whole space-delimited token.
  if [[ " $LIVE_MILESTONES " != *" $stamp "* ]]; then
    LIVE_MILESTONES="$LIVE_MILESTONES $stamp"
  fi
}
|
||||
# require_live_or_die
#   When REQUIRE_LIVE is "1", verifies that every required lifecycle milestone
#   (provisioned, tenant_online, workspace_online, a2a_roundtrip) is present in
#   LIVE_MILESTONES. If any is absent, prints the missing and reached sets to
#   stderr and exits 5. Returns 0 when REQUIRE_LIVE is not "1" or nothing is
#   missing.
require_live_or_die() {
  # Guard only applies when a live run was demanded.
  if [ "$REQUIRE_LIVE" != "1" ]; then
    return 0
  fi

  local absent="" stamp
  for stamp in provisioned tenant_online workspace_online a2a_roundtrip; do
    # Whole-token membership test against the space-delimited set.
    if [[ " $LIVE_MILESTONES " != *" $stamp "* ]]; then
      absent="$absent $stamp"
    fi
  done

  [ -z "$absent" ] && return 0

  echo "[$(date +%H:%M:%S)] ❌ E2E_REQUIRE_LIVE=1 but the run did NOT prove a full live lifecycle — missing milestone(s):${absent}. Reached:${LIVE_MILESTONES:-<none>}. This is a false-green-on-skip guard: a run that validates no real provision→online→A2A cycle MUST NOT report green." >&2
  exit 5
}
|
||||
|
||||
# Per-runtime model slug dispatch — see lib/model_slug.sh for the rationale.
|
||||
# Extracted so unit tests (tests/e2e/test_model_slug.sh) can pin every branch
|
||||
# without booting the full 11-step lifecycle.
|
||||
@@ -197,7 +297,7 @@ cleanup_org() {
|
||||
# case statement, and opens a false-positive priority-high
|
||||
# "safety net broken" issue (#2159, 2026-04-27).
|
||||
case "$entry_rc" in
|
||||
0|1|2|3|4) ;; # contracted codes — let bash use entry_rc
|
||||
0|1|2|3|4|5) ;; # contracted codes — let bash use entry_rc
|
||||
*) exit 1 ;; # anything else is a generic failure
|
||||
esac
|
||||
}
|
||||
@@ -295,6 +395,7 @@ print('(no org row found for slug=$SLUG — DB drift?)')
|
||||
esac
|
||||
done
|
||||
ok "Tenant provisioning complete"
|
||||
live_milestone provisioned
|
||||
|
||||
# Derive tenant domain from CP hostname so the same harness works in
|
||||
# both prod (api.moleculesai.app → moleculesai.app) and staging
|
||||
@@ -351,6 +452,7 @@ while true; do
|
||||
sleep 5
|
||||
done
|
||||
ok "Tenant reachable at $TENANT_URL"
|
||||
live_milestone tenant_online
|
||||
|
||||
# Sanity-test path: once the tenant is provisioned, poisoning the
|
||||
# tenant token proves the EXIT trap + leak assertion still fire.
|
||||
@@ -515,6 +617,24 @@ print(json.dumps({
|
||||
'ANTHROPIC_API_KEY': k,
|
||||
}))
|
||||
")
|
||||
elif [ -n "${E2E_GOOGLE_API_KEY:-}" ]; then
|
||||
# google-adk AI-Studio BYOK path. The `google` provider entry
|
||||
# (providers.yaml:401-413) reads GEMINI_API_KEY / GOOGLE_API_KEY and dials
|
||||
# generativelanguage.googleapis.com — the tenant's OWN key, distinct from the
|
||||
# keyless-Vertex PROD path (which routes through the CP proxy + server-side
|
||||
# WIF and carries NO tenant credential). This branch exercises google-adk
|
||||
# being PROVISIONED AT ALL on staging; the Vertex-specific WIF path is flagged
|
||||
# for the CTO (needs extra provisioning) and is NOT reachable here. Inject
|
||||
# under both env names the provider accepts so the adapter resolves regardless
|
||||
# of which one it reads first.
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
k = os.environ['E2E_GOOGLE_API_KEY']
|
||||
print(json.dumps({
|
||||
'GOOGLE_API_KEY': k,
|
||||
'GEMINI_API_KEY': k,
|
||||
}))
|
||||
")
|
||||
elif [ -n "${E2E_OPENAI_API_KEY:-}" ]; then
|
||||
SECRETS_JSON=$(python3 -c "
|
||||
import json, os
|
||||
@@ -534,22 +654,234 @@ fi
|
||||
MODEL_SLUG=$(pick_model_slug "$RUNTIME")
|
||||
log " MODEL_SLUG=$MODEL_SLUG"
|
||||
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
|
||||
# ─── BYOK opt-in split (secret-write gate requires explicit byok) ───────
|
||||
# Every vendor-key arm above (MiniMax / Anthropic / Google / OpenAI-hermes)
|
||||
# writes one or more keys that workspace-server's secret-write gate —
|
||||
# rejectPlatformManagedDirectLLMBypassForWorkspace in
|
||||
# workspace-server/internal/handlers/secrets.go — STRIPS/BLOCKS while a
|
||||
# workspace's resolved billing mode is platform_managed (the org/CTO default).
|
||||
# The strip-list (secrets.go platformManagedDirectLLMBypassKeys) includes
|
||||
# MINIMAX_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, OPENAI_API_KEY/_BASE_URL,
|
||||
# HERMES_CUSTOM_API_KEY/_BASE_URL, etc. A bare vendor key in the CREATE payload
|
||||
# does NOT auto-derive byok: at create time no auth-env is present yet, so the
|
||||
# resolver derives platform_managed and the write is rejected. The resolver's
|
||||
# org rung was retired (internal#718 P2-B) — ResolveLLMBillingMode now ignores
|
||||
# the org default — so the ONLY way to opt a workspace into byok is an explicit
|
||||
# per-workspace override via PUT /admin/workspaces/:id/llm-billing-mode.
|
||||
#
|
||||
# Real evidence — staging job 295385 (main f1558b54), AFTER #2311/#2312 made
|
||||
# bare `MiniMax-M2.7` registry-valid: parent-create passed model validation but
|
||||
# FAILED with
|
||||
# {"error":"direct vendor key writes are blocked for platform-managed
|
||||
# workspaces; ... or set this workspace's billing mode to 'byok' via
|
||||
# /admin/workspaces/:id/llm-billing-mode","key":"MINIMAX_API_KEY"}
|
||||
# That 400 is INTENDED product behavior, not a product bug. The e2e must mirror
|
||||
# the real BYOK user flow: opt the workspace into byok FIRST, then write the key.
|
||||
#
|
||||
# Mechanism: per-workspace override (NOT org-default), because the org rung is
|
||||
# retired — an org-create billing field could not satisfy this gate even if
|
||||
# /cp/admin/orgs accepted one. So for any arm that ships strip-listed keys we:
|
||||
# 1. create the workspace WITHOUT those keys (create succeeds platform_managed),
|
||||
# 2. PUT billing-mode=byok on that workspace id (per-tenant admin token),
|
||||
# 3. write the deferred strip-listed keys (now allowed by the gate),
|
||||
# then continue. The #1994 byok-routing guard (8c) then sees a LEGITIMATELY
|
||||
# byok workspace (explicit override) and still validates real routing — NOT
|
||||
# masked.
|
||||
#
|
||||
# The PLATFORM path (E2E_LLM_PATH=platform) produces SECRETS_JSON='{}', so it
|
||||
# carries NO strip-listed key → CREATE_SECRETS_JSON stays '{}' and no opt-in
|
||||
# fires. It remains platform_managed (the moonshot/kimi NOT_CONFIGURED
|
||||
# regression guard) — deliberately untouched.
|
||||
#
|
||||
# Keep this strip-list BYTE-IN-SYNC with secrets.go platformManagedDirectLLMBypassKeys.
|
||||
BYOK_STRIP_KEYS="AI_GATEWAY_API_KEY ANTHROPIC_API_KEY ANTHROPIC_AUTH_TOKEN ARCEEAI_API_KEY CLAUDE_CODE_OAUTH_TOKEN CODEX_AUTH_JSON DASHSCOPE_API_KEY DEEPSEEK_API_KEY GEMINI_API_KEY GLM_API_KEY HERMES_CUSTOM_API_KEY HERMES_CUSTOM_BASE_URL HF_TOKEN KIMI_API_KEY KIMI_CN_API_KEY MINIMAX_API_KEY MINIMAX_CN_API_KEY NOUS_API_KEY OPENAI_API_KEY OPENAI_BASE_URL OPENROUTER_API_KEY XAI_API_KEY ZAI_API_KEY"
|
||||
# Split SECRETS_JSON into CREATE_SECRETS_JSON (gate-safe, written at create)
|
||||
# and DEFERRED_SECRETS_JSON (strip-listed keys, written AFTER byok opt-in).
|
||||
# Emit the two JSON blobs on SEPARATE LINES (not space-separated) — a value or
|
||||
# a json.dumps default separator contains spaces, which whitespace-`read` would
|
||||
# mangle. read -r line1 → CREATE, line2 → DEFERRED.
|
||||
{
|
||||
read -r CREATE_SECRETS_JSON
|
||||
read -r DEFERRED_SECRETS_JSON
|
||||
} < <(
|
||||
BYOK_STRIP_KEYS="$BYOK_STRIP_KEYS" E2E_WS_SECRETS="$SECRETS_JSON" python3 -c "
|
||||
import json, os
|
||||
strip = set(os.environ['BYOK_STRIP_KEYS'].split())
|
||||
d = json.loads(os.environ['E2E_WS_SECRETS'] or '{}')
|
||||
create = {k: v for k, v in d.items() if k not in strip}
|
||||
deferred = {k: v for k, v in d.items() if k in strip}
|
||||
print(json.dumps(create))
|
||||
print(json.dumps(deferred))
|
||||
"
|
||||
)
|
||||
# Defensive: if the split somehow produced empty (read failure), treat as
|
||||
# no-deferred so we never PUT byok on a workspace that has no vendor key.
|
||||
[ -n "$DEFERRED_SECRETS_JSON" ] || DEFERRED_SECRETS_JSON='{}'
|
||||
[ -n "$CREATE_SECRETS_JSON" ] || CREATE_SECRETS_JSON='{}'
|
||||
if [ "$DEFERRED_SECRETS_JSON" != "{}" ]; then
|
||||
log " BYOK opt-in required — deferring vendor key(s) until after billing-mode=byok"
|
||||
fi
|
||||
|
||||
# byok_opt_in_and_write_deferred <workspace_id>
|
||||
# For the byok arms (DEFERRED_SECRETS_JSON non-empty): PUT billing-mode=byok
|
||||
# on the workspace, then write each deferred strip-listed secret (now allowed
|
||||
# by the secret-write gate). No-op for the platform/no-key path. See the
|
||||
# BYOK-opt-in block above + secrets.go rejectPlatformManagedDirectLLMBypassForWorkspace.
|
||||
# byok_opt_in_and_write_deferred <workspace_id>
#   No-op (returns 0) when DEFERRED_SECRETS_JSON is '{}'. Otherwise:
#     1. PUT /admin/workspaces/<id>/llm-billing-mode {"mode":"byok"} and
#        require the response's resolved_mode to be "byok";
#     2. POST each key/value in DEFERRED_SECRETS_JSON to
#        /workspaces/<id>/secrets, one call per key, requiring 200/201/204.
#   Any failure calls fail (exit 1) with the sanitized response body.
byok_opt_in_and_write_deferred() {
  local _id="$1"
  if [ "$DEFERRED_SECRETS_JSON" = "{}" ]; then
    return 0
  fi

  # Explicit byok opt-in (per-workspace override).
  local _bm_resp _bm_mode
  set +e
  _bm_resp=$(tenant_call PUT "/admin/workspaces/$_id/llm-billing-mode" \
    -H "Content-Type: application/json" \
    -d '{"mode":"byok"}' 2>/dev/null)
  local _bm_rc=$?
  set -e
  if [ "$_bm_rc" != "0" ]; then
    fail "byok opt-in: PUT /admin/workspaces/$_id/llm-billing-mode {mode:byok} failed (rc=$_bm_rc). Raw: $(printf '%s' "$_bm_resp" | sanitize_http_body)"
  fi
  _bm_mode=$(echo "$_bm_resp" | python3 -c "import json,sys; print(json.load(sys.stdin).get('resolved_mode',''))" 2>/dev/null || echo "")
  [ "$_bm_mode" = "byok" ] || fail "byok opt-in: workspace $_id resolved_mode='$_bm_mode' after PUT mode=byok (want byok). Raw: $(printf '%s' "$_bm_resp" | sanitize_http_body)"

  # Write each deferred strip-listed secret one-per-call (the Set endpoint
  # takes {key,value}). The gate now passes because resolved=byok. Bodies are
  # built in Python (env-only) so secret values never hit a command line.
  local _keys _k _sec_body _sec_tmp _sec_code _sec_out
  _keys=$(echo "$DEFERRED_SECRETS_JSON" | python3 -c "import json,sys; print('\n'.join(json.load(sys.stdin).keys()))")
  while IFS= read -r _k; do
    [ -n "$_k" ] || continue
    _sec_body=$(BYOK_K="$_k" E2E_WS_DEFERRED="$DEFERRED_SECRETS_JSON" python3 -c "
import json, os
d = json.loads(os.environ['E2E_WS_DEFERRED'])
print(json.dumps({'key': os.environ['BYOK_K'], 'value': d[os.environ['BYOK_K']]}))
")
    _sec_tmp=$(mktemp -t synth_byok_secret.XXXXXX)
    # Capture the HTTP code under `set +e` rather than with `|| echo "000"`.
    # On a non-2xx the write-out code is still printed before the non-zero
    # exit, so an `||` fallback would APPEND "000" to it (e.g. "400000") and
    # the failure message below would report a nonsense status. Default to 000
    # only when nothing was printed at all.
    set +e
    _sec_code=$(printf '%s' "$_sec_body" | tenant_call POST "/workspaces/$_id/secrets" \
      -H "Content-Type: application/json" \
      -d @- \
      -o "$_sec_tmp" -w '%{http_code}' 2>/dev/null)
    set -e
    _sec_code=${_sec_code:-000}
    if [ "$_sec_code" != "200" ] && [ "$_sec_code" != "201" ] && [ "$_sec_code" != "204" ]; then
      _sec_out=$(cat "$_sec_tmp" 2>/dev/null | sanitize_http_body)
      # Remove the temp file before fail() exits so it is not leaked.
      rm -f "$_sec_tmp"
      fail "byok vendor-key write: POST /workspaces/$_id/secrets ($_k) returned $_sec_code: $_sec_out — secret-write gate should allow it after the byok opt-in (secrets.go rejectPlatformManagedDirectLLMBypassForWorkspace)."
    fi
    rm -f "$_sec_tmp"
  done <<< "$_keys"
  ok " $_id byok opt-in + deferred vendor key(s) written"
}
|
||||
|
||||
# ─── runtime → provision-selector resolution ────────────────────────────
|
||||
# Most runtimes are selected directly by the `runtime` field. seo-agent is
|
||||
# the exception: it is NOT a registry runtime (absent from manifest.json +
|
||||
# runtime_registry.go knownRuntimes) — it is a claude-code-adapter template
|
||||
# VARIANT selected by the `template` field. The ws-server Create handler reads
|
||||
# the template's config.yaml, which declares `runtime: claude-code`, and
|
||||
# resolves the concrete runtime from there (workspace.go:290-336). So for
|
||||
# seo-agent we send template="seo-agent" and OMIT runtime, letting the
|
||||
# template resolve it — sending an explicit runtime="seo-agent" would
|
||||
# RUNTIME_UNSUPPORTED-422 at workspace.go:374-384 because it is not in
|
||||
# knownRuntimes. PROVISION_TEMPLATE is "" for every real registry runtime.
|
||||
PROVISION_TEMPLATE=""
|
||||
case "$RUNTIME" in
|
||||
seo-agent) PROVISION_TEMPLATE="seo-agent" ;;
|
||||
esac
|
||||
|
||||
# Build the create payload in Python so the optional `template`/`runtime`
|
||||
# fields are emitted conditionally and the secrets blob is embedded without
|
||||
# shell-escaping hazards. Args: name, [parent_id|""].
|
||||
build_create_payload() {
  # Print the JSON body for a workspace-create request on stdout.
  #
  # Args:
  #   $1  workspace name
  #   $2  parent workspace id (optional; left out of the payload when empty)
  # Reads: RUNTIME, PROVISION_TEMPLATE, MODEL_SLUG, CREATE_SECRETS_JSON.
  # All values travel to Python through the environment, never through the
  # command line, so no shell quoting of the secrets blob is involved.
  local ws_name="$1"
  local ws_parent="${2:-}"
  E2E_WS_NAME="$ws_name" \
  E2E_WS_PARENT_ID="$ws_parent" \
  E2E_WS_RUNTIME="$RUNTIME" \
  E2E_WS_TEMPLATE="$PROVISION_TEMPLATE" \
  E2E_WS_MODEL="$MODEL_SLUG" \
  E2E_WS_SECRETS="$CREATE_SECRETS_JSON" \
  python3 -c "
import json, os

env = os.environ
# An empty secrets blob is treated as an empty object.
body = {
    'name': env['E2E_WS_NAME'],
    'tier': 2,
    'model': env['E2E_WS_MODEL'],
    'secrets': json.loads(env['E2E_WS_SECRETS'] or '{}'),
}

# Exactly one selector is sent: 'template' when a provision template is set,
# otherwise 'runtime'. The two are never sent together.
template = env.get('E2E_WS_TEMPLATE', '')
selector = ('template', template) if template else ('runtime', env['E2E_WS_RUNTIME'])
body[selector[0]] = selector[1]

parent = env.get('E2E_WS_PARENT_ID', '')
if parent:
    body['parent_id'] = parent

print(json.dumps(body))
"
}
|
||||
|
||||
if [ -n "$PROVISION_TEMPLATE" ]; then
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME via template=$PROVISION_TEMPLATE → claude-code adapter)..."
|
||||
else
|
||||
log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
|
||||
fi
|
||||
# tenant_call inherits CURL_COMMON's --fail-with-body, so a non-2xx create
|
||||
# (e.g. the 422 RUNTIME_UNSUPPORTED below) makes curl exit 22. Capturing it
|
||||
# bare as $(tenant_call ...) propagates that 22 through the command
|
||||
# substitution and, under `set -euo pipefail`, ABORTS the whole script right
|
||||
# here — before the `fail "... Response: ..."` handler below can print the
|
||||
# body. The result was an opaque `curl: (22) ... error: 422` + teardown with
|
||||
# no body (run 220702, main f78fef4c, step "5/11 Provisioning parent
|
||||
# workspace"). set +e / `|| true` keeps the 22 from tripping `set -e`; curl
|
||||
# still WROTE the body to stdout (that's what --fail-with-body does), so
|
||||
# PARENT_RESP holds the 422 JSON and the id-check below surfaces WHY.
|
||||
set +e
|
||||
PARENT_RESP=$(tenant_call POST /workspaces \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"secrets\":$SECRETS_JSON}")
|
||||
PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
|
||||
-d "$(build_create_payload 'E2E Parent')")
|
||||
set -e
|
||||
# Surface the workspace-create error CLEARLY instead of dying on a Python
|
||||
# KeyError when the response has no 'id'. The load-bearing cases this names:
|
||||
# - google-adk: RUNTIME_UNSUPPORTED 422 if google-adk is absent from the
|
||||
# deployed manifest.json's workspace_templates (the Create-handler
|
||||
# allowlist is manifest-derived — runtime_registry.go). google-adk is in
|
||||
# providers.yaml + provisioner/registry.go + registry_gen but NOT (yet) in
|
||||
# manifest.json, so it cannot be provisioned by `runtime` until the
|
||||
# manifest gains it. Flagged for the CTO — this arm REDS until then.
|
||||
# - seo-agent: an "invalid template" 400 if the seo-agent template isn't
|
||||
# present in the tenant's configs/cache dir (template-cache refresh gap).
|
||||
PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
|
||||
if [ -z "$PARENT_ID" ]; then
|
||||
fail "Parent workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-<none>}). Response: $(printf '%s' "$PARENT_RESP" | sanitize_http_body)"
|
||||
fi
|
||||
log " PARENT_ID=$PARENT_ID"
|
||||
# BYOK arms only: opt the workspace into byok, then write the deferred vendor
|
||||
# key(s). No-op for the platform/no-key path. (See the BYOK opt-in block.)
|
||||
byok_opt_in_and_write_deferred "$PARENT_ID"
|
||||
|
||||
# ─── 6. Provision child (full mode only) ────────────────────────────────
|
||||
CHILD_ID=""
|
||||
if [ "$MODE" = "full" ]; then
|
||||
log "6/11 Provisioning child workspace..."
|
||||
# Same --fail-with-body / set -e abort guard as the parent create above:
|
||||
# let a non-2xx return the body so the id-check below surfaces it instead
|
||||
# of the script dying opaquely on curl exit 22.
|
||||
set +e
|
||||
CHILD_RESP=$(tenant_call POST /workspaces \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"$MODEL_SLUG\",\"parent_id\":\"$PARENT_ID\",\"secrets\":$SECRETS_JSON}")
|
||||
CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
|
||||
-d "$(build_create_payload 'E2E Child' "$PARENT_ID")")
|
||||
set -e
|
||||
CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('id',''))" 2>/dev/null || echo "")
|
||||
if [ -z "$CHILD_ID" ]; then
|
||||
fail "Child workspace create returned no 'id' (runtime=$RUNTIME, template=${PROVISION_TEMPLATE:-<none>}). Response: $(printf '%s' "$CHILD_RESP" | sanitize_http_body)"
|
||||
fi
|
||||
log " CHILD_ID=$CHILD_ID"
|
||||
# Same BYOK opt-in as the parent — the child also carries the vendor key(s).
|
||||
byok_opt_in_and_write_deferred "$CHILD_ID"
|
||||
else
|
||||
log "6/11 Canary mode — skipping child workspace"
|
||||
fi
|
||||
@@ -570,6 +902,7 @@ fi
|
||||
WS_TO_CHECK=("$PARENT_ID")
|
||||
[ -n "$CHILD_ID" ] && WS_TO_CHECK+=("$CHILD_ID")
|
||||
wait_workspaces_online_routable "7/11 Waiting for workspace(s) to reach status=online (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min — hermes cold boot)..." "${WS_TO_CHECK[@]}"
|
||||
live_milestone workspace_online
|
||||
|
||||
# ─── 7a. Real chat image upload/download round-trip ───────────────────
|
||||
# This deliberately uses the production workflow: tenant admin/session auth
|
||||
@@ -671,6 +1004,12 @@ for wid in "${WS_TO_CHECK[@]}"; do
|
||||
else
|
||||
DIAG_FAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('first_failure','unknown'))" 2>/dev/null || echo "unknown")
|
||||
DIAG_DETAIL=$(echo "$DIAG_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); s=[x for x in d.get('steps',[]) if not x.get('ok')]; step=s[0] if s else {}; print(' — '.join(x for x in [step.get('error',''), step.get('detail','')] if x))" 2>/dev/null || echo "")
|
||||
# #767: always emit the full diagnose JSON so operators see every step's
|
||||
# Detail field even when the Python extraction above fails or the shape
|
||||
# drifts. The burst is bracketed like steps 2 and 4 for grep-friendly CI.
|
||||
log "── DIAGNOSTIC BURST (step 7b — terminal diagnose for $wid) ──"
|
||||
echo "$DIAG_JSON" | python3 -m json.tool 2>/dev/null || echo "$DIAG_JSON"
|
||||
log "── END DIAGNOSTIC ──"
|
||||
fail "Workspace $wid terminal diagnose failed at step '$DIAG_FAIL': $DIAG_DETAIL — check tenant SG has tcp/22 from the configured EIC endpoint SG, MOLECULE_EIC_ENDPOINT_SG_ID is set in Railway, and EIC endpoint health"
|
||||
fi
|
||||
done
|
||||
@@ -886,7 +1225,7 @@ fi
|
||||
# identical on main's scheduled synthetic E2E and on PRs (so it is an
|
||||
# environmental backend regression, never PR-introduced).
|
||||
if echo "$AGENT_TEXT" | grep -qiF "message contained no text content"; then
|
||||
fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (the claude-code default is minimax:MiniMax-M2.7 since #2263; was bare MiniMax-M2 #2710) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
|
||||
fail "A2A — EMPTY COMPLETION (backend regression, NOT a platform/workspace-server bug). The configured model (MODEL_SLUG=${MODEL_SLUG:-?}) returned a 2xx completion with no text part; the runtime surfaced 'message contained no text content.'. Operator action: check the staging LLM backend / proxy for the canary model (the claude-code MiniMax-BYOK default is the BARE registered id MiniMax-M2.7 — the colon minimax:MiniMax-M2.7 is UNREGISTERED on claude-code, internal#718) — empty assistant turns, not an auth/quota/boot fault. Raw: $AGENT_TEXT"
|
||||
fi
|
||||
# Generic catch-all — falls through if none of the known regressions hit.
|
||||
if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
|
||||
@@ -981,6 +1320,11 @@ except Exception:
|
||||
" 2>/dev/null || echo "")
|
||||
# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
|
||||
a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
|
||||
# Real, deterministic LLM round-trip proven — the load-bearing milestone for
|
||||
# the fail-closed-on-skip guard. Stamped AFTER a2a_assert_real_completion (not
|
||||
# after the looser PONG check) so the milestone means a verified completion,
|
||||
# not just a 2xx-with-text.
|
||||
live_milestone a2a_roundtrip
|
||||
|
||||
# ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
|
||||
# The parent was provisioned with the customer's OWN vendor key
|
||||
@@ -1096,28 +1440,92 @@ print(json.dumps({
|
||||
'scope': 'LOCAL'
|
||||
}))
|
||||
")
|
||||
tenant_call POST "/workspaces/$PARENT_ID/memories" \
|
||||
# SURFACE THE BODY (mirrors the step-9b / A2A pattern): the previous
|
||||
# `>/dev/null || fail "memory POST failed"` discarded the response body
|
||||
# that --fail-with-body deliberately preserves on a non-2xx, so a 500 from
|
||||
# the workspace-server HMA path (e.g. "failed to store memory" /
|
||||
# "failed to resolve writable namespaces", or a 503 "memory plugin is not
|
||||
# configured") was reported as a bare "memory POST failed" — opaque, the
|
||||
# same #2310-class blind spot. Route http_code into -w and body into -o,
|
||||
# then fail with the sanitized status+body so the mechanism is visible.
|
||||
MEM_POST_TMP=$(e2e_tmp /tmp/e2e_mem_post.XXXXXX)
|
||||
set +e
|
||||
MEM_POST_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/memories" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$MEM_PAYLOAD" >/dev/null || fail "memory POST failed"
|
||||
MEM_LIST=$(tenant_call GET "/workspaces/$PARENT_ID/memories?scope=LOCAL")
|
||||
-d "$MEM_PAYLOAD" \
|
||||
-o "$MEM_POST_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
MEM_POST_RC=$?
|
||||
set -e
|
||||
MEM_POST_CODE=${MEM_POST_CODE:-000}
|
||||
if [ "$MEM_POST_RC" != "0" ] || [ "$MEM_POST_CODE" -lt 200 ] || [ "$MEM_POST_CODE" -ge 300 ]; then
|
||||
MEM_POST_BODY=$(head -c 400 "$MEM_POST_TMP" 2>/dev/null | sanitize_http_body)
|
||||
fail "memory POST /workspaces/$PARENT_ID/memories failed (curl_rc=$MEM_POST_RC, http=$MEM_POST_CODE): ${MEM_POST_BODY:-<empty body>}"
|
||||
fi
|
||||
|
||||
# Same fail-closed surfacing for the read-back: a 5xx / network error here
|
||||
# previously slipped through the bare `$(tenant_call ...)` capture and only
|
||||
# showed up as "not readable" with an empty list.
|
||||
MEM_LIST_TMP=$(e2e_tmp /tmp/e2e_mem_list.XXXXXX)
|
||||
set +e
|
||||
MEM_LIST_CODE=$(tenant_call GET "/workspaces/$PARENT_ID/memories?scope=LOCAL" \
|
||||
-o "$MEM_LIST_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
MEM_LIST_RC=$?
|
||||
set -e
|
||||
MEM_LIST_CODE=${MEM_LIST_CODE:-000}
|
||||
MEM_LIST=$(cat "$MEM_LIST_TMP" 2>/dev/null || echo "")
|
||||
if [ "$MEM_LIST_RC" != "0" ] || [ "$MEM_LIST_CODE" -lt 200 ] || [ "$MEM_LIST_CODE" -ge 300 ]; then
|
||||
fail "memory GET /workspaces/$PARENT_ID/memories failed (curl_rc=$MEM_LIST_RC, http=$MEM_LIST_CODE): $(printf '%s' "$MEM_LIST" | sanitize_http_body | head -c 400)"
|
||||
fi
|
||||
if ! echo "$MEM_LIST" | grep -q "run $SLUG"; then
|
||||
fail "HMA memory not readable after write. List: ${MEM_LIST:0:200}"
|
||||
fail "HMA memory not readable after write (http=$MEM_LIST_CODE). List: $(printf '%s' "$MEM_LIST" | sanitize_http_body | head -c 200)"
|
||||
fi
|
||||
ok "HMA memory write+read roundtripped"
|
||||
|
||||
log "9b. Peer discovery + activity log smoke..."
|
||||
# FAIL-CLOSED: assert a real 2xx, not merely "not 404". The previous
|
||||
# `[ "$PEERS_CODE" = "404" ] && fail` only caught the route-missing case —
|
||||
# a 5xx, 000 (connection failure), or empty capture ALL fell through to
|
||||
# "reachable" (false-green: a broken-but-present route read as healthy).
|
||||
# Mechanism: route the http_code into its own tempfile (no stderr capture,
|
||||
# which the old `2>&1 | head -1` could pollute with a curl error line) and
|
||||
# require 2xx explicitly.
|
||||
PEERS_TMP=$(e2e_tmp /tmp/e2e_peers.XXXXXX)
|
||||
set +e
|
||||
tenant_call GET "/registry/$PARENT_ID/peers" -o /dev/null -w "%{http_code}\n" 2>&1 | head -1 > /tmp/peers_code.txt
|
||||
PEERS_CODE=$(tenant_call GET "/registry/$PARENT_ID/peers" \
|
||||
-o "$PEERS_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
PEERS_RC=$?
|
||||
set -e
|
||||
PEERS_CODE=$(cat /tmp/peers_code.txt)
|
||||
[ "$PEERS_CODE" = "404" ] && fail "Peers endpoint missing (404) — route regression"
|
||||
PEERS_CODE=${PEERS_CODE:-000}
|
||||
if [ "$PEERS_CODE" = "404" ]; then
|
||||
fail "Peers endpoint missing (404) — route regression. /registry/$PARENT_ID/peers"
|
||||
fi
|
||||
if [ "$PEERS_RC" != "0" ] || [ "$PEERS_CODE" -lt 200 ] || [ "$PEERS_CODE" -ge 300 ]; then
|
||||
fail "Peers endpoint unhealthy (curl_rc=$PEERS_RC, http=$PEERS_CODE) — not a clean 2xx, so 'reachable' would be a false-green. Body: $(head -c 200 "$PEERS_TMP" 2>/dev/null | sanitize_http_body)"
|
||||
fi
|
||||
ok "Peers endpoint reachable (HTTP $PEERS_CODE)"
|
||||
|
||||
ACTIVITY=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" 2>/dev/null || echo '[]')
|
||||
ACTIVITY_COUNT=$(echo "$ACTIVITY" | python3 -c "import json,sys
|
||||
d=json.load(sys.stdin)
|
||||
print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0)
|
||||
log " Activity events observed: $ACTIVITY_COUNT"
|
||||
# FAIL-CLOSED: the activity-log read was `|| echo '[]'` then the count was
|
||||
# only LOGGED, never asserted — a 5xx / network failure silently became an
|
||||
# empty list and the step exited 0 having validated nothing (false-green:
|
||||
# "validated nothing" class). Assert the endpoint returns a 2xx and a
|
||||
# parseable activity shape. We do NOT assert count>0 (the parent may
|
||||
# legitimately have 0 events this early — that's a real, valid state), but
|
||||
# we DO require the call to have actually succeeded and returned valid JSON.
|
||||
ACTIVITY_TMP=$(e2e_tmp /tmp/e2e_activity.XXXXXX)
|
||||
set +e
|
||||
ACTIVITY_CODE=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" \
|
||||
-o "$ACTIVITY_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
ACTIVITY_RC=$?
|
||||
set -e
|
||||
ACTIVITY_CODE=${ACTIVITY_CODE:-000}
|
||||
if [ "$ACTIVITY_RC" != "0" ] || [ "$ACTIVITY_CODE" -lt 200 ] || [ "$ACTIVITY_CODE" -ge 300 ]; then
|
||||
fail "Activity-log endpoint unhealthy (curl_rc=$ACTIVITY_RC, http=$ACTIVITY_CODE) — was previously swallowed by '|| echo []' and reported as 0 events (false-green). Body: $(head -c 200 "$ACTIVITY_TMP" 2>/dev/null | sanitize_http_body)"
|
||||
fi
|
||||
ACTIVITY_COUNT=$(python3 -c "import json,sys
|
||||
d=json.load(open(sys.argv[1]))
|
||||
print(len(d if isinstance(d, list) else d.get('events', [])))" "$ACTIVITY_TMP" 2>/dev/null) \
|
||||
|| fail "Activity-log returned HTTP $ACTIVITY_CODE but body was not parseable JSON (events array / {events:[...]}). Body: $(head -c 200 "$ACTIVITY_TMP" 2>/dev/null | sanitize_http_body)"
|
||||
log " Activity events observed: $ACTIVITY_COUNT (endpoint 2xx + parseable ✓)"
|
||||
|
||||
# ─── 9c. Workspace KV memory Edit round-trip ─────────────────────────
|
||||
# Pins the Edit affordance added to the canvas Memory tab. The UI calls
|
||||
@@ -1268,14 +1676,173 @@ except Exception:
|
||||
[ -z "$DELEG_TEXT" ] && fail "Delegation returned no text. Raw: ${DELEG_RESP:0:200}"
|
||||
ok "Delegation proxy works (child responded: \"${DELEG_TEXT:0:60}\")"
|
||||
|
||||
CHILD_ACT=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" 2>/dev/null || echo '[]')
|
||||
if echo "$CHILD_ACT" | grep -q "$PARENT_ID"; then
|
||||
# FAIL-CLOSED via bounded readiness-POLL (was soft-logged false-green).
|
||||
# The activity pipeline is async, so an immediate single read can miss the
|
||||
# parent reference — but "did not reference parent" was previously just
|
||||
# LOGGED and the step passed regardless, so a genuinely broken provenance
|
||||
# pipeline (parent never recorded as source) read as success. Mechanism:
|
||||
# poll the child activity log for the parent id for a bounded window
|
||||
# (E2E_CHILD_ACTIVITY_TIMEOUT_SECS, default 60s) — this is the real
|
||||
# readiness signal (provenance row materialised), not a fixed sleep — and
|
||||
# hard-fail with a named mechanism if it never appears.
|
||||
CHILD_ACT_DEADLINE=$(( $(date +%s) + ${E2E_CHILD_ACTIVITY_TIMEOUT_SECS:-60} ))
|
||||
CHILD_ACT_SEEN=0
|
||||
CHILD_ACT_LASTCODE="000"
|
||||
while true; do
|
||||
CHILD_ACT_TMP=$(e2e_tmp /tmp/e2e_child_act.XXXXXX)
|
||||
set +e
|
||||
CHILD_ACT_CODE=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" \
|
||||
-o "$CHILD_ACT_TMP" -w "%{http_code}" 2>/dev/null)
|
||||
set -e
|
||||
CHILD_ACT_LASTCODE=${CHILD_ACT_CODE:-000}
|
||||
if grep -q "$PARENT_ID" "$CHILD_ACT_TMP" 2>/dev/null; then
|
||||
CHILD_ACT_SEEN=1
|
||||
break
|
||||
fi
|
||||
[ "$(date +%s)" -ge "$CHILD_ACT_DEADLINE" ] && break
|
||||
sleep 5
|
||||
done
|
||||
if [ "$CHILD_ACT_SEEN" = "1" ]; then
|
||||
ok "Child activity log records parent as source"
|
||||
else
|
||||
log "Child activity log did not reference parent (pipeline may be async)"
|
||||
fail "Child activity log never referenced parent $PARENT_ID within ${E2E_CHILD_ACTIVITY_TIMEOUT_SECS:-60}s (last http=$CHILD_ACT_LASTCODE) — delegation-provenance pipeline regression (parent not recorded as source). Previously soft-logged → false-green."
|
||||
fi
|
||||
fi
|
||||
|
||||
# ─── 10b. Pause/Resume + Hibernate/Resume lifecycle transitions ─────────
|
||||
# Exercise the REAL workspace lifecycle state machine on the provisioned
|
||||
# parent — the transitions that previously had only handler unit tests
|
||||
# (handlers_additional_test.go / hibernation_test.go) and NO real-infra
|
||||
# coverage. Each transition is asserted against the live DB-backed status the
|
||||
# GET /workspaces/:id endpoint returns, so a regression in the Pause/Resume/
|
||||
# Hibernate handlers (workspace_restart.go) or their CP stop/re-provision
|
||||
# wiring fails the gate instead of silently leaking an EC2 / wedging a tenant.
|
||||
#
|
||||
# Contract (workspace_restart.go):
|
||||
# POST /pause online → 'paused' (container stopped, url cleared) {"status":"paused"}
|
||||
# POST /resume paused → 'provisioning' → … → 'online' (re-provision) {"status":"provisioning"}
|
||||
# POST /hibernate online → 'hibernating' → 'hibernated' (container stopped) {"status":"hibernated"}
|
||||
# auto-wake next A2A message/send on a hibernated ws → online
|
||||
#
|
||||
# Gated to full MODE (smoke has no parent-stability budget) + E2E_LIFECYCLE.
|
||||
# Runs LAST (after all read-only A2A/memory/peer checks) so the pause/stop
|
||||
# cycles don't disturb the earlier assertions. Skips are LOUD (logged), and
|
||||
# any broken transition hard-fails — never a silent pass.
|
||||
if [ "$MODE" = "full" ] && [ "${E2E_LIFECYCLE:-auto}" != "off" ]; then
|
||||
log "10b/11 Lifecycle transitions: pause→resume→online, hibernate→resume(wake) on parent $PARENT_ID..."
|
||||
|
||||
lifecycle_status() { # echoes the live workspace status
  # GETs /workspaces/$PARENT_ID and prints the 'status' field of the JSON
  # response. A missing or falsy status prints as an empty string, and if the
  # pipeline fails (request or JSON parse) the `|| echo ""` fallback prints an
  # empty line, so callers always get a zero exit and a possibly-empty value.
  tenant_call GET "/workspaces/$PARENT_ID" 2>/dev/null \
    | python3 -c "import json,sys; print(json.load(sys.stdin).get('status') or '')" 2>/dev/null || echo ""
}
|
||||
# Bounded readiness-poll for a target status — same fail-closed shape as
|
||||
# wait_workspaces_online_routable, but for an arbitrary terminal status.
|
||||
wait_status() { # $1=target $2=timeout_secs $3=label
  # Poll lifecycle_status every 10s until it equals the target status.
  # Returns 0 as soon as the target is observed; returns 1 (after logging the
  # last status seen) once the clock has passed start + timeout_secs.
  # Each status change is logged once, not on every poll.
  local want="$1" budget="$2" what="$3"
  local stop_at seen prev=""
  stop_at=$(( $(date +%s) + budget ))
  while :; do
    seen=$(lifecycle_status)
    if [ "$seen" != "$prev" ]; then
      log " parent status → ${seen:-<empty>}"
      prev="$seen"
    fi
    if [ "$seen" = "$want" ]; then
      return 0
    fi
    if [ "$(date +%s)" -gt "$stop_at" ]; then
      log " [lifecycle] $what never reached '$want' within ${budget}s (last='$seen')"
      return 1
    fi
    sleep 10
  done
}
|
||||
|
||||
# ── pause → paused ──
|
||||
PAUSE_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/pause" 2>/dev/null || echo '{}')
|
||||
PAUSE_STATUS=$(echo "$PAUSE_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
|
||||
[ "$PAUSE_STATUS" = "paused" ] || fail "Pause: POST /pause returned status='$PAUSE_STATUS' (expected 'paused'). Body: ${PAUSE_RESP:0:200}"
|
||||
# Poll the DB-backed status — the response body could lie; the GET proves the row.
|
||||
wait_status "paused" 120 "pause" || fail "Pause: workspace $PARENT_ID never settled at status=paused (DB row) — Pause handler / CP stop regression (workspace_restart.go Pause)."
|
||||
ok " pause → paused (DB-verified)"
|
||||
|
||||
# ── resume → provisioning → online ──
|
||||
RESUME_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/resume" 2>/dev/null || echo '{}')
|
||||
RESUME_STATUS=$(echo "$RESUME_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
|
||||
[ "$RESUME_STATUS" = "provisioning" ] || fail "Resume: POST /resume returned status='$RESUME_STATUS' (expected 'provisioning'). Body: ${RESUME_RESP:0:200}"
|
||||
# Resume re-provisions from the preserved config volume; reuse the same
|
||||
# online+routable readiness boundary the initial boot used (no fresh EC2
|
||||
# cold-start, but CP re-provision + heartbeat recovery can still take minutes).
|
||||
wait_workspaces_online_routable " Waiting for parent to return online after resume (up to $((WORKSPACE_ONLINE_TIMEOUT_SECS/60)) min)..." "$PARENT_ID"
|
||||
ok " resume → provisioning → online (DB-verified)"
|
||||
|
||||
# ── hibernate → hibernated ──
|
||||
HIB_RESP=$(tenant_call POST "/workspaces/$PARENT_ID/hibernate?force=true" 2>/dev/null || echo '{}')
|
||||
HIB_STATUS=$(echo "$HIB_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
|
||||
[ "$HIB_STATUS" = "hibernated" ] || fail "Hibernate: POST /hibernate?force=true returned status='$HIB_STATUS' (expected 'hibernated'). Body: ${HIB_RESP:0:200}"
|
||||
# The handler runs the claim→stop→'hibernated' sequence; poll the DB row to
|
||||
# confirm it landed on 'hibernated' (not stuck mid-'hibernating').
|
||||
wait_status "hibernated" 120 "hibernate" || fail "Hibernate: workspace $PARENT_ID never settled at status=hibernated (DB row) — Hibernate handler / CP stop regression (workspace_restart.go HibernateWorkspace)."
|
||||
ok " hibernate → hibernated (DB-verified)"
|
||||
|
||||
# ── resume-from-hibernate via auto-wake on next A2A ──
|
||||
# A hibernated workspace auto-wakes on the next incoming A2A message/send
|
||||
# (no explicit /resume — Resume only handles status=paused). Send a wake
|
||||
# A2A and assert the workspace returns to online. We accept transient cold
|
||||
# 5xx during wake (same edge class the PONG probe tolerates) and poll the
|
||||
# status to the online boundary rather than asserting on the single A2A code.
|
||||
log " Hibernate auto-wake: sending A2A to wake hibernated parent..."
|
||||
WAKE_PAYLOAD=$(python3 -c "
|
||||
import json, uuid
|
||||
print(json.dumps({
|
||||
'jsonrpc': '2.0',
|
||||
'method': 'message/send',
|
||||
'id': 'e2e-wake-1',
|
||||
'params': {
|
||||
'message': {
|
||||
'role': 'user',
|
||||
'messageId': f'e2e-wake-{uuid.uuid4().hex[:8]}',
|
||||
'parts': [{'kind': 'text', 'text': 'This is the platform lifecycle smoke test waking a hibernated workspace. No tools or memory are needed — please respond with exactly the single token: WOKE'}]
|
||||
}
|
||||
}
|
||||
}))
|
||||
")
|
||||
WAKE_TMP=$(mktemp -t wake_a2a.XXXXXX)
|
||||
for WAKE_ATTEMPT in $(seq 1 12); do
|
||||
: >"$WAKE_TMP"
|
||||
set +e
|
||||
WAKE_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
|
||||
--max-time 90 \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$WAKE_PAYLOAD" \
|
||||
-o "$WAKE_TMP" -w '%{http_code}' 2>/dev/null)
|
||||
WAKE_RC=$?
|
||||
set -e
|
||||
WAKE_CODE=${WAKE_CODE:-000}
|
||||
if [ "$WAKE_RC" = "0" ] && [ "$WAKE_CODE" -ge 200 ] && [ "$WAKE_CODE" -lt 300 ]; then
|
||||
break
|
||||
fi
|
||||
WAKE_SAFE_BODY=$(cat "$WAKE_TMP" 2>/dev/null | sanitize_http_body)
|
||||
# Wake legitimately returns transient 5xx while the container restarts —
|
||||
# retry that class only (bounded), never a 4xx.
|
||||
if echo "$WAKE_CODE" | grep -Eq '^(502|503|504)$' && [ "$WAKE_ATTEMPT" -lt 12 ]; then
|
||||
log " wake A2A cold/restart attempt $WAKE_ATTEMPT/12 returned $WAKE_CODE: ${WAKE_SAFE_BODY:0:120}"
|
||||
sleep 15
|
||||
continue
|
||||
fi
|
||||
break
|
||||
done
|
||||
rm -f "$WAKE_TMP"
|
||||
# The auto-wake contract is the STATUS transition (hibernated → online), not
|
||||
# the A2A body content — assert the live DB row, the real readiness signal.
|
||||
wait_status "online" "$WORKSPACE_ONLINE_TIMEOUT_SECS" "hibernate-wake" \
|
||||
|| fail "Hibernate auto-wake: parent $PARENT_ID never returned to status=online after a wake A2A (last A2A http=$WAKE_CODE) — auto-wake-on-message regression (a hibernated ws must re-provision on the next A2A)."
|
||||
ok " hibernate → online via auto-wake A2A (DB-verified)"
|
||||
ok "Lifecycle transitions passed: pause→resume→online + hibernate→wake→online"
|
||||
else
|
||||
log "10b/11 Lifecycle transitions skipped (MODE=$MODE, E2E_LIFECYCLE=${E2E_LIFECYCLE:-auto}) — pause/resume/hibernate only run in full mode with E2E_LIFECYCLE!=off."
|
||||
fi
|
||||
|
||||
# ─── 11. Teardown runs via trap ────────────────────────────────────────
|
||||
# Fail-closed-on-skip: before declaring PASS, assert (when CI demanded a live
|
||||
# run) that every load-bearing lifecycle milestone actually fired. A run that
|
||||
# reaches here without provision→online→A2A having truly happened exits 5
|
||||
# instead of reporting green. Teardown still runs (EXIT trap) on that exit.
|
||||
require_live_or_die
|
||||
log "11/11 All checks passed. Teardown runs via EXIT trap."
|
||||
ok "═══ STAGING $MODE-SAAS E2E PASSED ═══"
|
||||
|
||||
@@ -203,6 +203,60 @@ def test_f1_job_missing_from_sentinel_needs(drift_module, tmp_path, monkeypatch)
|
||||
assert any("F1 —" in f and "test" in f for f in findings), findings
|
||||
|
||||
|
||||
def test_detect_drift_403_fails_closed(drift_module, tmp_path, monkeypatch):
    """A 403 on the branch_protections read must propagate as ApiError.

    The API stub is configured with an ApiError carrying an HTTP 403 message
    for the ``main`` branch-protection endpoint; ``detect_drift("main")`` is
    expected to raise ``ApiError`` rather than return findings (fail closed).
    fix/core-ci-fail-closed.
    """
    ci_path = _write_ci_yaml(
        tmp_path,
        jobs={"build": {"runs-on": "ubuntu-latest"}},
        sentinel_needs=["build"],
    )
    audit_path = _write_audit_yaml(tmp_path, ["ci / build (pull_request)"])
    _patch_paths(drift_module, monkeypatch, ci_path, audit_path)

    forbidden = drift_module.ApiError(
        "GET /repos/owner/repo/branch_protections/main → HTTP 403: forbidden"
    )
    routes = {("GET", "/repos/owner/repo/branch_protections/main"): forbidden}
    monkeypatch.setattr(drift_module, "api", _make_stub_api(routes))

    with pytest.raises(drift_module.ApiError):
        drift_module.detect_drift("main")
|
||||
|
||||
|
||||
def test_detect_drift_404_skips_branch(drift_module, tmp_path, monkeypatch):
    """A 404 on the branch_protections read is a tolerated skip, not an error.

    The API stub is configured with an ApiError carrying an HTTP 404 message
    for the ``staging`` branch-protection endpoint. ``detect_drift("staging")``
    must return no findings, and its debug dict must record the skip
    (``protection_contexts_skipped`` True) with ``protection_http_status`` 404.
    """
    ci_path = _write_ci_yaml(
        tmp_path,
        jobs={"build": {"runs-on": "ubuntu-latest"}},
        sentinel_needs=["build"],
    )
    audit_path = _write_audit_yaml(tmp_path, ["ci / build (pull_request)"])
    _patch_paths(drift_module, monkeypatch, ci_path, audit_path)

    not_found = drift_module.ApiError(
        "GET /repos/owner/repo/branch_protections/staging → HTTP 404: not found"
    )
    routes = {("GET", "/repos/owner/repo/branch_protections/staging"): not_found}
    monkeypatch.setattr(drift_module, "api", _make_stub_api(routes))

    findings, debug = drift_module.detect_drift("staging")

    assert findings == []
    assert debug.get("protection_contexts_skipped") is True
    assert debug.get("protection_http_status") == 404
|
||||
|
||||
|
||||
def test_f1b_sentinel_needs_typo(drift_module, tmp_path, monkeypatch):
|
||||
"""F1b: sentinel.needs lists a job not present in ci.yml (typo).
|
||||
|
||||
|
||||
@@ -34,9 +34,12 @@ Test classes (per `feedback_branch_count_before_approving`):
|
||||
together, not short-circuited.
|
||||
- test_bp_empty_lints_nothing — BP has no contexts.
|
||||
Exit 0 cleanly.
|
||||
- test_api_403_skips_gracefully — branch_protections endpoint
|
||||
403s (token-scope). Exit 0 with ::error::, do NOT red-X.
|
||||
- test_api_404_skips_gracefully — branch has no protection.
|
||||
- test_api_403_fails_closed — branch_protections endpoint
|
||||
401/403s (auth failure). FAIL CLOSED (exit 2) with ::error::.
|
||||
- test_api_transient_fails_closed — transient/unexpected API
|
||||
error. FAIL CLOSED (exit 2).
|
||||
- test_api_404_skips_gracefully — branch has no protection
|
||||
(authenticated absent resource). Tolerated skip (exit 0 + warning).
|
||||
Exit 0 cleanly.
|
||||
- test_context_event_match_required — BP context says `(push)` and
|
||||
workflow only emits on `pull_request`. That's NOT a match — the
|
||||
@@ -247,9 +250,10 @@ def test_bp_empty_lints_nothing(envset, monkeypatch, capsys):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API 403 — graceful-degrade.
|
||||
# API 403 — AUTH FAILURE → FAIL CLOSED (exit 2). This is a HARD gate on a
|
||||
# protected context; a token that can't read BP must NOT green the lint.
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_api_403_skips_gracefully(envset, monkeypatch, capsys):
|
||||
def test_api_403_fails_closed(envset, monkeypatch, capsys):
|
||||
_write_wf(
|
||||
envset,
|
||||
"ci.yml",
|
||||
@@ -259,13 +263,30 @@ def test_api_403_skips_gracefully(envset, monkeypatch, capsys):
|
||||
m = _import_lint()
|
||||
_stub_api(monkeypatch, m, ("forbidden", None))
|
||||
rc = m.run()
|
||||
assert rc == 0
|
||||
assert rc == 2
|
||||
err = capsys.readouterr().err
|
||||
assert "403" in err or "scope" in err.lower() or "token" in err.lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API 404 — branch has no protection → clean exit.
|
||||
# API transient/unexpected error → FAIL CLOSED (exit 2).
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_api_transient_fails_closed(envset, monkeypatch, capsys):
|
||||
_write_wf(
|
||||
envset,
|
||||
"ci.yml",
|
||||
"name: CI\non:\n pull_request:\n branches: [main]\njobs:\n"
|
||||
" j:\n runs-on: x\n steps:\n - run: echo hi\n",
|
||||
)
|
||||
m = _import_lint()
|
||||
_stub_api(monkeypatch, m, ("error", None))
|
||||
rc = m.run()
|
||||
assert rc == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API 404 — authenticated absent resource (branch has no protection) →
|
||||
# tolerated graceful skip (exit 0 with ::warning::), NOT a fail-open.
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_api_404_skips_gracefully(envset, monkeypatch, capsys):
|
||||
_write_wf(
|
||||
|
||||
@@ -47,7 +47,10 @@ Test classes (per `feedback_branch_count_before_approving`):
|
||||
(the OLD context name disappears; the NEW one needs validation).
|
||||
- test_unrelated_workflow_edit_is_not_new — edit a comment in
|
||||
an existing emitter; no new context introduced; pass.
|
||||
- test_api_403_skips_gracefully — BP read 403; exit 0
|
||||
- test_api_403_fails_closed — BP read 401/403 auth
|
||||
failure → FAIL CLOSED (exit 2)
|
||||
- test_api_transient_fails_closed — transient → exit 2
|
||||
- test_api_404_skips_gracefully — authenticated 404 → exit 0
|
||||
with stderr ::warning::.
|
||||
- test_directive_must_be_in_workflow_yml — directive in PR
|
||||
body alone is NOT sufficient; the comment must live in the
|
||||
@@ -392,9 +395,10 @@ def test_unrelated_workflow_edit_is_not_new(env, monkeypatch, capsys):
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# BP API 403 → exit 0 with ::error::.
|
||||
# BP API 401/403 = AUTH FAILURE → FAIL CLOSED (exit 2). A new emission can't
|
||||
# be verified against BP if the token can't read BP — must not green.
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_api_403_skips_gracefully(env, monkeypatch, capsys):
|
||||
def test_api_403_fails_closed(env, monkeypatch, capsys):
|
||||
m = _import_lint()
|
||||
_stub_git_and_api(
|
||||
monkeypatch,
|
||||
@@ -404,11 +408,44 @@ def test_api_403_skips_gracefully(env, monkeypatch, capsys):
|
||||
bp_response=("forbidden", None),
|
||||
)
|
||||
rc = m.run()
|
||||
assert rc == 0
|
||||
assert rc == 2
|
||||
err = capsys.readouterr().err
|
||||
assert "403" in err or "scope" in err.lower() or "token" in err.lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# BP API transient/unexpected error → FAIL CLOSED (exit 2).
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_api_transient_fails_closed(env, monkeypatch, capsys):
|
||||
m = _import_lint()
|
||||
_stub_git_and_api(
|
||||
monkeypatch,
|
||||
m,
|
||||
base_files={".gitea/workflows/ci.yml": WF_CI_BASE},
|
||||
head_files={".gitea/workflows/ci.yml": WF_CI_NEW_JOB},
|
||||
bp_response=("error", None),
|
||||
)
|
||||
rc = m.run()
|
||||
assert rc == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# BP API authenticated 404 (branch genuinely unprotected) → tolerated
|
||||
# graceful skip (exit 0 with ::warning::), NOT a fail-open.
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_api_404_skips_gracefully(env, monkeypatch, capsys):
|
||||
m = _import_lint()
|
||||
_stub_git_and_api(
|
||||
monkeypatch,
|
||||
m,
|
||||
base_files={".gitea/workflows/ci.yml": WF_CI_BASE},
|
||||
head_files={".gitea/workflows/ci.yml": WF_CI_NEW_JOB},
|
||||
bp_response=("not_found", None),
|
||||
)
|
||||
rc = m.run()
|
||||
assert rc == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Directive must be in the workflow YML, not PR body.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -527,15 +527,13 @@ def test_multi_required_one_bad_one_good_fails(
|
||||
assert "good.yml" not in ln
|
||||
|
||||
|
||||
def test_protection_403_treated_as_skip(lint_module, monkeypatch, capsys):
|
||||
"""If the token can't read branch_protections (HTTP 403), exit 0
|
||||
with a clear ::error::-but-non-fatal note. Same scope-fallback shape
|
||||
as ci-required-drift.py per the precedent.
|
||||
|
||||
Rationale: if the lint workflow itself can't read protection, the PR
|
||||
can't make THIS state worse (a paths-filter PR was already addable
|
||||
without the lint). Better to surface a token-scope problem loudly
|
||||
than to red-X every PR until the token is fixed.
|
||||
def test_protection_403_fails_closed(lint_module, monkeypatch, capsys):
|
||||
"""AUTH FAILURE → FAIL CLOSED (exit 4). If the token can't read
|
||||
branch_protections (HTTP 401/403), the lint CANNOT enumerate the
|
||||
required-check set and therefore CANNOT verify the no-paths-filter
|
||||
invariant. This is a HARD gate on a protected (same-repo PR) context,
|
||||
so it MUST fail loud rather than green an unverifiable gate — fix the
|
||||
token, not the lint.
|
||||
"""
|
||||
stub = _make_stub_api({
|
||||
("GET", "/repos/owner/repo/branch_protections/main"): (
|
||||
@@ -546,7 +544,26 @@ def test_protection_403_treated_as_skip(lint_module, monkeypatch, capsys):
|
||||
})
|
||||
monkeypatch.setattr(lint_module, "api", stub)
|
||||
rc = lint_module.run()
|
||||
assert rc == 0
|
||||
assert rc == 4
|
||||
err = capsys.readouterr().err
|
||||
assert "::error::" in err
|
||||
assert "403" in err
|
||||
|
||||
|
||||
def test_protection_404_skips_gracefully(lint_module, monkeypatch, capsys):
|
||||
"""Authenticated 404 (branch genuinely has no protection) is the one
|
||||
tolerated degradation: there are no required contexts to check.
|
||||
Exit 0 with a ::warning:: — NOT a fail-open (this is a real read of an
|
||||
absent resource with a valid token, not an auth failure)."""
|
||||
stub = _make_stub_api({
|
||||
("GET", "/repos/owner/repo/branch_protections/main"): (
|
||||
lint_module.ApiError(
|
||||
"GET /repos/owner/repo/branch_protections/main → HTTP 404: not found"
|
||||
)
|
||||
),
|
||||
})
|
||||
monkeypatch.setattr(lint_module, "api", stub)
|
||||
rc = lint_module.run()
|
||||
assert rc == 0
|
||||
err = capsys.readouterr().err
|
||||
assert "404" in err
|
||||
|
||||
@@ -2,10 +2,15 @@ package main
|
||||
|
||||
import "testing"
|
||||
|
||||
// TestResolveBindHost pins the precedence: BIND_ADDR explicit > dev-mode
|
||||
// fail-open default of 127.0.0.1 > production-shape empty (all interfaces).
|
||||
// TestResolveBindHost pins the precedence: BIND_ADDR explicit > local-dev
|
||||
// loopback default of 127.0.0.1 > production-shape empty (all interfaces).
|
||||
//
|
||||
// Mutation-test invariant: removing the IsDevModeFailOpen() branch makes
|
||||
// (harden/no-fail-open-auth) The loopback default is now keyed on
|
||||
// MOLECULE_ENV alone (IsLocalDevEnv), decoupled from ADMIN_TOKEN — a dev box
|
||||
// defaults to loopback even when it provisions an ADMIN_TOKEN. This is
|
||||
// defense-in-depth, not an auth lever; auth is fail-closed in every env.
|
||||
//
|
||||
// Mutation-test invariant: removing the IsLocalDevEnv() branch makes
|
||||
// "no_bindaddr_devmode_unset_admin" fail (returns "" instead of "127.0.0.1").
|
||||
// Removing the BIND_ADDR branch makes "explicit_bindaddr_*" cases fail.
|
||||
func TestResolveBindHost(t *testing.T) {
|
||||
@@ -35,7 +40,10 @@ func TestResolveBindHost(t *testing.T) {
|
||||
bindAddr: "",
|
||||
adminToken: "secret",
|
||||
molEnv: "dev",
|
||||
want: "", // ADMIN_TOKEN flips IsDevModeFailOpen to false → all interfaces
|
||||
// harden/no-fail-open-auth: loopback default is keyed on
|
||||
// MOLECULE_ENV alone now — a dev box defaults to loopback even
|
||||
// with ADMIN_TOKEN provisioned (which dev-start.sh now does).
|
||||
want: "127.0.0.1",
|
||||
},
|
||||
{
|
||||
name: "no_bindaddr_production_env",
|
||||
|
||||
@@ -474,12 +474,12 @@ func main() {
|
||||
|
||||
// HTTP server with graceful shutdown.
|
||||
//
|
||||
// Bind host: in dev-mode (no ADMIN_TOKEN, MOLECULE_ENV=dev|development)
|
||||
// the AdminAuth chain fails open by design; pairing that with a wildcard
|
||||
// bind would expose unauth /workspaces to any same-LAN peer. Default to
|
||||
// loopback when fail-open is active. Operators who need LAN exposure set
|
||||
// BIND_ADDR=0.0.0.0 explicitly. Production (ADMIN_TOKEN set) is unchanged.
|
||||
// See molecule-core#7.
|
||||
// Bind host: in local dev (MOLECULE_ENV=dev|development) default the
|
||||
// listener to loopback as defense-in-depth — a dev box shouldn't be
|
||||
// reachable from the LAN. This is NOT an auth lever (auth is fail-closed
|
||||
// in every env now); it's strictly the safer default. Operators who need
|
||||
// LAN exposure set BIND_ADDR=0.0.0.0 explicitly. Production binds all
|
||||
// interfaces (existing shape). See molecule-core#7.
|
||||
bindHost := resolveBindHost()
|
||||
srv := &http.Server{
|
||||
Addr: fmt.Sprintf("%s:%s", bindHost, port),
|
||||
@@ -489,7 +489,7 @@ func main() {
|
||||
|
||||
// Start server in goroutine
|
||||
go func() {
|
||||
log.Printf("Platform starting on %s:%s (dev-mode-fail-open=%v)", bindHost, port, middleware.IsDevModeFailOpen())
|
||||
log.Printf("Platform starting on %s:%s (local-dev-env=%v)", bindHost, port, middleware.IsLocalDevEnv())
|
||||
if err := srv.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatalf("Server failed: %v", err)
|
||||
}
|
||||
@@ -528,20 +528,20 @@ func envOr(key, fallback string) string {
|
||||
//
|
||||
// Precedence:
|
||||
// 1. BIND_ADDR — explicit operator override (any value, including "0.0.0.0").
|
||||
// 2. dev-mode fail-open active → "127.0.0.1" (loopback only).
|
||||
// 2. local dev (MOLECULE_ENV=dev|development) → "127.0.0.1" (loopback only).
|
||||
// 3. otherwise → "" (Go binds every interface; existing prod/self-host shape).
|
||||
//
|
||||
// Coupling the loopback default to middleware.IsDevModeFailOpen() means the
|
||||
// two safety levers — bind narrowness and auth strength — move together. A
|
||||
// production deploy (ADMIN_TOKEN set) keeps binding to all interfaces because
|
||||
// the auth chain is doing its job; a dev Mac (no ADMIN_TOKEN, MOLECULE_ENV=dev)
|
||||
// is reachable only via loopback because the auth chain is fail-open. See
|
||||
// molecule-core#7 for the original LAN exposure finding.
|
||||
// NOTE (harden/no-fail-open-auth): this is a defense-in-depth default, NOT an
|
||||
// auth lever. Auth is fail-closed in every environment now, so the loopback
|
||||
// default no longer compensates for a weak auth chain — it simply keeps a dev
|
||||
// box off the LAN by default. It is keyed on MOLECULE_ENV alone (decoupled
|
||||
// from ADMIN_TOKEN), because dev now provisions an ADMIN_TOKEN yet should
|
||||
// still default to loopback. See molecule-core#7 for the original LAN finding.
|
||||
func resolveBindHost() string {
|
||||
if v := os.Getenv("BIND_ADDR"); v != "" {
|
||||
return v
|
||||
}
|
||||
if middleware.IsDevModeFailOpen() {
|
||||
if middleware.IsLocalDevEnv() {
|
||||
return "127.0.0.1"
|
||||
}
|
||||
return ""
|
||||
|
||||
@@ -21,6 +21,27 @@ const (
|
||||
|
||||
var slackHTTPClient = &http.Client{Timeout: slackHTTPTimeout}
|
||||
|
||||
// slackWebhookAccepted reports whether a Slack Incoming Webhook URL is allowed
|
||||
// as a send destination. Production accepts only the real hooks.slack.com host.
|
||||
//
|
||||
// TEST SEAM (gating e2e): when MOLECULE_CHANNELS_TEST_WEBHOOK_BASE is set, a
|
||||
// URL with that prefix is ALSO accepted so tests/e2e/test_channels_e2e.sh can
|
||||
// point the live Slack send path at a local mock-upstream and assert the mock
|
||||
// actually received the serialized {"text":...} payload end-to-end (the unit
|
||||
// tests can only assert the body shape — see lark_test.go's prefix-gate
|
||||
// workaround comment). The env var is NEVER set in any production/staging
|
||||
// deploy; channelsTestWebhookBase() returns "" there and only the real
|
||||
// hooks.slack.com prefix passes, so this changes no production behaviour.
|
||||
func slackWebhookAccepted(u string) bool {
|
||||
if strings.HasPrefix(u, slackWebhookPrefix) {
|
||||
return true
|
||||
}
|
||||
if base := channelsTestWebhookBase(); base != "" && strings.HasPrefix(u, base) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// SlackAdapter implements ChannelAdapter for Slack Incoming Webhooks.
|
||||
//
|
||||
// Outbound messages are sent via Slack Incoming Webhooks (the simple,
|
||||
@@ -98,7 +119,7 @@ func (s *SlackAdapter) ValidateConfig(config map[string]interface{}) error {
|
||||
return fmt.Errorf("bot_token mode requires channel_id")
|
||||
}
|
||||
}
|
||||
if webhookURL != "" && !strings.HasPrefix(webhookURL, slackWebhookPrefix) {
|
||||
if webhookURL != "" && !slackWebhookAccepted(webhookURL) {
|
||||
return fmt.Errorf("invalid Slack webhook URL")
|
||||
}
|
||||
return nil
|
||||
@@ -197,7 +218,7 @@ func (s *SlackAdapter) sendWebhookMessage(ctx context.Context, config map[string
|
||||
if webhookURL == "" {
|
||||
return fmt.Errorf("webhook_url not configured")
|
||||
}
|
||||
if !strings.HasPrefix(webhookURL, slackWebhookPrefix) {
|
||||
if !slackWebhookAccepted(webhookURL) {
|
||||
return fmt.Errorf("invalid Slack webhook URL")
|
||||
}
|
||||
|
||||
|
||||
@@ -148,7 +148,18 @@ func (t *TelegramAdapter) DiscoverChats(ctx context.Context, botToken string) (*
|
||||
return nil, errors.New("invalid bot token format")
|
||||
}
|
||||
|
||||
bot, err := tgbotapi.NewBotAPI(botToken)
|
||||
// TEST SEAM: when MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE is set (only in
|
||||
// the gating channels e2e — never in prod/staging), build the bot client
|
||||
// against a local mock API base instead of api.telegram.org so
|
||||
// POST /channels/discover can be proven end-to-end. The format string is
|
||||
// "<base>/bot%s/%s" (token, method), matching tgbotapi.APIEndpoint.
|
||||
var bot *tgbotapi.BotAPI
|
||||
var err error
|
||||
if apiBase := channelsTestTelegramAPIBase(); apiBase != "" {
|
||||
bot, err = tgbotapi.NewBotAPIWithAPIEndpoint(botToken, apiBase+"/bot%s/%s")
|
||||
} else {
|
||||
bot, err = tgbotapi.NewBotAPI(botToken)
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid bot token: %w", err)
|
||||
}
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
package channels
|
||||
|
||||
import "os"
|
||||
|
||||
// Test seams for the GATING channels e2e (tests/e2e/test_channels_e2e.sh).
|
||||
//
|
||||
// Every adapter pins its outbound destination to the real vendor host
|
||||
// (hooks.slack.com, discord.com, api.telegram.org) in both ValidateConfig and
|
||||
// SendMessage. That host pin is correct for production, but it means a real
|
||||
// end-to-end test cannot point the LIVE send/discover path at a local mock
|
||||
// upstream — so today the outbound serialize+POST is only ever asserted by
|
||||
// unit tests that reconstruct the payload by hand (see lark_test.go's
|
||||
// "we can't change the prefix const" comment) and never proven through the
|
||||
// running platform.
|
||||
//
|
||||
// These two env-gated overrides close that gap WITHOUT changing any
|
||||
// production behaviour:
|
||||
//
|
||||
// - MOLECULE_CHANNELS_TEST_WEBHOOK_BASE — when set, Slack Incoming Webhook
|
||||
// URLs with this prefix are accepted as send destinations (in addition to
|
||||
// the real hooks.slack.com host). Lets the e2e create a slack channel whose
|
||||
// webhook_url points at a local httptest mock and assert the mock RECEIVED
|
||||
// the serialized {"text":...} payload.
|
||||
//
|
||||
// - MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE — when set, TelegramAdapter.
|
||||
// DiscoverChats builds its bot client against this API base instead of
|
||||
// api.telegram.org, so POST /channels/discover can be exercised against a
|
||||
// mock that serves getMe/getUpdates and the e2e can assert the discovered
|
||||
// chats round-trip.
|
||||
//
|
||||
// Both vars are NEVER set in any production or staging deploy. The helpers
|
||||
// return "" there, so the real vendor-host pins are the only thing that
|
||||
// passes — production behaviour is byte-for-byte unchanged. Reading os.Getenv
|
||||
// on each call (not caching) keeps the seam honest: a process that never sets
|
||||
// the var can never accidentally enable it.
|
||||
|
||||
// channelsTestWebhookBase returns the test-only accepted webhook base prefix,
|
||||
// or "" in production. See the test-seam overview comment at the top of this file.
|
||||
func channelsTestWebhookBase() string {
|
||||
return os.Getenv("MOLECULE_CHANNELS_TEST_WEBHOOK_BASE")
|
||||
}
|
||||
|
||||
// channelsTestTelegramAPIBase returns the test-only Telegram Bot API base
|
||||
// (a bare URL prefix; the caller appends "/bot%s/%s" to form the endpoint format), or "" in production.
|
||||
func channelsTestTelegramAPIBase() string {
|
||||
return os.Getenv("MOLECULE_CHANNELS_TEST_TELEGRAM_API_BASE")
|
||||
}
|
||||
@@ -0,0 +1,177 @@
|
||||
package handlers
|
||||
|
||||
// a2a_full_body_delivery_guard_test.go — regression guard for core#2175.
|
||||
//
|
||||
// core#2175 RCA: the long-believed "A2A truncation" was a MISDIAGNOSIS.
|
||||
// A2A message delivery preserves the FULL body on every agent-facing path.
|
||||
// Only HUMAN-facing DISPLAY previews are capped (activity title 80 runes,
|
||||
// broadcast 120, delegation summary 80, canvas response_preview 200 bytes).
|
||||
// Those caps live on display/broadcast fields, NOT on the bytes an agent
|
||||
// reads off the wire.
|
||||
//
|
||||
// This file locks in the correct behaviour so a FUTURE change cannot
|
||||
// silently reintroduce REAL truncation on the agent-facing delivery paths:
|
||||
//
|
||||
// 1. DequeueNext (a2a_queue.go) — the drain/read path does
|
||||
// `SELECT ... body::text ...` and returns item.Body. The delivered
|
||||
// body MUST equal the enqueued body byte-for-byte.
|
||||
//
|
||||
// 2. toolCheckTaskStatus (mcp_tools.go) — reads activity_logs.response_body
|
||||
// and surfaces result["result"] = extractA2AText(responseBody). The
|
||||
// returned text MUST be the COMPLETE response text, not a preview.
|
||||
//
|
||||
// Both bodies used here are WELL over 200 chars (> the largest preview cap,
|
||||
// canvas response_preview at 200 bytes) so a regression that wired any
|
||||
// display cap into a delivery path would fail loudly.
|
||||
//
|
||||
// Style: matches the sibling a2a_queue_test.go / mcp_tools_test.go — sqlmock,
|
||||
// no integration build tag. These paths are deterministically exercisable
|
||||
// against the mock because the truncation guard is about what the Go code
|
||||
// does with the row value, not about Postgres-side text handling. CI's
|
||||
// real-PG integration arm (a2a_*_integration tests) additionally exercises
|
||||
// the live `body::text` round-trip.
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/db"
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
)
|
||||
|
||||
// largeA2ABody builds a syntactically valid A2A JSON-RPC message body whose
|
||||
// embedded text part is `textLen` runes long, so the whole body comfortably
|
||||
// exceeds every human-facing preview cap (max 200 bytes).
|
||||
func largeA2ABody(textLen int) string {
|
||||
longText := strings.Repeat("A", textLen)
|
||||
return `{"jsonrpc":"2.0","method":"message/send","params":{"message":{"role":"user","messageId":"guard-2175","parts":[{"type":"text","text":"` + longText + `"}]}}}`
|
||||
}
|
||||
|
||||
// TestDequeueNext_PreservesFullBody_NoTruncation is the guard for the queue
|
||||
// drain/read path. It asserts that the body returned from DequeueNext equals
|
||||
// the enqueued body byte-for-byte, even when far longer than any preview cap.
|
||||
func TestDequeueNext_PreservesFullBody_NoTruncation(t *testing.T) {
|
||||
// 4000-char text part → total body well over the 200-byte canvas cap and
|
||||
// every other display preview cap.
|
||||
fullBody := largeA2ABody(4000)
|
||||
if len(fullBody) <= 200 {
|
||||
t.Fatalf("test setup error: body must exceed the largest preview cap (200); got %d", len(fullBody))
|
||||
}
|
||||
|
||||
mockDB, mock, err := sqlmock.New(sqlmock.QueryMatcherOption(sqlmock.QueryMatcherEqual))
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create sqlmock: %v", err)
|
||||
}
|
||||
prevDB := db.DB
|
||||
db.DB = mockDB
|
||||
t.Cleanup(func() { db.DB = prevDB; mockDB.Close() })
|
||||
|
||||
const wsID = "ws-guard-2175"
|
||||
const itemID = "qid-guard-2175"
|
||||
|
||||
// DequeueNext runs BEGIN → SELECT ... body::text ... → UPDATE → COMMIT.
|
||||
// The mocked SELECT returns the FULL body in the body column; the guard
|
||||
// is that DequeueNext propagates it untouched into item.Body.
|
||||
mock.ExpectBegin()
|
||||
mock.ExpectQuery(
|
||||
"SELECT id, workspace_id, caller_id, priority, body::text, method, attempts FROM a2a_queue WHERE workspace_id = $1 AND status = 'queued' AND (expires_at IS NULL OR expires_at > now()) ORDER BY priority DESC, enqueued_at ASC FOR UPDATE SKIP LOCKED LIMIT 1").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "caller_id", "priority", "body", "method", "attempts",
|
||||
}).AddRow(
|
||||
itemID, wsID, sql.NullString{Valid: false}, PriorityTask,
|
||||
fullBody, sql.NullString{String: "message/send", Valid: true}, 0,
|
||||
))
|
||||
mock.ExpectExec(
|
||||
"UPDATE a2a_queue SET status = 'dispatched', dispatched_at = now(), attempts = attempts + 1 WHERE id = $1").
|
||||
WithArgs(itemID).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
mock.ExpectCommit()
|
||||
|
||||
item, err := DequeueNext(context.Background(), wsID)
|
||||
if err != nil {
|
||||
t.Fatalf("DequeueNext returned error: %v", err)
|
||||
}
|
||||
if item == nil {
|
||||
t.Fatal("DequeueNext returned nil item for a non-empty queue")
|
||||
}
|
||||
|
||||
if got := string(item.Body); got != fullBody {
|
||||
t.Errorf("delivered body was truncated/altered.\n enqueued len=%d\n delivered len=%d\n REGRESSION: a delivery path must NOT apply a display preview cap (core#2175)",
|
||||
len(fullBody), len(got))
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation is the guard
|
||||
// for the check_task_status agent-facing read path. It asserts that the text
|
||||
// surfaced in result["result"] (via extractA2AText over response_body) is the
|
||||
// COMPLETE response text — never a preview-capped slice.
|
||||
func TestToolCheckTaskStatus_ReturnsFullResponseBody_NoTruncation(t *testing.T) {
|
||||
// 3000-char response text, far above any preview cap.
|
||||
fullText := strings.Repeat("B", 3000)
|
||||
responseBody := `{"jsonrpc":"2.0","result":{"artifacts":[{"parts":[{"type":"text","text":"` + fullText + `"}]}]}}`
|
||||
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create sqlmock: %v", err)
|
||||
}
|
||||
t.Cleanup(func() { mockDB.Close() })
|
||||
|
||||
h := &MCPHandler{database: mockDB}
|
||||
|
||||
const callerID = "ws-caller-2175"
|
||||
const targetID = "ws-target-2175"
|
||||
const taskID = "del-guard-2175"
|
||||
|
||||
mock.ExpectQuery(`SELECT status, error_detail, response_body`).
|
||||
WithArgs(callerID, targetID, taskID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"status", "error_detail", "response_body"}).
|
||||
AddRow("completed", sql.NullString{Valid: false}, []byte(responseBody)))
|
||||
|
||||
out, err := h.toolCheckTaskStatus(context.Background(), callerID, map[string]interface{}{
|
||||
"workspace_id": targetID,
|
||||
"task_id": taskID,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("toolCheckTaskStatus returned error: %v", err)
|
||||
}
|
||||
|
||||
// The full text must appear in the serialized result. If a future change
|
||||
// applied a preview cap (e.g. TruncateBytes(…, 200)) to the agent-facing
|
||||
// result, this substring check would fail.
|
||||
if !strings.Contains(out, fullText) {
|
||||
t.Errorf("check_task_status result was truncated.\n expected full %d-char response text in result\n REGRESSION: the agent-facing check_task_status path must return the COMPLETE response_body, not a display preview (core#2175)",
|
||||
len(fullText))
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractA2AText_FullBodyNoCap is a focused unit-level guard on the
|
||||
// extractor itself: extractA2AText must return the entire text part with no
|
||||
// length cap, for both supported A2A response shapes.
|
||||
func TestExtractA2AText_FullBodyNoCap(t *testing.T) {
|
||||
fullText := strings.Repeat("C", 2500)
|
||||
|
||||
cases := map[string]string{
|
||||
"artifacts shape": `{"result":{"artifacts":[{"parts":[{"type":"text","text":"` + fullText + `"}]}]}}`,
|
||||
"message shape": `{"result":{"message":{"parts":[{"type":"text","text":"` + fullText + `"}]}}}`,
|
||||
}
|
||||
for name, body := range cases {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
got := extractA2AText([]byte(body))
|
||||
if got != fullText {
|
||||
t.Errorf("extractA2AText capped/altered the text.\n want len=%d\n got len=%d\n REGRESSION: extractor must not truncate (core#2175)",
|
||||
len(fullText), len(got))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
package handlers
|
||||
|
||||
// a2a_outbound_envelope_test.go — outbound A2A `message/send` envelope
|
||||
// CONTRACT gate (issue #2251).
|
||||
//
|
||||
// #2251: an outbound A2A envelope shipped without `role` and with text
|
||||
// parts keyed `type` instead of the v0.3-canonical `kind`. The receiver's
|
||||
// a2a-sdk v0.3 Pydantic validator silently rejected the message
|
||||
// post-dispatch — the sender saw a happy 200/202 while the brief was
|
||||
// dropped (the same invisible-rejection failure class as the v0.2→v0.3
|
||||
// content bug pinned by a2a_corpus_test.go, but on the SEND side).
|
||||
//
|
||||
// The inbound corpus replay (a2a_corpus_test.go) proves normalizeA2APayload
|
||||
// produces `parts[].kind` + a non-empty messageId, but it does NOT assert
|
||||
// `role`, and it only covers what we RECEIVE. Nothing pins what core
|
||||
// EMITS. This file pins the emit contract at the helper that builds the
|
||||
// parts (buildA2AMessageParts, used by both delegate_task and
|
||||
// delegate_task_async) and asserts the canonical Part key is `kind`.
|
||||
//
|
||||
// Part-object schema (A2A v0.3): every Part MUST carry a `kind`
|
||||
// discriminator ("text" | "file" | "data"); there is NO `type` key. A
|
||||
// text Part is {"kind":"text","text":"..."}. Emitting `type` makes the
|
||||
// v0.3 validator drop the Part.
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestBuildA2AMessageParts_TextPartUsesKindNotType pins the v0.3 Part
|
||||
// discriminator for the text part emitted on every outbound A2A
|
||||
// delegation. RED before #2251's fix (the helper emitted
|
||||
// {"type":"text",...}); the receiver's v0.3 Pydantic validator drops a
|
||||
// Part keyed `type`, silently losing the task text.
|
||||
func TestBuildA2AMessageParts_TextPartUsesKindNotType(t *testing.T) {
|
||||
parts := buildA2AMessageParts("do the work", nil)
|
||||
if len(parts) == 0 {
|
||||
t.Fatal("buildA2AMessageParts returned no parts for a non-empty task")
|
||||
}
|
||||
text := parts[0]
|
||||
|
||||
if _, hasType := text["type"]; hasType {
|
||||
t.Errorf("text part uses forbidden v0.2 key `type` %v — A2A v0.3 Parts discriminate on `kind`; `type` is dropped by the receiver's validator (#2251)", text)
|
||||
}
|
||||
kind, ok := text["kind"].(string)
|
||||
if !ok {
|
||||
t.Fatalf("text part missing string `kind` discriminator; got %v", text)
|
||||
}
|
||||
if kind != "text" {
|
||||
t.Errorf("text part kind = %q, want \"text\"", kind)
|
||||
}
|
||||
if text["text"] != "do the work" {
|
||||
t.Errorf("text part text = %v, want \"do the work\"", text["text"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildA2AMessageParts_FilePartUsesKind guards the file-attachment
|
||||
// Part the same way. The file path was already correct (it used `kind`),
|
||||
// so this is a non-regression pin — it must STAY `kind` when the text
|
||||
// path is fixed (a careless "make them consistent" edit could flip both
|
||||
// to the wrong key).
|
||||
func TestBuildA2AMessageParts_FilePartUsesKind(t *testing.T) {
|
||||
atts := []AgentMessageAttachment{
|
||||
{URI: "https://example.com/a.png", MimeType: "image/png", Name: "a.png"},
|
||||
}
|
||||
parts := buildA2AMessageParts("caption", atts)
|
||||
if len(parts) < 2 {
|
||||
t.Fatalf("expected text + file parts, got %d", len(parts))
|
||||
}
|
||||
file := parts[1]
|
||||
if _, hasType := file["type"]; hasType {
|
||||
t.Errorf("file part uses forbidden `type` key: %v", file)
|
||||
}
|
||||
if _, hasKind := file["kind"]; !hasKind {
|
||||
t.Errorf("file part missing `kind` discriminator: %v", file)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDelegationOutboundEnvelope_RoleAndKind pins the FULL outbound
|
||||
// envelope contract — role + parts[].kind — on the canonical helper.
|
||||
// A v0.3 `message` MUST carry `role` ("user" for a delegation request)
|
||||
// and `parts` whose every entry discriminates on `kind`. This is the
|
||||
// shape the receiver's MessageSendParams validator accepts; an envelope
|
||||
// missing `role` or keyed `type` is silently rejected (#2251).
|
||||
//
|
||||
// Built from the same primitives delegation.go / mcp_tools.go assemble
|
||||
// (role:"user" + buildA2AMessageParts) so the round-trip through
|
||||
// json.Marshal proves the wire bytes are v0.3-valid.
|
||||
func TestDelegationOutboundEnvelope_RoleAndKind(t *testing.T) {
|
||||
envelope := map[string]interface{}{
|
||||
"method": "message/send",
|
||||
"params": map[string]interface{}{
|
||||
"message": map[string]interface{}{
|
||||
"role": "user",
|
||||
"messageId": "deleg-1",
|
||||
"parts": buildA2AMessageParts("do the work", nil),
|
||||
},
|
||||
},
|
||||
}
|
||||
raw, err := json.Marshal(envelope)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal envelope: %v", err)
|
||||
}
|
||||
var parsed map[string]interface{}
|
||||
if err := json.Unmarshal(raw, &parsed); err != nil {
|
||||
t.Fatalf("unmarshal envelope: %v", err)
|
||||
}
|
||||
|
||||
params, _ := parsed["params"].(map[string]interface{})
|
||||
if params == nil {
|
||||
t.Fatal("envelope missing params")
|
||||
}
|
||||
msg, _ := params["message"].(map[string]interface{})
|
||||
if msg == nil {
|
||||
t.Fatal("envelope missing params.message")
|
||||
}
|
||||
|
||||
// role is mandatory on a v0.3 message — the receiver rejects without it.
|
||||
role, hasRole := msg["role"].(string)
|
||||
if !hasRole || role == "" {
|
||||
t.Errorf("params.message missing non-empty `role` — v0.3 requires it; omitting it is the other half of #2251")
|
||||
}
|
||||
|
||||
parts, _ := msg["parts"].([]interface{})
|
||||
if len(parts) == 0 {
|
||||
t.Fatal("params.message.parts is empty")
|
||||
}
|
||||
for i, p := range parts {
|
||||
pm, _ := p.(map[string]interface{})
|
||||
if pm == nil {
|
||||
t.Errorf("part %d is not an object: %v", i, p)
|
||||
continue
|
||||
}
|
||||
if _, hasType := pm["type"]; hasType {
|
||||
t.Errorf("part %d uses forbidden `type` key (must be `kind`): %v", i, pm)
|
||||
}
|
||||
if _, hasKind := pm["kind"]; !hasKind {
|
||||
t.Errorf("part %d missing `kind` discriminator: %v", i, pm)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -801,6 +801,18 @@ func normalizeA2APayload(body []byte) ([]byte, string, *proxyA2AError) {
|
||||
if _, hasID := msg["messageId"]; !hasID {
|
||||
msg["messageId"] = uuid.New().String()
|
||||
}
|
||||
// #2251: default params.message.role to "user" when absent.
|
||||
// The downstream a2a-sdk v0.3 Pydantic validator marks role a
|
||||
// REQUIRED field; a role-less envelope fails parse with
|
||||
// "params.message.role Field required". The Go builders
|
||||
// (mcp_tools/delegation/scheduler/channels) already set it, but
|
||||
// raw external/canvas POSTs to ProxyA2A may omit it — making this
|
||||
// the single canonical choke that guarantees a schema-valid role.
|
||||
// Mirror the messageId default exactly: inject only when missing,
|
||||
// never overwrite a caller-supplied role (e.g. "agent").
|
||||
if _, hasRole := msg["role"]; !hasRole {
|
||||
msg["role"] = "user"
|
||||
}
|
||||
_, hasParts := msg["parts"]
|
||||
rawContent, hasContent := msg["content"]
|
||||
if !hasParts {
|
||||
@@ -832,6 +844,27 @@ func normalizeA2APayload(body []byte) ([]byte, string, *proxyA2AError) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// #2251: wire hygiene — the A2A v0.3 Part discriminator is
|
||||
// "kind", but some builders/clients emit the legacy "type" key
|
||||
// (e.g. delegation.go). The v0.3 Pydantic validator keys on
|
||||
// "kind"; a stray "type" leaves the Part untagged. Rename
|
||||
// "type" → "kind" on every Part that lacks an explicit "kind"
|
||||
// so the discriminator is always present on the wire.
|
||||
if parts, ok := msg["parts"].([]interface{}); ok {
|
||||
for _, p := range parts {
|
||||
part, ok := p.(map[string]interface{})
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if _, hasKind := part["kind"]; hasKind {
|
||||
continue
|
||||
}
|
||||
if t, hasType := part["type"]; hasType {
|
||||
part["kind"] = t
|
||||
delete(part, "type")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1514,6 +1514,142 @@ func TestNormalizeA2APayload_NoMessageNoCheck(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// --- #2251: role default + part-kind hygiene contract tests ---
|
||||
//
|
||||
// These assert normalizeA2APayload is the single canonical Go choke that
|
||||
// guarantees a schema-valid outbound message/send envelope: it injects a
|
||||
// default params.message.role="user" when the sender omitted role (the bug
|
||||
// that made delegate_task fail the peer's a2a Pydantic validator with
|
||||
// "params.message.role Field required" while reply_to_workspace worked), and
|
||||
// it renames the legacy Part discriminator "type"→"kind" for wire hygiene.
|
||||
|
||||
// normMsg is a small helper that runs normalizeA2APayload and returns the
|
||||
// resolved params.message map, failing the test on any normalization error.
|
||||
func normMsg(t *testing.T, raw string) map[string]interface{} {
|
||||
t.Helper()
|
||||
out, _, perr := normalizeA2APayload([]byte(raw))
|
||||
if perr != nil {
|
||||
t.Fatalf("normalizeA2APayload returned error: %+v", perr)
|
||||
}
|
||||
var parsed map[string]interface{}
|
||||
if err := json.Unmarshal(out, &parsed); err != nil {
|
||||
t.Fatalf("output not valid JSON: %v", err)
|
||||
}
|
||||
params, ok := parsed["params"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("output missing params object: %s", string(out))
|
||||
}
|
||||
msg, ok := params["message"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("output missing params.message object: %s", string(out))
|
||||
}
|
||||
return msg
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_DefaultsRoleWhenMissing(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
raw string
|
||||
}{
|
||||
{
|
||||
name: "v0.3 parts, no role",
|
||||
raw: `{"method":"message/send","params":{"message":{"parts":[{"kind":"text","text":"hi"}]}}}`,
|
||||
},
|
||||
{
|
||||
name: "v0.2 string content, no role",
|
||||
raw: `{"method":"message/send","params":{"message":{"content":"hi"}}}`,
|
||||
},
|
||||
{
|
||||
name: "legacy type part, no role",
|
||||
raw: `{"method":"message/send","params":{"message":{"parts":[{"type":"text","text":"hi"}]}}}`,
|
||||
},
|
||||
{
|
||||
name: "already wrapped jsonrpc, no role",
|
||||
raw: `{"jsonrpc":"2.0","id":"x","method":"message/send","params":{"message":{"parts":[{"kind":"text","text":"hi"}]}}}`,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
msg := normMsg(t, tc.raw)
|
||||
if msg["role"] != "user" {
|
||||
t.Errorf("expected role defaulted to \"user\", got %v", msg["role"])
|
||||
}
|
||||
// Parts must remain valid (non-empty) after normalization.
|
||||
parts, ok := msg["parts"].([]interface{})
|
||||
if !ok || len(parts) == 0 {
|
||||
t.Fatalf("expected non-empty parts after normalization, got %v", msg["parts"])
|
||||
}
|
||||
// Every part must carry the v0.3 "kind" discriminator.
|
||||
for i, p := range parts {
|
||||
part, ok := p.(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("part %d is not an object: %v", i, p)
|
||||
}
|
||||
if _, hasKind := part["kind"]; !hasKind {
|
||||
t.Errorf("part %d missing \"kind\" discriminator: %v", i, part)
|
||||
}
|
||||
if _, hasType := part["type"]; hasType {
|
||||
t.Errorf("part %d still has legacy \"type\" key: %v", i, part)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_PreservesExplicitRole(t *testing.T) {
|
||||
// A caller-supplied role (e.g. "agent") must NOT be overwritten with "user".
|
||||
msg := normMsg(t, `{"method":"message/send","params":{"message":{"role":"agent","parts":[{"kind":"text","text":"hi"}]}}}`)
|
||||
if msg["role"] != "agent" {
|
||||
t.Errorf("explicit role overwritten: expected \"agent\", got %v", msg["role"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_RenamesPartTypeToKind(t *testing.T) {
|
||||
// Mirrors delegation.go's builder which emits {"type":"text",...}. After
|
||||
// normalization the wire Part must be discriminated by "kind".
|
||||
msg := normMsg(t, `{"method":"message/send","params":{"message":{"role":"user","parts":[{"type":"text","text":"a"},{"type":"file","uri":"workspace:/x"}]}}}`)
|
||||
parts := msg["parts"].([]interface{})
|
||||
if len(parts) != 2 {
|
||||
t.Fatalf("expected 2 parts, got %d", len(parts))
|
||||
}
|
||||
wantKind := []string{"text", "file"}
|
||||
for i, p := range parts {
|
||||
part := p.(map[string]interface{})
|
||||
if part["kind"] != wantKind[i] {
|
||||
t.Errorf("part %d: expected kind=%q, got %v", i, wantKind[i], part["kind"])
|
||||
}
|
||||
if _, hasType := part["type"]; hasType {
|
||||
t.Errorf("part %d still carries legacy \"type\": %v", i, part)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeA2APayload_DoesNotClobberKindWithType(t *testing.T) {
|
||||
// If a part has BOTH kind and type, kind wins and is left untouched.
|
||||
msg := normMsg(t, `{"method":"message/send","params":{"message":{"role":"user","parts":[{"kind":"text","type":"ignored","text":"a"}]}}}`)
|
||||
part := msg["parts"].([]interface{})[0].(map[string]interface{})
|
||||
if part["kind"] != "text" {
|
||||
t.Errorf("expected kind preserved as \"text\", got %v", part["kind"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizeA2APayload_RoleDefault_ContractRegression documents the
|
||||
// pre-fix failure: without the role default, a role-less message/send body
|
||||
// emerged from normalization still missing params.message.role, which the
|
||||
// peer's a2a Pydantic validator rejects. This asserts the POST-fix invariant
|
||||
// (role present) directly; before the a2a_proxy.go change this assertion
|
||||
// fails (role is absent → msg["role"] == nil).
|
||||
func TestNormalizeA2APayload_RoleDefault_ContractRegression(t *testing.T) {
|
||||
msg := normMsg(t, `{"method":"message/send","params":{"message":{"parts":[{"kind":"text","text":"delegate this"}]}}}`)
|
||||
role, hasRole := msg["role"]
|
||||
if !hasRole {
|
||||
t.Fatal("REGRESSION (#2251): params.message.role absent after normalization — peer a2a validator will reject with 'role Field required'")
|
||||
}
|
||||
if role != "user" {
|
||||
t.Errorf("expected default role \"user\", got %v", role)
|
||||
}
|
||||
}
|
||||
|
||||
// --- resolveAgentURL direct unit tests ---
|
||||
|
||||
func TestResolveAgentURL_CacheHit(t *testing.T) {
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -18,6 +19,7 @@ import (
|
||||
dockerclient "github.com/docker/docker/client"
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/provisioner"
|
||||
)
|
||||
|
||||
@@ -41,10 +43,53 @@ func NewWorkspaceImageService(docker *dockerclient.Client) *WorkspaceImageServic
|
||||
return &WorkspaceImageService{docker: docker}
|
||||
}
|
||||
|
||||
// AllRuntimes is the canonical list mirroring docs/workspace-runtime-package.md.
|
||||
// Update both when a new template is added.
|
||||
var AllRuntimes = []string{
|
||||
"claude-code", "codex", "hermes", "openclaw",
|
||||
// AllRuntimes is the canonical set of workspace runtimes this tenant will
|
||||
// pull/recreate template images for. It is DERIVED from the same providers
|
||||
// manifest SSOT (internal/providers/providers.yaml `runtimes:` block, mirrored
|
||||
// from CP's providers.yaml) that the rest of the platform routes against —
|
||||
// NOT a second hand-maintained list.
|
||||
//
|
||||
// Why derive instead of hardcode (controlplane#578): the old hardcoded slice
|
||||
// here ({claude-code, codex, hermes, openclaw}) silently DRIFTED from CP, which
|
||||
// already accepts `google-adk` for pin-promote/redeploy. A google-adk pin would
|
||||
// be accepted CP-side, then this tenant's POST /admin/workspace-images/refresh
|
||||
// ?runtime=google-adk rejected it 400 ("unknown runtime"), so google-adk image
|
||||
// fixes never deployed. Deriving from the manifest makes the tenant allowlist
|
||||
// and the CP allowlist provably the same set — they can't drift again.
|
||||
//
|
||||
// imageRefreshFallbackRuntimes is used ONLY if the embedded providers manifest
|
||||
// fails to load (which would be a build/CI failure caught by the providers
|
||||
// package's own tests, never a healthy prod). It preserves the historical
|
||||
// behavior — plus google-adk — so a manifest regression can never take the
|
||||
// refresh endpoint fully offline. Kept in lockstep with the providers.yaml
|
||||
// `runtimes:` keys; the drift guard in admin_workspace_images_test.go asserts
|
||||
// the two match.
|
||||
var imageRefreshFallbackRuntimes = []string{
|
||||
"claude-code", "codex", "google-adk", "hermes", "openclaw",
|
||||
}
|
||||
|
||||
// AllRuntimes is computed once at package init from the providers SSOT.
|
||||
var AllRuntimes = loadImageRefreshRuntimes()
|
||||
|
||||
// loadImageRefreshRuntimes returns the sorted runtime names declared in the
|
||||
// providers manifest, falling back to imageRefreshFallbackRuntimes if the
|
||||
// manifest can't be loaded.
|
||||
func loadImageRefreshRuntimes() []string {
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil || len(m.Runtimes) == 0 {
|
||||
if err != nil {
|
||||
log.Printf("workspace-images: providers.LoadManifest failed (%v); falling back to static runtime allowlist", err)
|
||||
}
|
||||
out := append([]string(nil), imageRefreshFallbackRuntimes...)
|
||||
sort.Strings(out)
|
||||
return out
|
||||
}
|
||||
out := make([]string, 0, len(m.Runtimes))
|
||||
for rt := range m.Runtimes {
|
||||
out = append(out, rt)
|
||||
}
|
||||
sort.Strings(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// RefreshResult is the per-call outcome surfaced to HTTP callers AND logged
|
||||
@@ -197,7 +242,7 @@ func (s *WorkspaceImageService) Refresh(ctx context.Context, runtimes []string,
|
||||
|
||||
// AdminWorkspaceImagesHandler serves POST /admin/workspace-images/refresh.
|
||||
//
|
||||
// ?runtime=claude-code (optional; default = all 8 templates)
|
||||
// ?runtime=claude-code (optional; default = all runtimes in AllRuntimes)
|
||||
// &recreate=true|false (default true; false = pull only)
|
||||
//
|
||||
// Returns JSON {pulled: [...], failed: [...], recreated: [...]}
|
||||
|
||||
@@ -3,7 +3,14 @@ package handlers
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sort"
|
||||
"testing"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
|
||||
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/providers"
|
||||
)
|
||||
|
||||
func TestGHCRAuthHeader_NoEnvReturnsEmpty(t *testing.T) {
|
||||
@@ -92,6 +99,119 @@ func TestGHCRAuthHeader_RespectsRegistryEnv(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// runtimeListContains reports whether v occurs in s. It is a linear scan
// used by the runtime-allowlist tests; a nil or empty s yields false.
func runtimeListContains(s []string, v string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == v {
			return true
		}
	}
	return false
}
|
||||
|
||||
// TestAllRuntimes_IncludesGoogleADK is the direct regression for
|
||||
// controlplane#578: a google-adk pin promote/redeploy is accepted CP-side, so
|
||||
// the tenant image-refresh allowlist MUST also accept google-adk or the image
|
||||
// fix never deploys (tenant returned 400 "unknown runtime"). google-adk lives
|
||||
// in the providers SSOT, so the derived AllRuntimes must contain it.
|
||||
func TestAllRuntimes_IncludesGoogleADK(t *testing.T) {
|
||||
if !runtimeListContains(AllRuntimes, "google-adk") {
|
||||
t.Fatalf("AllRuntimes must include google-adk (controlplane#578 drift); got %v", AllRuntimes)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAllRuntimes_MatchesProvidersSSOT is the drift guard. AllRuntimes is
|
||||
// derived from providers.LoadManifest().Runtimes — assert it equals exactly the
|
||||
// runtime keys the providers manifest (mirrored from CP's providers.yaml)
|
||||
// declares. If CP adds/removes a runtime, this test fails RED until the tenant
|
||||
// re-derives, so the tenant image-refresh allowlist can never silently drift
|
||||
// from the CP pin-promote allowlist again.
|
||||
func TestAllRuntimes_MatchesProvidersSSOT(t *testing.T) {
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
t.Fatalf("providers.LoadManifest: %v", err)
|
||||
}
|
||||
want := make([]string, 0, len(m.Runtimes))
|
||||
for rt := range m.Runtimes {
|
||||
want = append(want, rt)
|
||||
}
|
||||
sort.Strings(want)
|
||||
|
||||
got := append([]string(nil), AllRuntimes...)
|
||||
sort.Strings(got)
|
||||
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("AllRuntimes drift: got %v, want %v (providers SSOT)", got, want)
|
||||
}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("AllRuntimes drift at %d: got %v, want %v (providers SSOT)", i, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestImageRefreshFallbackMatchesSSOT pins the static fallback (used only when
|
||||
// the embedded manifest fails to load) to the providers SSOT. If a runtime is
|
||||
// added to providers.yaml but not to imageRefreshFallbackRuntimes, this fails
|
||||
// RED — so a manifest-load failure can't silently drop a supported runtime.
|
||||
func TestImageRefreshFallbackMatchesSSOT(t *testing.T) {
|
||||
m, err := providers.LoadManifest()
|
||||
if err != nil {
|
||||
t.Fatalf("providers.LoadManifest: %v", err)
|
||||
}
|
||||
want := make([]string, 0, len(m.Runtimes))
|
||||
for rt := range m.Runtimes {
|
||||
want = append(want, rt)
|
||||
}
|
||||
sort.Strings(want)
|
||||
|
||||
got := append([]string(nil), imageRefreshFallbackRuntimes...)
|
||||
sort.Strings(got)
|
||||
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("fallback drift: got %v, want %v (providers SSOT)", got, want)
|
||||
}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("fallback drift at %d: got %v, want %v (providers SSOT)", i, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestRefresh_RejectsUnknownRuntime asserts a genuinely unknown runtime still
|
||||
// 400s (the guard isn't removed) AND that the 400 body lists google-adk in
|
||||
// known_runtimes (proving the allowlist now advertises it). This exercises the
|
||||
// gin handler's reject branch, which runs entirely before any Docker call.
|
||||
func TestRefresh_RejectsUnknownRuntime(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
|
||||
// nil docker client is safe: the unknown-runtime branch returns 400
|
||||
// before svc.Refresh (which is the only path that touches Docker).
|
||||
h := &AdminWorkspaceImagesHandler{svc: &WorkspaceImageService{}}
|
||||
|
||||
r := gin.New()
|
||||
r.POST("/admin/workspace-images/refresh", h.Refresh)
|
||||
|
||||
req := httptest.NewRequest(http.MethodPost, "/admin/workspace-images/refresh?runtime=not-a-real-runtime", nil)
|
||||
rec := httptest.NewRecorder()
|
||||
r.ServeHTTP(rec, req)
|
||||
|
||||
if rec.Code != http.StatusBadRequest {
|
||||
t.Fatalf("unknown runtime: got status %d, want 400; body=%s", rec.Code, rec.Body.String())
|
||||
}
|
||||
|
||||
var body struct {
|
||||
Error string `json:"error"`
|
||||
KnownRuntimes []string `json:"known_runtimes"`
|
||||
}
|
||||
if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
|
||||
t.Fatalf("decode 400 body: %v (raw=%s)", err, rec.Body.String())
|
||||
}
|
||||
if !runtimeListContains(body.KnownRuntimes, "google-adk") {
|
||||
t.Errorf("400 known_runtimes must advertise google-adk (controlplane#578); got %v", body.KnownRuntimes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGHCRAuthHeader_TrimsWhitespace(t *testing.T) {
|
||||
t.Setenv("MOLECULE_IMAGE_REGISTRY", "")
|
||||
// .env lines often have trailing newlines or accidental spaces. Without
|
||||
|
||||
@@ -73,6 +73,7 @@ func (h *ChannelHandler) List(c *gin.Context) {
|
||||
var config map[string]interface{}
|
||||
if err := json.Unmarshal(configJSON, &config); err != nil {
|
||||
log.Printf("Channels: unmarshal config for channel %s: %v", id, err)
|
||||
config = map[string]interface{}{}
|
||||
}
|
||||
// #319: decrypt sensitive fields first so the mask operates on
|
||||
// plaintext (first-4 / last-4 of the real token, not the ciphertext
|
||||
@@ -94,6 +95,7 @@ func (h *ChannelHandler) List(c *gin.Context) {
|
||||
var allowed []string
|
||||
if err := json.Unmarshal(allowedJSON, &allowed); err != nil {
|
||||
log.Printf("Channels: unmarshal allowed_users for channel %s: %v", id, err)
|
||||
allowed = []string{}
|
||||
}
|
||||
|
||||
entry := map[string]interface{}{
|
||||
@@ -540,9 +542,11 @@ func (h *ChannelHandler) Webhook(c *gin.Context) {
|
||||
}
|
||||
if err := json.Unmarshal(configJSON, &row.Config); err != nil {
|
||||
log.Printf("Channels: unmarshal config for webhook row %s: %v", row.ID, err)
|
||||
row.Config = map[string]interface{}{}
|
||||
}
|
||||
if err := json.Unmarshal(allowedJSON, &row.AllowedUsers); err != nil {
|
||||
log.Printf("Channels: unmarshal allowed_users for webhook row %s: %v", row.ID, err)
|
||||
row.AllowedUsers = []string{}
|
||||
}
|
||||
if err := channels.DecryptSensitiveFields(row.Config); err != nil {
|
||||
log.Printf("Channels: decrypt webhook row %s: %v", row.ID, err)
|
||||
|
||||
@@ -116,6 +116,56 @@ func TestChannelHandler_List(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestChannelHandler_List_InvalidJSON_FallsBack(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
handler := NewChannelHandler(newTestChannelManager())
|
||||
|
||||
rows := sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "channel_type", "channel_config", "enabled",
|
||||
"allowed_users", "last_message_at", "message_count", "created_at", "updated_at",
|
||||
}).AddRow(
|
||||
"ch-bad", "ws-1", "telegram",
|
||||
[]byte(`{not valid json`),
|
||||
true, []byte(`[also not json`), nil, 0, nil, nil,
|
||||
)
|
||||
mock.ExpectQuery("SELECT .* FROM workspace_channels WHERE workspace_id").
|
||||
WithArgs("ws-1").
|
||||
WillReturnRows(rows)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request, _ = http.NewRequest("GET", "/workspaces/ws-1/channels", nil)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-1"}}
|
||||
|
||||
handler.List(c)
|
||||
|
||||
if w.Code != 200 {
|
||||
t.Errorf("expected 200, got %d", w.Code)
|
||||
}
|
||||
|
||||
var result []map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &result)
|
||||
if len(result) != 1 {
|
||||
t.Fatalf("expected 1 channel, got %d", len(result))
|
||||
}
|
||||
|
||||
config, ok := result[0]["config"].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected config to be a map, got %T", result[0]["config"])
|
||||
}
|
||||
if len(config) != 0 {
|
||||
t.Errorf("expected empty config after unmarshal fallback, got %v", config)
|
||||
}
|
||||
|
||||
allowed, ok := result[0]["allowed_users"].([]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected allowed_users to be a slice, got %T", result[0]["allowed_users"])
|
||||
}
|
||||
if len(allowed) != 0 {
|
||||
t.Errorf("expected empty allowed_users after unmarshal fallback, got %v", allowed)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Create ====================
|
||||
|
||||
func TestChannelHandler_Create_Success(t *testing.T) {
|
||||
@@ -546,6 +596,41 @@ func TestChannelHandler_Webhook_UnknownType(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestChannelHandler_Webhook_InvalidJSON_FallsBack verifies that when the DB
|
||||
// row contains invalid JSON for channel_config or allowed_users, the webhook
|
||||
// handler logs the error and falls back to an empty map/slice rather than
|
||||
// leaving the fields nil (which would panic on downstream code that expects
|
||||
// concrete values). With empty config there is no chat_id match, so the
|
||||
// handler returns {"status":"no_channel"}.
|
||||
func TestChannelHandler_Webhook_InvalidJSON_FallsBack(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
handler := NewChannelHandler(newTestChannelManager())
|
||||
|
||||
mock.ExpectQuery(`SELECT id, workspace_id, channel_type, channel_config, enabled, allowed_users FROM workspace_channels WHERE channel_type = .* AND enabled = true`).
|
||||
WithArgs("telegram").
|
||||
WillReturnRows(sqlmock.NewRows([]string{
|
||||
"id", "workspace_id", "channel_type", "channel_config", "enabled", "allowed_users",
|
||||
}).AddRow("ch-bad", "ws-1", "telegram", []byte(`{bad json`), true, []byte(`[bad json`)))
|
||||
|
||||
body := `{"update_id":1,"message":{"message_id":1,"from":{"id":111,"is_bot":false,"first_name":"Test","username":"testuser"},"chat":{"id":-100123,"title":"Test Group","type":"supergroup"},"date":1700000000,"text":"hello"}}`
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Request = httptest.NewRequest(http.MethodPost, "/webhooks/telegram", strings.NewReader(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
c.Params = gin.Params{{Key: "type", Value: "telegram"}}
|
||||
|
||||
handler.Webhook(c)
|
||||
|
||||
if w.Code != 200 {
|
||||
t.Errorf("expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
json.Unmarshal(w.Body.Bytes(), &resp)
|
||||
if resp["status"] != "no_channel" {
|
||||
t.Errorf("expected status 'no_channel', got %v", resp["status"])
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Discover ====================
|
||||
|
||||
func TestChannelHandler_Discover_MissingToken(t *testing.T) {
|
||||
|
||||
@@ -68,6 +68,10 @@ func TestPeers_CrossTenant_OrgRootNotLeaked(t *testing.T) {
|
||||
|
||||
caller := "org-a-root" // parent_id IS NULL — an org root for tenant A
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
// (Unordered match is set above, so this can be consumed at any point.)
|
||||
seedDiscoveryGrandfather(mock, caller)
|
||||
|
||||
// parent_id lookup → NULL (caller is an org root)
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs(caller).
|
||||
@@ -128,6 +132,9 @@ func TestPeers_SameOrg_SiblingsStillWork(t *testing.T) {
|
||||
caller := "org-a-child-1"
|
||||
parent := "org-a-root"
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, caller)
|
||||
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs(caller).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(parent))
|
||||
|
||||
@@ -179,8 +179,11 @@ func (h *DelegationHandler) Delegate(c *gin.Context) {
|
||||
"message": map[string]interface{}{
|
||||
"role": "user",
|
||||
"messageId": delegationID,
|
||||
"parts": []map[string]interface{}{{"type": "text", "text": body.Task}},
|
||||
"metadata": map[string]interface{}{"delegation_id": delegationID},
|
||||
// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251) —
|
||||
// a `type`-keyed Part is dropped by the receiver's v0.3
|
||||
// validator, silently losing the delegated task.
|
||||
"parts": []map[string]interface{}{{"kind": "text", "text": body.Task}},
|
||||
"metadata": map[string]interface{}{"delegation_id": delegationID},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
@@ -422,28 +422,33 @@ func (h *DiscoveryHandler) CheckAccess(c *gin.Context) {
|
||||
// workspaces with tokens must present a matching Bearer, token binding
|
||||
// is strict (A's token cannot authenticate caller B).
|
||||
//
|
||||
// Fail-open on DB hiccups. Unlike secrets.Values (which returns plaintext
|
||||
// secrets and must fail closed), discovery only exposes peer URLs that
|
||||
// are already behind the existing `CanCommunicate` hierarchy check — a
|
||||
// momentary DB outage shouldn't take agent-to-agent discovery offline.
|
||||
// (harden/no-fail-open-auth) Fails CLOSED on DB error. This used to return nil
|
||||
// (allow) on a HasAnyLiveToken hiccup "because discovery only exposes peer URLs
|
||||
// already behind CanCommunicate" — but the CTO "nothing fail-open" directive is
|
||||
// absolute, and a request must never gain access because the auth datastore is
|
||||
// unreachable. A datastore error now writes 503 (availability tradeoff that
|
||||
// grants NO access) and returns a non-nil error; the caller already does
|
||||
// `if err != nil { return }` so the 503 body is what the client sees.
|
||||
func validateDiscoveryCaller(ctx context.Context, c *gin.Context, workspaceID string) error {
|
||||
hasLive, err := wsauth.HasAnyLiveToken(ctx, db.DB, workspaceID)
|
||||
if err != nil {
|
||||
log.Printf("wsauth: discovery HasAnyLiveToken(%s) failed: %v — allowing request", workspaceID, err)
|
||||
return nil
|
||||
log.Printf("wsauth: discovery HasAnyLiveToken(%s): datastore lookup failed (returning 503): %v", workspaceID, err)
|
||||
c.JSON(http.StatusServiceUnavailable, gin.H{
|
||||
"error": "platform datastore unavailable — retry shortly",
|
||||
"code": "platform_unavailable",
|
||||
})
|
||||
return errors.New("auth datastore unavailable")
|
||||
}
|
||||
if !hasLive {
|
||||
return nil // legacy / pre-upgrade
|
||||
}
|
||||
// Tier-1b dev-mode hatch — same escape hatch AdminAuth and
|
||||
// WorkspaceAuth apply on a local Docker setup. Without this, the
|
||||
// canvas Details tab can never load peers for a workspace that has
|
||||
// registered its live token, producing the 401 the user sees.
|
||||
// Gated by MOLECULE_ENV=development + empty ADMIN_TOKEN, so SaaS
|
||||
// production stays strict.
|
||||
if middleware.IsDevModeFailOpen() {
|
||||
return nil
|
||||
}
|
||||
// (harden/no-fail-open-auth) The former dev-mode escape hatch that
|
||||
// returned nil (allow) here when MOLECULE_ENV=dev + ADMIN_TOKEN unset
|
||||
// has been REMOVED. Discovery callers must present a verified CP
|
||||
// session or a valid bearer in every environment. Local dev now
|
||||
// authenticates the Canvas with a provisioned ADMIN_TOKEN /
|
||||
// NEXT_PUBLIC_ADMIN_TOKEN (see scripts/dev-start.sh), so the Details
|
||||
// tab loads peers with a real credential rather than via fail-open.
|
||||
|
||||
// Try session cookie auth first (SaaS canvas path).
|
||||
// verifiedCPSession returns (valid, presented):
|
||||
|
||||
@@ -49,6 +49,10 @@ func TestDiscover_WorkspaceNotFound_WithCaller(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first;
|
||||
// grandfather (count=0) so the bearer-less request is allowed through.
|
||||
seedDiscoveryGrandfather(mock, "ws-caller")
|
||||
|
||||
// CanCommunicate will need DB lookups — both workspace name lookups
|
||||
// For the access check: caller lookup succeeds, target lookup fails
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
|
||||
@@ -113,6 +117,9 @@ func TestPeers_WithParent(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-sibling-1")
|
||||
|
||||
// Expect parent_id lookup for the requesting workspace
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-sibling-1").
|
||||
@@ -165,6 +172,9 @@ func TestPeers_NotFound(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-ghost")
|
||||
|
||||
// Workspace not found
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-ghost").
|
||||
@@ -191,6 +201,11 @@ func TestPeers_DBError(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// Auth probe grandfathers; this test targets a DB error on the
|
||||
// *handler-body* parent_id query → 500 (distinct from the auth-probe
|
||||
// DB error which now fails closed with 503).
|
||||
seedDiscoveryGrandfather(mock, "ws-dberr")
|
||||
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-dberr").
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
@@ -216,6 +231,9 @@ func TestPeers_RootWorkspace_NoPeers(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-root-alone")
|
||||
|
||||
// Root workspace (parent_id is NULL)
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-root-alone").
|
||||
@@ -270,6 +288,9 @@ func peersFilterFixture(t *testing.T) (*DiscoveryHandler, sqlmock.Sqlmock) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-self")
|
||||
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-self").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow("ws-pm"))
|
||||
@@ -927,13 +948,14 @@ func TestDiscoverHostPeer_Smoke_Success(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Peers auth — dev-mode fail-open gate ====================
|
||||
// ==================== Peers auth — fail-CLOSED gate ====================
|
||||
//
|
||||
// validateDiscoveryCaller applies a Tier-1b dev-mode hatch so the canvas
|
||||
// user session (which holds no workspace-scoped bearer) can still load
|
||||
// the Details → PEERS list on a local Docker setup. The gate must pass
|
||||
// ONLY when MOLECULE_ENV is development AND ADMIN_TOKEN is empty.
|
||||
// These tests pin that contract against accidental polarity flips.
|
||||
// (harden/no-fail-open-auth) validateDiscoveryCaller USED to apply a
|
||||
// Tier-1b dev-mode hatch that let the bearer-less canvas session load the
|
||||
// Details → PEERS list when MOLECULE_ENV=development AND ADMIN_TOKEN empty.
|
||||
// That hatch has been REMOVED — discovery callers must present a verified
|
||||
// CP session or a valid bearer in every environment. These tests pin the
|
||||
// fail-closed contract against accidental re-introduction.
|
||||
|
||||
// peersAuthFixtureHasLiveToken seeds the mock rows required for the
|
||||
// Peers handler to reach the auth branch: HasAnyLiveToken → true (a
|
||||
@@ -946,10 +968,30 @@ func peersAuthFixtureHasLiveToken(mock sqlmock.Sqlmock, workspaceID string) {
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
|
||||
}
|
||||
|
||||
func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
|
||||
// Dev mode: MOLECULE_ENV=development AND ADMIN_TOKEN empty. Canvas
|
||||
// sends no bearer token; validateDiscoveryCaller must return nil
|
||||
// (allow) and the handler must proceed to return the peer list.
|
||||
// seedDiscoveryGrandfather seeds the FIRST query validateDiscoveryCaller
|
||||
// issues (HasAnyLiveToken → 0 = legacy / pre-upgrade) so a bearer-less
|
||||
// discovery request grandfathers through and the test can exercise the
|
||||
// handler body.
|
||||
//
|
||||
// (harden/no-fail-open-auth) Before this branch, validateDiscoveryCaller
|
||||
// returned nil (allow) when the HasAnyLiveToken probe ERRORED — so these
|
||||
// handler-body tests never had to seed the probe at all; the unmatched
|
||||
// COUNT query erred and the fail-open swallowed it. Now that the DB-error
|
||||
// path fails CLOSED (503), the probe must be seeded explicitly. count=0 is
|
||||
// the legitimate grandfather path (no live tokens for this workspace yet),
|
||||
// which is what these pre-existing tests intend.
|
||||
func seedDiscoveryGrandfather(mock sqlmock.Sqlmock, workspaceID string) {
|
||||
mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens").
|
||||
WithArgs(workspaceID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
}
|
||||
|
||||
func TestPeers_DevMode_BearerlessRequest_FailsClosed(t *testing.T) {
|
||||
// (harden/no-fail-open-auth) Exact old-hatch conditions:
|
||||
// MOLECULE_ENV=development AND ADMIN_TOKEN empty, with a live token in
|
||||
// the DB. The bearer-less canvas-style request must now 401 — the
|
||||
// dev-mode hatch that returned nil (allow) here is gone. Local dev
|
||||
// authenticates via a provisioned ADMIN_TOKEN (scripts/dev-start.sh).
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
|
||||
@@ -957,22 +999,10 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// Only the HasAnyLiveToken probe runs; auth 401s before the peer
|
||||
// queries, so no further expectations are seeded.
|
||||
peersAuthFixtureHasLiveToken(mock, "ws-dev")
|
||||
|
||||
// Root workspace → children+parent queries still fire but the
|
||||
// parent_id lookup comes first.
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-dev").
|
||||
WillReturnRows(sqlmock.NewRows([]string{"parent_id"}).AddRow(nil))
|
||||
peerCols := []string{"id", "name", "role", "tier", "status", "agent_card", "url", "parent_id", "active_tasks"}
|
||||
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id IS NULL AND w.id").
|
||||
WithArgs("ws-dev").
|
||||
WillReturnRows(sqlmock.NewRows(peerCols))
|
||||
// #383 — children query gained explicit `w.id != $2` self-filter.
|
||||
mock.ExpectQuery("SELECT w.id.+WHERE w.parent_id = \\$1 AND w.id != \\$2 AND w.status").
|
||||
WithArgs("ws-dev", "ws-dev").
|
||||
WillReturnRows(sqlmock.NewRows(peerCols))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-dev"}}
|
||||
@@ -980,8 +1010,8 @@ func TestPeers_DevModeFailOpen_AllowsBearerlessRequest(t *testing.T) {
|
||||
|
||||
handler.Peers(c)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 under dev-mode hatch, got %d: %s", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Fatalf("expected 401 (fail-closed) under old dev-mode hatch conditions, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1034,6 +1064,70 @@ func TestPeers_DevModeFailOpen_ClosedInProduction(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestPeers_AuthProbeDBError_FailsClosed pins the removal of
|
||||
// validateDiscoveryCaller's fail-open-on-DB-error branch
|
||||
// (harden/no-fail-open-auth). When the HasAnyLiveToken auth probe ERRORS, the
|
||||
// request must NOT be allowed through — it now returns 503 (availability
|
||||
// tradeoff that grants NO access). Before this branch the function returned nil
|
||||
// (allow) on a DB hiccup, so the request reached the peer queries.
|
||||
//
|
||||
// Watch-it-fail: restore `if err != nil { log; return nil }` in
|
||||
// validateDiscoveryCaller → this flips 503→(200/handler path) and fails.
|
||||
func TestPeers_AuthProbeDBError_FailsClosed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// The FIRST query validateDiscoveryCaller issues (HasAnyLiveToken) errors.
|
||||
// No further expectations: a fail-closed 503 must be written before the
|
||||
// peer-list queries run.
|
||||
mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens").
|
||||
WithArgs("ws-dberr-auth").
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-dberr-auth"}}
|
||||
c.Request = httptest.NewRequest("GET", "/registry/ws-dberr-auth/peers", nil)
|
||||
|
||||
handler.Peers(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("auth-probe DB error must fail CLOSED: expected 503, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestDiscover_AuthProbeDBError_FailsClosed is the Discover-endpoint companion
|
||||
// to TestPeers_AuthProbeDBError_FailsClosed: a HasAnyLiveToken error on the
|
||||
// caller's discovery request fails CLOSED with 503 (was: fail-open allow).
|
||||
func TestDiscover_AuthProbeDBError_FailsClosed(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
mock.ExpectQuery("SELECT COUNT.+workspace_auth_tokens").
|
||||
WithArgs("ws-caller").
|
||||
WillReturnError(sql.ErrConnDone)
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: "ws-target"}}
|
||||
c.Request = httptest.NewRequest("GET", "/registry/discover/ws-target", nil)
|
||||
c.Request.Header.Set("X-Workspace-ID", "ws-caller")
|
||||
|
||||
handler.Discover(c)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Fatalf("Discover auth-probe DB error must fail CLOSED: expected 503, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== Peers — #383 self never appears in result ====================
|
||||
|
||||
// TestPeers_ExcludeSelf_DefenseInDepth verifies the final-line filter in
|
||||
@@ -1056,6 +1150,9 @@ func TestPeers_ExcludeSelf_DefenseInDepth(t *testing.T) {
|
||||
|
||||
const selfID = "ws-xiaodong"
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, selfID)
|
||||
|
||||
// parent_id lookup — workspace has a parent.
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs(selfID).
|
||||
|
||||
@@ -551,6 +551,9 @@ func TestDiscover_AccessDenied(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-child-a")
|
||||
|
||||
// CanCommunicate: different parents → denied
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-child-a").
|
||||
@@ -582,6 +585,9 @@ func TestDiscover_TargetOffline(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-caller")
|
||||
|
||||
// Share a parent so communication is allowed under post-#1955 rules
|
||||
sharedParent := "ws-parent"
|
||||
mock.ExpectQuery("SELECT id, parent_id FROM workspaces WHERE id =").
|
||||
|
||||
@@ -373,6 +373,9 @@ func TestExtended_DiscoverWithCallerID(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(callerID) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-caller")
|
||||
|
||||
// CanCommunicate needs to look up both workspaces
|
||||
// Share a parent so communication is allowed under post-#1955 rules
|
||||
sharedParent := "ws-parent"
|
||||
@@ -464,6 +467,9 @@ func TestExtended_Peers(t *testing.T) {
|
||||
setupTestRedis(t)
|
||||
handler := NewDiscoveryHandler()
|
||||
|
||||
// validateDiscoveryCaller probes HasAnyLiveToken(:id) first; grandfather.
|
||||
seedDiscoveryGrandfather(mock, "ws-peer")
|
||||
|
||||
// Expect parent_id lookup for requesting workspace (root-level, no parent)
|
||||
mock.ExpectQuery("SELECT parent_id FROM workspaces WHERE id =").
|
||||
WithArgs("ws-peer").
|
||||
|
||||
@@ -508,6 +508,7 @@ func TestBuildProvisionerConfig_WorkspacePathFromPayload(t *testing.T) {
|
||||
map[string][]byte{"config.yaml": []byte("name: test")},
|
||||
models.CreateWorkspacePayload{Tier: 2, Runtime: "claude-code", WorkspaceDir: "/tmp/workspace", WorkspaceAccess: "read_write"},
|
||||
map[string]string{"OPENAI_API_KEY": "sk-test"},
|
||||
nil,
|
||||
"/tmp/plugins",
|
||||
)
|
||||
|
||||
|
||||
@@ -192,7 +192,11 @@ func (h *MCPHandler) toolGetWorkspaceInfo(ctx context.Context, workspaceID strin
|
||||
// follow in the order provided, with kind derived from MIME type.
|
||||
func buildA2AMessageParts(task string, attachments []AgentMessageAttachment) []map[string]interface{} {
|
||||
parts := []map[string]interface{}{
|
||||
{"type": "text", "text": task},
|
||||
// A2A v0.3 Part discriminator is `kind`, NOT `type` (#2251).
|
||||
// The receiver's v0.3 Pydantic validator drops a Part keyed
|
||||
// `type`, silently losing the task text — the file part below
|
||||
// already uses `kind`, this is the matching fix for text.
|
||||
{"kind": "text", "text": task},
|
||||
}
|
||||
for _, att := range attachments {
|
||||
kind := kindFromMimeType(att.MimeType)
|
||||
|
||||
@@ -161,7 +161,7 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
|
||||
// 1. Strip plugin's rule/fragment markers from CLAUDE.md (mirrors
|
||||
// AgentskillsAdaptor.uninstall lines 184-188). Best-effort: if
|
||||
// the user edited CLAUDE.md, our marker stays untouched.
|
||||
h.stripPluginMarkersFromMemory(ctx, containerName, pluginName)
|
||||
h.stripPluginMarkersFromMemory(ctx, workspaceID, containerName, pluginName)
|
||||
|
||||
// 2. Remove copied skill dirs declared in the plugin's plugin.yaml.
|
||||
for _, skill := range skillNames {
|
||||
@@ -171,9 +171,11 @@ func (h *PluginsHandler) uninstallViaDocker(ctx context.Context, c *gin.Context,
|
||||
log.Printf("Plugin uninstall: skipping invalid skill name %q in %s: %v", skill, pluginName, err)
|
||||
continue
|
||||
}
|
||||
_, _ = h.execAsRoot(ctx, containerName, []string{
|
||||
if _, rmErr := h.execAsRoot(ctx, containerName, []string{
|
||||
"rm", "-rf", "/configs/skills/" + skill,
|
||||
})
|
||||
}); rmErr != nil {
|
||||
log.Printf("Plugin uninstall: failed to remove skill %s from %s: %v", skill, workspaceID, rmErr)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Delete the plugin directory itself (as root to handle file ownership).
|
||||
|
||||
@@ -393,7 +393,7 @@ func (h *PluginsHandler) readPluginSkillsFromContainer(ctx context.Context, cont
|
||||
// `# Plugin: <name> /` — mirrors AgentskillsAdaptor.uninstall's stripping
|
||||
// logic so install/uninstall are symmetric. Best-effort: silent on read or
|
||||
// write failure, since the rest of uninstall must still succeed.
|
||||
func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, containerName, pluginName string) {
|
||||
func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, workspaceID, containerName, pluginName string) {
|
||||
// Use sed via bash -c for atomic in-place delete: drop the marker line
|
||||
// and the blank line that follows it (install adds a leading blank line
|
||||
// before the marker via append_to_memory). Three sed passes mirror the
|
||||
@@ -417,7 +417,9 @@ func (h *PluginsHandler) stripPluginMarkersFromMemory(ctx context.Context, conta
|
||||
`awk 'BEGIN{skip=0; blanks=0} /^%s/{skip=1; blanks=0; next} skip==1 && /^[[:space:]]*$/{blanks++; if(blanks>=2){skip=0; print; next} next} /^# Plugin: /{if(skip==1)skip=0} skip==1{next} {print}' /configs/CLAUDE.md > /tmp/claude.new && mv /tmp/claude.new /configs/CLAUDE.md`,
|
||||
regexpEscapeForAwk(marker),
|
||||
)
|
||||
_, _ = h.execAsRoot(ctx, containerName, []string{"bash", "-c", script})
|
||||
if _, awkErr := h.execAsRoot(ctx, containerName, []string{"bash", "-c", script}); awkErr != nil {
|
||||
log.Printf("Plugin uninstall: failed to strip markers from CLAUDE.md for %s in %s: %v", pluginName, workspaceID, awkErr)
|
||||
}
|
||||
}
|
||||
|
||||
// regexpEscapeForAwk escapes characters that have special meaning inside an
|
||||
|
||||
@@ -89,13 +89,16 @@ func TestSecurity_GetTemplates_NoAuth_Returns401(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecurity_GetTemplates_FreshInstall_FailsOpen verifies that GET /templates
|
||||
// still succeeds on a fresh install (zero enrolled workspaces → AdminAuth fail-open).
|
||||
// This is the regression check: the auth gate must not break new deployments.
|
||||
func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
|
||||
// TestSecurity_GetTemplates_FreshInstall_FailsClosed pins the post-hardening
|
||||
// contract (harden/no-fail-open-auth): GET /templates on a fresh install (zero
|
||||
// enrolled workspaces, no ADMIN_TOKEN) now 401s with no bearer. The former
|
||||
// AdminAuth Tier-1 lazy-bootstrap fail-open (fresh install ⇒ 200) is gone — a
|
||||
// new deployment must provision ADMIN_TOKEN (dev does so via dev-start.sh).
|
||||
func TestSecurity_GetTemplates_FreshInstall_FailsClosed(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("MOLECULE_ENV", "")
|
||||
authDB, authMock := newFreshInstallAuthDB(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
@@ -108,8 +111,8 @@ func TestSecurity_GetTemplates_FreshInstall_FailsOpen(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodGet, "/templates", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("#686 GET /templates fresh-install: want 200 (fail-open), got %d body=%s", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("#686 GET /templates fresh-install fail-closed: want 401, got %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := authMock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet auth mock expectations: %v", err)
|
||||
@@ -148,12 +151,14 @@ func TestSecurity_GetOrgTemplates_NoAuth_Returns401(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen mirrors the /templates
|
||||
// regression check for /org/templates — fresh installs must still work.
|
||||
func TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen(t *testing.T) {
|
||||
// TestSecurity_GetOrgTemplates_FreshInstall_FailsClosed mirrors the /templates
|
||||
// fail-closed check for /org/templates (harden/no-fail-open-auth): a fresh
|
||||
// install with no bearer / no ADMIN_TOKEN now 401s rather than fail-open.
|
||||
func TestSecurity_GetOrgTemplates_FreshInstall_FailsClosed(t *testing.T) {
|
||||
setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("MOLECULE_ENV", "")
|
||||
authDB, authMock := newFreshInstallAuthDB(t)
|
||||
|
||||
tmpDir := t.TempDir()
|
||||
@@ -167,8 +172,8 @@ func TestSecurity_GetOrgTemplates_FreshInstall_FailsOpen(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodGet, "/org/templates", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("#686 GET /org/templates fresh-install: want 200 (fail-open), got %d body=%s", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("#686 GET /org/templates fresh-install fail-closed: want 401, got %d body=%s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := authMock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet auth mock expectations: %v", err)
|
||||
|
||||
@@ -243,6 +243,7 @@ func TestBuildProvisionerConfig_CopiesComputeSizingFromPayload(t *testing.T) {
|
||||
},
|
||||
},
|
||||
nil,
|
||||
nil,
|
||||
t.TempDir(),
|
||||
)
|
||||
|
||||
|
||||
@@ -129,7 +129,7 @@ func (h *WorkspaceHandler) provisionWorkspaceOpts(workspaceID, templatePath stri
|
||||
workspaceID, filepath.Base(runtimeTemplate))
|
||||
templatePath = runtimeTemplate
|
||||
// Rebuild cfg with the recovered template path so Start() sees it.
|
||||
cfg = h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, prepared.EnvVars, prepared.PluginsPath)
|
||||
cfg = h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, prepared.EnvVars, prepared.Config.WorkspaceSecretKeys, prepared.PluginsPath)
|
||||
cfg.ResetClaudeSession = resetClaudeSession
|
||||
recovered = true
|
||||
break
|
||||
@@ -281,6 +281,7 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
|
||||
configFiles map[string][]byte,
|
||||
payload models.CreateWorkspacePayload,
|
||||
envVars map[string]string,
|
||||
workspaceSecretKeys map[string]struct{},
|
||||
pluginsPath string,
|
||||
) provisioner.WorkspaceConfig {
|
||||
// Per-workspace workspace_dir takes priority over global WORKSPACE_DIR env var.
|
||||
@@ -331,14 +332,20 @@ func (h *WorkspaceHandler) buildProvisionerConfig(
|
||||
InstanceType: payload.Compute.InstanceType,
|
||||
DiskGB: int32(payload.Compute.Volume.RootGB),
|
||||
DataPersistence: payload.Compute.DataPersistence,
|
||||
Provider: payload.Compute.Provider,
|
||||
Display: provisioner.WorkspaceDisplayConfig{
|
||||
Mode: payload.Compute.Display.Mode,
|
||||
Width: payload.Compute.Display.Width,
|
||||
Height: payload.Compute.Display.Height,
|
||||
Protocol: payload.Compute.Display.Protocol,
|
||||
},
|
||||
EnvVars: envVars,
|
||||
PlatformURL: h.platformURL,
|
||||
EnvVars: envVars,
|
||||
// Forensic #145: positive provenance set so the SCM-write-token guard
|
||||
// (cp_provisioner.Start) exempts a workspace-authored GITEA_TOKEN from
|
||||
// the operator-bleed strip while still stripping global/persona-merged
|
||||
// SCM tokens. Carried by both Docker- and CP-mode configs.
|
||||
WorkspaceSecretKeys: workspaceSecretKeys,
|
||||
PlatformURL: h.platformURL,
|
||||
// Image left empty — molecule-core's runtime_image_pins table (mig
|
||||
// 047, dead reader removed by RFC internal#617 / task #335) was an
|
||||
// aspirational SSOT that never received a writer. CP's
|
||||
@@ -1233,9 +1240,18 @@ func firstNonEmptyEnv(names ...string) string {
|
||||
// stores — NOT the user's own scoped PAT they explicitly authorized via
|
||||
// the per-workspace Secrets tab.
|
||||
//
|
||||
// The third return value (workspaceKeys) is the POSITIVE counterpart: the
|
||||
// set of keys authored via the per-workspace `workspace_secrets` table
|
||||
// (user / org-admin set, authenticated as the workspace owner). It is the
|
||||
// provenance signal the forensic #145 SCM-write-token guard consults to
|
||||
// EXEMPT a workspace-scoped GITEA_TOKEN (the intended, legitimate delivery
|
||||
// channel for a reviewer agent) from the operator-bleed strip. A key set
|
||||
// in BOTH stores lands here (workspace overrides global) and is removed
|
||||
// from globalKeys, matching the precedence semantic below.
|
||||
//
|
||||
// The merged map preserves the existing precedence semantic (workspace
|
||||
// rows overwrite global rows on key collision); only the provenance side-
|
||||
// channel is new. Existing single-return callers can ignore globalKeys.
|
||||
// channels are new. Existing callers can ignore globalKeys / workspaceKeys.
|
||||
//
|
||||
// F1086 / #1206: the returned error string is the SAFE-CANNED message that
|
||||
// gets persisted to workspaces.last_sample_error AND broadcast as the
|
||||
@@ -1243,9 +1259,10 @@ func firstNonEmptyEnv(names ...string) string {
|
||||
// the encryption version, the decrypt-error text) is logged here, never
|
||||
// returned to the caller, so it can't leak via the canvas event stream
|
||||
// (cf. TestProvisionWorkspace_NoInternalErrorsInBroadcast).
|
||||
func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]string, map[string]struct{}, string) {
|
||||
func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]string, map[string]struct{}, map[string]struct{}, string) {
|
||||
envVars := map[string]string{}
|
||||
globalKeys := map[string]struct{}{}
|
||||
workspaceKeys := map[string]struct{}{}
|
||||
globalRows, globalErr := db.DB.QueryContext(ctx,
|
||||
`SELECT key, encrypted_value, encryption_version FROM global_secrets`)
|
||||
if globalErr == nil {
|
||||
@@ -1266,7 +1283,7 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
|
||||
decrypted, decErr := crypto.DecryptVersioned(v, ver)
|
||||
if decErr != nil {
|
||||
log.Printf("Provisioner: FATAL — failed to decrypt global secret %s (version=%d): %v — aborting provision of workspace %s", k, ver, decErr, workspaceID)
|
||||
return nil, nil, "failed to decrypt global secret"
|
||||
return nil, nil, nil, "failed to decrypt global secret"
|
||||
}
|
||||
envVars[k] = string(decrypted)
|
||||
globalKeys[k] = struct{}{}
|
||||
@@ -1300,7 +1317,7 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
|
||||
decrypted, decErr := crypto.DecryptVersioned(v, ver)
|
||||
if decErr != nil {
|
||||
log.Printf("Provisioner: FATAL — failed to decrypt workspace secret %s (version=%d) for %s: %v — aborting provision", k, ver, workspaceID, decErr)
|
||||
return nil, nil, "failed to decrypt workspace secret"
|
||||
return nil, nil, nil, "failed to decrypt workspace secret"
|
||||
}
|
||||
envVars[k] = string(decrypted)
|
||||
// User-authored workspace_secrets value supersedes any
|
||||
@@ -1309,13 +1326,19 @@ func loadWorkspaceSecrets(ctx context.Context, workspaceID string) (map[string]s
|
||||
// re-set the value via the canvas Secrets tab, so it is
|
||||
// no longer "the operator-store version."
|
||||
delete(globalKeys, k)
|
||||
// Positive provenance: record that this key was authored
|
||||
// via workspace_secrets. The forensic #145 SCM-write-token
|
||||
// guard exempts only keys in this set — a workspace-scoped
|
||||
// GITEA_TOKEN is the intended delivery channel for that
|
||||
// workspace's agent.
|
||||
workspaceKeys[k] = struct{}{}
|
||||
}
|
||||
}
|
||||
if err := wsRows.Err(); err != nil {
|
||||
log.Printf("Provisioner: workspace_secrets rows.Err workspace=%s: %v", workspaceID, err)
|
||||
}
|
||||
}
|
||||
return envVars, globalKeys, ""
|
||||
return envVars, globalKeys, workspaceKeys, ""
|
||||
}
|
||||
|
||||
// provisionWorkspaceCP provisions a workspace via the control plane API.
|
||||
|
||||
@@ -122,7 +122,7 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
payload models.CreateWorkspacePayload,
|
||||
resetClaudeSession bool,
|
||||
) (*preparedProvisionContext, *provisionAbort) {
|
||||
envVars, globalSecretKeys, decryptErr := loadWorkspaceSecrets(ctx, workspaceID)
|
||||
envVars, globalSecretKeys, workspaceSecretKeys, decryptErr := loadWorkspaceSecrets(ctx, workspaceID)
|
||||
if decryptErr != "" {
|
||||
return nil, &provisionAbort{Msg: decryptErr}
|
||||
}
|
||||
@@ -294,7 +294,7 @@ func (h *WorkspaceHandler) prepareProvisionContext(
|
||||
return nil, abort
|
||||
}
|
||||
|
||||
cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, pluginsPath)
|
||||
cfg := h.buildProvisionerConfig(ctx, workspaceID, templatePath, configFiles, payload, envVars, workspaceSecretKeys, pluginsPath)
|
||||
cfg.ResetClaudeSession = resetClaudeSession
|
||||
|
||||
return &preparedProvisionContext{
|
||||
|
||||
@@ -845,6 +845,7 @@ func TestBuildProvisionerConfig_BasicFields(t *testing.T) {
|
||||
map[string][]byte{"config.yaml": []byte("name: test")},
|
||||
models.CreateWorkspacePayload{Tier: 1, Runtime: "claude-code"},
|
||||
map[string]string{"API_KEY": "secret"},
|
||||
nil,
|
||||
pluginsPath,
|
||||
)
|
||||
|
||||
@@ -893,6 +894,7 @@ func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
|
||||
nil,
|
||||
models.CreateWorkspacePayload{Tier: 2, Runtime: "claude-code"},
|
||||
nil,
|
||||
nil,
|
||||
pluginsPath,
|
||||
)
|
||||
|
||||
@@ -901,6 +903,71 @@ func TestBuildProvisionerConfig_WorkspacePathFromEnv(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== loadWorkspaceSecrets provenance (forensic #145) ====================
|
||||
|
||||
// TestLoadWorkspaceSecrets_WorkspaceKeysProvenance pins the positive
|
||||
// provenance side-channel added for forensic #145: a key sourced from
|
||||
// workspace_secrets must land in the third return value (workspaceKeys),
|
||||
// while a key sourced only from global_secrets must NOT. A key present in
|
||||
// BOTH stores is treated as workspace-authored (workspace overrides global),
|
||||
// so it lands in workspaceKeys AND is removed from globalKeys.
|
||||
func TestLoadWorkspaceSecrets_WorkspaceKeysProvenance(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
|
||||
// global_secrets: an operator-store GITEA_TOKEN (the bleed channel) and
|
||||
// an OPERATOR_ONLY key that no workspace row re-sets.
|
||||
globalRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("GITEA_TOKEN", []byte("operator-store-gitea"), 0).
|
||||
AddRow("OPERATOR_ONLY", []byte("op-val"), 0)
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
|
||||
WillReturnRows(globalRows)
|
||||
|
||||
// workspace_secrets: the user/org-admin re-authors GITEA_TOKEN (override)
|
||||
// and adds a workspace-only WS_ONLY key. encryption_version 0 = plaintext
|
||||
// passthrough (crypto.DecryptVersioned).
|
||||
wsRows := sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
|
||||
AddRow("GITEA_TOKEN", []byte("workspace-authored-gitea"), 0).
|
||||
AddRow("WS_ONLY", []byte("ws-val"), 0)
|
||||
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
|
||||
WithArgs("ws-prov").
|
||||
WillReturnRows(wsRows)
|
||||
|
||||
envVars, globalKeys, workspaceKeys, errMsg := loadWorkspaceSecrets(context.Background(), "ws-prov")
|
||||
if errMsg != "" {
|
||||
t.Fatalf("loadWorkspaceSecrets returned error: %q", errMsg)
|
||||
}
|
||||
|
||||
// Workspace override wins on value precedence.
|
||||
if got := envVars["GITEA_TOKEN"]; got != "workspace-authored-gitea" {
|
||||
t.Errorf("GITEA_TOKEN value = %q; want workspace-authored override", got)
|
||||
}
|
||||
|
||||
// workspaceKeys: both workspace-sourced keys present.
|
||||
if _, ok := workspaceKeys["GITEA_TOKEN"]; !ok {
|
||||
t.Errorf("GITEA_TOKEN (re-authored via workspace_secrets) missing from workspaceKeys: %v", workspaceKeys)
|
||||
}
|
||||
if _, ok := workspaceKeys["WS_ONLY"]; !ok {
|
||||
t.Errorf("WS_ONLY (workspace_secrets) missing from workspaceKeys: %v", workspaceKeys)
|
||||
}
|
||||
// OPERATOR_ONLY came only from global_secrets → NOT workspace-authored.
|
||||
if _, ok := workspaceKeys["OPERATOR_ONLY"]; ok {
|
||||
t.Errorf("OPERATOR_ONLY (global_secrets only) wrongly present in workspaceKeys: %v", workspaceKeys)
|
||||
}
|
||||
|
||||
// globalKeys: GITEA_TOKEN's operator-bleed flag dropped by the override;
|
||||
// OPERATOR_ONLY stays flagged.
|
||||
if _, ok := globalKeys["GITEA_TOKEN"]; ok {
|
||||
t.Errorf("GITEA_TOKEN should be removed from globalKeys after workspace override: %v", globalKeys)
|
||||
}
|
||||
if _, ok := globalKeys["OPERATOR_ONLY"]; !ok {
|
||||
t.Errorf("OPERATOR_ONLY missing from globalKeys: %v", globalKeys)
|
||||
}
|
||||
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("sqlmock expectations not met: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// ==================== issueAndInjectToken (issue #418) ====================
|
||||
|
||||
// TestIssueAndInjectToken_HappyPath verifies that on a normal (re)provision the
|
||||
|
||||
@@ -5,61 +5,53 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Dev-mode escape hatch — factored out of AdminAuth + WorkspaceAuth so a
|
||||
// future third caller (or a change to what "dev mode" means) touches one
|
||||
// place. Narrowing the exposed seam also makes it grep-able from security
|
||||
// reviews: every `isDevModeFailOpen()` call is an intentional fail-open.
|
||||
// Local-dev environment detection.
|
||||
//
|
||||
// Why the helper exists at all: on `go run ./cmd/server` the Canvas (at
|
||||
// localhost:3000) calls the platform (at localhost:8080) cross-port. Both
|
||||
// `isSameOriginCanvas` (Referer==Host) and the AdminAuth Tier-1 fail-open
|
||||
// (no tokens in DB) close the moment the user creates their first
|
||||
// workspace. Without this hatch the Canvas 401s on every /workspaces
|
||||
// enumeration and every /workspaces/:id/* read until the operator sets
|
||||
// `ADMIN_TOKEN` and rebuilds the Canvas bundle with a matching
|
||||
// `NEXT_PUBLIC_ADMIN_TOKEN`. That's too much friction for a local smoke
|
||||
// test — hence the hatch.
|
||||
// SECURITY (harden/no-fail-open-auth): this file used to export an auth
|
||||
// escape hatch — `isDevModeFailOpen()` — that let AdminAuth, WorkspaceAuth,
|
||||
// and the discovery handler serve admin/workspace-protected endpoints with
|
||||
// NO bearer token whenever `ADMIN_TOKEN` was unset and `MOLECULE_ENV` was a
|
||||
// dev value. The CTO directive is "nothing should be fail-open": auth is now
|
||||
// fail-CLOSED in every environment, dev included. The hatch is GONE.
|
||||
//
|
||||
// Why it's safe for SaaS: hosted tenants are provisioned with both
|
||||
// `ADMIN_TOKEN` (a random secret, checked by Tier-2 above) and
|
||||
// `MOLECULE_ENV=production`. Either one being set makes this helper
|
||||
// return false, so the fail-open branch is unreachable in production.
|
||||
// Real token minting goes through AdminAuth, so local development keeps a
|
||||
// narrow fail-open mode for browser/API smoke tests without an admin secret.
|
||||
// What remains here is a NON-security predicate, `isLocalDevEnv()`, that
|
||||
// reports ONLY whether `MOLECULE_ENV` names a local-dev environment. It does
|
||||
// NOT consult `ADMIN_TOKEN` and it does NOT influence authentication. It is
|
||||
// used for two convenience/defense-in-depth knobs that never grant access:
|
||||
//
|
||||
// - ratelimit.go: relax the per-caller request bucket on a single-user
|
||||
// local stack (a DoS knob, not a credential — relaxing it cannot expose
|
||||
// any protected data).
|
||||
// - cmd/server resolveBindHost(): default the HTTP listener to loopback
|
||||
// (127.0.0.1) in local dev. This is strictly *safer* than binding all
|
||||
// interfaces and is unrelated to whether a request is authenticated.
|
||||
//
|
||||
// Local dev now stays AUTHENTICATED, not open: scripts/dev-start.sh
|
||||
// provisions a deterministic `ADMIN_TOKEN` and hands the matching
|
||||
// `NEXT_PUBLIC_ADMIN_TOKEN` to the Canvas, so the browser sends a real
|
||||
// bearer. See scripts/dev-start.sh and canvas/src/lib/api.ts.
|
||||
|
||||
// devModeEnvValues is the set of MOLECULE_ENV values that count as
|
||||
// "explicit dev mode". Production callers don't set any of these.
|
||||
// "explicit local dev". Production callers don't set any of these.
|
||||
// Case-insensitive compare via strings.ToLower below.
|
||||
var devModeEnvValues = map[string]struct{}{
	// Short alias.
	"dev": {},
	// Long form. Keys are stored lowercase; isLocalDevEnv lowercases and
	// trims MOLECULE_ENV before the lookup.
	"development": {},
}
|
||||
|
||||
// isDevModeFailOpen reports whether the AdminAuth / WorkspaceAuth
|
||||
// middleware should let a bearer-less request through despite live
|
||||
// workspace tokens existing in the DB.
|
||||
//
|
||||
// True only when BOTH:
|
||||
// - `ADMIN_TOKEN` is empty (operator has not opted in to the #684
|
||||
// closure), AND
|
||||
// - `MOLECULE_ENV` is explicitly a dev value ("development" / "dev").
|
||||
//
|
||||
// Either condition failing returns false — that's the SaaS safety
|
||||
// guarantee. Tests: `devmode_test.go` covers every branch.
|
||||
func isDevModeFailOpen() bool {
|
||||
if os.Getenv("ADMIN_TOKEN") != "" {
|
||||
return false
|
||||
}
|
||||
// isLocalDevEnv reports whether MOLECULE_ENV names a local-dev environment
|
||||
// ("development" / "dev"). It carries NO authentication semantics — callers
|
||||
// must never use it to bypass a credential check. It exists only for
|
||||
// dev-convenience / defense-in-depth knobs (rate-limit relaxation, loopback
|
||||
// bind default) that cannot expose protected data.
|
||||
func isLocalDevEnv() bool {
|
||||
env := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_ENV")))
|
||||
_, ok := devModeEnvValues[env]
|
||||
return ok
|
||||
}
|
||||
|
||||
// IsDevModeFailOpen exposes isDevModeFailOpen to packages outside the
|
||||
// middleware module (handlers, discovery, etc.) so they can apply the
|
||||
// same Tier-1b escape hatch their sibling AdminAuth / WorkspaceAuth
|
||||
// already do. Keep every call site audit-tagged so security review can
|
||||
// grep them.
|
||||
func IsDevModeFailOpen() bool {
|
||||
return isDevModeFailOpen()
|
||||
// IsLocalDevEnv exposes isLocalDevEnv to packages outside the middleware
|
||||
// module (cmd/server bind-host default). NON-security: see isLocalDevEnv.
|
||||
func IsLocalDevEnv() bool {
|
||||
return isLocalDevEnv()
|
||||
}
|
||||
|
||||
@@ -4,74 +4,66 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Unit tests for the isDevModeFailOpen predicate. The AdminAuth and
|
||||
// WorkspaceAuth middleware tests exercise the same helper indirectly via
|
||||
// HTTP, but a direct predicate test locks the pure-logic behaviour:
|
||||
// future callers can add themselves to `devmode.go` with confidence.
|
||||
// Unit tests for the isLocalDevEnv predicate.
|
||||
//
|
||||
// (harden/no-fail-open-auth) This predicate replaced the old
|
||||
// isDevModeFailOpen() auth escape hatch. It carries NO authentication
|
||||
// semantics and does NOT consult ADMIN_TOKEN — it reports ONLY whether
|
||||
// MOLECULE_ENV names a local-dev environment. It gates non-security knobs
|
||||
// (rate-limit relaxation, loopback bind default). The fail-CLOSED auth
|
||||
// behaviour is enforced by no_fail_open_test.go.
|
||||
|
||||
func TestIsDevModeFailOpen_DevModeNoAdminToken_True(t *testing.T) {
|
||||
func TestIsLocalDevEnv_Development_True(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
if !isDevModeFailOpen() {
|
||||
t.Error("expected dev mode + no admin token to return true")
|
||||
if !isLocalDevEnv() {
|
||||
t.Error("expected MOLECULE_ENV=development to be local dev")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsDevModeFailOpen_DevModeShortAlias_True(t *testing.T) {
|
||||
// "dev" is a valid alias for "development".
|
||||
func TestIsLocalDevEnv_ShortAlias_True(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ENV", "dev")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
if !isDevModeFailOpen() {
|
||||
t.Error("expected MOLECULE_ENV=dev to be treated as dev mode")
|
||||
if !isLocalDevEnv() {
|
||||
t.Error("expected MOLECULE_ENV=dev to be treated as local dev")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsDevModeFailOpen_AdminTokenSet_False(t *testing.T) {
|
||||
// Setting ADMIN_TOKEN is the operator's explicit opt-in to the #684
|
||||
// closure. Dev mode must NOT silently override that signal.
|
||||
func TestIsLocalDevEnv_IgnoresAdminToken(t *testing.T) {
|
||||
// Decoupled from ADMIN_TOKEN: dev now provisions one, but the bind /
|
||||
// rate-limit knobs still treat the env as local dev. Crucially this
|
||||
// predicate grants no access, so the coupling no longer matters.
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "operator-explicitly-set-this")
|
||||
if isDevModeFailOpen() {
|
||||
t.Error("explicit ADMIN_TOKEN must suppress the dev-mode hatch")
|
||||
t.Setenv("ADMIN_TOKEN", "operator-set-this")
|
||||
if !isLocalDevEnv() {
|
||||
t.Error("ADMIN_TOKEN must not affect isLocalDevEnv (env-only predicate)")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsDevModeFailOpen_Production_False(t *testing.T) {
|
||||
// The SaaS-safety guarantee: production tenants always have
|
||||
// MOLECULE_ENV=production, so the hatch is unreachable even if a
|
||||
// misconfigured deployment also leaves ADMIN_TOKEN unset.
|
||||
func TestIsLocalDevEnv_Production_False(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ENV", "production")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
if isDevModeFailOpen() {
|
||||
t.Error("production must never hit the dev-mode fail-open branch")
|
||||
if isLocalDevEnv() {
|
||||
t.Error("production must not count as local dev")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsDevModeFailOpen_CaseInsensitive(t *testing.T) {
|
||||
// Operators shouldn't have to remember exact casing for a dev-only
|
||||
// convenience. "Development", "DEV", " dev " all count.
|
||||
func TestIsLocalDevEnv_CaseInsensitive(t *testing.T) {
|
||||
cases := []string{"Development", "DEVELOPMENT", "Dev", "DEV", " dev "}
|
||||
for _, env := range cases {
|
||||
t.Run(env, func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ENV", env)
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
if !isDevModeFailOpen() {
|
||||
t.Errorf("MOLECULE_ENV=%q should count as dev mode", env)
|
||||
if !isLocalDevEnv() {
|
||||
t.Errorf("MOLECULE_ENV=%q should count as local dev", env)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsDevModeFailOpen_UnknownEnv_False(t *testing.T) {
|
||||
// Arbitrary / unset MOLECULE_ENV values are NOT treated as dev mode.
|
||||
// Keeps the fail-open branch narrow — no silent opt-in from a typo.
|
||||
func TestIsLocalDevEnv_UnknownEnv_False(t *testing.T) {
|
||||
cases := []string{"", "staging", "local", "preview", "test", "devel"}
|
||||
for _, env := range cases {
|
||||
t.Run(env, func(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ENV", env)
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
if isDevModeFailOpen() {
|
||||
t.Errorf("MOLECULE_ENV=%q must not enable fail-open", env)
|
||||
if isLocalDevEnv() {
|
||||
t.Errorf("MOLECULE_ENV=%q must not count as local dev", env)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -0,0 +1,245 @@
|
||||
package middleware
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// no_fail_open_test.go is the regression gate for the CTO directive
|
||||
// "nothing should be fail-open" (branch harden/no-fail-open-auth).
|
||||
//
|
||||
// It asserts that AdminAuth and WorkspaceAuth fail CLOSED (401) under the
|
||||
// EXACT conditions that used to trigger the removed dev-mode fail-open hatch:
|
||||
// - ADMIN_TOKEN unset, AND
|
||||
// - MOLECULE_ENV is a dev value ("development" / "dev"), AND
|
||||
// - any HasAnyLiveTokenGlobal state (0 = fresh install, 1 = post-workspace).
|
||||
//
|
||||
// To prove this is RED against the old behaviour: temporarily restore the
|
||||
// `if isDevModeFailOpen() { c.Next(); return }` short-circuit in
|
||||
// wsauth_middleware.go (and the Tier-1 `if adminSecret == "" { c.Next() }`
|
||||
// branch) — every sub-case below flips from 401 to 200 and fails. After the
|
||||
// hardening, all sub-cases are 401.
|
||||
|
||||
// failOpenConditions is the shared table for the no-fail-open regression
// tests: one row per (MOLECULE_ENV, live-token count) pair that the removed
// dev-mode hatch keyed on. ADMIN_TOKEN is not part of the table because the
// tests always clear it — an unset ADMIN_TOKEN was a precondition of the old
// fail-open.
var failOpenConditions = []struct {
	name      string // sub-test name
	molEnv    string // value assigned to MOLECULE_ENV
	liveCount int    // row returned for the HasAnyLiveTokenGlobal probe (0 = fresh install)
}{
	{name: "dev_alias_fresh_install", molEnv: "dev", liveCount: 0},
	{name: "dev_alias_post_workspace", molEnv: "dev", liveCount: 1},
	{name: "development_fresh_install", molEnv: "development", liveCount: 0},
	{name: "development_post_workspace", molEnv: "development", liveCount: 1},
}
|
||||
|
||||
func TestAdminAuth_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
|
||||
for _, tc := range failOpenConditions {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("MOLECULE_ENV", tc.molEnv)
|
||||
// Ensure no CP-session path can accidentally pass.
|
||||
t.Setenv("CP_UPSTREAM_URL", "")
|
||||
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock.New: %v", err)
|
||||
}
|
||||
defer mockDB.Close()
|
||||
|
||||
// AdminAuth always probes HasAnyLiveTokenGlobal (for the 503-on-
|
||||
// outage semantics), so it must be expected for both counts.
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(tc.liveCount))
|
||||
|
||||
r := gin.New()
|
||||
r.GET("/admin/secrets", AdminAuth(mockDB), func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("AdminAuth must fail CLOSED under old hatch conditions "+
|
||||
"(MOLECULE_ENV=%q, ADMIN_TOKEN unset, liveTokens=%d): expected 401, got %d: %s",
|
||||
tc.molEnv, tc.liveCount, w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWorkspaceAuth_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
|
||||
for _, tc := range failOpenConditions {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("MOLECULE_ENV", tc.molEnv)
|
||||
t.Setenv("CP_UPSTREAM_URL", "")
|
||||
|
||||
// WorkspaceAuth 401s before any DB lookup when there is no
|
||||
// bearer / cookie, so no queries are expected regardless of
|
||||
// the nominal live-token count.
|
||||
mockDB, _, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock.New: %v", err)
|
||||
}
|
||||
defer mockDB.Close()
|
||||
|
||||
r := gin.New()
|
||||
r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req, _ := http.NewRequest(http.MethodGet,
|
||||
"/workspaces/00000000-0000-0000-0000-000000000000/activity", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("WorkspaceAuth must fail CLOSED under old hatch conditions "+
|
||||
"(MOLECULE_ENV=%q, ADMIN_TOKEN unset): expected 401, got %d: %s",
|
||||
tc.molEnv, w.Code, w.Body.String())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestCanvasOrBearer_NoFailOpen_UnderOldHatchConditions is the regression gate
|
||||
// for the two fail-open branches removed from CanvasOrBearer
|
||||
// (harden/no-fail-open-auth, "nothing fail-open" pass 2):
|
||||
//
|
||||
// (a) lazy-bootstrap pass: `if !hasLive { c.Next(); return }` — a zero-token
|
||||
// install used to pass EVERYTHING through. Now a bearer-less request on a
|
||||
// fresh install (HasAnyLiveTokenGlobal → 0) fails CLOSED with 401.
|
||||
// (b) fail-open-on-DB-error: `if err != nil { log; c.Next(); return }` — a
|
||||
// HasAnyLiveTokenGlobal error used to ALLOW. Now it fails CLOSED with 503.
|
||||
//
|
||||
// Watch-it-fail: restore either short-circuit in CanvasOrBearer and the
|
||||
// matching sub-case flips (401→200 / 503→200) and fails.
|
||||
func TestCanvasOrBearer_NoFailOpen_UnderOldHatchConditions(t *testing.T) {
|
||||
// (a) Fresh install (0 live tokens), no bearer, no ADMIN_TOKEN → 401.
|
||||
t.Run("zero_token_install_no_bearer_fails_closed_401", func(t *testing.T) {
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("CORS_ORIGINS", "")
|
||||
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock.New: %v", err)
|
||||
}
|
||||
defer mockDB.Close()
|
||||
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
|
||||
handlerCalled := false
|
||||
r := gin.New()
|
||||
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
|
||||
handlerCalled = true
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("CanvasOrBearer lazy-bootstrap fail-open removed: zero-token install must 401, got %d: %s",
|
||||
w.Code, w.Body.String())
|
||||
}
|
||||
if handlerCalled {
|
||||
t.Error("handler reached on a fresh-install bearer-less request — lazy-bootstrap fail-open not removed")
|
||||
}
|
||||
})
|
||||
|
||||
// (b) Auth datastore error → 503 (NOT allow).
|
||||
t.Run("db_error_fails_closed_503", func(t *testing.T) {
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock.New: %v", err)
|
||||
}
|
||||
defer mockDB.Close()
|
||||
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnError(http.ErrAbortHandler) // any non-nil error suffices
|
||||
|
||||
handlerCalled := false
|
||||
r := gin.New()
|
||||
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
|
||||
handlerCalled = true
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("CanvasOrBearer DB-error fail-open removed: must 503, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if handlerCalled {
|
||||
t.Error("handler reached on a datastore-error request — DB-error fail-open not removed")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestNoFailOpenAuthHelperReexists is a source guard over the package
// directory: it fails if any .go file in the current directory (other than
// this guard file) contains the invocation/declaration form of the removed
// fail-open auth helper — `isDevModeFailOpen(` or `IsDevModeFailOpen(` — as
// live code. The replacement predicate is the NON-security isLocalDevEnv
// (bind / rate-limit only); bringing the old identifier back as a declaration
// or call is a regression of the CTO directive.
//
// Everything from the first `//` on a line is discarded before matching, so
// the historical mentions kept in single-line doc comments do not trip the
// guard.
func TestNoFailOpenAuthHelperReexists(t *testing.T) {
	// This file names the forbidden tokens on purpose (including inside a
	// comment), so it is excluded from the scan.
	const selfName = "no_fail_open_test.go"
	banned := [...]string{"isDevModeFailOpen(", "IsDevModeFailOpen("}

	dirEntries, err := os.ReadDir(".")
	if err != nil {
		t.Fatalf("ReadDir: %v", err)
	}

	for _, entry := range dirEntries {
		fileName := entry.Name()
		switch {
		case entry.IsDir(), !strings.HasSuffix(fileName, ".go"):
			continue
		case fileName == selfName:
			continue
		}

		contents, err := os.ReadFile(filepath.Clean(fileName))
		if err != nil {
			t.Fatalf("ReadFile %s: %v", fileName, err)
		}

		for lineIdx, raw := range strings.Split(string(contents), "\n") {
			// Keep only the part of the line before any single-line comment.
			live := strings.SplitN(raw, "//", 2)[0]
			for _, token := range banned {
				if !strings.Contains(live, token) {
					continue
				}
				t.Errorf("%s:%d uses forbidden fail-open auth helper %q — "+
					"the dev-mode fail-open hatch must stay removed (harden/no-fail-open-auth). "+
					"Use isLocalDevEnv (NON-security) for dev-only knobs instead.",
					fileName, lineIdx+1, strings.TrimSuffix(token, "("))
			}
		}
	}
}
|
||||
@@ -102,15 +102,16 @@ func (rl *RateLimiter) keyFor(c *gin.Context) string {
|
||||
// the priority list and rationale.
|
||||
func (rl *RateLimiter) Middleware() gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
// Tier-1b dev-mode hatch — same gate as AdminAuth / WorkspaceAuth /
|
||||
// discovery. On a local single-user Docker setup the 600-req/min
|
||||
// bucket fills fast: a 15-workspace canvas + activity polling +
|
||||
// approvals polling + A2A overlay + initial hydration all land in
|
||||
// one bucket (whichever keyFor returns — typically the dev user's
|
||||
// IP or shared admin token), so a minute of active use can trip
|
||||
// 429 and blank the page. Gated by MOLECULE_ENV=development +
|
||||
// empty ADMIN_TOKEN so SaaS production keeps the bucket.
|
||||
if isDevModeFailOpen() {
|
||||
// Local-dev rate-limit relaxation (NON-security; see devmode.go).
|
||||
// On a local single-user stack the 600-req/min bucket fills fast:
|
||||
// a 15-workspace canvas + activity polling + approvals polling +
|
||||
// A2A overlay + initial hydration all land in one bucket, so a
|
||||
// minute of active use can trip 429 and blank the page. This only
|
||||
// relaxes a DoS knob — it grants no access and is unrelated to
|
||||
// authentication (auth is fail-closed in every env). Gated solely
|
||||
// by MOLECULE_ENV=dev/development so SaaS production keeps the
|
||||
// bucket. Decoupled from ADMIN_TOKEN (dev now provisions one).
|
||||
if isLocalDevEnv() {
|
||||
c.Header("X-RateLimit-Limit", "unlimited")
|
||||
c.Next()
|
||||
return
|
||||
|
||||
@@ -120,12 +120,12 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
|
||||
return
|
||||
}
|
||||
}
|
||||
// Local-dev escape hatch — see devmode.go. Unreachable on SaaS
|
||||
// (hosted tenants always have ADMIN_TOKEN + MOLECULE_ENV=production).
|
||||
if isDevModeFailOpen() {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
// No bearer, no verified CP session: fail CLOSED in EVERY
|
||||
// environment (harden/no-fail-open-auth). The old local-dev
|
||||
// escape hatch that let bearer-less requests through when
|
||||
// ADMIN_TOKEN was unset + MOLECULE_ENV=dev has been removed —
|
||||
// local dev now authenticates with a provisioned ADMIN_TOKEN
|
||||
// (see scripts/dev-start.sh).
|
||||
c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "missing workspace auth token"})
|
||||
}
|
||||
}
|
||||
@@ -133,11 +133,18 @@ func WorkspaceAuth(database *sql.DB) gin.HandlerFunc {
|
||||
// AdminAuth returns a Gin middleware for global/admin routes (e.g.
|
||||
// /settings/secrets, /admin/secrets) that have no per-workspace scope.
|
||||
//
|
||||
// FAIL-CLOSED in every environment (harden/no-fail-open-auth): there is no
|
||||
// bearer-less path through this middleware. A request reaches the handler
|
||||
// ONLY by presenting a valid credential (verified CP session cookie, org
|
||||
// token, ADMIN_TOKEN, or — deprecated — a live workspace token). The former
|
||||
// "Tier-1 lazy-bootstrap fail-open" (no live tokens + no ADMIN_TOKEN ⇒ pass)
|
||||
// has been removed: it let an attacker pre-empt the first user by POSTing
|
||||
// /org/import before any token was minted (C4 SaaS-launch finding). A fresh
|
||||
// install must set ADMIN_TOKEN to reach admin routes.
|
||||
//
|
||||
// # Credential tier (evaluated in order)
|
||||
//
|
||||
// 1. Lazy-bootstrap fail-open: if no live workspace token exists anywhere on
|
||||
// the platform (fresh install / pre-Phase-30 upgrade), every request passes
|
||||
// through so existing deployments keep working.
|
||||
// 1. Verified CP session cookie (SaaS canvas) — upstream-confirmed.
|
||||
//
|
||||
// 2. ADMIN_TOKEN env var (recommended, closes #684): when set, the bearer
|
||||
// MUST equal this value exactly (constant-time comparison). Workspace
|
||||
@@ -163,33 +170,17 @@ func AdminAuth(database *sql.DB) gin.HandlerFunc {
|
||||
ctx := c.Request.Context()
|
||||
adminSecret := os.Getenv("ADMIN_TOKEN")
|
||||
|
||||
hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
|
||||
if err != nil {
|
||||
// (harden/no-fail-open-auth) Both former fail-open branches have
|
||||
// been REMOVED here:
|
||||
// - Tier-1 lazy-bootstrap (no live tokens + no ADMIN_TOKEN ⇒ pass)
|
||||
// - Tier-1b local-dev escape hatch (isDevModeFailOpen ⇒ pass)
|
||||
// Admin auth is now fail-CLOSED in every environment. We still probe
|
||||
// HasAnyLiveTokenGlobal so a datastore outage returns a structured
|
||||
// 503 (not a silent pass), but its result no longer opens any path.
|
||||
if _, err := wsauth.HasAnyLiveTokenGlobal(ctx, database); err != nil {
|
||||
abortAuthLookupError(c, "AdminAuth: HasAnyLiveTokenGlobal", err)
|
||||
return
|
||||
}
|
||||
if !hasLive {
|
||||
// Tier 1: fail-open is ONLY safe when ADMIN_TOKEN is unset
|
||||
// (self-hosted dev, pre-Phase-30 upgrade). Hosted SaaS always
|
||||
// sets ADMIN_TOKEN at provision time, and C4 (SaaS-launch
|
||||
// blocker) showed that without this guard an attacker can
|
||||
// pre-empt the first user by POSTing /org/import before any
|
||||
// token gets minted. When ADMIN_TOKEN is set we fall through
|
||||
// into the same bearer-check path Tier-2 uses below.
|
||||
if adminSecret == "" {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Tier 1b: Local-dev escape hatch — see devmode.go. Lets the
|
||||
// Canvas dashboard keep working after the first workspace token
|
||||
// lands in the DB on `go run ./cmd/server`. Unreachable on SaaS
|
||||
// (hosted tenants always have ADMIN_TOKEN + MOLECULE_ENV=production).
|
||||
if isDevModeFailOpen() {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
// SaaS-canvas path: when the request carries a WorkOS session
|
||||
// cookie AND the CP confirms it's valid, accept without a
|
||||
@@ -281,34 +272,46 @@ func cpSessionActor(cookieHeader string) string {
|
||||
// Accepts either:
|
||||
//
|
||||
// 1. A valid bearer token (same contract as AdminAuth) — covers molecli,
|
||||
// agent-to-platform calls, and anyone using the API directly.
|
||||
// 2. A browser Origin header that matches CORS_ORIGINS (canvas itself).
|
||||
// This is NOT a strict auth boundary — curl can forge Origin — but for
|
||||
// cosmetic-only routes the trade-off is acceptable. Non-cosmetic routes
|
||||
// MUST NOT use this middleware (see #194 review on why it would re-open
|
||||
// #164 CRITICAL if applied to /bundles/import).
|
||||
// agent-to-platform calls, the browser canvas (which now sends
|
||||
// Authorization: Bearer $NEXT_PUBLIC_ADMIN_TOKEN on every platform
|
||||
// call — see canvas/src/lib/api.ts platformAuthHeaders), and anyone
|
||||
// using the API directly.
|
||||
// 2. A same-origin canvas request (Referer/Host match), but ONLY when the
|
||||
// combined-tenant canvas proxy is active (CANVAS_PROXY_URL set). This is
|
||||
// a real same-origin check the browser cannot forge cross-origin (see
|
||||
// isSameOriginCanvas / IsVerifiedCanvasSession, #623/#194) — NOT the
|
||||
// trivially-forgeable cross-origin Origin header. The forgeable
|
||||
// CORS_ORIGINS Origin-match path was REMOVED under the CTO
|
||||
// "nothing fail-open" directive (a no-bearer request passing purely on a
|
||||
// spoofable Origin is effectively open even for a cosmetic route, and is
|
||||
// no longer needed now that the canvas always sends a bearer).
|
||||
//
|
||||
// Lazy-bootstrap fail-open preserved: zero-token installs pass everything
|
||||
// through so fresh self-hosted / dev sessions aren't bricked.
|
||||
// Non-cosmetic routes MUST NOT use this middleware (see #194 review on why it
|
||||
// would re-open #164 CRITICAL if applied to /bundles/import).
|
||||
//
|
||||
// (harden/no-fail-open-auth) Two former fail-open branches are REMOVED:
|
||||
// - DB-error on HasAnyLiveTokenGlobal used to `c.Next()` (allow); it now
|
||||
// fails CLOSED with 503 (availability tradeoff that grants NO access).
|
||||
// - The lazy-bootstrap pass (`!hasLive ⇒ c.Next()`) used to let a
|
||||
// zero-token install through EVERYTHING; it is gone. Bootstrap is now via
|
||||
// ADMIN_TOKEN (provisioned by scripts/dev-start.sh for local dev,
|
||||
// operator/SaaS-set in production) — local mimics production.
|
||||
func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
ctx := c.Request.Context()
|
||||
|
||||
hasLive, err := wsauth.HasAnyLiveTokenGlobal(ctx, database)
|
||||
if err != nil {
|
||||
log.Printf("wsauth: CanvasOrBearer HasAnyLiveTokenGlobal failed: %v — allowing request", err)
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
if !hasLive {
|
||||
c.Next()
|
||||
// Probe global token state for the (no-bearer) same-origin path
|
||||
// below. Fail CLOSED on a datastore error — an availability tradeoff
|
||||
// that does NOT grant access (was: log + c.Next() fail-open).
|
||||
if _, err := wsauth.HasAnyLiveTokenGlobal(ctx, database); err != nil {
|
||||
abortAuthLookupError(c, "CanvasOrBearer: HasAnyLiveTokenGlobal", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Path 1: bearer present → bearer MUST validate. Do not fall through
|
||||
// to Origin on an invalid bearer — an attacker with a revoked /
|
||||
// expired token + a matching Origin would otherwise bypass auth.
|
||||
// Empty bearer → skip to Origin path (canvas never sends one).
|
||||
// to the same-origin path on an invalid bearer — an attacker with a
|
||||
// revoked / expired token would otherwise bypass auth.
|
||||
// Empty bearer → fall to the same-origin canvas path.
|
||||
if tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")); tok != "" {
|
||||
// Admin token accepted for canvas dashboard
|
||||
adminSecret := os.Getenv("ADMIN_TOKEN")
|
||||
@@ -324,13 +327,10 @@ func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
// Path 2: canvas origin match (cross-origin canvas).
|
||||
if canvasOriginAllowed(c.GetHeader("Origin")) {
|
||||
c.Next()
|
||||
return
|
||||
}
|
||||
|
||||
// Path 3: same-origin canvas (tenant image).
|
||||
// Path 2: same-origin canvas (combined-tenant image). Gated behind
|
||||
// canvasProxyActive (CANVAS_PROXY_URL) and a non-forgeable
|
||||
// Referer/Host same-origin check — NOT the spoofable cross-origin
|
||||
// Origin header (that path was removed, see doc comment above).
|
||||
if isSameOriginCanvas(c) {
|
||||
c.Next()
|
||||
return
|
||||
@@ -340,30 +340,14 @@ func CanvasOrBearer(database *sql.DB) gin.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
// canvasOriginAllowed returns true if origin matches any entry in the
|
||||
// CORS_ORIGINS env var (comma-separated) or the localhost defaults.
|
||||
// Exact-match only; no prefix or wildcard logic — that's handled by the
|
||||
// real CORS middleware upstream. The intent here is "did this request come
|
||||
// from the canvas page the user is already logged into?" — a binary check.
|
||||
func canvasOriginAllowed(origin string) bool {
|
||||
if origin == "" {
|
||||
return false
|
||||
}
|
||||
allowed := []string{"http://localhost:3000", "http://localhost:3001"}
|
||||
if v := os.Getenv("CORS_ORIGINS"); v != "" {
|
||||
for _, o := range strings.Split(v, ",") {
|
||||
if o = strings.TrimSpace(o); o != "" {
|
||||
allowed = append(allowed, o)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, a := range allowed {
|
||||
if a == origin {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
// (harden/no-fail-open-auth) canvasOriginAllowed was REMOVED. It matched a
|
||||
// request's (trivially forgeable, cross-origin) Origin header against
|
||||
// CORS_ORIGINS and was the basis of CanvasOrBearer's no-bearer Origin-match
|
||||
// pass — effectively open to any curl that sets a matching Origin. Under the
|
||||
// CTO "nothing fail-open" directive that path is gone; the canvas now always
|
||||
// sends a bearer (NEXT_PUBLIC_ADMIN_TOKEN), so nothing legitimate relied on it.
|
||||
// The CORS *response-header* allowlist is handled by the real CORS middleware
|
||||
// upstream, unaffected by this removal.
|
||||
|
||||
// isSameOriginCanvas returns true when the request appears to come from the
|
||||
// canvas UI served by the same Go process (tenant image). In this topology,
|
||||
|
||||
@@ -143,11 +143,15 @@ func TestCanvasOrBearer_AdminTokenEnv_Passes(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestCanvasOrBearer_DBError_FailOpen pins the documented behavior on a
|
||||
// HasAnyLiveTokenGlobal failure. The middleware logs and falls open so a
|
||||
// flaky DB doesn't lock canvas users out of cosmetic routes. Hardcoded in
|
||||
// the comment block; this is a reminder if anyone changes that semantic.
|
||||
func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
|
||||
// TestCanvasOrBearer_DBError_FailsClosed pins the removal of the
|
||||
// fail-open-on-DB-error branch (harden/no-fail-open-auth). A
|
||||
// HasAnyLiveTokenGlobal failure used to log + c.Next() (allow); it now fails
|
||||
// CLOSED with 503 — an availability tradeoff that grants NO access. The
|
||||
// handler must NOT be reached.
|
||||
//
|
||||
// Watch-it-fail: restore `if err != nil { log; c.Next(); return }` in
|
||||
// CanvasOrBearer → this flips 503→200 and fails.
|
||||
func TestCanvasOrBearer_DBError_FailsClosed(t *testing.T) {
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock: %v", err)
|
||||
@@ -156,8 +160,10 @@ func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnError(http.ErrAbortHandler) // any non-nil error suffices
|
||||
|
||||
handlerCalled := false
|
||||
r := gin.New()
|
||||
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
|
||||
handlerCalled = true
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
})
|
||||
|
||||
@@ -165,8 +171,11 @@ func TestCanvasOrBearer_DBError_FailOpen(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("DB error fail-open: got %d, want 200 (%s)", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusServiceUnavailable {
|
||||
t.Errorf("DB error must fail CLOSED: got %d, want 503 (%s)", w.Code, w.Body.String())
|
||||
}
|
||||
if handlerCalled {
|
||||
t.Error("handler reached on a datastore-error request — DB-error fail-open not removed")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -339,15 +339,24 @@ func TestWorkspaceAuth_WrongWorkspace_Returns401(t *testing.T) {
|
||||
// TestAdminAuth_FailOpen_NoTokensGlobally — C10/C11: on a fresh install (no
|
||||
// live tokens anywhere) the middleware must let the request through so existing
|
||||
// deployments keep working during the Phase-30 rollout.
|
||||
func TestAdminAuth_FailOpen_NoTokensGlobally(t *testing.T) {
|
||||
// TestAdminAuth_FreshInstallNoTokens_FailsClosed pins the post-hardening
|
||||
// contract (harden/no-fail-open-auth): on a fresh install with NO live
|
||||
// tokens anywhere AND no ADMIN_TOKEN, a bearer-less admin request now 401s.
|
||||
// The former Tier-1 "lazy-bootstrap fail-open" (no tokens ⇒ 200) is GONE —
|
||||
// it let an attacker pre-empt the first user via /org/import (C4). A fresh
|
||||
// install must provision ADMIN_TOKEN to reach admin routes.
|
||||
func TestAdminAuth_FreshInstallNoTokens_FailsClosed(t *testing.T) {
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("MOLECULE_ENV", "")
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock.New: %v", err)
|
||||
}
|
||||
defer mockDB.Close()
|
||||
|
||||
// HasAnyLiveTokenGlobal returns 0 — fresh install.
|
||||
// HasAnyLiveTokenGlobal returns 0 — fresh install. We still probe it
|
||||
// (so a DB outage yields a structured 503), but the result no longer
|
||||
// opens any path.
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
|
||||
@@ -360,8 +369,8 @@ func TestAdminAuth_FailOpen_NoTokensGlobally(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodGet, "/admin/secrets", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("C10 fail-open (no global tokens): expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("fresh-install no-token fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
@@ -831,18 +840,23 @@ func TestAdminAuth_Issue180_ApprovalsListing_NoBearer_Returns401(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens documents the
|
||||
// fail-open contract: on a fresh install (no tokens anywhere), the middleware
|
||||
// must not block the canvas from polling /approvals/pending.
|
||||
func TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens(t *testing.T) {
|
||||
// TestAdminAuth_Issue180_ApprovalsListing_FreshInstall_FailsClosed pins the
|
||||
// post-hardening contract (harden/no-fail-open-auth): on a fresh install (no
|
||||
// tokens anywhere, no ADMIN_TOKEN), the canvas polling /approvals/pending with
|
||||
// no bearer now gets 401. The former #180 fail-open (200 on no-tokens) is gone
|
||||
// — local dev now provisions an ADMIN_TOKEN and the canvas authenticates with
|
||||
// it (scripts/dev-start.sh).
|
||||
func TestAdminAuth_Issue180_ApprovalsListing_FreshInstall_FailsClosed(t *testing.T) {
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
t.Setenv("MOLECULE_ENV", "")
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock.New: %v", err)
|
||||
}
|
||||
defer mockDB.Close()
|
||||
|
||||
// HasAnyLiveTokenGlobal returns 0 — fresh install, no tokens yet.
|
||||
// HasAnyLiveTokenGlobal returns 0 — fresh install, no tokens yet. Probed
|
||||
// for the 503-on-outage semantics, but it opens no path now.
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
|
||||
@@ -855,24 +869,21 @@ func TestAdminAuth_Issue180_ApprovalsListing_FailOpen_NoTokens(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodGet, "/approvals/pending", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("#180 fail-open (no tokens): expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("#180 fresh-install fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen documents the
|
||||
// local-dev escape hatch on WorkspaceAuth. On `go run ./cmd/server` +
|
||||
// `npm run dev`, Canvas at localhost:3000 calls the platform at
|
||||
// localhost:8080 cross-port, so isSameOriginCanvas's Host==Referer
|
||||
// check fails. Without this hatch the Canvas can't show per-workspace
|
||||
// activity/delegations.
|
||||
//
|
||||
// SaaS never fires this branch because tenant provisioning sets both
|
||||
// MOLECULE_ENV=production and ADMIN_TOKEN.
|
||||
func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
|
||||
// TestWorkspaceAuth_DevMode_NoBearer_FailsClosed pins the post-hardening
|
||||
// contract (harden/no-fail-open-auth): the former local-dev escape hatch on
|
||||
// WorkspaceAuth — which let a bearer-less request through when
|
||||
// MOLECULE_ENV=dev + ADMIN_TOKEN unset — is GONE. Under exactly those
|
||||
// conditions the request now 401s. Local dev authenticates with a
|
||||
// provisioned ADMIN_TOKEN handed to the Canvas (scripts/dev-start.sh).
|
||||
func TestWorkspaceAuth_DevMode_NoBearer_FailsClosed(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
|
||||
@@ -882,7 +893,9 @@ func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
|
||||
}
|
||||
defer mockDB.Close()
|
||||
|
||||
// No DB queries expected — the hatch short-circuits before any lookup.
|
||||
// No DB queries expected — WorkspaceAuth 401s before any lookup when
|
||||
// there is no bearer / cookie. The hatch that used to short-circuit
|
||||
// here no longer exists.
|
||||
|
||||
r := gin.New()
|
||||
r.GET("/workspaces/:id/activity", WorkspaceAuth(mockDB), func(c *gin.Context) {
|
||||
@@ -894,8 +907,8 @@ func TestWorkspaceAuth_DevModeEscapeHatch_NoBearer_FailsOpen(t *testing.T) {
|
||||
"/workspaces/00000000-0000-0000-0000-000000000000/activity", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("WorkspaceAuth dev-mode hatch: expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("WorkspaceAuth dev-mode fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -957,15 +970,14 @@ func TestWorkspaceAuth_DevModeEscapeHatch_IgnoredWhenAdminTokenSet(t *testing.T)
|
||||
}
|
||||
}
|
||||
|
||||
// TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens documents the
|
||||
// Tier-1b dev-mode escape hatch. When the platform runs with MOLECULE_ENV=development
|
||||
// and ADMIN_TOKEN is unset, AdminAuth must stay fail-open even after workspace
|
||||
// tokens land in the DB. This keeps the Canvas dashboard usable in local dev
|
||||
// after the first workspace is created (PR #1871 — quickstart bugless).
|
||||
//
|
||||
// SaaS never hits this path because tenant provisioning sets both
|
||||
// ADMIN_TOKEN and MOLECULE_ENV=production.
|
||||
func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
|
||||
// TestAdminAuth_DevMode_NoBearer_FailsClosed pins the post-hardening contract
|
||||
// (harden/no-fail-open-auth): the former Tier-1b dev-mode escape hatch — which
|
||||
// let AdminAuth pass a bearer-less request when MOLECULE_ENV=dev + ADMIN_TOKEN
|
||||
// unset, even with live tokens in the DB — is GONE. Under exactly those
|
||||
// conditions the request now 401s. Local dev authenticates with a provisioned
|
||||
// ADMIN_TOKEN handed to the Canvas as NEXT_PUBLIC_ADMIN_TOKEN
|
||||
// (scripts/dev-start.sh).
|
||||
func TestAdminAuth_DevMode_NoBearer_FailsClosed(t *testing.T) {
|
||||
t.Setenv("MOLECULE_ENV", "development")
|
||||
t.Setenv("ADMIN_TOKEN", "")
|
||||
|
||||
@@ -976,7 +988,7 @@ func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
|
||||
defer mockDB.Close()
|
||||
|
||||
// HasAnyLiveTokenGlobal returns 1 — tokens exist (post first-workspace).
|
||||
// The Tier-1 fail-open branch WOULD close here. Tier-1b must still open.
|
||||
// Probed for the 503-on-outage semantics, but it opens no path now.
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(1))
|
||||
|
||||
@@ -989,8 +1001,8 @@ func TestAdminAuth_DevModeEscapeHatch_FailsOpenWithHasLiveTokens(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodGet, "/workspaces", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("dev-mode escape hatch: expected 200, got %d: %s", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("dev-mode fail-closed: expected 401, got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
if err := mock.ExpectationsWereMet(); err != nil {
|
||||
t.Errorf("unmet sqlmock expectations: %v", err)
|
||||
@@ -1104,7 +1116,16 @@ func TestAdminAuth_Issue120_PatchWorkspace_NoBearer_Returns401(t *testing.T) {
|
||||
// Accepts bearer or a matching Origin header. MUST NOT be used anywhere a
|
||||
// forged request would leak data or create resources.
|
||||
|
||||
func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
|
||||
// TestCanvasOrBearer_NoTokens_FailsClosed pins the removal of the
|
||||
// lazy-bootstrap fail-open (harden/no-fail-open-auth): a zero-token install
|
||||
// must NOT pass everything through. A bearer-less request on a fresh install
|
||||
// (HasAnyLiveTokenGlobal → 0) now 401s. Bootstrap is via ADMIN_TOKEN
|
||||
// (scripts/dev-start.sh provisions it for local dev; operator/SaaS sets it in
|
||||
// production) — not a zero-config fail-open.
|
||||
//
|
||||
// Watch-it-fail: restore `if !hasLive { c.Next(); return }` in CanvasOrBearer
|
||||
// → this flips 401→200 and fails.
|
||||
func TestCanvasOrBearer_NoTokens_FailsClosed(t *testing.T) {
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock: %v", err)
|
||||
@@ -1114,8 +1135,10 @@ func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
|
||||
mock.ExpectQuery(hasAnyLiveTokenGlobalQuery).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"count"}).AddRow(0))
|
||||
|
||||
handlerCalled := false
|
||||
r := gin.New()
|
||||
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
|
||||
handlerCalled = true
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
})
|
||||
|
||||
@@ -1123,8 +1146,11 @@ func TestCanvasOrBearer_NoTokens_FailOpen(t *testing.T) {
|
||||
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("bootstrap fail-open: got %d, want 200 (%s)", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("zero-token install must fail CLOSED (lazy-bootstrap fail-open removed): got %d, want 401 (%s)", w.Code, w.Body.String())
|
||||
}
|
||||
if handlerCalled {
|
||||
t.Error("handler reached on a fresh-install bearer-less request — lazy-bootstrap fail-open not removed")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1195,7 +1221,16 @@ func TestCanvasOrBearer_TokensExist_WrongOrigin_Returns401(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCanvasOrBearer_TokensExist_CanvasOrigin_Passes(t *testing.T) {
|
||||
// TestCanvasOrBearer_TokensExist_ForgeableOrigin_NoBearer_FailsClosed pins the
|
||||
// removal of the cross-origin Origin-match cosmetic path
|
||||
// (harden/no-fail-open-auth). A no-bearer request whose forgeable Origin header
|
||||
// matches CORS_ORIGINS used to pass; it now 401s. The canvas always sends a
|
||||
// bearer (NEXT_PUBLIC_ADMIN_TOKEN), so legitimate traffic is unaffected, and a
|
||||
// curl that forges Origin can no longer reach even a cosmetic route.
|
||||
//
|
||||
// Watch-it-fail: restore `if canvasOriginAllowed(c.GetHeader("Origin")) {
|
||||
// c.Next(); return }` in CanvasOrBearer → this flips 401→200 and fails.
|
||||
func TestCanvasOrBearer_TokensExist_ForgeableOrigin_NoBearer_FailsClosed(t *testing.T) {
|
||||
mockDB, mock, err := sqlmock.New()
|
||||
if err != nil {
|
||||
t.Fatalf("sqlmock: %v", err)
|
||||
@@ -1207,18 +1242,24 @@ func TestCanvasOrBearer_TokensExist_CanvasOrigin_Passes(t *testing.T) {
|
||||
|
||||
t.Setenv("CORS_ORIGINS", "https://acme.moleculesai.app,https://bob.moleculesai.app")
|
||||
|
||||
handlerCalled := false
|
||||
r := gin.New()
|
||||
r.PUT("/canvas/viewport", CanvasOrBearer(mockDB), func(c *gin.Context) {
|
||||
handlerCalled = true
|
||||
c.JSON(http.StatusOK, gin.H{"ok": true})
|
||||
})
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
req, _ := http.NewRequest(http.MethodPut, "/canvas/viewport", nil)
|
||||
// A matching-but-forgeable Origin with NO bearer must NOT pass anymore.
|
||||
req.Header.Set("Origin", "https://acme.moleculesai.app")
|
||||
r.ServeHTTP(w, req)
|
||||
|
||||
if w.Code != http.StatusOK {
|
||||
t.Errorf("canvas origin: got %d, want 200 (%s)", w.Code, w.Body.String())
|
||||
if w.Code != http.StatusUnauthorized {
|
||||
t.Errorf("no-bearer request on a forgeable matching Origin must fail CLOSED (Origin-match path removed): got %d, want 401 (%s)", w.Code, w.Body.String())
|
||||
}
|
||||
if handlerCalled {
|
||||
t.Error("handler reached on a no-bearer forgeable-Origin request — Origin-match fail-open not removed")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1298,21 +1339,9 @@ func TestCanvasOrBearer_WrongOrigin_Blocked(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCanvasOriginAllowed_EmptyOriginRejected(t *testing.T) {
|
||||
if canvasOriginAllowed("") {
|
||||
t.Error("empty Origin must not pass")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCanvasOriginAllowed_LocalhostDefault(t *testing.T) {
|
||||
t.Setenv("CORS_ORIGINS", "")
|
||||
if !canvasOriginAllowed("http://localhost:3000") {
|
||||
t.Error("localhost:3000 should be allowed by default")
|
||||
}
|
||||
if canvasOriginAllowed("http://evil.example.com") {
|
||||
t.Error("random origin should not be allowed")
|
||||
}
|
||||
}
|
||||
// (harden/no-fail-open-auth) TestCanvasOriginAllowed_* were REMOVED along with
|
||||
// the canvasOriginAllowed helper they exercised — the forgeable cross-origin
|
||||
// Origin-match cosmetic path no longer exists in CanvasOrBearer.
|
||||
|
||||
// ── Issue #623 regression ─────────────────────────────────────────────────────
|
||||
// AdminAuth must NOT accept forged Origin headers. Any container on the Docker
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user