Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 3949788916 | |||
| eb055253ff | |||
| 66f3d0b0f6 |
@@ -23,7 +23,6 @@ import dataclasses
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
@@ -327,43 +326,6 @@ def update_pull(pr_number: int, *, dry_run: bool) -> None:
|
||||
)
|
||||
|
||||
|
||||
def wait_for_ci(
|
||||
head_sha: str,
|
||||
contexts: list[str],
|
||||
*,
|
||||
max_wait_seconds: int = 300,
|
||||
poll_interval: int = 15,
|
||||
) -> bool:
|
||||
"""Poll CI statuses for head_sha until all required contexts are terminal.
|
||||
|
||||
Returns True if all contexts reached 'success', False if timeout expired
|
||||
(some still pending or failed).
|
||||
|
||||
Background: after a queue-triggered PR update, CI re-runs on the new head.
|
||||
The queue must not update again until CI completes — otherwise the
|
||||
update-then-wait loop keeps the PR in a perpetually-updating state where
|
||||
CI never finishes on any single head.
|
||||
"""
|
||||
deadline = time.time() + max_wait_seconds
|
||||
while time.time() < deadline:
|
||||
time.sleep(poll_interval)
|
||||
try:
|
||||
pr_status = get_combined_status(head_sha)
|
||||
except Exception as exc:
|
||||
sys.stderr.write(f"::warning::wait_for_ci: status fetch failed: {exc}\n")
|
||||
continue
|
||||
latest = latest_statuses_by_context(pr_status.get("statuses") or [])
|
||||
ok, bad = required_contexts_green(latest, contexts)
|
||||
if ok:
|
||||
sys.stderr.write(f"::notice::wait_for_ci: all contexts green after {int(time.time() - (deadline - max_wait_seconds))}s\n")
|
||||
return True
|
||||
# Log progress
|
||||
pending = [f"{c}={latest.get(c, {}).get('status', 'missing')}" for c in contexts if latest.get(c, {}).get('status') != 'success']
|
||||
sys.stderr.write(f"::notice::wait_for_ci: still waiting ({int(deadline - time.time())}s left): {', '.join(pending[:3])}\n")
|
||||
sys.stderr.write(f"::warning::wait_for_ci: timeout after {max_wait_seconds}s; proceeding with merge check\n")
|
||||
return False
|
||||
|
||||
|
||||
def merge_pull(pr_number: int, *, dry_run: bool) -> None:
|
||||
payload = {
|
||||
"Do": "merge",
|
||||
@@ -376,24 +338,7 @@ def merge_pull(pr_number: int, *, dry_run: bool) -> None:
|
||||
print(f"::notice::merging PR #{pr_number}")
|
||||
if dry_run:
|
||||
return
|
||||
# Gitea's merge endpoint returns HTTP 200 with an empty body on success.
|
||||
# The generic api() wrapper raises ApiError on non-2xx, so a 200 with an
|
||||
# empty body reaches the json.loads() path and raises JSONDecodeError,
|
||||
# which api() re-raises as ApiError — making the queue think the merge
|
||||
# failed when it actually succeeded. Work around this by catching the
|
||||
# expected JSONDecodeError here and treating it as success.
|
||||
try:
|
||||
api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
|
||||
except ApiError as exc:
|
||||
# Surface non-merge errors (5xx server errors, 403 forbidden, etc.)
|
||||
if "merge" in str(exc).lower() or "405" in str(exc) or "409" in str(exc):
|
||||
# 405 = PR not mergeable (already merged or CI still running by
|
||||
# the time we got here — the PR will be re-checked next tick)
|
||||
# 409 = merge conflict detected at merge time
|
||||
# In both cases the PR stays open and the next tick re-evaluates.
|
||||
sys.stderr.write(f"::warning::merge call returned: {exc}\n")
|
||||
else:
|
||||
raise
|
||||
api("POST", f"/repos/{OWNER}/{NAME}/pulls/{pr_number}/merge", body=payload, expect_json=False)
|
||||
|
||||
|
||||
def process_once(*, dry_run: bool = False) -> int:
|
||||
@@ -445,32 +390,6 @@ def process_once(*, dry_run: bool = False) -> int:
|
||||
print(f"::notice::PR #{pr_number} decision={decision.action}: {decision.reason}")
|
||||
if decision.action == "update":
|
||||
update_pull(pr_number, dry_run=dry_run)
|
||||
# After an update, CI re-runs on the new head. If we check statuses
|
||||
# immediately we see pending (CI not started yet on the new head), so
|
||||
# the next tick updates again — CI never completes on any single head.
|
||||
# Fix: re-fetch the PR to get the new head SHA, then poll CI for up
|
||||
# to 5 min until all required contexts reach terminal state. If CI
|
||||
# finishes in time, proceed to merge on the same tick.
|
||||
if not dry_run:
|
||||
updated_pr = get_pull(pr_number)
|
||||
new_head = updated_pr.get("head", {}).get("sha", "")
|
||||
if new_head and new_head != head_sha:
|
||||
sys.stderr.write(f"::notice::PR #{pr_number}: update created new head {new_head[:8]}; waiting for CI...\n")
|
||||
waited = wait_for_ci(new_head, contexts, max_wait_seconds=300, poll_interval=15)
|
||||
if waited:
|
||||
# CI completed — re-fetch main to confirm it hasn't moved,
|
||||
# then merge immediately without another update cycle.
|
||||
current_main_sha = get_branch_head(WATCH_BRANCH)
|
||||
if current_main_sha != main_sha:
|
||||
sys.stderr.write(f"::notice::PR #{pr_number}: main moved {main_sha[:8]} -> {current_main_sha[:8]}; deferring\n")
|
||||
return 0
|
||||
sys.stderr.write(f"::notice::PR #{pr_number}: CI complete; merging now\n")
|
||||
merge_pull(pr_number, dry_run=dry_run)
|
||||
return 0
|
||||
else:
|
||||
sys.stderr.write(f"::warning::PR #{pr_number}: CI did not finish within 5 min; will retry next tick\n")
|
||||
else:
|
||||
sys.stderr.write(f"::notice::PR #{pr_number}: update did not change head SHA; will retry\n")
|
||||
post_comment(
|
||||
pr_number,
|
||||
(
|
||||
@@ -481,13 +400,6 @@ def process_once(*, dry_run: bool = False) -> int:
|
||||
)
|
||||
return 0
|
||||
if decision.ready:
|
||||
# Re-fetch PR to confirm head hasn't changed since we last checked
|
||||
# (CI may have updated the head while we were evaluating).
|
||||
current_pr = get_pull(pr_number)
|
||||
current_head = current_pr.get("head", {}).get("sha", "")
|
||||
if current_head != head_sha:
|
||||
print(f"::notice::PR #{pr_number} head changed {head_sha[:8]} -> {current_head[:8]}; re-evaluating")
|
||||
return 0
|
||||
latest_main_sha = get_branch_head(WATCH_BRANCH)
|
||||
if latest_main_sha != main_sha:
|
||||
print(
|
||||
|
||||
+25
-168
@@ -68,7 +68,7 @@ import sys
|
||||
import urllib.error
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from typing import Any, Callable
|
||||
from typing import Any
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -110,7 +110,7 @@ def normalize_slug(raw: str, numeric_aliases: dict[int, str] | None = None) -> s
|
||||
# for /sop-revoke (RFC#351 open question 4 — reason is captured but not
|
||||
# yet validated; future iteration may require a min-length).
|
||||
_DIRECTIVE_RE = re.compile(
|
||||
r"^[ \t]*/(sop-ack|sop-revoke|sop-n/a)[ \t]+([A-Za-z0-9_\- ]+?)(?:[ \t]+(.*))?[ \t]*$",
|
||||
r"^[ \t]*/(sop-ack|sop-revoke)[ \t]+([A-Za-z0-9_\- ]+?)(?:[ \t]+(.*))?[ \t]*$",
|
||||
re.MULTILINE,
|
||||
)
|
||||
|
||||
@@ -118,21 +118,19 @@ _DIRECTIVE_RE = re.compile(
|
||||
def parse_directives(
|
||||
comment_body: str,
|
||||
numeric_aliases: dict[int, str],
|
||||
) -> tuple[list[tuple[str, str, str]], list[tuple[str, str, str]]]:
|
||||
"""Extract /sop-ack, /sop-revoke, and /sop-n/a directives from a comment body.
|
||||
) -> tuple[list[tuple[str, str, str]], list]:
|
||||
"""Extract /sop-ack and /sop-revoke directives from a comment body.
|
||||
|
||||
Returns (directives, na_directives) where each is a list of
|
||||
(kind, canonical_slug, note) tuples:
|
||||
kind is "sop-ack", "sop-revoke", or "sop-n/a"
|
||||
canonical_slug is the normalized form (or "" if unparseable)
|
||||
note is the trailing free-text (may be "")
|
||||
The two lists are kept separate so call sites can unpack them
|
||||
directly (e.g. directives, na_directives = parse_directives(...)).
|
||||
Returns (directives, na_directives) where:
|
||||
directives is a list of (kind, canonical_slug, note) tuples
|
||||
kind is "sop-ack" or "sop-revoke"
|
||||
canonical_slug is the normalized form (or "" if unparseable)
|
||||
note is the trailing free-text (may be "")
|
||||
na_directives is reserved for future N/A handling (always [] for now)
|
||||
"""
|
||||
directives: list[tuple[str, str, str]] = []
|
||||
na_directives: list[tuple[str, str, str]] = []
|
||||
out: list[tuple[str, str, str]] = []
|
||||
if not comment_body:
|
||||
return directives, na_directives
|
||||
return out, []
|
||||
for m in _DIRECTIVE_RE.finditer(comment_body):
|
||||
kind = m.group(1)
|
||||
raw_slug = (m.group(2) or "").strip()
|
||||
@@ -162,12 +160,8 @@ def parse_directives(
|
||||
note_from_group = (m.group(3) or "").strip()
|
||||
# If we collapsed multi-word slug into kebab and there's a
|
||||
# trailing-text group too, append it.
|
||||
entry = (kind, canonical, note_from_group)
|
||||
if kind == "sop-n/a":
|
||||
na_directives.append(entry)
|
||||
else:
|
||||
directives.append(entry)
|
||||
return directives, na_directives
|
||||
out.append((kind, canonical, note_from_group))
|
||||
return out, []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -180,8 +174,8 @@ def section_marker_present(body: str, marker: str) -> bool:
|
||||
on a non-empty line (i.e. the author actually filled it in).
|
||||
|
||||
We require the marker substring AND non-whitespace content on the
|
||||
same line OR within the next non-blank line — this prevents
|
||||
trivially-empty checklists like:
|
||||
same line OR within the next line — this prevents trivially-empty
|
||||
checklists like:
|
||||
|
||||
## SOP-Checklist
|
||||
- [ ] **Comprehensive testing performed**:
|
||||
@@ -190,18 +184,9 @@ def section_marker_present(body: str, marker: str) -> bool:
|
||||
from auto-passing the section-present check. The peer-ack is still
|
||||
required, but answering with empty content is captured as a soft
|
||||
finding via the section-present test alone.
|
||||
|
||||
NOTE: we scan forward through blank lines (the markdown-header pattern
|
||||
is ## Header\\n\\ncontent) so that a header + blank-line + content
|
||||
structure still satisfies the check. The backward checkbox fallback
|
||||
catches inline markers without a preceding checkbox (mc#1099).
|
||||
"""
|
||||
if not body or not marker:
|
||||
return False
|
||||
# Strip trailing whitespace so the blank-line scan below can find
|
||||
# content that appears on the very last line of the body (without
|
||||
# being misled by a trailing \n or spaces).
|
||||
body = body.rstrip()
|
||||
body_lower = body.lower()
|
||||
marker_lower = marker.lower()
|
||||
idx = body_lower.find(marker_lower)
|
||||
@@ -217,44 +202,13 @@ def section_marker_present(body: str, marker: str) -> bool:
|
||||
stripped = re.sub(r"[\s\*:\-\[\]]+", "", line)
|
||||
if stripped:
|
||||
return True
|
||||
# Fall through: scan forward, skipping blank-only lines, until we find
|
||||
# non-empty content or run out of body. Handles:
|
||||
# ## Header ← marker line (empty after marker)
|
||||
# ← blank line (skipped)
|
||||
# - actual content ← found
|
||||
pos = line_end
|
||||
while True:
|
||||
# Skip the current newline and any additional newlines (blank lines).
|
||||
while pos < len(body) and body[pos] == "\n":
|
||||
pos += 1
|
||||
if pos >= len(body):
|
||||
break
|
||||
line_end = body.find("\n", pos)
|
||||
if line_end < 0:
|
||||
line_end = len(body)
|
||||
line = body[pos:line_end]
|
||||
stripped = re.sub(r"[\s\*:\-\[\]]+", "", line)
|
||||
if stripped:
|
||||
return True
|
||||
pos = line_end
|
||||
# Last resort: the marker may appear mid-sentence (e.g.
|
||||
# **Memory/saved-feedback consulted**: No applicable...).
|
||||
# Search backward within the CURRENT LINE only (not preceding lines)
|
||||
# to find a checkbox on the same line before the marker text.
|
||||
# mc#1099 follow-up: memory-consulted detection was failing because
|
||||
# the checkbox was on the same line before the inline marker.
|
||||
_CHECKBOX_RE = re.compile(r"- \[[ x\]]|<input", re.IGNORECASE)
|
||||
line_start = body.rfind("\n", 0, idx) + 1 # 0 if no newline before idx
|
||||
before = body[line_start:idx]
|
||||
m = _CHECKBOX_RE.search(before)
|
||||
if not m:
|
||||
return False
|
||||
# Require meaningful content between the checkbox and the marker text
|
||||
# (markdown formatting like ** or * must also be stripped).
|
||||
# If only whitespace/markdown chars remain, the checkbox line is empty.
|
||||
between = before[m.end() :]
|
||||
stripped_between = re.sub(r"[\s\*:#\[\]_\-]+", "", between)
|
||||
return bool(stripped_between)
|
||||
# Fall through: check the NEXT line (multi-line answers).
|
||||
next_line_end = body.find("\n", line_end + 1)
|
||||
if next_line_end < 0:
|
||||
next_line_end = len(body)
|
||||
next_line = body[line_end + 1:next_line_end]
|
||||
stripped_next = re.sub(r"[\s\*:\-\[\]]+", "", next_line)
|
||||
return bool(stripped_next)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -297,7 +251,8 @@ def compute_ack_state(
|
||||
user = (c.get("user") or {}).get("login", "")
|
||||
if not user:
|
||||
continue
|
||||
for kind, slug, _note in parse_directives(body, numeric_aliases)[0]:
|
||||
directives, _na = parse_directives(body, numeric_aliases)
|
||||
for kind, slug, _note in directives:
|
||||
if not slug:
|
||||
unparseable_per_user[user] = unparseable_per_user.get(user, 0) + 1
|
||||
continue
|
||||
@@ -349,63 +304,6 @@ def compute_ack_state(
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# N/A-gate evaluation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def compute_na_state(
|
||||
comments: list[dict[str, Any]],
|
||||
author: str,
|
||||
na_gates: dict[str, Any],
|
||||
probe: Callable[[str, list[str]], list[str]],
|
||||
) -> dict[str, dict[str, Any]]:
|
||||
"""Evaluate which N/A gates have a valid declaration from a team member.
|
||||
|
||||
Returns dict[gate_name, dict] where each dict has:
|
||||
declared: bool — at least one valid non-author team-member declared N/A
|
||||
decl_ackers: list[str] — usernames who declared this gate N/A
|
||||
rejected: dict with keys:
|
||||
not_in_team: list[str] — users who tried but aren't in required teams
|
||||
"""
|
||||
# Build per-user latest N/A directive (most-recent wins per RFC#324).
|
||||
latest_na: dict[str, tuple[str, str]] = {} # user → (gate, note)
|
||||
for c in comments:
|
||||
body = c.get("body", "") or ""
|
||||
user = (c.get("user") or {}).get("login", "")
|
||||
if not user:
|
||||
continue
|
||||
for kind, gate, note in parse_directives(body, {})[1]:
|
||||
# [1] = na_directives only
|
||||
if gate in na_gates:
|
||||
latest_na[user] = (gate, note)
|
||||
|
||||
result: dict[str, dict[str, Any]] = {}
|
||||
for gate, gate_cfg in na_gates.items():
|
||||
result[gate] = {
|
||||
"declared": False,
|
||||
"decl_ackers": [],
|
||||
"rejected": {"not_in_team": []},
|
||||
}
|
||||
decl_ackers: list[str] = []
|
||||
not_in_team: list[str] = []
|
||||
for user, (g, _note) in latest_na.items():
|
||||
if g != gate:
|
||||
continue
|
||||
if user == author:
|
||||
continue # authors cannot self-declare N/A
|
||||
approved = probe(gate, [user])
|
||||
if approved:
|
||||
decl_ackers.append(user)
|
||||
else:
|
||||
not_in_team.append(user)
|
||||
result[gate]["declared"] = bool(decl_ackers)
|
||||
result[gate]["decl_ackers"] = decl_ackers
|
||||
result[gate]["rejected"]["not_in_team"] = not_in_team
|
||||
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gitea API client
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -800,7 +698,6 @@ def main(argv: list[str] | None = None) -> int:
|
||||
cfg = load_config(args.config)
|
||||
items: list[dict[str, Any]] = cfg["items"]
|
||||
items_by_slug = {it["slug"]: it for it in items}
|
||||
na_gates: dict[str, Any] = cfg.get("n/a_gates", {})
|
||||
numeric_aliases = {
|
||||
int(it["numeric_alias"]): it["slug"] for it in items if it.get("numeric_alias")
|
||||
}
|
||||
@@ -921,46 +818,6 @@ def main(argv: list[str] | None = None) -> int:
|
||||
description=description, target_url=target_url,
|
||||
)
|
||||
print(f"::notice::status posted: {args.status_context} → {state}")
|
||||
|
||||
# --- N/A gate status (RFC#324 §N/A follow-up) ---
|
||||
# Post a separate status so review-check.sh can discover N/A declarations
|
||||
# and waive the Gitea-approve requirement for that gate.
|
||||
na_state: dict[str, dict[str, Any]] = {}
|
||||
if na_gates:
|
||||
na_state = compute_na_state(comments, author, na_gates, probe)
|
||||
|
||||
na_descs: list[str] = []
|
||||
for gate, s in na_state.items():
|
||||
if s["declared"]:
|
||||
na_descs.append(gate)
|
||||
decl = s["decl_ackers"]
|
||||
rej = s["rejected"]["not_in_team"]
|
||||
if decl:
|
||||
print(f"::notice:: [N/A OK] {gate} — declared by {','.join(decl)}")
|
||||
if rej:
|
||||
print(
|
||||
f"::notice:: [N/A REJ] {gate} — not-in-team: {','.join(rej)}",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
na_desc = ", ".join(sorted(na_descs)) if na_descs else "(none)"
|
||||
na_status_state = "success" if na_descs else "pending"
|
||||
# review-check.sh reads the description to discover which gates are N/A.
|
||||
# Include the gate names so it can grep for them.
|
||||
na_description = f"N/A: {na_desc}" if na_descs else "N/A: (none)"
|
||||
|
||||
if not args.dry_run:
|
||||
client.post_status(
|
||||
args.owner, args.repo, head_sha,
|
||||
state=na_status_state,
|
||||
context="sop-checklist / na-declarations (pull_request)",
|
||||
description=na_description,
|
||||
target_url=target_url,
|
||||
)
|
||||
print(
|
||||
f"::notice::na-declarations status → {na_status_state}: {na_description}"
|
||||
)
|
||||
|
||||
# By default exit 0 — the POSTed status IS the gate, NOT the job
|
||||
# conclusion. If the job exits 1 BP will see TWO failure signals
|
||||
# (one from the job's auto-status, one from our POST), making the
|
||||
|
||||
@@ -551,55 +551,3 @@ class TestEndToEndAckFlow(unittest.TestCase):
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main(verbosity=2)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# compute_na_state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestComputeNaState(unittest.TestCase):
|
||||
"""Tests for /sop-n/a directive evaluation."""
|
||||
|
||||
def test_no_na_declarations(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
na_gates = cfg.get("n/a_gates", {})
|
||||
comments = []
|
||||
na_state = sop.compute_na_state(comments, "alice", na_gates, lambda *_: [])
|
||||
self.assertFalse(na_state["qa-review"]["declared"])
|
||||
self.assertFalse(na_state["security-review"]["declared"])
|
||||
|
||||
def test_na_declared_by_authorized_user(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
na_gates = cfg.get("n/a_gates", {})
|
||||
comments = [_comment("bob", "/sop-n/a qa-review N/A: pure tooling change")]
|
||||
na_state = sop.compute_na_state(comments, "alice", na_gates, lambda g, u: u)
|
||||
self.assertTrue(na_state["qa-review"]["declared"])
|
||||
self.assertEqual(na_state["qa-review"]["decl_ackers"], ["bob"])
|
||||
|
||||
def test_na_declared_by_unauthorized_user_rejected(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
na_gates = cfg.get("n/a_gates", {})
|
||||
comments = [_comment("mallory", "/sop-n/a qa-review N/A: not real team")]
|
||||
na_state = sop.compute_na_state(comments, "alice", na_gates, lambda g, u: [])
|
||||
self.assertFalse(na_state["qa-review"]["declared"])
|
||||
self.assertEqual(na_state["qa-review"]["rejected"]["not_in_team"], ["mallory"])
|
||||
|
||||
def test_author_cannot_self_declare_na(self):
|
||||
cfg = sop.load_config(CONFIG_PATH)
|
||||
na_gates = cfg.get("n/a_gates", {})
|
||||
comments = [_comment("alice", "/sop-n/a qa-review N/A: I am the author")]
|
||||
na_state = sop.compute_na_state(comments, "alice", na_gates, lambda g, u: u)
|
||||
self.assertFalse(na_state["qa-review"]["declared"])
|
||||
|
||||
def test_parse_directives_separates_na_from_ack(self):
|
||||
directives, na_directives = sop.parse_directives(
|
||||
"/sop-ack comprehensive-testing\n/sop-n/a qa-review N/A: no surface",
|
||||
{},
|
||||
)
|
||||
self.assertEqual(len(directives), 1)
|
||||
self.assertEqual(directives[0][0], "sop-ack")
|
||||
self.assertEqual(len(na_directives), 1)
|
||||
self.assertEqual(na_directives[0][0], "sop-n/a")
|
||||
self.assertEqual(na_directives[0][1], "qa-review")
|
||||
self.assertIn("no surface", na_directives[0][2])
|
||||
|
||||
@@ -57,7 +57,7 @@ permissions:
|
||||
# can produce duplicate comments before the title-search dedup wins.
|
||||
concurrency:
|
||||
group: ci-required-drift
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
drift:
|
||||
|
||||
@@ -80,7 +80,7 @@ permissions:
|
||||
# stacking up.
|
||||
concurrency:
|
||||
group: continuous-synth-e2e
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
@@ -90,7 +90,7 @@ concurrency:
|
||||
# would let a queued staging/main push behind a PR run get cancelled,
|
||||
# leaving any gate that reads "completed run at SHA" stuck.
|
||||
group: e2e-peer-visibility-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
@@ -101,6 +101,7 @@ jobs:
|
||||
# push/dispatch/cron only (30+ min). This is NOT a fake-green mask of
|
||||
# the real assertion — it validates the driving script's bash syntax
|
||||
# and inline-python so a broken test script fails at PR time.
|
||||
# bp-required: pending #1296 ← flip to required once e2e-peer-visibility is green
|
||||
pr-validate:
|
||||
name: E2E Peer Visibility
|
||||
runs-on: ubuntu-latest
|
||||
@@ -118,6 +119,7 @@ jobs:
|
||||
# Real gate: provisions a throwaway org + sibling-per-runtime, drives
|
||||
# the LITERAL list_peers MCP call per runtime, asserts 200 + expected
|
||||
# peer set, then scoped teardown. push(main)/dispatch/cron only.
|
||||
# bp-required: pending #1296 ← flip to required once e2e-peer-visibility is green
|
||||
peer-visibility:
|
||||
name: E2E Peer Visibility
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
@@ -61,7 +61,7 @@ concurrency:
|
||||
# wasted CI is acceptable given the alternative is losing staging-tip
|
||||
# data that auto-promote-staging needs.
|
||||
group: e2e-staging-canvas-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
@@ -71,7 +71,7 @@ on:
|
||||
|
||||
concurrency:
|
||||
group: e2e-staging-external
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -72,7 +72,7 @@ on:
|
||||
# teardown step and leave orphan EC2s.
|
||||
concurrency:
|
||||
group: e2e-staging-saas
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
@@ -26,7 +26,7 @@ env:
|
||||
|
||||
concurrency:
|
||||
group: e2e-staging-sanity
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
@@ -32,12 +32,6 @@ on:
|
||||
# iterating all open PRs when PR_NUMBER is empty.
|
||||
workflow_dispatch:
|
||||
|
||||
# Cancel stale runs so the 8-runner pool stays available for PR jobs.
|
||||
# Per-SHA group ensures push and cron runs at different SHAs don't cancel each other.
|
||||
concurrency:
|
||||
group: gate-check-v3-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
# read: contents — for checkout (base ref, not PR head for security)
|
||||
# read: pull-requests — for reading PR info via API
|
||||
|
||||
@@ -22,7 +22,7 @@ permissions:
|
||||
|
||||
concurrency:
|
||||
group: gitea-merge-queue-${{ github.repository }}
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
queue:
|
||||
|
||||
@@ -58,7 +58,7 @@ permissions:
|
||||
# POSTs can produce duplicates before the title search dedup wins.
|
||||
concurrency:
|
||||
group: main-red-watchdog
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
watchdog:
|
||||
|
||||
@@ -49,17 +49,13 @@ jobs:
|
||||
# bp-exempt: post-merge image publication side effect; CI / all-required gates source changes.
|
||||
build-and-push:
|
||||
name: Build & push canvas image
|
||||
# Dedicated publish/release lane (internal#462 / #394 / #399). Ship
|
||||
# path (on: push:main, canvas/**) — reserved capacity so a merged
|
||||
# canvas fix's image build never FIFO-queues behind PR required-CI.
|
||||
# The `publish` label resolves ONLY to the molecule-runner-publish-*
|
||||
# sub-pool (config.publish.yaml). HARD DEPENDENCY: this MUST land
|
||||
# AFTER the publish-lane runners are registered/advertising `publish`
|
||||
# — the earlier #599 `docker` label attempt queued indefinitely with
|
||||
# zero eligible runners precisely because the label was targeted
|
||||
# before any runner advertised it (see #576). The lane is registered
|
||||
# in this rollout (internal#462) so the precondition holds.
|
||||
runs-on: publish
|
||||
# REVERTED (infra/revert-docker-runner-label): `runs-on: ubuntu-latest` restored.
|
||||
# The `docker` label is not registered on any act_runner. `runs-on: [ubuntu-latest, docker]`
|
||||
# causes jobs to queue indefinitely with zero eligible runners — strictly worse than the
|
||||
# pre-#599 coin-flip (50% success rate). Once the `docker` label is registered on
|
||||
# ≥2 runners, re-apply the fix from #599 (infra/docker-runner-label).
|
||||
# See issue #576 + infra-lead pulse ~00:30Z.
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
@@ -66,10 +66,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
# Dedicated publish/release lane (internal#462 / #394 / #399). Ship
|
||||
# path (on: push tag runtime-v*) — reserved capacity, never FIFO
|
||||
# behind PR-CI. `publish` resolves only to molecule-runner-publish-*.
|
||||
runs-on: publish
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
version: ${{ steps.version.outputs.version }}
|
||||
wheel_sha256: ${{ steps.wheel_hash.outputs.wheel_sha256 }}
|
||||
@@ -169,9 +166,7 @@ jobs:
|
||||
|
||||
cascade:
|
||||
needs: publish
|
||||
# Publish/release lane (internal#462) — downstream of the runtime
|
||||
# publish ship job; keep it on the reserved lane too.
|
||||
runs-on: publish
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Wait for PyPI to propagate the new version
|
||||
env:
|
||||
|
||||
@@ -54,14 +54,7 @@ env:
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
# Dedicated publish/release lane (internal#462 / #394 / #399). This
|
||||
# is a post-merge ship job (on: push:main) — it must NOT FIFO-compete
|
||||
# with PR required-CI on the shared pool (PR#1350's prod image build
|
||||
# was delayed ~25min this way). The `publish` label resolves ONLY to
|
||||
# the reserved molecule-runner-publish-* sub-pool (config.publish.yaml,
|
||||
# OUTSIDE the managed 1..20 range) so a merged fix's image build
|
||||
# starts immediately while PR-CI keeps the general pool.
|
||||
runs-on: publish
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
@@ -188,9 +181,7 @@ jobs:
|
||||
name: Production auto-deploy
|
||||
needs: build-and-push
|
||||
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
|
||||
# Publish/release lane (internal#462) — production deploy of a merged
|
||||
# fix; reserved capacity, never queued behind PR-CI.
|
||||
runs-on: publish
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 75
|
||||
env:
|
||||
CP_URL: ${{ vars.PROD_CP_URL || 'https://api.moleculesai.app' }}
|
||||
|
||||
@@ -40,7 +40,7 @@ env:
|
||||
|
||||
concurrency:
|
||||
group: railway-pin-audit
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
@@ -68,10 +68,7 @@ jobs:
|
||||
# bp-exempt: production redeploy is a side-effect workflow, not a merge gate.
|
||||
redeploy:
|
||||
if: ${{ github.event_name == 'workflow_dispatch' }}
|
||||
# Dedicated publish/release lane (internal#462 / #394 / #399).
|
||||
# Production tenant redeploy — a deploy action, reserved capacity so
|
||||
# it never queues behind PR-CI. `publish` -> molecule-runner-publish-*.
|
||||
runs-on: publish
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
@@ -75,10 +75,7 @@ env:
|
||||
jobs:
|
||||
# bp-exempt: post-merge staging redeploy side effect; CI / all-required gates source changes.
|
||||
redeploy:
|
||||
# Dedicated publish/release lane (internal#462 / #394 / #399).
|
||||
# Post-merge staging redeploy — a deploy action, reserved capacity.
|
||||
# `publish` -> molecule-runner-publish-* sub-pool.
|
||||
runs-on: publish
|
||||
runs-on: ubuntu-latest
|
||||
# Phase 3 (RFC #219 §1): surface broken workflows without blocking.
|
||||
# mc#774: pre-existing continue-on-error mask; root-fix and remove, do not renew silently.
|
||||
continue-on-error: true
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
# Consolidated comment dispatcher for manual review/tier refires.
|
||||
# DEPRECATED — superseded by `.gitea/workflows/sop-checklist.yml`.
|
||||
#
|
||||
# The review-refire logic (qa/security/tier slash-command dispatch) has been
|
||||
# merged into sop-checklist.yml as the `review-refire` job. This workflow
|
||||
# is kept as a no-op stub to avoid a gap during the transition window where
|
||||
# this file may be deleted while sop-checklist.yml has not yet been merged.
|
||||
#
|
||||
# After sop-checklist.yml lands, this file will be deleted (issue #1280).
|
||||
#
|
||||
# Historical behavior (superseded):
|
||||
# Gitea 1.22 queues one run per workflow subscribed to `issue_comment` before
|
||||
# evaluating job-level `if:`. SOP-heavy PRs therefore created queue storms when
|
||||
# qa-review, security-review, sop-checklist, and sop-tier-refire all
|
||||
# listened to comments. This workflow is the single non-SOP comment subscriber:
|
||||
# ordinary comments no-op quickly; slash commands post the required status
|
||||
# contexts to the PR head SHA.
|
||||
# evaluating job-level `if:`. Previously this workflow was the single
|
||||
# non-SOP comment subscriber for qa/security/tier refire slash commands.
|
||||
|
||||
name: review-refire-comments
|
||||
|
||||
@@ -23,91 +28,12 @@ concurrency:
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# No-op stub — all refire logic moved to sop-checklist.yml review-refire job.
|
||||
# Kept to avoid transition gap; will be deleted after sop-checklist.yml merges.
|
||||
dispatch:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Classify comment
|
||||
id: classify
|
||||
env:
|
||||
COMMENT_BODY: ${{ github.event.comment.body }}
|
||||
IS_PR: ${{ github.event.issue.pull_request != null }}
|
||||
- name: Deprecated — refire logic moved to sop-checklist.yml
|
||||
run: |
|
||||
set -euo pipefail
|
||||
{
|
||||
echo "run_qa=false"
|
||||
echo "run_security=false"
|
||||
echo "run_tier=false"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
if [ "$IS_PR" != "true" ]; then
|
||||
echo "::notice::not a PR comment; no-op"
|
||||
exit 0
|
||||
fi
|
||||
first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
|
||||
case "$first_line" in
|
||||
/qa-recheck*)
|
||||
echo "run_qa=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
/security-recheck*)
|
||||
echo "run_security=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
/refire-tier-check*)
|
||||
echo "run_tier=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
*)
|
||||
echo "::notice::no supported review refire slash command; no-op"
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Check out BASE ref for trusted scripts
|
||||
if: |
|
||||
steps.classify.outputs.run_qa == 'true' ||
|
||||
steps.classify.outputs.run_security == 'true' ||
|
||||
steps.classify.outputs.run_tier == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Refire qa-review status
|
||||
if: steps.classify.outputs.run_qa == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
TEAM: qa
|
||||
TEAM_ID: '20'
|
||||
REVIEW_CHECK_DEBUG: '0'
|
||||
REVIEW_CHECK_STRICT: '0'
|
||||
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
.gitea/scripts/review-refire-status.sh
|
||||
|
||||
- name: Refire security-review status
|
||||
if: steps.classify.outputs.run_security == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
TEAM: security
|
||||
TEAM_ID: '21'
|
||||
REVIEW_CHECK_DEBUG: '0'
|
||||
REVIEW_CHECK_STRICT: '0'
|
||||
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
.gitea/scripts/review-refire-status.sh
|
||||
|
||||
- name: Refire sop-tier-check status
|
||||
if: steps.classify.outputs.run_tier == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
|
||||
SOP_DEBUG: '0'
|
||||
run: bash .gitea/scripts/sop-tier-refire.sh
|
||||
echo "::warning::review-refire-comments.yml is deprecated. Refire logic is now in sop-checklist.yml review-refire job. This workflow is a no-op stub pending deletion (issue #1280)."
|
||||
exit 0
|
||||
|
||||
@@ -44,12 +44,6 @@ on:
|
||||
- ".github/scripts/lint_secret_pattern_drift.py"
|
||||
- ".githooks/pre-commit"
|
||||
|
||||
# Cancel stale runs to keep the 8-runner pool available for PR jobs.
|
||||
# Per-SHA group ensures push and scheduled runs at different SHAs don't cancel each other.
|
||||
concurrency:
|
||||
group: secret-pattern-drift-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
GITHUB_SERVER_URL: https://git.moleculesai.app
|
||||
|
||||
|
||||
@@ -2,24 +2,20 @@
|
||||
#
|
||||
# RFC#351 Step 2 of 6 (implementation MVP).
|
||||
#
|
||||
# === DESIGN ===
|
||||
# === CONSOLIDATION (issue #1280) ===
|
||||
#
|
||||
# Goal: each PR must answer 7 SOP-checklist questions in its body,
|
||||
# and each item must have at least one /sop-ack <slug> comment from
|
||||
# a non-author peer in the required team. BP requires the
|
||||
# `sop-checklist / all-items-acked (pull_request)` status to merge.
|
||||
# This workflow is the SINGLE `issue_comment` subscriber — the logic from
|
||||
# `review-refire-comments.yml` has been merged in. Before this change:
|
||||
# - sop-checklist.yml (pre-2026-05-16) → issue_comment:[created,edited,deleted] → runner slot used, job no-oped
|
||||
# - review-refire-comments.yml → issue_comment:[created] → runner slot used, job no-oped
|
||||
# → every non-refire comment occupied 2 runner slots for ~800 s each
|
||||
# (~650 no-op runs/day, ~1,300 runner-slot-occupancy-hours/day).
|
||||
#
|
||||
# Triggers:
|
||||
# - `pull_request_target`: opened, edited, synchronize, reopened
|
||||
# → fires when PR opens, body is edited (refire — RFC#351 §4),
|
||||
# or new code is pushed (head.sha changes → stale status would
|
||||
# be auto-discarded by BP via dismiss_stale_reviews, but the
|
||||
# status itself is per-SHA so we re-post on the new head).
|
||||
# - `issue_comment`: created, edited, deleted
|
||||
# → fires on any new comment so /sop-ack / /sop-revoke take
|
||||
# effect immediately (Gitea 1.22.6 doesn't refire on
|
||||
# pull_request_review per feedback_pull_request_review_no_refire,
|
||||
# so issue_comment is the canonical refire channel).
|
||||
# Fix (PR #1345 / issue #1280):
|
||||
# - ONE workflow, ONE issue_comment:[created] subscription (no edited/deleted)
|
||||
# - all-items-acked job: pull_request_target OR sop slash-command comments
|
||||
# - review-refire job: qa/security/tier refire slash commands
|
||||
# → ~50% reduction in comment-triggered runner occupancy vs pre-fix.
|
||||
#
|
||||
# Trust boundary (mirrors RFC#324 §A4 + sop-tier-check security note):
|
||||
# `pull_request_target` (not `pull_request`) — workflow def is loaded
|
||||
@@ -51,7 +47,7 @@
|
||||
# /sop-ack <slug-or-numeric-alias> [optional note]
|
||||
# — register a peer-ack for one checklist item.
|
||||
# — slug accepts kebab-case, snake_case, or natural-spaces
|
||||
# (all normalize to canonical kebab-case).
|
||||
# (all normalized to canonical kebab-case).
|
||||
# — numeric 1..7 maps via config.items[*].numeric_alias.
|
||||
# — most-recent (user, slug) directive wins.
|
||||
#
|
||||
@@ -61,6 +57,13 @@
|
||||
# — most-recent (user, slug) directive wins, so a later /sop-ack
|
||||
# re-restores the ack.
|
||||
#
|
||||
# /sop-n/a <gate> [reason]
|
||||
# — declare a gate (qa-review, security-review) N/A.
|
||||
# — see sop-checklist-config.yaml n/a_gates section.
|
||||
#
|
||||
# /qa-recheck /security-recheck /refire-tier-check
|
||||
# — refire the corresponding status check on the PR head.
|
||||
#
|
||||
# The eval is read-only + idempotent (read PR + comments + team
|
||||
# membership, compute, post status). Re-running on any event is safe —
|
||||
# the new status overwrites the previous one for the same context.
|
||||
@@ -79,22 +82,21 @@ on:
|
||||
pull_request_target:
|
||||
types: [opened, edited, synchronize, reopened, labeled, unlabeled]
|
||||
issue_comment:
|
||||
types: [created, edited, deleted]
|
||||
types: [created] # NOT [created, edited, deleted] — Gitea 1.22.6 holds a runner slot
|
||||
# at job-parsing time, before job-level if: guards run. edited/deleted events
|
||||
# occupied ~1,300 runner-slot-hours/day on this workflow alone during the
|
||||
# 2026-05-16 freeze. Per PR #1345 fix.
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
# NOTE: `statuses: write` is the GitHub-Actions name for POST /statuses.
|
||||
# Gitea 1.22.6 may not gate on this permission key (it just checks the
|
||||
# token), but listing it explicitly documents intent for the next
|
||||
# platform-version upgrade.
|
||||
statuses: write
|
||||
|
||||
jobs:
|
||||
# sop-checklist gate: runs on PR lifecycle events OR sop slash commands.
|
||||
# All other comment types (no-op text comments) no longer assign a runner
|
||||
# because this job's if: guard short-circuits before runner assignment.
|
||||
all-items-acked:
|
||||
# Run on pull_request_target events always. On issue_comment events,
|
||||
# only when the comment is on a PR (issue_comment fires for issues
|
||||
# too) and the body contains one of the slash-commands.
|
||||
if: |
|
||||
github.event_name == 'pull_request_target' ||
|
||||
(github.event_name == 'issue_comment' &&
|
||||
@@ -128,3 +130,93 @@ jobs:
|
||||
--pr "$PR_NUMBER" \
|
||||
--config .gitea/sop-checklist-config.yaml \
|
||||
--gitea-host git.moleculesai.app
|
||||
|
||||
# review-refire job: handles /qa-recheck, /security-recheck, /refire-tier-check.
|
||||
# Runs ONLY on comment events with the matching slash commands.
|
||||
# Previously in review-refire-comments.yml (now consolidated here per #1280).
|
||||
review-refire:
|
||||
if: |
|
||||
github.event_name == 'issue_comment' &&
|
||||
github.event.issue.pull_request != null
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Classify comment
|
||||
id: classify
|
||||
env:
|
||||
COMMENT_BODY: ${{ github.event.comment.body }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
{
|
||||
echo "run_qa=false"
|
||||
echo "run_security=false"
|
||||
echo "run_tier=false"
|
||||
} >> "$GITHUB_OUTPUT"
|
||||
first_line=$(printf '%s\n' "$COMMENT_BODY" | sed -n '1p')
|
||||
case "$first_line" in
|
||||
/qa-recheck*)
|
||||
echo "run_qa=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
/security-recheck*)
|
||||
echo "run_security=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
/refire-tier-check*)
|
||||
echo "run_tier=true" >> "$GITHUB_OUTPUT"
|
||||
;;
|
||||
*)
|
||||
echo "::notice::no supported review refire slash command; no-op"
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Check out BASE ref for trusted scripts
|
||||
if: |
|
||||
steps.classify.outputs.run_qa == 'true' ||
|
||||
steps.classify.outputs.run_security == 'true' ||
|
||||
steps.classify.outputs.run_tier == 'true'
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
ref: ${{ github.event.repository.default_branch }}
|
||||
|
||||
- name: Refire qa-review status
|
||||
if: steps.classify.outputs.run_qa == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
TEAM: qa
|
||||
TEAM_ID: '20'
|
||||
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
|
||||
REVIEW_CHECK_DEBUG: '0'
|
||||
REVIEW_CHECK_STRICT: '0'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
.gitea/scripts/review-refire-status.sh
|
||||
|
||||
- name: Refire security-review status
|
||||
if: steps.classify.outputs.run_security == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.RFC_324_TEAM_READ_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
||||
TEAM: security
|
||||
TEAM_ID: '21'
|
||||
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
|
||||
REVIEW_CHECK_DEBUG: '0'
|
||||
REVIEW_CHECK_STRICT: '0'
|
||||
run: |
|
||||
set -euo pipefail
|
||||
.gitea/scripts/review-refire-status.sh
|
||||
|
||||
- name: Refire sop-tier-check status
|
||||
if: steps.classify.outputs.run_tier == 'true'
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.SOP_TIER_CHECK_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITEA_HOST: git.moleculesai.app
|
||||
REPO: ${{ github.repository }}
|
||||
PR_NUMBER: ${{ github.event.issue.number }}
|
||||
COMMENT_AUTHOR: ${{ github.event.comment.user.login }}
|
||||
SOP_DEBUG: '0'
|
||||
run: bash .gitea/scripts/sop-tier-refire.sh
|
||||
|
||||
@@ -38,7 +38,7 @@ on:
|
||||
# full run, but two smoke runs SHOULD queue against each other.
|
||||
concurrency:
|
||||
group: staging-smoke
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
# Needed to open / close the alerting issue.
|
||||
|
||||
@@ -74,7 +74,7 @@ permissions:
|
||||
contents: read
|
||||
|
||||
# NOTE: NO `concurrency:` block is intentional.
|
||||
# Gitea 1.22.6 doesn't honor `cancel-in-progress: false`: queued ticks
|
||||
# Gitea 1.22.6 doesn't honor `cancel-in-progress: true`: queued ticks
|
||||
# of the same group get cancelled-with-started=0 instead of waiting
|
||||
# (DB-verified 2026-05-12, runs 16053/16085 of status-reaper.yml).
|
||||
# The reaper's POST /statuses/{sha} is idempotent — Gitea de-dups by
|
||||
|
||||
@@ -58,7 +58,7 @@ on:
|
||||
# scheduled run would otherwise issue duplicate DELETE calls.
|
||||
concurrency:
|
||||
group: sweep-cf-orphans
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -42,7 +42,7 @@ on:
|
||||
# Don't let two sweeps race the same account.
|
||||
concurrency:
|
||||
group: sweep-cf-tunnels
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -51,7 +51,7 @@ on:
|
||||
# on a manual trigger; queue rather than parallel-delete.
|
||||
concurrency:
|
||||
group: sweep-stale-e2e-orgs
|
||||
cancel-in-progress: false
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
@@ -22,11 +22,6 @@ on:
|
||||
- cron: '17 4 * * 1' # Mondays at 04:17 UTC
|
||||
workflow_dispatch:
|
||||
|
||||
# Cancel stale runs to keep the 8-runner pool available for PR jobs.
|
||||
concurrency:
|
||||
group: weekly-platform-go-${{ github.event.pull_request.head.sha || github.sha }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
statuses: write
|
||||
|
||||
+4
-1
@@ -30,7 +30,10 @@
|
||||
{"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"},
|
||||
{"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"},
|
||||
{"name": "langgraph", "repo": "molecule-ai/molecule-ai-workspace-template-langgraph", "ref": "main"},
|
||||
{"name": "autogen", "repo": "molecule-ai/molecule-ai-workspace-template-autogen", "ref": "main"}
|
||||
{"name": "crewai", "repo": "molecule-ai/molecule-ai-workspace-template-crewai", "ref": "main"},
|
||||
{"name": "autogen", "repo": "molecule-ai/molecule-ai-workspace-template-autogen", "ref": "main"},
|
||||
{"name": "deepagents", "repo": "molecule-ai/molecule-ai-workspace-template-deepagents", "ref": "main"},
|
||||
{"name": "gemini-cli", "repo": "molecule-ai/molecule-ai-workspace-template-gemini-cli", "ref": "main"}
|
||||
],
|
||||
"org_templates": [
|
||||
{"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"},
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
package handlers
|
||||
|
||||
// Regression coverage for the POLL-mode arm of the canvas user-message
|
||||
// data-loss bug (internal#470 sibling — tracked on internal#471).
|
||||
//
|
||||
// Bug (reported 2026-05-16 by CTO Hongming): "in canvas i sometimes lose
|
||||
// my own message when i exit chat". The push-mode arm was fixed by
|
||||
// #1347 (persistUserMessageAtIngest — a SYNCHRONOUS, before-dispatch,
|
||||
// context.WithoutCancel INSERT). #1347's framing asserted "poll-mode
|
||||
// workspaces were never affected — logA2AReceiveQueued already persists
|
||||
// at ingest". That assertion is OVERSTATED.
|
||||
//
|
||||
// Hongming's tenant (slug `hongming`, org 2c940477-...) has 4 workspaces,
|
||||
// ALL runtime=external with empty URL → ALL delivery_mode=poll (proven
|
||||
// empirically: a benign A2A probe returns the synthetic
|
||||
// {"delivery_mode":"poll","status":"queued"} envelope for every one).
|
||||
// So his reported loss is the POLL path, NOT the push path #1347 fixes.
|
||||
//
|
||||
// Root cause (poll arm): the poll-mode short-circuit (a2a_proxy.go ~402)
|
||||
// calls logA2AReceiveQueued and then IMMEDIATELY returns the synthetic
|
||||
// 200 {status:"queued"} to the canvas. But logA2AReceiveQueued's durable
|
||||
// INSERT runs inside h.goAsync(...) — a DETACHED goroutine with NO
|
||||
// happens-before barrier against the HTTP response. The canvas sees 200
|
||||
// ("message accepted") while the activity_logs row may not yet be — and,
|
||||
// on a workspace-server restart / deploy / OOM / EC2 hibernation between
|
||||
// the 200 and the goroutine's commit, NEVER will be — durable. There is
|
||||
// also no fallback (unlike push-mode's legacy-INSERT fallback): a
|
||||
// swallowed LogActivity error loses the message with only a log line.
|
||||
// Chat-history reads activity_logs (postgres_store.go:165-187); a missing
|
||||
// row = message gone on reopen. That is exactly Hongming's symptom.
|
||||
//
|
||||
// Fix (parity with push-mode): the poll-mode ingest persist of the
|
||||
// canvas user message must be SYNCHRONOUS — committed before the queued
|
||||
// 200 is returned — on a context.WithoutCancel derived context, so a
|
||||
// client disconnect on chat-exit and a post-response restart cannot lose
|
||||
// it. Behavior is never worse than today (best-effort; a persist error
|
||||
// still returns queued).
|
||||
//
|
||||
// TEST DESIGN NOTE: sqlmock.ExpectationsWereMet() hangs indefinitely if
|
||||
// the expected query never fires. We use a select+default+time.After
|
||||
// pattern so the test FAILS fast (not hangs) when the production code
|
||||
// regresses to async (the INSERT never fires before handler returns),
|
||||
// while still returning promptly when all expectations are met. The
|
||||
// insertDelay is kept small (50ms) to minimise suite-level timing
|
||||
// impact under -race detection, where mock delays are amplified by
|
||||
// the instrumenter's goroutine overhead.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/DATA-DOG/go-sqlmock"
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
// TestProxyA2A_PollMode_PersistsUserMessageSynchronouslyBeforeQueuedResponse
|
||||
// is the defining contract: for a poll-mode workspace, the canvas user
|
||||
// message MUST be durably INSERTed into activity_logs BEFORE the synthetic
|
||||
// queued 200 is returned to the client — with NO reliance on a detached
|
||||
// async goroutine completing later.
|
||||
//
|
||||
// The test proves the ordering by making the INSERT block briefly and
|
||||
// asserting the handler does NOT return until the INSERT has completed.
|
||||
// Pre-fix (INSERT in h.goAsync, response returned immediately) the
|
||||
// handler returns ~instantly while the INSERT is still pending in the
|
||||
// goroutine → the elapsed time is far below the injected INSERT delay and
|
||||
// ExpectationsWereMet() is racy/unmet at return. Post-fix (synchronous
|
||||
// persist before the queued response) the handler return is gated on the
|
||||
// INSERT, so elapsed >= the injected delay and the expectation is met
|
||||
// deterministically at return WITHOUT any waitAsyncForTest()/sleep.
|
||||
func TestProxyA2A_PollMode_PersistsUserMessageSynchronouslyBeforeQueuedResponse(t *testing.T) {
|
||||
mock := setupTestDB(t)
|
||||
setupTestRedis(t)
|
||||
broadcaster := newTestBroadcaster()
|
||||
handler := NewWorkspaceHandler(broadcaster, nil, "http://localhost:8080", t.TempDir())
|
||||
|
||||
const wsID = "ws-poll-sync-persist"
|
||||
// Keep delay small: -race detection amplifies mock delays significantly.
|
||||
// A 50ms delay is sufficient to prove synchronous blocking (~50× the
|
||||
// normal INSERT latency) without bloating the full ./... suite runtime.
|
||||
const insertDelay = 50 * time.Millisecond
|
||||
|
||||
expectBudgetCheck(mock, wsID)
|
||||
|
||||
// lookupDeliveryMode → poll, triggering the short-circuit.
|
||||
mock.ExpectQuery("SELECT delivery_mode FROM workspaces WHERE id").
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"delivery_mode"}).AddRow("poll"))
|
||||
|
||||
// workspace-name lookup inside logA2AReceiveQueued.
|
||||
mock.ExpectQuery(`SELECT name FROM workspaces WHERE id`).
|
||||
WithArgs(wsID).
|
||||
WillReturnRows(sqlmock.NewRows([]string{"name"}).AddRow("Poll WS"))
|
||||
|
||||
// The durable user-message write. We delay it so a synchronous
|
||||
// persist visibly gates the handler return; a detached-goroutine
|
||||
// persist (pre-fix) does not. The fix must keep using
|
||||
// context.WithoutCancel so this write survives a chat-exit cancel.
|
||||
mock.ExpectExec("INSERT INTO activity_logs").
|
||||
WillDelayFor(insertDelay).
|
||||
WillReturnResult(sqlmock.NewResult(0, 1))
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(w)
|
||||
c.Params = gin.Params{{Key: "id", Value: wsID}}
|
||||
|
||||
// callerID == "" (no X-Workspace-ID) → this is a canvas_user message,
|
||||
// exactly Hongming's case.
|
||||
body := `{"jsonrpc":"2.0","id":"poll-canvas-1","method":"message/send","params":{"message":{"role":"user","parts":[{"text":"my own message"}]}}}`
|
||||
c.Request = httptest.NewRequest("POST", "/workspaces/"+wsID+"/a2a", bytes.NewBufferString(body))
|
||||
c.Request.Header.Set("Content-Type", "application/json")
|
||||
|
||||
start := time.Now()
|
||||
handler.ProxyA2A(c)
|
||||
elapsed := time.Since(start)
|
||||
|
||||
// Defining assertion #1: the handler must not have returned the
|
||||
// queued response before the durable INSERT committed. Pre-fix this
|
||||
// fails (elapsed ≈ 0, INSERT still racing in goAsync).
|
||||
if elapsed < insertDelay {
|
||||
t.Fatalf("poll-mode queued response returned in %v, before the %v user-message INSERT — "+
|
||||
"the message is not durable when the client/process goes away (DATA LOSS). "+
|
||||
"Persist must be synchronous before the queued 200.", elapsed, insertDelay)
|
||||
}
|
||||
|
||||
// Defining assertion #2: the durable write actually happened by the
|
||||
// time the handler returned. ExpectionsWereMet() hangs indefinitely if
|
||||
// the mock never fires (e.g. production code regressed to async),
|
||||
// so we check it in a goroutine with a hard 2s timeout — fails fast
|
||||
// (no CI hang) on regression while returning promptly on success.
|
||||
expectDone := make(chan error, 1)
|
||||
go func() { expectDone <- mock.ExpectationsWereMet() }()
|
||||
select {
|
||||
case err := <-expectDone:
|
||||
if err != nil {
|
||||
t.Fatalf("user-message INSERT was not durable at handler return (unmet sqlmock expectations): %v", err)
|
||||
}
|
||||
case <-time.After(2 * time.Second):
|
||||
t.Fatalf("ExpectationsWereMet() hung for >2s — INSERT mock never fired. " +
|
||||
"Likely cause: production code regressed logA2AReceiveQueued to goAsync " +
|
||||
"(INSERT fires after handler returns, not before).")
|
||||
}
|
||||
|
||||
// Sanity: still the correct poll-mode envelope + status.
|
||||
if w.Code != http.StatusOK {
|
||||
t.Fatalf("expected 200 (queued), got %d: %s", w.Code, w.Body.String())
|
||||
}
|
||||
var resp map[string]interface{}
|
||||
if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
|
||||
t.Fatalf("response is not valid JSON: %v", err)
|
||||
}
|
||||
if resp["status"] != "queued" || resp["delivery_mode"] != "poll" {
|
||||
t.Errorf("poll envelope changed: got status=%v delivery_mode=%v, want queued/poll",
|
||||
resp["status"], resp["delivery_mode"])
|
||||
}
|
||||
}
|
||||
@@ -504,49 +504,25 @@ func lookupDeliveryMode(ctx context.Context, workspaceID string) string {
|
||||
// reads in PR 3 — that's how a poll-mode workspace receives inbound A2A
|
||||
// without a public URL.
|
||||
func (h *WorkspaceHandler) logA2AReceiveQueued(ctx context.Context, workspaceID, callerID string, body []byte, a2aMethod string) {
|
||||
// DATA-LOSS FIX (internal#471 — poll-mode sibling of #1347/internal#470):
|
||||
// this is the ONLY durable write of a poll-mode inbound message,
|
||||
// including a canvas_user message (callerID == "") typed in the canvas
|
||||
// chat. It MUST be SYNCHRONOUS and complete BEFORE the caller returns
|
||||
// the synthetic {status:"queued"} 200 — otherwise the canvas sees the
|
||||
// send acknowledged while the activity_logs row is still racing in a
|
||||
// detached goroutine, and a workspace-server restart / deploy / OOM /
|
||||
// EC2 hibernation between the 200 and the goroutine's commit loses the
|
||||
// user's message permanently (chat-history reads activity_logs, so a
|
||||
// missing row = message gone on reopen). Hongming's tenant is entirely
|
||||
// poll-mode (4 external workspaces, no URL — verified empirically), so
|
||||
// his reported loss is THIS path; #1347 (push-mode, persists AFTER the
|
||||
// poll short-circuit) structurally cannot cover it.
|
||||
//
|
||||
// Mirrors persistUserMessageAtIngest's discipline:
|
||||
// - context.WithoutCancel: a client disconnect on chat-exit (which
|
||||
// cancels the inbound request ctx) MUST NOT abort this write.
|
||||
// - SYNCHRONOUS (no goAsync): the row must be durable before the
|
||||
// queued 200 is returned to the caller.
|
||||
// - Best-effort: LogActivity already logs+swallows INSERT errors, so
|
||||
// a hiccup never blocks or fails the user's send (behavior for
|
||||
// that one request is never worse than the pre-fix async path).
|
||||
// The post-commit broadcast still fires inside LogActivity; a missed
|
||||
// WebSocket event is not data loss (the durable row is the truth the
|
||||
// canvas re-reads on reopen).
|
||||
insCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
var wsName string
|
||||
db.DB.QueryRowContext(insCtx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName)
|
||||
db.DB.QueryRowContext(ctx, `SELECT name FROM workspaces WHERE id = $1`, workspaceID).Scan(&wsName)
|
||||
if wsName == "" {
|
||||
wsName = workspaceID
|
||||
}
|
||||
summary := a2aMethod + " → " + wsName + " (queued for poll)"
|
||||
LogActivity(insCtx, h.broadcaster, ActivityParams{
|
||||
WorkspaceID: workspaceID,
|
||||
ActivityType: "a2a_receive",
|
||||
SourceID: nilIfEmpty(callerID),
|
||||
TargetID: &workspaceID,
|
||||
Method: &a2aMethod,
|
||||
Summary: &summary,
|
||||
RequestBody: json.RawMessage(body),
|
||||
Status: "ok",
|
||||
h.goAsync(func() {
|
||||
logCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
|
||||
defer cancel()
|
||||
LogActivity(logCtx, h.broadcaster, ActivityParams{
|
||||
WorkspaceID: workspaceID,
|
||||
ActivityType: "a2a_receive",
|
||||
SourceID: nilIfEmpty(callerID),
|
||||
TargetID: &workspaceID,
|
||||
Method: &a2aMethod,
|
||||
Summary: &summary,
|
||||
RequestBody: json.RawMessage(body),
|
||||
Status: "ok",
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -44,8 +44,8 @@ func NewWorkspaceImageService(docker *dockerclient.Client) *WorkspaceImageServic
|
||||
// AllRuntimes is the canonical list mirroring docs/workspace-runtime-package.md.
|
||||
// Update both when a new template is added.
|
||||
var AllRuntimes = []string{
|
||||
"claude-code", "langgraph", "autogen",
|
||||
"hermes", "openclaw",
|
||||
"claude-code", "langgraph", "crewai", "autogen",
|
||||
"deepagents", "hermes", "gemini-cli", "openclaw",
|
||||
}
|
||||
|
||||
// RefreshResult is the per-call outcome surfaced to HTTP callers AND logged
|
||||
|
||||
@@ -23,8 +23,8 @@ package models
|
||||
// - claude-code: "sonnet" — Anthropic's CLI accepts the short
|
||||
// name and resolves it via the operator's anthropic-oauth or
|
||||
// ANTHROPIC_API_KEY chain.
|
||||
// - everything else (hermes, langgraph, autogen, codex, openclaw,
|
||||
// external, ""): a fully-qualified
|
||||
// - everything else (hermes, langgraph, crewai, autogen, deepagents,
|
||||
// codex, openclaw, gemini-cli, external, ""): a fully-qualified
|
||||
// vendor:model slug that the universal MODEL_PROVIDER chain in
|
||||
// molecule-core PR #247 can route via per-vendor required_env.
|
||||
//
|
||||
|
||||
@@ -21,9 +21,12 @@ func TestDefaultModel(t *testing.T) {
|
||||
// as a generic "unknown" failure.
|
||||
{"hermes", "anthropic:claude-opus-4-7"},
|
||||
{"langgraph", "anthropic:claude-opus-4-7"},
|
||||
{"crewai", "anthropic:claude-opus-4-7"},
|
||||
{"autogen", "anthropic:claude-opus-4-7"},
|
||||
{"deepagents", "anthropic:claude-opus-4-7"},
|
||||
{"codex", "anthropic:claude-opus-4-7"},
|
||||
{"openclaw", "anthropic:claude-opus-4-7"},
|
||||
{"gemini-cli", "anthropic:claude-opus-4-7"},
|
||||
{"external", "anthropic:claude-opus-4-7"},
|
||||
|
||||
// Unknown / empty — fall through to universal default rather
|
||||
|
||||
@@ -190,7 +190,7 @@ func TestEnsureLocalImage_RepoNotFound(t *testing.T) {
|
||||
opts.HTTPClient = srv.Client()
|
||||
opts.remoteHeadSha = nil // exercise real HTTP path
|
||||
|
||||
_, err := ensureLocalImageWithOpts(context.Background(), "hermes", opts)
|
||||
_, err := ensureLocalImageWithOpts(context.Background(), "crewai", opts)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error, got nil")
|
||||
}
|
||||
|
||||
@@ -35,19 +35,6 @@ import (
|
||||
// drift-risk #6.
|
||||
var ErrNoBackend = errors.New("provisioner: no backend configured (zero-valued receiver)")
|
||||
|
||||
// ErrUnresolvableRuntime is returned by selectImage when a workspace
|
||||
// names a runtime that has no resolvable image (not in RuntimeImages and
|
||||
// no operator-pinned cfg.Image). RFC internal#483 + security review 4269:
|
||||
// previously such a request silently fell through to DefaultImage
|
||||
// (langgraph) — a user asking for crewai would get a langgraph container
|
||||
// with no signal. The CTO standing directive
|
||||
// (feedback_platform_must_hardgate_base_contract) is fail-closed: a
|
||||
// named-but-unresolvable runtime must reject with a structured,
|
||||
// runtime-naming error so the existing provision-failed notify/log path
|
||||
// surfaces it, NOT silently degrade. The genuinely-unspecified (empty)
|
||||
// runtime is still a distinct, legitimate path that keeps DefaultImage.
|
||||
var ErrUnresolvableRuntime = errors.New("provisioner: requested runtime has no resolvable image")
|
||||
|
||||
// RuntimeImages maps runtime names to their Docker image refs.
|
||||
// Each standalone template repo publishes its image via the reusable
|
||||
// publish-template-image workflow in molecule-ci on every main merge.
|
||||
@@ -117,33 +104,20 @@ type WorkspaceConfig struct {
|
||||
// selectImage resolves the final Docker image ref for a workspace. The handler
|
||||
// layer is the source of truth — if it set cfg.Image (the digest-pinned form
|
||||
// from runtime_image_pins, #2272), honor that. Otherwise fall back to the
|
||||
// runtime→tag lookup in RuntimeImages (legacy `:latest` behavior).
|
||||
//
|
||||
// Fail-closed contract (RFC internal#483 / security review 4269 /
|
||||
// feedback_platform_must_hardgate_base_contract): if the workspace NAMES a
|
||||
// runtime that resolves to no image (not in RuntimeImages, no pinned
|
||||
// cfg.Image), reject with ErrUnresolvableRuntime instead of silently
|
||||
// substituting DefaultImage. Pre-fix, removing crewai/deepagents/gemini-cli
|
||||
// from the catalog left those create requests silently provisioning a
|
||||
// langgraph container — the user asked for crewai and got langgraph with no
|
||||
// signal. The error propagates through Start → markProvisionFailed, which
|
||||
// already broadcasts WorkspaceProvisionFailed and records the message.
|
||||
//
|
||||
// The genuinely-unspecified runtime (empty cfg.Runtime, e.g. an org template
|
||||
// that doesn't pin one) is an intended distinct path and still resolves to
|
||||
// DefaultImage — only a NAMED-but-unresolvable runtime is rejected.
|
||||
func selectImage(cfg WorkspaceConfig) (string, error) {
|
||||
// runtime→tag lookup in RuntimeImages (legacy `:latest` behavior). When the
|
||||
// runtime isn't recognized either, fall back to DefaultImage so Start() still
|
||||
// has something to hand Docker — surfacing a "No such image" later is more
|
||||
// actionable than a silent "" panic in ContainerCreate.
|
||||
func selectImage(cfg WorkspaceConfig) string {
|
||||
if cfg.Image != "" {
|
||||
return cfg.Image, nil
|
||||
return cfg.Image
|
||||
}
|
||||
if cfg.Runtime != "" {
|
||||
if img, ok := RuntimeImages[cfg.Runtime]; ok {
|
||||
return img, nil
|
||||
return img
|
||||
}
|
||||
return "", fmt.Errorf("%w: runtime %q (known runtimes: %v)",
|
||||
ErrUnresolvableRuntime, cfg.Runtime, knownRuntimes)
|
||||
}
|
||||
return DefaultImage, nil
|
||||
return DefaultImage
|
||||
}
|
||||
|
||||
// Workspace-access constants for #65. Matches the CHECK constraint on
|
||||
@@ -362,15 +336,7 @@ func (p *Provisioner) Start(ctx context.Context, cfg WorkspaceConfig) (string, e
|
||||
|
||||
env := buildContainerEnv(cfg)
|
||||
|
||||
image, imgErr := selectImage(cfg)
|
||||
if imgErr != nil {
|
||||
// Fail-closed: a named-but-unresolvable runtime must not silently
|
||||
// become DefaultImage (RFC internal#483 / review 4269). The caller's
|
||||
// error path (markProvisionFailed) broadcasts the failure + records
|
||||
// the message so the canvas surfaces it.
|
||||
log.Printf("Provisioner: refusing to start %s: %v", cfg.WorkspaceID, imgErr)
|
||||
return "", imgErr
|
||||
}
|
||||
image := selectImage(cfg)
|
||||
|
||||
// Local-build mode (issue #63 / Task #194): when MOLECULE_IMAGE_REGISTRY
|
||||
// is unset, the OSS contributor path skips the registry pull entirely
|
||||
|
||||
@@ -513,10 +513,7 @@ func TestWorkspaceConfig_ResetClaudeSessionFieldPresent(t *testing.T) {
|
||||
// we lose the "one bad publish doesn't break every workspace" guarantee.
|
||||
func TestSelectImage_PrefersExplicitImage(t *testing.T) {
|
||||
pinned := "ghcr.io/molecule-ai/workspace-template-claude-code@sha256:3d6761a97ed07d7d33cfc19a8fbab81175d9d9179618d493dbc00c5f7ef076a3"
|
||||
got, err := selectImage(WorkspaceConfig{Runtime: "claude-code", Image: pinned})
|
||||
if err != nil {
|
||||
t.Fatalf("selectImage with cfg.Image=pinned: unexpected error %v", err)
|
||||
}
|
||||
got := selectImage(WorkspaceConfig{Runtime: "claude-code", Image: pinned})
|
||||
if got != pinned {
|
||||
t.Errorf("selectImage with cfg.Image=pinned: got %q, want %q", got, pinned)
|
||||
}
|
||||
@@ -526,46 +523,28 @@ func TestSelectImage_PrefersExplicitImage(t *testing.T) {
|
||||
// pin lookup deliberately bypassed via WORKSPACE_IMAGE_LOCAL_OVERRIDE).
|
||||
// selectImage must use the legacy runtime→:latest map.
|
||||
func TestSelectImage_FallsBackToRuntimeMap(t *testing.T) {
|
||||
got, err := selectImage(WorkspaceConfig{Runtime: "claude-code", Image: ""})
|
||||
if err != nil {
|
||||
t.Fatalf("selectImage with empty Image: unexpected error %v", err)
|
||||
}
|
||||
got := selectImage(WorkspaceConfig{Runtime: "claude-code", Image: ""})
|
||||
want := RuntimeImages["claude-code"]
|
||||
if got != want {
|
||||
t.Errorf("selectImage with empty Image: got %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSelectImage_NamedUnresolvableRuntimeRejects pins the fail-closed
|
||||
// contract (RFC internal#483 / security review 4269 /
|
||||
// feedback_platform_must_hardgate_base_contract): a NAMED runtime with no
|
||||
// resolvable image must reject with ErrUnresolvableRuntime, NOT silently
|
||||
// substitute DefaultImage. Pre-fix this returned langgraph — a user asking
|
||||
// for a removed runtime (crewai/deepagents/gemini-cli) silently got a
|
||||
// langgraph container. "crewai" is the concrete regression from the
|
||||
// security finding.
|
||||
func TestSelectImage_NamedUnresolvableRuntimeRejects(t *testing.T) {
|
||||
for _, rt := range []string{"no-such-runtime", "crewai", "deepagents", "gemini-cli"} {
|
||||
got, err := selectImage(WorkspaceConfig{Runtime: rt})
|
||||
if !errors.Is(err, ErrUnresolvableRuntime) {
|
||||
t.Errorf("selectImage(%q): got err %v, want ErrUnresolvableRuntime", rt, err)
|
||||
}
|
||||
if got != "" {
|
||||
t.Errorf("selectImage(%q): got image %q, want \"\" on reject", rt, got)
|
||||
}
|
||||
if err != nil && !strings.Contains(err.Error(), rt) {
|
||||
t.Errorf("selectImage(%q): error must name the offending runtime, got %v", rt, err)
|
||||
}
|
||||
// TestSelectImage_UnknownRuntimeFallsBackToDefault preserves today's
|
||||
// behavior — an unrecognized runtime resolves to DefaultImage rather than
|
||||
// "" so ContainerCreate gets a usable arg and surfaces a meaningful
|
||||
// "No such image" error if the default itself is missing.
|
||||
func TestSelectImage_UnknownRuntimeFallsBackToDefault(t *testing.T) {
|
||||
got := selectImage(WorkspaceConfig{Runtime: "no-such-runtime"})
|
||||
if got != DefaultImage {
|
||||
t.Errorf("selectImage with unknown runtime: got %q, want DefaultImage %q", got, DefaultImage)
|
||||
}
|
||||
}
|
||||
|
||||
// TestSelectImage_EmptyRuntimeFallsBackToDefault: same invariant for the
|
||||
// no-runtime-supplied path (legacy callers / older handler code).
|
||||
func TestSelectImage_EmptyRuntimeFallsBackToDefault(t *testing.T) {
|
||||
got, err := selectImage(WorkspaceConfig{})
|
||||
if err != nil {
|
||||
t.Fatalf("selectImage with zero cfg: unexpected error %v (empty runtime is a legitimate DefaultImage path)", err)
|
||||
}
|
||||
got := selectImage(WorkspaceConfig{})
|
||||
if got != DefaultImage {
|
||||
t.Errorf("selectImage with zero cfg: got %q, want DefaultImage %q", got, DefaultImage)
|
||||
}
|
||||
@@ -829,7 +808,7 @@ func TestIsImageNotFoundErr(t *testing.T) {
|
||||
{"nil", nil, false},
|
||||
{"moby no such image", fmtErr(`Error response from daemon: No such image: workspace-template:openclaw`), true},
|
||||
{"no such image lowercase", fmtErr(`error: no such image: foo:bar`), true},
|
||||
{"image not found", fmtErr(`Error: image "workspace-template:hermes" not found`), true},
|
||||
{"image not found", fmtErr(`Error: image "workspace-template:crewai" not found`), true},
|
||||
{"generic not found without image", fmtErr(`container not found`), false},
|
||||
{"unrelated error", fmtErr(`connection refused`), false},
|
||||
{"permission denied", fmtErr(`permission denied`), false},
|
||||
|
||||
@@ -21,6 +21,9 @@ var knownRuntimes = []string{
|
||||
"autogen",
|
||||
"claude-code",
|
||||
"codex",
|
||||
"crewai",
|
||||
"deepagents",
|
||||
"gemini-cli",
|
||||
"hermes",
|
||||
"langgraph",
|
||||
"openclaw",
|
||||
|
||||
@@ -53,8 +53,8 @@ func TestRuntimeImage_AllKnownRuntimes(t *testing.T) {
|
||||
}
|
||||
}
|
||||
// Pin the count so adding a runtime requires explicit test acknowledgement.
|
||||
if len(knownRuntimes) != 6 {
|
||||
t.Errorf("knownRuntimes length = %d, want 6 (autogen, claude-code, codex, hermes, langgraph, openclaw)", len(knownRuntimes))
|
||||
if len(knownRuntimes) != 9 {
|
||||
t.Errorf("knownRuntimes length = %d, want 9 (autogen, claude-code, codex, crewai, deepagents, gemini-cli, hermes, langgraph, openclaw)", len(knownRuntimes))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -431,43 +431,6 @@ def _is_self_notify_row(row: dict[str, Any]) -> bool:
|
||||
return source_id is None or source_id == ""
|
||||
|
||||
|
||||
def _is_self_echo_row(row: dict[str, Any], workspace_id: str) -> bool:
|
||||
"""Return True if ``row`` is a self-originated a2a_receive row.
|
||||
|
||||
Internal #469: when a workspace delegates to a target that never picks
|
||||
up the task, ``tool_delegate_task`` calls ``report_activity`` which
|
||||
POSTs to the platform with source_id set to the *sender's* workspace
|
||||
UUID (mandated by spoof-defense in workspace-server's a2a_proxy). The
|
||||
activity API exposes that row under type=a2a_receive, so the inbox
|
||||
poller re-fetches it. Without this guard the row is surfaced as
|
||||
kind='peer_agent' with the workspace's own identity as peer_id —
|
||||
the workspace sees its own delegation-failure echoed back as if a
|
||||
peer had delegated to it.
|
||||
|
||||
The guard mirrors the existing _is_self_notify_row pattern: both
|
||||
skip rows that would otherwise create spurious inbound signal. The
|
||||
long-term fix (making the platform write a distinct activity_type
|
||||
for agent-outbound rows) is tracked separately; this guard stays
|
||||
because it only excludes rows the agent never wants.
|
||||
|
||||
``workspace_id`` must be non-empty — an empty-string workspace_id
|
||||
(single-workspace legacy path) can never match a UUID source_id, so
|
||||
the predicate is always False there, which is safe.
|
||||
|
||||
RFC #2829 PR-2 note: rows with method="delegate_result" are excluded
|
||||
from the self-echo guard even when source_id matches our workspace_id.
|
||||
The platform may write a delegation-result row with source_id set to
|
||||
our workspace_id (e.g. a self-delegation or edge case in the platform's
|
||||
result-writing path). Such rows must reach the inbox so that
|
||||
message_from_activity can surface them as peer_agent inbound and the
|
||||
runtime receives the delegation result. Silently filtering them as
|
||||
self-echo would break delegation result delivery.
|
||||
"""
|
||||
if not workspace_id:
|
||||
return False
|
||||
return row.get("source_id") == workspace_id and row.get("method") != "delegate_result"
|
||||
|
||||
|
||||
def message_from_activity(row: dict[str, Any]) -> InboxMessage:
|
||||
"""Convert one /activity row into an InboxMessage.
|
||||
|
||||
@@ -660,16 +623,6 @@ def _poll_once(
|
||||
# the same self-notify on every iteration.
|
||||
last_id = str(row.get("id", "")) or last_id
|
||||
continue
|
||||
if _is_self_echo_row(row, workspace_id):
|
||||
# Internal #469: tool_delegate_task writes its own a2a_receive
|
||||
# row with source_id = this workspace's UUID (spoof-defense).
|
||||
# The poll fetches it back as kind='peer_agent', making the
|
||||
# workspace echo its own delegation-failure as an inbound from
|
||||
# a phantom peer. Skip it — the real delegation-result path
|
||||
# (delegate_result push) is separate and unaffected. Cursor
|
||||
# still advances so the next poll doesn't re-seen this row.
|
||||
last_id = str(row.get("id", "")) or last_id
|
||||
continue
|
||||
message = message_from_activity(row)
|
||||
if not message.activity_id:
|
||||
continue
|
||||
|
||||
@@ -495,151 +495,6 @@ def test_poll_once_skips_self_notify_rows(state: inbox.InboxState):
|
||||
assert [m.activity_id for m in queue] == ["act-real"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _is_self_echo_row — internal #469 fix
|
||||
# ---------------------------------------------------------------------------
|
||||
#
|
||||
# When a workspace delegates to a target that never picks up the task,
|
||||
# tool_delegate_task calls report_activity("a2a_receive", ...) which POSTs
|
||||
# to the platform with source_id set to the *sender's* workspace UUID
|
||||
# (spoof-defense). The activity API returns that row under type=a2a_receive
|
||||
# on the next poll, so message_from_activity sets peer_id = workspace's own
|
||||
# UUID — the workspace sees its own delegation-failure as an inbound from
|
||||
# a phantom peer. _is_self_echo_row guards against this.
|
||||
#
|
||||
# Internal #469 was live-reproduced on hongming.moleculesai.app 2026-05-16.
|
||||
|
||||
|
||||
def test_is_self_echo_row_true_when_source_id_matches_workspace():
|
||||
row = {"source_id": "ws-abc123", "method": "a2a_receive"}
|
||||
assert inbox._is_self_echo_row(row, "ws-abc123") is True
|
||||
|
||||
|
||||
def test_is_self_echo_row_false_when_source_id_differs():
|
||||
"""A real peer agent (different workspace_id) must NOT be filtered."""
|
||||
row = {"source_id": "ws-peer", "method": "a2a_receive"}
|
||||
assert inbox._is_self_echo_row(row, "ws-1") is False
|
||||
|
||||
|
||||
def test_is_self_echo_row_false_when_source_id_is_none():
|
||||
"""Canvas-user inbound has no source_id — never an echo."""
|
||||
row = {"source_id": None, "method": "a2a_receive"}
|
||||
assert inbox._is_self_echo_row(row, "ws-1") is False
|
||||
|
||||
|
||||
def test_is_self_echo_row_false_when_workspace_id_is_empty():
|
||||
"""Single-workspace legacy path with empty workspace_id cannot
|
||||
match a UUID source_id — predicate is always False, which is safe."""
|
||||
row = {"source_id": "ws-abc123", "method": "a2a_receive"}
|
||||
assert inbox._is_self_echo_row(row, "") is False
|
||||
|
||||
|
||||
def test_is_self_echo_row_false_when_source_id_key_absent():
|
||||
row = {"method": "a2a_receive"}
|
||||
assert inbox._is_self_echo_row(row, "ws-1") is False
|
||||
|
||||
|
||||
def test_is_self_echo_row_false_for_delegate_result():
|
||||
"""RFC #2829 PR-2 regression pin: a row with source_id matching our
|
||||
workspace_id but method=delegate_result must NOT be filtered as a
|
||||
self-echo. The platform may write a delegation-result row with our
|
||||
workspace_id as source_id; such rows must reach the inbox so the
|
||||
runtime receives the delegation result. Silently filtering them would
|
||||
break delegate_result delivery."""
|
||||
row = {"source_id": "ws-1", "method": "delegate_result"}
|
||||
assert inbox._is_self_echo_row(row, "ws-1") is False
|
||||
|
||||
|
||||
def test_poll_once_skips_self_echo_rows(state: inbox.InboxState):
|
||||
"""Internal #469 regression pin: a row with source_id matching our
|
||||
workspace_id must NOT land in the inbox queue — it is our own
|
||||
delegation-report echoing back, not a real peer inbound."""
|
||||
rows = [
|
||||
{
|
||||
"id": "act-real-peer",
|
||||
"source_id": "ws-peer",
|
||||
"method": "a2a_receive",
|
||||
"summary": None,
|
||||
"request_body": {"parts": [{"type": "text", "text": "real peer inbound"}]},
|
||||
"created_at": "2026-04-30T22:00:00Z",
|
||||
},
|
||||
{
|
||||
"id": "act-self-echo",
|
||||
"source_id": "ws-1",
|
||||
"method": "a2a_receive",
|
||||
"summary": "task result: target timed out",
|
||||
"request_body": None,
|
||||
"created_at": "2026-04-30T22:00:01Z",
|
||||
},
|
||||
]
|
||||
resp = _make_response(200, rows)
|
||||
p, _ = _patch_httpx(resp)
|
||||
with p:
|
||||
n = inbox._poll_once(state, "http://platform", "ws-1", {})
|
||||
|
||||
# Only the real peer inbound counted; self-echo silently dropped.
|
||||
assert n == 1
|
||||
queue = state.peek(10)
|
||||
assert [m.activity_id for m in queue] == ["act-real-peer"]
|
||||
assert queue[0].peer_id == "ws-peer"
|
||||
|
||||
|
||||
def test_poll_once_advances_cursor_past_self_echo(state: inbox.InboxState):
|
||||
"""Cursor must advance past self-echo rows even though we don't
|
||||
enqueue them. Otherwise the next poll re-fetches the same self-echo
|
||||
on every iteration, wasting requests and blocking real inbound."""
|
||||
state.save_cursor("act-old")
|
||||
rows = [
|
||||
{
|
||||
"id": "act-self-echo",
|
||||
"source_id": "ws-1",
|
||||
"method": "a2a_receive",
|
||||
"summary": "task result: timeout",
|
||||
"request_body": None,
|
||||
"created_at": "2026-04-30T22:00:00Z",
|
||||
},
|
||||
]
|
||||
resp = _make_response(200, rows)
|
||||
p, _ = _patch_httpx(resp)
|
||||
with p:
|
||||
n = inbox._poll_once(state, "http://platform", "ws-1", {})
|
||||
|
||||
assert n == 0
|
||||
assert state.peek(10) == []
|
||||
# Cursor must move past the skipped row so we don't re-poll it.
|
||||
assert state.load_cursor() == "act-self-echo"
|
||||
|
||||
|
||||
def test_poll_once_self_echo_does_not_fire_notification(state: inbox.InboxState):
|
||||
"""The notification callback (channel push to Claude Code etc.)
|
||||
must not fire for self-echo rows. Same rationale as self-notify:
|
||||
push-capable hosts would see the echo loop on the push channel."""
|
||||
rows = [
|
||||
{
|
||||
"id": "act-self-echo",
|
||||
"source_id": "ws-1",
|
||||
"method": "a2a_receive",
|
||||
"summary": "task result: timeout",
|
||||
"request_body": None,
|
||||
"created_at": "2026-04-30T22:00:00Z",
|
||||
},
|
||||
]
|
||||
received: list[dict] = []
|
||||
inbox.set_notification_callback(received.append)
|
||||
try:
|
||||
resp = _make_response(200, rows)
|
||||
p, _ = _patch_httpx(resp)
|
||||
with p:
|
||||
inbox._poll_once(state, "http://platform", "ws-1", {})
|
||||
finally:
|
||||
inbox.set_notification_callback(None)
|
||||
|
||||
assert received == [], (
|
||||
"self-echo rows must not surface as MCP notifications — "
|
||||
"doing so re-creates the echo loop on push-capable hosts"
|
||||
)
|
||||
|
||||
|
||||
def test_poll_once_advances_cursor_past_self_notify(state: inbox.InboxState):
|
||||
"""Cursor must advance past self-notify rows even though we don't
|
||||
enqueue them. Otherwise the next poll re-fetches the same self-
|
||||
|
||||
Reference in New Issue
Block a user