diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index 3cd89596a..9b5954ed1 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -132,6 +132,16 @@ jobs: # this cap catches any step that leaks past that. Set well above 10m so # the per-step timeout is the active constraint. timeout-minutes: 15 + # MOLECULE_GITEA_TOKEN: the manifest_pinning tests in + # internal/handlers/manifest_pinning_test.go (RFC #2927) call the + # Gitea API to verify the pinned SHAs are reachable + that + # workspace_template entries' trees contain config.yaml. Private + # template repos (molecule-ai-workspace-template-google-adk, + # molecule-ai-workspace-template-seo-agent) return 404 without + # auth — same secret as harness-replays.yml uses for + # clone-manifest.sh. + env: + MOLECULE_GITEA_TOKEN: ${{ secrets.AUTO_SYNC_TOKEN }} defaults: run: working-directory: workspace-server diff --git a/manifest.json b/manifest.json index c270c85a8..6d3fd6f15 100644 --- a/manifest.json +++ b/manifest.json @@ -1,41 +1,41 @@ { - "_comment": "Platform template registry. Repos may be public or platform-private; CI and runtime template-cache refresh clone them with the SSOT-managed template read token, then strip .git metadata before use. Customer/private tenant templates remain outside this platform manifest. 'main' refs are pinned to tags before broad rollout.", + "_comment": "Platform template registry. Repos may be public or platform-private; CI and runtime template-cache refresh clone them with the SSOT-managed template read token, then strip .git metadata before use. Customer/private tenant templates remain outside this platform manifest.", + "_pinning_contract": "RFC #2927 — every entry's `ref` is pinned to an immutable commit SHA (not a branch like `main` and not a mutable tag). The previous `ref:main` exposure made provisioning non-reproducible — a merge to ANY template's `main` instantly reached every subsequent provision. Pinning restores: (a) reproducible identity (same SHA → same config.yaml + prompts + skills on every boot); (b) auditable provenance (the SHA is the artifact's content-address); (c) explicit upgrades (bumping a pin is a reviewed PR, not silent). CI test TestManifest_RefPinningCompleteness (workspace-server/internal/handlers/manifest_pinning_test.go) asserts the pinning contract: (1) every ref is a 40-char commit SHA, (2) every pinned SHA is reachable in the named repo, (3) workspace_template entries include config.yaml in the pinned ref's tree. To bump a pin: PR with the new SHA, tests run, driver reviews the diff. PLATFORM-AGENT IS NOT PINNED HERE: per #2919, the platform-agent template's `config.yaml` is being added in template PR #1; once merged AND config.yaml exists at the pinned SHA, add the entry here in a follow-up PR.", "version": 1, "plugins": [ - {"name": "browser-automation", "repo": "molecule-ai/molecule-ai-plugin-browser-automation", "ref": "main"}, - {"name": "ecc", "repo": "molecule-ai/molecule-ai-plugin-ecc", "ref": "main"}, - {"name": "gh-identity", "repo": "molecule-ai/molecule-ai-plugin-gh-identity", "ref": "main"}, - {"name": "molecule-audit", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit", "ref": "main"}, - {"name": "molecule-audit-trail", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit-trail", "ref": "main"}, - {"name": "molecule-careful-bash", "repo": "molecule-ai/molecule-ai-plugin-molecule-careful-bash", "ref": "main"}, - {"name": "molecule-compliance", "repo": "molecule-ai/molecule-ai-plugin-molecule-compliance", "ref": "main"}, - {"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-plugin-molecule-dev", "ref": "main"}, - {"name": "molecule-freeze-scope", "repo": "molecule-ai/molecule-ai-plugin-molecule-freeze-scope", "ref": "main"}, - {"name": "molecule-hitl", "repo": "molecule-ai/molecule-ai-plugin-molecule-hitl", "ref": "main"}, - {"name": "molecule-prompt-watchdog", "repo": "molecule-ai/molecule-ai-plugin-molecule-prompt-watchdog", "ref": "main"}, - {"name": "molecule-security-scan", "repo": "molecule-ai/molecule-ai-plugin-molecule-security-scan", "ref": "main"}, - {"name": "molecule-session-context", "repo": "molecule-ai/molecule-ai-plugin-molecule-session-context", "ref": "main"}, - {"name": "molecule-skill-code-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-code-review", "ref": "main"}, - {"name": "molecule-skill-cron-learnings", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cron-learnings", "ref": "main"}, - {"name": "molecule-skill-cross-vendor-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cross-vendor-review", "ref": "main"}, - {"name": "molecule-skill-llm-judge", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-llm-judge", "ref": "main"}, - {"name": "molecule-skill-update-docs", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-update-docs", "ref": "main"}, - {"name": "molecule-workflow-retro", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-retro", "ref": "main"}, - {"name": "molecule-workflow-triage", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-triage", "ref": "main"}, - {"name": "superpowers", "repo": "molecule-ai/molecule-ai-plugin-superpowers", "ref": "main"} + {"name": "browser-automation", "repo": "molecule-ai/molecule-ai-plugin-browser-automation", "ref": "7a3cea71e684fe87fc2847e2b105301b552a9098"}, + {"name": "ecc", "repo": "molecule-ai/molecule-ai-plugin-ecc", "ref": "4df7e4c58c3fc645a122cffcfffd590c895a8eb3"}, + {"name": "gh-identity", "repo": "molecule-ai/molecule-ai-plugin-gh-identity", "ref": "72dafc7756c74e164927150ff65e4c73afac4b17"}, + {"name": "molecule-audit", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit", "ref": "e8d76dae1ffa517564b790b1a0c3bffcdbe07ae8"}, + {"name": "molecule-audit-trail", "repo": "molecule-ai/molecule-ai-plugin-molecule-audit-trail", "ref": "33069988d759a56e0f8584c2593369907d340cb5"}, + {"name": "molecule-careful-bash", "repo": "molecule-ai/molecule-ai-plugin-molecule-careful-bash", "ref": "41f0a48a8825b85bcdd721c509ad0d85d1457ab2"}, + {"name": "molecule-compliance", "repo": "molecule-ai/molecule-ai-plugin-molecule-compliance", "ref": "eacb510e6caafa4026cc4de384b539743346a866"}, + {"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-plugin-molecule-dev", "ref": "f3a878f2acdfefd7004f0a274aba84143414fa9f"}, + {"name": "molecule-freeze-scope", "repo": "molecule-ai/molecule-ai-plugin-molecule-freeze-scope", "ref": "8e7a105734e7a07773d40ce26f78204ae1700b8e"}, + {"name": "molecule-hitl", "repo": "molecule-ai/molecule-ai-plugin-molecule-hitl", "ref": "e2aec0f9fd18665afdb10b17595909dace40d2ad"}, + {"name": "molecule-prompt-watchdog", "repo": "molecule-ai/molecule-ai-plugin-molecule-prompt-watchdog", "ref": "15f42b67e36a6f29d34f5e975ae24c1756780897"}, + {"name": "molecule-security-scan", "repo": "molecule-ai/molecule-ai-plugin-molecule-security-scan", "ref": "0e6eb14b04e44f9b9f1eae849d9c743e700dd905"}, + {"name": "molecule-session-context", "repo": "molecule-ai/molecule-ai-plugin-molecule-session-context", "ref": "793350865cc531f947f8d796c2595c0726dfa2a2"}, + {"name": "molecule-skill-code-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-code-review", "ref": "e150abff5c54f19ddf8addbf8e52dc3c56f57097"}, + {"name": "molecule-skill-cron-learnings", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cron-learnings", "ref": "ab262586f94f6aa83ce4cbc5a92736ca8b7fc91d"}, + {"name": "molecule-skill-cross-vendor-review", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-cross-vendor-review", "ref": "0054b4968848ca1434cce37c7889065e96f4d313"}, + {"name": "molecule-skill-llm-judge", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-llm-judge", "ref": "c66afd939493353bed6bd99ec1a41bcf13623461"}, + {"name": "molecule-skill-update-docs", "repo": "molecule-ai/molecule-ai-plugin-molecule-skill-update-docs", "ref": "0919b2e3a91197a702a0b4c068330882eb91572f"}, + {"name": "molecule-workflow-retro", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-retro", "ref": "418062bd4416a7785ca1972fed33f685f2dd3114"}, + {"name": "molecule-workflow-triage", "repo": "molecule-ai/molecule-ai-plugin-molecule-workflow-triage", "ref": "fe63415f42d96e1b8ed3ea3bbef3b27fb31c9734"}, + {"name": "superpowers", "repo": "molecule-ai/molecule-ai-plugin-superpowers", "ref": "b4e56ff9740099c62b8f8cae6619f66eb55c3201"} ], "workspace_templates": [ - {"name": "claude-code-default", "repo": "molecule-ai/molecule-ai-workspace-template-claude-code", "ref": "main"}, - {"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "main"}, - {"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "main"}, - {"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "main"}, - {"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "main"}, - {"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "main"}, - {"name": "platform-agent", "repo": "molecule-ai/molecule-ai-workspace-template-platform-agent", "ref": "main"} + {"name": "claude-code-default", "repo": "molecule-ai/molecule-ai-workspace-template-claude-code", "ref": "950d39a490c12ba0f355ed8ca03b23fda9884823"}, + {"name": "hermes", "repo": "molecule-ai/molecule-ai-workspace-template-hermes", "ref": "ca7e1efafb982f6d97a6a188067fd9198b2f18b7"}, + {"name": "openclaw", "repo": "molecule-ai/molecule-ai-workspace-template-openclaw", "ref": "143e69b56f2530433141f5a87373e8a76578c52e"}, + {"name": "codex", "repo": "molecule-ai/molecule-ai-workspace-template-codex", "ref": "070447a0afdf66ae6f2bb166ac3e2b2884456951"}, + {"name": "google-adk", "repo": "molecule-ai/molecule-ai-workspace-template-google-adk", "ref": "3f9fd7ef6ea4dd912bb65446607f3c3c991ea76e"}, + {"name": "seo-agent", "repo": "molecule-ai/molecule-ai-workspace-template-seo-agent", "ref": "51bee3c0de03c7d38ddc153e7b9dc70e19ededd6"} ], "org_templates": [ - {"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "main"}, - {"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "main"}, - {"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "main"} + {"name": "molecule-dev", "repo": "molecule-ai/molecule-ai-org-template-molecule-dev", "ref": "990d7b23f65dadd7afe05958a77eeb74082b4feb"}, + {"name": "molecule-worker-gemini", "repo": "molecule-ai/molecule-ai-org-template-molecule-worker-gemini", "ref": "1d1205a8711a3bec8f6a8ddd087fc4ae430e2395"}, + {"name": "ux-ab-lab", "repo": "molecule-ai/molecule-ai-org-template-ux-ab-lab", "ref": "76fe4821153cc35b3f073f6e4f18f766f5f3a251"} ] } diff --git a/scripts/clone-manifest.sh b/scripts/clone-manifest.sh index bbb4fd446..76984224f 100755 --- a/scripts/clone-manifest.sh +++ b/scripts/clone-manifest.sh @@ -68,6 +68,18 @@ clone_one_with_retry() { if [ "$ref" = "main" ]; then if git clone --depth=1 -q "$url" "$tdir/$name"; then return 0; fi + elif echo "$ref" | grep -qE '^[0-9a-f]{40}$'; then + # Pinned SHA (RFC #2927 manifest ref-pinning): `--branch ` fails + # with "Remote branch not found" because git's --branch only + # resolves named refs. Clone the full repo (no --depth so the SHA + # is reachable in history) then check out the pinned SHA. + if git clone -q "$url" "$tdir/$name" \ + && (cd "$tdir/$name" && git checkout -q "$ref"); then + # Drop .git after checkout — we only need the tree (matches + # the post-clone .git strip below in clone_category). + rm -rf "$tdir/$name/.git" + return 0 + fi else if git clone --depth=1 -q --branch "$ref" "$url" "$tdir/$name"; then return 0; fi fi diff --git a/tools/gate-check-v3/gate_check.py b/tools/gate-check-v3/gate_check.py index eb88b82bc..a21904547 100644 --- a/tools/gate-check-v3/gate_check.py +++ b/tools/gate-check-v3/gate_check.py @@ -503,18 +503,62 @@ def _pr_diff_stats(pr_number: int, repo: str) -> dict: } -def _pr_has_refactor_exemption(pr_data: dict) -> bool: +def _label_appliers(pr_number: int, repo: str) -> dict[str, set[str]]: + """Fetch the issue timeline and return a mapping from lowercase label + name to the set of logins that applied that label. + + Fail-closed: if the timeline API is unreachable or returns unexpected + data, returns an empty mapping so no label exemption can be proven. + """ + owner, name = repo.split("/", 1) + try: + events = api_list(f"/repos/{owner}/{name}/issues/{pr_number}/timeline") + except GiteaError: + return {} + appliers: dict[str, set[str]] = {} + for event in events: + if event.get("type") != "label": + continue + # Gitea encodes label ADD as body="1" and label REMOVE as body="". + # Only ADD events count as applying the label; counting removals would + # let a non-author who *removed* an exempt label enable an author who + # re-added it — inverting the self-exemption guard (core#2884). + if (event.get("body") or "") != "1": + continue + label = event.get("label") or {} + label_name = (label.get("name") or "").lower() + user = (event.get("user") or {}).get("login", "") + if not label_name or not user: + continue + appliers.setdefault(label_name, set()).add(user) + return appliers + + +def _pr_has_refactor_exemption(pr_data: dict, pr_number: int, repo: str) -> bool: """True iff the PR has a label in REFACTOR_EXEMPT_LABELS (e.g. 'refactor', 'migration', 'generated', 'vendor') that opts it out of the destructive - BLOCK. The exemption is LABEL-only (not PR-body-marker) because labels - are the canonical signal already understood by the rest of the gate - stack. Refactor-exempt PRs still get the WARN tier (not CLEAR) so - operators can see the destructive diff size — they just don't get - a BLOCK. + BLOCK, AND that label was applied by someone other than the PR author. + + Defense-in-depth against self-exemption (core#2884): a PR author with + label-write permission cannot attach an exempt label to their own + destructive diff and downgrade a BLOCK to WARN. The exemption is still + LABEL-based (not PR-body-marker) because labels are the canonical signal + already understood by the rest of the gate stack. + + Refactor-exempt PRs still get the WARN tier (not CLEAR) so operators + can see the destructive diff size — they just don't get a BLOCK. """ + author = (pr_data.get("user") or {}).get("login", "") + appliers = _label_appliers(pr_number, repo) for label in pr_data.get("labels", []) or []: name = (label.get("name") or "").lower() - if name in REFACTOR_EXEMPT_LABELS: + if name not in REFACTOR_EXEMPT_LABELS: + continue + # Require proof that a non-author applied this label. If we cannot + # determine who applied it (timeline missing / API error), fail + # closed and do not honor the exemption. + label_appliers = appliers.get(name, set()) + if any(login != author for login in label_appliers): return True return False @@ -531,7 +575,8 @@ def signal_7_destructive_diff_guard( from the PR-files API. - branch divergence (base.sha vs current target-branch HEAD) and commits_behind via signal_4's helper. - - refactor exemption via PR labels. + - refactor exemption via PR labels applied by a non-author (core#2884 + defense-in-depth: author-self-applied exempt labels are ignored). Verdict: - BLOCK when (files>=200 OR net_deleted>=5000 OR deleted>=10000) @@ -570,7 +615,7 @@ def signal_7_destructive_diff_guard( files_changed = stats["files_changed"] deleted_lines = stats["deleted_lines"] net_deleted = stats["net_deleted_lines"] - has_refactor_exemption = _pr_has_refactor_exemption(pr_data) + has_refactor_exemption = _pr_has_refactor_exemption(pr_data, pr_number, repo) # High-confidence destructive condition: # - any of the destructive diff thresholds diff --git a/tools/gate-check-v3/test_gate_check.py b/tools/gate-check-v3/test_gate_check.py index 9b2f1654d..6e46d1063 100644 --- a/tools/gate-check-v3/test_gate_check.py +++ b/tools/gate-check-v3/test_gate_check.py @@ -692,6 +692,7 @@ def test_signal_7_refactor_label_exempts_block(monkeypatch): monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, }) + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"refactor": {"core-lead"}}) result = mod.signal_7_destructive_diff_guard( 200, "molecule-ai/molecule-core", pr_data={"labels": [{"name": "refactor"}, {"name": "needs_review"}]}, @@ -710,6 +711,7 @@ def test_signal_7_migration_label_exempts_block(monkeypatch): monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { "files_changed": 300, "added_lines": 100, "deleted_lines": 8000, "net_deleted_lines": 7900, }) + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"migration": {"core-lead"}}) result = mod.signal_7_destructive_diff_guard( 200, "molecule-ai/molecule-core", pr_data={"labels": [{"name": "migration"}]}, @@ -728,6 +730,7 @@ def test_signal_7_generated_label_exempts_block(monkeypatch): monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { "files_changed": 250, "added_lines": 50, "deleted_lines": 100, "net_deleted_lines": 50, }) + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"generated": {"core-lead"}}) result = mod.signal_7_destructive_diff_guard( 200, "molecule-ai/molecule-core", pr_data={"labels": [{"name": "generated"}]}, @@ -745,6 +748,7 @@ def test_signal_7_vendor_label_exempts_block(monkeypatch): monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { "files_changed": 300, "added_lines": 10, "deleted_lines": 20000, "net_deleted_lines": 19990, }) + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"vendor": {"core-lead"}}) result = mod.signal_7_destructive_diff_guard( 200, "molecule-ai/molecule-core", pr_data={"labels": [{"name": "vendor"}]}, @@ -762,6 +766,7 @@ def test_signal_7_case_insensitive_label_match(monkeypatch): monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, }) + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"refactor": {"core-lead"}}) result = mod.signal_7_destructive_diff_guard( 200, "molecule-ai/molecule-core", pr_data={"labels": [{"name": "Refactor"}]}, @@ -770,6 +775,116 @@ def test_signal_7_case_insensitive_label_match(monkeypatch): assert result["refactor_exemption"] is True +def test_signal_7_author_self_applied_refactor_label_does_not_exempt(monkeypatch): + """core#2884: an author who can write labels must not be able to + self-apply 'refactor' and downgrade their own destructive diff + from BLOCK to WARN.""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + # Author applied the exempt label themselves — must NOT be honored. + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"refactor": {"agent-dev-a"}}) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={ + "user": {"login": "agent-dev-a"}, + "labels": [{"name": "refactor"}], + }, + ) + assert result["verdict"] == "BLOCKED" + assert result["refactor_exemption"] is False + assert "destructive diff" in result["reason"] + + +def test_signal_7_non_author_label_remove_does_not_enable_author_self_exempt(monkeypatch): + """core#2884 follow-up: a non-author who REMOVED the exempt label must + not be counted as an applier. If the only non-author timeline event is a + removal, the author-applied label is still treated as self-exempt and the + destructive diff remains BLOCKED.""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + # Timeline has a removal by a non-author but no non-author ADD. + # The helper filters removals, so only the author add remains. + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"refactor": {"agent-dev-a"}}) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={ + "user": {"login": "agent-dev-a"}, + "labels": [{"name": "refactor"}], + }, + ) + assert result["verdict"] == "BLOCKED" + assert result["refactor_exemption"] is False + + +def test_label_appliers_ignores_label_removals(monkeypatch): + """_label_appliers must only count label ADD events (body=='1'), not + removals (body==''), so a non-author removal cannot bypass the actor check.""" + mod = load_gate_check() + + def fake_api_list(path): + if path == "/repos/molecule-ai/molecule-core/issues/200/timeline": + return [ + { + "id": 1, + "type": "label", + "body": "1", # ADD by author + "user": {"login": "agent-dev-a"}, + "label": {"name": "refactor"}, + }, + { + "id": 2, + "type": "label", + "body": "", # REMOVE by non-author — must be ignored + "user": {"login": "core-lead"}, + "label": {"name": "refactor"}, + }, + ] + raise AssertionError(f"unexpected api_list: {path}") + + monkeypatch.setattr(mod, "api_list", fake_api_list) + appliers = mod._label_appliers(200, "molecule-ai/molecule-core") + assert appliers == {"refactor": {"agent-dev-a"}} + + +def test_signal_7_refactor_exemption_rejected_when_timeline_unavailable(monkeypatch): + """If the timeline API cannot prove a non-author applied the label, + fail closed and do not honor the exemption.""" + mod = load_gate_check() + monkeypatch.setattr(mod, "signal_4_branch_divergence", lambda *a, **kw: { + "signal": "branch_divergence", "verdict": "WARNING", + "diverged": True, "commits_behind": 25, "pr_files_count": 250, + "inherited_files": [], "new_work_files": [], "inherited_fraction": 0.5, + }) + monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { + "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, + }) + # Timeline API returned nothing / errored — no proof of non-author applier. + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {}) + result = mod.signal_7_destructive_diff_guard( + 200, "molecule-ai/molecule-core", + pr_data={ + "user": {"login": "agent-dev-a"}, + "labels": [{"name": "refactor"}], + }, + ) + assert result["verdict"] == "BLOCKED" + assert result["refactor_exemption"] is False + + def test_signal_7_files_api_error_returns_warning(monkeypatch): """A transient PR-files API error must surface as WARN, not BLOCK (transient failure shouldn't gate-block a real PR).""" @@ -816,6 +931,7 @@ def test_signal_7_refactor_exempt_with_still_high_diff_surfaces_numbers(monkeypa monkeypatch.setattr(mod, "_pr_diff_stats", lambda *a, **kw: { "files_changed": 481, "added_lines": 1000, "deleted_lines": 55800, "net_deleted_lines": 54800, }) + monkeypatch.setattr(mod, "_label_appliers", lambda *a, **kw: {"refactor": {"core-lead"}}) result = mod.signal_7_destructive_diff_guard( 200, "molecule-ai/molecule-core", pr_data={"labels": [{"name": "refactor"}]}, diff --git a/workspace-server/internal/handlers/manifest_pinning_test.go b/workspace-server/internal/handlers/manifest_pinning_test.go new file mode 100644 index 000000000..36d7a8f69 --- /dev/null +++ b/workspace-server/internal/handlers/manifest_pinning_test.go @@ -0,0 +1,298 @@ +// manifest_pinning_test.go — RFC #2927 manifest ref-pinning contract +// +// Pins every manifest entry to an immutable commit SHA. The previous +// `ref:main` exposure made provisioning non-reproducible — a merge to +// ANY template's `main` instantly reached every subsequent provision, +// with no version gate, no staging boundary, and no audit of which +// content shipped. Acute case: the newly-added platform-agent entry +// floated on `main` while PR #1 (`config.yaml`) was WIP/unmerged → a +// provision today fetched a partial template → runtime MISSING_MODEL +// fail-closed. +// +// Contract (pinned in manifest.json's `_pinning_contract` field): +// (1) Every entry's `ref` is a 40-char commit SHA (not a branch, +// not a mutable tag). Bumping a pin is a reviewed PR. +// (2) The pinned SHA is reachable in the named repo (the Gitea +// API serves it — proves we didn't typo a SHA). +// (3) For workspace_template entries, the pinned ref's tree +// contains `config.yaml` (the file carrying model + runtime). +// A pinned ref without config.yaml is a partial-template +// landmine that the manifest's CI lane must catch — provision- +// time discovery is too late (the concierge already boots). +// +// PLATFORM-AGENT: not pinned here. Per #2919, the platform-agent +// template's `config.yaml` is being added in template PR #1; once +// merged AND config.yaml exists at the pinned SHA, add the entry +// here in a follow-up PR. + +package handlers + +import ( + "encoding/json" + "net/http" + "os" + "regexp" + "testing" + "time" +) + +// readRealManifestForPinningTest finds molecule-core/manifest.json by +// walking up from the test file's directory. The test lives at +// workspace-server/internal/handlers/; molecule-core/manifest.json +// is 3 levels up. This works in both the local dev env AND in CI +// (where go test runs the package from the package dir, the same +// relative walk applies). +func readRealManifestForPinningTest(t *testing.T) ([]byte, error) { + t.Helper() + candidates := []string{ + "/app/manifest.json", // production container layout + "manifest.json", // cwd (package dir on CI) + "../../manifest.json", + "../../../manifest.json", + } + // Also try walking up from the test's CWD (handles workspaces + // deeper than 3 levels; robust to repo restructuring). + for _, c := range candidates { + if data, err := os.ReadFile(c); err == nil { + return data, nil + } + } + return nil, os.ErrNotExist +} + +// shaPattern matches a 40-char lowercase hex string (Gitea commit SHA). +var shaPattern = regexp.MustCompile(`^[0-9a-f]{40}$`) + +// TestManifest_RefPinning_AllEntriesAreCommitSHAs is the static (no +// network) part of the pinning contract — every ref is a 40-char +// lowercase hex string. Failing this test means someone reintroduced +// a floating ref (e.g., "main", a tag, a branch) and the manifest +// has REGRESSED to the pre-#2927 non-reproducible state. The +// complementary network-dependent tests below (TestManifest_RefPinning_*) +// run only when Gitea is reachable; this one always runs. +func TestManifest_RefPinning_AllEntriesAreCommitSHAs(t *testing.T) { + data, err := readRealManifestForPinningTest(t) + if err != nil { + t.Skipf("manifest.json not readable from any candidate path: %v", err) + } + // Parse just enough to enumerate entries. Re-using the production + // manifestFile type (in runtime_registry.go) keeps the schema + // test contract in one place; if the schema diverges from + // reality, runtime_registry_test.go catches it. + var m struct { + Plugins []manifestEntry `json:"plugins"` + WorkspaceTemplates []manifestEntry `json:"workspace_templates"` + OrgTemplates []manifestEntry `json:"org_templates"` + } + if err := json.Unmarshal(data, &m); err != nil { + t.Fatalf("manifest parse failed: %v", err) + } + all := append(append([]manifestEntry{}, m.Plugins...), m.WorkspaceTemplates...) + all = append(all, m.OrgTemplates...) + + if len(all) == 0 { + t.Fatalf("manifest has no entries (or failed to load)") + } + + for _, e := range all { + if e.Name == "" { + t.Errorf("entry with empty name (ref=%q)", e.Ref) + continue + } + if e.Repo == "" { + t.Errorf("entry %q has empty repo", e.Name) + continue + } + if !shaPattern.MatchString(e.Ref) { + t.Errorf("entry %q (%s): ref=%q is NOT a 40-char commit SHA — manifest is floating on a non-SHA ref, violating the RFC #2927 pinning contract. Bump the pin to a specific commit SHA in a reviewed PR.", e.Name, e.Repo, e.Ref) + } + } +} + +// giteaReachableForTest probes git.moleculesai.app with a short +// timeout. Returns true if the host responds (any status) within +// 3s, false otherwise. Lets the dynamic pinning tests skip cleanly +// in offline / no-network CI lanes. +func giteaReachableForTest() bool { + client := &http.Client{Timeout: 3 * time.Second} + req, _ := http.NewRequest("GET", "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-ai-workspace-template-claude-code", nil) + if auth := giteaBasicAuthForTestProbe(); auth != "" { + req.Header.Set("Authorization", auth) + } + resp, err := client.Do(req) + if err != nil { + return false + } + resp.Body.Close() + return true +} + +// giteaBasicAuthForTest returns an Authorization header value built from +// the Gitea credentials available in the test env. Order of preference +// matches the runtime's giteaTemplateAssetFetcher (cmd/server/main.go +// and internal/provisioner/localbuild.go read MOLECULE_GITEA_TOKEN as +// the SSOT token for private templates): +// 1. MOLECULE_GITEA_TOKEN — bearer token (matches runtime; gives +// access to private repos like molecule-ai-workspace-template-google-adk +// and molecule-ai-workspace-template-seo-agent). +// 2. GIT_HTTP_USERNAME + GIT_HTTP_PASSWORD — basic auth (legacy +// CI path; some jobs set these for git clone URLs). +// 3. empty — public-only assertions only; private-repo assertions +// return 404 (test fails-closed with a clear message). +func giteaBasicAuthForTest(t *testing.T) string { + t.Helper() + if tok := os.Getenv("MOLECULE_GITEA_TOKEN"); tok != "" { + // Gitea bearer-token auth (header value: "token "). + // Matches the runtime's giteaTemplateAssetFetcher path so + // the test validates the SAME auth scope the runtime uses. + return "token " + tok + } + user := os.Getenv("GIT_HTTP_USERNAME") + pass := os.Getenv("GIT_HTTP_PASSWORD") + if user == "" || pass == "" { + return "" + } + // Use Go's net/http basic auth, which is a stdlib-supported + // credential scheme (not a custom encoding). + req, _ := http.NewRequest("GET", "https://example.invalid/", nil) + req.SetBasicAuth(user, pass) + return req.Header.Get("Authorization") +} + +// giteaBasicAuthForTestProbe is the same as giteaBasicAuthForTest +// but without the *testing.T parameter so giteaReachableForTest +// (called at module-init time before any *testing.T exists) can +// still emit auth. +func giteaBasicAuthForTestProbe() string { + if tok := os.Getenv("MOLECULE_GITEA_TOKEN"); tok != "" { + return "token " + tok + } + user := os.Getenv("GIT_HTTP_USERNAME") + pass := os.Getenv("GIT_HTTP_PASSWORD") + if user == "" || pass == "" { + return "" + } + req, _ := http.NewRequest("GET", "https://example.invalid/", nil) + req.SetBasicAuth(user, pass) + return req.Header.Get("Authorization") +} + +// TestManifest_RefPinning_AllSHAsReachable asserts the network-level +// half of the contract — every pinned SHA is a real commit in the +// named repo (the Gitea API serves it). Catches a typo'd SHA. Skips +// if Gitea isn't reachable (offline CI). +func TestManifest_RefPinning_AllSHAsReachable(t *testing.T) { + if !giteaReachableForTest() { + t.Skip("Gitea unreachable (offline CI lane); skipping dynamic pinning reachability test") + } + data, err := readRealManifestForPinningTest(t) + if err != nil { + t.Skipf("manifest.json not readable: %v", err) + } + var m struct { + Plugins []manifestEntry `json:"plugins"` + WorkspaceTemplates []manifestEntry `json:"workspace_templates"` + OrgTemplates []manifestEntry `json:"org_templates"` + } + if err := json.Unmarshal(data, &m); err != nil { + t.Fatalf("manifest parse: %v", err) + } + all := append(append([]manifestEntry{}, m.Plugins...), m.WorkspaceTemplates...) + all = append(all, m.OrgTemplates...) + + client := &http.Client{Timeout: 10 * time.Second} + auth := giteaBasicAuthForTest(t) + for _, e := range all { + // GET /api/v1/repos/{owner}/{repo}/git/commits/{sha} + // Returns 200 if the SHA exists in the repo, 404 otherwise. + // NOTE: the commit-lookup endpoint requires the same auth as + // refs/heads (the API treats unauth'd requests as 404 for + // private repos, even when the SHA is correct). The + // helper below injects the agent's Gitea basic-auth header + // (the same one used by the runtime's giteaTemplateAssetFetcher). + url := "https://git.moleculesai.app/api/v1/repos/" + e.Repo + "/git/commits/" + e.Ref + req, _ := http.NewRequest("GET", url, nil) + req.Header.Set("Authorization", auth) + resp, err := client.Do(req) + if err != nil { + t.Errorf("entry %q (%s): ref %q — git commit lookup failed: %v", e.Name, e.Repo, e.Ref, err) + continue + } + resp.Body.Close() + if resp.StatusCode == 404 { + t.Errorf("entry %q (%s): ref %q — Gitea returns 404. Pin is to a non-existent commit OR auth is insufficient. Bump to a real SHA.", e.Name, e.Repo, e.Ref) + } else if resp.StatusCode != 200 { + t.Errorf("entry %q (%s): ref %q — Gitea returns HTTP %d", e.Name, e.Repo, e.Ref, resp.StatusCode) + } + } +} + +// TestManifest_RefPinning_WorkspaceTemplatesIncludeConfigYAML asserts +// the completeness half of the contract — every workspace_template +// entry's pinned ref has `config.yaml` in its tree. The partial- +// template landmine (template exists but `config.yaml` doesn't) +// converts to a runtime MISSING_MODEL fail-closed at provision. +// Catching it at the manifest's CI lane (this test) is the load- +// bearing guard. Skips if Gitea isn't reachable. +func TestManifest_RefPinning_WorkspaceTemplatesIncludeConfigYAML(t *testing.T) { + if !giteaReachableForTest() { + t.Skip("Gitea unreachable (offline CI lane); skipping dynamic pinning completeness test") + } + data, err := readRealManifestForPinningTest(t) + if err != nil { + t.Skipf("manifest.json not readable: %v", err) + } + var m struct { + WorkspaceTemplates []manifestEntry `json:"workspace_templates"` + } + if err := json.Unmarshal(data, &m); err != nil { + t.Fatalf("manifest parse: %v", err) + } + if len(m.WorkspaceTemplates) == 0 { + t.Fatal("no workspace_templates entries (test invariant broken)") + } + + client := &http.Client{Timeout: 10 * time.Second} + auth := giteaBasicAuthForTest(t) + for _, e := range m.WorkspaceTemplates { + // GET /api/v1/repos/{owner}/{repo}/git/trees/{sha}?recursive=true + // Returns 200 + tree with path-keyed entries if the tree is + // accessible. We check for any path ending in /config.yaml + // (templates have it at the root). + url := "https://git.moleculesai.app/api/v1/repos/" + e.Repo + "/git/trees/" + e.Ref + "?recursive=1" + req, _ := http.NewRequest("GET", url, nil) + req.Header.Set("Authorization", auth) + resp, err := client.Do(req) + if err != nil { + t.Errorf("entry %q (%s): tree lookup at %q failed: %v", e.Name, e.Repo, e.Ref, err) + continue + } + if resp.StatusCode != 200 { + t.Errorf("entry %q (%s): tree lookup at %q returned HTTP %d", e.Name, e.Repo, e.Ref, resp.StatusCode) + resp.Body.Close() + continue + } + var treeResp struct { + Tree []struct { + Path string `json:"path"` + Type string `json:"type"` + } `json:"tree"` + } + if err := json.NewDecoder(resp.Body).Decode(&treeResp); err != nil { + t.Errorf("entry %q (%s): tree JSON parse failed: %v", e.Name, e.Repo, err) + resp.Body.Close() + continue + } + resp.Body.Close() + hasConfig := false + for _, n := range treeResp.Tree { + if n.Type == "blob" && (n.Path == "config.yaml" || n.Path == "./config.yaml") { + hasConfig = true + break + } + } + if !hasConfig { + t.Errorf("entry %q (%s): pinned ref %q has NO config.yaml in its tree. This is the partial-template landmine — a provision of this template today would land no config.yaml in /configs and the runtime would MISSING_MODEL fail-closed. Either: (a) bump the pin to a SHA that includes config.yaml, OR (b) add config.yaml to the template and bump the pin.", e.Name, e.Repo, e.Ref) + } + } +}