chore(dead-code): remove unused QueueDepth function

QueueDepth was added for Phase 2/3 busy-return response visibility but was never wired to a caller. The inline depth query in EnqueueA2A serves today's enqueue response, making this function dead code. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Merge pull request 'fix(provision): platform-managed workspace must fail-closed when CP proxy env absent (#2162)' (#2164) from fix/2162-platform-managed-fail-closed-missing-proxy into main
2026-06-03 07:23:28 +00:00 · 2026-06-03 06:21:10 +00:00 · 2026-06-03 05:54:23 +00:00 · 2026-06-03 05:40:44 +00:00 · 2026-06-03 02:04:35 +00:00
12 changed files with 145 additions and 464 deletions
@@ -1,171 +0,0 @@
-"""Live-fire regression test for #2159 — gate auto-fire runtime verification.
-
-Static tests (test_gate_review_auto_fire.py) validate that the workflow YAML
-is structurally correct. This test validates the *runtime* path: submitting an
-APPROVED review to a PR whose head contains the current gate workflows causes
-Gitea Actions to queue the qa-review + security-review workflows and POST the
-branch-protection-required (pull_request_target) contexts within a reasonable
-window.
-
-Skipped when Gitea API credentials are not available. Intended for:
-  - manual developer verification
-  - CI jobs provisioned with a service-account token
-
-Environment:
-  GITEA_HOST            — default: git.moleculesai.app
-  GITEA_TOKEN           — token with read:repository + write:issues (for review POST)
-  REPO                  — default: molecule-ai/molecule-core
-  LIVEFIRE_PR_NUMBER    — optional; if omitted the test tries to find a
-                          suitable open PR automatically, or skips.
-  LIVEFIRE_TIMEOUT_SEC  — default: 120
-"""
-
-import base64
-import json
-import os
-import time
-import urllib.error
-import urllib.request
-from pathlib import Path
-
-import pytest
-
-import yaml
-
-GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
-GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
-REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
-LIVEFIRE_PR_NUMBER = os.environ.get("LIVEFIRE_PR_NUMBER", "")
-LIVEFIRE_TIMEOUT_SEC = int(os.environ.get("LIVEFIRE_TIMEOUT_SEC", "120"))
-
-REQUIRED_CONTEXTS = [
-    "qa-review / approved (pull_request_target)",
-    "security-review / approved (pull_request_target)",
-]
-
-skip_no_token = pytest.mark.skipif(
-    not GITEA_TOKEN,
-    reason="GITEA_TOKEN not set — live-fire test requires API credentials",
-)
-
-
-def _api(method: str, path: str, body: dict | None = None) -> tuple[int, dict]:
-    url = f"https://{GITEA_HOST}/api/v1{path}"
-    headers = {
-        "Authorization": f"token {GITEA_TOKEN}",
-        "Content-Type": "application/json",
-    }
-    data = json.dumps(body).encode() if body else None
-    req = urllib.request.Request(url, data=data, headers=headers, method=method)
-    try:
-        with urllib.request.urlopen(req, timeout=30) as resp:
-            raw = resp.read()
-            code = resp.status
-    except urllib.error.HTTPError as exc:
-        raw = exc.read()
-        code = exc.code
-    payload = json.loads(raw) if raw else {}
-    return code, payload
-
-
-def _get_pr(number: int) -> dict:
-    code, pr = _api("GET", f"/repos/{REPO}/pulls/{number}")
-    if code != 200:
-        pytest.fail(f"GET /pulls/{number} returned HTTP {code}: {pr}")
-    return pr
-
-
-def _list_open_prs() -> list[dict]:
-    code, prs = _api("GET", f"/repos/{REPO}/pulls?state=open&limit=50")
-    if code != 200:
-        pytest.fail(f"GET /pulls?state=open returned HTTP {code}: {prs}")
-    return prs
-
-
-def _pr_has_trigger_in_head(pr: dict) -> bool:
-    """Return True if the PR head contains pull_request_review in both workflows."""
-    head_sha = pr["head"]["sha"]
-    for wf_name in ("qa-review.yml", "security-review.yml"):
-        path = f"/repos/{REPO}/contents/.gitea/workflows/{wf_name}?ref={head_sha}"
-        code, payload = _api("GET", path)
-        if code != 200:
-            return False
-        raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
-        wf = yaml.safe_load(raw)
-        on = wf.get(True) or wf.get("on") or {}
-        if "pull_request_review" not in on:
-            return False
-    return True
-
-
-def _find_suitable_pr() -> dict:
-    if LIVEFIRE_PR_NUMBER:
-        pr = _get_pr(int(LIVEFIRE_PR_NUMBER))
-        if pr.get("state") != "open":
-            pytest.skip(f"PR {LIVEFIRE_PR_NUMBER} is not open")
-        return pr
-
-    prs = _list_open_prs()
-    for pr in prs:
-        if _pr_has_trigger_in_head(pr):
-            return pr
-    pytest.skip("No open PR found whose head contains the pull_request_review trigger")
-
-
-def _submit_approved_review(pr_number: int) -> None:
-    code, _ = _api(
-        "POST",
-        f"/repos/{REPO}/pulls/{pr_number}/reviews",
-        {"body": "Live-fire test APPROVED review", "event": "APPROVE"},
-    )
-    # 200/201 = review created, 422 = review already exists (idempotent enough for our purposes)
-    if code not in (200, 201, 422):
-        pytest.fail(f"POST /pulls/{pr_number}/reviews returned HTTP {code}")
-
-
-def _poll_status_contexts(sha: str, timeout_sec: int = LIVEFIRE_TIMEOUT_SEC) -> dict[str, str]:
-    deadline = time.monotonic() + timeout_sec
-    found: dict[str, str] = {}
-    while time.monotonic() < deadline:
-        code, statuses = _api("GET", f"/repos/{REPO}/statuses/{sha}?limit=100")
-        if code == 200:
-            for st in statuses:
-                ctx = st.get("context", "")
-                if ctx in REQUIRED_CONTEXTS:
-                    found[ctx] = st.get("state", st.get("status", ""))
-        if all(ctx in found for ctx in REQUIRED_CONTEXTS):
-            return found
-        time.sleep(5)
-    return found
-
-
-@skip_no_token
-class TestGateAutoFireLive:
-    def test_auto_fire_posts_required_contexts(self):
-        """Submit APPROVED review; assert BP-required contexts appear within timeout."""
-        pr = _find_suitable_pr()
-        pr_number = pr["number"]
-        head_sha = pr["head"]["sha"]
-
-        # No pre-check is performed: contexts left over from a previous run on
-        # this head SHA are tolerated and will also satisfy the poll below.
-        _submit_approved_review(pr_number)
-
-        found = _poll_status_contexts(head_sha)
-
-        missing = [ctx for ctx in REQUIRED_CONTEXTS if ctx not in found]
-        if missing:
-            pytest.fail(
-                f"After {LIVEFIRE_TIMEOUT_SEC}s, contexts still missing: {missing}. "
-                f"Found: {found}. "
-                f"PR #{pr_number} head={head_sha}. "
-                f"This indicates the pull_request_review trigger did not fire at runtime."
-            )
-
-        # The contexts appeared — that's the proof of auto-fire.
-        # We do NOT assert success vs failure; the evaluator decides that.
-        # The point of #2159 is that the workflows QUEUE and POST at all.
-        for ctx, state in found.items():
-            assert state in ("pending", "success", "failure"), (
-                f"Unexpected state {state!r} for {ctx}"
-            )
@@ -1,142 +0,0 @@
-"""Stale-head diagnostic test for #2159.
-
-Deterministically reports whether a PR's HEAD contains the pull_request_review
-trigger in qa-review.yml and security-review.yml. If the trigger is absent,
-auto-fire on APPROVED review is impossible for that PR.
-
-This is used as a self-diagnostic for future stale-PR situations (PRs opened
-before #2157 merged, or branches cut from old bases).
-
-Environment:
-  GITEA_HOST  — default: git.moleculesai.app
-  GITEA_TOKEN — token with read:repository scope (optional; falls back to local files)
-  REPO        — default: molecule-ai/molecule-core
-  PR_NUMBER   — required when running against a real PR
-"""
-
-import base64
-import json
-import os
-import urllib.error
-import urllib.request
-from pathlib import Path
-
-import pytest
-
-import yaml
-
-GITEA_HOST = os.environ.get("GITEA_HOST", "git.moleculesai.app")
-GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
-REPO = os.environ.get("REPO", "molecule-ai/molecule-core")
-PR_NUMBER = os.environ.get("PR_NUMBER", "")
-
-ROOT = Path(__file__).resolve().parents[2]
-
-
-def _api(method: str, path: str) -> tuple[int, dict]:
-    url = f"https://{GITEA_HOST}/api/v1{path}"
-    headers = {"Authorization": f"token {GITEA_TOKEN}"}
-    req = urllib.request.Request(url, headers=headers, method=method)
-    try:
-        with urllib.request.urlopen(req, timeout=30) as resp:
-            return resp.status, json.loads(resp.read())
-    except urllib.error.HTTPError as exc:
-        return exc.code, json.loads(exc.read()) if exc.read() else {}
-
-
-def _fetch_workflow_from_ref(workflow_name: str, ref: str) -> dict:
-    path = f"/repos/{REPO}/contents/.gitea/workflows/{workflow_name}?ref={ref}"
-    code, payload = _api("GET", path)
-    if code != 200:
-        pytest.fail(
-            f"GET {path} returned HTTP {code}: {payload}. "
-            f"Cannot determine whether PR head contains the trigger."
-        )
-    raw = base64.b64decode(payload.get("content", "")).decode("utf-8")
-    return yaml.safe_load(raw)
-
-
-def _fetch_workflow_local(workflow_name: str) -> dict:
-    p = ROOT / "workflows" / workflow_name
-    if not p.exists():
-        pytest.fail(f"Local workflow file not found: {p}")
-    return yaml.safe_load(p.read_text())
-
-
-def _has_pull_request_review_trigger(wf: dict) -> bool:
-    on = wf.get(True) or wf.get("on") or {}
-    if isinstance(on, list):
-        return "pull_request_review" in on
-    if isinstance(on, dict):
-        return "pull_request_review" in on
-    return False
-
-
-def _diagnose_pr(pr_number: int) -> dict[str, bool]:
-    code, pr = _api("GET", f"/repos/{REPO}/pulls/{pr_number}")
-    if code != 200:
-        pytest.fail(f"GET /pulls/{pr_number} returned HTTP {code}: {pr}")
-
-    head_ref = pr["head"]["ref"]
-    head_sha = pr["head"]["sha"]
-
-    results: dict[str, bool] = {}
-    for wf_name in ("qa-review.yml", "security-review.yml"):
-        wf = _fetch_workflow_from_ref(wf_name, head_sha)
-        results[wf_name] = _has_pull_request_review_trigger(wf)
-
-    return {
-        "pr_number": pr_number,
-        "head_ref": head_ref,
-        "head_sha": head_sha,
-        "triggers": results,
-        "auto_fire_possible": all(results.values()),
-    }
-
-
-def _diagnose_local() -> dict[str, bool]:
-    results: dict[str, bool] = {}
-    for wf_name in ("qa-review.yml", "security-review.yml"):
-        wf = _fetch_workflow_local(wf_name)
-        results[wf_name] = _has_pull_request_review_trigger(wf)
-    return {
-        "pr_number": None,
-        "head_ref": "local-checkout",
-        "head_sha": None,
-        "triggers": results,
-        "auto_fire_possible": all(results.values()),
-    }
-
-
-class TestStaleHeadDiagnostic:
-    """Test deterministically reports 'auto-fire impossible for this PR' when
-    the PR head lacks the pull_request_review trigger.
-    """
-
-    def test_local_checkout_has_pull_request_review_trigger(self):
-        """Local files (the ones in this checkout) must contain the trigger.
-
-        This is the baseline: if the checkout itself is stale, every PR cut
-        from it will also be stale.
-        """
-        diag = _diagnose_local()
-        missing = [n for n, ok in diag["triggers"].items() if not ok]
-        if missing:
-            pytest.fail(
-                f"Local checkout is missing pull_request_review trigger in: {missing}. "
-                f"This branch cannot produce PRs that auto-fire."
-            )
-
-    @pytest.mark.skipif(not GITEA_TOKEN, reason="GITEA_TOKEN not set")
-    @pytest.mark.skipif(not PR_NUMBER, reason="PR_NUMBER not set")
-    def test_pr_head_has_pull_request_review_trigger(self):
-        """When PR_NUMBER is given, assert the PR head contains the trigger."""
-        diag = _diagnose_pr(int(PR_NUMBER))
-        if not diag["auto_fire_possible"]:
-            missing = [n for n, ok in diag["triggers"].items() if not ok]
-            pytest.fail(
-                f"Auto-fire impossible for PR #{diag['pr_number']}. "
-                f"Head ref={diag['head_ref']} sha={diag['head_sha']}. "
-                f"Missing trigger in: {missing}. "
-                f"This PR needs /qa-recheck + /security-recheck fallback, or a rebase onto current main."
-            )
@@ -1,131 +0,0 @@
-# Developer SOP — PR review gate auto-fire and stale-head handling
-
-> Last updated: 2026-06-03 (cp#2159 follow-up)
->
-> Applies to: all core-PR authors and reviewers on `molecule-core` and sibling
-> repos using the `qa-review` + `security-review` branch-protection gates.
-
---
-
-## 1. Gitea PR-head workflow-selection rule
-
-**Rule:** For `pull_request_target` and `pull_request_review` events, Gitea
-loads the workflow definition from the **PR's HEAD branch**, not from the
-base (`main`) branch.
-
-This is different from GitHub Actions, where `pull_request_target` always
-loads workflows from the base branch. Gitea's behaviour means:
-
-- A PR that was opened **before** the `pull_request_review` trigger was added
-to `qa-review.yml` / `security-review.yml` will **NOT** auto-fire on review,
-because its HEAD still contains the old workflow YAML (no trigger).
-
-- A PR that was opened **after** the trigger was added (or that has been
-rebased onto a commit containing the trigger) **WILL** auto-fire, because its
-HEAD contains the new workflow YAML.
-
-### Ops implication
-
-| PR head contains `pull_request_review` trigger? | Behaviour on APPROVED review |
-|---|---|
-| **Yes** (cut from current main, or rebased) | Workflows auto-queue, evaluate, and POST the `(pull_request_target)` context automatically. No slash-command needed. |
-| **No** (stale head, opened before #2157) | Nothing fires. Use `/qa-recheck` + `/security-recheck` slash-commands in a PR comment, OR rebase onto current main. |
-
---
-
-## 2. Standard core-PR flow (post-#2157)
-
-```
-1. Author opens PR from a branch based on current main
-   → qa-review + security-review workflows run on pull_request_target
-   → status contexts post (initial eval, usually red until reviews land)
-
-2. Reviewers submit real APPROVED reviews
-   → If PR head has the trigger: workflows AUTO-FIRE on pull_request_review
-   → Contexts flip green (or stay red if reviewer is not in team)
-
-3. [Optional] If contexts did not flip (stale head, event lost, etc.):
-   → Anyone can comment `/qa-recheck` or `/security-recheck`
-   → sop-checklist.yml refires the evaluator (read-only, idempotent)
-
-4. Both qa-review + security-review contexts are green
-   → Plain Do:merge (no force-merge needed)
-```
-
-### Key point
-
-The `/qa-recheck` and `/security-recheck` commands are a **backstop**, not the
-primary path. PRs cut from current main should auto-fire without manual
-intervention.
-
---
-
-## 3. Diagnosing a stale head
-
-If a PR has real team-member APPROVED reviews but the qa/security contexts
-remain red and no workflow run appears on the PR's "Actions" tab for the
-review event, the PR head is likely stale.
-
-### Quick check
-
-```bash
-# From the PR page, look at the head commit SHA, then:
-curl -sS "https://git.moleculesai.app/api/v1/repos/molecule-ai/molecule-core/contents/.gitea/workflows/qa-review.yml?ref=<HEAD_SHA>" \
-  | jq -r '.content' | base64 -d | grep -c 'pull_request_review'
-# 0  → stale head (no trigger in that version of the workflow)
-# >0 → trigger present; auto-fire SHOULD work (if it didn't, file a tracker)
-```
-
-### Automated diagnostic
-
-The test suite includes `test_gate_stale_head_diagnostic.py`, which reports
-"auto-fire impossible for this PR" when the head lacks the trigger. Run it
-in CI or locally with:
-
-```bash
-PR_NUMBER=123 python -m pytest .gitea/scripts/tests/test_gate_stale_head_diagnostic.py -v
-```
-
---
-
-## 4. Rebasing vs. slash-refire
-
-| Approach | When to use | Trade-off |
-|---|---|---|
-| **Rebase onto current main** | PR is genuinely stale (head lacks trigger OR head is far behind main) | Clean history, gets all recent fixes, but requires force-push and re-approval if the branch was protected |
-| **`/qa-recheck` + `/security-recheck`** | PR head is recent but the review event was missed, or you want to avoid rebase churn | Quick, no force-push, but does NOT fix a missing trigger in the head |
-
-**Do not** use slash-refire as a substitute for rebasing a stale head. If the
-workflow YAML in the PR head does not contain `pull_request_review`, no amount
-of rechecking will make auto-fire work.
-
---
-
-## 5. Live-fire verification
-
-The `test_gate_auto_fire_live.py` regression test exercises the full runtime
-path: it submits an APPROVED review to a test PR and polls for the
-`(pull_request_target)` status contexts. It is skipped when no API token is
-available, and is intended to catch runtime non-fire that static structural
-tests (e.g. `test_gate_review_auto_fire.py`) cannot detect.
-
-Run manually with:
-
-```bash
-export GITEA_HOST=git.moleculesai.app
-export GITEA_TOKEN=<your-token>
-export REPO=molecule-ai/molecule-core
-export LIVEFIRE_PR_NUMBER=<test-pr-number>
-python -m pytest .gitea/scripts/tests/test_gate_auto_fire_live.py -v
-```
-
---
-
-## References
-
-- #2159 — gate auto-trigger not firing (root cause: stale PR heads lacking
-the `pull_request_review` trigger, NOT a workflow code defect)
-- #765 — static structural regression test for gate configuration
-- #2157 — merged trigger addition (`pull_request_review` types: [submitted])
-- #2020 — milestone confirming gate infrastructure is stable
-- RFC#324 — qa-review + security-review design
@@ -246,20 +246,6 @@ func MarkQueueItemFailed(ctx context.Context, id, errMsg string) {
 	}
 }

-// QueueDepth returns the number of currently-queued (not dispatched/completed)
-// items for a workspace. Used by the busy-return response body so callers
-// can see how many ahead of them.
-func QueueDepth(ctx context.Context, workspaceID string) int {
-	var n int
-	if err := db.DB.QueryRowContext(ctx,
-		`SELECT COUNT(*) FROM a2a_queue WHERE workspace_id = $1 AND status = 'queued'`,
-		workspaceID,
-	).Scan(&n); err != nil {
-		log.Printf("A2AQueue: QueueDepth query failed for workspace %s: %v", workspaceID, err)
-	}
-	return n
-}
-
 // DropStaleQueueItems marks queued items older than maxAge as 'dropped' with a
 // system-generated reason so PM agents stop processing stale post-incident noise.
 // Called with a workspaceID to scope cleanup to one workspace, or empty to sweep
@@ -372,3 +372,78 @@ func TestApplyPlatformManagedLLMEnv_WorkspaceOriginCredExemptFromStrip(t *testin
 		t.Errorf("sqlmock expectations: %v", err)
 	}
 }
+
+// TestApplyPlatformManagedLLMEnv_MissingProxyEnvFailClosed is the #2162
+// regression guard. A platform-managed workspace whose CP proxy env is absent
+// must NOT start credential-less. The empty-proxy path must return
+// HasUsableLLMCred=false so the caller aborts with MISSING_PLATFORM_PROXY.
+//
+// Mutation: revert the early-return from HasUsableLLMCred=false to true
+// → workspace starts with zero credential → "container started but never
+// called /registry/register" (600s provision-timeout sweep) → this test RED.
+func TestApplyPlatformManagedLLMEnv_MissingProxyEnvFailClosed(t *testing.T) {
+	ctx := context.Background()
+	const wsID = "29b95be9-811e-4857-be36-1dafdbf4f697" // adk-demo failure workspace
+
+	mock := setupTestDB(t)
+	expectOverrideQuery(mock, wsID, "")
+
+	// No proxy env present — simulates the boot-race / misconfig path.
+	envVars := map[string]string{}
+	res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "moonshot/kimi-k2.6", nil)
+
+	if res.ResolvedMode != LLMBillingModePlatformManaged {
+		t.Fatalf("platform-managed model must stay platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
+	}
+	// THE FIX: must NOT report usable credential when none was injected.
+	if res.HasUsableLLMCred {
+		t.Fatalf("empty proxy env → HasUsableLLMCred must be false (fail-closed), got true — the #2162 dark-wedge class")
+	}
+	// No credential env must be present.
+	if _, present := envVars["ANTHROPIC_API_KEY"]; present {
+		t.Errorf("empty proxy env must NOT inject ANTHROPIC_API_KEY")
+	}
+	if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present {
+		t.Errorf("empty proxy env must NOT inject MOLECULE_LLM_USAGE_TOKEN")
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("sqlmock expectations: %v", err)
+	}
+}
+
+// TestApplyPlatformManagedLLMEnv_ProxyEnvPresentInjectsCredential is the
+// positive-path pair to the #2162 regression guard: when the CP proxy env IS
+// present, the platform-managed path must inject ANTHROPIC_API_KEY +
+// ANTHROPIC_BASE_URL for an Anthropic-native runtime and report
+// HasUsableLLMCred=true.
+func TestApplyPlatformManagedLLMEnv_ProxyEnvPresentInjectsCredential(t *testing.T) {
+	ctx := context.Background()
+	const wsID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
+
+	mock := setupTestDB(t)
+	expectOverrideQuery(mock, wsID, "")
+
+	envVars := map[string]string{}
+	// Simulate the CP proxy env being present (as it is in production).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.moleculesai.app/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.moleculesai.app/api/v1/internal/llm/anthropic/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "PLATFORM-PROXY-TOKEN")
+
+	res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "moonshot/kimi-k2.6", nil)
+
+	if res.ResolvedMode != LLMBillingModePlatformManaged {
+		t.Fatalf("expected platform_managed, got %q", res.ResolvedMode)
+	}
+	if !res.HasUsableLLMCred {
+		t.Fatalf("proxy env present → HasUsableLLMCred must be true, got false")
+	}
+	if envVars["ANTHROPIC_API_KEY"] != "PLATFORM-PROXY-TOKEN" {
+		t.Errorf("ANTHROPIC_API_KEY must be injected with the platform proxy token; got %q", envVars["ANTHROPIC_API_KEY"])
+	}
+	if envVars["ANTHROPIC_BASE_URL"] != "https://api.moleculesai.app/api/v1/internal/llm/anthropic/v1" {
+		t.Errorf("ANTHROPIC_BASE_URL must be injected with the platform anthropic proxy; got %q", envVars["ANTHROPIC_BASE_URL"])
+	}
+	if err := mock.ExpectationsWereMet(); err != nil {
+		t.Errorf("sqlmock expectations: %v", err)
+	}
+}
@@ -93,3 +93,16 @@ func formatMissingBYOKCredentialError(mode string) string {
 		mode,
 	)
 }
+
+// formatMissingPlatformProxyError builds the user-facing message for a
+// provision failure caused by a platform-managed workspace whose control-plane
+// proxy environment is absent (#2162). The platform-managed path requires
+// MOLECULE_LLM_BASE_URL + MOLECULE_LLM_USAGE_TOKEN (or their OPENAI_*
+// fallbacks) to inject a usable credential; without them the workspace must
+// NOT start credential-less.
+func formatMissingPlatformProxyError() string {
+	return "this workspace is configured for platform-managed LLM billing but the control-plane proxy is not ready. " +
+		"The required platform proxy env (MOLECULE_LLM_BASE_URL + MOLECULE_LLM_USAGE_TOKEN) is absent. " +
+		"This is usually a transient boot-race; retry in 30 seconds. If it persists, verify the platform proxy " +
+		"is configured for this tenant/runtime and contact the platform team."
+}
@@ -1003,12 +1003,13 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
 	anthropicBaseURL := firstNonEmptyEnv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "ANTHROPIC_BASE_URL")
 	token := firstNonEmptyEnv("MOLECULE_LLM_USAGE_TOKEN", "OPENAI_API_KEY")
 	if baseURL == "" || token == "" {
-		// Proxy not configured (boot race / misconfig). On the platform_managed
-		// path the workspace IS entitled to platform creds, so we do NOT strip
-		// here — but we report HasUsableLLMCred from whatever survived so the
-		// caller's fail-closed branch (non-platform only) is never reached on
-		// this path.
-		return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: true, Source: res.Source}
+		// Proxy not configured (boot race / misconfig). The platform_managed
+		// path REQUIRES the CP proxy env to inject a usable credential.
+		// Reporting HasUsableLLMCred=true here would start the workspace
+		// credential-less — the adk-demo dark-wedge class (#2162).
+		// Return false so the caller's fail-closed branch aborts with
+		// MISSING_PLATFORM_PROXY.
+		return platformLLMEnvResult{ResolvedMode: res.ResolvedMode, HasUsableLLMCred: false, Source: res.Source}
 	}
 	stripPlatformManagedLLMBypassEnv(envVars)

@@ -134,6 +134,11 @@ func TestProvisionWorkspaceAuto_NoBackendMarksFailed(t *testing.T) {
 // This is the regression-prevention test for the Design Director bug
 // where 7-of-7 sub-agents went down the Docker path on SaaS.
 func TestProvisionWorkspaceAuto_RoutesToCPWhenSet(t *testing.T) {
+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
+
 	mock := setupTestDB(t)
 	mock.MatchExpectationsInOrder(false)

@@ -597,6 +602,11 @@ func TestNoCallSiteCallsBareStop(t *testing.T) {
 // count without mocking out the retry helper itself, which would
 // invert the test contract — the retry IS the dispatcher's job here).
 func TestRestartWorkspaceAuto_RoutesToCPWhenSet(t *testing.T) {
+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
+
 	rec := &trackingCPProv{}
 	bcast := &concurrentSafeBroadcaster{}
 	h := NewWorkspaceHandler(bcast, nil, "http://localhost:8080", t.TempDir())
@@ -795,6 +805,11 @@ func TestResumeHandler_UsesProvisionWorkspaceAuto(t *testing.T) {
 // the async tests; the absence of `go` semantics is the load-bearing
 // distinction we're pinning.
 func TestProvisionWorkspaceAutoSync_RoutesToCPWhenSet(t *testing.T) {
+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
+
 	mock := setupTestDB(t)
 	mock.MatchExpectationsInOrder(false)
 	// provisionWorkspaceCP runs prepareProvisionContext synchronously, which
@@ -98,6 +98,11 @@ func (r *recordingCPProv) startedSet() map[string]struct{} {
 func TestProvisionWorkspaceCP_ConcurrentBurst_NoSilentDrop(t *testing.T) {
 	const numWorkspaces = 7

+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
+
 	mock := setupTestDB(t)

 	// Every goroutine runs prepareProvisionContext → mintWorkspaceSecrets
@@ -230,6 +230,18 @@ func (h *WorkspaceHandler) prepareProvisionContext(
 			Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "1994"},
 		}
 	}
+	// Fail closed for a platform-managed workspace whose CP proxy env is
+	// absent: do NOT start it credential-less (adk-demo dark-wedge class,
+	// #2162). The platform_managed path requires the proxy injection to
+	// produce a usable credential.
+	if llmRes.ResolvedMode == LLMBillingModePlatformManaged && !llmRes.HasUsableLLMCred {
+		msg := formatMissingPlatformProxyError()
+		log.Printf("Provisioner: ABORT workspace=%s — platform-managed billing mode but CP proxy env absent (MISSING_PLATFORM_PROXY, molecule-core#2162)", workspaceID)
+		return nil, &provisionAbort{
+			Msg:   msg,
+			Extra: map[string]interface{}{"error": msg, "code": "MISSING_PLATFORM_PROXY", "billing_mode": llmRes.ResolvedMode, "issue": "2162"},
+		}
+	}
 	applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model)
 	if payload.Role != "" {
 		envVars["MOLECULE_AGENT_ROLE"] = payload.Role
@@ -264,6 +264,11 @@ func TestPrepareProvisionContext_ParentIDInjection(t *testing.T) {
 		},
 	}

+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
+
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			mock := setupTestDB(t)
@@ -331,6 +336,10 @@ func TestPrepareProvisionContext_InjectsGitHTTPCredsFromPersonaToken(t *testing.
 		}
 	}
 	t.Setenv("MOLECULE_PERSONA_ROOT", root)
+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")

 	cases := []struct {
 		name         string
@@ -459,6 +468,10 @@ func TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken(t *testing.
 		t.Fatal(err)
 	}
 	t.Setenv("MOLECULE_PERSONA_ROOT", root)
+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")

 	mock := setupTestDB(t)
 	mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
@@ -1424,6 +1424,11 @@ func (s *stubFailingCPProv) IsRunning(_ context.Context, _ string) (bool, error)
 // the broadcast payload would surface every marker; the canned
 // "provisioning failed" message must surface none of them.
 func TestProvisionWorkspaceCP_NoInternalErrorsInBroadcast(t *testing.T) {
+	// Supply the CP proxy env so the platform-managed default does not abort
+	// with MISSING_PLATFORM_PROXY (molecule-core#2162).
+	t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
+	t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
+
 	mock := setupTestDB(t)

 	// loadWorkspaceSecrets queries global_secrets and workspace_secrets
Author	SHA1	Message	Date
Molecule AI Dev Engineer A (Kimi)	f12c38b3f6	chore(dead-code): remove unused QueueDepth function QueueDepth was added for Phase 2/3 busy-return response visibility but was never wired to a caller. The inline depth query in EnqueueA2A serves today's enqueue response, making this function dead code. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-06-03 07:23:28 +00:00
core-be	9aafcf7ad3	Merge pull request 'fix(provision): platform-managed workspace must fail-closed when CP proxy env absent (#2162 )' (#2164 ) from fix/2162-platform-managed-fail-closed-missing-proxy into main ci-arm64-advisory / fast-checks (push) Waiting to run Details CI / Python Lint & Test (push) Successful in 3s Details Block internal-flavored paths / Block forbidden paths (push) Successful in 8s Details Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (push) Failing after 1s Details E2E Chat / detect-changes (push) Successful in 7s Details CI / Detect changes (push) Successful in 13s Details Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (push) Successful in 4s Details E2E API Smoke Test / detect-changes (push) Successful in 13s Details E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 11s Details Secret scan / Scan diff for credential-shaped strings (push) Successful in 4s Details Handlers Postgres Integration / detect-changes (push) Successful in 9s Details Harness Replays / detect-changes (push) Successful in 9s Details CI / Canvas (Next.js) (push) Successful in 1s Details E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 1s Details Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (push) Successful in 10s Details CI / Canvas Deploy Reminder (push) Successful in 1s Details E2E Staging SaaS (full lifecycle) / pr-validate (push) Successful in 28s Details CI / Shellcheck (E2E scripts) (push) Successful in 30s Details Harness Replays / Harness Replays (push) Successful in 59s Details Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 1m7s Details E2E API Smoke Test / E2E API Smoke Test (push) Successful in 2m17s Details publish-workspace-server-image / build-and-push (push) Successful in 3m13s Details E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (push) Failing after 4m24s Details E2E Chat 
/ E2E Chat (push) Successful in 5m5s Details CI / Platform (Go) (push) Successful in 5m53s Details CI / all-required (push) Successful in 8s Details publish-workspace-server-image / Production auto-deploy (push) Successful in 5m40s Details	2026-06-03 06:21:10 +00:00
Molecule AI Dev Engineer A (Kimi)	9a28c88682	test(provision): supply CP proxy env in auto-routing tests (#2162 ) ci-arm64-advisory / fast-checks (pull_request) Waiting to run Details Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 3s Details CI / Python Lint & Test (pull_request) Successful in 2s Details Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Failing after 2s Details Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 3s Details E2E API Smoke Test / detect-changes (pull_request) Successful in 7s Details Handlers Postgres Integration / detect-changes (pull_request) Successful in 8s Details Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 4s Details gate-check-v3 / gate-check (pull_request_target) Successful in 3s Details qa-review / approved (pull_request_target) Failing after 3s Details sop-checklist / review-refire (pull_request_target) Has been skipped Details sop-checklist / all-items-acked (pull_request) acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, local-postgres-e2 Details sop-checklist / na-declarations (pull_request) N/A: (none) Details sop-checklist / all-items-acked (pull_request_target) Successful in 3s Details sop-tier-check / tier-check (pull_request_target) Successful in 3s Details E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 22s Details E2E Chat / detect-changes (pull_request) Successful in 23s Details Harness Replays / detect-changes (pull_request) Successful in 21s Details Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 21s Details CI / Detect changes (pull_request) Successful in 25s Details E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 24s Details CI / Shellcheck (E2E 
scripts) (pull_request) Successful in 1s Details CI / Canvas (Next.js) (pull_request) Successful in 8s Details Harness Replays / Harness Replays (pull_request) Successful in 7s Details E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 7s Details E2E Chat / E2E Chat (pull_request) Successful in 9s Details CI / Canvas Deploy Reminder (pull_request) Has been skipped Details lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m0s Details E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 51s Details Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m1s Details E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 3m58s Details CI / Platform (Go) (pull_request) Successful in 4m41s Details CI / all-required (pull_request) Successful in 2s Details qa-review / approved (pull_request_review) Has been skipped Details security-review / approved (pull_request_review) Has been skipped Details sop-tier-check / tier-check (pull_request_review) Successful in 6s Details security-review / approved (pull_request_target) Refired via /security-recheck by unknown Details audit-force-merge / audit (pull_request_target) Successful in 9s Details Three auto-routing tests (TestProvisionWorkspaceAuto_RoutesToCPWhenSet, TestRestartWorkspaceAuto_RoutesToCPWhenSet, TestProvisionWorkspaceAutoSync_RoutesToCPWhenSet) use models.CreateWorkspacePayload with Runtime="claude-code" and empty Model. This now derives to platform_managed billing mode, which fails closed with MISSING_PLATFORM_PROXY when the CP proxy env is absent. Supply the proxy env via t.Setenv so the tests reach the CP provisioner stub instead of aborting early. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-06-03 05:54:23 +00:00
Molecule AI Dev Engineer A (Kimi)	334d485efc	test(provision): supply CP proxy env in tests that hit platform-managed default ci-arm64-advisory / fast-checks (pull_request) Waiting to run Details CI / Python Lint & Test (pull_request) Successful in 3s Details Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 4s Details E2E Chat / detect-changes (pull_request) Successful in 6s Details Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Failing after 1s Details Handlers Postgres Integration / detect-changes (pull_request) Successful in 6s Details Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 3s Details gate-check-v3 / gate-check (pull_request_target) Successful in 3s Details Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 12s Details qa-review / approved (pull_request_target) Failing after 3s Details sop-checklist / review-refire (pull_request_target) Has been skipped Details sop-checklist / all-items-acked (pull_request) acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, local-postgres-e2 Details sop-checklist / na-declarations (pull_request) N/A: (none) Details sop-checklist / all-items-acked (pull_request_target) Successful in 4s Details security-review / approved (pull_request_target) Failing after 4s Details sop-tier-check / tier-check (pull_request_target) Successful in 5s Details E2E Chat / E2E Chat (pull_request) Successful in 8s Details Harness Replays / detect-changes (pull_request) Successful in 16s Details Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 15s Details CI / Detect changes (pull_request) Successful in 27s Details E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 26s Details E2E API Smoke Test / detect-changes 
(pull_request) Successful in 27s Details Harness Replays / Harness Replays (pull_request) Successful in 6s Details E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 28s Details CI / Canvas (Next.js) (pull_request) Successful in 1s Details CI / Canvas Deploy Reminder (pull_request) Has been skipped Details CI / Shellcheck (E2E scripts) (pull_request) Successful in 4s Details E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 4s Details lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 59s Details Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m1s Details E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m57s Details E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 4m19s Details CI / Platform (Go) (pull_request) Failing after 4m15s Details CI / all-required (pull_request) Has been skipped Details The #2162 fix adds a MISSING_PLATFORM_PROXY abort when a platform-managed workspace has no CP proxy env. Five existing tests call prepareProvisionContext or provisionWorkspaceCP with a payload that resolves to platform_managed but do not set MOLECULE_LLM_BASE_URL / MOLECULE_LLM_USAGE_TOKEN, causing them to abort early and fail their assertions. Add the proxy env to: - TestPrepareProvisionContext_ParentIDInjection - TestPrepareProvisionContext_InjectsGitHTTPCredsFromPersonaToken - TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken - TestProvisionWorkspaceCP_NoInternalErrorsInBroadcast - TestProvisionWorkspaceCP_ConcurrentBurst_NoSilentDrop Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-06-03 05:40:44 +00:00
Molecule AI Dev Engineer A (Kimi)	55e201157a	fix(provision): platform-managed workspace must fail-closed when CP proxy env absent (#2162 ) ci-arm64-advisory / fast-checks (pull_request) Waiting to run Details Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 3s Details CI / Python Lint & Test (pull_request) Successful in 10s Details Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Failing after 3s Details Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 7s Details Handlers Postgres Integration / detect-changes (pull_request) Successful in 11s Details Harness Replays / detect-changes (pull_request) Successful in 10s Details CI / Detect changes (pull_request) Successful in 16s Details Lint forbidden tenant-env keys / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 9s Details E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 17s Details qa-review / approved (pull_request_target) Failing after 3s Details E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 28s Details E2E API Smoke Test / detect-changes (pull_request) Successful in 30s Details E2E Chat / detect-changes (pull_request) Successful in 30s Details Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 16s Details sop-checklist / review-refire (pull_request_target) Has been skipped Details security-review / approved (pull_request_target) Failing after 3s Details sop-tier-check / tier-check (pull_request_target) Successful in 4s Details gate-check-v3 / gate-check (pull_request_target) Successful in 18s Details Harness Replays / Harness Replays (pull_request) Successful in 2s Details sop-checklist / all-items-acked (pull_request) acked: 0/7 — missing: comprehensive-testing, local-postgres-e2e, staging-smoke, +4 — body-unfilled: comprehensive-testing, local-postgres-e2 
Details sop-checklist / na-declarations (pull_request) N/A: (none) Details sop-checklist / all-items-acked (pull_request_target) Successful in 11s Details CI / Shellcheck (E2E scripts) (pull_request) Successful in 1s Details E2E Chat / E2E Chat (pull_request) Successful in 2s Details CI / Canvas (Next.js) (pull_request) Successful in 8s Details CI / Canvas Deploy Reminder (pull_request) Has been skipped Details E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 11s Details lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m23s Details Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m1s Details E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m56s Details CI / Platform (Go) (pull_request) Failing after 4m37s Details CI / all-required (pull_request) Has been skipped Details E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Failing after 8m39s Details applyPlatformManagedLLMEnv falsely reported HasUsableLLMCred:true when MOLECULE_LLM_BASE_URL + MOLECULE_LLM_USAGE_TOKEN were empty, causing claude-code workspaces to boot credential-less and hit the 600s provision-timeout sweep (adk-demo dark-wedge class). Fix: - Empty-proxy-env path returns HasUsableLLMCred:false (was true). - Caller aborts with MISSING_PLATFORM_PROXY, symmetric to the BYOK MISSING_BYOK_CREDENTIAL hard-fail. - User-facing error message explains the boot-race and retry path. Regression tests: - TestApplyPlatformManagedLLMEnv_MissingProxyEnvFailClosed: asserts HasUsableLLMCred=false when proxy env absent. - TestApplyPlatformManagedLLMEnv_ProxyEnvPresentInjectsCredential: asserts ANTHROPIC_API_KEY + ANTHROPIC_BASE_URL injected when proxy env present. Refs: #2162, #711 (BYOK fail-closed pattern), #1994 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-06-03 02:04:35 +00:00