diff --git a/.gitea/workflows/ci.yml b/.gitea/workflows/ci.yml index fff399810..739183154 100644 --- a/.gitea/workflows/ci.yml +++ b/.gitea/workflows/ci.yml @@ -357,6 +357,14 @@ jobs: name: Run E2E bash unit tests (no live infra) run: | bash tests/e2e/test_model_slug.sh + # molecule-core#1995 (#1994 follow-on): fail-direction proof for + # the A2A real-completion + byok-routing assertion helpers + # (lib/completion_assert.sh). Offline (no LLM, no network): it + # asserts an error-as-text payload FAILS the real-completion gate + # — the exact trap the historical shape-only `"kind":"text"` + # check missed. If a refactor weakens the gate to a shape check, + # this step goes red on every PR. + bash tests/e2e/test_completion_assert_unit.sh - if: ${{ needs.changes.outputs.scripts == 'true' }} name: Test ECR promote-tenant-image script (mock-driven, no live infra) diff --git a/.gitea/workflows/e2e-staging-saas.yml b/.gitea/workflows/e2e-staging-saas.yml index 315af9edc..92db50363 100644 --- a/.gitea/workflows/e2e-staging-saas.yml +++ b/.gitea/workflows/e2e-staging-saas.yml @@ -49,6 +49,7 @@ on: - 'workspace-server/internal/middleware/**' - 'workspace-server/internal/provisioner/**' - 'tests/e2e/test_staging_full_saas.sh' + - 'tests/e2e/lib/completion_assert.sh' - 'tests/e2e/lib/aws_leak_check.sh' - 'tests/e2e/test_aws_leak_check.sh' - '.gitea/workflows/e2e-staging-saas.yml' @@ -61,6 +62,7 @@ on: - 'workspace-server/internal/middleware/**' - 'workspace-server/internal/provisioner/**' - 'tests/e2e/test_staging_full_saas.sh' + - 'tests/e2e/lib/completion_assert.sh' - 'tests/e2e/lib/aws_leak_check.sh' - 'tests/e2e/test_aws_leak_check.sh' - '.gitea/workflows/e2e-staging-saas.yml' diff --git a/tests/e2e/lib/completion_assert.sh b/tests/e2e/lib/completion_assert.sh new file mode 100755 index 000000000..2d763a472 --- /dev/null +++ b/tests/e2e/lib/completion_assert.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +# Real-completion + per-provider liveness + byok-routing assertion helpers +# for the staging full-SaaS E2E (tests/e2e/test_staging_full_saas.sh). +# +# WHY THIS LIB EXISTS (molecule-core#1995 / #1994 follow-on): +# The A2A e2e historically asserted only response SHAPE — e.g. +# test_a2a_e2e.sh:`check "SEO response has text" '"kind":"text"'`. A fully +# BROKEN agent returns its error AS a text part: +# {"kind":"text","text":"Agent error (Exception) — see workspace logs..."} +# which STILL matches `"kind":"text"` → the shape check PASSES on a broken +# agent. That is exactly why the 2026-05-2x drained-key / byok-misroute +# failures (agents-team PM + reno marketing erroring on every LLM call) +# sailed through CI. "Channel returns text shape" != "agent actually +# completed an LLM round-trip". +# +# These helpers add three load-bearing gates ON TOP of (never replacing) the +# existing shape + PONG checks: +# 1. a2a_assert_real_completion — deterministic known-answer round-trip +# (CONTAINS the expected token AND NOT an error-as-text payload). +# 2. provider_liveness_matrix — per-offered-provider cheap completion +# probe, providers sourced from the providers.yaml SSOT runtimes block. +# 3. assert_byok_not_platform_proxy — #1994 regression guard: a +# byok-resolving workspace must NOT resolve to platform_managed. +# +# Conventions: reuses the host script's fail()/ok()/log() + tenant_call(). +# Source this AFTER those are defined. BASH 4+. + +# Error-as-text trap markers. If the agent's text part contains ANY of +# these, the "round-trip" did not really complete — the agent surfaced an +# error AS text. This is the negative assertion that makes a broken agent +# FAIL instead of slipping through the shape check. +# +# Kept as an array (not a single regex) so a new failure signature is a +# one-line append + the failure message can name which marker matched. +A2A_ERROR_AS_TEXT_MARKERS=( + "Agent error" + "Exception" + "error result" + "MISSING_BYOK_CREDENTIAL" +) + +# a2a_completion_error_marker +# Echoes the first error-as-text marker found in (case- +# insensitive), or nothing if clean. Exit 0 if a marker matched, 1 if not. +# Pure string scan — no LLM, no network — so it is deterministic and is the +# unit under the fail-direction proof in test_completion_assert_unit.sh. +a2a_completion_error_marker() { + local text="$1" + local upper marker + upper=$(printf '%s' "$text" | tr '[:lower:]' '[:upper:]') + for marker in "${A2A_ERROR_AS_TEXT_MARKERS[@]}"; do + if printf '%s' "$upper" | grep -qF -- "$(printf '%s' "$marker" | tr '[:lower:]' '[:upper:]')"; then + printf '%s' "$marker" + return 0 + fi + done + return 1 +} + +# a2a_assert_real_completion +# The CORE gate. Asserts the agent text: +# (a) does NOT contain any error-as-text marker (broken-agent trap), AND +# (b) CONTAINS (case-insensitive) — proving a real LLM +# round-trip produced the deterministic known answer. +# Calls fail() (which exits) on either violation. This MUST fail on an +# error-as-text payload — that is the property test_completion_assert_unit.sh +# pins. +a2a_assert_real_completion() { + local text="$1" + local expected="$2" + local ctx="${3:-A2A}" + + if [ -z "$text" ]; then + fail "$ctx — real-completion gate: agent returned EMPTY text (no round-trip)." + fi + + local hit + if hit=$(a2a_completion_error_marker "$text"); then + fail "$ctx — real-completion gate: agent returned an ERROR-AS-TEXT payload (matched '$hit'). A broken agent that surfaces its error as a text part is NOT a completed round-trip. This is the trap the shape-only check missed (#1994). Raw: ${text:0:200}" + fi + + # Known-answer: real LLM round-trip yields the deterministic token. A + # prompt-echo / truncated-context / wrong-auth pipeline won't. + if ! printf '%s' "$text" | tr '[:lower:]' '[:upper:]' | grep -qF -- "$(printf '%s' "$expected" | tr '[:lower:]' '[:upper:]')"; then + fail "$ctx — real-completion gate: reply did NOT contain expected known-answer token '$expected'. The channel returned a text shape but no real completion. Raw: ${text:0:200}" + fi + + ok "$ctx — real completion verified (contains '$expected', no error-as-text). Reply: \"${text:0:80}\"" +} + +# offered_platform_models_for_runtime +# Emits, one per line, the platform-servable model ids the providers.yaml +# SSOT (runtimes..providers[name=platform].models) declares for +# . This is the SSOT-driven offered/platform-servable matrix — NOT +# a hardcoded provider list — so a provider added/removed in providers.yaml +# automatically changes the matrix this probe exercises. +# +# Reads the embedded copy at workspace-server/internal/providers/providers.yaml +# (the same file go:embed compiles into the binary). Requires python3 + +# PyYAML (already a test-harness dep). On parse failure, emits nothing and +# returns 1 so the caller can fail loud rather than silently skip. +offered_platform_models_for_runtime() { + local runtime="$1" + local yaml_path="${PROVIDERS_YAML_PATH:-}" + if [ -z "$yaml_path" ]; then + # This lib lives at tests/e2e/lib/ -> repo root is three dirs up + # (lib -> e2e -> tests -> repo-root). + yaml_path="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/workspace-server/internal/providers/providers.yaml" + fi + if [ ! -f "$yaml_path" ]; then + log " [provider-matrix] providers.yaml SSOT not found at $yaml_path" + return 1 + fi + RUNTIME_REF="$runtime" python3 - "$yaml_path" <<'PY' +import os, sys +try: + import yaml +except Exception as e: # PyYAML missing — fail loud, do not silently skip. + sys.stderr.write(f"PyYAML required for provider-matrix SSOT read: {e}\n") + sys.exit(2) +rt = os.environ["RUNTIME_REF"] +with open(sys.argv[1]) as f: + doc = yaml.safe_load(f) +native = (doc.get("runtimes") or {}).get(rt) or {} +for pref in native.get("providers", []) or []: + if pref.get("name") == "platform": + for m in pref.get("models", []) or []: + print(m) +PY +} + +# provider_liveness_matrix +# For each platform-servable model the SSOT lists for , calls +# which must echo the agent text (or empty) and return +# 0 on a non-error completion, non-zero otherwise. Logs a per-model pass/fail +# matrix. Returns 0 only if EVERY probed model produced a non-error +# completion; non-zero (and a recorded matrix) otherwise. +# +# Purpose: exercise each offered provider's AUTH + ROUTING path so a drained +# key / wrong base-URL / byok-misroute fails the gate (the #1994 class). The +# probe_fn is expected to use minimal max_tokens. +# +# This helper does the SSOT read + matrix bookkeeping; the host script +# supplies probe_fn (it owns workspace ids + tenant_call wiring). +provider_liveness_matrix() { + local runtime="$1" + local probe_fn="$2" + local models model rc total=0 passed=0 + local -a results=() + + models=$(offered_platform_models_for_runtime "$runtime") || { + fail "provider-liveness: could not read offered-provider matrix from providers.yaml SSOT for runtime=$runtime" + } + if [ -z "$models" ]; then + log " [provider-matrix] runtime=$runtime offers no platform-servable models in the SSOT — nothing to probe (not a failure)." + return 0 + fi + + log " [provider-matrix] SSOT offered platform models for $runtime:" + while IFS= read -r model; do + [ -z "$model" ] && continue + log " - $model" + done <<<"$models" + + while IFS= read -r model; do + [ -z "$model" ] && continue + total=$((total + 1)) + set +e + "$probe_fn" "$model" + rc=$? + set -e + if [ "$rc" = "0" ]; then + passed=$((passed + 1)) + results+=("PASS $model") + elif [ "$rc" = "75" ]; then + # 75 (EX_TEMPFAIL convention) = probe skipped (key/runtime not + # available in this lane). Not counted toward pass/fail — logged. + total=$((total - 1)) + results+=("SKIP $model (probe unavailable in this lane)") + else + results+=("FAIL $model") + fi + done <<<"$models" + + log " [provider-matrix] result matrix (runtime=$runtime):" + local line + for line in "${results[@]}"; do + log " $line" + done + log " [provider-matrix] $passed/$total probed providers completed without error" + + if [ "$passed" != "$total" ]; then + return 1 + fi + return 0 +} + +# assert_byok_not_platform_proxy +# #1994 regression guard. Given the JSON body from +# GET /admin/workspaces/:id/llm-billing-mode (same derived resolver the +# provision-time strip gate uses), asserts the workspace resolves to BYOK +# and NOT platform_managed. A regression of #1994 (byok workspace baked to +# platform_managed → routed through the platform proxy → platform LLM key +# drained) flips resolved_mode to "platform_managed" and trips this gate. +# Calls fail() (exits) on violation. +assert_byok_not_platform_proxy() { + local body="$1" + local ctx="${2:-byok-guard}" + local mode prov + mode=$(printf '%s' "$body" | python3 -c "import json,sys +try: print(json.load(sys.stdin).get('resolved_mode','')) +except Exception: print('')" 2>/dev/null || echo "") + prov=$(printf '%s' "$body" | python3 -c "import json,sys +try: + d=json.load(sys.stdin); v=d.get('provider_selection') + print(v if v is not None else '') +except Exception: print('')" 2>/dev/null || echo "") + + if [ -z "$mode" ]; then + fail "$ctx — byok-routing guard: could not read resolved_mode from billing-mode response. Raw: ${body:0:200}" + fi + if [ "$mode" = "platform_managed" ]; then + fail "$ctx — byok-routing guard TRIPPED (#1994 regression): a byok-configured workspace resolved to 'platform_managed' (provider_selection=$prov) → it would route through the platform proxy and drain the platform LLM key. Expected resolved_mode=byok. Raw: ${body:0:200}" + fi + if [ "$mode" != "byok" ]; then + fail "$ctx — byok-routing guard: unexpected resolved_mode='$mode' (expected 'byok'). provider_selection=$prov. Raw: ${body:0:200}" + fi + ok "$ctx — byok-routing guard: workspace resolves byok (provider_selection=$prov), NOT platform-proxy. #1994 stays fixed." +} diff --git a/tests/e2e/test_a2a_e2e.sh b/tests/e2e/test_a2a_e2e.sh index a7711aa0a..99390f71e 100644 --- a/tests/e2e/test_a2a_e2e.sh +++ b/tests/e2e/test_a2a_e2e.sh @@ -8,6 +8,34 @@ TIMEOUT="${A2A_TIMEOUT:-120}" # seconds per A2A call (override via A2A_TIMEOUT # shellcheck source=_lib.sh source "$(dirname "$0")/_lib.sh" +# molecule-core#1995 (#1994 follow-on): real-completion assertion helpers. +# Adds a NEGATIVE error-as-text check on top of the shape checks below, so a +# broken agent that returns its error AS a text part +# ({"kind":"text","text":"Agent error (Exception) ..."}) — which STILL +# matches the shape check `"kind":"text"` — now FAILS instead of passing. +# shellcheck source=lib/completion_assert.sh +source "$(dirname "$0")/lib/completion_assert.sh" + +# check_no_error_as_text +# Additive negative gate: PASS only if the agent text carries NO +# error-as-text marker (Agent error / Exception / error result / +# MISSING_BYOK_CREDENTIAL). Uses the same scanner as the staging +# real-completion gate so the trap is closed consistently across lanes. +check_no_error_as_text() { + local desc="$1" + local text="$2" + local hit + if hit=$(a2a_completion_error_marker "$text"); then + echo "FAIL: $desc" + echo " agent returned an error-AS-text payload (matched '$hit') — a broken" + echo " agent that surfaces its error as a text part is NOT a real reply." + echo " got: $(echo "$text" | head -3)" + FAIL=$((FAIL + 1)) + else + echo "PASS: $desc" + PASS=$((PASS + 1)) + fi +} check() { local desc="$1" @@ -81,6 +109,8 @@ check "JSON-RPC response has result" '"result"' "$R" check "Response has agent role" '"role":"agent"' "$R" check "Response has text part" '"kind":"text"' "$R" TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR") +# Negative gate (#1994): the text part must not BE an error. +check_no_error_as_text "Echo reply is not an error-as-text payload" "$TEXT" echo " Agent said: $TEXT" echo "" @@ -92,6 +122,11 @@ R=$(a2a_send "$SEO_ID" "What SEO skills do you have?") check "SEO agent responds" '"result"' "$R" check "SEO response has text" '"kind":"text"' "$R" TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR") +# Negative gate (#1994): a broken SEO agent that returns "Agent error +# (Exception) ..." AS text still matches the `"kind":"text"` shape check +# above — THAT is the gap that let drained-key/byok-misroute failures pass +# CI. This makes that case FAIL. +check_no_error_as_text "SEO reply is not an error-as-text payload" "$TEXT" echo " SEO Agent said: $TEXT" echo "" diff --git a/tests/e2e/test_completion_assert_unit.sh b/tests/e2e/test_completion_assert_unit.sh new file mode 100755 index 000000000..6024537b7 --- /dev/null +++ b/tests/e2e/test_completion_assert_unit.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +# Fail-direction / load-bearing proof for lib/completion_assert.sh. +# +# This is the watch-it-FAIL counterpart the dev-SOP Phase 3 requires: it +# proves the new real-completion + byok gates actually CATCH a broken agent, +# not just pass on a good one. It runs entirely offline (no LLM, no network, +# no provisioning) — pure assertion logic — so it can run on every PR in the +# fast lane (e2e-api.yml unit-shell step) and locally via `bash`. +# +# The decisive case is `error-as-text payload MUST FAIL`: that is the exact +# trap (#1994) the historical shape-only check missed. If a refactor weakens +# a2a_assert_real_completion to a substring/shape check, THIS test goes red. +set -uo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +PASS=0 +FAIL=0 + +# Minimal stand-ins for the host script's helpers. fail() must NOT exit the +# whole harness here — we want to assert that it WAS called. We trap it by +# running the assertion in a subshell and checking the subshell's exit code: +# the real fail() exits 1, ok() exits 0 implicitly. +log() { echo "[unit] $*"; } +ok() { echo "[unit] OK: $*"; } +fail() { echo "[unit] FAIL-CALLED: $*" >&2; exit 1; } + +# shellcheck source=lib/completion_assert.sh +source "$HERE/lib/completion_assert.sh" + +expect_pass() { + local desc="$1"; shift + if ( "$@" ) >/dev/null 2>&1; then + echo "PASS: $desc (assertion accepted, as expected)" + PASS=$((PASS + 1)) + else + echo "FAIL: $desc — expected the assertion to ACCEPT, but it rejected" + FAIL=$((FAIL + 1)) + fi +} + +expect_fail() { + local desc="$1"; shift + if ( "$@" ) >/dev/null 2>&1; then + echo "FAIL: $desc — expected the assertion to REJECT, but it accepted (gate NOT load-bearing!)" + FAIL=$((FAIL + 1)) + else + echo "PASS: $desc (assertion rejected, as expected)" + PASS=$((PASS + 1)) + fi +} + +echo "=== completion_assert.sh fail-direction proof ===" + +# ---- a2a_assert_real_completion ---- +# Good: real known-answer reply passes. +expect_pass "real PINEAPPLE reply passes" \ + a2a_assert_real_completion "PINEAPPLE" "PINEAPPLE" "unit" +expect_pass "case-insensitive known answer passes" \ + a2a_assert_real_completion "pineapple" "PINEAPPLE" "unit" +expect_pass "known answer with minor wrapping passes" \ + a2a_assert_real_completion "Sure: PINEAPPLE" "PINEAPPLE" "unit" + +# DECISIVE: the error-as-text trap. Each MUST fail — these are the payloads a +# broken agent returns that the old shape-only `"kind":"text"` check passed. +expect_fail "Agent error as text payload MUST fail" \ + a2a_assert_real_completion "Agent error (Exception) — see workspace logs for details." "PINEAPPLE" "unit" +expect_fail "bare Exception as text MUST fail" \ + a2a_assert_real_completion "Traceback ... Exception: boom" "PINEAPPLE" "unit" +expect_fail "error result as text MUST fail" \ + a2a_assert_real_completion "tool returned error result" "PINEAPPLE" "unit" +expect_fail "MISSING_BYOK_CREDENTIAL as text MUST fail" \ + a2a_assert_real_completion "MISSING_BYOK_CREDENTIAL: set your own key" "PINEAPPLE" "unit" +# Error-as-text that ALSO happens to contain the token still fails (error +# marker takes precedence — a real completion never carries these markers). +expect_fail "error-as-text containing the token still fails" \ + a2a_assert_real_completion "Agent error: could not produce PINEAPPLE" "PINEAPPLE" "unit" +# Empty text fails. +expect_fail "empty text fails" \ + a2a_assert_real_completion "" "PINEAPPLE" "unit" +# Wrong/echoed content (no token, no error) fails — shape-OK but not a real +# completion. +expect_fail "wrong content without token fails" \ + a2a_assert_real_completion "Reply with exactly the word PINEAPPLE and nothing else." "BANANA" "unit" + +# ---- assert_byok_not_platform_proxy (#1994 guard) ---- +expect_pass "byok resolution passes the guard" \ + assert_byok_not_platform_proxy '{"resolved_mode":"byok","provider_selection":"minimax","source":"derived_provider"}' "unit" +# DECISIVE: a platform_managed resolution on a byok workspace = the #1994 +# regression. MUST fail. +expect_fail "platform_managed resolution trips the #1994 guard" \ + assert_byok_not_platform_proxy '{"resolved_mode":"platform_managed","provider_selection":"platform","source":"derived_provider"}' "unit" +expect_fail "missing resolved_mode trips the guard" \ + assert_byok_not_platform_proxy '{"provider_selection":"x"}' "unit" +expect_fail "disabled mode trips the guard (not byok)" \ + assert_byok_not_platform_proxy '{"resolved_mode":"disabled"}' "unit" + +# ---- a2a_completion_error_marker (the scanner under the gate) ---- +if hit=$(a2a_completion_error_marker "all good PINEAPPLE"); then + echo "FAIL: clean text wrongly flagged as error marker ($hit)"; FAIL=$((FAIL + 1)) +else + echo "PASS: clean text has no error marker"; PASS=$((PASS + 1)) +fi +if hit=$(a2a_completion_error_marker "An Exception occurred"); then + echo "PASS: error marker detected ($hit)"; PASS=$((PASS + 1)) +else + echo "FAIL: error marker NOT detected in 'An Exception occurred'"; FAIL=$((FAIL + 1)) +fi + +echo "" +echo "=== Results: $PASS passed, $FAIL failed ===" +[ "$FAIL" -eq 0 ] diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh index 8192cfe33..b7d8ea1b7 100755 --- a/tests/e2e/test_staging_full_saas.sh +++ b/tests/e2e/test_staging_full_saas.sh @@ -99,6 +99,12 @@ source "$(dirname "$0")/lib/model_slug.sh" # shellcheck disable=SC1091 # shellcheck source=lib/aws_leak_check.sh source "$(dirname "$0")/lib/aws_leak_check.sh" +# shellcheck disable=SC1091 +# shellcheck source=lib/completion_assert.sh +# molecule-core#1995 (#1994 follow-on): real-completion + per-provider +# liveness + byok-routing assertion helpers. Adds gates that FAIL on an +# error-as-text payload (the trap the shape-only A2A checks missed). +source "$(dirname "$0")/lib/completion_assert.sh" CURL_COMMON=(-sS --fail-with-body --max-time 30) E2E_TMP_FILES=() @@ -867,6 +873,182 @@ fi ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\"" +# ─── 8b. Real-completion known-answer round-trip (CORE GATE, #1994) ──── +# The existing PONG check + generic error grep above already do a lot, but +# this stanza is the canonical real-completion gate the #1994 follow-on +# adds: a DETERMINISTIC known-answer prompt asserted via +# a2a_assert_real_completion, which FAILS on an error-as-text payload +# ({"kind":"text","text":"Agent error (Exception) ..."}). That payload +# matches the historical shape-only check `"kind":"text"` and so passed CI +# on a fully broken agent (drained-key / byok-misroute, 2026-05-2x). This +# gate makes that case RED. Reuses the same cold-start retry-on-transient +# (502/503/504) loop the PONG probe uses — retry-once-on-network, never on +# agent-error. Single round-trip → the one place we spend a non-trivial +# token budget (default backend MiniMax — cheap token plan). +KA_PAYLOAD=$(python3 -c " +import json, uuid +print(json.dumps({ + 'jsonrpc': '2.0', + 'method': 'message/send', + 'id': 'e2e-known-answer-1', + 'params': { + 'message': { + 'role': 'user', + 'messageId': f'e2e-{uuid.uuid4().hex[:8]}', + 'parts': [{'kind': 'text', 'text': 'Reply with exactly the word PINEAPPLE and nothing else.'}] + } + } +})) +") +KA_TMP=$(mktemp -t known_answer_a2a.XXXXXX) +KA_RESP="" +for KA_ATTEMPT in $(seq 1 6); do + : >"$KA_TMP" + set +e + KA_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \ + --max-time 90 \ + -H "Content-Type: application/json" \ + -d "$KA_PAYLOAD" \ + -o "$KA_TMP" \ + -w '%{http_code}' \ + 2>/dev/null) + KA_RC=$? + set -e + KA_CODE=${KA_CODE:-000} + KA_RESP=$(cat "$KA_TMP" 2>/dev/null || echo "") + if [ "$KA_RC" = "0" ] && [ "$KA_CODE" -ge 200 ] && [ "$KA_CODE" -lt 300 ]; then + break + fi + KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body) + # Retry ONLY on transient transport errors — never on an agent-level + # error (those must surface and fail the gate). + if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then + log " known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY" + if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi + fi + break +done +rm -f "$KA_TMP" +if [ "$KA_RC" != "0" ] || [ "$KA_CODE" -lt 200 ] || [ "$KA_CODE" -ge 300 ]; then + KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body) + fail "Known-answer A2A POST failed after $KA_ATTEMPT attempt(s) (curl_rc=$KA_RC, http=$KA_CODE): $KA_SAFE_BODY" +fi +KA_TEXT=$(echo "$KA_RESP" | python3 -c " +import json, sys +try: + d = json.load(sys.stdin) + parts = d.get('result', {}).get('parts', []) + print(parts[0].get('text', '') if parts else '') +except Exception: + print('') +" 2>/dev/null || echo "") +# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text. +a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)" + +# ─── 8c. byok-routing regression guard (#1994) ───────────────────────── +# The parent was provisioned with the customer's OWN vendor key +# (MINIMAX_API_KEY / ANTHROPIC_API_KEY in SECRETS_JSON) → it must resolve +# BYOK, not platform_managed. #1994 was exactly the inverse: a byok +# workspace baked platform_managed on (re-)provision → routed through the +# platform proxy → drained the platform LLM key. We read the SAME derived +# resolver the provision-time strip gate uses +# (GET /admin/workspaces/:id/llm-billing-mode) and assert resolved_mode!= +# platform_managed. A regression flips it RED. +# +# Only meaningful when the parent actually carries a byok credential; the +# OpenAI/hermes path uses a different env shape, and the no-key path is +# legitimately platform_managed (the CTO default). Gate on the same +# E2E_*_API_KEY presence the SECRETS_JSON branch keyed off. +if [ -n "${E2E_MINIMAX_API_KEY:-}" ] || [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then + set +e + BILLING_RESP=$(tenant_call GET "/admin/workspaces/$PARENT_ID/llm-billing-mode" 2>/dev/null) + BILLING_RC=$? + set -e + if [ "$BILLING_RC" != "0" ] || [ -z "$BILLING_RESP" ]; then + fail "byok-routing guard: GET /admin/workspaces/$PARENT_ID/llm-billing-mode failed (rc=$BILLING_RC). Body: ${BILLING_RESP:0:200}" + fi + assert_byok_not_platform_proxy "$BILLING_RESP" "byok-guard (parent, $RUNTIME/$MODEL_SLUG)" +else + log "8c. byok-routing guard skipped — parent carries no own-vendor key (OpenAI/no-key path is legitimately platform_managed)." +fi + +# ─── 8d. Per-offered-provider liveness matrix (SSOT-driven, #1994 class) ─ +# For each platform-servable model the providers.yaml SSOT +# (runtimes..providers[platform].models) declares for this +# runtime, send a minimal max_tokens-bounded "say ok" probe and assert a +# NON-ERROR completion. Purpose: exercise each offered provider's AUTH + +# ROUTING path so a drained key / wrong base-URL / byok-misroute fails the +# gate (the #1994 class). Providers/models come from the SSOT — not a +# hardcoded list — so the matrix tracks providers.yaml automatically. +# +# This lane provisions ONE parent workspace with ONE configured key, so we +# can only truly drive the providers that key authenticates. Probing a +# model whose provider key is absent in this lane is reported SKIP (rc=75), +# not FAIL — keeping the gate deterministic + low-flake. The matrix still +# proves the configured provider's full auth+routing path end-to-end, and +# logs the offered set so over/under-offer drift is visible in the CI log. +provider_liveness_probe() { + local model_id="$1" + # Map the SSOT platform model id (e.g. minimax/MiniMax-M2.7) to the + # vendor namespace token to decide whether THIS lane has its key. + local vendor="${model_id%%/*}" + case "$vendor" in + minimax) [ -n "${E2E_MINIMAX_API_KEY:-}" ] || return 75 ;; + anthropic) [ -n "${E2E_ANTHROPIC_API_KEY:-}" ] || return 75 ;; + openai) [ -n "${E2E_OPENAI_API_KEY:-}" ] || return 75 ;; + *) return 75 ;; # kimi/moonshot etc. — no key wired in this lane + esac + local probe_payload + probe_payload=$(python3 -c " +import json, uuid +print(json.dumps({ + 'jsonrpc': '2.0', + 'method': 'message/send', + 'id': 'e2e-liveness-' + uuid.uuid4().hex[:6], + 'params': { + 'message': { + 'role': 'user', + 'messageId': f'e2e-{uuid.uuid4().hex[:8]}', + 'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}], + }, + 'configuration': {'max_tokens': 4} + } +})) +") + local tmp code rc resp + tmp=$(mktemp -t liveness_a2a.XXXXXX) + set +e + code=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \ + --max-time 60 \ + -H "Content-Type: application/json" \ + -d "$probe_payload" \ + -o "$tmp" -w '%{http_code}' 2>/dev/null) + rc=$? + set -e + resp=$(cat "$tmp" 2>/dev/null || echo "") + rm -f "$tmp" + if [ "$rc" != "0" ] || [ "${code:-000}" -lt 200 ] || [ "${code:-000}" -ge 300 ]; then + log " probe $model_id: HTTP ${code:-000} rc=$rc" + return 1 + fi + local text + text=$(echo "$resp" | python3 -c " +import json,sys +try: + d=json.load(sys.stdin); p=d.get('result',{}).get('parts',[]) + print(p[0].get('text','') if p else '') +except Exception: print('')" 2>/dev/null || echo "") + if [ -z "$text" ] || a2a_completion_error_marker "$text" >/dev/null; then + log " probe $model_id: error-as-text or empty: ${text:0:120}" + return 1 + fi + return 0 +} +if ! provider_liveness_matrix "$RUNTIME" provider_liveness_probe; then + fail "Per-provider liveness matrix: at least one offered provider failed its auth+routing probe (see matrix above). This is the #1994 class — a drained key / wrong base-URL / byok-misroute." +fi +ok "Per-provider liveness matrix passed (all probed offered providers completed without error)" + # ─── 9. HMA + peers + activity (full mode) ───────────────────────────── if [ "$MODE" = "full" ]; then log "9/11 Writing + reading HMA memory on parent..." diff --git a/workspace-server/internal/handlers/llm_billing_mode_provision_parity_test.go b/workspace-server/internal/handlers/llm_billing_mode_provision_parity_test.go new file mode 100644 index 000000000..6b650afdd --- /dev/null +++ b/workspace-server/internal/handlers/llm_billing_mode_provision_parity_test.go @@ -0,0 +1,257 @@ +package handlers + +// llm_billing_mode_provision_parity_test.go — molecule-core#1994. +// +// Root cause pinned in Phase 1: the PROVISION path resolved billing mode from +// the raw payload.Model, while the READ endpoint resolves from the stored +// MODEL workspace_secret. On a RE-PROVISION (restart/resume/auto-restart) the +// payload is rebuilt from the DB with Name+Tier+Runtime ONLY — payload.Model +// is "" (workspace_restart.go:333/844/1017 via withStoredCompute, which +// backfills Compute but NOT Model). So applyPlatformManagedLLMEnv called +// ResolveLLMBillingModeDerived(runtime, "", ...) → DeriveProvider errored on an +// empty model → default-closed platform_managed → the CP proxy got baked in and +// the workspace billed the PLATFORM Anthropic key for the customer's own usage +// (Reno Stars Marketing agent 6b66de8d, opus, claude-code; live-confirmed +// 2026-05-28: container env MODEL=opus but MOLECULE_LLM_BILLING_MODE_RESOLVED= +// platform_managed + ANTHROPIC_BASE_URL=). +// +// The fix: applyPlatformManagedLLMEnv resolves the effective model using the +// SAME fallback chain applyRuntimeModelEnv already uses +// (payload.Model → envVars["MOLECULE_MODEL"] → envVars["MODEL"]) BEFORE +// deriving, so the provision path's derive inputs match the read path's. The +// merged envVars already carries the MODEL workspace_secret (loadWorkspaceSecrets). +// +// These tests are mutation-load-bearing: reverting the effective-model fix +// (passing payload.Model verbatim) turns +// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel and the parity +// test RED. + +import ( + "context" + "testing" + + "git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models" + "github.com/DATA-DOG/go-sqlmock" +) + +// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel is the direct +// repro of the #1994 divergence at the provision resolver. payload.Model is "" +// (the re-provision shape) but the workspace's own oauth + MODEL=opus are +// present in envVars (loaded from workspace_secrets). The resolver MUST derive +// from the stored model → anthropic-oauth → byok, NOT default-closed to +// platform_managed. +// +// Asserts the byok outcome AND that the byok branch's effects fired: +// - billing-mode env = byok (not platform_managed) +// - ANTHROPIC_BASE_URL NOT rewritten to the platform proxy (left direct) +// - the workspace's OWN oauth (workspace_secrets provenance, NOT in +// globalKeys) survives — usable credential present. +// +// Mutation: revert applyPlatformManagedLLMEnv to pass payload.Model ("") to the +// resolver → derive errors on empty model → platform_managed → this test RED on +// every assertion. +func TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel(t *testing.T) { + ctx := context.Background() + const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent + + mock := setupTestDB(t) + // Resolver reads the override (NULL — no explicit operator pin). + expectOverrideQuery(mock, wsID, "") + + // The container env as loadWorkspaceSecrets would have built it on a + // re-provision: the workspace's OWN oauth (workspace_secrets provenance) + + // the stored MODEL=opus. The platform proxy URL is present from the prior + // platform_managed boot (the env we must NOT re-bake). + envVars := map[string]string{ + "MODEL": "opus", + "CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH", // workspace_secrets origin + "ANTHROPIC_BASE_URL": "https://api.moleculesai.app/api/v1/internal/llm/anthropic", + } + // payload.Model == "" — exactly the re-provision shape. + res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "") + + if res.ResolvedMode != LLMBillingModeBYOK { + t.Fatalf("re-provision with stored MODEL=opus must resolve byok, got %q (source=%s) — the #1994 divergence", res.ResolvedMode, res.Source) + } + if res.Source != BillingModeSourceDerivedProvider { + t.Errorf("source: got %q want derived_provider (opus → anthropic-oauth)", res.Source) + } + if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModeBYOK { + t.Errorf("MOLECULE_LLM_BILLING_MODE_RESOLVED: got %q want byok", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"]) + } + // byok must NOT route through the platform proxy. + if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.moleculesai.app/api/v1/internal/llm/anthropic" { + // The byok branch must leave ANTHROPIC_BASE_URL untouched (the prior + // proxy URL is what re-provision must STOP re-asserting from the + // platform path; the workspace template resets it to direct on the byok + // path). The key assertion is the inverse below: the platform path did + // NOT run, so MOLECULE_LLM_BASE_URL / usage token were NOT injected. + _ = got + } + // The decisive proxy-bypass assertions: the platform_managed path injects + // these; the byok branch must NOT. + if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok { + t.Errorf("byok path must NOT inject the platform usage token (proxy billing); got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"]) + } + if !res.HasUsableLLMCred { + t.Errorf("the workspace's OWN oauth (workspace_secrets origin) must survive → HasUsableLLMCred=true") + } + if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "RENO-OWN-OAUTH" { + t.Errorf("workspace-origin oauth must survive the byok strip; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"]) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("sqlmock expectations: %v", err) + } +} + +// TestApplyPlatformManagedLLMEnv_ReadProvisionParity is the core regression +// guard against the #1994 divergence ever returning: for the same workspace +// inputs (same runtime, same stored MODEL, same auth env, same override), the +// READ-path resolver (ResolveLLMBillingMode → readWorkspaceDeriveInputs) and +// the PROVISION-path resolver (applyPlatformManagedLLMEnv) MUST land on the +// same billing mode. +// +// Mutation: revert the effective-model fix → provision path derives from "" +// → platform_managed while the read path derives opus → byok → parity BREAKS +// → this test RED. +func TestApplyPlatformManagedLLMEnv_ReadProvisionParity(t *testing.T) { + ctx := context.Background() + const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" + + // ---- READ PATH ---- + // ResolveLLMBillingMode reads in order: override (NULL) → runtime → secrets + // (MODEL=opus + the oauth key) → then ResolveLLMBillingModeDerived re-reads + // the override (NULL again). + readMock := setupTestDB(t) + expectOverrideQuery(readMock, wsID, "") // first override read (legacy resolver) + readMock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code")) + readMock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). + AddRow("MODEL", []byte("opus"), 0). + AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("RENO-OWN-OAUTH"), 0)) + expectOverrideQuery(readMock, wsID, "") // second override read (derived resolver) + + readRes, err := ResolveLLMBillingMode(ctx, wsID, "") + if err != nil { + t.Fatalf("read-path resolve err: %v", err) + } + if err := readMock.ExpectationsWereMet(); err != nil { + t.Errorf("read-path sqlmock expectations: %v", err) + } + + // ---- PROVISION PATH ---- + provMock := setupTestDB(t) + expectOverrideQuery(provMock, wsID, "") + provEnv := map[string]string{ + "MODEL": "opus", + "CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH", + } + provRes := applyPlatformManagedLLMEnv(ctx, provEnv, wsID, "claude-code", "") + if err := provMock.ExpectationsWereMet(); err != nil { + t.Errorf("provision-path sqlmock expectations: %v", err) + } + + if readRes.ResolvedMode != provRes.ResolvedMode { + t.Fatalf("PARITY VIOLATION (#1994): read-path resolved %q but provision-path resolved %q for the same workspace inputs (claude-code, MODEL=opus)", + readRes.ResolvedMode, provRes.ResolvedMode) + } + if readRes.ResolvedMode != LLMBillingModeBYOK { + t.Errorf("both paths should resolve byok for (claude-code, opus); got %q", readRes.ResolvedMode) + } +} + +// TestApplyPlatformManagedLLMEnv_DefaultPreservation pins the CTO invariant +// "default stays platform": a workspace with no non-platform provider selection +// and no own credential (no stored MODEL, empty env) still resolves +// platform_managed. The fix must NOT flip genuinely-platform workspaces to byok. +// +// This mirrors the agents-team genuinely-platform case. Mutation: a fix that +// silently defaulted byok on an empty/underivable model would turn this RED. +func TestApplyPlatformManagedLLMEnv_DefaultPreservation(t *testing.T) { + ctx := context.Background() + const wsID = "11111111-2222-3333-4444-555555555555" + + mock := setupTestDB(t) + expectOverrideQuery(mock, wsID, "") + + // No MODEL anywhere, no auth env — nothing to derive. + envVars := map[string]string{} + res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "") + + if res.ResolvedMode != LLMBillingModePlatformManaged { + t.Fatalf("no model + no cred must default platform_managed (CTO: default stays platform), got %q (source=%s)", res.ResolvedMode, res.Source) + } + if res.Source != BillingModeSourceDerivedDefault { + t.Errorf("source: got %q want derived_default", res.Source) + } + if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModePlatformManaged { + t.Errorf("resolved env: got %q want platform_managed", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"]) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("sqlmock expectations: %v", err) + } +} + +// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives is the +// molecule-core#1994 (corrected-model) inversion of the former internal#711 +// strip test. `global_secrets` is the TENANT's store, so a byok workspace +// whose oauth lives at GLOBAL scope (shared across the tenant's workspaces) is +// running on the TENANT's own credential — it must SURVIVE and route direct, +// not be stripped + failed-closed. MODEL=opus derives byok; the global-scope +// oauth is the tenant's own and is exactly what byok runs on. +// +// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch +// → the oauth disappears → HasUsableLLMCred=false → this test RED on both the +// survival assertion and the usable-cred assertion. +func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives(t *testing.T) { + ctx := context.Background() + const wsID = "99999999-8888-7777-6666-555555555555" + + mock := setupTestDB(t) + expectOverrideQuery(mock, wsID, "") + + // The tenant's own oauth at global scope (a global_secrets row), shared + // across all the tenant's workspaces. There is no separate workspace row. + envVars := map[string]string{ + "MODEL": "opus", + "CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH", + } + + res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "") + + if res.ResolvedMode != LLMBillingModeBYOK { + t.Fatalf("opus derives byok; got %q", res.ResolvedMode) + } + // The tenant's own global-scope oauth SURVIVES — byok runs on it, direct. + if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" { + t.Errorf("tenant's own global-scope oauth must survive on byok; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"]) + } + if !res.HasUsableLLMCred { + t.Errorf("tenant's own global-scope oauth is a usable credential → HasUsableLLMCred must be true (byok must not be failed-closed)") + } + // byok must NOT force the platform proxy. + if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present { + t.Errorf("byok must not inject the platform usage token; got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"]) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("sqlmock expectations: %v", err) + } +} + +// TestReProvisionPayloadOmitsModel is a static guard pinning the upstream +// trigger: the re-provision payload builders pass Name+Tier+Runtime but NOT +// Model, so applyPlatformManagedLLMEnv cannot rely on payload.Model and must +// fall back to the stored MODEL in envVars. If a future change starts threading +// Model into these payloads, this test documents that the fallback is then +// belt-and-suspenders (still correct), not the sole mechanism. +func TestReProvisionPayloadOmitsModel(t *testing.T) { + // Mirrors withStoredCompute(ctx, id, CreateWorkspacePayload{Name, Tier, + // Runtime}) at workspace_restart.go:333/844/1017 — Model is the zero value. + p := models.CreateWorkspacePayload{Name: "Reno Stars Marketing", Tier: 1, Runtime: "claude-code"} + if p.Model != "" { + t.Fatalf("re-provision payload model expected empty (the #1994 trigger), got %q", p.Model) + } +} diff --git a/workspace-server/internal/handlers/secrets.go b/workspace-server/internal/handlers/secrets.go index da7f4af0c..236e11b4e 100644 --- a/workspace-server/internal/handlers/secrets.go +++ b/workspace-server/internal/handlers/secrets.go @@ -245,11 +245,6 @@ func (h *SecretsHandler) Values(c *gin.Context) { // provisioner path in workspace_provision.go so env-vars look identical // whether the workspace was bootstrapped locally or remotely). out := map[string]string{} - // Provenance side-channel (internal#711): which keys in `out` originated - // from global_secrets and were NOT overridden by a workspace_secrets row. - // Used by the provider-aware gate below so a non-platform workspace's - // remote pull never receives the platform's scope:global LLM credential. - globalKeys := map[string]struct{}{} // Track decrypt failures so we can refuse the response with a list // instead of returning a partial bundle that boots a broken agent. var failedKeys []string @@ -275,7 +270,6 @@ func (h *SecretsHandler) Values(c *gin.Context) { continue } out[k] = string(decrypted) - globalKeys[k] = struct{}{} } } if err := globalRows.Err(); err != nil { @@ -300,10 +294,6 @@ func (h *SecretsHandler) Values(c *gin.Context) { continue } out[k] = string(decrypted) // workspace override wins over global - // User explicitly re-set this via the canvas Secrets tab — it is - // no longer "the operator-store version", so drop the global - // provenance flag (mirrors loadWorkspaceSecrets). - delete(globalKeys, k) } } if err := wsRows.Err(); err != nil { @@ -319,32 +309,16 @@ func (h *SecretsHandler) Values(c *gin.Context) { return } - // internal#711: provider-aware gate on the remote-pull path. A workspace - // whose resolved billing mode is NOT platform_managed (byok / subscription) - // must NOT receive the platform's scope:global LLM credentials - // (CLAUDE_CODE_OAUTH_TOKEN + the rest of the bypass-key set). Those keys - // were merged from global_secrets above; here we drop any that are still - // of global provenance (a workspace override survives, since its flag was - // cleared). Symmetric with applyPlatformManagedLLMEnv's strip on the - // provision/restart env path — both injection vectors are now gated. - // - // Default-closed: ResolveLLMBillingMode collapses any DB error / NULL / - // garbled value to platform_managed, so a transient failure leaves the - // existing (global-inheriting) behavior in place rather than stripping a - // platform_managed workspace's creds. - orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE"))) - res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode) - if resolveErr != nil { - log.Printf("secrets.Values: resolve billing mode workspace=%s err=%v (defaulting to platform_managed)", workspaceID, resolveErr) - } - if res.ResolvedMode != LLMBillingModePlatformManaged { - for k := range globalKeys { - if isPlatformManagedDirectLLMBypassKey(k) { - delete(out, k) - } - } - } - + // molecule-core#1994 (corrected model): the remote-pull bundle is the + // TENANT's own merged secrets (global_secrets + workspace_secrets, the + // latter winning on collision). `global_secrets` is the tenant's store, not + // the platform's, so a byok workspace's pull MUST include the tenant's own + // global-scope LLM credential — that is exactly what it runs on, direct. + // The earlier internal#711 byok strip here rested on the inverted "global = + // platform's own" premise and is removed; the platform's own proxy token is + // never in a tenant's global_secrets (it lives in server env only and is + // injected separately on the platform_managed provision path), so there is + // nothing platform-owned to withhold on this path. c.JSON(http.StatusOK, out) } diff --git a/workspace-server/internal/handlers/secrets_test.go b/workspace-server/internal/handlers/secrets_test.go index d624278c1..3e30c0db6 100644 --- a/workspace-server/internal/handlers/secrets_test.go +++ b/workspace-server/internal/handlers/secrets_test.go @@ -840,14 +840,20 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) { } } -// TestSecretsValues_ByokStripsGlobalLLMCred is the internal#711 regression -// guard for the remote-pull injection vector. A non-platform (byok) workspace -// that pulls its secrets via GET /workspaces/:id/secrets/values must NOT -// receive the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN — that key is -// of global_secrets provenance and is dropped by the provider-aware gate. -// Its OWN ANTHROPIC_API_KEY (a workspace_secrets row) survives, and unrelated -// non-LLM global secrets are untouched. -func TestSecretsValues_ByokStripsGlobalLLMCred(t *testing.T) { +// TestSecretsValues_ByokServesTenantGlobalLLMCred is the molecule-core#1994 +// (corrected-model) regression guard for the remote-pull path. `global_secrets` +// is the TENANT's store, so a byok workspace's pull MUST include the tenant's +// own global-scope LLM credential — that is exactly what byok runs on, direct. +// +// Pre-fix (internal#711) this path STRIPPED the global-origin oauth on byok, +// resting on the inverted premise that a global LLM cred was "the platform's +// own"; that killed legitimate byok workspaces whose oauth lived at global +// scope. The strip is removed: the merged bundle (tenant globals + workspace +// overrides) is served verbatim. +// +// Mutation: re-add the byok global-LLM-cred strip in secrets.go Values() → +// CLAUDE_CODE_OAUTH_TOKEN disappears from the body → this test RED. +func TestSecretsValues_ByokServesTenantGlobalLLMCred(t *testing.T) { mock := setupTestDB(t) handler := NewSecretsHandler(nil) @@ -860,21 +866,18 @@ func TestSecretsValues_ByokStripsGlobalLLMCred(t *testing.T) { mock.ExpectExec(`UPDATE workspace_auth_tokens SET last_used_at`). WithArgs("tok-1"). WillReturnResult(sqlmock.NewResult(0, 1)) - // global_secrets holds the platform's scope:global OAuth token + a - // non-LLM operator global (should be untouched). + // global_secrets holds the TENANT's own global-scope OAuth token (shared + // across all the tenant's workspaces) + a non-LLM global. mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`). WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). - AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("PLATFORM-GLOBAL-OAUTH"), 0). + AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0). AddRow("SENTRY_DSN", []byte("https://sentry.example/123"), 0)) - // The workspace brought its OWN Anthropic API key via the Secrets tab. + // This workspace set no LLM secret of its own — it relies on the tenant + // global-scope oauth. mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id`). WithArgs(testWsID). WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). - AddRow("ANTHROPIC_API_KEY", []byte("CUSTOMER-OWN-ANTHROPIC-KEY"), 0)) - // Resolver: this workspace is byok. - mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). - WithArgs(testWsID). - WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) + AddRow("MODEL", []byte("opus"), 0)) w := httptest.NewRecorder() c := secretsValuesRequest(w, "Bearer good-token") @@ -885,13 +888,13 @@ func TestSecretsValues_ByokStripsGlobalLLMCred(t *testing.T) { } var body map[string]string _ = json.Unmarshal(w.Body.Bytes(), &body) - // 1. Platform global OAuth token stripped — the leak is closed on the pull path. - if got, ok := body["CLAUDE_CODE_OAUTH_TOKEN"]; ok { - t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — platform scope:global token must be stripped for byok pull", got) + // 1. The tenant's own global-scope OAuth token SURVIVES — byok runs on it. + if body["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token served for byok pull", body["CLAUDE_CODE_OAUTH_TOKEN"]) } - // 2. The workspace's own LLM key survives. - if body["ANTHROPIC_API_KEY"] != "CUSTOMER-OWN-ANTHROPIC-KEY" { - t.Fatalf("ANTHROPIC_API_KEY = %q, want the workspace's own key preserved", body["ANTHROPIC_API_KEY"]) + // 2. The workspace's own non-LLM secret survives. + if body["MODEL"] != "opus" { + t.Fatalf("MODEL = %q, want opus preserved", body["MODEL"]) } // 3. Unrelated non-LLM global secrets are untouched. if body["SENTRY_DSN"] != "https://sentry.example/123" { @@ -976,6 +979,49 @@ func TestSetGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) { } } +// TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant is the +// molecule-core#1994 co-mingling GUARD regression. Removing the byok strip is +// only safe if the platform's own credential is never written into a tenant's +// global_secrets. SetGlobal is the in-code write boundary: on a tenant whose +// resolved LLM mode is platform_managed (the metered default), a direct +// vendor / oauth bypass-list key MUST be rejected (400) and NOT persisted — +// the tenant is supposed to route through the CP proxy, not carry a direct +// platform-shaped credential at global scope. This is what keeps a +// platform-origin token out of global_secrets going forward. +// +// (On a byok/disabled tenant the same write is ALLOWED — that key is the +// tenant's OWN credential, which the corrected model expects at global scope. +// TestSetGlobal_AutoRestartsAffectedWorkspaces covers that allowed path.) +// +// Mutation: drop the rejectPlatformManagedDirectLLMBypass guard from SetGlobal +// → the write reaches the INSERT (no 400) → this test RED. +func TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant(t *testing.T) { + setupTestDB(t) + handler := NewSecretsHandler(nil) + + // Org/tenant default is platform_managed — the metered path. A direct + // vendor key write into global_secrets must be refused here. + t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-platform-shaped"}` + c.Request = httptest.NewRequest("POST", "/admin/secrets", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.SetGlobal(c) + + if w.Code != http.StatusBadRequest { + t.Fatalf("expected 400 (bypass-list key rejected for platform_managed tenant), got %d: %s", w.Code, w.Body.String()) + } + if !strings.Contains(w.Body.String(), "blocked") { + t.Errorf("response should explain the block; got %s", w.Body.String()) + } + // No INSERT was expected on the mock — sqlmock would error on an + // unexpected ExecContext, so reaching here with a 400 proves the write + // was refused before the DB. +} + // TestDeleteGlobal_AutoRestartsAffectedWorkspaces covers the delete branch of #15. func TestDeleteGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) { mock := setupTestDB(t) diff --git a/workspace-server/internal/handlers/workspace_provision.go b/workspace-server/internal/handlers/workspace_provision.go index ba45bca96..3b7609d84 100644 --- a/workspace-server/internal/handlers/workspace_provision.go +++ b/workspace-server/internal/handlers/workspace_provision.go @@ -773,12 +773,7 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) { // can no longer confuse a provider slug for a model id. CP-side // slot-separation (cp#213 + cp#220) merged the analogous fix on // the CP side; this is the workspace-server companion. - if model == "" { - model = envVars["MOLECULE_MODEL"] - } - if model == "" { - model = envVars["MODEL"] - } + model = effectiveModelForBilling(model, envVars) if model == "" { return } @@ -811,6 +806,31 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) { } } +// effectiveModelForBilling resolves the picked model id from an explicit +// argument with the SAME fallback chain applyRuntimeModelEnv uses to set the +// container MODEL env: explicit arg → envVars["MOLECULE_MODEL"] → +// envVars["MODEL"] (the workspace_secret). It is the single source of truth +// for "what model is this workspace going to run", shared by both +// applyRuntimeModelEnv (which exports it to the container) and +// applyPlatformManagedLLMEnv (which derives the billing mode from it). +// +// molecule-core#1994: the billing resolver MUST consult the same effective +// model the container will actually run. Pre-fix it used the raw payload.Model +// only, which is "" on a re-provision (the payload is rebuilt from the DB with +// no Model), so it derived from an empty model → defaulted closed to +// platform_managed and diverged from the read endpoint (which reads the stored +// MODEL secret). Returns "" only when no model is resolvable anywhere — the +// legitimate "unset → platform default" case the resolver fails closed on. +func effectiveModelForBilling(model string, envVars map[string]string) string { + if model == "" { + model = envVars["MOLECULE_MODEL"] + } + if model == "" { + model = envVars["MODEL"] + } + return model +} + // applyPlatformManagedLLMEnv wires the control-plane LLM proxy into a // workspace only when the RESOLVED billing mode for this workspace is // platform_managed. "Resolved" means: the workspace-level override (if any) @@ -834,40 +854,41 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) { // answer "what mode is this workspace running under" without DB queries // (RFC Observability hot-spot). // -// internal#711 — PROVIDER-AWARE GLOBAL-LLM-CRED GATE. The platform's -// LLM credentials (CLAUDE_CODE_OAUTH_TOKEN + the rest of the -// platformManagedDirectLLMBypassKeys set) live in `global_secrets` and -// are merged into EVERY workspace's env by loadWorkspaceSecrets — that -// merge is provenance-blind. Pre-fix, the non-platform (byok/disabled) -// early-return left envVars untouched, so a BYOK / subscription -// workspace that brought NO LLM credential of its own still inherited -// the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN and ran Opus on -// the platform's (Molecule's) Anthropic credits (Reno Stars SEO + -// Marketing agents, confirmed live 2026-05-27). +// molecule-core#1994 (credential-handling follow-on, CTO-confirmed model). +// `global_secrets` is the TENANT's own secret store, shared across all of +// that tenant's workspaces — it is NOT the platform's. The platform's own +// LLM credential is the CP proxy usage token (MOLECULE_LLM_USAGE_TOKEN), +// injected SEPARATELY on the platform_managed path below; it is never stored +// in any tenant's global_secrets. // -// The gate: on the non-platform path we strip every platform-managed -// LLM key whose PROVENANCE is `global_secrets` (the globalKeys set). -// A workspace's OWN LLM credential — set via the canvas Secrets tab, -// i.e. a `workspace_secrets` row — has had its global provenance flag -// dropped by loadWorkspaceSecrets, so it is NOT in globalKeys and -// survives. Net effect: platform global LLM creds reach a workspace -// ONLY when its resolved mode is platform_managed; a non-platform -// workspace resolves to its own (workspace-scoped) credential or none. +// Consequently the byok/disabled branch does NOT strip the tenant's +// global-origin LLM creds. Under the corrected model the tenant's own +// credential — whether at global scope (a global_secrets row, e.g. the key +// they configured via the org-import required-env preflight / the settings +// Secrets tab) or at workspace scope (a workspace_secrets row) — is exactly +// what byok must run on, direct. The earlier internal#711 strip rested on the +// inverted premise that a global-scope LLM cred was "the platform's own"; it +// was wrong and it killed legitimate byok workspaces (MISSING_BYOK_CREDENTIAL +// for tenants whose oauth lived at global scope — Reno Stars Marketing agent, +// confirmed live 2026-05-28). Removing the strip is only safe because the +// platform's own credential is never co-mingled into a tenant's global_secrets +// (guarded at the write boundary: SetGlobal rejects bypass-list keys for a +// platform-managed tenant; the platform proxy token is read from server env +// only, never persisted to a tenant store). // -// The boolean return reports whether, after the gate, the workspace -// still has at least one usable LLM credential. The caller -// (prepareProvisionContext) uses it to FAIL CLOSED — a non-platform -// workspace with no usable LLM credential is aborted with a clear -// MISSING_BYOK_CREDENTIAL error at provision time rather than being -// started on (now-stripped) platform creds. +// The boolean return still reports whether the workspace has at least one +// usable LLM credential. The caller (prepareProvisionContext) uses it to FAIL +// CLOSED — a byok workspace with no usable LLM credential at ANY scope is +// aborted with a clear MISSING_BYOK_CREDENTIAL error at provision time rather +// than started credential-less. // platformLLMEnvResult is the structured outcome of applyPlatformManagedLLMEnv. // ResolvedMode is the per-workspace billing/provider mode the resolver -// landed on. HasUsableLLMCred reports whether — AFTER the provider-aware -// global-cred gate — the workspace still has at least one platform-managed -// LLM credential key in its env (its own, workspace-scoped one). Only the -// non-platform path consults HasUsableLLMCred for the fail-closed decision; -// the platform_managed path always returns true (it forces the CP proxy -// usage token, which IS the usable credential). +// landed on. HasUsableLLMCred reports whether the workspace has at least one +// platform-managed-shaped LLM credential key in its env — the tenant's own, +// at global or workspace scope. Only the non-platform (byok) path consults +// HasUsableLLMCred for the fail-closed decision; the platform_managed path +// always returns true (it forces the CP proxy usage token, which IS the +// usable credential). type platformLLMEnvResult struct { ResolvedMode string HasUsableLLMCred bool @@ -879,7 +900,7 @@ type platformLLMEnvResult struct { Source BillingModeSource } -func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, globalKeys map[string]struct{}, workspaceID, runtime, model string) platformLLMEnvResult { +func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, workspaceID, runtime, model string) platformLLMEnvResult { // internal#718 P2-B: the platform-vs-byok decision now DERIVES the provider // from (runtime, model) via the registry and keys off IsPlatform(derived) — // NOT a stored LLM_PROVIDER and NOT the org rung. This path already carries @@ -889,7 +910,20 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, // disambiguation input the registry uses to split oauth-vs-api). The org-env // MOLECULE_LLM_BILLING_MODE is NO LONGER read into the decision (retired). availableAuthEnv := availableAuthEnvNames(envVars) - res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, model, availableAuthEnv) + // molecule-core#1994: derive billing mode from the EFFECTIVE model, not the + // raw payload.Model. On a re-provision (restart/resume/auto-restart) the + // payload is rebuilt from the DB with Name+Tier+Runtime only — payload.Model + // is "" (workspace_restart.go via withStoredCompute, which backfills Compute + // but NOT Model). With an empty model DeriveProvider errors → the resolver + // defaults closed to platform_managed and bakes the CP proxy, DIVERGING from + // the read endpoint (which reads the stored MODEL workspace_secret and derives + // byok). The stored model already lives in the merged envVars (loaded by + // loadWorkspaceSecrets); resolve it with the SAME fallback chain + // applyRuntimeModelEnv uses so the provision-path derive inputs match the + // read-path's — keeping the two resolvers in parity (the #1994 regression + // guard test asserts this). + effectiveModel := effectiveModelForBilling(model, envVars) + res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, effectiveModel, availableAuthEnv) if resolveErr != nil { // resolveErr != nil ⇒ resolver hit a DB error AND already defaulted // res.ResolvedMode to platform_managed. Log + proceed; the safe default @@ -911,19 +945,19 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] = res.ResolvedMode if res.ResolvedMode != LLMBillingModePlatformManaged { // byok or disabled — DO NOT force-route to CP, DO NOT override the - // workspace's own ANTHROPIC_BASE_URL / OAuth token. + // workspace's own ANTHROPIC_BASE_URL / OAuth token, and DO NOT strip + // the tenant's own LLM credentials. // - // internal#711: but DO strip platform-origin LLM credentials. The - // platform's scope:global CLAUDE_CODE_OAUTH_TOKEN (+ the rest of the - // bypass-key set) was merged into envVars by loadWorkspaceSecrets - // from global_secrets; without this strip a BYOK workspace that - // brought no LLM credential of its own would inherit the platform's - // global token and bill the platform's Anthropic credits. The strip - // is PROVENANCE-AWARE: only keys still flagged as global_secrets - // origin are removed; a workspace's own LLM cred (a workspace_secrets - // row — provenance flag already dropped by loadWorkspaceSecrets) - // survives so the workspace talks to its own provider directly. - stripGlobalOriginLLMCreds(envVars, globalKeys) + // molecule-core#1994 (corrected model): `global_secrets` is the + // TENANT's store, not the platform's. The tenant's own credential — + // at global OR workspace scope — is exactly what byok runs on, direct. + // We leave envVars untouched here and report whether a usable LLM + // credential survived so the caller can fail closed when there is + // genuinely none (no platform-managed-shaped key at any scope). The + // platform's own credential is never in a tenant's global_secrets + // (guarded at the SetGlobal write boundary + the proxy token is + // server-env-only), so leaving the tenant's globals in place cannot + // re-open the platform-credit drain. return platformLLMEnvResult{ ResolvedMode: res.ResolvedMode, HasUsableLLMCred: hasAnyPlatformManagedLLMKey(envVars), @@ -980,32 +1014,11 @@ func stripPlatformManagedLLMBypassEnv(envVars map[string]string) { } } -// stripGlobalOriginLLMCreds removes platform-managed LLM credential keys -// (CLAUDE_CODE_OAUTH_TOKEN + the rest of platformManagedDirectLLMBypassKeys) -// from envVars ONLY when they originated from the operator-controlled -// `global_secrets` table (i.e. their key is present in globalKeys). -// -// internal#711 provider-aware gate. A platform global LLM credential is the -// platform's own credential and must never be the credential a non-platform -// (byok / subscription) workspace runs on. loadWorkspaceSecrets drops the -// global-provenance flag for any key the workspace re-set via the canvas -// Secrets tab (a workspace_secrets row), so a workspace's OWN LLM credential -// is NOT in globalKeys and survives this strip — only the inherited platform -// global creds are removed. -func stripGlobalOriginLLMCreds(envVars map[string]string, globalKeys map[string]struct{}) { - for key := range platformManagedDirectLLMBypassKeys { - if _, fromGlobal := globalKeys[key]; fromGlobal { - delete(envVars, key) - } - } -} - -// hasAnyPlatformManagedLLMKey reports whether envVars still carries at least -// one non-empty platform-managed LLM credential key after the provider-aware -// gate. Used by the non-platform fail-closed branch: a byok/subscription -// workspace with no surviving (workspace-scoped) LLM credential must be -// aborted with MISSING_BYOK_CREDENTIAL rather than started credential-less or -// on stripped platform creds. +// hasAnyPlatformManagedLLMKey reports whether envVars carries at least one +// non-empty platform-managed-shaped LLM credential key (the tenant's own, at +// global or workspace scope). Used by the byok fail-closed branch: a byok +// workspace with no LLM credential at ANY scope must be aborted with +// MISSING_BYOK_CREDENTIAL rather than started credential-less. func hasAnyPlatformManagedLLMKey(envVars map[string]string) bool { for key := range platformManagedDirectLLMBypassKeys { if strings.TrimSpace(envVars[key]) != "" { diff --git a/workspace-server/internal/handlers/workspace_provision_shared.go b/workspace-server/internal/handlers/workspace_provision_shared.go index a42e49c7c..aac432384 100644 --- a/workspace-server/internal/handlers/workspace_provision_shared.go +++ b/workspace-server/internal/handlers/workspace_provision_shared.go @@ -193,33 +193,31 @@ func (h *WorkspaceHandler) prepareProvisionContext( // continue to rely on workspace_secrets / org-import persona-env // merge for their git auth. applyAgentGitHTTPCreds(envVars, payload.Role) - // internal#711: provider-aware LLM-credential resolution. On a non-platform - // (byok/subscription) workspace this strips the platform's scope:global LLM - // creds inherited from global_secrets and reports whether the workspace - // still has a usable (workspace-scoped) LLM credential of its own. - llmRes := applyPlatformManagedLLMEnv(ctx, envVars, globalSecretKeys, workspaceID, payload.Runtime, payload.Model) - // Fail closed for a BYOK workspace with no usable LLM credential: do NOT - // start it on the platform's (now-stripped) global creds. Mirror the - // "model+provider+credential REQUIRED at create" spirit (internal#711) - // with an actionable error surfaced at provision time. + // molecule-core#1994: per-workspace LLM billing-mode resolution + env wiring. + // On platform_managed it forces the CP proxy usage token; on byok/disabled + // it leaves the tenant's own creds (global OR workspace scope) untouched and + // reports whether a usable LLM credential is present. + llmRes := applyPlatformManagedLLMEnv(ctx, envVars, workspaceID, payload.Runtime, payload.Model) + // Fail closed for a BYOK workspace with no usable LLM credential at ANY + // scope: do NOT start it credential-less. Mirror the "model+provider+ + // credential REQUIRED at create" spirit with an actionable error surfaced + // at provision time. // // Scoped to byok specifically (NOT disabled): "byok" means "the user // intends to run an LLM on their own credential" — a missing one is a // misconfiguration worth surfacing loudly. "disabled" means "this // workspace runs no platform-billed LLM at all" (terminal / file work, or - // a runtime that talks to a non-bypass-key endpoint); stripping the - // inherited platform globals is sufficient there and aborting would - // regress a legitimate no-LLM workspace. The strip above already ran for - // both non-platform modes. + // a runtime that talks to a non-bypass-key endpoint), so aborting would + // regress a legitimate no-LLM workspace. // - // The bypass-key check is intentionally broad — any surviving bypass key - // (the workspace's own, of workspace_secrets provenance) clears it. + // The bypass-key check is intentionally broad — any present bypass key + // (the tenant's own, at global or workspace scope) clears it. if llmRes.ResolvedMode == LLMBillingModeBYOK && !llmRes.HasUsableLLMCred { msg := formatMissingBYOKCredentialError(llmRes.ResolvedMode) - log.Printf("Provisioner: ABORT workspace=%s — byok billing mode has no usable LLM credential (MISSING_BYOK_CREDENTIAL, internal#711)", workspaceID) + log.Printf("Provisioner: ABORT workspace=%s — byok billing mode has no usable LLM credential (MISSING_BYOK_CREDENTIAL, molecule-core#1994)", workspaceID) return nil, &provisionAbort{ Msg: msg, - Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "711"}, + Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "1994"}, } } applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model) diff --git a/workspace-server/internal/handlers/workspace_provision_shared_test.go b/workspace-server/internal/handlers/workspace_provision_shared_test.go index 4136319d1..3074afd5e 100644 --- a/workspace-server/internal/handlers/workspace_provision_shared_test.go +++ b/workspace-server/internal/handlers/workspace_provision_shared_test.go @@ -494,32 +494,34 @@ func TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken(t *testing. } } -// TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed is the -// internal#711 end-to-end guard for the live Reno Stars leak. A byok -// workspace whose ONLY LLM credential is the platform's scope:global -// CLAUDE_CODE_OAUTH_TOKEN (inherited from global_secrets, no workspace -// override) must: +// TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds is the +// molecule-core#1994 (corrected-model) end-to-end inversion of the former +// internal#711 fail-closed test, for the live Reno Stars byok agents. A byok +// workspace whose LLM credential is the TENANT's own scope:global +// CLAUDE_CODE_OAUTH_TOKEN (a global_secrets row, no workspace override) must: // -// 1. have that platform token STRIPPED from the prepared env (no leak), and -// 2. ABORT the provision with the MISSING_BYOK_CREDENTIAL code rather than -// start the workspace on the platform's credits. +// 1. KEEP that oauth in the prepared container env (it is the tenant's own +// credential — exactly what byok runs on, direct), and +// 2. NOT abort — the provision proceeds. // -// This is the discriminating end-to-end test: pre-fix prepared.EnvVars would -// carry CLAUDE_CODE_OAUTH_TOKEN= and the provision would -// succeed, running Opus on Molecule's Anthropic credits. -func TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed(t *testing.T) { +// Pre-fix (internal#711) prepared.EnvVars stripped the global oauth and the +// provision aborted MISSING_BYOK_CREDENTIAL → the agent was dead. This is the +// discriminating end-to-end guard for the fix. +func TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds(t *testing.T) { const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) mock := setupTestDB(t) - // global_secrets carries the platform's scope:global OAuth token. + // global_secrets carries the TENANT's own scope:global OAuth token + the + // stored MODEL (so the resolver derives byok from opus). mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`). WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). - AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("PLATFORM-GLOBAL-OAUTH"), 0)) - // Workspace set NO secrets of its own. + AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0)) + // Workspace set its own MODEL (no LLM cred of its own — relies on global). mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`). WithArgs(wsID). - WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})) + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). + AddRow("MODEL", []byte("opus"), 0)) // Resolver: workspace override = byok. mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). WithArgs(wsID). @@ -534,8 +536,57 @@ func TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed(t *testing.T prepared, abort := handler.prepareProvisionContext( context.Background(), wsID, "/nonexistent", nil, payload, false) + if abort != nil { + t.Fatalf("expected provision to proceed (byok on tenant's own global oauth), got abort=%v", abort.Extra) + } + if prepared == nil { + t.Fatalf("prepared context is nil despite no abort") + } + // The tenant's own global oauth must be present in the container env. + if prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global oauth preserved for byok", + prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"]) + } + // byok must not have been routed through the platform proxy. + if _, ok := prepared.EnvVars["MOLECULE_LLM_USAGE_TOKEN"]; ok { + t.Fatalf("byok provision must NOT inject the platform usage token") + } + if got := prepared.EnvVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"]; got != LLMBillingModeBYOK { + t.Fatalf("MOLECULE_LLM_BILLING_MODE_RESOLVED = %q, want byok", got) + } +} + +// TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed is the +// companion: the fail-closed abort is UNCHANGED for a byok workspace with no +// LLM credential at ANY scope (no global row, no workspace row). It still +// aborts MISSING_BYOK_CREDENTIAL rather than starting credential-less. +func TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed(t *testing.T) { + const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" + t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) + + mock := setupTestDB(t) + // No global LLM cred — only the stored MODEL so the resolver derives byok. + mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`). + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"})) + mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}). + AddRow("MODEL", []byte("opus"), 0)) + mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). + WithArgs(wsID). + WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK)) + + handler := NewWorkspaceHandler(&captureBroadcaster{}, nil, "http://localhost:8080", t.TempDir()) + payload := models.CreateWorkspacePayload{ + Name: "Reno Stars SEO", + Runtime: "claude-code", + Tier: 1, + } + prepared, abort := handler.prepareProvisionContext( + context.Background(), wsID, "/nonexistent", nil, payload, false) + if abort == nil { - t.Fatalf("expected MISSING_BYOK_CREDENTIAL abort, got success (prepared=%v) — the leak would still ship", prepared) + t.Fatalf("expected MISSING_BYOK_CREDENTIAL abort, got success (prepared=%v)", prepared) } if code, _ := abort.Extra["code"].(string); code != "MISSING_BYOK_CREDENTIAL" { t.Fatalf("abort.Extra[code] = %v, want MISSING_BYOK_CREDENTIAL", abort.Extra["code"]) @@ -917,7 +968,7 @@ func TestApplyPlatformManagedLLMEnv_NonClaudeRuntimeDefaultsOpenAIProxyWhenNoWor t.Setenv("MOLECULE_LLM_DEFAULT_MODEL", "moonshot/kimi-k2.6") envVars := map[string]string{} - applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "codex", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, "", "codex", "") applyRuntimeModelEnv(envVars, "codex", "") if got := envVars["OPENAI_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/openai/v1" { @@ -947,7 +998,7 @@ func TestApplyPlatformManagedLLMEnv_StripsWorkspaceOpenAIKeyForClaudeCode(t *tes "OPENAI_BASE_URL": "https://api.openai.com/v1", "MODEL": "openai/gpt-5.5", } - applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "") if _, ok := envVars["OPENAI_API_KEY"]; ok { t.Fatalf("OPENAI_API_KEY should be stripped for claude-code platform-managed mode") @@ -973,7 +1024,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeUsesAnthropicProxyOverOAuth(t *tes "CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token", "MODEL": "sonnet", } - applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "") if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped in platform-managed mode") @@ -996,7 +1047,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeInjectsAnthropicProxyWhenNoWorkspa t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") envVars := map[string]string{} - applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "minimax/MiniMax-M2.7") + applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "minimax/MiniMax-M2.7") if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/anthropic/v1" { t.Fatalf("ANTHROPIC_BASE_URL = %q", got) @@ -1019,7 +1070,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) { "MINIMAX_API_KEY": "user-minimax-key", "MODEL": "MiniMax-M2.7", } - applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "") if _, ok := envVars["MINIMAX_API_KEY"]; ok { t.Fatalf("MINIMAX_API_KEY should be stripped in platform-managed mode") @@ -1053,7 +1104,7 @@ func TestApplyPlatformManagedLLMEnv_NoopsOutsidePlatformManaged(t *testing.T) { t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") envVars := map[string]string{} - res := applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "kimi-for-coding") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding") if res.ResolvedMode != LLMBillingModeBYOK { t.Fatalf("resolved mode = %q, want byok (derived from non-platform model)", res.ResolvedMode) @@ -1100,7 +1151,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing "CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token", "MODEL": "sonnet", } - applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") // 1. OAuth token intact — not stripped. if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "user-oauth-token" { @@ -1131,19 +1182,19 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing } } -// TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken is the -// internal#711 regression guard for the live 2026-05-27 leak (Reno Stars SEO -// + Marketing claude-code agents). A non-platform (byok) workspace that -// brought NO LLM credential of its own, but which inherited the platform's -// scope:global CLAUDE_CODE_OAUTH_TOKEN from global_secrets (provenance = -// globalKeys), must have that platform token STRIPPED — not run on it. +// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect is +// the molecule-core#1994 (corrected-model) inversion of the former +// internal#711 strip test, exercised through applyPlatformManagedLLMEnv. The +// live failure this guards: the Reno Stars Marketing/SEO byok agents whose +// Claude oauth lives at GLOBAL scope (the tenant's own credential, shared +// across the tenant's workspaces) were stripped + failed-closed under the +// inverted "global == platform's own" premise → MISSING_BYOK_CREDENTIAL → +// dead. Under the corrected model `global_secrets` is the TENANT's store, so +// that oauth is exactly what byok runs on: it must SURVIVE and route direct. // -// Pre-fix the byok early-return left envVars untouched, so the platform's -// global OAuth token survived into the container and the agent ran Opus on -// the platform's Anthropic credits. The fix gates the global-cred merge on -// provider==platform: a non-platform workspace keeps only its own -// (workspace_secrets) creds, of which there are none here. -func TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken(t *testing.T) { +// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch +// → the oauth disappears → this test RED on both survival + HasUsableLLMCred. +func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect(t *testing.T) { const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent mock := setupTestDB(t) mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). @@ -1155,31 +1206,32 @@ func TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken(t *testing. t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic") t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") - // The ONLY LLM credential in env is the platform's scope:global OAuth - // token, merged from global_secrets (so its key is in globalKeys). The - // workspace set none of its own. + // The tenant's own oauth at GLOBAL scope (a global_secrets row). The + // workspace set no separate row of its own; it relies on the tenant global. envVars := map[string]string{ - "CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH", "MODEL": "opus", } - globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}} - res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") - // 1. The platform global OAuth token must be STRIPPED — the leak is closed. - if got, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { - t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — platform scope:global token must be stripped for a byok workspace", got) + // 1. The tenant's own global-scope oauth SURVIVES — byok runs on it. + if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" { + t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token preserved for byok", envVars["CLAUDE_CODE_OAUTH_TOKEN"]) } // 2. No CP proxy creds forced (byok = workspace talks to its own provider). if got, ok := envVars["ANTHROPIC_API_KEY"]; ok { t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got) } - // 3. Resolver reports byok with NO usable LLM credential → caller fails closed. + if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok { + t.Fatalf("MOLECULE_LLM_USAGE_TOKEN must NOT be injected for byok") + } + // 3. byok WITH a usable credential → caller does NOT fail closed. if res.ResolvedMode != LLMBillingModeBYOK { t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK) } - if res.HasUsableLLMCred { - t.Fatalf("HasUsableLLMCred = true, want false (only the stripped platform global token was present)") + if !res.HasUsableLLMCred { + t.Fatalf("HasUsableLLMCred = false, want true (tenant's own global-scope oauth is the usable credential)") } if err := mock.ExpectationsWereMet(); err != nil { t.Errorf("unmet sqlmock expectations: %v", err) @@ -1214,7 +1266,7 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_PlatformModelKeepsPlatformCreds(t *t t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") envVars := map[string]string{} - res := applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "anthropic/claude-opus-4-7") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "anthropic/claude-opus-4-7") if res.ResolvedMode != LLMBillingModePlatformManaged { t.Fatalf("platform-derived model must resolve platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source) @@ -1234,14 +1286,15 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_PlatformModelKeepsPlatformCreds(t *t } } -// NON-PLATFORM-DERIVED → byok + STRIP + FAIL-CLOSED signal (THE FIX, the Reno -// billing-leak class). A claude-code workspace with a non-platform model -// (kimi-for-coding → kimi-coding) and NO override + NO own cred, inheriting only -// the platform's scope:global OAuth token, now DERIVES byok → #1963 strips the -// global token → HasUsableLLMCred=false → caller fails closed. Pre-P2 this same -// workspace resolved platform_managed (via the never-written org rung) and ran -// on the platform's credits. This is the discriminating delta test. -func TestApplyPlatformManagedLLMEnv_DERIVED_NonPlatformModelStripsAndFailsClosed(t *testing.T) { +// NON-PLATFORM-DERIVED + NO CREDENTIAL AT ALL → byok + FAIL-CLOSED. This is +// the legitimate remaining fail-closed path under the corrected model +// (molecule-core#1994): a claude-code workspace with a non-platform model +// (kimi-for-coding → byok) and NO override and NO LLM credential at ANY scope +// (no global row, no workspace row) has nothing to run on → HasUsableLLMCred= +// false → caller (prepareProvisionContext) aborts MISSING_BYOK_CREDENTIAL. The +// fail-closed branch is unchanged by the strip removal; only its trigger +// narrowed from "no workspace-scoped cred" to "no cred at any scope". +func TestApplyPlatformManagedLLMEnv_DERIVED_ByokNoCredentialFailsClosed(t *testing.T) { const wsID = "99999999-8888-7777-6666-555555555555" mock := setupTestDB(t) mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). @@ -1252,32 +1305,25 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_NonPlatformModelStripsAndFailsClosed t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1") t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") - // Only LLM cred is the platform's scope:global OAuth token (globalKeys). - envVars := map[string]string{ - "CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN", - } - globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}} + // No LLM credential at all — neither global nor workspace scope. + envVars := map[string]string{} - res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "kimi-for-coding") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding") // 1. DERIVED byok (NOT the old platform_managed default). if res.ResolvedMode != LLMBillingModeBYOK { - t.Fatalf("non-platform-derived model must resolve byok, got %q (source=%s) — THE FIX regressed", res.ResolvedMode, res.Source) + t.Fatalf("non-platform-derived model must resolve byok, got %q (source=%s)", res.ResolvedMode, res.Source) } if res.Source != BillingModeSourceDerivedProvider { t.Errorf("source: got %q want derived_provider", res.Source) } - // 2. #1963 strip: the platform global OAuth token is removed (leak closed). - if got, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { - t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — must be stripped for a derived-byok workspace (Reno leak)", got) - } - // 3. No CP proxy creds forced. + // 2. No CP proxy creds forced. if got, ok := envVars["ANTHROPIC_API_KEY"]; ok { t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got) } - // 4. No usable cred → caller (prepareProvisionContext) fails closed. + // 3. No usable cred at any scope → caller fails closed. if res.HasUsableLLMCred { - t.Fatalf("HasUsableLLMCred = true, want false (only the stripped platform global token was present)") + t.Fatalf("HasUsableLLMCred = true, want false (no LLM credential present at any scope)") } if err := mock.ExpectationsWereMet(); err != nil { t.Errorf("unmet sqlmock expectations: %v", err) @@ -1300,7 +1346,7 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_UnsetModelPlatformDefault(t *testing t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") envVars := map[string]string{} - res := applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") if res.ResolvedMode != LLMBillingModePlatformManaged { t.Fatalf("unset model must default platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source) @@ -1316,14 +1362,13 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_UnsetModelPlatformDefault(t *testing } } -// TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal is -// the discriminating companion to the strip test: a byok workspace that DID -// set its own CLAUDE_CODE_OAUTH_TOKEN via the canvas Secrets tab (a -// workspace_secrets row) keeps it. loadWorkspaceSecrets drops the global -// provenance flag on a workspace override, so the key is NOT in globalKeys -// and the provenance-aware strip leaves it alone. Proves the fix strips only -// platform-origin creds, never the customer's own. -func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal(t *testing.T) { +// TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth is the +// workspace-scope companion to the global-scope survival test: a byok +// workspace that set its own CLAUDE_CODE_OAUTH_TOKEN via the canvas Secrets +// tab (a workspace_secrets row) keeps it and runs direct. Under the corrected +// model (molecule-core#1994) the tenant's credential survives at EITHER scope; +// this pins the workspace-scope half. +func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth(t *testing.T) { const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent mock := setupTestDB(t) mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). @@ -1334,15 +1379,13 @@ func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal(t * t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1") t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") - // Workspace set its OWN OAuth token — loadWorkspaceSecrets would have - // dropped its global provenance flag, so globalKeys does NOT contain it. + // Workspace set its OWN OAuth token (a workspace_secrets row). envVars := map[string]string{ "CLAUDE_CODE_OAUTH_TOKEN": "CUSTOMER-OWN-OAUTH-TOKEN", "MODEL": "opus", } - globalKeys := map[string]struct{}{} // not from global_secrets - res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "CUSTOMER-OWN-OAUTH-TOKEN" { t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the workspace's own token left intact", got) @@ -1358,13 +1401,18 @@ func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal(t * } } -// TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred proves -// that "disabled" mode also strips the platform's global LLM creds (the leak -// is closed for disabled too), and reports HasUsableLLMCred=false. The -// caller's fail-closed abort is scoped to byok only, so a disabled workspace -// with no LLM cred still boots (for terminal / non-LLM work); here we pin the -// function-level strip + report. -func TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred(t *testing.T) { +// TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy proves the +// corrected-model behavior for "disabled": the tenant's own global-scope LLM +// cred is NOT stripped and the CP proxy is NOT forced. "disabled" means the +// workspace runs no platform-billed LLM, but the tenant's own credential is +// still the tenant's to keep; the caller's fail-closed abort is byok-only so a +// disabled workspace boots regardless. The previous internal#711 behavior +// stripped the global cred here on the same inverted premise; that strip is +// removed. +// +// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the non-platform +// branch → the oauth disappears → this test RED on the survival assertion. +func TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy(t *testing.T) { const wsID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" mock := setupTestDB(t) mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`). @@ -1374,31 +1422,33 @@ func TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred(t *test t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged) envVars := map[string]string{ - "CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH", } - globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}} - res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") - if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { - t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN must be stripped for disabled mode too") + // The tenant's own global cred survives (not stripped). + if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" { + t.Fatalf("tenant's own global cred must survive for disabled mode; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"]) + } + // No proxy forced for disabled. + if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok { + t.Fatalf("disabled must not inject the platform usage token") } if res.ResolvedMode != LLMBillingModeDisabled { t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeDisabled) } - if res.HasUsableLLMCred { - t.Fatalf("HasUsableLLMCred = true, want false") - } if err := mock.ExpectationsWereMet(); err != nil { t.Errorf("unmet sqlmock expectations: %v", err) } } // TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds is -// the no-regression guard for the OTHER side of the gate (internal#711): a -// platform-managed workspace MUST still receive the platform's creds. Here -// the proxy IS configured, so the contract is the existing one — the global -// OAuth token is replaced by the proxy usage token (HasUsableLLMCred=true). +// the no-regression guard for the metered platform_managed path +// (molecule-core#1994): a platform-managed workspace MUST still strip any +// direct oauth and route through the CP proxy. The direct OAuth token is +// replaced by the proxy usage token (HasUsableLLMCred=true). This path is +// UNCHANGED by the byok strip removal — only the byok/disabled branch changed. func TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds(t *testing.T) { const wsID = "99999999-9999-9999-9999-999999999999" mock := setupTestDB(t) @@ -1412,12 +1462,11 @@ func TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds(t *t t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token") envVars := map[string]string{ - "CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN": "DIRECT-OAUTH-TOKEN", "MODEL": "opus", } - globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}} - res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "") + res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") // Platform-managed routes through the CP proxy: OAuth stripped, proxy creds forced. if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok { @@ -1458,7 +1507,7 @@ func TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode(t *tes "CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token", "MODEL": "sonnet", } - applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "") + applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "") // OAuth stripped, proxy forced — unchanged platform_managed contract. if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {