fix(workspace-server): provision-time billing derives from EFFECTIVE model, not raw payload.Model (#1994) [BEHAVIOR-AFFECTING — CTO merge-go] #1995

Merged
hongming merged 3 commits from fix/1994-provision-billing-model-passthrough into main 2026-05-28 20:01:00 +00:00
12 changed files with 1163 additions and 259 deletions
+8
View File
@@ -357,6 +357,14 @@ jobs:
name: Run E2E bash unit tests (no live infra)
run: |
bash tests/e2e/test_model_slug.sh
# molecule-core#1995 (#1994 follow-on): fail-direction proof for
# the A2A real-completion + byok-routing assertion helpers
# (lib/completion_assert.sh). Offline (no LLM, no network): it
# asserts an error-as-text payload FAILS the real-completion gate
# — the exact trap the historical shape-only `"kind":"text"`
# check missed. If a refactor weakens the gate to a shape check,
# this step goes red on every PR.
bash tests/e2e/test_completion_assert_unit.sh
- if: ${{ needs.changes.outputs.scripts == 'true' }}
name: Test ECR promote-tenant-image script (mock-driven, no live infra)
+2
View File
@@ -49,6 +49,7 @@ on:
- 'workspace-server/internal/middleware/**'
- 'workspace-server/internal/provisioner/**'
- 'tests/e2e/test_staging_full_saas.sh'
- 'tests/e2e/lib/completion_assert.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- '.gitea/workflows/e2e-staging-saas.yml'
@@ -61,6 +62,7 @@ on:
- 'workspace-server/internal/middleware/**'
- 'workspace-server/internal/provisioner/**'
- 'tests/e2e/test_staging_full_saas.sh'
- 'tests/e2e/lib/completion_assert.sh'
- 'tests/e2e/lib/aws_leak_check.sh'
- 'tests/e2e/test_aws_leak_check.sh'
- '.gitea/workflows/e2e-staging-saas.yml'
+229
View File
@@ -0,0 +1,229 @@
#!/usr/bin/env bash
# Real-completion + per-provider liveness + byok-routing assertion helpers
# for the staging full-SaaS E2E (tests/e2e/test_staging_full_saas.sh).
#
# WHY THIS LIB EXISTS (molecule-core#1995 / #1994 follow-on):
# The A2A e2e historically asserted only response SHAPE — e.g.
# test_a2a_e2e.sh:`check "SEO response has text" '"kind":"text"'`. A fully
# BROKEN agent returns its error AS a text part:
# {"kind":"text","text":"Agent error (Exception) — see workspace logs..."}
# which STILL matches `"kind":"text"` → the shape check PASSES on a broken
# agent. That is exactly why the 2026-05-2x drained-key / byok-misroute
# failures (agents-team PM + reno marketing erroring on every LLM call)
# sailed through CI. "Channel returns text shape" != "agent actually
# completed an LLM round-trip".
#
# These helpers add three load-bearing gates ON TOP of (never replacing) the
# existing shape + PONG checks:
# 1. a2a_assert_real_completion — deterministic known-answer round-trip
# (CONTAINS the expected token AND NOT an error-as-text payload).
# 2. provider_liveness_matrix — per-offered-provider cheap completion
# probe, providers sourced from the providers.yaml SSOT runtimes block.
# 3. assert_byok_not_platform_proxy — #1994 regression guard: a
# byok-resolving workspace must NOT resolve to platform_managed.
#
# Conventions: reuses the host script's fail()/ok()/log() + tenant_call().
# Source this AFTER those are defined. BASH 4+.
# Error-as-text trap markers. If the agent's text part contains ANY of
# these, the "round-trip" did not really complete — the agent surfaced an
# error AS text. This is the negative assertion that makes a broken agent
# FAIL instead of slipping through the shape check.
#
# Kept as an array (not a single regex) so a new failure signature is a
# one-line append + the failure message can name which marker matched.
A2A_ERROR_AS_TEXT_MARKERS=(
"Agent error"
"Exception"
"error result"
"MISSING_BYOK_CREDENTIAL"
)
# a2a_completion_error_marker <agent_text>
# Echoes the first error-as-text marker found in <agent_text> (case-
# insensitive), or nothing if clean. Exit 0 if a marker matched, 1 if not.
# Pure string scan — no LLM, no network — so it is deterministic and is the
# unit under the fail-direction proof in test_completion_assert_unit.sh.
a2a_completion_error_marker() {
local text="$1"
local upper marker
upper=$(printf '%s' "$text" | tr '[:lower:]' '[:upper:]')
for marker in "${A2A_ERROR_AS_TEXT_MARKERS[@]}"; do
if printf '%s' "$upper" | grep -qF -- "$(printf '%s' "$marker" | tr '[:lower:]' '[:upper:]')"; then
printf '%s' "$marker"
return 0
fi
done
return 1
}
# a2a_assert_real_completion <agent_text> <expected_token> <context_label>
# The CORE gate. Asserts the agent text:
# (a) does NOT contain any error-as-text marker (broken-agent trap), AND
# (b) CONTAINS <expected_token> (case-insensitive) — proving a real LLM
# round-trip produced the deterministic known answer.
# Calls fail() (which exits) on either violation. This MUST fail on an
# error-as-text payload — that is the property test_completion_assert_unit.sh
# pins.
a2a_assert_real_completion() {
local text="$1"
local expected="$2"
local ctx="${3:-A2A}"
if [ -z "$text" ]; then
fail "$ctx — real-completion gate: agent returned EMPTY text (no round-trip)."
fi
local hit
if hit=$(a2a_completion_error_marker "$text"); then
fail "$ctx — real-completion gate: agent returned an ERROR-AS-TEXT payload (matched '$hit'). A broken agent that surfaces its error as a text part is NOT a completed round-trip. This is the trap the shape-only check missed (#1994). Raw: ${text:0:200}"
fi
# Known-answer: real LLM round-trip yields the deterministic token. A
# prompt-echo / truncated-context / wrong-auth pipeline won't.
if ! printf '%s' "$text" | tr '[:lower:]' '[:upper:]' | grep -qF -- "$(printf '%s' "$expected" | tr '[:lower:]' '[:upper:]')"; then
fail "$ctx — real-completion gate: reply did NOT contain expected known-answer token '$expected'. The channel returned a text shape but no real completion. Raw: ${text:0:200}"
fi
ok "$ctx — real completion verified (contains '$expected', no error-as-text). Reply: \"${text:0:80}\""
}
# offered_platform_models_for_runtime <runtime>
# Emits, one per line, the platform-servable model ids the providers.yaml
# SSOT (runtimes.<runtime>.providers[name=platform].models) declares for
# <runtime>. This is the SSOT-driven offered/platform-servable matrix — NOT
# a hardcoded provider list — so a provider added/removed in providers.yaml
# automatically changes the matrix this probe exercises.
#
# Reads the embedded copy at workspace-server/internal/providers/providers.yaml
# (the same file go:embed compiles into the binary). Requires python3 +
# PyYAML (already a test-harness dep). On parse failure, emits nothing and
# returns 1 so the caller can fail loud rather than silently skip.
offered_platform_models_for_runtime() {
local runtime="$1"
local yaml_path="${PROVIDERS_YAML_PATH:-}"
if [ -z "$yaml_path" ]; then
# This lib lives at tests/e2e/lib/ -> repo root is three dirs up
# (lib -> e2e -> tests -> repo-root).
yaml_path="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../.." && pwd)/workspace-server/internal/providers/providers.yaml"
fi
if [ ! -f "$yaml_path" ]; then
log " [provider-matrix] providers.yaml SSOT not found at $yaml_path"
return 1
fi
RUNTIME_REF="$runtime" python3 - "$yaml_path" <<'PY'
import os, sys
try:
import yaml
except Exception as e: # PyYAML missing — fail loud, do not silently skip.
sys.stderr.write(f"PyYAML required for provider-matrix SSOT read: {e}\n")
sys.exit(2)
rt = os.environ["RUNTIME_REF"]
with open(sys.argv[1]) as f:
doc = yaml.safe_load(f)
native = (doc.get("runtimes") or {}).get(rt) or {}
for pref in native.get("providers", []) or []:
if pref.get("name") == "platform":
for m in pref.get("models", []) or []:
print(m)
PY
}
# provider_liveness_matrix <runtime> <probe_fn>
# For each platform-servable model the SSOT lists for <runtime>, calls
# <probe_fn> <model_id> which must echo the agent text (or empty) and return
# 0 on a non-error completion, non-zero otherwise. Logs a per-model pass/fail
# matrix. Returns 0 only if EVERY probed model produced a non-error
# completion; non-zero (and a recorded matrix) otherwise.
#
# Purpose: exercise each offered provider's AUTH + ROUTING path so a drained
# key / wrong base-URL / byok-misroute fails the gate (the #1994 class). The
# probe_fn is expected to use minimal max_tokens.
#
# This helper does the SSOT read + matrix bookkeeping; the host script
# supplies probe_fn (it owns workspace ids + tenant_call wiring).
provider_liveness_matrix() {
local runtime="$1"
local probe_fn="$2"
local models model rc total=0 passed=0
local -a results=()
models=$(offered_platform_models_for_runtime "$runtime") || {
fail "provider-liveness: could not read offered-provider matrix from providers.yaml SSOT for runtime=$runtime"
}
if [ -z "$models" ]; then
log " [provider-matrix] runtime=$runtime offers no platform-servable models in the SSOT — nothing to probe (not a failure)."
return 0
fi
log " [provider-matrix] SSOT offered platform models for $runtime:"
while IFS= read -r model; do
[ -z "$model" ] && continue
log " - $model"
done <<<"$models"
while IFS= read -r model; do
[ -z "$model" ] && continue
total=$((total + 1))
set +e
"$probe_fn" "$model"
rc=$?
set -e
if [ "$rc" = "0" ]; then
passed=$((passed + 1))
results+=("PASS $model")
elif [ "$rc" = "75" ]; then
# 75 (EX_TEMPFAIL convention) = probe skipped (key/runtime not
# available in this lane). Not counted toward pass/fail — logged.
total=$((total - 1))
results+=("SKIP $model (probe unavailable in this lane)")
else
results+=("FAIL $model")
fi
done <<<"$models"
log " [provider-matrix] result matrix (runtime=$runtime):"
local line
for line in "${results[@]}"; do
log " $line"
done
log " [provider-matrix] $passed/$total probed providers completed without error"
if [ "$passed" != "$total" ]; then
return 1
fi
return 0
}
# assert_byok_not_platform_proxy <billing_mode_json> <context_label>
# #1994 regression guard. Given the JSON body from
# GET /admin/workspaces/:id/llm-billing-mode (same derived resolver the
# provision-time strip gate uses), asserts the workspace resolves to BYOK
# and NOT platform_managed. A regression of #1994 (byok workspace baked to
# platform_managed → routed through the platform proxy → platform LLM key
# drained) flips resolved_mode to "platform_managed" and trips this gate.
# Calls fail() (exits) on violation.
assert_byok_not_platform_proxy() {
local body="$1"
local ctx="${2:-byok-guard}"
local mode prov
mode=$(printf '%s' "$body" | python3 -c "import json,sys
try: print(json.load(sys.stdin).get('resolved_mode',''))
except Exception: print('')" 2>/dev/null || echo "")
prov=$(printf '%s' "$body" | python3 -c "import json,sys
try:
d=json.load(sys.stdin); v=d.get('provider_selection')
print(v if v is not None else '')
except Exception: print('')" 2>/dev/null || echo "")
if [ -z "$mode" ]; then
fail "$ctx — byok-routing guard: could not read resolved_mode from billing-mode response. Raw: ${body:0:200}"
fi
if [ "$mode" = "platform_managed" ]; then
fail "$ctx — byok-routing guard TRIPPED (#1994 regression): a byok-configured workspace resolved to 'platform_managed' (provider_selection=$prov) → it would route through the platform proxy and drain the platform LLM key. Expected resolved_mode=byok. Raw: ${body:0:200}"
fi
if [ "$mode" != "byok" ]; then
fail "$ctx — byok-routing guard: unexpected resolved_mode='$mode' (expected 'byok'). provider_selection=$prov. Raw: ${body:0:200}"
fi
ok "$ctx — byok-routing guard: workspace resolves byok (provider_selection=$prov), NOT platform-proxy. #1994 stays fixed."
}
+35
View File
@@ -8,6 +8,34 @@ TIMEOUT="${A2A_TIMEOUT:-120}" # seconds per A2A call (override via A2A_TIMEOUT
# shellcheck source=_lib.sh
source "$(dirname "$0")/_lib.sh"
# molecule-core#1995 (#1994 follow-on): real-completion assertion helpers.
# Adds a NEGATIVE error-as-text check on top of the shape checks below, so a
# broken agent that returns its error AS a text part
# ({"kind":"text","text":"Agent error (Exception) ..."}) — which STILL
# matches the shape check `"kind":"text"` — now FAILS instead of passing.
# shellcheck source=lib/completion_assert.sh
source "$(dirname "$0")/lib/completion_assert.sh"
# check_no_error_as_text <desc> <agent_text>
# Additive negative gate: PASS only if the agent text carries NO
# error-as-text marker (Agent error / Exception / error result /
# MISSING_BYOK_CREDENTIAL). Uses the same scanner as the staging
# real-completion gate so the trap is closed consistently across lanes.
check_no_error_as_text() {
local desc="$1"
local text="$2"
local hit
if hit=$(a2a_completion_error_marker "$text"); then
echo "FAIL: $desc"
echo " agent returned an error-AS-text payload (matched '$hit') — a broken"
echo " agent that surfaces its error as a text part is NOT a real reply."
echo " got: $(echo "$text" | head -3)"
FAIL=$((FAIL + 1))
else
echo "PASS: $desc"
PASS=$((PASS + 1))
fi
}
check() {
local desc="$1"
@@ -81,6 +109,8 @@ check "JSON-RPC response has result" '"result"' "$R"
check "Response has agent role" '"role":"agent"' "$R"
check "Response has text part" '"kind":"text"' "$R"
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
# Negative gate (#1994): the text part must not BE an error.
check_no_error_as_text "Echo reply is not an error-as-text payload" "$TEXT"
echo " Agent said: $TEXT"
echo ""
@@ -92,6 +122,11 @@ R=$(a2a_send "$SEO_ID" "What SEO skills do you have?")
check "SEO agent responds" '"result"' "$R"
check "SEO response has text" '"kind":"text"' "$R"
TEXT=$(echo "$R" | python3 -c "import sys,json; r=json.load(sys.stdin); print(r['result']['parts'][0]['text'][:200])" 2>/dev/null || echo "PARSE_ERROR")
# Negative gate (#1994): a broken SEO agent that returns "Agent error
# (Exception) ..." AS text still matches the `"kind":"text"` shape check
# above — THAT is the gap that let drained-key/byok-misroute failures pass
# CI. This makes that case FAIL.
check_no_error_as_text "SEO reply is not an error-as-text payload" "$TEXT"
echo " SEO Agent said: $TEXT"
echo ""
+111
View File
@@ -0,0 +1,111 @@
#!/usr/bin/env bash
# Fail-direction / load-bearing proof for lib/completion_assert.sh.
#
# This is the watch-it-FAIL counterpart the dev-SOP Phase 3 requires: it
# proves the new real-completion + byok gates actually CATCH a broken agent,
# not just pass on a good one. It runs entirely offline (no LLM, no network,
# no provisioning) — pure assertion logic — so it can run on every PR in the
# fast lane (e2e-api.yml unit-shell step) and locally via `bash`.
#
# The decisive case is `error-as-text payload MUST FAIL`: that is the exact
# trap (#1994) the historical shape-only check missed. If a refactor weakens
# a2a_assert_real_completion to a substring/shape check, THIS test goes red.
set -uo pipefail
HERE="$(cd "$(dirname "$0")" && pwd)"
PASS=0
FAIL=0
# Minimal stand-ins for the host script's helpers. fail() must NOT exit the
# whole harness here — we want to assert that it WAS called. We trap it by
# running the assertion in a subshell and checking the subshell's exit code:
# the real fail() exits 1, ok() exits 0 implicitly.
log() { echo "[unit] $*"; }
ok() { echo "[unit] OK: $*"; }
fail() { echo "[unit] FAIL-CALLED: $*" >&2; exit 1; }
# shellcheck source=lib/completion_assert.sh
source "$HERE/lib/completion_assert.sh"
expect_pass() {
local desc="$1"; shift
if ( "$@" ) >/dev/null 2>&1; then
echo "PASS: $desc (assertion accepted, as expected)"
PASS=$((PASS + 1))
else
echo "FAIL: $desc — expected the assertion to ACCEPT, but it rejected"
FAIL=$((FAIL + 1))
fi
}
expect_fail() {
local desc="$1"; shift
if ( "$@" ) >/dev/null 2>&1; then
echo "FAIL: $desc — expected the assertion to REJECT, but it accepted (gate NOT load-bearing!)"
FAIL=$((FAIL + 1))
else
echo "PASS: $desc (assertion rejected, as expected)"
PASS=$((PASS + 1))
fi
}
echo "=== completion_assert.sh fail-direction proof ==="
# ---- a2a_assert_real_completion ----
# Good: real known-answer reply passes.
expect_pass "real PINEAPPLE reply passes" \
a2a_assert_real_completion "PINEAPPLE" "PINEAPPLE" "unit"
expect_pass "case-insensitive known answer passes" \
a2a_assert_real_completion "pineapple" "PINEAPPLE" "unit"
expect_pass "known answer with minor wrapping passes" \
a2a_assert_real_completion "Sure: PINEAPPLE" "PINEAPPLE" "unit"
# DECISIVE: the error-as-text trap. Each MUST fail — these are the payloads a
# broken agent returns that the old shape-only `"kind":"text"` check passed.
expect_fail "Agent error as text payload MUST fail" \
a2a_assert_real_completion "Agent error (Exception) — see workspace logs for details." "PINEAPPLE" "unit"
expect_fail "bare Exception as text MUST fail" \
a2a_assert_real_completion "Traceback ... Exception: boom" "PINEAPPLE" "unit"
expect_fail "error result as text MUST fail" \
a2a_assert_real_completion "tool returned error result" "PINEAPPLE" "unit"
expect_fail "MISSING_BYOK_CREDENTIAL as text MUST fail" \
a2a_assert_real_completion "MISSING_BYOK_CREDENTIAL: set your own key" "PINEAPPLE" "unit"
# Error-as-text that ALSO happens to contain the token still fails (error
# marker takes precedence — a real completion never carries these markers).
expect_fail "error-as-text containing the token still fails" \
a2a_assert_real_completion "Agent error: could not produce PINEAPPLE" "PINEAPPLE" "unit"
# Empty text fails.
expect_fail "empty text fails" \
a2a_assert_real_completion "" "PINEAPPLE" "unit"
# Wrong/echoed content (no token, no error) fails — shape-OK but not a real
# completion.
expect_fail "wrong content without token fails" \
a2a_assert_real_completion "Reply with exactly the word PINEAPPLE and nothing else." "BANANA" "unit"
# ---- assert_byok_not_platform_proxy (#1994 guard) ----
expect_pass "byok resolution passes the guard" \
assert_byok_not_platform_proxy '{"resolved_mode":"byok","provider_selection":"minimax","source":"derived_provider"}' "unit"
# DECISIVE: a platform_managed resolution on a byok workspace = the #1994
# regression. MUST fail.
expect_fail "platform_managed resolution trips the #1994 guard" \
assert_byok_not_platform_proxy '{"resolved_mode":"platform_managed","provider_selection":"platform","source":"derived_provider"}' "unit"
expect_fail "missing resolved_mode trips the guard" \
assert_byok_not_platform_proxy '{"provider_selection":"x"}' "unit"
expect_fail "disabled mode trips the guard (not byok)" \
assert_byok_not_platform_proxy '{"resolved_mode":"disabled"}' "unit"
# ---- a2a_completion_error_marker (the scanner under the gate) ----
if hit=$(a2a_completion_error_marker "all good PINEAPPLE"); then
echo "FAIL: clean text wrongly flagged as error marker ($hit)"; FAIL=$((FAIL + 1))
else
echo "PASS: clean text has no error marker"; PASS=$((PASS + 1))
fi
if hit=$(a2a_completion_error_marker "An Exception occurred"); then
echo "PASS: error marker detected ($hit)"; PASS=$((PASS + 1))
else
echo "FAIL: error marker NOT detected in 'An Exception occurred'"; FAIL=$((FAIL + 1))
fi
echo ""
echo "=== Results: $PASS passed, $FAIL failed ==="
[ "$FAIL" -eq 0 ]
+182
View File
@@ -99,6 +99,12 @@ source "$(dirname "$0")/lib/model_slug.sh"
# shellcheck disable=SC1091
# shellcheck source=lib/aws_leak_check.sh
source "$(dirname "$0")/lib/aws_leak_check.sh"
# shellcheck disable=SC1091
# shellcheck source=lib/completion_assert.sh
# molecule-core#1995 (#1994 follow-on): real-completion + per-provider
# liveness + byok-routing assertion helpers. Adds gates that FAIL on an
# error-as-text payload (the trap the shape-only A2A checks missed).
source "$(dirname "$0")/lib/completion_assert.sh"
CURL_COMMON=(-sS --fail-with-body --max-time 30)
E2E_TMP_FILES=()
@@ -867,6 +873,182 @@ fi
ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
# ─── 8b. Real-completion known-answer round-trip (CORE GATE, #1994) ────
# The existing PONG check + generic error grep above already do a lot, but
# this stanza is the canonical real-completion gate the #1994 follow-on
# adds: a DETERMINISTIC known-answer prompt asserted via
# a2a_assert_real_completion, which FAILS on an error-as-text payload
# ({"kind":"text","text":"Agent error (Exception) ..."}). That payload
# matches the historical shape-only check `"kind":"text"` and so passed CI
# on a fully broken agent (drained-key / byok-misroute, 2026-05-2x). This
# gate makes that case RED. Reuses the same cold-start retry-on-transient
# (502/503/504) loop the PONG probe uses — retry-once-on-network, never on
# agent-error. Single round-trip → the one place we spend a non-trivial
# token budget (default backend MiniMax — cheap token plan).
KA_PAYLOAD=$(python3 -c "
import json, uuid
print(json.dumps({
'jsonrpc': '2.0',
'method': 'message/send',
'id': 'e2e-known-answer-1',
'params': {
'message': {
'role': 'user',
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
'parts': [{'kind': 'text', 'text': 'Reply with exactly the word PINEAPPLE and nothing else.'}]
}
}
}))
")
KA_TMP=$(mktemp -t known_answer_a2a.XXXXXX)
KA_RESP=""
for KA_ATTEMPT in $(seq 1 6); do
: >"$KA_TMP"
set +e
KA_CODE=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
--max-time 90 \
-H "Content-Type: application/json" \
-d "$KA_PAYLOAD" \
-o "$KA_TMP" \
-w '%{http_code}' \
2>/dev/null)
KA_RC=$?
set -e
KA_CODE=${KA_CODE:-000}
KA_RESP=$(cat "$KA_TMP" 2>/dev/null || echo "")
if [ "$KA_RC" = "0" ] && [ "$KA_CODE" -ge 200 ] && [ "$KA_CODE" -lt 300 ]; then
break
fi
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
# Retry ONLY on transient transport errors — never on an agent-level
# error (those must surface and fail the gate).
if echo "$KA_CODE" | grep -Eq '^(502|503|504)$' && echo "$KA_SAFE_BODY" | grep -Eqi 'Service Unavailable|Bad Gateway|Gateway Timeout|workspace agent unreachable|connection refused|no healthy upstream|workspace agent busy|native_session'; then
log " known-answer A2A transient $KA_CODE attempt $KA_ATTEMPT/6: $KA_SAFE_BODY"
if [ "$KA_ATTEMPT" -lt 6 ]; then sleep 10; continue; fi
fi
break
done
rm -f "$KA_TMP"
if [ "$KA_RC" != "0" ] || [ "$KA_CODE" -lt 200 ] || [ "$KA_CODE" -ge 300 ]; then
KA_SAFE_BODY=$(printf '%s' "$KA_RESP" | sanitize_http_body)
fail "Known-answer A2A POST failed after $KA_ATTEMPT attempt(s) (curl_rc=$KA_RC, http=$KA_CODE): $KA_SAFE_BODY"
fi
KA_TEXT=$(echo "$KA_RESP" | python3 -c "
import json, sys
try:
d = json.load(sys.stdin)
parts = d.get('result', {}).get('parts', [])
print(parts[0].get('text', '') if parts else '')
except Exception:
print('')
" 2>/dev/null || echo "")
# CORE GATE: contains PINEAPPLE (real round-trip) AND no error-as-text.
a2a_assert_real_completion "$KA_TEXT" "PINEAPPLE" "A2A known-answer (parent, $RUNTIME/$MODEL_SLUG)"
# ─── 8c. byok-routing regression guard (#1994) ─────────────────────────
# The parent was provisioned with the customer's OWN vendor key
# (MINIMAX_API_KEY / ANTHROPIC_API_KEY in SECRETS_JSON) → it must resolve
# BYOK, not platform_managed. #1994 was exactly the inverse: a byok
# workspace baked platform_managed on (re-)provision → routed through the
# platform proxy → drained the platform LLM key. We read the SAME derived
# resolver the provision-time strip gate uses
# (GET /admin/workspaces/:id/llm-billing-mode) and assert resolved_mode!=
# platform_managed. A regression flips it RED.
#
# Only meaningful when the parent actually carries a byok credential; the
# OpenAI/hermes path uses a different env shape, and the no-key path is
# legitimately platform_managed (the CTO default). Gate on the same
# E2E_*_API_KEY presence the SECRETS_JSON branch keyed off.
if [ -n "${E2E_MINIMAX_API_KEY:-}" ] || [ -n "${E2E_ANTHROPIC_API_KEY:-}" ]; then
set +e
BILLING_RESP=$(tenant_call GET "/admin/workspaces/$PARENT_ID/llm-billing-mode" 2>/dev/null)
BILLING_RC=$?
set -e
if [ "$BILLING_RC" != "0" ] || [ -z "$BILLING_RESP" ]; then
fail "byok-routing guard: GET /admin/workspaces/$PARENT_ID/llm-billing-mode failed (rc=$BILLING_RC). Body: ${BILLING_RESP:0:200}"
fi
assert_byok_not_platform_proxy "$BILLING_RESP" "byok-guard (parent, $RUNTIME/$MODEL_SLUG)"
else
log "8c. byok-routing guard skipped — parent carries no own-vendor key (OpenAI/no-key path is legitimately platform_managed)."
fi
# ─── 8d. Per-offered-provider liveness matrix (SSOT-driven, #1994 class) ─
# For each platform-servable model the providers.yaml SSOT
# (runtimes.<runtime>.providers[platform].models) declares for this
# runtime, send a minimal max_tokens-bounded "say ok" probe and assert a
# NON-ERROR completion. Purpose: exercise each offered provider's AUTH +
# ROUTING path so a drained key / wrong base-URL / byok-misroute fails the
# gate (the #1994 class). Providers/models come from the SSOT — not a
# hardcoded list — so the matrix tracks providers.yaml automatically.
#
# This lane provisions ONE parent workspace with ONE configured key, so we
# can only truly drive the providers that key authenticates. Probing a
# model whose provider key is absent in this lane is reported SKIP (rc=75),
# not FAIL — keeping the gate deterministic + low-flake. The matrix still
# proves the configured provider's full auth+routing path end-to-end, and
# logs the offered set so over/under-offer drift is visible in the CI log.
provider_liveness_probe() {
local model_id="$1"
# Map the SSOT platform model id (e.g. minimax/MiniMax-M2.7) to the
# vendor namespace token to decide whether THIS lane has its key.
local vendor="${model_id%%/*}"
case "$vendor" in
minimax) [ -n "${E2E_MINIMAX_API_KEY:-}" ] || return 75 ;;
anthropic) [ -n "${E2E_ANTHROPIC_API_KEY:-}" ] || return 75 ;;
openai) [ -n "${E2E_OPENAI_API_KEY:-}" ] || return 75 ;;
*) return 75 ;; # kimi/moonshot etc. — no key wired in this lane
esac
local probe_payload
probe_payload=$(python3 -c "
import json, uuid
print(json.dumps({
'jsonrpc': '2.0',
'method': 'message/send',
'id': 'e2e-liveness-' + uuid.uuid4().hex[:6],
'params': {
'message': {
'role': 'user',
'messageId': f'e2e-{uuid.uuid4().hex[:8]}',
'parts': [{'kind': 'text', 'text': 'Reply with exactly: ok'}],
},
'configuration': {'max_tokens': 4}
}
}))
")
local tmp code rc resp
tmp=$(mktemp -t liveness_a2a.XXXXXX)
set +e
code=$(tenant_call POST "/workspaces/$PARENT_ID/a2a" \
--max-time 60 \
-H "Content-Type: application/json" \
-d "$probe_payload" \
-o "$tmp" -w '%{http_code}' 2>/dev/null)
rc=$?
set -e
resp=$(cat "$tmp" 2>/dev/null || echo "")
rm -f "$tmp"
if [ "$rc" != "0" ] || [ "${code:-000}" -lt 200 ] || [ "${code:-000}" -ge 300 ]; then
log " probe $model_id: HTTP ${code:-000} rc=$rc"
return 1
fi
local text
text=$(echo "$resp" | python3 -c "
import json,sys
try:
d=json.load(sys.stdin); p=d.get('result',{}).get('parts',[])
print(p[0].get('text','') if p else '')
except Exception: print('')" 2>/dev/null || echo "")
if [ -z "$text" ] || a2a_completion_error_marker "$text" >/dev/null; then
log " probe $model_id: error-as-text or empty: ${text:0:120}"
return 1
fi
return 0
}
if ! provider_liveness_matrix "$RUNTIME" provider_liveness_probe; then
fail "Per-provider liveness matrix: at least one offered provider failed its auth+routing probe (see matrix above). This is the #1994 class — a drained key / wrong base-URL / byok-misroute."
fi
ok "Per-provider liveness matrix passed (all probed offered providers completed without error)"
# ─── 9. HMA + peers + activity (full mode) ─────────────────────────────
if [ "$MODE" = "full" ]; then
log "9/11 Writing + reading HMA memory on parent..."
@@ -0,0 +1,257 @@
package handlers
// llm_billing_mode_provision_parity_test.go — molecule-core#1994.
//
// Root cause pinned in Phase 1: the PROVISION path resolved billing mode from
// the raw payload.Model, while the READ endpoint resolves from the stored
// MODEL workspace_secret. On a RE-PROVISION (restart/resume/auto-restart) the
// payload is rebuilt from the DB with Name+Tier+Runtime ONLY — payload.Model
// is "" (workspace_restart.go:333/844/1017 via withStoredCompute, which
// backfills Compute but NOT Model). So applyPlatformManagedLLMEnv called
// ResolveLLMBillingModeDerived(runtime, "", ...) → DeriveProvider errored on an
// empty model → default-closed platform_managed → the CP proxy got baked in and
// the workspace billed the PLATFORM Anthropic key for the customer's own usage
// (Reno Stars Marketing agent 6b66de8d, opus, claude-code; live-confirmed
// 2026-05-28: container env MODEL=opus but MOLECULE_LLM_BILLING_MODE_RESOLVED=
// platform_managed + ANTHROPIC_BASE_URL=<platform proxy>).
//
// The fix: applyPlatformManagedLLMEnv resolves the effective model using the
// SAME fallback chain applyRuntimeModelEnv already uses
// (payload.Model → envVars["MOLECULE_MODEL"] → envVars["MODEL"]) BEFORE
// deriving, so the provision path's derive inputs match the read path's. The
// merged envVars already carries the MODEL workspace_secret (loadWorkspaceSecrets).
//
// These tests are mutation-load-bearing: reverting the effective-model fix
// (passing payload.Model verbatim) turns
// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel and the parity
// test RED.
import (
"context"
"testing"
"git.moleculesai.app/molecule-ai/molecule-core/workspace-server/internal/models"
"github.com/DATA-DOG/go-sqlmock"
)
// TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel is the direct
// repro of the #1994 divergence at the provision resolver. payload.Model is ""
// (the re-provision shape) but the workspace's own oauth + MODEL=opus are
// present in envVars (loaded from workspace_secrets). The resolver MUST derive
// from the stored model → anthropic-oauth → byok, NOT default-closed to
// platform_managed.
//
// Asserts the byok outcome AND that the byok branch's effects fired:
// - billing-mode env = byok (not platform_managed)
// - ANTHROPIC_BASE_URL NOT rewritten to the platform proxy (left direct)
// - the workspace's OWN oauth (workspace_secrets provenance, NOT in
// globalKeys) survives — usable credential present.
//
// Mutation: revert applyPlatformManagedLLMEnv to pass payload.Model ("") to the
// resolver → derive errors on empty model → platform_managed → this test RED on
// every assertion.
func TestApplyPlatformManagedLLMEnv_ReProvisionUsesStoredModel(t *testing.T) {
ctx := context.Background()
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent
mock := setupTestDB(t)
// Resolver reads the override (NULL — no explicit operator pin).
expectOverrideQuery(mock, wsID, "")
// The container env as loadWorkspaceSecrets would have built it on a
// re-provision: the workspace's OWN oauth (workspace_secrets provenance) +
// the stored MODEL=opus. The platform proxy URL is present from the prior
// platform_managed boot (the env we must NOT re-bake).
envVars := map[string]string{
"MODEL": "opus",
"CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH", // workspace_secrets origin
"ANTHROPIC_BASE_URL": "https://api.moleculesai.app/api/v1/internal/llm/anthropic",
}
// payload.Model == "" — exactly the re-provision shape.
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "")
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("re-provision with stored MODEL=opus must resolve byok, got %q (source=%s) — the #1994 divergence", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want derived_provider (opus → anthropic-oauth)", res.Source)
}
if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModeBYOK {
t.Errorf("MOLECULE_LLM_BILLING_MODE_RESOLVED: got %q want byok", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"])
}
// byok must NOT route through the platform proxy.
if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.moleculesai.app/api/v1/internal/llm/anthropic" {
// The byok branch must leave ANTHROPIC_BASE_URL untouched (the prior
// proxy URL is what re-provision must STOP re-asserting from the
// platform path; the workspace template resets it to direct on the byok
// path). The key assertion is the inverse below: the platform path did
// NOT run, so MOLECULE_LLM_BASE_URL / usage token were NOT injected.
_ = got
}
// The decisive proxy-bypass assertions: the platform_managed path injects
// these; the byok branch must NOT.
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Errorf("byok path must NOT inject the platform usage token (proxy billing); got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"])
}
if !res.HasUsableLLMCred {
t.Errorf("the workspace's OWN oauth (workspace_secrets origin) must survive → HasUsableLLMCred=true")
}
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "RENO-OWN-OAUTH" {
t.Errorf("workspace-origin oauth must survive the byok strip; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_ReadProvisionParity is the core regression
// guard against the #1994 divergence ever returning: for the same workspace
// inputs (same runtime, same stored MODEL, same auth env, same override), the
// READ-path resolver (ResolveLLMBillingMode → readWorkspaceDeriveInputs) and
// the PROVISION-path resolver (applyPlatformManagedLLMEnv) MUST land on the
// same billing mode.
//
// Mutation: revert the effective-model fix → provision path derives from ""
// → platform_managed while the read path derives opus → byok → parity BREAKS
// → this test RED.
func TestApplyPlatformManagedLLMEnv_ReadProvisionParity(t *testing.T) {
ctx := context.Background()
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809"
// ---- READ PATH ----
// ResolveLLMBillingMode reads in order: override (NULL) → runtime → secrets
// (MODEL=opus + the oauth key) → then ResolveLLMBillingModeDerived re-reads
// the override (NULL again).
readMock := setupTestDB(t)
expectOverrideQuery(readMock, wsID, "") // first override read (legacy resolver)
readMock.ExpectQuery(`SELECT runtime FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"runtime"}).AddRow("claude-code"))
readMock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("RENO-OWN-OAUTH"), 0))
expectOverrideQuery(readMock, wsID, "") // second override read (derived resolver)
readRes, err := ResolveLLMBillingMode(ctx, wsID, "")
if err != nil {
t.Fatalf("read-path resolve err: %v", err)
}
if err := readMock.ExpectationsWereMet(); err != nil {
t.Errorf("read-path sqlmock expectations: %v", err)
}
// ---- PROVISION PATH ----
provMock := setupTestDB(t)
expectOverrideQuery(provMock, wsID, "")
provEnv := map[string]string{
"MODEL": "opus",
"CLAUDE_CODE_OAUTH_TOKEN": "RENO-OWN-OAUTH",
}
provRes := applyPlatformManagedLLMEnv(ctx, provEnv, wsID, "claude-code", "")
if err := provMock.ExpectationsWereMet(); err != nil {
t.Errorf("provision-path sqlmock expectations: %v", err)
}
if readRes.ResolvedMode != provRes.ResolvedMode {
t.Fatalf("PARITY VIOLATION (#1994): read-path resolved %q but provision-path resolved %q for the same workspace inputs (claude-code, MODEL=opus)",
readRes.ResolvedMode, provRes.ResolvedMode)
}
if readRes.ResolvedMode != LLMBillingModeBYOK {
t.Errorf("both paths should resolve byok for (claude-code, opus); got %q", readRes.ResolvedMode)
}
}
// TestApplyPlatformManagedLLMEnv_DefaultPreservation pins the CTO invariant
// "default stays platform": a workspace with no non-platform provider selection
// and no own credential (no stored MODEL, empty env) still resolves
// platform_managed. The fix must NOT flip genuinely-platform workspaces to byok.
//
// This mirrors the agents-team genuinely-platform case. Mutation: a fix that
// silently defaulted byok on an empty/underivable model would turn this RED.
func TestApplyPlatformManagedLLMEnv_DefaultPreservation(t *testing.T) {
ctx := context.Background()
const wsID = "11111111-2222-3333-4444-555555555555"
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "")
// No MODEL anywhere, no auth env — nothing to derive.
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "")
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Fatalf("no model + no cred must default platform_managed (CTO: default stays platform), got %q (source=%s)", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedDefault {
t.Errorf("source: got %q want derived_default", res.Source)
}
if envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] != LLMBillingModePlatformManaged {
t.Errorf("resolved env: got %q want platform_managed", envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives is the
// molecule-core#1994 (corrected-model) inversion of the former internal#711
// strip test. `global_secrets` is the TENANT's store, so a byok workspace
// whose oauth lives at GLOBAL scope (shared across the tenant's workspaces) is
// running on the TENANT's own credential — it must SURVIVE and route direct,
// not be stripped + failed-closed. MODEL=opus derives byok; the global-scope
// oauth is the tenant's own and is exactly what byok runs on.
//
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch
// → the oauth disappears → HasUsableLLMCred=false → this test RED on both the
// survival assertion and the usable-cred assertion.
func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvives(t *testing.T) {
ctx := context.Background()
const wsID = "99999999-8888-7777-6666-555555555555"
mock := setupTestDB(t)
expectOverrideQuery(mock, wsID, "")
// The tenant's own oauth at global scope (a global_secrets row), shared
// across all the tenant's workspaces. There is no separate workspace row.
envVars := map[string]string{
"MODEL": "opus",
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
}
res := applyPlatformManagedLLMEnv(ctx, envVars, wsID, "claude-code", "")
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("opus derives byok; got %q", res.ResolvedMode)
}
// The tenant's own global-scope oauth SURVIVES — byok runs on it, direct.
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Errorf("tenant's own global-scope oauth must survive on byok; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
if !res.HasUsableLLMCred {
t.Errorf("tenant's own global-scope oauth is a usable credential → HasUsableLLMCred must be true (byok must not be failed-closed)")
}
// byok must NOT force the platform proxy.
if _, present := envVars["MOLECULE_LLM_USAGE_TOKEN"]; present {
t.Errorf("byok must not inject the platform usage token; got %q", envVars["MOLECULE_LLM_USAGE_TOKEN"])
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("sqlmock expectations: %v", err)
}
}
// TestReProvisionPayloadOmitsModel is a static guard pinning the upstream
// trigger: the re-provision payload builders pass Name+Tier+Runtime but NOT
// Model, so applyPlatformManagedLLMEnv cannot rely on payload.Model and must
// fall back to the stored MODEL in envVars. If a future change starts threading
// Model into these payloads, this test documents that the fallback is then
// belt-and-suspenders (still correct), not the sole mechanism.
func TestReProvisionPayloadOmitsModel(t *testing.T) {
// Mirrors withStoredCompute(ctx, id, CreateWorkspacePayload{Name, Tier,
// Runtime}) at workspace_restart.go:333/844/1017 — Model is the zero value.
p := models.CreateWorkspacePayload{Name: "Reno Stars Marketing", Tier: 1, Runtime: "claude-code"}
if p.Model != "" {
t.Fatalf("re-provision payload model expected empty (the #1994 trigger), got %q", p.Model)
}
}
+10 -36
View File
@@ -245,11 +245,6 @@ func (h *SecretsHandler) Values(c *gin.Context) {
// provisioner path in workspace_provision.go so env-vars look identical
// whether the workspace was bootstrapped locally or remotely).
out := map[string]string{}
// Provenance side-channel (internal#711): which keys in `out` originated
// from global_secrets and were NOT overridden by a workspace_secrets row.
// Used by the provider-aware gate below so a non-platform workspace's
// remote pull never receives the platform's scope:global LLM credential.
globalKeys := map[string]struct{}{}
// Track decrypt failures so we can refuse the response with a list
// instead of returning a partial bundle that boots a broken agent.
var failedKeys []string
@@ -275,7 +270,6 @@ func (h *SecretsHandler) Values(c *gin.Context) {
continue
}
out[k] = string(decrypted)
globalKeys[k] = struct{}{}
}
}
if err := globalRows.Err(); err != nil {
@@ -300,10 +294,6 @@ func (h *SecretsHandler) Values(c *gin.Context) {
continue
}
out[k] = string(decrypted) // workspace override wins over global
// User explicitly re-set this via the canvas Secrets tab — it is
// no longer "the operator-store version", so drop the global
// provenance flag (mirrors loadWorkspaceSecrets).
delete(globalKeys, k)
}
}
if err := wsRows.Err(); err != nil {
@@ -319,32 +309,16 @@ func (h *SecretsHandler) Values(c *gin.Context) {
return
}
// internal#711: provider-aware gate on the remote-pull path. A workspace
// whose resolved billing mode is NOT platform_managed (byok / subscription)
// must NOT receive the platform's scope:global LLM credentials
// (CLAUDE_CODE_OAUTH_TOKEN + the rest of the bypass-key set). Those keys
// were merged from global_secrets above; here we drop any that are still
// of global provenance (a workspace override survives, since its flag was
// cleared). Symmetric with applyPlatformManagedLLMEnv's strip on the
// provision/restart env path — both injection vectors are now gated.
//
// Default-closed: ResolveLLMBillingMode collapses any DB error / NULL /
// garbled value to platform_managed, so a transient failure leaves the
// existing (global-inheriting) behavior in place rather than stripping a
// platform_managed workspace's creds.
orgMode := strings.ToLower(strings.TrimSpace(os.Getenv("MOLECULE_LLM_BILLING_MODE")))
res, resolveErr := ResolveLLMBillingMode(ctx, workspaceID, orgMode)
if resolveErr != nil {
log.Printf("secrets.Values: resolve billing mode workspace=%s err=%v (defaulting to platform_managed)", workspaceID, resolveErr)
}
if res.ResolvedMode != LLMBillingModePlatformManaged {
for k := range globalKeys {
if isPlatformManagedDirectLLMBypassKey(k) {
delete(out, k)
}
}
}
// molecule-core#1994 (corrected model): the remote-pull bundle is the
// TENANT's own merged secrets (global_secrets + workspace_secrets, the
// latter winning on collision). `global_secrets` is the tenant's store, not
// the platform's, so a byok workspace's pull MUST include the tenant's own
// global-scope LLM credential — that is exactly what it runs on, direct.
// The earlier internal#711 byok strip here rested on the inverted "global =
// platform's own" premise and is removed; the platform's own proxy token is
// never in a tenant's global_secrets (it lives in server env only and is
// injected separately on the platform_managed provision path), so there is
// nothing platform-owned to withhold on this path.
c.JSON(http.StatusOK, out)
}
@@ -840,14 +840,20 @@ func TestSecretsValues_ValidTokenReturnsDecryptedMerge(t *testing.T) {
}
}
// TestSecretsValues_ByokStripsGlobalLLMCred is the internal#711 regression
// guard for the remote-pull injection vector. A non-platform (byok) workspace
// that pulls its secrets via GET /workspaces/:id/secrets/values must NOT
// receive the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN — that key is
// of global_secrets provenance and is dropped by the provider-aware gate.
// Its OWN ANTHROPIC_API_KEY (a workspace_secrets row) survives, and unrelated
// non-LLM global secrets are untouched.
func TestSecretsValues_ByokStripsGlobalLLMCred(t *testing.T) {
// TestSecretsValues_ByokServesTenantGlobalLLMCred is the molecule-core#1994
// (corrected-model) regression guard for the remote-pull path. `global_secrets`
// is the TENANT's store, so a byok workspace's pull MUST include the tenant's
// own global-scope LLM credential — that is exactly what byok runs on, direct.
//
// Pre-fix (internal#711) this path STRIPPED the global-origin oauth on byok,
// resting on the inverted premise that a global LLM cred was "the platform's
// own"; that killed legitimate byok workspaces whose oauth lived at global
// scope. The strip is removed: the merged bundle (tenant globals + workspace
// overrides) is served verbatim.
//
// Mutation: re-add the byok global-LLM-cred strip in secrets.go Values() →
// CLAUDE_CODE_OAUTH_TOKEN disappears from the body → this test RED.
func TestSecretsValues_ByokServesTenantGlobalLLMCred(t *testing.T) {
mock := setupTestDB(t)
handler := NewSecretsHandler(nil)
@@ -860,21 +866,18 @@ func TestSecretsValues_ByokStripsGlobalLLMCred(t *testing.T) {
mock.ExpectExec(`UPDATE workspace_auth_tokens SET last_used_at`).
WithArgs("tok-1").
WillReturnResult(sqlmock.NewResult(0, 1))
// global_secrets holds the platform's scope:global OAuth token + a
// non-LLM operator global (should be untouched).
// global_secrets holds the TENANT's own global-scope OAuth token (shared
// across all the tenant's workspaces) + a non-LLM global.
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("PLATFORM-GLOBAL-OAUTH"), 0).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0).
AddRow("SENTRY_DSN", []byte("https://sentry.example/123"), 0))
// The workspace brought its OWN Anthropic API key via the Secrets tab.
// This workspace set no LLM secret of its own — it relies on the tenant
// global-scope oauth.
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets WHERE workspace_id`).
WithArgs(testWsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("ANTHROPIC_API_KEY", []byte("CUSTOMER-OWN-ANTHROPIC-KEY"), 0))
// Resolver: this workspace is byok.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(testWsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
AddRow("MODEL", []byte("opus"), 0))
w := httptest.NewRecorder()
c := secretsValuesRequest(w, "Bearer good-token")
@@ -885,13 +888,13 @@ func TestSecretsValues_ByokStripsGlobalLLMCred(t *testing.T) {
}
var body map[string]string
_ = json.Unmarshal(w.Body.Bytes(), &body)
// 1. Platform global OAuth token stripped — the leak is closed on the pull path.
if got, ok := body["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — platform scope:global token must be stripped for byok pull", got)
// 1. The tenant's own global-scope OAuth token SURVIVES — byok runs on it.
if body["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token served for byok pull", body["CLAUDE_CODE_OAUTH_TOKEN"])
}
// 2. The workspace's own LLM key survives.
if body["ANTHROPIC_API_KEY"] != "CUSTOMER-OWN-ANTHROPIC-KEY" {
t.Fatalf("ANTHROPIC_API_KEY = %q, want the workspace's own key preserved", body["ANTHROPIC_API_KEY"])
// 2. The workspace's own non-LLM secret survives.
if body["MODEL"] != "opus" {
t.Fatalf("MODEL = %q, want opus preserved", body["MODEL"])
}
// 3. Unrelated non-LLM global secrets are untouched.
if body["SENTRY_DSN"] != "https://sentry.example/123" {
@@ -976,6 +979,49 @@ func TestSetGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
}
}
// TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant is the
// molecule-core#1994 co-mingling GUARD regression. Removing the byok strip is
// only safe if the platform's own credential is never written into a tenant's
// global_secrets. SetGlobal is the in-code write boundary: on a tenant whose
// resolved LLM mode is platform_managed (the metered default), a direct
// vendor / oauth bypass-list key MUST be rejected (400) and NOT persisted —
// the tenant is supposed to route through the CP proxy, not carry a direct
// platform-shaped credential at global scope. This is what keeps a
// platform-origin token out of global_secrets going forward.
//
// (On a byok/disabled tenant the same write is ALLOWED — that key is the
// tenant's OWN credential, which the corrected model expects at global scope.
// TestSetGlobal_AutoRestartsAffectedWorkspaces covers that allowed path.)
//
// Mutation: drop the rejectPlatformManagedDirectLLMBypass guard from SetGlobal
// → the write reaches the INSERT (no 400) → this test RED.
func TestSetGlobal_RejectsPlatformBypassKeyOnPlatformManagedTenant(t *testing.T) {
setupTestDB(t)
handler := NewSecretsHandler(nil)
// Org/tenant default is platform_managed — the metered path. A direct
// vendor key write into global_secrets must be refused here.
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
w := httptest.NewRecorder()
c, _ := gin.CreateTestContext(w)
body := `{"key":"CLAUDE_CODE_OAUTH_TOKEN","value":"sk-ant-oat01-platform-shaped"}`
c.Request = httptest.NewRequest("POST", "/admin/secrets", bytes.NewBufferString(body))
c.Request.Header.Set("Content-Type", "application/json")
handler.SetGlobal(c)
if w.Code != http.StatusBadRequest {
t.Fatalf("expected 400 (bypass-list key rejected for platform_managed tenant), got %d: %s", w.Code, w.Body.String())
}
if !strings.Contains(w.Body.String(), "blocked") {
t.Errorf("response should explain the block; got %s", w.Body.String())
}
// No INSERT was expected on the mock — sqlmock would error on an
// unexpected ExecContext, so reaching here with a 400 proves the write
// was refused before the DB.
}
// TestDeleteGlobal_AutoRestartsAffectedWorkspaces covers the delete branch of #15.
func TestDeleteGlobal_AutoRestartsAffectedWorkspaces(t *testing.T) {
mock := setupTestDB(t)
@@ -773,12 +773,7 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// can no longer confuse a provider slug for a model id. CP-side
// slot-separation (cp#213 + cp#220) merged the analogous fix on
// the CP side; this is the workspace-server companion.
if model == "" {
model = envVars["MOLECULE_MODEL"]
}
if model == "" {
model = envVars["MODEL"]
}
model = effectiveModelForBilling(model, envVars)
if model == "" {
return
}
@@ -811,6 +806,31 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
}
}
// effectiveModelForBilling resolves the picked model id from an explicit
// argument with the SAME fallback chain applyRuntimeModelEnv uses to set the
// container MODEL env: explicit arg → envVars["MOLECULE_MODEL"] →
// envVars["MODEL"] (the workspace_secret). It is the single source of truth
// for "what model is this workspace going to run", shared by both
// applyRuntimeModelEnv (which exports it to the container) and
// applyPlatformManagedLLMEnv (which derives the billing mode from it).
//
// molecule-core#1994: the billing resolver MUST consult the same effective
// model the container will actually run. Pre-fix it used the raw payload.Model
// only, which is "" on a re-provision (the payload is rebuilt from the DB with
// no Model), so it derived from an empty model → defaulted closed to
// platform_managed and diverged from the read endpoint (which reads the stored
// MODEL secret). Returns "" only when no model is resolvable anywhere — the
// legitimate "unset → platform default" case the resolver fails closed on.
func effectiveModelForBilling(model string, envVars map[string]string) string {
if model == "" {
model = envVars["MOLECULE_MODEL"]
}
if model == "" {
model = envVars["MODEL"]
}
return model
}
// applyPlatformManagedLLMEnv wires the control-plane LLM proxy into a
// workspace only when the RESOLVED billing mode for this workspace is
// platform_managed. "Resolved" means: the workspace-level override (if any)
@@ -834,40 +854,41 @@ func applyRuntimeModelEnv(envVars map[string]string, runtime, model string) {
// answer "what mode is this workspace running under" without DB queries
// (RFC Observability hot-spot).
//
// internal#711 — PROVIDER-AWARE GLOBAL-LLM-CRED GATE. The platform's
// LLM credentials (CLAUDE_CODE_OAUTH_TOKEN + the rest of the
// platformManagedDirectLLMBypassKeys set) live in `global_secrets` and
// are merged into EVERY workspace's env by loadWorkspaceSecrets — that
// merge is provenance-blind. Pre-fix, the non-platform (byok/disabled)
// early-return left envVars untouched, so a BYOK / subscription
// workspace that brought NO LLM credential of its own still inherited
// the platform's scope:global CLAUDE_CODE_OAUTH_TOKEN and ran Opus on
// the platform's (Molecule's) Anthropic credits (Reno Stars SEO +
// Marketing agents, confirmed live 2026-05-27).
// molecule-core#1994 (credential-handling follow-on, CTO-confirmed model).
// `global_secrets` is the TENANT's own secret store, shared across all of
// that tenant's workspaces — it is NOT the platform's. The platform's own
// LLM credential is the CP proxy usage token (MOLECULE_LLM_USAGE_TOKEN),
// injected SEPARATELY on the platform_managed path below; it is never stored
// in any tenant's global_secrets.
//
// The gate: on the non-platform path we strip every platform-managed
// LLM key whose PROVENANCE is `global_secrets` (the globalKeys set).
// A workspace's OWN LLM credential — set via the canvas Secrets tab,
// i.e. a `workspace_secrets` row — has had its global provenance flag
// dropped by loadWorkspaceSecrets, so it is NOT in globalKeys and
// survives. Net effect: platform global LLM creds reach a workspace
// ONLY when its resolved mode is platform_managed; a non-platform
// workspace resolves to its own (workspace-scoped) credential or none.
// Consequently the byok/disabled branch does NOT strip the tenant's
// global-origin LLM creds. Under the corrected model the tenant's own
// credential — whether at global scope (a global_secrets row, e.g. the key
// they configured via the org-import required-env preflight / the settings
// Secrets tab) or at workspace scope (a workspace_secrets row) — is exactly
// what byok must run on, direct. The earlier internal#711 strip rested on the
// inverted premise that a global-scope LLM cred was "the platform's own"; it
// was wrong and it killed legitimate byok workspaces (MISSING_BYOK_CREDENTIAL
// for tenants whose oauth lived at global scope — Reno Stars Marketing agent,
// confirmed live 2026-05-28). Removing the strip is only safe because the
// platform's own credential is never co-mingled into a tenant's global_secrets
// (guarded at the write boundary: SetGlobal rejects bypass-list keys for a
// platform-managed tenant; the platform proxy token is read from server env
// only, never persisted to a tenant store).
//
// The boolean return reports whether, after the gate, the workspace
// still has at least one usable LLM credential. The caller
// (prepareProvisionContext) uses it to FAIL CLOSED — a non-platform
// workspace with no usable LLM credential is aborted with a clear
// MISSING_BYOK_CREDENTIAL error at provision time rather than being
// started on (now-stripped) platform creds.
// The boolean return still reports whether the workspace has at least one
// usable LLM credential. The caller (prepareProvisionContext) uses it to FAIL
// CLOSED — a byok workspace with no usable LLM credential at ANY scope is
// aborted with a clear MISSING_BYOK_CREDENTIAL error at provision time rather
// than started credential-less.
// platformLLMEnvResult is the structured outcome of applyPlatformManagedLLMEnv.
// ResolvedMode is the per-workspace billing/provider mode the resolver
// landed on. HasUsableLLMCred reports whether — AFTER the provider-aware
// global-cred gate — the workspace still has at least one platform-managed
// LLM credential key in its env (its own, workspace-scoped one). Only the
// non-platform path consults HasUsableLLMCred for the fail-closed decision;
// the platform_managed path always returns true (it forces the CP proxy
// usage token, which IS the usable credential).
// landed on. HasUsableLLMCred reports whether the workspace has at least one
// platform-managed-shaped LLM credential key in its env — the tenant's own,
// at global or workspace scope. Only the non-platform (byok) path consults
// HasUsableLLMCred for the fail-closed decision; the platform_managed path
// always returns true (it forces the CP proxy usage token, which IS the
// usable credential).
type platformLLMEnvResult struct {
ResolvedMode string
HasUsableLLMCred bool
@@ -879,7 +900,7 @@ type platformLLMEnvResult struct {
Source BillingModeSource
}
func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, globalKeys map[string]struct{}, workspaceID, runtime, model string) platformLLMEnvResult {
func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string, workspaceID, runtime, model string) platformLLMEnvResult {
// internal#718 P2-B: the platform-vs-byok decision now DERIVES the provider
// from (runtime, model) via the registry and keys off IsPlatform(derived) —
// NOT a stored LLM_PROVIDER and NOT the org rung. This path already carries
@@ -889,7 +910,20 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
// disambiguation input the registry uses to split oauth-vs-api). The org-env
// MOLECULE_LLM_BILLING_MODE is NO LONGER read into the decision (retired).
availableAuthEnv := availableAuthEnvNames(envVars)
res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, model, availableAuthEnv)
// molecule-core#1994: derive billing mode from the EFFECTIVE model, not the
// raw payload.Model. On a re-provision (restart/resume/auto-restart) the
// payload is rebuilt from the DB with Name+Tier+Runtime only — payload.Model
// is "" (workspace_restart.go via withStoredCompute, which backfills Compute
// but NOT Model). With an empty model DeriveProvider errors → the resolver
// defaults closed to platform_managed and bakes the CP proxy, DIVERGING from
// the read endpoint (which reads the stored MODEL workspace_secret and derives
// byok). The stored model already lives in the merged envVars (loaded by
// loadWorkspaceSecrets); resolve it with the SAME fallback chain
// applyRuntimeModelEnv uses so the provision-path derive inputs match the
// read-path's — keeping the two resolvers in parity (the #1994 regression
// guard test asserts this).
effectiveModel := effectiveModelForBilling(model, envVars)
res, resolveErr := ResolveLLMBillingModeDerived(ctx, workspaceID, runtime, effectiveModel, availableAuthEnv)
if resolveErr != nil {
// resolveErr != nil ⇒ resolver hit a DB error AND already defaulted
// res.ResolvedMode to platform_managed. Log + proceed; the safe default
@@ -911,19 +945,19 @@ func applyPlatformManagedLLMEnv(ctx context.Context, envVars map[string]string,
envVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"] = res.ResolvedMode
if res.ResolvedMode != LLMBillingModePlatformManaged {
// byok or disabled — DO NOT force-route to CP, DO NOT override the
// workspace's own ANTHROPIC_BASE_URL / OAuth token.
// workspace's own ANTHROPIC_BASE_URL / OAuth token, and DO NOT strip
// the tenant's own LLM credentials.
//
// internal#711: but DO strip platform-origin LLM credentials. The
// platform's scope:global CLAUDE_CODE_OAUTH_TOKEN (+ the rest of the
// bypass-key set) was merged into envVars by loadWorkspaceSecrets
// from global_secrets; without this strip a BYOK workspace that
// brought no LLM credential of its own would inherit the platform's
// global token and bill the platform's Anthropic credits. The strip
// is PROVENANCE-AWARE: only keys still flagged as global_secrets
// origin are removed; a workspace's own LLM cred (a workspace_secrets
// row — provenance flag already dropped by loadWorkspaceSecrets)
// survives so the workspace talks to its own provider directly.
stripGlobalOriginLLMCreds(envVars, globalKeys)
// molecule-core#1994 (corrected model): `global_secrets` is the
// TENANT's store, not the platform's. The tenant's own credential —
// at global OR workspace scope — is exactly what byok runs on, direct.
// We leave envVars untouched here and report whether a usable LLM
// credential survived so the caller can fail closed when there is
// genuinely none (no platform-managed-shaped key at any scope). The
// platform's own credential is never in a tenant's global_secrets
// (guarded at the SetGlobal write boundary + the proxy token is
// server-env-only), so leaving the tenant's globals in place cannot
// re-open the platform-credit drain.
return platformLLMEnvResult{
ResolvedMode: res.ResolvedMode,
HasUsableLLMCred: hasAnyPlatformManagedLLMKey(envVars),
@@ -980,32 +1014,11 @@ func stripPlatformManagedLLMBypassEnv(envVars map[string]string) {
}
}
// stripGlobalOriginLLMCreds removes platform-managed LLM credential keys
// (CLAUDE_CODE_OAUTH_TOKEN + the rest of platformManagedDirectLLMBypassKeys)
// from envVars ONLY when they originated from the operator-controlled
// `global_secrets` table (i.e. their key is present in globalKeys).
//
// internal#711 provider-aware gate. A platform global LLM credential is the
// platform's own credential and must never be the credential a non-platform
// (byok / subscription) workspace runs on. loadWorkspaceSecrets drops the
// global-provenance flag for any key the workspace re-set via the canvas
// Secrets tab (a workspace_secrets row), so a workspace's OWN LLM credential
// is NOT in globalKeys and survives this strip — only the inherited platform
// global creds are removed.
func stripGlobalOriginLLMCreds(envVars map[string]string, globalKeys map[string]struct{}) {
for key := range platformManagedDirectLLMBypassKeys {
if _, fromGlobal := globalKeys[key]; fromGlobal {
delete(envVars, key)
}
}
}
// hasAnyPlatformManagedLLMKey reports whether envVars still carries at least
// one non-empty platform-managed LLM credential key after the provider-aware
// gate. Used by the non-platform fail-closed branch: a byok/subscription
// workspace with no surviving (workspace-scoped) LLM credential must be
// aborted with MISSING_BYOK_CREDENTIAL rather than started credential-less or
// on stripped platform creds.
// hasAnyPlatformManagedLLMKey reports whether envVars carries at least one
// non-empty platform-managed-shaped LLM credential key (the tenant's own, at
// global or workspace scope). Used by the byok fail-closed branch: a byok
// workspace with no LLM credential at ANY scope must be aborted with
// MISSING_BYOK_CREDENTIAL rather than started credential-less.
func hasAnyPlatformManagedLLMKey(envVars map[string]string) bool {
for key := range platformManagedDirectLLMBypassKeys {
if strings.TrimSpace(envVars[key]) != "" {
@@ -193,33 +193,31 @@ func (h *WorkspaceHandler) prepareProvisionContext(
// continue to rely on workspace_secrets / org-import persona-env
// merge for their git auth.
applyAgentGitHTTPCreds(envVars, payload.Role)
// internal#711: provider-aware LLM-credential resolution. On a non-platform
// (byok/subscription) workspace this strips the platform's scope:global LLM
// creds inherited from global_secrets and reports whether the workspace
// still has a usable (workspace-scoped) LLM credential of its own.
llmRes := applyPlatformManagedLLMEnv(ctx, envVars, globalSecretKeys, workspaceID, payload.Runtime, payload.Model)
// Fail closed for a BYOK workspace with no usable LLM credential: do NOT
// start it on the platform's (now-stripped) global creds. Mirror the
// "model+provider+credential REQUIRED at create" spirit (internal#711)
// with an actionable error surfaced at provision time.
// molecule-core#1994: per-workspace LLM billing-mode resolution + env wiring.
// On platform_managed it forces the CP proxy usage token; on byok/disabled
// it leaves the tenant's own creds (global OR workspace scope) untouched and
// reports whether a usable LLM credential is present.
llmRes := applyPlatformManagedLLMEnv(ctx, envVars, workspaceID, payload.Runtime, payload.Model)
// Fail closed for a BYOK workspace with no usable LLM credential at ANY
// scope: do NOT start it credential-less. Mirror the "model+provider+
// credential REQUIRED at create" spirit with an actionable error surfaced
// at provision time.
//
// Scoped to byok specifically (NOT disabled): "byok" means "the user
// intends to run an LLM on their own credential" — a missing one is a
// misconfiguration worth surfacing loudly. "disabled" means "this
// workspace runs no platform-billed LLM at all" (terminal / file work, or
// a runtime that talks to a non-bypass-key endpoint); stripping the
// inherited platform globals is sufficient there and aborting would
// regress a legitimate no-LLM workspace. The strip above already ran for
// both non-platform modes.
// a runtime that talks to a non-bypass-key endpoint), so aborting would
// regress a legitimate no-LLM workspace.
//
// The bypass-key check is intentionally broad — any surviving bypass key
// (the workspace's own, of workspace_secrets provenance) clears it.
// The bypass-key check is intentionally broad — any present bypass key
// (the tenant's own, at global or workspace scope) clears it.
if llmRes.ResolvedMode == LLMBillingModeBYOK && !llmRes.HasUsableLLMCred {
msg := formatMissingBYOKCredentialError(llmRes.ResolvedMode)
log.Printf("Provisioner: ABORT workspace=%s — byok billing mode has no usable LLM credential (MISSING_BYOK_CREDENTIAL, internal#711)", workspaceID)
log.Printf("Provisioner: ABORT workspace=%s — byok billing mode has no usable LLM credential (MISSING_BYOK_CREDENTIAL, molecule-core#1994)", workspaceID)
return nil, &provisionAbort{
Msg: msg,
Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "711"},
Extra: map[string]interface{}{"error": msg, "code": "MISSING_BYOK_CREDENTIAL", "billing_mode": llmRes.ResolvedMode, "issue": "1994"},
}
}
applyRuntimeModelEnv(envVars, payload.Runtime, payload.Model)
@@ -494,32 +494,34 @@ func TestPrepareProvisionContext_WorkspaceSecretWinsOverPersonaToken(t *testing.
}
}
// TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed is the
// internal#711 end-to-end guard for the live Reno Stars leak. A byok
// workspace whose ONLY LLM credential is the platform's scope:global
// CLAUDE_CODE_OAUTH_TOKEN (inherited from global_secrets, no workspace
// override) must:
// TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds is the
// molecule-core#1994 (corrected-model) end-to-end inversion of the former
// internal#711 fail-closed test, for the live Reno Stars byok agents. A byok
// workspace whose LLM credential is the TENANT's own scope:global
// CLAUDE_CODE_OAUTH_TOKEN (a global_secrets row, no workspace override) must:
//
// 1. have that platform token STRIPPED from the prepared env (no leak), and
// 2. ABORT the provision with the MISSING_BYOK_CREDENTIAL code rather than
// start the workspace on the platform's credits.
// 1. KEEP that oauth in the prepared container env (it is the tenant's own
// credential — exactly what byok runs on, direct), and
// 2. NOT abort the provision proceeds.
//
// This is the discriminating end-to-end test: pre-fix prepared.EnvVars would
// carry CLAUDE_CODE_OAUTH_TOKEN=<platform token> and the provision would
// succeed, running Opus on Molecule's Anthropic credits.
func TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed(t *testing.T) {
// Pre-fix (internal#711) prepared.EnvVars stripped the global oauth and the
// provision aborted MISSING_BYOK_CREDENTIAL → the agent was dead. This is the
// discriminating end-to-end guard for the fix.
func TestPrepareProvisionContext_ByokWithTenantGlobalOAuthSucceeds(t *testing.T) {
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
mock := setupTestDB(t)
// global_secrets carries the platform's scope:global OAuth token.
// global_secrets carries the TENANT's own scope:global OAuth token + the
// stored MODEL (so the resolver derives byok from opus).
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("PLATFORM-GLOBAL-OAUTH"), 0))
// Workspace set NO secrets of its own.
AddRow("CLAUDE_CODE_OAUTH_TOKEN", []byte("TENANT-OWN-GLOBAL-OAUTH"), 0))
// Workspace set its own MODEL (no LLM cred of its own — relies on global).
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0))
// Resolver: workspace override = byok.
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
@@ -534,8 +536,57 @@ func TestPrepareProvisionContext_ByokWithOnlyGlobalOAuthFailsClosed(t *testing.T
prepared, abort := handler.prepareProvisionContext(
context.Background(), wsID, "/nonexistent", nil, payload, false)
if abort != nil {
t.Fatalf("expected provision to proceed (byok on tenant's own global oauth), got abort=%v", abort.Extra)
}
if prepared == nil {
t.Fatalf("prepared context is nil despite no abort")
}
// The tenant's own global oauth must be present in the container env.
if prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global oauth preserved for byok",
prepared.EnvVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
// byok must not have been routed through the platform proxy.
if _, ok := prepared.EnvVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Fatalf("byok provision must NOT inject the platform usage token")
}
if got := prepared.EnvVars["MOLECULE_LLM_BILLING_MODE_RESOLVED"]; got != LLMBillingModeBYOK {
t.Fatalf("MOLECULE_LLM_BILLING_MODE_RESOLVED = %q, want byok", got)
}
}
// TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed is the
// companion: the fail-closed abort is UNCHANGED for a byok workspace with no
// LLM credential at ANY scope (no global row, no workspace row). It still
// aborts MISSING_BYOK_CREDENTIAL rather than starting credential-less.
func TestPrepareProvisionContext_ByokNoCredentialAtAnyScopeFailsClosed(t *testing.T) {
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc"
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
mock := setupTestDB(t)
// No global LLM cred — only the stored MODEL so the resolver derives byok.
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM global_secrets`).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}))
mock.ExpectQuery(`SELECT key, encrypted_value, encryption_version FROM workspace_secrets`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"key", "encrypted_value", "encryption_version"}).
AddRow("MODEL", []byte("opus"), 0))
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
WithArgs(wsID).
WillReturnRows(sqlmock.NewRows([]string{"llm_billing_mode"}).AddRow(LLMBillingModeBYOK))
handler := NewWorkspaceHandler(&captureBroadcaster{}, nil, "http://localhost:8080", t.TempDir())
payload := models.CreateWorkspacePayload{
Name: "Reno Stars SEO",
Runtime: "claude-code",
Tier: 1,
}
prepared, abort := handler.prepareProvisionContext(
context.Background(), wsID, "/nonexistent", nil, payload, false)
if abort == nil {
t.Fatalf("expected MISSING_BYOK_CREDENTIAL abort, got success (prepared=%v) — the leak would still ship", prepared)
t.Fatalf("expected MISSING_BYOK_CREDENTIAL abort, got success (prepared=%v)", prepared)
}
if code, _ := abort.Extra["code"].(string); code != "MISSING_BYOK_CREDENTIAL" {
t.Fatalf("abort.Extra[code] = %v, want MISSING_BYOK_CREDENTIAL", abort.Extra["code"])
@@ -917,7 +968,7 @@ func TestApplyPlatformManagedLLMEnv_NonClaudeRuntimeDefaultsOpenAIProxyWhenNoWor
t.Setenv("MOLECULE_LLM_DEFAULT_MODEL", "moonshot/kimi-k2.6")
envVars := map[string]string{}
applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "codex", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "codex", "")
applyRuntimeModelEnv(envVars, "codex", "")
if got := envVars["OPENAI_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/openai/v1" {
@@ -947,7 +998,7 @@ func TestApplyPlatformManagedLLMEnv_StripsWorkspaceOpenAIKeyForClaudeCode(t *tes
"OPENAI_BASE_URL": "https://api.openai.com/v1",
"MODEL": "openai/gpt-5.5",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
if _, ok := envVars["OPENAI_API_KEY"]; ok {
t.Fatalf("OPENAI_API_KEY should be stripped for claude-code platform-managed mode")
@@ -973,7 +1024,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeUsesAnthropicProxyOverOAuth(t *tes
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
"MODEL": "sonnet",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN should be stripped in platform-managed mode")
@@ -996,7 +1047,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeInjectsAnthropicProxyWhenNoWorkspa
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "minimax/MiniMax-M2.7")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "minimax/MiniMax-M2.7")
if got := envVars["ANTHROPIC_BASE_URL"]; got != "https://api.example.test/api/v1/internal/llm/anthropic/v1" {
t.Fatalf("ANTHROPIC_BASE_URL = %q", got)
@@ -1019,7 +1070,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeStripsVendorBYOK(t *testing.T) {
"MINIMAX_API_KEY": "user-minimax-key",
"MODEL": "MiniMax-M2.7",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, nil, "", "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, "", "claude-code", "")
if _, ok := envVars["MINIMAX_API_KEY"]; ok {
t.Fatalf("MINIMAX_API_KEY should be stripped in platform-managed mode")
@@ -1053,7 +1104,7 @@ func TestApplyPlatformManagedLLMEnv_NoopsOutsidePlatformManaged(t *testing.T) {
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "kimi-for-coding")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding")
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("resolved mode = %q, want byok (derived from non-platform model)", res.ResolvedMode)
@@ -1100,7 +1151,7 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
"MODEL": "sonnet",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
// 1. OAuth token intact — not stripped.
if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "user-oauth-token" {
@@ -1131,19 +1182,19 @@ func TestApplyPlatformManagedLLMEnv_ClaudeCodeByokKeepsOwnProviderEnv(t *testing
}
}
// TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken is the
// internal#711 regression guard for the live 2026-05-27 leak (Reno Stars SEO
// + Marketing claude-code agents). A non-platform (byok) workspace that
// brought NO LLM credential of its own, but which inherited the platform's
// scope:global CLAUDE_CODE_OAUTH_TOKEN from global_secrets (provenance =
// globalKeys), must have that platform token STRIPPED — not run on it.
// TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect is
// the molecule-core#1994 (corrected-model) inversion of the former
// internal#711 strip test, exercised through applyPlatformManagedLLMEnv. The
// live failure this guards: the Reno Stars Marketing/SEO byok agents whose
// Claude oauth lives at GLOBAL scope (the tenant's own credential, shared
// across the tenant's workspaces) were stripped + failed-closed under the
// inverted "global == platform's own" premise → MISSING_BYOK_CREDENTIAL →
// dead. Under the corrected model `global_secrets` is the TENANT's store, so
// that oauth is exactly what byok runs on: it must SURVIVE and route direct.
//
// Pre-fix the byok early-return left envVars untouched, so the platform's
// global OAuth token survived into the container and the agent ran Opus on
// the platform's Anthropic credits. The fix gates the global-cred merge on
// provider==platform: a non-platform workspace keeps only its own
// (workspace_secrets) creds, of which there are none here.
func TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken(t *testing.T) {
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the byok branch
// → the oauth disappears → this test RED on both survival + HasUsableLLMCred.
func TestApplyPlatformManagedLLMEnv_ByokGlobalScopeOAuthSurvivesAndRunsDirect(t *testing.T) {
const wsID = "352e3c2b-0546-4e9c-b487-1e2ff1cf29fc" // Reno Stars SEO agent
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
@@ -1155,31 +1206,32 @@ func TestApplyPlatformManagedLLMEnv_ByokStripsGlobalOriginOAuthToken(t *testing.
t.Setenv("MOLECULE_LLM_ANTHROPIC_BASE_URL", "https://api.example.test/api/v1/internal/llm/anthropic")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// The ONLY LLM credential in env is the platform's scope:global OAuth
// token, merged from global_secrets (so its key is in globalKeys). The
// workspace set none of its own.
// The tenant's own oauth at GLOBAL scope (a global_secrets row). The
// workspace set no separate row of its own; it relies on the tenant global.
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN",
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
"MODEL": "opus",
}
globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
// 1. The platform global OAuth token must be STRIPPED — the leak is closed.
if got, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — platform scope:global token must be stripped for a byok workspace", got)
// 1. The tenant's own global-scope oauth SURVIVES — byok runs on it.
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the tenant's own global-scope token preserved for byok", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
// 2. No CP proxy creds forced (byok = workspace talks to its own provider).
if got, ok := envVars["ANTHROPIC_API_KEY"]; ok {
t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got)
}
// 3. Resolver reports byok with NO usable LLM credential → caller fails closed.
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Fatalf("MOLECULE_LLM_USAGE_TOKEN must NOT be injected for byok")
}
// 3. byok WITH a usable credential → caller does NOT fail closed.
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeBYOK)
}
if res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = true, want false (only the stripped platform global token was present)")
if !res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = false, want true (tenant's own global-scope oauth is the usable credential)")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
@@ -1214,7 +1266,7 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_PlatformModelKeepsPlatformCreds(t *t
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "anthropic/claude-opus-4-7")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "anthropic/claude-opus-4-7")
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Fatalf("platform-derived model must resolve platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
@@ -1234,14 +1286,15 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_PlatformModelKeepsPlatformCreds(t *t
}
}
// NON-PLATFORM-DERIVED → byok + STRIP + FAIL-CLOSED signal (THE FIX, the Reno
// billing-leak class). A claude-code workspace with a non-platform model
// (kimi-for-coding → kimi-coding) and NO override + NO own cred, inheriting only
// the platform's scope:global OAuth token, now DERIVES byok → #1963 strips the
// global token → HasUsableLLMCred=false → caller fails closed. Pre-P2 this same
// workspace resolved platform_managed (via the never-written org rung) and ran
// on the platform's credits. This is the discriminating delta test.
func TestApplyPlatformManagedLLMEnv_DERIVED_NonPlatformModelStripsAndFailsClosed(t *testing.T) {
// NON-PLATFORM-DERIVED + NO CREDENTIAL AT ALL → byok + FAIL-CLOSED. This is
// the legitimate remaining fail-closed path under the corrected model
// (molecule-core#1994): a claude-code workspace with a non-platform model
// (kimi-for-coding → byok) and NO override and NO LLM credential at ANY scope
// (no global row, no workspace row) has nothing to run on → HasUsableLLMCred=
// false → caller (prepareProvisionContext) aborts MISSING_BYOK_CREDENTIAL. The
// fail-closed branch is unchanged by the strip removal; only its trigger
// narrowed from "no workspace-scoped cred" to "no cred at any scope".
func TestApplyPlatformManagedLLMEnv_DERIVED_ByokNoCredentialFailsClosed(t *testing.T) {
const wsID = "99999999-8888-7777-6666-555555555555"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
@@ -1252,32 +1305,25 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_NonPlatformModelStripsAndFailsClosed
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// Only LLM cred is the platform's scope:global OAuth token (globalKeys).
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN",
}
globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}}
// No LLM credential at all — neither global nor workspace scope.
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "kimi-for-coding")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "kimi-for-coding")
// 1. DERIVED byok (NOT the old platform_managed default).
if res.ResolvedMode != LLMBillingModeBYOK {
t.Fatalf("non-platform-derived model must resolve byok, got %q (source=%s) — THE FIX regressed", res.ResolvedMode, res.Source)
t.Fatalf("non-platform-derived model must resolve byok, got %q (source=%s)", res.ResolvedMode, res.Source)
}
if res.Source != BillingModeSourceDerivedProvider {
t.Errorf("source: got %q want derived_provider", res.Source)
}
// 2. #1963 strip: the platform global OAuth token is removed (leak closed).
if got, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q present — must be stripped for a derived-byok workspace (Reno leak)", got)
}
// 3. No CP proxy creds forced.
// 2. No CP proxy creds forced.
if got, ok := envVars["ANTHROPIC_API_KEY"]; ok {
t.Fatalf("ANTHROPIC_API_KEY must NOT be injected for byok, got %q", got)
}
// 4. No usable cred → caller (prepareProvisionContext) fails closed.
// 3. No usable cred at any scope → caller fails closed.
if res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = true, want false (only the stripped platform global token was present)")
t.Fatalf("HasUsableLLMCred = true, want false (no LLM credential present at any scope)")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
@@ -1300,7 +1346,7 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_UnsetModelPlatformDefault(t *testing
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
if res.ResolvedMode != LLMBillingModePlatformManaged {
t.Fatalf("unset model must default platform_managed, got %q (source=%s)", res.ResolvedMode, res.Source)
@@ -1316,14 +1362,13 @@ func TestApplyPlatformManagedLLMEnv_DERIVED_UnsetModelPlatformDefault(t *testing
}
}
// TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal is
// the discriminating companion to the strip test: a byok workspace that DID
// set its own CLAUDE_CODE_OAUTH_TOKEN via the canvas Secrets tab (a
// workspace_secrets row) keeps it. loadWorkspaceSecrets drops the global
// provenance flag on a workspace override, so the key is NOT in globalKeys
// and the provenance-aware strip leaves it alone. Proves the fix strips only
// platform-origin creds, never the customer's own.
func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal(t *testing.T) {
// TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth is the
// workspace-scope companion to the global-scope survival test: a byok
// workspace that set its own CLAUDE_CODE_OAUTH_TOKEN via the canvas Secrets
// tab (a workspace_secrets row) keeps it and runs direct. Under the corrected
// model (molecule-core#1994) the tenant's credential survives at EITHER scope;
// this pins the workspace-scope half.
func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuth(t *testing.T) {
const wsID = "6b66de8d-9337-4fb4-be8d-6d49dca0d809" // Reno Stars Marketing agent
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
@@ -1334,15 +1379,13 @@ func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal(t *
t.Setenv("MOLECULE_LLM_BASE_URL", "https://api.example.test/api/v1/internal/llm/openai/v1")
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
// Workspace set its OWN OAuth token — loadWorkspaceSecrets would have
// dropped its global provenance flag, so globalKeys does NOT contain it.
// Workspace set its OWN OAuth token (a workspace_secrets row).
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "CUSTOMER-OWN-OAUTH-TOKEN",
"MODEL": "opus",
}
globalKeys := map[string]struct{}{} // not from global_secrets
res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
if got := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; got != "CUSTOMER-OWN-OAUTH-TOKEN" {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN = %q, want the workspace's own token left intact", got)
@@ -1358,13 +1401,18 @@ func TestApplyPlatformManagedLLMEnv_ByokKeepsWorkspaceOwnOAuthEvenWithGlobal(t *
}
}
// TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred proves
// that "disabled" mode also strips the platform's global LLM creds (the leak
// is closed for disabled too), and reports HasUsableLLMCred=false. The
// caller's fail-closed abort is scoped to byok only, so a disabled workspace
// with no LLM cred still boots (for terminal / non-LLM work); here we pin the
// function-level strip + report.
func TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred(t *testing.T) {
// TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy proves the
// corrected-model behavior for "disabled": the tenant's own global-scope LLM
// cred is NOT stripped and the CP proxy is NOT forced. "disabled" means the
// workspace runs no platform-billed LLM, but the tenant's own credential is
// still the tenant's to keep; the caller's fail-closed abort is byok-only so a
// disabled workspace boots regardless. The previous internal#711 behavior
// stripped the global cred here on the same inverted premise; that strip is
// removed.
//
// Mutation (load-bearing): re-add stripGlobalOriginLLMCreds on the non-platform
// branch → the oauth disappears → this test RED on the survival assertion.
func TestApplyPlatformManagedLLMEnv_DisabledKeepsTenantGlobalNoProxy(t *testing.T) {
const wsID = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
mock := setupTestDB(t)
mock.ExpectQuery(`SELECT llm_billing_mode FROM workspaces WHERE id = \$1`).
@@ -1374,31 +1422,33 @@ func TestApplyPlatformManagedLLMEnv_DisabledStripsGlobalButReportsNoCred(t *test
t.Setenv("MOLECULE_LLM_BILLING_MODE", LLMBillingModePlatformManaged)
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN",
"CLAUDE_CODE_OAUTH_TOKEN": "TENANT-OWN-GLOBAL-OAUTH",
}
globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
t.Fatalf("CLAUDE_CODE_OAUTH_TOKEN must be stripped for disabled mode too")
// The tenant's own global cred survives (not stripped).
if envVars["CLAUDE_CODE_OAUTH_TOKEN"] != "TENANT-OWN-GLOBAL-OAUTH" {
t.Fatalf("tenant's own global cred must survive for disabled mode; got %q", envVars["CLAUDE_CODE_OAUTH_TOKEN"])
}
// No proxy forced for disabled.
if _, ok := envVars["MOLECULE_LLM_USAGE_TOKEN"]; ok {
t.Fatalf("disabled must not inject the platform usage token")
}
if res.ResolvedMode != LLMBillingModeDisabled {
t.Fatalf("ResolvedMode = %q, want %q", res.ResolvedMode, LLMBillingModeDisabled)
}
if res.HasUsableLLMCred {
t.Fatalf("HasUsableLLMCred = true, want false")
}
if err := mock.ExpectationsWereMet(); err != nil {
t.Errorf("unmet sqlmock expectations: %v", err)
}
}
// TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds is
// the no-regression guard for the OTHER side of the gate (internal#711): a
// platform-managed workspace MUST still receive the platform's creds. Here
// the proxy IS configured, so the contract is the existing one — the global
// OAuth token is replaced by the proxy usage token (HasUsableLLMCred=true).
// the no-regression guard for the metered platform_managed path
// (molecule-core#1994): a platform-managed workspace MUST still strip any
// direct oauth and route through the CP proxy. The direct OAuth token is
// replaced by the proxy usage token (HasUsableLLMCred=true). This path is
// UNCHANGED by the byok strip removal — only the byok/disabled branch changed.
func TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds(t *testing.T) {
const wsID = "99999999-9999-9999-9999-999999999999"
mock := setupTestDB(t)
@@ -1412,12 +1462,11 @@ func TestApplyPlatformManagedLLMEnv_PlatformManagedStillReceivesGlobalCreds(t *t
t.Setenv("MOLECULE_LLM_USAGE_TOKEN", "tenant-admin-token")
envVars := map[string]string{
"CLAUDE_CODE_OAUTH_TOKEN": "PLATFORM-GLOBAL-OAUTH-TOKEN",
"CLAUDE_CODE_OAUTH_TOKEN": "DIRECT-OAUTH-TOKEN",
"MODEL": "opus",
}
globalKeys := map[string]struct{}{"CLAUDE_CODE_OAUTH_TOKEN": {}}
res := applyPlatformManagedLLMEnv(context.Background(), envVars, globalKeys, wsID, "claude-code", "")
res := applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
// Platform-managed routes through the CP proxy: OAuth stripped, proxy creds forced.
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {
@@ -1458,7 +1507,7 @@ func TestApplyPlatformManagedLLMEnv_PlatformManagedStillEmitsResolvedMode(t *tes
"CLAUDE_CODE_OAUTH_TOKEN": "user-oauth-token",
"MODEL": "sonnet",
}
applyPlatformManagedLLMEnv(context.Background(), envVars, nil, wsID, "claude-code", "")
applyPlatformManagedLLMEnv(context.Background(), envVars, wsID, "claude-code", "")
// OAuth stripped, proxy forced — unchanged platform_managed contract.
if _, ok := envVars["CLAUDE_CODE_OAUTH_TOKEN"]; ok {