From 0a0f11b41fefdbe0d98ac2073abb7d6b01f3b27d Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Thu, 23 Apr 2026 10:41:47 -0700 Subject: [PATCH] feat(runtime): auto-detect LLM token type, normalise env on boot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform stores per-workspace LLM credentials under a single key (ANTHROPIC_AUTH_TOKEN in workspace_secrets). But downstream tools expect different env var names depending on the token type: sk-ant-oat01-* → CLAUDE_CODE_OAUTH_TOKEN (Claude Code OAuth session) sk-ant-api03-* → ANTHROPIC_API_KEY (direct Anthropic API) sk-cp-* → ANTHROPIC_AUTH_TOKEN (proxy: MiniMax, gateways) Without normalisation, an OAuth token under ANTHROPIC_AUTH_TOKEN gets sent as a bearer to api.anthropic.com, which responds: 401 authentication_error: OAuth authentication is currently not supported. This was a platform-wide footgun: anyone rotating LLM keys had to know the exact env var for each token type, AND make sure stale overrides were cleared, AND set ANTHROPIC_BASE_URL correctly for proxies (or NOT set for native Claude). Nothing downstream could help — the SDK just saw the wrong var. Fix: - New molecule_runtime/llm_auth.py — normalise_llm_env() mutates os.environ (or any dict) to the correct shape based on token prefix. Returns a NormalisationResult for logging. - main.py calls it as step 0, before any adapter/executor import. Every adapter (claude-code, langgraph, crewai, autogen, hermes, …) benefits automatically — no per-adapter branching needed. - 11 unit tests covering all prefix paths, edge cases, and the "operator deliberately set CLAUDE_CODE_OAUTH_TOKEN" precedence rule. Operationally: this means operators can keep using one ANTHROPIC_AUTH_TOKEN slot in platform settings and just paste whatever token the agent needs. No env-var-name awareness required. Tested locally: 11/11 new tests pass. 
83 other tests unchanged (pre-existing failures on staging are all unrelated: test_workspace_id_validation, test_a2a_mcp_server RBAC, the test_imports.main module-walker — same signature as on staging HEAD before this PR). Co-Authored-By: Claude Opus 4.7 (1M context) --- molecule_runtime/llm_auth.py | 163 +++++++++++++++++++++++++++++++++++ molecule_runtime/main.py | 10 ++- tests/test_llm_auth.py | 125 +++++++++++++++++++++++++++ 3 files changed, 297 insertions(+), 1 deletion(-) create mode 100644 molecule_runtime/llm_auth.py create mode 100644 tests/test_llm_auth.py diff --git a/molecule_runtime/llm_auth.py b/molecule_runtime/llm_auth.py new file mode 100644 index 0000000..3ef641f --- /dev/null +++ b/molecule_runtime/llm_auth.py @@ -0,0 +1,163 @@ +"""LLM auth-env normalisation. + +Platform stores per-workspace LLM credentials under a single key, +``ANTHROPIC_AUTH_TOKEN``. But the CLI/SDK tools we invoke downstream +expect *different* env var names depending on the token type: + + Token prefix Correct env var Base URL needed + ------------------ ------------------------ ---------------- + sk-ant-oat01-* CLAUDE_CODE_OAUTH_TOKEN none (Claude handles) + sk-ant-api03-* ANTHROPIC_API_KEY none (Claude default) + sk-cp-* ANTHROPIC_AUTH_TOKEN proxy URL (MiniMax etc.) + other/unknown (leave as-is) (leave as-is) + +Without this normalisation, passing an OAuth token as +``ANTHROPIC_AUTH_TOKEN`` causes the Claude SDK to send it as a bearer +token to ``api.anthropic.com``, which responds: + + 401 {"error":{"type":"authentication_error", + "message":"OAuth authentication is currently not supported."}} + +Call :func:`normalise_llm_env` once, early in the runtime bootstrap +(before any adapter/executor is created). The function mutates +``os.environ`` in place and returns a report of what changed so the +boot log shows the mapping. + +Safe to call multiple times — idempotent. 
+""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from typing import Optional + + +@dataclass +class NormalisationResult: + """What normalise_llm_env did. Safe to print in boot logs.""" + + detected_kind: str = "none" # "oauth" | "api_key" | "proxy" | "unknown" | "none" + renamed_to: Optional[str] = None + cleared_vars: list[str] = field(default_factory=list) + warning: Optional[str] = None + + def summary(self) -> str: + if self.detected_kind == "none": + return "llm-auth: no ANTHROPIC_AUTH_TOKEN set" + line = f"llm-auth: detected {self.detected_kind}" + if self.renamed_to: + line += f" → exported as {self.renamed_to}" + if self.cleared_vars: + line += f" (cleared: {', '.join(self.cleared_vars)})" + if self.warning: + line += f" [WARN: {self.warning}]" + return line + + +def _prefix_of(token: str) -> str: + """Classify a token string by its well-known prefix.""" + if token.startswith("sk-ant-oat01-"): + return "oauth" + if token.startswith("sk-ant-api03-"): + return "api_key" + if token.startswith("sk-cp-"): + return "proxy" + return "unknown" + + +def normalise_llm_env(env: Optional[dict[str, str]] = None) -> NormalisationResult: + """Inspect and rewrite LLM auth env vars in place. + + Parameters + ---------- + env + The env mapping to mutate. Defaults to ``os.environ``. + Passing a dict is useful for tests. + + Returns + ------- + NormalisationResult + Describes what was detected and what was changed, for logging. + """ + if env is None: + env = os.environ + + result = NormalisationResult() + + # Priority: explicit CLAUDE_CODE_OAUTH_TOKEN wins if already present + # (operator set it deliberately — don't override). + existing_oauth = env.get("CLAUDE_CODE_OAUTH_TOKEN", "") + if existing_oauth: + result.detected_kind = "oauth" + result.renamed_to = None + # If ANTHROPIC_AUTH_TOKEN is ALSO set with a conflicting value, + # clear it so the SDK doesn't pick the wrong one. 
+ auth = env.get("ANTHROPIC_AUTH_TOKEN", "") + if auth and auth != existing_oauth: + env.pop("ANTHROPIC_AUTH_TOKEN", None) + result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN") + # Base URL is irrelevant for OAuth mode; remove the proxy URL + # so the SDK uses Claude defaults. + base = env.get("ANTHROPIC_BASE_URL", "") + if base and "anthropic.com" not in base: + env.pop("ANTHROPIC_BASE_URL", None) + result.cleared_vars.append("ANTHROPIC_BASE_URL") + return result + + # No explicit CLAUDE_CODE_OAUTH_TOKEN — detect from ANTHROPIC_AUTH_TOKEN + tok = env.get("ANTHROPIC_AUTH_TOKEN", "") + if not tok: + return result + + kind = _prefix_of(tok) + result.detected_kind = kind + + if kind == "oauth": + env["CLAUDE_CODE_OAUTH_TOKEN"] = tok + env.pop("ANTHROPIC_AUTH_TOKEN", None) + result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN") + result.renamed_to = "CLAUDE_CODE_OAUTH_TOKEN" + # Proxy base URL must go — OAuth flow uses Anthropic's own endpoint + base = env.get("ANTHROPIC_BASE_URL", "") + if base and "anthropic.com" not in base: + env.pop("ANTHROPIC_BASE_URL", None) + result.cleared_vars.append("ANTHROPIC_BASE_URL") + + elif kind == "api_key": + # Anthropic API keys can ride ANTHROPIC_API_KEY (strongly preferred by + # claude-code) OR ANTHROPIC_AUTH_TOKEN. Moving it to ANTHROPIC_API_KEY + # is the safer default because claude-code in non-bare mode reads + # ANTHROPIC_API_KEY first. + env["ANTHROPIC_API_KEY"] = tok + env.pop("ANTHROPIC_AUTH_TOKEN", None) + result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN") + result.renamed_to = "ANTHROPIC_API_KEY" + # Clear proxy base URL for direct Anthropic calls + base = env.get("ANTHROPIC_BASE_URL", "") + if base and "anthropic.com" not in base: + env.pop("ANTHROPIC_BASE_URL", None) + result.cleared_vars.append("ANTHROPIC_BASE_URL") + + elif kind == "proxy": + # sk-cp-* = Claude proxy token (MiniMax, custom gateways). KEEP + # ANTHROPIC_AUTH_TOKEN + ANTHROPIC_BASE_URL as-is; that's the correct + # shape for proxies. 
+ result.renamed_to = None + base = env.get("ANTHROPIC_BASE_URL", "") + if not base: + result.warning = ( + "proxy token detected but ANTHROPIC_BASE_URL is empty — " + "proxy calls will fail without a base URL" + ) + + else: + # unknown — be conservative, leave env untouched but warn + result.warning = ( + "ANTHROPIC_AUTH_TOKEN has an unrecognised prefix " + f"{tok[:12] + '…' if len(tok) > 12 else tok!r}; " + "not normalising. If this is a proxy token, prefix convention " + "is sk-cp-*; OAuth is sk-ant-oat01-*; API key is sk-ant-api03-*." + ) + + return result diff --git a/molecule_runtime/main.py b/molecule_runtime/main.py index 673afe0..4a7739d 100644 --- a/molecule_runtime/main.py +++ b/molecule_runtime/main.py @@ -65,7 +65,15 @@ async def main(): # pragma: no cover platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080") awareness_config = get_awareness_config() - # 0. Initialise OpenTelemetry (no-op if packages not installed) + # 0. Normalise LLM auth env vars based on token type. + # Platform stores tokens as ANTHROPIC_AUTH_TOKEN, but the Claude SDK/CLI + # expects different env vars per token kind (OAuth vs API key vs proxy). + # Doing this early means every downstream adapter/executor sees a + # consistent, correct env — no per-adapter detection needed. + from molecule_runtime.llm_auth import normalise_llm_env + print(normalise_llm_env().summary()) + + # 0.5 Initialise OpenTelemetry (no-op if packages not installed) setup_telemetry(service_name=workspace_id) # 1. 
# Patch envelope that shared these physical lines (kept verbatim):
#
# Load config
# diff --git a/tests/test_llm_auth.py b/tests/test_llm_auth.py
# new file mode 100644
# index 0000000..bcc7d42
# --- /dev/null
# +++ b/tests/test_llm_auth.py
# @@ -0,0 +1,125 @@
"""Unit tests for molecule_runtime.llm_auth.normalise_llm_env."""

import os

from molecule_runtime.llm_auth import normalise_llm_env

# Shared fixture values used by several tests below.
MINIMAX_URL = "https://api.minimax.io/anthropic"
ANTHROPIC_URL = "https://api.anthropic.com"


def test_no_token_is_noop():
    """An empty mapping is reported as 'none' and left empty."""
    environ: dict[str, str] = {}
    report = normalise_llm_env(environ)
    assert report.detected_kind == "none"
    assert environ == {}
    assert report.renamed_to is None


def test_oauth_token_moved_to_oauth_env_var():
    """An OAuth token is re-exported and the proxy base URL is removed."""
    token = "sk-ant-oat01-abc123"
    environ = {"ANTHROPIC_AUTH_TOKEN": token, "ANTHROPIC_BASE_URL": MINIMAX_URL}
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert report.renamed_to == "CLAUDE_CODE_OAUTH_TOKEN"
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == token
    for name in ("ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL"):
        assert name not in environ
    for name in ("ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL"):
        assert name in report.cleared_vars


def test_oauth_token_keeps_anthropic_base_url():
    """A base URL that really is Anthropic's survives normalisation."""
    environ = {
        "ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-abc",
        "ANTHROPIC_BASE_URL": ANTHROPIC_URL,
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert environ.get("ANTHROPIC_BASE_URL") == ANTHROPIC_URL
    assert "ANTHROPIC_BASE_URL" not in report.cleared_vars


def test_api_key_moved_to_anthropic_api_key():
    """An API key is re-exported and the proxy base URL is removed."""
    token = "sk-ant-api03-xyz789"
    environ = {"ANTHROPIC_AUTH_TOKEN": token, "ANTHROPIC_BASE_URL": MINIMAX_URL}
    report = normalise_llm_env(environ)
    assert report.detected_kind == "api_key"
    assert report.renamed_to == "ANTHROPIC_API_KEY"
    assert environ["ANTHROPIC_API_KEY"] == token
    assert "ANTHROPIC_AUTH_TOKEN" not in environ
    assert "ANTHROPIC_BASE_URL" not in environ


def test_proxy_token_left_alone():
    """Proxy tokens need both vars unchanged, and raise no warning."""
    token = "sk-cp-minimax-token-foo"
    environ = {"ANTHROPIC_AUTH_TOKEN": token, "ANTHROPIC_BASE_URL": MINIMAX_URL}
    report = normalise_llm_env(environ)
    assert report.detected_kind == "proxy"
    assert report.renamed_to is None
    assert environ["ANTHROPIC_AUTH_TOKEN"] == token
    assert environ["ANTHROPIC_BASE_URL"] == MINIMAX_URL
    assert report.warning is None


def test_proxy_token_without_base_url_warns():
    """A proxy token with no base URL produces a warning naming the var."""
    report = normalise_llm_env({"ANTHROPIC_AUTH_TOKEN": "sk-cp-something"})
    assert report.detected_kind == "proxy"
    assert report.warning is not None
    assert "ANTHROPIC_BASE_URL" in report.warning


def test_unknown_prefix_leaves_env_and_warns():
    """An unrecognised token is left in place and flagged."""
    token = "garbage-prefix-xyz"
    environ = {"ANTHROPIC_AUTH_TOKEN": token}
    report = normalise_llm_env(environ)
    assert report.detected_kind == "unknown"
    assert report.renamed_to is None
    assert environ["ANTHROPIC_AUTH_TOKEN"] == token
    assert report.warning is not None
    assert "unrecognised prefix" in report.warning


def test_existing_oauth_env_takes_precedence():
    """A deliberately set CLAUDE_CODE_OAUTH_TOKEN is never overwritten."""
    deliberate = "sk-ant-oat01-deliberate"
    environ = {
        "CLAUDE_CODE_OAUTH_TOKEN": deliberate,
        "ANTHROPIC_AUTH_TOKEN": "sk-cp-stale-proxy-value",
        "ANTHROPIC_BASE_URL": MINIMAX_URL,
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == deliberate
    # Conflicting ANTHROPIC_AUTH_TOKEN cleared so SDK picks the right one
    assert "ANTHROPIC_AUTH_TOKEN" not in environ
    assert "ANTHROPIC_BASE_URL" not in environ


def test_idempotent_second_call():
    """Running the normaliser twice yields the same environment."""
    token = "sk-ant-oat01-once"
    environ = {"ANTHROPIC_AUTH_TOKEN": token}
    normalise_llm_env(environ)
    second = normalise_llm_env(environ)
    assert second.detected_kind == "oauth"
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == token
    assert "ANTHROPIC_AUTH_TOKEN" not in environ


def test_summary_renders_without_error():
    """The summary line names both the kind and the target variable."""
    report = normalise_llm_env({"ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-abc"})
    rendered = report.summary()
    assert "oauth" in rendered
    assert "CLAUDE_CODE_OAUTH_TOKEN" in rendered


def test_uses_os_environ_by_default(monkeypatch):
    """With no argument the real process environment is rewritten."""
    token = "sk-ant-oat01-real"
    monkeypatch.setenv("ANTHROPIC_AUTH_TOKEN", token)
    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
    report = normalise_llm_env()
    assert report.detected_kind == "oauth"
    assert os.environ.get("CLAUDE_CODE_OAUTH_TOKEN") == token
    assert "ANTHROPIC_AUTH_TOKEN" not in os.environ