feat(runtime): auto-detect LLM token type, normalise env on boot

Platform stores per-workspace LLM credentials under a single key
(ANTHROPIC_AUTH_TOKEN in workspace_secrets). But downstream tools
expect different env var names depending on the token type:

  sk-ant-oat01-*  → CLAUDE_CODE_OAUTH_TOKEN  (Claude Code OAuth session)
  sk-ant-api03-*  → ANTHROPIC_API_KEY        (direct Anthropic API)
  sk-cp-*         → ANTHROPIC_AUTH_TOKEN     (proxy: MiniMax, gateways)

Without normalisation, an OAuth token under ANTHROPIC_AUTH_TOKEN gets
sent as a bearer to api.anthropic.com, which responds:

    401 authentication_error: OAuth authentication is currently not
    supported.

This was a platform-wide footgun: anyone rotating LLM keys had to
know the exact env var for each token type, AND make sure stale
overrides were cleared, AND set ANTHROPIC_BASE_URL correctly for
proxies (or NOT set for native Claude). Nothing downstream could
help — the SDK just saw the wrong var.

Fix:

- New molecule_runtime/llm_auth.py — normalise_llm_env() mutates
  os.environ (or any dict) to the correct shape based on token
  prefix. Returns a NormalisationResult for logging.
- main.py calls it as step 0, before any adapter/executor import.
  Every adapter (claude-code, langgraph, crewai, autogen, hermes,
  …) benefits automatically — no per-adapter branching needed.
- 11 unit tests covering all prefix paths, edge cases, and the
  "operator deliberately set CLAUDE_CODE_OAUTH_TOKEN" precedence
  rule.

Operationally: this means operators can keep using one
ANTHROPIC_AUTH_TOKEN slot in platform settings and just paste
whatever token the agent needs. No env-var-name awareness required.

Tested locally: 11/11 new tests pass. 83 other tests unchanged
(pre-existing failures on staging are all unrelated:
test_workspace_id_validation, test_a2a_mcp_server RBAC, the
test_imports.main module-walker — same signature as on staging
HEAD before this PR).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
rabbitblood 2026-04-23 10:41:47 -07:00
parent dcb6edd1a1
commit 0a0f11b41f
3 changed files with 297 additions and 1 deletions

View File

@ -0,0 +1,163 @@
"""LLM auth-env normalisation.
Platform stores per-workspace LLM credentials under a single key,
``ANTHROPIC_AUTH_TOKEN``. But the CLI/SDK tools we invoke downstream
expect *different* env var names depending on the token type:
    Token prefix        Correct env var           Base URL needed
    ------------------  ------------------------  ------------------------
    sk-ant-oat01-*      CLAUDE_CODE_OAUTH_TOKEN   none (Claude handles)
    sk-ant-api03-*      ANTHROPIC_API_KEY         none (Claude default)
    sk-cp-*             ANTHROPIC_AUTH_TOKEN      proxy URL (MiniMax etc.)
    other/unknown       (leave as-is)             (leave as-is)
Without this normalisation, passing an OAuth token as
``ANTHROPIC_AUTH_TOKEN`` causes the Claude SDK to send it as a bearer
token to ``api.anthropic.com``, which responds:
401 {"error":{"type":"authentication_error",
"message":"OAuth authentication is currently not supported."}}
Call :func:`normalise_llm_env` once, early in the runtime bootstrap
(before any adapter/executor is created). The function mutates
``os.environ`` in place and returns a report of what changed so the
boot log shows the mapping.
Safe to call multiple times — the env rewrite is idempotent.
"""
from __future__ import annotations

import os
from collections.abc import MutableMapping
from dataclasses import dataclass, field
from typing import Optional
from urllib.parse import urlsplit
@dataclass
class NormalisationResult:
    """Report of what a single ``normalise_llm_env`` call detected and changed.

    Intended to be rendered into the boot log through :meth:`summary`.
    """

    # One of: "oauth" | "api_key" | "proxy" | "unknown" | "none"
    detected_kind: str = "none"
    # Name of the env var the token was moved to; None when nothing was renamed.
    renamed_to: Optional[str] = None
    # Names of the env vars that were removed from the mapping.
    cleared_vars: list[str] = field(default_factory=list)
    # Optional human-readable caveat to surface alongside the summary.
    warning: Optional[str] = None

    def summary(self) -> str:
        """Return a one-line, log-friendly description of this result."""
        if self.detected_kind == "none":
            return "llm-auth: no ANTHROPIC_AUTH_TOKEN set"

        # Collect the optional fragments in display order, then glue them.
        pieces = [f"llm-auth: detected {self.detected_kind}"]
        if self.renamed_to:
            pieces.append(f" → exported as {self.renamed_to}")
        if self.cleared_vars:
            pieces.append(f" (cleared: {', '.join(self.cleared_vars)})")
        if self.warning:
            pieces.append(f" [WARN: {self.warning}]")
        return "".join(pieces)
def _prefix_of(token: str) -> str:
    """Map a token to its kind ("oauth", "api_key", "proxy") by prefix.

    Returns "unknown" when none of the well-known prefixes match.
    """
    known_prefixes = (
        ("sk-ant-oat01-", "oauth"),
        ("sk-ant-api03-", "api_key"),
        ("sk-cp-", "proxy"),
    )
    for prefix, kind in known_prefixes:
        if token.startswith(prefix):
            return kind
    return "unknown"
def _is_anthropic_url(url: str) -> bool:
    """Return True when *url*'s host is ``anthropic.com`` or a subdomain of it."""
    # A scheme-less value ("api.anthropic.com/v1") would be parsed as a bare
    # path with no hostname, so give it a network-location marker first.
    candidate = url if "://" in url else f"//{url}"
    try:
        host = urlsplit(candidate).hostname or ""
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): not an Anthropic host.
        return False
    host = host.rstrip(".")
    return host == "anthropic.com" or host.endswith(".anthropic.com")


def _drop_proxy_base_url(env: MutableMapping[str, str], result: NormalisationResult) -> None:
    """Remove ANTHROPIC_BASE_URL from *env* unless it points at Anthropic itself."""
    base = env.get("ANTHROPIC_BASE_URL", "")
    if base and not _is_anthropic_url(base):
        env.pop("ANTHROPIC_BASE_URL", None)
        result.cleared_vars.append("ANTHROPIC_BASE_URL")


def _token_preview(token: str, keep: int = 12) -> str:
    """Return a log-safe preview of *token*: a short prefix plus an ellipsis.

    At most *keep* characters and never more than half of the token are
    shown, so short values are not written to the log in full.
    """
    visible = min(keep, len(token) // 2)
    return token[:visible] + "..."


def normalise_llm_env(env: Optional[MutableMapping[str, str]] = None) -> NormalisationResult:
    """Inspect and rewrite LLM auth env vars in place.

    The token stored under ``ANTHROPIC_AUTH_TOKEN`` is classified by prefix
    and moved to the env var that matches its kind; a non-Anthropic
    ``ANTHROPIC_BASE_URL`` is removed for OAuth and direct API-key tokens.
    A pre-existing ``CLAUDE_CODE_OAUTH_TOKEN`` always wins and is never
    overwritten.

    Parameters
    ----------
    env
        The env mapping to mutate. Defaults to ``os.environ``.
        Passing a dict is useful for tests.

    Returns
    -------
    NormalisationResult
        Describes what was detected and what was changed, for logging.
    """
    if env is None:
        env = os.environ
    result = NormalisationResult()

    # Priority: an explicit CLAUDE_CODE_OAUTH_TOKEN wins if already present
    # (operator set it deliberately — don't override).
    if env.get("CLAUDE_CODE_OAUTH_TOKEN", ""):
        result.detected_kind = "oauth"
        # Any ANTHROPIC_AUTH_TOKEN must go, even when it holds the very same
        # OAuth token: left in place it would still be sent as a bearer token,
        # which is exactly the failure this module exists to prevent.
        if env.get("ANTHROPIC_AUTH_TOKEN", ""):
            env.pop("ANTHROPIC_AUTH_TOKEN", None)
            result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN")
        # Base URL is irrelevant for OAuth mode; drop a proxy URL so the SDK
        # falls back to its defaults.
        _drop_proxy_base_url(env, result)
        return result

    # No explicit CLAUDE_CODE_OAUTH_TOKEN — detect from ANTHROPIC_AUTH_TOKEN.
    tok = env.get("ANTHROPIC_AUTH_TOKEN", "")
    if not tok:
        return result

    kind = _prefix_of(tok)
    result.detected_kind = kind

    if kind == "oauth":
        env["CLAUDE_CODE_OAUTH_TOKEN"] = tok
        env.pop("ANTHROPIC_AUTH_TOKEN", None)
        result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN")
        result.renamed_to = "CLAUDE_CODE_OAUTH_TOKEN"
        # Proxy base URL must go — the OAuth flow uses Anthropic's own endpoint.
        _drop_proxy_base_url(env, result)
    elif kind == "api_key":
        # Anthropic API keys can ride ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN;
        # ANTHROPIC_API_KEY is the safer default because claude-code reads it
        # first in non-bare mode.
        env["ANTHROPIC_API_KEY"] = tok
        env.pop("ANTHROPIC_AUTH_TOKEN", None)
        result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN")
        result.renamed_to = "ANTHROPIC_API_KEY"
        # Clear a proxy base URL for direct Anthropic calls.
        _drop_proxy_base_url(env, result)
    elif kind == "proxy":
        # sk-cp-* = proxy token (MiniMax, custom gateways). Keep
        # ANTHROPIC_AUTH_TOKEN + ANTHROPIC_BASE_URL as-is; that is the correct
        # shape for proxies. Only warn when the base URL is missing.
        if not env.get("ANTHROPIC_BASE_URL", ""):
            result.warning = (
                "proxy token detected but ANTHROPIC_BASE_URL is empty — "
                "proxy calls will fail without a base URL"
            )
    else:
        # Unknown prefix — be conservative: leave env untouched but warn.
        # Only a redacted preview of the token is included in the message.
        result.warning = (
            "ANTHROPIC_AUTH_TOKEN has an unrecognised prefix "
            f"{_token_preview(tok)!r}; "
            "not normalising. If this is a proxy token, prefix convention "
            "is sk-cp-*; OAuth is sk-ant-oat01-*; API key is sk-ant-api03-*."
        )
    return result

View File

@ -65,7 +65,15 @@ async def main(): # pragma: no cover
platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080")
awareness_config = get_awareness_config()
# 0. Initialise OpenTelemetry (no-op if packages not installed)
# 0. Normalise LLM auth env vars based on token type.
# Platform stores tokens as ANTHROPIC_AUTH_TOKEN, but the Claude SDK/CLI
# expects different env vars per token kind (OAuth vs API key vs proxy).
# Doing this early means every downstream adapter/executor sees a
# consistent, correct env — no per-adapter detection needed.
from molecule_runtime.llm_auth import normalise_llm_env
print(normalise_llm_env().summary())
# 0.5 Initialise OpenTelemetry (no-op if packages not installed)
setup_telemetry(service_name=workspace_id)
# 1. Load config

125
tests/test_llm_auth.py Normal file
View File

@ -0,0 +1,125 @@
"""Unit tests for molecule_runtime.llm_auth.normalise_llm_env."""
from molecule_runtime.llm_auth import normalise_llm_env
def test_no_token_is_noop():
    """With no auth vars present the env stays empty and kind is "none"."""
    empty_env: dict[str, str] = {}
    report = normalise_llm_env(empty_env)
    assert report.detected_kind == "none"
    assert empty_env == {}
    assert report.renamed_to is None
def test_oauth_token_moved_to_oauth_env_var():
    """An OAuth token is re-exported and both proxy-era vars are cleared."""
    oauth_token = "sk-ant-oat01-abc123"
    environ = {
        "ANTHROPIC_AUTH_TOKEN": oauth_token,
        "ANTHROPIC_BASE_URL": "https://api.minimax.io/anthropic",
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert report.renamed_to == "CLAUDE_CODE_OAUTH_TOKEN"
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == oauth_token
    removed = ("ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL")
    # Gone from the mapping ...
    for name in removed:
        assert name not in environ
    # ... and reported as cleared.
    for name in removed:
        assert name in report.cleared_vars
def test_oauth_token_keeps_anthropic_base_url():
    """A base URL that really is Anthropic's must survive normalisation."""
    anthropic_url = "https://api.anthropic.com"
    environ = {
        "ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-abc",
        "ANTHROPIC_BASE_URL": anthropic_url,
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert environ.get("ANTHROPIC_BASE_URL") == anthropic_url
    assert "ANTHROPIC_BASE_URL" not in report.cleared_vars
def test_api_key_moved_to_anthropic_api_key():
    """A direct API key lands in ANTHROPIC_API_KEY and the proxy URL is dropped."""
    api_key = "sk-ant-api03-xyz789"
    environ = {
        "ANTHROPIC_AUTH_TOKEN": api_key,
        "ANTHROPIC_BASE_URL": "https://api.minimax.io/anthropic",
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "api_key"
    assert report.renamed_to == "ANTHROPIC_API_KEY"
    assert environ["ANTHROPIC_API_KEY"] == api_key
    for name in ("ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL"):
        assert name not in environ
def test_proxy_token_left_alone():
    """Proxy tokens keep both the auth token and the base URL untouched."""
    proxy_token = "sk-cp-minimax-token-foo"
    proxy_url = "https://api.minimax.io/anthropic"
    environ = {
        "ANTHROPIC_AUTH_TOKEN": proxy_token,
        "ANTHROPIC_BASE_URL": proxy_url,
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "proxy"
    assert report.renamed_to is None
    # Both vars must come through unchanged for a proxy setup.
    assert environ["ANTHROPIC_AUTH_TOKEN"] == proxy_token
    assert environ["ANTHROPIC_BASE_URL"] == proxy_url
    assert report.warning is None
def test_proxy_token_without_base_url_warns():
    """A proxy token with no base URL yields a warning naming the missing var."""
    report = normalise_llm_env({"ANTHROPIC_AUTH_TOKEN": "sk-cp-something"})
    assert report.detected_kind == "proxy"
    assert report.warning is not None
    assert "ANTHROPIC_BASE_URL" in report.warning
def test_unknown_prefix_leaves_env_and_warns():
    """An unrecognised prefix is left in place and only produces a warning."""
    odd_token = "garbage-prefix-xyz"
    environ = {"ANTHROPIC_AUTH_TOKEN": odd_token}
    report = normalise_llm_env(environ)
    assert report.detected_kind == "unknown"
    assert report.renamed_to is None
    assert environ["ANTHROPIC_AUTH_TOKEN"] == odd_token
    assert report.warning is not None
    assert "unrecognised prefix" in report.warning
def test_existing_oauth_env_takes_precedence():
    """A deliberately set CLAUDE_CODE_OAUTH_TOKEN is never overwritten."""
    deliberate = "sk-ant-oat01-deliberate"
    environ = {
        "CLAUDE_CODE_OAUTH_TOKEN": deliberate,
        "ANTHROPIC_AUTH_TOKEN": "sk-cp-stale-proxy-value",
        "ANTHROPIC_BASE_URL": "https://api.minimax.io/anthropic",
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == deliberate
    # The stale, conflicting vars are removed so the SDK picks the OAuth token.
    for stale in ("ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL"):
        assert stale not in environ
def test_idempotent_second_call():
    """Running the normaliser twice leaves the same env as running it once."""
    token = "sk-ant-oat01-once"
    environ = {"ANTHROPIC_AUTH_TOKEN": token}
    normalise_llm_env(environ)  # first pass performs the rename
    second = normalise_llm_env(environ)
    assert second.detected_kind == "oauth"
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == token
    assert "ANTHROPIC_AUTH_TOKEN" not in environ
def test_summary_renders_without_error():
    """summary() mentions both the detected kind and the target env var."""
    report = normalise_llm_env({"ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-abc"})
    rendered = report.summary()
    assert "oauth" in rendered
    assert "CLAUDE_CODE_OAUTH_TOKEN" in rendered
def test_uses_os_environ_by_default(monkeypatch):
    """Calling with no argument mutates the real ``os.environ``.

    Every variable the function may write or remove is first registered
    with ``monkeypatch`` so the process environment is fully restored after
    the test, whatever the ambient environment looked like.
    """
    import os

    monkeypatch.setenv("ANTHROPIC_AUTH_TOKEN", "sk-ant-oat01-real")
    # delenv(..., raising=False) records nothing when the variable is absent,
    # so the value written by normalise_llm_env would leak into later tests.
    # Set it first so monkeypatch tracks the key, then remove it.
    monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "placeholder")
    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN")
    # An ambient proxy base URL would be popped by the function; removing it
    # through monkeypatch means it is restored on teardown.
    monkeypatch.delenv("ANTHROPIC_BASE_URL", raising=False)

    r = normalise_llm_env()
    assert r.detected_kind == "oauth"
    assert os.environ.get("CLAUDE_CODE_OAUTH_TOKEN") == "sk-ant-oat01-real"
    assert "ANTHROPIC_AUTH_TOKEN" not in os.environ