Merge pull request #38 from Molecule-AI/fix/auto-detect-llm-token-type
feat(runtime): auto-detect LLM token type, normalise env on boot
This commit is contained in:
commit
1b04da2061
198
molecule_runtime/llm_auth.py
Normal file
198
molecule_runtime/llm_auth.py
Normal file
@ -0,0 +1,198 @@
|
||||
"""LLM auth-env normalisation.
|
||||
|
||||
Platform stores per-workspace LLM credentials under a single key,
|
||||
``ANTHROPIC_AUTH_TOKEN``. But the CLI/SDK tools we invoke downstream
|
||||
expect *different* env var names depending on the token type:
|
||||
|
||||
    Token prefix        Correct env var           Base URL needed
    ------------------  ------------------------  ------------------------
    sk-ant-oat01-*      CLAUDE_CODE_OAUTH_TOKEN   none (Claude handles)
    sk-ant-api03-*      ANTHROPIC_API_KEY         none (Claude default)
    sk-cp-*             ANTHROPIC_AUTH_TOKEN      proxy URL (MiniMax etc.)
    other/unknown       (leave as-is)             (leave as-is)
|
||||
|
||||
Without this normalisation, passing an OAuth token as
|
||||
``ANTHROPIC_AUTH_TOKEN`` causes the Claude SDK to send it as a bearer
|
||||
token to ``api.anthropic.com``, which responds:
|
||||
|
||||
401 {"error":{"type":"authentication_error",
|
||||
"message":"OAuth authentication is currently not supported."}}
|
||||
|
||||
Call :func:`normalise_llm_env` once, early in the runtime bootstrap
|
||||
(before any adapter/executor is created). The function mutates
|
||||
``os.environ`` in place and returns a report of what changed so the
|
||||
boot log shows the mapping.
|
||||
|
||||
Safe to call multiple times — idempotent.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class NormalisationResult:
    """Report of one ``normalise_llm_env`` run. Safe to print in boot logs."""

    # One of "oauth", "api_key", "proxy", "unknown" or "none".
    detected_kind: str = "none"
    # Name of the env var the token was re-exported under, when it was moved.
    renamed_to: Optional[str] = None
    # Names of the env vars that were removed, in the order they were removed.
    cleared_vars: list[str] = field(default_factory=list)
    # Human-readable note about a suspicious configuration, if any.
    warning: Optional[str] = None

    def summary(self) -> str:
        """Render the report as a single log line.

        Optional fragments (rename, cleared vars, warning) are appended only
        when the corresponding field is populated.
        """
        if self.detected_kind == "none":
            return "llm-auth: no ANTHROPIC_AUTH_TOKEN set"

        fragments = [f"llm-auth: detected {self.detected_kind}"]
        if self.renamed_to:
            fragments.append(f" → exported as {self.renamed_to}")
        if self.cleared_vars:
            fragments.append(f" (cleared: {', '.join(self.cleared_vars)})")
        if self.warning:
            fragments.append(f" [WARN: {self.warning}]")
        return "".join(fragments)
|
||||
|
||||
|
||||
# Anthropic's native API hostnames. Treat as "direct Anthropic" for OAuth /
|
||||
# API-key mode. Anything else in ANTHROPIC_BASE_URL is assumed to be a proxy
|
||||
# and gets cleared when we switch to direct-Anthropic auth.
|
||||
_ANTHROPIC_NATIVE_HOSTS = frozenset({
|
||||
"api.anthropic.com",
|
||||
"anthropic.com",
|
||||
})
|
||||
|
||||
|
||||
def _is_native_anthropic_base_url(base_url: str) -> bool:
|
||||
"""Return True only if the base URL points at an Anthropic-native host.
|
||||
|
||||
Substring matching on ``"anthropic.com"`` would falsely accept
|
||||
``https://my-proxy.anthropic.com.evil.example/`` — parse the URL
|
||||
properly and compare the exact hostname.
|
||||
"""
|
||||
if not base_url:
|
||||
return False
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
host = (urlparse(base_url).hostname or "").lower().strip()
|
||||
except Exception:
|
||||
return False
|
||||
return host in _ANTHROPIC_NATIVE_HOSTS
|
||||
|
||||
|
||||
def _prefix_of(token: str) -> str:
|
||||
"""Classify a token string by its well-known prefix."""
|
||||
if token.startswith("sk-ant-oat01-"):
|
||||
return "oauth"
|
||||
if token.startswith("sk-ant-api03-"):
|
||||
return "api_key"
|
||||
if token.startswith("sk-cp-"):
|
||||
return "proxy"
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _drop_proxy_base_url(env: dict[str, str], result: NormalisationResult) -> None:
    """Remove a non-Anthropic ``ANTHROPIC_BASE_URL`` from *env* and record it.

    An unset/empty base URL, or one that ``_is_native_anthropic_base_url``
    accepts, is left untouched.
    """
    base = env.get("ANTHROPIC_BASE_URL", "")
    if base and not _is_native_anthropic_base_url(base):
        env.pop("ANTHROPIC_BASE_URL", None)
        result.cleared_vars.append("ANTHROPIC_BASE_URL")


def normalise_llm_env(env: Optional[dict[str, str]] = None) -> NormalisationResult:
    """Inspect and rewrite LLM auth env vars in place.

    Parameters
    ----------
    env
        The env mapping to mutate.  Defaults to ``os.environ``.
        Passing a dict is useful for tests.

    Returns
    -------
    NormalisationResult
        Describes what was detected and what was changed, for logging.

    Notes
    -----
    * A non-blank ``CLAUDE_CODE_OAUTH_TOKEN`` always wins.  It is stripped of
      surrounding whitespace, a differing ``ANTHROPIC_AUTH_TOKEN`` is removed
      and a non-Anthropic ``ANTHROPIC_BASE_URL`` is removed.
    * A blank (empty or whitespace-only) ``CLAUDE_CODE_OAUTH_TOKEN`` is treated
      as unset, so it can never cause a valid ``ANTHROPIC_AUTH_TOKEN`` to be
      discarded.
    * Otherwise ``ANTHROPIC_AUTH_TOKEN`` is stripped, classified by prefix and
      moved/kept as described in the module docstring.
    """
    if env is None:
        env = os.environ

    result = NormalisationResult()

    # Priority: explicit CLAUDE_CODE_OAUTH_TOKEN wins if already present
    # (operator set it deliberately — don't override).  Strip it first: a
    # pasted trailing newline would be rejected downstream, and a value that
    # is only whitespace is not a deliberate override at all.
    raw_oauth = env.get("CLAUDE_CODE_OAUTH_TOKEN", "")
    existing_oauth = raw_oauth.strip()
    if existing_oauth:
        if existing_oauth != raw_oauth:
            env["CLAUDE_CODE_OAUTH_TOKEN"] = existing_oauth  # persist cleaned value
        result.detected_kind = "oauth"
        # If ANTHROPIC_AUTH_TOKEN is ALSO set with a conflicting value,
        # clear it so the SDK doesn't pick the wrong one.
        auth = env.get("ANTHROPIC_AUTH_TOKEN", "")
        if auth and auth != existing_oauth:
            env.pop("ANTHROPIC_AUTH_TOKEN", None)
            result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN")
        # Base URL is irrelevant for OAuth mode; remove the proxy URL
        # so the SDK uses Claude defaults.
        _drop_proxy_base_url(env, result)
        return result

    # No usable CLAUDE_CODE_OAUTH_TOKEN — detect from ANTHROPIC_AUTH_TOKEN.
    # Strip whitespace because operators frequently paste tokens with
    # trailing newlines from terminals, and the SDK will reject those as
    # malformed before auth is even attempted.
    raw_tok = env.get("ANTHROPIC_AUTH_TOKEN", "")
    tok = raw_tok.strip()
    if not tok:
        return result
    if tok != raw_tok:
        env["ANTHROPIC_AUTH_TOKEN"] = tok  # persist the cleaned value

    kind = _prefix_of(tok)
    result.detected_kind = kind

    # Token kinds that talk to Anthropic directly, and the env var each one
    # must be exported under.  ANTHROPIC_API_KEY is the safer home for API
    # keys because claude-code in non-bare mode reads it first.
    direct_targets = {
        "oauth": "CLAUDE_CODE_OAUTH_TOKEN",
        "api_key": "ANTHROPIC_API_KEY",
    }

    if kind in direct_targets:
        target = direct_targets[kind]
        env[target] = tok
        env.pop("ANTHROPIC_AUTH_TOKEN", None)
        result.cleared_vars.append("ANTHROPIC_AUTH_TOKEN")
        result.renamed_to = target
        # Proxy base URL must go — direct auth uses Anthropic's own endpoint.
        _drop_proxy_base_url(env, result)

    elif kind == "proxy":
        # sk-cp-* = Claude proxy token (MiniMax, custom gateways).  KEEP
        # ANTHROPIC_AUTH_TOKEN + ANTHROPIC_BASE_URL as-is; that's the correct
        # shape for proxies.
        if not env.get("ANTHROPIC_BASE_URL", ""):
            result.warning = (
                "proxy token detected but ANTHROPIC_BASE_URL is empty — "
                "proxy calls will fail without a base URL"
            )

    else:
        # unknown — be conservative, leave env untouched but warn.
        # Do NOT include the token value in the warning.  Even a prefix
        # leaks bytes of a secret into logs (which get shipped to
        # Langfuse / CloudWatch / sentry / slack-firehose).
        result.warning = (
            "ANTHROPIC_AUTH_TOKEN has an unrecognised prefix; not "
            "normalising. Known prefixes: sk-ant-oat01-* (OAuth), "
            "sk-ant-api03-* (API key), sk-cp-* (proxy)."
        )

    return result
|
||||
@ -65,7 +65,15 @@ async def main(): # pragma: no cover
|
||||
platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080")
|
||||
awareness_config = get_awareness_config()
|
||||
|
||||
# 0. Initialise OpenTelemetry (no-op if packages not installed)
|
||||
# 0. Normalise LLM auth env vars based on token type.
|
||||
# Platform stores tokens as ANTHROPIC_AUTH_TOKEN, but the Claude SDK/CLI
|
||||
# expects different env vars per token kind (OAuth vs API key vs proxy).
|
||||
# Doing this early means every downstream adapter/executor sees a
|
||||
# consistent, correct env — no per-adapter detection needed.
|
||||
from molecule_runtime.llm_auth import normalise_llm_env
|
||||
print(normalise_llm_env().summary())
|
||||
|
||||
# 0.5 Initialise OpenTelemetry (no-op if packages not installed)
|
||||
setup_telemetry(service_name=workspace_id)
|
||||
|
||||
# 1. Load config
|
||||
|
||||
192
tests/test_llm_auth.py
Normal file
192
tests/test_llm_auth.py
Normal file
@ -0,0 +1,192 @@
|
||||
"""Unit tests for molecule_runtime.llm_auth.normalise_llm_env."""
|
||||
|
||||
from molecule_runtime.llm_auth import normalise_llm_env
|
||||
|
||||
|
||||
def test_no_token_is_noop():
    """An empty environment is reported as "none" and is left empty."""
    environ: dict[str, str] = {}
    report = normalise_llm_env(environ)
    assert environ == {}
    assert report.detected_kind == "none"
    assert report.renamed_to is None
|
||||
|
||||
|
||||
def test_oauth_token_moved_to_oauth_env_var():
    """An OAuth token is re-exported and the proxy base URL is dropped."""
    environ = {
        "ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-abc123",
        "ANTHROPIC_BASE_URL": "https://api.minimax.io/anthropic",
    }
    report = normalise_llm_env(environ)

    # What the report says.
    assert report.detected_kind == "oauth"
    assert report.renamed_to == "CLAUDE_CODE_OAUTH_TOKEN"
    assert "ANTHROPIC_AUTH_TOKEN" in report.cleared_vars
    assert "ANTHROPIC_BASE_URL" in report.cleared_vars

    # What actually happened to the mapping.
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == "sk-ant-oat01-abc123"
    assert "ANTHROPIC_AUTH_TOKEN" not in environ
    assert "ANTHROPIC_BASE_URL" not in environ
|
||||
|
||||
|
||||
def test_oauth_token_keeps_anthropic_base_url():
    """A base URL that really is Anthropic's survives OAuth normalisation."""
    native_url = "https://api.anthropic.com"
    environ = {
        "ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-abc",
        "ANTHROPIC_BASE_URL": native_url,
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert "ANTHROPIC_BASE_URL" not in report.cleared_vars
    assert environ.get("ANTHROPIC_BASE_URL") == native_url
|
||||
|
||||
|
||||
def test_api_key_moved_to_anthropic_api_key():
    """An API key is moved to ANTHROPIC_API_KEY and the proxy URL is dropped."""
    environ = {
        "ANTHROPIC_AUTH_TOKEN": "sk-ant-api03-xyz789",
        "ANTHROPIC_BASE_URL": "https://api.minimax.io/anthropic",
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "api_key"
    assert report.renamed_to == "ANTHROPIC_API_KEY"
    assert environ["ANTHROPIC_API_KEY"] == "sk-ant-api03-xyz789"
    for removed in ("ANTHROPIC_AUTH_TOKEN", "ANTHROPIC_BASE_URL"):
        assert removed not in environ
|
||||
|
||||
|
||||
def test_proxy_token_left_alone():
    """A proxy token with a base URL is detected but nothing is rewritten."""
    proxy_token = "sk-cp-minimax-token-foo"
    proxy_url = "https://api.minimax.io/anthropic"
    environ = {
        "ANTHROPIC_AUTH_TOKEN": proxy_token,
        "ANTHROPIC_BASE_URL": proxy_url,
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "proxy"
    assert report.renamed_to is None
    assert report.warning is None
    # Proxies need both vars unchanged
    assert environ["ANTHROPIC_AUTH_TOKEN"] == proxy_token
    assert environ["ANTHROPIC_BASE_URL"] == proxy_url
|
||||
|
||||
|
||||
def test_proxy_token_without_base_url_warns():
    """A proxy token with no base URL produces a warning naming the variable."""
    report = normalise_llm_env({"ANTHROPIC_AUTH_TOKEN": "sk-cp-something"})
    assert report.detected_kind == "proxy"
    assert report.warning is not None
    assert "ANTHROPIC_BASE_URL" in report.warning
|
||||
|
||||
|
||||
def test_unknown_prefix_leaves_env_and_warns():
    """An unrecognised token is kept where it is and a warning is raised."""
    environ = {"ANTHROPIC_AUTH_TOKEN": "garbage-prefix-xyz"}
    report = normalise_llm_env(environ)
    assert environ["ANTHROPIC_AUTH_TOKEN"] == "garbage-prefix-xyz"
    assert report.detected_kind == "unknown"
    assert report.renamed_to is None
    assert report.warning is not None
    assert "unrecognised prefix" in report.warning
|
||||
|
||||
|
||||
def test_existing_oauth_env_takes_precedence():
    """An operator-set CLAUDE_CODE_OAUTH_TOKEN is never overwritten."""
    # Operator set CLAUDE_CODE_OAUTH_TOKEN deliberately; don't overwrite.
    deliberate = "sk-ant-oat01-deliberate"
    environ = {
        "CLAUDE_CODE_OAUTH_TOKEN": deliberate,
        "ANTHROPIC_AUTH_TOKEN": "sk-cp-stale-proxy-value",
        "ANTHROPIC_BASE_URL": "https://api.minimax.io/anthropic",
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == deliberate
    # Conflicting ANTHROPIC_AUTH_TOKEN cleared so SDK picks the right one
    assert "ANTHROPIC_AUTH_TOKEN" not in environ
    assert "ANTHROPIC_BASE_URL" not in environ
|
||||
|
||||
|
||||
def test_idempotent_second_call():
    """Running the normaliser twice yields the same environment as once."""
    environ = {"ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-once"}
    normalise_llm_env(environ)
    second_report = normalise_llm_env(environ)
    assert second_report.detected_kind == "oauth"
    assert "ANTHROPIC_AUTH_TOKEN" not in environ
    assert environ["CLAUDE_CODE_OAUTH_TOKEN"] == "sk-ant-oat01-once"
|
||||
|
||||
|
||||
def test_summary_renders_without_error():
    """The summary line names both the detected kind and the target variable."""
    report = normalise_llm_env({"ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-abc"})
    rendered = report.summary()
    for expected in ("oauth", "CLAUDE_CODE_OAUTH_TOKEN"):
        assert expected in rendered
|
||||
|
||||
|
||||
def test_uses_os_environ_by_default(monkeypatch):
    """With no argument the normaliser mutates the real ``os.environ``.

    Every variable the function may write or remove is first registered
    with ``monkeypatch`` so that teardown restores the process environment
    and nothing leaks into later tests.
    """
    import os

    monkeypatch.setenv("ANTHROPIC_AUTH_TOKEN", "sk-ant-oat01-real")
    # ``delenv(..., raising=False)`` records nothing when the variable is
    # absent, so the value written by normalise_llm_env would survive the
    # test.  Setting it first guarantees monkeypatch tracks (and later
    # removes/restores) it; the explicit delete then gives the function a
    # clean starting point.
    monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "placeholder")
    monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN")
    # Keep an ambient base URL from the developer/CI environment out of the
    # test; if present it is restored on teardown.
    monkeypatch.delenv("ANTHROPIC_BASE_URL", raising=False)

    r = normalise_llm_env()
    assert r.detected_kind == "oauth"
    assert os.environ.get("CLAUDE_CODE_OAUTH_TOKEN") == "sk-ant-oat01-real"
    assert "ANTHROPIC_AUTH_TOKEN" not in os.environ
|
||||
|
||||
|
||||
def test_strips_whitespace_and_newlines_from_token():
    """Surrounding whitespace on the token is removed before it is exported."""
    environ = {"ANTHROPIC_AUTH_TOKEN": "  sk-ant-oat01-abc\n"}
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    exported = environ["CLAUDE_CODE_OAUTH_TOKEN"]
    assert exported == "sk-ant-oat01-abc"
    # Trailing newline must not survive into the renamed var
    assert "\n" not in exported
    assert " " not in exported
|
||||
|
||||
|
||||
def test_unknown_prefix_does_not_leak_token_to_warning():
    """The warning for an unknown token must not echo any prefix of it."""
    # Security: warning must not contain any bytes of the secret.
    secret = "ghs_supersecrettoken123"
    report = normalise_llm_env({"ANTHROPIC_AUTH_TOKEN": secret})
    assert report.detected_kind == "unknown"
    assert report.warning is not None
    # Check every prefix of length 4 up to (but excluding) the full token.
    for end in range(4, len(secret)):
        leaked = secret[:end]
        assert leaked not in report.warning, (
            f"token prefix leaked to warning: {leaked!r} found in "
            f"{report.warning!r}"
        )
|
||||
|
||||
|
||||
def test_base_url_substring_false_positive_blocked():
    """A look-alike host merely containing 'anthropic.com' is still cleared."""
    # A hostile URL that contains 'anthropic.com' as a substring but is not
    # actually Anthropic MUST still be cleared when switching to OAuth mode.
    environ = {
        "ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-x",
        "ANTHROPIC_BASE_URL": "https://proxy.anthropic.com.evil.example/",
    }
    report = normalise_llm_env(environ)
    assert report.detected_kind == "oauth"
    assert "ANTHROPIC_BASE_URL" in report.cleared_vars
    assert "ANTHROPIC_BASE_URL" not in environ
|
||||
|
||||
|
||||
def test_actual_anthropic_base_url_preserved():
    """Genuine Anthropic base URLs are kept regardless of path or scheme."""
    native_urls = (
        "https://api.anthropic.com",
        "https://api.anthropic.com/v1",
        "http://api.anthropic.com/",  # plain http unlikely but shouldn't crash
    )
    for url in native_urls:
        environ = {
            "ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-x",
            "ANTHROPIC_BASE_URL": url,
        }
        normalise_llm_env(environ)
        assert environ.get("ANTHROPIC_BASE_URL") == url, (
            f"native Anthropic URL {url!r} should be preserved, got "
            f"{environ.get('ANTHROPIC_BASE_URL')!r}"
        )
|
||||
|
||||
|
||||
def test_malformed_base_url_does_not_crash():
    """A garbled base URL is cleared rather than raising."""
    # If the URL is garbled, the normaliser shouldn't crash — fall through
    # to clearing it, which is the safe choice for OAuth mode.
    environ = {
        "ANTHROPIC_AUTH_TOKEN": "sk-ant-oat01-x",
        "ANTHROPIC_BASE_URL": "not a url",
    }
    report = normalise_llm_env(environ)
    assert "ANTHROPIC_BASE_URL" not in environ
    assert report.detected_kind == "oauth"
|
||||
Loading…
Reference in New Issue
Block a user