Merge pull request #19 from Molecule-AI/feat/adapter-prevalidate

feat(adapter): pre-validate ANTHROPIC_BASE_URL + missing model combo
This commit is contained in:
Hongming Wang 2026-04-30 22:43:49 -07:00 committed by GitHub
commit a8d3b97668
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 304 additions and 2 deletions

View File

@ -3,3 +3,20 @@ on: [push, pull_request]
jobs:
validate:
uses: Molecule-AI/molecule-ci/.github/workflows/validate-workspace-template.yml@main
tests:
name: Adapter unit tests
runs-on: ubuntu-latest
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- run: pip install -q pytest pytest-asyncio
# Tests live under tests/ with their own pytest.ini that anchors
# rootdir there — keeps pytest from importing the package
# __init__.py (which does `from .adapter import ...` for runtime
# discovery and can't be satisfied without molecule_runtime
# installed). See tests/pytest.ini for the full rationale.
- run: python3 -m pytest tests/ -v

View File

@ -213,9 +213,59 @@ class ClaudeCodeAdapter(BaseAdapter):
# RuntimeConfig dataclass. Read `model` defensively from either shape.
rc = config.runtime_config
if isinstance(rc, dict):
model = rc.get("model") or "sonnet"
explicit_model = rc.get("model") or ""
else:
model = getattr(rc, "model", None) or "sonnet"
explicit_model = getattr(rc, "model", None) or ""
# Pre-validation: detect the misconfiguration combo that drove the
# 2026-04-30 staging incident — ANTHROPIC_BASE_URL pointed at a
# non-Anthropic upstream (MiniMax / OpenAI shim) but no explicit
# model was set, so we'd silently fall back to "sonnet" and the
# upstream would hang on the SDK --print probe for 30s before
# timing out. The platform's phantom-busy sweep then resets the
# workspace at the 10min mark — the user-visible failure is "every
# workspace dead" but the root cause is one missing env var.
#
# Fail fast here with an actionable message so the operator sees
# exactly what to fix instead of chasing ghosts in workspace logs.
# We only fire when ALL three are true:
# 1. ANTHROPIC_BASE_URL is set (custom upstream is in play)
# 2. The host is NOT api.anthropic.com (real Anthropic accepts
# "sonnet" as a known alias, so the fallback is fine there)
# 3. The user did NOT set an explicit model (the check we want)
# Anthropic-native users with no model picked still get the
# "sonnet" fallback — that's correct behavior, no error.
base_url = os.environ.get("ANTHROPIC_BASE_URL", "").strip()
if base_url and not explicit_model:
from urllib.parse import urlparse
host = urlparse(base_url).hostname or ""
if host and host != "api.anthropic.com":
raise ValueError(
"claude-code adapter: ANTHROPIC_BASE_URL points at a "
f"non-Anthropic host ({host}) but no model is configured. "
"The default fallback ('sonnet') is an Anthropic-native "
"alias; non-Anthropic shims (MiniMax, OpenAI gateways, "
"etc.) won't recognize it and the SDK --print probe will "
"hang for 30s before timing out. Fix: set MODEL_PROVIDER "
"as a workspace secret (canvas: Save+Restart with model "
"picked) or set runtime_config.model in /configs/config.yaml."
)
model = explicit_model or "sonnet"
# Surface what we resolved to in logs — when the workspace agent
# eventually fails, this single line in the logs explains "the
# adapter sent X to Y" without having to dig into the SDK
# subprocess. Cheap diagnostic, no runtime cost.
if base_url:
from urllib.parse import urlparse
logger.info(
"claude-code: model=%s base_url_host=%s%s",
model,
urlparse(base_url).hostname or "<unparseable>",
" (custom upstream)" if base_url else "",
)
else:
logger.info("claude-code: model=%s base_url=anthropic-default", model)
return ClaudeSDKExecutor(
system_prompt=system_prompt,

11
tests/pytest.ini Normal file
View File

@ -0,0 +1,11 @@
[pytest]
# This pytest.ini anchors pytest's rootdir at tests/ (instead of the
# template directory itself). The template's __init__.py does
# `from .adapter import ClaudeCodeAdapter` for production runtime
# discovery; if pytest treats the template dir as the rootdir, it picks
# up __init__.py as a package node and the relative import fails because
# adapter.py's runtime deps (molecule_runtime, a2a) aren't installed in
# the test environment. Anchoring rootdir here keeps pytest from ever
# touching __init__.py.
addopts = --import-mode=importlib
asyncio_mode = auto

View File

@ -0,0 +1,224 @@
"""Unit tests for ClaudeCodeAdapter.create_executor pre-validation.
Pin the failure-mode-caught-on-2026-04-30 (workspaces with
ANTHROPIC_BASE_URL pointing at a MiniMax/OpenAI shim and no explicit
model hung on the SDK --print probe for 30s, eventually triggering
the platform's phantom-busy sweep).
These tests exercise the pre-validation branch in create_executor
without booting the actual ClaudeSDKExecutor we mock the import
so we can drive the validation logic in isolation.
"""
import os
import sys
import types
from dataclasses import dataclass, field
from unittest.mock import MagicMock
import pytest
# ---- Test scaffolding ----
#
# adapter.py imports at module load:
# - molecule_runtime.adapters.base (BaseAdapter, AdapterConfig, RuntimeCapabilities)
# - a2a.server.agent_execution (AgentExecutor)
# create_executor lazily imports claude_sdk_executor.ClaudeSDKExecutor.
# We stub all three modules so the test file can run in CI without those
# packages installed. The pre-validation branch we care about runs BEFORE
# the executor instantiates, so the stub doesn't affect what we're testing.
@dataclass
class _StubRuntimeCapabilities:
provides_native_session: bool = False
@dataclass
class _StubAdapterConfig:
runtime_config: object = None
config_path: str = "/tmp/configs"
system_prompt: str = ""
heartbeat: object = None
class _StubBaseAdapter:
async def install_plugins_via_registry(self, *_args, **_kwargs):
pass
def _install_stubs():
    """Register placeholder modules in ``sys.modules`` for adapter.py's imports.

    Three module trees are covered: ``molecule_runtime.adapters.base``,
    ``a2a.server.agent_execution`` and ``claude_sdk_executor``. A tree is
    only installed when its top-level name is not already in
    ``sys.modules``, so repeated calls are no-ops and an already-imported
    module of that name is left untouched.
    """
    registry = sys.modules

    if "molecule_runtime" not in registry:
        # Build leaf-first, then hang each module off its parent package.
        base_mod = types.ModuleType("molecule_runtime.adapters.base")
        base_mod.BaseAdapter = _StubBaseAdapter
        base_mod.AdapterConfig = _StubAdapterConfig
        base_mod.RuntimeCapabilities = _StubRuntimeCapabilities

        adapters_pkg = types.ModuleType("molecule_runtime.adapters")
        adapters_pkg.base = base_mod

        runtime_pkg = types.ModuleType("molecule_runtime")
        runtime_pkg.adapters = adapters_pkg

        registry.update(
            {
                "molecule_runtime": runtime_pkg,
                "molecule_runtime.adapters": adapters_pkg,
                "molecule_runtime.adapters.base": base_mod,
            }
        )

    if "a2a" not in registry:
        execution_mod = types.ModuleType("a2a.server.agent_execution")
        # Empty class: adapter.py only needs the name to be importable.
        execution_mod.AgentExecutor = type("AgentExecutor", (), {})

        server_pkg = types.ModuleType("a2a.server")
        server_pkg.agent_execution = execution_mod

        a2a_pkg = types.ModuleType("a2a")
        a2a_pkg.server = server_pkg

        registry.update(
            {
                "a2a": a2a_pkg,
                "a2a.server": server_pkg,
                "a2a.server.agent_execution": execution_mod,
            }
        )

    if "claude_sdk_executor" not in registry:
        executor_mod = types.ModuleType("claude_sdk_executor")
        # MagicMock stands in for the executor class, so "instantiating" it
        # in create_executor just returns another mock.
        executor_mod.ClaudeSDKExecutor = MagicMock(name="ClaudeSDKExecutor")
        registry["claude_sdk_executor"] = executor_mod
@pytest.fixture
def adapter(monkeypatch):
    """Return a fresh ClaudeCodeAdapter imported with all runtime deps stubbed.

    Steps: install the import shims, make the template directory (the
    parent of tests/) importable, drop any cached ``adapter`` module, then
    import it and instantiate ``ClaudeCodeAdapter``.

    The sys.path change goes through ``monkeypatch`` so it is rolled back
    at test teardown instead of leaking into the rest of the session.
    """
    _install_stubs()
    # adapter.py lives in the parent dir. tests/ has no __init__.py
    # because the template directory itself is a Python package
    # (production runtime imports it via the platform's adapter loader),
    # and adding tests/__init__.py would re-expose the same relative-
    # import collection problem we sidestepped by isolating tests here.
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # syspath_prepend puts parent_dir at sys.path[0] and restores the
    # original sys.path when the fixture is torn down.
    monkeypatch.syspath_prepend(parent_dir)
    # Strip any cached module so the stubbed sys.modules entries take effect.
    sys.modules.pop("adapter", None)
    import adapter as adapter_module  # noqa: WPS433

    return adapter_module.ClaudeCodeAdapter()
# ---- Pre-validation tests ----
@pytest.mark.asyncio
async def test_create_executor_raises_when_custom_base_url_and_no_model(
    adapter, monkeypatch
):
    """Custom upstream plus empty model must raise an actionable ValueError.

    This is the 2026-04-30 incident shape. The error text has to name the
    offending env var and host, and point at one of the two documented
    fixes.
    """
    monkeypatch.setenv(
        "ANTHROPIC_BASE_URL", "https://api.xiaomimimo.com/anthropic"
    )
    config = _StubAdapterConfig(runtime_config={"model": ""})

    with pytest.raises(ValueError) as raised:
        await adapter.create_executor(config)

    message = str(raised.value)
    # Both the env var name and the non-Anthropic host must be surfaced.
    for fragment in ("ANTHROPIC_BASE_URL", "api.xiaomimimo.com"):
        assert fragment in message
    # At least one remediation hint must be present.
    assert any(
        hint in message for hint in ("MODEL_PROVIDER", "runtime_config.model")
    )
@pytest.mark.asyncio
async def test_create_executor_passes_when_anthropic_native_and_no_model(
    adapter, monkeypatch
):
    """api.anthropic.com with no model picked must not raise.

    Anthropic-native users keep the "sonnet" fallback — the pre-validation
    only fires for non-Anthropic hosts.
    """
    monkeypatch.setenv("ANTHROPIC_BASE_URL", "https://api.anthropic.com")
    config = _StubAdapterConfig(runtime_config={"model": ""})

    # No exception expected: falling back to "sonnet" is the documented default.
    result = await adapter.create_executor(config)

    assert result is not None
@pytest.mark.asyncio
async def test_create_executor_passes_when_no_base_url_set(adapter, monkeypatch):
    """With ANTHROPIC_BASE_URL unset and no model, create_executor succeeds.

    This is the historical happy path; the pre-validation must not
    regress it.
    """
    # raising=False: the variable may legitimately be absent already.
    monkeypatch.delenv("ANTHROPIC_BASE_URL", raising=False)
    config = _StubAdapterConfig(runtime_config={"model": ""})

    result = await adapter.create_executor(config)

    assert result is not None
@pytest.mark.asyncio
async def test_create_executor_passes_when_custom_base_url_with_explicit_model(
    adapter, monkeypatch
):
    """Custom upstream with an explicit model passes pre-validation.

    This is the configuration the error message tells operators to apply:
    both the URL and the model are set.
    """
    monkeypatch.setenv(
        "ANTHROPIC_BASE_URL", "https://api.xiaomimimo.com/anthropic"
    )
    runtime_config = {"model": "MiniMax-M2"}
    config = _StubAdapterConfig(runtime_config=runtime_config)

    result = await adapter.create_executor(config)

    assert result is not None
@pytest.mark.asyncio
async def test_create_executor_passes_dataclass_runtime_config(adapter, monkeypatch):
    """A dataclass-shaped runtime_config with a model set is accepted.

    runtime_config may arrive as an object with attributes rather than a
    dict; the adapter's defensive ``model`` read must handle both shapes.
    """

    @dataclass
    class _RuntimeConfig:
        model: str = "MiniMax-M2"
        provider: str = "minimax"

    monkeypatch.setenv(
        "ANTHROPIC_BASE_URL", "https://api.xiaomimimo.com/anthropic"
    )
    config = _StubAdapterConfig(runtime_config=_RuntimeConfig())

    result = await adapter.create_executor(config)

    assert result is not None
@pytest.mark.asyncio
async def test_create_executor_raises_when_dataclass_runtime_config_empty_model(
    adapter, monkeypatch
):
    """A dataclass runtime_config with an empty model raises like the dict shape.

    Keeps the validation symmetric across both input shapes.
    """

    @dataclass
    class _RuntimeConfig:
        model: str = ""
        provider: str = ""

    monkeypatch.setenv(
        "ANTHROPIC_BASE_URL", "https://api.xiaomimimo.com/anthropic"
    )
    config = _StubAdapterConfig(runtime_config=_RuntimeConfig())

    with pytest.raises(ValueError):
        await adapter.create_executor(config)
@pytest.mark.asyncio
async def test_create_executor_passes_when_unparseable_url(adapter, monkeypatch):
    """A URL with no extractable host must not crash or trip the validation.

    The adapter owns only the missing-model invariant, not URL validation —
    a malformed value is passed through so the SDK can report it itself.
    """
    monkeypatch.setenv("ANTHROPIC_BASE_URL", "://garbage")
    config = _StubAdapterConfig(runtime_config={"model": ""})

    # Empty hostname → pre-validation skips → reaches SDK with "sonnet"
    # fallback. The SDK will fail; that's not the adapter's job.
    result = await adapter.create_executor(config)

    assert result is not None