diff --git a/adapter.py b/adapter.py index 8f87e03..9465df5 100644 --- a/adapter.py +++ b/adapter.py @@ -278,6 +278,83 @@ def _load_providers(config_path: str) -> tuple: return tuple(parsed) +def _resolve_model_and_provider_from_env( + yaml_model: str, + yaml_provider: str, + providers: tuple, +) -> tuple: + """Reconcile model + provider from env vars vs YAML, with the persona-env + convention winning over the legacy ``MODEL_PROVIDER``-as-model-id usage. + + The persona env files (``~/.molecule-ai/personas//env`` on the host, + sourced into each workspace container at provision time) declare TWO env + vars with distinct semantics: + + * ``MODEL`` — the model id (e.g. ``MiniMax-M2.7-highspeed``, ``opus``). + * ``MODEL_PROVIDER`` — the provider slug (e.g. ``minimax``, + ``claude-code``, ``anthropic``). + + The legacy ``workspace/config.py`` (in molecule-ai-workspace-runtime) + historically interpreted ``MODEL_PROVIDER`` as the *model id* — a name + chosen before there was a separate ``MODEL`` env var. When both env vars + are set with the persona convention, the legacy code reads + ``MODEL_PROVIDER=minimax`` into ``runtime_config.model``, which then + fails to match any registry prefix (``minimax-`` requires a hyphen + suffix) and silently falls through to providers[0] (``anthropic-oauth``). + OAuth-token-less workspaces then wedge at ``query.initialize()`` because + the claude CLI can't authenticate. This is the 2026-05-08 dev-tree + incident — 22/27 non-lead workspaces stuck in ``degraded``. + + Resolution order (this function): + 1. ``MODEL`` env var → picked_model. Authoritative when set; the + persona env always sets it alongside ``MODEL_PROVIDER`` so the + model id never has to be inferred. + 2. ``MODEL_PROVIDER`` env var → explicit_provider, BUT only when the + value matches a known provider name in the registry. This guards + against the legacy case where some callers still set + ``MODEL_PROVIDER`` to a model id (e.g. canvas Save+Restart prior to + this fix). If the value isn't a registered provider name and YAML + didn't supply a model, treat it as a model id for back-compat. + 3. YAML ``runtime_config.model`` / ``provider`` — used for any field + the env didn't supply. Carries the operator's canvas selection + on workspaces that haven't yet adopted the persona env shape. + + Returns ``(picked_model, explicit_provider_name)``. Either may be + empty/None — the caller (``setup``) handles the empty cases via + ``_resolve_provider``'s registry fallback. + """ + env_model = (os.environ.get("MODEL") or "").strip() + env_provider = (os.environ.get("MODEL_PROVIDER") or "").strip() + provider_names_lower = {p.get("name", "").lower() for p in providers} + + # Detect whether MODEL_PROVIDER carries the persona-convention slug + # (provider name) vs. the legacy convention (model id). Persona- + # convention wins when the value matches a registered provider; we + # fall back to legacy interpretation only when it doesn't. + env_provider_is_slug = ( + bool(env_provider) and env_provider.lower() in provider_names_lower + ) + + # Picked model resolution + if env_model: + picked_model = env_model + elif env_provider and not env_provider_is_slug: + # Legacy: MODEL_PROVIDER env carried the model id. Honor it so + # canvas Save+Restart workflows that predate this fix keep working. + picked_model = env_provider + else: + picked_model = yaml_model or "" + + # Explicit provider resolution — env wins when it's a registered slug, + # otherwise fall back to YAML. + if env_provider_is_slug: + explicit_provider = env_provider + else: + explicit_provider = yaml_provider or None + + return picked_model, explicit_provider + + def _strip_provider_prefix(model: str) -> str: """Strip LangChain-style ":" prefix from a model id. @@ -536,11 +613,11 @@ class ClaudeCodeAdapter(BaseAdapter): # validation + ANTHROPIC_BASE_URL routing from that single decision. rc = config.runtime_config if isinstance(rc, dict): - picked_model = rc.get("model") or "sonnet" - explicit_provider_name = rc.get("provider") + yaml_model = rc.get("model") or "" + yaml_provider_name = rc.get("provider") or "" else: - picked_model = getattr(rc, "model", None) or "sonnet" - explicit_provider_name = getattr(rc, "provider", None) + yaml_model = getattr(rc, "model", None) or "" + yaml_provider_name = getattr(rc, "provider", None) or "" # Also honor the top-level `provider:` field in /configs/config.yaml. # The canvas Config-tab Provider dropdown writes there (not into @@ -548,7 +625,7 @@ class ClaudeCodeAdapter(BaseAdapter): # whichever is set wins. Root cause of #180: the adapter used to # ignore both, silently routing every non-Anthropic provider pick # through anthropic-oauth. - if not explicit_provider_name: + if not yaml_provider_name: yaml_path = os.path.join(config.config_path, "config.yaml") try: import yaml # transitive dep via molecule-ai-workspace-runtime @@ -557,7 +634,7 @@ class ClaudeCodeAdapter(BaseAdapter): if isinstance(data, dict): val = data.get("provider") if isinstance(val, str) and val.strip(): - explicit_provider_name = val.strip() + yaml_provider_name = val.strip() except FileNotFoundError: pass except Exception as exc: # noqa: BLE001 — defensive: never block boot @@ -567,6 +644,21 @@ class ClaudeCodeAdapter(BaseAdapter): yaml_path, exc, ) + # Reconcile env vars (persona convention: MODEL=, + # MODEL_PROVIDER=) against YAML. Env wins over YAML — the + # persona env files are the canonical per-agent provider mapping + # (Phase 2 mapping 2026-05-08), and the workspace-runtime wheel's + # legacy ``MODEL_PROVIDER``-as-model-id reading would otherwise + # silently route non-leads to providers[0] = anthropic-oauth. + # Documented in detail at _resolve_model_and_provider_from_env. + picked_model, explicit_provider_name = _resolve_model_and_provider_from_env( + yaml_model=yaml_model, + yaml_provider=yaml_provider_name, + providers=providers, + ) + if not picked_model: + picked_model = "sonnet" + # NOTE: do NOT strip the provider prefix here. The pre-fix routing # behavior — `anthropic:claude-opus-4-7` falls through to # providers[0] (anthropic-oauth) when no model_prefixes match — is @@ -694,9 +786,26 @@ class ClaudeCodeAdapter(BaseAdapter): # RuntimeConfig dataclass. Read `model` defensively from either shape. rc = config.runtime_config if isinstance(rc, dict): - explicit_model = rc.get("model") or "" + yaml_model = rc.get("model") or "" + yaml_provider = rc.get("provider") or "" else: - explicit_model = getattr(rc, "model", None) or "" + yaml_model = getattr(rc, "model", None) or "" + yaml_provider = getattr(rc, "provider", None) or "" + + # Reconcile against env vars (persona convention: MODEL=, + # MODEL_PROVIDER=) using the same helper that ``setup`` uses, + # so the executor and the boot banner agree on the picked model. + # Without this, a workspace whose env says ``MODEL=MiniMax-M2.7`` + # but whose runtime wheel pre-dates the persona-env fix would set + # runtime_config.model="minimax" (the slug, mistakenly read by the + # legacy ``MODEL_PROVIDER``-as-model-id path); this helper restores + # the correct model id before it reaches the SDK. + providers = _load_providers(config.config_path) + explicit_model, _ = _resolve_model_and_provider_from_env( + yaml_model=yaml_model, + yaml_provider=yaml_provider, + providers=providers, + ) explicit_model = _strip_provider_prefix(explicit_model) # Pre-validation: detect the misconfiguration combo that drove the @@ -727,7 +836,7 @@ class ClaudeCodeAdapter(BaseAdapter): "The default fallback ('sonnet') is an Anthropic-native " "alias; non-Anthropic shims (MiniMax, OpenAI gateways, " "etc.) won't recognize it and the SDK --print probe will " - "hang for 30s before timing out. Fix: set MODEL_PROVIDER " + "hang for 30s before timing out. Fix: set MODEL " "as a workspace secret (canvas: Save+Restart with model " "picked) or set runtime_config.model in /configs/config.yaml." ) diff --git a/tests/test_env_model_provider_dispatch.py b/tests/test_env_model_provider_dispatch.py new file mode 100644 index 0000000..3cd1bb1 --- /dev/null +++ b/tests/test_env_model_provider_dispatch.py @@ -0,0 +1,210 @@ +"""Tests for ``_resolve_model_and_provider_from_env`` — the env-vs-YAML +reconciliation that fixes the 2026-05-08 dev-tree wedge incident. + +Symptom: 22/27 non-lead workspaces (minimax tier) wedged on +``Control request timeout: initialize`` because the runtime wheel's +``workspace/config.py`` interpreted ``MODEL_PROVIDER=minimax`` as the +*model id* instead of the provider slug. ``model="minimax"`` failed to +match the ``minimax-`` registry prefix, fell through to providers[0] +(anthropic-oauth), demanded ``CLAUDE_CODE_OAUTH_TOKEN`` (unset on +non-leads), and the claude CLI hung at SDK init. + +The persona env files (``~/.molecule-ai/personas//env``) declare +the new convention: + * ``MODEL`` — model id (e.g. ``MiniMax-M2.7-highspeed``) + * ``MODEL_PROVIDER`` — provider slug (e.g. ``minimax``) + +These tests cover the matrix of (env shape) × (YAML shape) so a future +contributor can't silently regress the wedge fix. +""" + +import pytest + +from adapter import ( + _BUILTIN_PROVIDERS, + _resolve_model_and_provider_from_env, +) + + +# A registry that contains both anthropic-oauth (providers[0]) and +# minimax/zai (third-party slugs) — matches the shipped config.yaml. +_REGISTRY = _BUILTIN_PROVIDERS + ( + { + "name": "minimax", + "auth_mode": "third_party_anthropic_compat", + "model_prefixes": ("minimax-",), + "model_aliases": (), + "base_url": "https://api.minimax.io/anthropic", + "auth_env": ("MINIMAX_API_KEY",), + }, + { + "name": "zai", + "auth_mode": "third_party_anthropic_compat", + "model_prefixes": ("glm-",), + "model_aliases": (), + "base_url": "https://api.z.ai/api/anthropic", + "auth_env": ("GLM_API_KEY",), + }, +) + + +def _clear_env(monkeypatch): + monkeypatch.delenv("MODEL", raising=False) + monkeypatch.delenv("MODEL_PROVIDER", raising=False) + + +# ------------------------------------------------------------------ +# Persona env convention: MODEL=, MODEL_PROVIDER= +# ------------------------------------------------------------------ + +def test_persona_env_minimax_resolves_correctly(monkeypatch): + """The 2026-05-08 wedge regression test: persona env shape must + yield model=MiniMax-M2.7-highspeed (not "minimax") and explicit + provider=minimax.""" + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", "MiniMax-M2.7-highspeed") + monkeypatch.setenv("MODEL_PROVIDER", "minimax") + model, provider = _resolve_model_and_provider_from_env( + yaml_model="", yaml_provider="", providers=_REGISTRY, + ) + assert model == "MiniMax-M2.7-highspeed" + assert provider == "minimax" + + +def test_persona_env_lead_claude_code_resolves_correctly(monkeypatch): + """Lead persona env (MODEL=opus, MODEL_PROVIDER=claude-code) — + ``claude-code`` isn't a registered provider name (registry uses + ``anthropic-oauth``), so it falls back to legacy interpretation + and yields no explicit provider, letting the model-based + fall-through to providers[0]=anthropic-oauth do the right thing.""" + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", "opus") + monkeypatch.setenv("MODEL_PROVIDER", "claude-code") + model, provider = _resolve_model_and_provider_from_env( + yaml_model="", yaml_provider="", providers=_REGISTRY, + ) + assert model == "opus" + # claude-code is not a registered slug, so this falls back — + # provider is None and the caller will model-resolve to + # anthropic-oauth via the alias match on "opus". + assert provider is None + + +def test_persona_env_glm_resolves_correctly(monkeypatch): + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", "GLM-4.6") + monkeypatch.setenv("MODEL_PROVIDER", "zai") + model, provider = _resolve_model_and_provider_from_env( + yaml_model="", yaml_provider="", providers=_REGISTRY, + ) + assert model == "GLM-4.6" + assert provider == "zai" + + +def test_env_provider_slug_case_insensitive(monkeypatch): + """Operator typos like ``MiniMax`` (mixed case) still resolve.""" + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", "MiniMax-M2.7-highspeed") + monkeypatch.setenv("MODEL_PROVIDER", "MiniMax") # mixed case + _, provider = _resolve_model_and_provider_from_env( + yaml_model="", yaml_provider="", providers=_REGISTRY, + ) + assert provider == "MiniMax" # caller compares case-insensitively + + +# ------------------------------------------------------------------ +# Legacy convention: MODEL_PROVIDER=, MODEL unset +# ------------------------------------------------------------------ + +def test_legacy_model_provider_as_model_id_still_works(monkeypatch): + """Pre-2026-05-08 canvas Save+Restart shape: MODEL_PROVIDER carried + the model id directly (e.g. ``MODEL_PROVIDER=MiniMax-M2.7``) and + no MODEL env. Must keep working so existing canvas users don't + break overnight.""" + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL_PROVIDER", "MiniMax-M2.7-highspeed") + model, provider = _resolve_model_and_provider_from_env( + yaml_model="", yaml_provider="", providers=_REGISTRY, + ) + # MiniMax-M2.7-highspeed is not a registered provider name, so + # it's treated as a legacy model-id-in-MODEL_PROVIDER value. + assert model == "MiniMax-M2.7-highspeed" + assert provider is None + + +# ------------------------------------------------------------------ +# Env wins over YAML +# ------------------------------------------------------------------ + +def test_env_model_wins_over_yaml_model(monkeypatch): + """When both env MODEL and YAML model are set, env wins.""" + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", "GLM-4.6") + model, _ = _resolve_model_and_provider_from_env( + yaml_model="MiniMax-M2.7", yaml_provider="", providers=_REGISTRY, + ) + assert model == "GLM-4.6" + + +def test_env_provider_wins_over_yaml_provider(monkeypatch): + """Env MODEL_PROVIDER (when a registered slug) wins over YAML provider.""" + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", "GLM-4.6") + monkeypatch.setenv("MODEL_PROVIDER", "zai") + _, provider = _resolve_model_and_provider_from_env( + yaml_model="", yaml_provider="minimax", providers=_REGISTRY, + ) + assert provider == "zai" + + +# ------------------------------------------------------------------ +# YAML fallback (no env) +# ------------------------------------------------------------------ + +def test_no_env_falls_back_to_yaml(monkeypatch): + """Workspace whose env doesn't set MODEL/MODEL_PROVIDER falls back + to the YAML config — preserves existing operator workflows.""" + _clear_env(monkeypatch) + model, provider = _resolve_model_and_provider_from_env( + yaml_model="claude-sonnet-4-6", + yaml_provider="anthropic-api", + providers=_REGISTRY, + ) + assert model == "claude-sonnet-4-6" + assert provider == "anthropic-api" + + +def test_no_env_no_yaml_returns_empty(monkeypatch): + """Pure default path — caller (setup) substitutes ``sonnet``.""" + _clear_env(monkeypatch) + model, provider = _resolve_model_and_provider_from_env( + yaml_model="", yaml_provider="", providers=_REGISTRY, + ) + assert model == "" + assert provider is None + + +# ------------------------------------------------------------------ +# Whitespace / empty-value defensive cases +# ------------------------------------------------------------------ + +def test_whitespace_only_env_treated_as_unset(monkeypatch): + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", " ") + monkeypatch.setenv("MODEL_PROVIDER", " ") + model, provider = _resolve_model_and_provider_from_env( + yaml_model="opus", yaml_provider="", providers=_REGISTRY, + ) + assert model == "opus" + assert provider is None + + +def test_empty_env_value_treated_as_unset(monkeypatch): + _clear_env(monkeypatch) + monkeypatch.setenv("MODEL", "") + monkeypatch.setenv("MODEL_PROVIDER", "") + model, provider = _resolve_model_and_provider_from_env( + yaml_model="sonnet", yaml_provider="", providers=_REGISTRY, + ) + assert model == "sonnet" + assert provider is None