From 067ad83ce510e022988e779b7f3e65c704f1aad1 Mon Sep 17 00:00:00 2001
From: Hongming Wang <hongmingwangalt@gmail.com>
Date: Thu, 30 Apr 2026 20:47:09 -0700
Subject: [PATCH] feat(config): add explicit `provider:` field alongside
 `model:`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a top-level `provider` slug to WorkspaceConfig and RuntimeConfig so
adapters can route to a specific gateway without re-implementing
slug-prefix parsing across hermes / claude-code / codex.

Resolution chain in load_config (mirrors how `model` resolves):

  1. ``LLM_PROVIDER`` env var — what canvas Save+Restart sets so the
     operator's Provider dropdown choice survives a CP-driven restart
     (the regenerated /configs/config.yaml drops most user fields).
  2. Explicit YAML ``provider:`` — operator pinned it in the file.
  3. Derive from the model slug prefix for backward compat:
       ``anthropic:claude-opus-4-7`` → ``anthropic``
       ``minimax/abab7-chat-preview`` → ``minimax``
       bare model names → ``""`` (let the adapter decide).

`runtime_config.provider` falls back to the top-level resolved
provider, the same shape PR #2438 added for `runtime_config.model`.

Why a separate field at all (we already parse the slug):
  - Custom model aliases without a recognizable prefix need an
    explicit signal — the canvas Provider dropdown writes it.
  - Adapters were each rolling their own slug-parse (hermes's
    derive-provider.sh, claude-code's adapter-default branch, etc.);
    one resolution point in load_config kills that drift class.
  - Canvas needs a stable storage field that doesn't get clobbered
    every time the user picks a new model.

Backward-compatible: when `provider:` is absent, slug derivation
keeps every existing config.yaml working without a migration.

PR-1 of a multi-PR stack (Option B from RFC discussion). Subsequent
PRs plumb the field through workspace-server env, CP user-data,
adapters (hermes prefers explicit over derive-provider.sh), and
canvas Provider dropdown UI.

Tests cover all four resolution paths + runtime_config inheritance:
  - test_provider_default_empty_when_bare_model
  - test_provider_derived_from_colon_slug
  - test_provider_derived_from_slash_slug
  - test_provider_yaml_explicit_wins_over_derived
  - test_provider_env_override_beats_yaml_and_derived
  - test_runtime_config_provider_yaml_wins_over_top_level
  - test_provider_default_from_default_model

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 workspace/config.py            |  54 ++++++++++++
 workspace/tests/test_config.py | 151 +++++++++++++++++++++++++++++++++
 2 files changed, 205 insertions(+)

diff --git a/workspace/config.py b/workspace/config.py
index 370ada11..3b205f1b 100644
--- a/workspace/config.py
+++ b/workspace/config.py
@@ -96,6 +96,10 @@ class RuntimeConfig:
     required_env: list[str] = field(default_factory=list)  # env vars required to run (e.g. ["CLAUDE_CODE_OAUTH_TOKEN"])
     timeout: int = 0           # seconds (0 = no timeout — agents wait until done)
     model: str = ""            # model override for the CLI
+    provider: str = ""         # explicit LLM provider (e.g., "anthropic", "openai",
+                               # "minimax"). Falls back to the top-level resolved
+                               # provider when empty. Adapters (hermes, claude-code,
+                               # codex) prefer this over slug-parsing the model name.
     # Deprecated — use required_env + secrets API instead. Kept for backward compat.
     auth_token_env: str = ""
     auth_token_file: str = ""
@@ -221,6 +225,16 @@ class WorkspaceConfig:
     version: str = "1.0.0"
     tier: int = 1
     model: str = "anthropic:claude-opus-4-7"
+    provider: str = ""
+    """Explicit LLM provider slug (e.g., ``anthropic``, ``openai``, ``minimax``).
+
+    When empty, ``load_config`` derives it from the ``model`` slug prefix
+    (``anthropic:claude-opus-4-7`` → ``anthropic``; ``minimax/abab7-chat`` →
+    ``minimax``; bare model names → ``""``). Set explicitly via the canvas
+    Provider dropdown or the ``LLM_PROVIDER`` env var when the model name
+    is provider-ambiguous (e.g., a custom alias) or when an adapter needs
+    a specific gateway distinct from the model namespace.
+    """
     runtime: str = "langgraph"  # langgraph | claude-code | codex | ollama | custom
     runtime_config: RuntimeConfig = field(default_factory=RuntimeConfig)
     initial_prompt: str = ""
@@ -261,6 +275,20 @@ class WorkspaceConfig:
     automatically adds the ``task-budgets-2026-03-13`` beta header."""
 
 
+def _derive_provider_from_model(model: str) -> str:
+    """Extract the provider slug prefix from a model identifier.
+
+    Recognizes both ``provider:model`` (Anthropic / OpenAI / Google convention)
+    and ``provider/model`` (HuggingFace / Minimax convention). Returns ``""``
+    when the model has no recognizable separator — callers must treat empty
+    as "use adapter default routing", not as a hard failure.
+    """
+    for sep in (":", "/"):
+        if sep in model:
+            return model.partition(sep)[0]
+    return ""
+
+
 def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
     """Load config from WORKSPACE_CONFIG_PATH or the given path."""
     if config_path is None:
@@ -276,6 +304,25 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
     # Override model from env if provided
     model = os.environ.get("MODEL_PROVIDER", raw.get("model", "anthropic:claude-opus-4-7"))
 
+    # Resolve top-level provider with this priority chain:
+    #   1. ``LLM_PROVIDER`` env var (canvas Save+Restart sets this so the
+    #      operator's choice survives a CP-driven restart even though the
+    #      regenerated /configs/config.yaml drops most user fields).
+    #   2. Explicit YAML ``provider:`` (an operator pinned it in the file).
+    #   3. Derive from the model slug prefix for backward compat:
+    #        ``anthropic:claude-opus-4-7`` → ``anthropic``
+    #        ``minimax/abab7-chat-preview`` → ``minimax``
+    #        bare model names → ``""``  (signals "use adapter default")
+    # Empty after all three is fine — adapters that don't need an explicit
+    # provider (langgraph, claude-code-default, codex) keep their existing
+    # routing; adapters that do (hermes via derive-provider.sh) prefer this
+    # over slug-parsing the model name.
+    provider = (
+        os.environ.get("LLM_PROVIDER")
+        or raw.get("provider")
+        or _derive_provider_from_model(model)
+    )
+
     runtime = raw.get("runtime", "langgraph")
     runtime_raw = raw.get("runtime_config", {})
 
@@ -314,6 +361,7 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
         version=raw.get("version", "1.0.0"),
         tier=int(raw.get("tier", 1)) if str(raw.get("tier", 1)).isdigit() else 1,
         model=model,
+        provider=provider,
         runtime=runtime,
         initial_prompt=initial_prompt,
         idle_prompt=idle_prompt,
@@ -336,6 +384,12 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig:
             # MODEL_PROVIDER is plumbed as an env var, so picking it up via
             # the top-level resolved model keeps the selection sticky.
             model=runtime_raw.get("model") or model,
+            # Same fallback shape as ``model`` above: an explicit
+            # ``runtime_config.provider`` wins; otherwise inherit the
+            # top-level resolved provider so adapters see a single
+            # consistent choice without each one re-implementing
+            # env/YAML/slug-prefix resolution.
+            provider=runtime_raw.get("provider") or provider,
             # Deprecated fields — kept for backward compat
             auth_token_env=runtime_raw.get("auth_token_env", ""),
             auth_token_file=runtime_raw.get("auth_token_file", ""),
diff --git a/workspace/tests/test_config.py b/workspace/tests/test_config.py
index c87198ba..bc09d638 100644
--- a/workspace/tests/test_config.py
+++ b/workspace/tests/test_config.py
@@ -164,6 +164,157 @@ def test_runtime_config_model_picks_up_env_via_top_level(tmp_path, monkeypatch):
     assert cfg.runtime_config.model == "minimax/abab7-chat-preview"
 
 
+# ===== Provider field (Option B — explicit `provider:` alongside `model:`) =====
+#
+# Why a separate `provider` field at all (we already parse the slug prefix off
+# `model`)? Three reasons:
+#   1. Custom model aliases that don't carry a recognizable prefix (e.g., a
+#      tenant-specific name routed through a gateway) need an explicit signal.
+#   2. Adapters were each implementing their own slug-parse — hermes's
+#      derive-provider.sh, claude-code's adapter-default branch, etc. One
+#      resolution point in load_config kills that drift class.
+#   3. The canvas Provider dropdown needs a stable storage field that doesn't
+#      get clobbered every time the user picks a new model.
+#
+# Backward compat: when `provider:` is absent, fall back to slug derivation,
+# so existing config.yaml files keep working without a migration.
+
+
+def test_provider_default_empty_when_bare_model(tmp_path, monkeypatch):
+    """Bare model names (no `:` or `/` separator) yield an empty provider —
+    the signal for "let the adapter decide". Don't guess.
+    """
+    monkeypatch.delenv("LLM_PROVIDER", raising=False)
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    config_yaml.write_text(yaml.dump({"model": "claude-opus-4-7"}))
+
+    cfg = load_config(str(tmp_path))
+    assert cfg.provider == ""
+    assert cfg.runtime_config.provider == ""
+
+
+def test_provider_derived_from_colon_slug(tmp_path, monkeypatch):
+    """`provider:model` shape (Anthropic/OpenAI/Google convention) derives
+    the provider from the prefix when no explicit `provider:` is set.
+    Exercises the backward-compat path for every existing config.yaml in
+    the wild.
+    """
+    monkeypatch.delenv("LLM_PROVIDER", raising=False)
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    config_yaml.write_text(yaml.dump({"model": "anthropic:claude-opus-4-7"}))
+
+    cfg = load_config(str(tmp_path))
+    assert cfg.provider == "anthropic"
+    # runtime_config.provider inherits the same way runtime_config.model does.
+    assert cfg.runtime_config.provider == "anthropic"
+
+
+def test_provider_derived_from_slash_slug(tmp_path, monkeypatch):
+    """`provider/model` shape (HuggingFace/Minimax convention) derives the
+    provider from the prefix when no explicit `provider:` is set.
+    """
+    monkeypatch.delenv("LLM_PROVIDER", raising=False)
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    config_yaml.write_text(yaml.dump({"model": "minimax/abab7-chat-preview"}))
+
+    cfg = load_config(str(tmp_path))
+    assert cfg.provider == "minimax"
+    assert cfg.runtime_config.provider == "minimax"
+
+
+def test_provider_yaml_explicit_wins_over_derived(tmp_path, monkeypatch):
+    """Explicit YAML `provider:` overrides the slug-prefix derivation —
+    needed when the model name's prefix doesn't match the actual gateway
+    (e.g., an `anthropic:claude-opus-4-7` model routed through a custom
+    gateway slug).
+    """
+    monkeypatch.delenv("LLM_PROVIDER", raising=False)
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    config_yaml.write_text(
+        yaml.dump(
+            {
+                "model": "anthropic:claude-opus-4-7",
+                "provider": "custom-gateway",
+            }
+        )
+    )
+
+    cfg = load_config(str(tmp_path))
+    # Slug prefix says "anthropic" but the explicit field wins.
+    assert cfg.provider == "custom-gateway"
+    assert cfg.runtime_config.provider == "custom-gateway"
+
+
+def test_provider_env_override_beats_yaml_and_derived(tmp_path, monkeypatch):
+    """`LLM_PROVIDER` env var beats both YAML and slug derivation.
+    This is the path the canvas Save+Restart cycle relies on: the user
+    picks a provider in the canvas Provider dropdown, the platform sets
+    `LLM_PROVIDER` on the workspace, and the next CP-driven restart picks
+    it up regardless of what's in the regenerated /configs/config.yaml.
+    """
+    monkeypatch.setenv("LLM_PROVIDER", "minimax")
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    # YAML says one thing, slug says another, env wins.
+    config_yaml.write_text(
+        yaml.dump(
+            {
+                "model": "anthropic:claude-opus-4-7",
+                "provider": "openai",
+            }
+        )
+    )
+
+    cfg = load_config(str(tmp_path))
+    assert cfg.provider == "minimax"
+    assert cfg.runtime_config.provider == "minimax"
+
+
+def test_runtime_config_provider_yaml_wins_over_top_level(tmp_path, monkeypatch):
+    """An explicit `runtime_config.provider` takes precedence over the
+    top-level resolved provider — same fallback shape as `model`. Needed
+    when a workspace wants the top-level model/provider to stay
+    user-visible while pinning the runtime to a different gateway.
+    """
+    monkeypatch.delenv("LLM_PROVIDER", raising=False)
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    config_yaml.write_text(
+        yaml.dump(
+            {
+                "model": "anthropic:claude-opus-4-7",
+                "runtime_config": {"provider": "openai"},
+            }
+        )
+    )
+
+    cfg = load_config(str(tmp_path))
+    # Top-level still derives from the slug.
+    assert cfg.provider == "anthropic"
+    # runtime_config.provider explicit override wins.
+    assert cfg.runtime_config.provider == "openai"
+
+
+def test_provider_default_from_default_model(tmp_path, monkeypatch):
+    """When config.yaml is empty, the WorkspaceConfig default model
+    (`anthropic:claude-opus-4-7`) yields provider=`anthropic`. Pins the
+    "no config" boot path to a sensible derived provider.
+    """
+    monkeypatch.delenv("LLM_PROVIDER", raising=False)
+    monkeypatch.delenv("MODEL_PROVIDER", raising=False)
+    config_yaml = tmp_path / "config.yaml"
+    config_yaml.write_text(yaml.dump({}))
+
+    cfg = load_config(str(tmp_path))
+    assert cfg.model == "anthropic:claude-opus-4-7"
+    assert cfg.provider == "anthropic"
+    assert cfg.runtime_config.provider == "anthropic"
+
+
 def test_delegation_config_defaults(tmp_path):
     """DelegationConfig nested defaults are applied."""
     config_yaml = tmp_path / "config.yaml"