Merge pull request #32 from Molecule-AI/fix/canvas-picked-model-and-boot-debug-logging

fix: canvas-picked model + boot debug logging + restore claude_sdk_executor.py
2026-05-02 21:44:33 -07:00 · 2026-05-02 21:44:33 -07:00 · c09a268ae4
commit c09a268ae4
parent 863a1e6f87 78ae139609
3 changed files with 247 additions and 0 deletions
--- a/adapter.py
+++ b/adapter.py
@ -15,6 +15,41 @@ logger = logging.getLogger(__name__)
 # the workspace by polling /transcript?limit=999999.
 _TRANSCRIPT_MAX_LIMIT = 1000

+# Auth env names to audit at boot. Order is informational; presence/absence
+# of each is logged so the operator can see at a glance which key the
+# workspace was started with vs which is missing. NEVER log values — just
+# the boolean "set"/"unset" per name. Adding a new vendor: add its env
+# name here so the audit reports it too. Keep in sync with the matching
+# list in entrypoint.sh's log_boot_context().
+_AUTH_ENV_AUDIT = (
+    # tests/test_adapter_logging.py asserts that this tuple and the
+    # `for var in ...` loop in entrypoint.sh contain the same names.
+    # Anthropic / Claude Code names.
+    "CLAUDE_CODE_OAUTH_TOKEN",
+    "ANTHROPIC_API_KEY",
+    "ANTHROPIC_AUTH_TOKEN",
+    # NOTE(review): judging by its name this is an endpoint override, not
+    # a credential — confirm. It is reported as set/unset like the rest.
+    "ANTHROPIC_BASE_URL",
+    # Other vendors' API keys.
+    "MINIMAX_API_KEY",
+    "GLM_API_KEY",
+    "KIMI_API_KEY",
+    "DEEPSEEK_API_KEY",
+)
+
+
+def _audit_auth_env_presence() -> None:
+    """Emit one INFO record stating, per audited env name, "set" or "unset".
+
+    Only the NAME and its presence are written — the VALUE is never
+    formatted into the message. A name whose value is the empty string
+    is reported as "unset", the same as a name that is absent from the
+    environment. The names come from ``_AUTH_ENV_AUDIT`` and are
+    reported in that tuple's order, comma-separated, on a single line
+    so an operator reading docker logs can tell a missing-key problem
+    from a routing problem at a glance.
+    """
+    entries = []
+    for env_name in _AUTH_ENV_AUDIT:
+        # Truthiness check: both a missing variable (None) and an empty
+        # string fall through to "unset".
+        state = "set" if os.environ.get(env_name) else "unset"
+        entries.append(f"{env_name}={state}")
+    logger.info("auth env audit: %s", ", ".join(entries))
+
+
 # Auth-mode constants — provider entries use one of these strings.
 # Drives validation behavior in setup() (third-party requires base_url
 # resolution; oauth/anthropic-api leave base_url=None for CLI defaults).
@ -359,6 +394,16 @@ class ClaudeCodeAdapter(BaseAdapter):
            "/".join(auth_env_options),
        )

+        # Audit which auth-relevant env vars are actually present (NAMES
+        # ONLY — never values). Boot-time visibility into "is the key
+        # missing or wrong" was the #1 ask after the 2026-05-02
+        # crash-loop incident: docker logs showed "missing X" with no
+        # hint about which vendor envs WERE set, so an operator with
+        # MINIMAX_API_KEY couldn't tell at a glance whether the
+        # ANTHROPIC_AUTH_TOKEN gap was the cause. This one-line audit
+        # closes that gap. See _audit_auth_env_presence above.
+        _audit_auth_env_presence()
+
        # Auth check — any of the provider's accepted env vars satisfies.
        # Warning (not raise) so a workspace can still boot for non-LLM
        # work (terminal, file editing) while the operator sets the key.
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -9,6 +9,36 @@
 # Pattern matches the legacy monorepo workspace-template/entrypoint.sh:
 # fix volume ownership as root, then re-exec via gosu as agent (uid 1000).

+# Boot-context snapshot — emitted on EVERY container start, including
+# every restart of a crash-loop. Lets `docker logs` answer "what env
+# was actually present?" without having to docker exec into a dying
+# container. Logs NAMES of auth-relevant env vars, never VALUES. Fires
+# twice (once as root pre-gosu, once as agent post-gosu) so an operator
+# can see whether a value was lost across the privilege drop.
+# Keep the env-name list in sync with adapter.py's _AUTH_ENV_AUDIT —
+# the same set of vendors should be audited from both sides.
+log_boot_context() {
+    # Print a boot-context banner to stdout: timestamp, effective uid/gid,
+    # hostname, workspace/platform identifiers, the state of /configs and
+    # /workspace, and one "env NAME=set|unset" line per auth-relevant
+    # variable. Values of the auth variables are never printed.
+    echo "----- entrypoint boot $(date -u +%Y-%m-%dT%H:%M:%SZ) -----"
+    echo "uid=$(id -u) gid=$(id -g) user=$(id -un 2>/dev/null || echo unknown)"
+    echo "hostname=$(hostname) workspace_id=${WORKSPACE_ID:-<unset>}"
+    echo "platform_url=${PLATFORM_URL:-<unset>}"
+    echo "configs_dir: $(ls -ld /configs 2>/dev/null || echo MISSING)"
+    # The MISSING fallback has to be attached to `ls` itself (inside the
+    # group): a pipeline's exit status is that of its LAST command, so a
+    # fallback written after `| tr` would never fire — tr succeeds even
+    # when ls failed and produced no output.
+    echo "configs_contents: $( { ls /configs 2>/dev/null || echo MISSING; } | tr '\n' ' ')"
+    echo "workspace_dir: $(ls -ld /workspace 2>/dev/null || echo MISSING)"
+    # Auth env presence (NAMES + set/unset only — never the values).
+    # Mirror of _AUTH_ENV_AUDIT in adapter.py — keep in sync if you add a vendor.
+    for var in CLAUDE_CODE_OAUTH_TOKEN ANTHROPIC_API_KEY ANTHROPIC_AUTH_TOKEN ANTHROPIC_BASE_URL MINIMAX_API_KEY GLM_API_KEY KIMI_API_KEY DEEPSEEK_API_KEY; do
+        # Indirect lookup of the variable whose name is in $var. The eval
+        # is safe only because $var iterates over the literal names above;
+        # never feed it externally supplied names.
+        eval "val=\$$var"
+        # Empty and unset are both reported as "unset".
+        if [ -n "$val" ]; then
+            echo "env $var=set"
+        else
+            echo "env $var=unset"
+        fi
+    done
+    echo "------------------------------------------------"
+}
+log_boot_context
+
 if [ "$(id -u)" = "0" ]; then
    # Configs volume is created by Docker as root; agent needs write access
    # for plugin installs, memory writes, .auth_token rotation, etc.
--- a/tests/test_adapter_logging.py
+++ b/tests/test_adapter_logging.py
@ -0,0 +1,172 @@
+"""Tests for the adapter-side boot debug logging helpers.
+
+The 2026-05-02 crash-loop diagnosis hinged on operators being able to see,
+from `docker logs` alone, *which* auth env names were set vs unset at boot.
+This test pins that contract — `_audit_auth_env_presence` must emit a
+single INFO line listing every name in `_AUTH_ENV_AUDIT` with its presence
+status, and must NEVER include the value.
+
+Test isolation: adapter.py imports molecule_runtime + a2a at module load.
+Neither is installed in this template's test env (the template ships its
+own stripped-down test set so CI doesn't pull a heavy runtime wheel just
+to lint the adapter helpers). We stub both with empty modules so the
+audit helpers can import cleanly.
+"""
+from __future__ import annotations
+
+import importlib.util
+import logging
+import sys
+import types
+from pathlib import Path
+
+import pytest
+
+
+@pytest.fixture
+def adapter_module(monkeypatch):
+    """Load the template's adapter module without its molecule_runtime + a2a deps.
+
+    The full adapter requires a2a-sdk + molecule_runtime at import time,
+    which aren't installed in the lean test env. Each dotted module name
+    is registered in ``sys.modules`` as a stub carrying only the (empty)
+    classes adapter.py references at module load, so the module-level
+    helpers (_AUTH_ENV_AUDIT, _audit_auth_env_presence) can be imported
+    in isolation.
+
+    Every ``sys.modules`` / ``sys.path`` change goes through
+    ``monkeypatch`` so it is undone at teardown and cannot leak into
+    sibling tests.
+
+    Returns:
+        The freshly executed ``adapter`` module object. It is NOT
+        registered in ``sys.modules``.
+    """
+    # Dotted module name -> names of the placeholder classes it must expose.
+    stubbed_modules = {
+        "molecule_runtime": (),
+        "molecule_runtime.adapters": (),
+        "molecule_runtime.adapters.base": (
+            "BaseAdapter",
+            "AdapterConfig",
+            "RuntimeCapabilities",
+        ),
+        "a2a": (),
+        "a2a.server": (),
+        "a2a.server.agent_execution": ("AgentExecutor",),
+    }
+    for dotted_name, class_names in stubbed_modules.items():
+        stub = types.ModuleType(dotted_name)
+        for class_name in class_names:
+            setattr(stub, class_name, type(class_name, (), {}))
+        monkeypatch.setitem(sys.modules, dotted_name, stub)
+
+    template_dir = Path(__file__).resolve().parent.parent
+    monkeypatch.syspath_prepend(str(template_dir))
+
+    # Drop any cached "adapter" a sibling test imported (real or
+    # partially stubbed). delitem — not a bare sys.modules.pop — so the
+    # previous entry is put back when this fixture is torn down.
+    monkeypatch.delitem(sys.modules, "adapter", raising=False)
+    spec = importlib.util.spec_from_file_location("adapter", template_dir / "adapter.py")
+    # spec_from_file_location is typed Optional; fail with a readable
+    # message instead of an AttributeError on None.
+    assert spec is not None and spec.loader is not None, (
+        f"could not build an import spec for {template_dir / 'adapter.py'}"
+    )
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return mod
+
+
+def test_audit_lists_every_name_with_presence(adapter_module, monkeypatch, caplog):
+    """The audit log must enumerate every name in _AUTH_ENV_AUDIT, set or unset.
+
+    Scenario: only MINIMAX_API_KEY is set. The audit must emit exactly
+    one INFO record that reports MINIMAX_API_KEY as "set", every other
+    audited name as "unset", and never contains the secret value.
+    """
+    secret = "fake-secret-MUST-NOT-LEAK"
+    # Clear from the tuple itself (not a hand-copied list) so a newly
+    # audited name that happens to be set on the CI host cannot skew
+    # the result.
+    for name in adapter_module._AUTH_ENV_AUDIT:
+        monkeypatch.delenv(name, raising=False)
+    monkeypatch.setenv("MINIMAX_API_KEY", secret)
+
+    with caplog.at_level(logging.INFO, logger="adapter"):
+        adapter_module._audit_auth_env_presence()
+
+    # Single log record, INFO level, prefix "auth env audit:"
+    matching = [r for r in caplog.records if "auth env audit" in r.getMessage()]
+    assert len(matching) == 1, f"expected exactly one audit record, got {len(matching)}"
+    record = matching[0]
+    assert record.levelno == logging.INFO, (
+        f"audit must log at INFO, got {record.levelname}"
+    )
+    msg = record.getMessage()
+
+    # Every audited name appears with the right status:
+    # MINIMAX_API_KEY is set, ALL others unset.
+    for name in adapter_module._AUTH_ENV_AUDIT:
+        expected = "set" if name == "MINIMAX_API_KEY" else "unset"
+        assert f"{name}={expected}" in msg, (
+            f"audit message should report {name}={expected}: {msg!r}"
+        )
+
+    # Critical security assertion: the SECRET VALUE itself must NOT appear.
+    # If this regresses, the audit is leaking secrets to operator-visible
+    # docker logs and (worse) to the platform's central log aggregator.
+    assert secret not in msg, (
+        "audit log leaked the env VALUE — must be names + set/unset only"
+    )
+    # Also cover any other record captured during the call, not only
+    # the audit line.
+    assert secret not in caplog.text, (
+        "a log record emitted during the audit leaked the env VALUE"
+    )
+
+
+def test_audit_with_all_unset(adapter_module, monkeypatch, caplog):
+    """With no auth env configured (the crash-loop scenario), every name reads 'unset'."""
+    audited = adapter_module._AUTH_ENV_AUDIT
+    for env_name in audited:
+        monkeypatch.delenv(env_name, raising=False)
+
+    with caplog.at_level(logging.INFO, logger="adapter"):
+        adapter_module._audit_auth_env_presence()
+
+    # Keep only the audit line(s); exactly one is expected.
+    audit_lines = [
+        text
+        for text in (record.getMessage() for record in caplog.records)
+        if "auth env audit" in text
+    ]
+    assert len(audit_lines) == 1
+    (line,) = audit_lines
+    not_reported_unset = [n for n in audited if f"{n}=unset" not in line]
+    assert not not_reported_unset
+
+
+def test_audit_treats_empty_string_as_unset(adapter_module, monkeypatch, caplog):
+    """Empty-string env values report as 'unset' — matches routing semantics.
+
+    workspace-server's nil/empty handling could plausibly export
+    MINIMAX_API_KEY="" instead of omitting it; the audit must report
+    that as unset (it is, semantically) so the operator's "is the key
+    present?" question gets the same answer as the routing layer's.
+    """
+    monkeypatch.setenv("MINIMAX_API_KEY", "")
+    for name in adapter_module._AUTH_ENV_AUDIT:
+        if name != "MINIMAX_API_KEY":
+            monkeypatch.delenv(name, raising=False)
+
+    with caplog.at_level(logging.INFO, logger="adapter"):
+        adapter_module._audit_auth_env_presence()
+
+    # Same explicit single-record check as the sibling tests: a missing
+    # audit line fails with a readable assertion instead of an opaque
+    # IndexError from indexing an empty list.
+    matching = [r for r in caplog.records if "auth env audit" in r.getMessage()]
+    assert len(matching) == 1, f"expected exactly one audit record, got {len(matching)}"
+    msg = matching[0].getMessage()
+    assert "MINIMAX_API_KEY=unset" in msg, (
+        f"empty-string value must be reported as unset: {msg!r}"
+    )
+
+
+def test_audit_env_list_matches_entrypoint_sh(adapter_module):
+    """_AUTH_ENV_AUDIT in adapter.py must mirror the for-loop in entrypoint.sh.
+
+    The entrypoint emits the same set of NAME=set/unset lines BEFORE the
+    Python adapter ever runs (including the pre-gosu and post-gosu boot
+    contexts), so an operator can correlate a missing key across the
+    privilege drop. If the two lists drift, an env name added in one
+    place but not the other becomes invisible at one tier — exactly the
+    crash-loop diagnosis gap we just closed.
+
+    Pin the two lists together by parsing the shell loop and asserting
+    that both sides hold the same names, with no name repeated.
+    """
+    template_dir = Path(__file__).resolve().parent.parent
+    # Explicit UTF-8: entrypoint.sh contains non-ASCII characters (em
+    # dashes in comments), so decoding must not depend on the host locale.
+    entrypoint = (template_dir / "entrypoint.sh").read_text(encoding="utf-8")
+    # The for-loop has the form: `for var in NAME1 NAME2 ... NAMEN; do`
+    # Extract NAME1..NAMEN by finding the `for var in ... ; do` line that
+    # references CLAUDE_CODE_OAUTH_TOKEN (so we don't grab unrelated loops).
+    loop_line = next(
+        (line for line in entrypoint.splitlines()
+         if "for var in" in line and "CLAUDE_CODE_OAUTH_TOKEN" in line),
+        None,
+    )
+    assert loop_line, "entrypoint.sh missing the auth-env audit for-loop"
+    # `    for var in A B C; do` → ['A', 'B', 'C']
+    names_in_shell = (
+        loop_line.split("for var in", 1)[1]
+        .split(";", 1)[0]
+        .split()
+    )
+    names_in_python = list(adapter_module._AUTH_ENV_AUDIT)
+
+    # Set-equality alone would hide a name listed twice on either side.
+    assert len(names_in_shell) == len(set(names_in_shell)), (
+        f"entrypoint.sh for-loop repeats a name: {names_in_shell}"
+    )
+    assert len(names_in_python) == len(set(names_in_python)), (
+        f"adapter.py _AUTH_ENV_AUDIT repeats a name: {names_in_python}"
+    )
+    assert set(names_in_shell) == set(names_in_python), (
+        f"adapter.py _AUTH_ENV_AUDIT ({set(names_in_python)}) "
+        f"and entrypoint.sh for-loop ({set(names_in_shell)}) disagree on the "
+        "audit set — keep them in sync (see the comment in adapter.py)."
+    )