Merge pull request #32 from Molecule-AI/fix/canvas-picked-model-and-boot-debug-logging

fix: canvas-picked model + boot debug logging + restore claude_sdk_executor.py
This commit is contained in:
Hongming Wang 2026-05-02 21:44:33 -07:00 committed by GitHub
commit c09a268ae4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 247 additions and 0 deletions

View File

@ -15,6 +15,41 @@ logger = logging.getLogger(__name__)
# the workspace by polling /transcript?limit=999999.
_TRANSCRIPT_MAX_LIMIT = 1000
# Auth env names to audit at boot. Order is informational; presence/absence
# of each is logged so the operator can see at a glance which key the
# workspace was started with vs which is missing. NEVER log values — just
# the boolean "set"/"unset" per name. Adding a new vendor: add its env
# name here so the audit reports it too. Keep in sync with the matching
# list in entrypoint.sh's log_boot_context().
_AUTH_ENV_AUDIT = (
    # Claude Code / Anthropic variables.
    "CLAUDE_CODE_OAUTH_TOKEN",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL",
    # Vendor-specific API keys.
    "MINIMAX_API_KEY",
    "GLM_API_KEY",
    "KIMI_API_KEY",
    "DEEPSEEK_API_KEY",
)
def _audit_auth_env_presence() -> None:
    """Emit one INFO log line reporting which audited auth env names are set.

    For every name in ``_AUTH_ENV_AUDIT`` the line carries ``NAME=set`` or
    ``NAME=unset``. Only names and presence are logged; the values are
    never read into the message. A variable that exists but is empty is
    reported as ``unset``, because presence is decided by truthiness.
    """
    entries = []
    for env_name in _AUTH_ENV_AUDIT:
        # Truthiness check: a missing variable and an empty one both
        # count as "unset".
        state = "set" if os.environ.get(env_name) else "unset"
        entries.append(f"{env_name}={state}")
    logger.info("auth env audit: %s", ", ".join(entries))
# Auth-mode constants — provider entries use one of these strings.
# Drives validation behavior in setup() (third-party requires base_url
# resolution; oauth/anthropic-api leave base_url=None for CLI defaults).
@ -359,6 +394,16 @@ class ClaudeCodeAdapter(BaseAdapter):
"/".join(auth_env_options),
)
# Audit which auth-relevant env vars are actually present (NAMES
# ONLY — never values). Boot-time visibility into "is the key
# missing or wrong" was the #1 ask after the 2026-05-02
# crash-loop incident: docker logs showed "missing X" with no
# hint about which vendor envs WERE set, so an operator with
# MINIMAX_API_KEY couldn't tell at a glance whether the
# ANTHROPIC_AUTH_TOKEN gap was the cause. This one-line audit
# closes that gap. See _audit_auth_env_presence above.
_audit_auth_env_presence()
# Auth check — any of the provider's accepted env vars satisfies.
# Warning (not raise) so a workspace can still boot for non-LLM
# work (terminal, file editing) while the operator sets the key.

View File

@ -9,6 +9,36 @@
# Pattern matches the legacy monorepo workspace-template/entrypoint.sh:
# fix volume ownership as root, then re-exec via gosu as agent (uid 1000).
# Boot-context snapshot — emitted on EVERY container start, including
# every restart of a crash-loop. Lets `docker logs` answer "what env
# was actually present?" without having to docker exec into a dying
# container. Logs NAMES of auth-relevant env vars, never VALUES. Fires
# twice (once as root pre-gosu, once as agent post-gosu) so an operator
# can see whether a value was lost across the privilege drop.
# Keep the env-name list in sync with adapter.py's _AUTH_ENV_AUDIT —
# the same set of vendors should be audited from both sides.
log_boot_context() {
    # Print a boot-time snapshot to stdout: timestamp, identity, host,
    # platform settings, volume state, and presence of auth env names.
    # Only NAMES and set/unset are printed for auth variables, never values.
    echo "----- entrypoint boot $(date -u +%Y-%m-%dT%H:%M:%SZ) -----"
    echo "uid=$(id -u) gid=$(id -g) user=$(id -un 2>/dev/null || echo unknown)"
    # `hostname` may be absent in minimal images; fall back like `id -un` does.
    echo "hostname=$(hostname 2>/dev/null || echo unknown) workspace_id=${WORKSPACE_ID:-<unset>}"
    echo "platform_url=${PLATFORM_URL:-<unset>}"
    echo "configs_dir: $(ls -ld /configs 2>/dev/null || echo MISSING)"
    # The MISSING fallback must be grouped with `ls` BEFORE the pipe: a
    # pipeline's exit status is that of its last command (`tr`, which
    # succeeds), so a trailing `|| echo MISSING` after `tr` never fires.
    echo "configs_contents: $( { ls /configs 2>/dev/null || echo MISSING; } | tr '\n' ' ')"
    echo "workspace_dir: $(ls -ld /workspace 2>/dev/null || echo MISSING)"
    # Auth env presence (NAMES + set/unset only — never the values).
    # Mirror of _AUTH_ENV_AUDIT in adapter.py — keep in sync if you add a vendor.
    for var in CLAUDE_CODE_OAUTH_TOKEN ANTHROPIC_API_KEY ANTHROPIC_AUTH_TOKEN ANTHROPIC_BASE_URL MINIMAX_API_KEY GLM_API_KEY KIMI_API_KEY DEEPSEEK_API_KEY; do
        # Indirect expansion via eval; `var` only ever holds a name from the
        # fixed list above. `${NAME:-}` keeps this safe under `set -u`.
        eval "val=\${$var:-}"
        if [ -n "$val" ]; then
            echo "env $var=set"
        else
            echo "env $var=unset"
        fi
    done
    # Do not leave the last inspected secret value in a shell variable.
    unset val
    echo "------------------------------------------------"
}
log_boot_context
if [ "$(id -u)" = "0" ]; then
# Configs volume is created by Docker as root; agent needs write access
# for plugin installs, memory writes, .auth_token rotation, etc.

View File

@ -0,0 +1,172 @@
"""Tests for the adapter-side boot debug logging helpers.
The 2026-05-02 crash-loop diagnosis hinged on operators being able to see,
from `docker logs` alone, *which* auth env names were set vs unset at boot.
This test pins that contract: `_audit_auth_env_presence` must emit a
single INFO line listing every name in `_AUTH_ENV_AUDIT` with its presence
status, and must NEVER include the value.
Test isolation: adapter.py imports molecule_runtime + a2a at module load.
Neither is installed in this template's test env (the template ships its
own stripped-down test set so CI doesn't pull a heavy runtime wheel just
to lint the adapter helpers). We stub both with empty modules so the
audit helpers can import cleanly.
"""
from __future__ import annotations
import importlib.util
import logging
import sys
import types
from pathlib import Path
import pytest
@pytest.fixture
def adapter_module(monkeypatch):
    """Load the template's ``adapter.py`` with its heavy dependencies stubbed.

    ``adapter.py`` imports ``molecule_runtime`` and ``a2a`` at module load.
    Both are replaced in ``sys.modules`` with empty stand-in modules that
    expose only the class names the adapter references, so the module-level
    helpers (``_AUTH_ENV_AUDIT``, ``_audit_auth_env_presence``) can be
    imported in isolation.

    Every ``sys.modules`` / ``sys.path`` change goes through ``monkeypatch``
    so it is rolled back at teardown, including removal of any previously
    imported ``adapter`` module.

    Returns:
        The freshly executed ``adapter`` module object.
    """

    def _stub_module(dotted_name, *class_names):
        # Register an empty module under ``dotted_name`` carrying one bare
        # placeholder class per requested name.
        stub = types.ModuleType(dotted_name)
        for class_name in class_names:
            setattr(stub, class_name, type(class_name, (), {}))
        monkeypatch.setitem(sys.modules, dotted_name, stub)

    # Stub molecule_runtime.adapters.base.BaseAdapter / AdapterConfig /
    # RuntimeCapabilities (all referenced at adapter.py module load).
    _stub_module("molecule_runtime")
    _stub_module("molecule_runtime.adapters")
    _stub_module(
        "molecule_runtime.adapters.base",
        "BaseAdapter",
        "AdapterConfig",
        "RuntimeCapabilities",
    )

    # Stub a2a.server.agent_execution.AgentExecutor.
    _stub_module("a2a")
    _stub_module("a2a.server")
    _stub_module("a2a.server.agent_execution", "AgentExecutor")

    template_dir = Path(__file__).resolve().parent.parent
    monkeypatch.syspath_prepend(str(template_dir))

    # Force a fresh load so the stubs take effect even if a sibling test
    # already imported the real (or partially-stubbed) module first.
    # monkeypatch.delitem (instead of a bare pop) restores the previous
    # entry at teardown so this fixture does not leak state to other tests.
    monkeypatch.delitem(sys.modules, "adapter", raising=False)

    adapter_path = template_dir / "adapter.py"
    spec = importlib.util.spec_from_file_location("adapter", adapter_path)
    assert spec is not None, f"could not build an import spec for {adapter_path}"
    assert spec.loader is not None, f"import spec for {adapter_path} has no loader"
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
def test_audit_lists_every_name_with_presence(adapter_module, monkeypatch, caplog):
    """The audit log must enumerate every name in _AUTH_ENV_AUDIT, set or unset.

    One name (MINIMAX_API_KEY) is given a recognisable fake secret and every
    other audited name is removed from the environment. The test then checks
    that exactly one INFO record is emitted, that it reports the right
    set/unset status for every audited name, and that the secret value
    appears in no captured record.
    """
    secret = "fake-secret-MUST-NOT-LEAK"
    set_name = "MINIMAX_API_KEY"
    # Derive the unset list from the audited tuple so a newly added vendor
    # is covered automatically instead of silently inheriting whatever the
    # developer's shell happens to export.
    unset_names = [
        name for name in adapter_module._AUTH_ENV_AUDIT if name != set_name
    ]

    monkeypatch.setenv(set_name, secret)
    for name in unset_names:
        monkeypatch.delenv(name, raising=False)

    with caplog.at_level(logging.INFO, logger="adapter"):
        adapter_module._audit_auth_env_presence()

    # Single log record, INFO level, prefix "auth env audit:"
    matching = [r for r in caplog.records if "auth env audit" in r.getMessage()]
    assert len(matching) == 1, f"expected exactly one audit record, got {len(matching)}"
    assert matching[0].levelno == logging.INFO, (
        f"audit record must be INFO, got {matching[0].levelname}"
    )
    msg = matching[0].getMessage()

    # Every audited name appears with set/unset
    for name in adapter_module._AUTH_ENV_AUDIT:
        assert f"{name}=" in msg, f"audit message missing {name}: {msg!r}"

    # MINIMAX_API_KEY is set, all others unset
    assert "MINIMAX_API_KEY=set" in msg
    for name in unset_names:
        assert f"{name}=unset" in msg, f"expected {name}=unset in {msg!r}"

    # Critical security assertion: the SECRET VALUE itself must NOT appear
    # in any captured record, not just the audit line.
    for record in caplog.records:
        assert secret not in record.getMessage(), (
            "audit log leaked the env VALUE — must be names + set/unset only"
        )
def test_audit_with_all_unset(adapter_module, monkeypatch, caplog):
    """With no audited env var present, every name must be reported 'unset'."""
    audited = adapter_module._AUTH_ENV_AUDIT
    for env_name in audited:
        monkeypatch.delenv(env_name, raising=False)

    with caplog.at_level(logging.INFO, logger="adapter"):
        adapter_module._audit_auth_env_presence()

    audit_messages = [
        record.getMessage()
        for record in caplog.records
        if "auth env audit" in record.getMessage()
    ]
    assert len(audit_messages) == 1
    audit_line = audit_messages[0]

    # Collect any name whose "=unset" marker is absent; none are expected.
    not_reported_unset = [
        env_name for env_name in audited if f"{env_name}=unset" not in audit_line
    ]
    assert not not_reported_unset
def test_audit_treats_empty_string_as_unset(adapter_module, monkeypatch, caplog):
    """Empty-string env values report as 'unset'.

    An upstream layer could plausibly export MINIMAX_API_KEY="" instead of
    omitting it. The audit decides presence by truthiness, so an empty
    value must be reported as unset, giving the operator the same answer
    to "is the key present?" as a missing variable would.
    """
    monkeypatch.setenv("MINIMAX_API_KEY", "")
    for name in adapter_module._AUTH_ENV_AUDIT:
        if name != "MINIMAX_API_KEY":
            monkeypatch.delenv(name, raising=False)

    with caplog.at_level(logging.INFO, logger="adapter"):
        adapter_module._audit_auth_env_presence()

    # Assert on the record count explicitly so a missing audit line fails
    # with a readable message rather than an IndexError.
    matching = [r for r in caplog.records if "auth env audit" in r.getMessage()]
    assert len(matching) == 1, f"expected exactly one audit record, got {len(matching)}"
    msg = matching[0].getMessage()
    assert "MINIMAX_API_KEY=unset" in msg
def test_audit_env_list_matches_entrypoint_sh(adapter_module):
    """_AUTH_ENV_AUDIT in adapter.py must mirror the for-loop in entrypoint.sh.

    entrypoint.sh prints its own NAME=set/unset lines from a hard-coded
    ``for var in ...; do`` loop. If the two lists drift, an env name added
    in one place but not the other becomes invisible at one tier — exactly
    the crash-loop diagnosis gap the audit was added to close.

    Pin the two lists by parsing the shell loop and asserting set-equality.
    The parser expects the whole name list on a single line.
    """
    template_dir = Path(__file__).resolve().parent.parent
    # Decode explicitly: entrypoint.sh contains non-ASCII characters (em
    # dashes in comments), so the locale default encoding is not reliable.
    entrypoint = (template_dir / "entrypoint.sh").read_text(encoding="utf-8")

    # The for-loop has the form: `for var in NAME1 NAME2 ... NAMEN; do`
    # Extract NAME1..NAMEN by finding the `for var in ... ; do` line that
    # references CLAUDE_CODE_OAUTH_TOKEN (so we don't grab unrelated loops).
    loop_line = next(
        (
            line
            for line in entrypoint.splitlines()
            if "for var in" in line and "CLAUDE_CODE_OAUTH_TOKEN" in line
        ),
        None,
    )
    assert loop_line, "entrypoint.sh missing the auth-env audit for-loop"

    # ` for var in A B C; do` → ['A', 'B', 'C']
    names_in_shell = (
        loop_line.split("for var in", 1)[1]
        .split(";", 1)[0]
        .split()
    )
    assert set(names_in_shell) == set(adapter_module._AUTH_ENV_AUDIT), (
        f"adapter.py _AUTH_ENV_AUDIT ({set(adapter_module._AUTH_ENV_AUDIT)}) "
        f"and entrypoint.sh for-loop ({set(names_in_shell)}) disagree on the "
        "audit set — keep them in sync (see the comment in adapter.py)."
    )