chore(workspace): drop claude_sdk_executor — Phase 2 of #87
Phase 2 of the universal-runtime refactor (task #87). Now that the claude-code template repo ships its own claude_sdk_executor.py (template PR #13 merged + image rebuilt at 07:36 UTC) the molecule-runtime no longer needs to ship the file. Deletes: - workspace/claude_sdk_executor.py (704 LOC) - workspace/tests/test_claude_sdk_executor.py (~1.6K LOC) Updates: - workspace/runtime_wedge.py — drops the "Compatibility shim" docstring section. The shim was time-bounded ("removed once #87 Phase 2 lands"); this is that PR. - workspace/tests/test_runtime_wedge.py — drops the TestClaudeSdkExecutorReExportShim test class (the shim doesn't exist anymore so the identity assertions would fail at import). - workspace/tests/conftest.py — drops the claude_agent_sdk stub. Its only consumer was test_claude_sdk_executor.py which is gone; no other test imports the SDK. - workspace/cli_executor.py — comment refresh: claude-code template repo (not workspace/) is now the home for ClaudeSDKExecutor. Verified-safe-to-delete: - heartbeat.py: migrated to runtime_wedge in PR #2154 (no longer imports from claude_sdk_executor) - cli_executor.py: only comments referenced claude_sdk_executor; its line-117 ValueError defends against accidental routing - tests: only test_claude_sdk_executor.py + test_runtime_wedge.py's shim class consumed the deleted module; both removed in this PR Verification: - 1182/1182 workspace pytest pass (was 1251; -69 = exactly the deleted test cases — zero unexpected regressions) - No live import of claude_sdk_executor anywhere in molecule-core after deletion (grep verified) Closes #87 for the claude-code adapter. Hermes is already template-only. The remaining adapter-specific code in workspace/ is cli_executor.py (codex/ollama/gemini-cli) tracked by task #122. preflight.py's SUPPORTED_RUNTIMES static list is tracked by task #123 (PR #2155 in flight). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
66b9c04057
commit
4b5ac2ebc2
@ -1,715 +0,0 @@
|
|||||||
"""SDK-based agent executor for Claude Code runtime.
|
|
||||||
|
|
||||||
Uses the official `claude-agent-sdk` Python package to invoke the Claude Code
|
|
||||||
engine programmatically — no subprocess, no stdout parsing, no zombie reap.
|
|
||||||
|
|
||||||
Replaces CLIAgentExecutor for the `claude-code` runtime only. Other CLI runtimes
|
|
||||||
(codex, ollama) keep using `cli_executor.py`.
|
|
||||||
|
|
||||||
Benefits over CLI subprocess:
|
|
||||||
- No per-message ~500ms startup overhead
|
|
||||||
- No stdout buffering issues
|
|
||||||
- Native Python session management (no JSON parsing of stdout)
|
|
||||||
- Real message stream — can surface tool calls in future for live UX
|
|
||||||
- Cooperative cancel (closes the query async generator on cancel())
|
|
||||||
- Same Claude Code engine, so plugins / skills / CLAUDE.md still apply
|
|
||||||
|
|
||||||
Concurrency model
|
|
||||||
-----------------
|
|
||||||
Turns are serialized per-executor via an asyncio.Lock. The old CLI executor
|
|
||||||
serialized implicitly by spawning one subprocess per message and awaiting it;
|
|
||||||
the SDK removes that, so we re-introduce serialization explicitly. This keeps
|
|
||||||
session_id updates race-free and makes cancel() well-defined (there's at most
|
|
||||||
one active stream at any given moment).
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from collections.abc import AsyncIterator, Callable
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from typing import TYPE_CHECKING, Any
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
import claude_agent_sdk as sdk
|
|
||||||
|
|
||||||
from a2a.server.agent_execution import AgentExecutor, RequestContext
|
|
||||||
from a2a.server.events import EventQueue
|
|
||||||
from a2a.helpers import new_agent_text_message
|
|
||||||
|
|
||||||
from executor_helpers import (
|
|
||||||
CONFIG_MOUNT,
|
|
||||||
MEMORY_CONTENT_MAX_CHARS,
|
|
||||||
WORKSPACE_MOUNT,
|
|
||||||
auto_push_hook,
|
|
||||||
brief_summary,
|
|
||||||
collect_outbound_files,
|
|
||||||
commit_memory,
|
|
||||||
extract_attached_files,
|
|
||||||
extract_message_text,
|
|
||||||
get_a2a_instructions,
|
|
||||||
get_hma_instructions,
|
|
||||||
get_mcp_server_path,
|
|
||||||
get_system_prompt,
|
|
||||||
read_delegation_results,
|
|
||||||
recall_memories,
|
|
||||||
sanitize_agent_error,
|
|
||||||
set_current_task,
|
|
||||||
)
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from heartbeat import HeartbeatLoop
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
_NO_TEXT_MSG = "Error: message contained no text content."
|
|
||||||
_NO_RESPONSE_MSG = "(no response generated)"
|
|
||||||
_MAX_RETRIES = 3
|
|
||||||
_BASE_RETRY_DELAY_S = 5
|
|
||||||
# Cap for stderr captured from the CLI subprocess in the executor log. Keeps
|
|
||||||
# log lines bounded while still surfacing enough context to diagnose crashes.
|
|
||||||
# Fixes #66 (previously the executor logged nothing beyond the generic
|
|
||||||
# "Check stderr output for details" message).
|
|
||||||
_PROCESS_ERROR_STDERR_MAX_CHARS = 4096
|
|
||||||
|
|
||||||
# Substrings in error messages that indicate a transient failure worth retrying.
|
|
||||||
_RETRYABLE_PATTERNS = (
|
|
||||||
"rate",
|
|
||||||
"limit",
|
|
||||||
"429",
|
|
||||||
"overloaded",
|
|
||||||
"capacity",
|
|
||||||
"exit code 1",
|
|
||||||
"try again",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Wedge state moved to runtime_wedge (see that module's docstring for
|
|
||||||
# the rationale + the broader "Compatibility shim" note). This block
|
|
||||||
# re-exports under the historical names so the in-file call sites in
|
|
||||||
# _run_query stay terse and any external consumer that imported them
|
|
||||||
# from claude_sdk_executor keeps working for one release cycle.
|
|
||||||
from runtime_wedge import ( # noqa: E402
|
|
||||||
clear_wedge as _clear_sdk_wedge_on_success,
|
|
||||||
is_wedged,
|
|
||||||
mark_wedged as _mark_sdk_wedged,
|
|
||||||
reset_for_test as _reset_sdk_wedge_for_test,
|
|
||||||
wedge_reason,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Names below are re-exported (not consumed inside this file) for
|
|
||||||
# backwards compatibility with third-party adapters that imported them
|
|
||||||
# from claude_sdk_executor before the wedge state moved to runtime_wedge.
|
|
||||||
# Listing them in __all__ marks the intent explicitly and stops static
|
|
||||||
# analysis from flagging the imports as unused.
|
|
||||||
__all__ = [
|
|
||||||
"is_wedged",
|
|
||||||
"wedge_reason",
|
|
||||||
"_reset_sdk_wedge_for_test",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
# Per-tool-use summarizers. Reads the most-useful argument from each
|
|
||||||
# tool's input dict so the canvas progress feed shows
|
|
||||||
# `🛠 Read /tmp/foo` instead of the bare tool name. Anything not in the
|
|
||||||
# table falls through to a generic "🛠 <tool>(…)" line. Order keys by
|
|
||||||
# tool frequency so a future contributor can see the high-traffic
|
|
||||||
# tools first.
|
|
||||||
_TOOL_USE_SUMMARIZERS: dict[str, Callable[[dict], str]] = {
|
|
||||||
"Read": lambda i: f"📄 Read {i.get('file_path', '?')}",
|
|
||||||
"Write": lambda i: f"✍️ Write {i.get('file_path', '?')}",
|
|
||||||
"Edit": lambda i: f"✏️ Edit {i.get('file_path', '?')}",
|
|
||||||
"Bash": lambda i: f"⚡ Bash: {(i.get('command') or '')[:80]}",
|
|
||||||
"Glob": lambda i: f"🔍 Glob {i.get('pattern', '?')}",
|
|
||||||
"Grep": lambda i: f"🔍 Grep {i.get('pattern', '?')}",
|
|
||||||
"WebFetch": lambda i: f"🌐 WebFetch {i.get('url', '?')}",
|
|
||||||
"WebSearch": lambda i: f"🌐 WebSearch {i.get('query', '?')}",
|
|
||||||
"Task": lambda i: f"🤖 Task: {(i.get('description') or '')[:60]}",
|
|
||||||
"TodoWrite": lambda _i: "📝 TodoWrite",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_tool_use(tool_name: str, tool_input: dict) -> str:
|
|
||||||
summarizer = _TOOL_USE_SUMMARIZERS.get(tool_name)
|
|
||||||
if summarizer:
|
|
||||||
try:
|
|
||||||
return summarizer(tool_input or {})[:200]
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
# Generic fallback. Truncated so a tool with a giant input dict
|
|
||||||
# doesn't write a 10kB activity row per call.
|
|
||||||
return f"🛠 {tool_name}(…)"[:200]
|
|
||||||
|
|
||||||
|
|
||||||
async def _report_tool_use(block: Any) -> None:
|
|
||||||
"""Fire-and-forget agent_log activity row per tool the SDK invoked,
|
|
||||||
so the canvas's MyChat live-progress feed can render each step
|
|
||||||
Claude is doing instead of staring at a single spinner.
|
|
||||||
|
|
||||||
Posts directly to /workspaces/:id/activity rather than through
|
|
||||||
a2a_tools.report_activity — that helper also pushes a current_task
|
|
||||||
heartbeat which would duplicate as a TASK_UPDATED line in the
|
|
||||||
chat feed. The workspace card's current_task is already set
|
|
||||||
once per turn by the executor's set_current_task(brief_summary)
|
|
||||||
call, so the per-tool telemetry stays a chat-only signal.
|
|
||||||
|
|
||||||
Best-effort — any failure (network blip, platform unreachable, the
|
|
||||||
block didn't have the attrs we expected) is swallowed silently.
|
|
||||||
The tool will still execute regardless; only the progress
|
|
||||||
telemetry is lost. Deliberately does NOT raise — a malformed
|
|
||||||
block must not abort the message-stream iteration in
|
|
||||||
`_run_query`.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
# Lazy imports to keep this helper non-essential — the
|
|
||||||
# executor must still run when the workspace's network/auth
|
|
||||||
# plumbing isn't fully set up (e.g. unit tests).
|
|
||||||
import httpx
|
|
||||||
from a2a_client import PLATFORM_URL, WORKSPACE_ID
|
|
||||||
from platform_auth import auth_headers
|
|
||||||
except Exception:
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
tool_name = getattr(block, "name", "") or ""
|
|
||||||
tool_input = getattr(block, "input", {}) or {}
|
|
||||||
if not tool_name:
|
|
||||||
return
|
|
||||||
summary = _summarize_tool_use(tool_name, tool_input)
|
|
||||||
# 5s budget — long enough to absorb a single platform GC
|
|
||||||
# pause, short enough that a wedged platform doesn't slow
|
|
||||||
# the tool-iteration cadence beyond noticeable.
|
|
||||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
|
||||||
await client.post(
|
|
||||||
f"{PLATFORM_URL}/workspaces/{WORKSPACE_ID}/activity",
|
|
||||||
json={
|
|
||||||
"activity_type": "agent_log",
|
|
||||||
"source_id": WORKSPACE_ID,
|
|
||||||
# target_id == source for self-actions. Matches the
|
|
||||||
# convention other self-logged activity rows use
|
|
||||||
# (a2a_receive when the workspace logs its own
|
|
||||||
# outbound reply) so DB consumers joining on
|
|
||||||
# target_id see a well-defined value.
|
|
||||||
"target_id": WORKSPACE_ID,
|
|
||||||
"summary": summary,
|
|
||||||
"status": "ok",
|
|
||||||
"method": tool_name,
|
|
||||||
},
|
|
||||||
headers=auth_headers(),
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
# Telemetry failures must not break the conversation.
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
# Substring patterns that classify an exception as the specific
|
|
||||||
# claude_agent_sdk init-timeout wedge (vs. a rate-limit, transient
|
|
||||||
# subprocess crash, etc.). Match is case-insensitive on the formatted
|
|
||||||
# error string. Adding a new pattern here MUST come with a test in
|
|
||||||
# tests/test_claude_sdk_executor.py — false-positives lock the
|
|
||||||
# workspace into degraded until the next successful query clears it.
|
|
||||||
#
|
|
||||||
# `:initialize` suffix-anchored — the SDK can theoretically time out
|
|
||||||
# on later control messages (in-flight tool callbacks), but those
|
|
||||||
# don't leave the SDK in the unrecoverable post-init state we're
|
|
||||||
# trying to detect. Limit the pattern to the specific wedge.
|
|
||||||
_WEDGE_ERROR_PATTERNS = (
|
|
||||||
"control request timeout: initialize",
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
_SWALLOWED_STDERR_MARKER = "Check stderr output for details"
|
|
||||||
|
|
||||||
|
|
||||||
def _probe_claude_cli_error() -> str | None:
|
|
||||||
"""Run ``claude --print`` directly and capture its stderr + stdout.
|
|
||||||
|
|
||||||
Used as a fallback when the claude-agent-sdk raises a bare ``Exception``
|
|
||||||
with the swallowed "Check stderr output for details" placeholder — that
|
|
||||||
happens when the SDK wraps a stream error from the CLI subprocess and
|
|
||||||
loses both the ``.stderr`` attribute and the exit code. At that point
|
|
||||||
the only way to see the real failure reason (rate limit, auth error,
|
|
||||||
network outage, missing token) is to run the CLI ourselves.
|
|
||||||
|
|
||||||
Bounded by a 30s timeout so a hung CLI can't stall the error path.
|
|
||||||
Returns None if the probe itself failed (wrong invariant — don't
|
|
||||||
corrupt the main error message with probe noise).
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
import subprocess
|
|
||||||
# --print reads stdin, prints response, exits. Empty stdin gives the
|
|
||||||
# CLI something to work with without triggering an actual model call
|
|
||||||
# when it's going to fail anyway.
|
|
||||||
proc = subprocess.run(
|
|
||||||
["claude", "--print"],
|
|
||||||
input="probe",
|
|
||||||
capture_output=True,
|
|
||||||
text=True,
|
|
||||||
timeout=30,
|
|
||||||
)
|
|
||||||
if proc.returncode == 0:
|
|
||||||
# CLI succeeded — the original error was a transient state that
|
|
||||||
# resolved between the SDK failure and our probe. Signal that.
|
|
||||||
return "<cli probe succeeded — error was transient>"
|
|
||||||
raw = (proc.stderr or "") + (proc.stdout or "")
|
|
||||||
raw = raw.strip()
|
|
||||||
if not raw:
|
|
||||||
return f"<cli exited {proc.returncode} with empty output>"
|
|
||||||
if len(raw) > _PROCESS_ERROR_STDERR_MAX_CHARS:
|
|
||||||
raw = raw[:_PROCESS_ERROR_STDERR_MAX_CHARS] + "... [truncated]"
|
|
||||||
return raw
|
|
||||||
except Exception as probe_exc: # pragma: no cover — best-effort diagnostic
|
|
||||||
return f"<probe failed: {type(probe_exc).__name__}: {probe_exc}>"
|
|
||||||
|
|
||||||
|
|
||||||
def _format_process_error(exc: BaseException) -> str:
|
|
||||||
"""Render a Claude-SDK ProcessError (or any ClaudeSDKError) with its full
|
|
||||||
captured context — exit code, stderr, exception type. Plain strings for
|
|
||||||
non-SDK exceptions fall back to str(exc).
|
|
||||||
|
|
||||||
Bounded at _PROCESS_ERROR_STDERR_MAX_CHARS so a runaway CLI can't spam
|
|
||||||
the log. Used by the executor's error path (fixes #66 — the SDK's
|
|
||||||
ProcessError carries `.stderr`/`.exit_code` attributes that the previous
|
|
||||||
code silently discarded, leaving every CLI crash with an identical
|
|
||||||
"Check stderr output for details" message in the workspace log).
|
|
||||||
|
|
||||||
Fixes #160: when the SDK raises a bare ``Exception`` containing the
|
|
||||||
"Check stderr output for details" placeholder (which happens when the
|
|
||||||
CLI subprocess emits a stream error the SDK can't categorize — rate
|
|
||||||
limit, auth, network), there's no ``.stderr``/``.exit_code`` to read.
|
|
||||||
In that case we fall back to running the CLI ourselves via
|
|
||||||
``_probe_claude_cli_error`` so the operator sees the real failure
|
|
||||||
reason (e.g. ``You've hit your limit · resets Apr 17``) instead of
|
|
||||||
chasing ghosts in the workspace logs.
|
|
||||||
"""
|
|
||||||
parts = [f"{type(exc).__name__}: {exc}"]
|
|
||||||
exit_code = getattr(exc, "exit_code", None)
|
|
||||||
if exit_code is not None:
|
|
||||||
parts.append(f"exit_code={exit_code}")
|
|
||||||
stderr = getattr(exc, "stderr", None)
|
|
||||||
if stderr:
|
|
||||||
trimmed = stderr[:_PROCESS_ERROR_STDERR_MAX_CHARS]
|
|
||||||
if len(stderr) > _PROCESS_ERROR_STDERR_MAX_CHARS:
|
|
||||||
trimmed += f"... [{len(stderr) - _PROCESS_ERROR_STDERR_MAX_CHARS} more chars truncated]"
|
|
||||||
parts.append(f"stderr={trimmed!r}")
|
|
||||||
elif exit_code is None and _SWALLOWED_STDERR_MARKER in str(exc):
|
|
||||||
# #160: generic exception with the swallowed-stderr placeholder.
|
|
||||||
# Probe the CLI directly — this is the only way to surface the real
|
|
||||||
# error when the SDK lost it in translation.
|
|
||||||
probed = _probe_claude_cli_error()
|
|
||||||
if probed:
|
|
||||||
parts.append(f"probed_cli_error={probed!r}")
|
|
||||||
return " | ".join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class QueryResult:
|
|
||||||
"""Outcome of a single `query()` stream.
|
|
||||||
|
|
||||||
`text` is the canonical final response; `session_id` is the id the SDK
|
|
||||||
reports in its ResultMessage (used for resume on the next turn).
|
|
||||||
"""
|
|
||||||
text: str
|
|
||||||
session_id: str | None
|
|
||||||
|
|
||||||
|
|
||||||
class ClaudeSDKExecutor(AgentExecutor):
|
|
||||||
"""Executes agent tasks via the claude-agent-sdk programmatic API."""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
system_prompt: str | None,
|
|
||||||
config_path: str,
|
|
||||||
heartbeat: "HeartbeatLoop | None",
|
|
||||||
model: str = "sonnet",
|
|
||||||
):
|
|
||||||
self.system_prompt = system_prompt
|
|
||||||
self.config_path = config_path
|
|
||||||
self.heartbeat = heartbeat
|
|
||||||
self.model = model
|
|
||||||
self._session_id: str | None = None
|
|
||||||
self._active_stream: AsyncIterator[Any] | None = None
|
|
||||||
# Serializes concurrent execute() calls on the same executor so
|
|
||||||
# session_id / _active_stream mutations stay race-free.
|
|
||||||
self._run_lock = asyncio.Lock()
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Prompt + options builders
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
def _resolve_cwd(self) -> str:
|
|
||||||
"""Run in /workspace if it has been populated, otherwise /configs."""
|
|
||||||
if os.path.isdir(WORKSPACE_MOUNT) and os.listdir(WORKSPACE_MOUNT):
|
|
||||||
return WORKSPACE_MOUNT
|
|
||||||
return CONFIG_MOUNT
|
|
||||||
|
|
||||||
def _build_system_prompt(self) -> str | None:
|
|
||||||
"""Compose system prompt from file + A2A + HMA memory instructions."""
|
|
||||||
base = get_system_prompt(self.config_path, fallback=self.system_prompt)
|
|
||||||
a2a = get_a2a_instructions(mcp=True)
|
|
||||||
hma = get_hma_instructions()
|
|
||||||
parts = [p for p in (base, a2a, hma) if p]
|
|
||||||
return "\n\n".join(parts) if parts else None
|
|
||||||
|
|
||||||
def _prepare_prompt(self, user_input: str) -> str:
|
|
||||||
"""Prepend delegation results that arrived while idle."""
|
|
||||||
delegation_context = read_delegation_results()
|
|
||||||
if delegation_context:
|
|
||||||
return (
|
|
||||||
"[Delegation results received while you were idle]\n"
|
|
||||||
f"{delegation_context}\n\n[New message]\n{user_input}"
|
|
||||||
)
|
|
||||||
return user_input
|
|
||||||
|
|
||||||
async def _inject_memories_if_first_turn(self, prompt: str) -> str:
|
|
||||||
if self._session_id:
|
|
||||||
return prompt
|
|
||||||
memories = await recall_memories()
|
|
||||||
if not memories:
|
|
||||||
return prompt
|
|
||||||
return f"[Prior context from memory]\n{memories}\n\n{prompt}"
|
|
||||||
|
|
||||||
def _load_config_dict(self) -> dict:
|
|
||||||
"""Read config.yaml as a raw dict for field-level inspection.
|
|
||||||
|
|
||||||
Returns an empty dict on any I/O or parse error so callers can
|
|
||||||
always use ``.get()`` without guards.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
config_file = os.path.join(self.config_path, "config.yaml")
|
|
||||||
with open(config_file) as f:
|
|
||||||
return yaml.safe_load(f) or {}
|
|
||||||
except Exception:
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def _build_options(self) -> Any:
|
|
||||||
"""Build ClaudeAgentOptions.
|
|
||||||
|
|
||||||
No allowed_tools allowlist — bypassPermissions grants full access,
|
|
||||||
matching the old CLI `--dangerously-skip-permissions` so Claude can
|
|
||||||
use every built-in tool (Task, TodoWrite, NotebookEdit, BashOutput/
|
|
||||||
KillShell, ExitPlanMode, etc.) plus all MCP tools.
|
|
||||||
|
|
||||||
The MCP server launcher uses `sys.executable` so tests and alternate
|
|
||||||
virtual-env layouts don't depend on a `python3` shim being on PATH.
|
|
||||||
|
|
||||||
output_config wiring (issue #652)
|
|
||||||
----------------------------------
|
|
||||||
Reads ``effort`` and ``task_budget`` from config.yaml and populates
|
|
||||||
``output_config`` on the SDK options before the API call:
|
|
||||||
|
|
||||||
- ``effort`` (str): one of low|medium|high|xhigh|max. xhigh is the
|
|
||||||
Opus 4.7 recommended default for long agentic tasks.
|
|
||||||
- ``task_budget`` (int): advisory total-token budget across the full
|
|
||||||
agentic loop. Must be >= 20000 (API minimum) or 0/absent (unset).
|
|
||||||
When set, the ``task-budgets-2026-03-13`` beta header is added so
|
|
||||||
the API accepts the field.
|
|
||||||
"""
|
|
||||||
mcp_servers = {
|
|
||||||
"a2a": {
|
|
||||||
"command": sys.executable,
|
|
||||||
"args": [get_mcp_server_path()],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
create_kwargs: dict = dict(
|
|
||||||
model=self.model,
|
|
||||||
permission_mode="bypassPermissions",
|
|
||||||
cwd=self._resolve_cwd(),
|
|
||||||
mcp_servers=mcp_servers,
|
|
||||||
system_prompt=self._build_system_prompt(),
|
|
||||||
resume=self._session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
# --- output_config: effort + task_budget (issue #652) ---
|
|
||||||
config = self._load_config_dict()
|
|
||||||
output_config: dict = {}
|
|
||||||
effort = config.get("effort", "")
|
|
||||||
task_budget = config.get("task_budget", 0)
|
|
||||||
|
|
||||||
if effort:
|
|
||||||
output_config["effort"] = effort # "low"|"medium"|"high"|"xhigh"|"max"
|
|
||||||
|
|
||||||
if task_budget and int(task_budget) >= 20000:
|
|
||||||
output_config["task_budget"] = {
|
|
||||||
"type": "tokens",
|
|
||||||
"total": int(task_budget),
|
|
||||||
}
|
|
||||||
betas = list(create_kwargs.get("betas", []))
|
|
||||||
if "task-budgets-2026-03-13" not in betas:
|
|
||||||
betas.append("task-budgets-2026-03-13")
|
|
||||||
create_kwargs["betas"] = betas
|
|
||||||
elif task_budget and int(task_budget) > 0:
|
|
||||||
# Below minimum — reject clearly before any API call is made.
|
|
||||||
raise ValueError(
|
|
||||||
f"task_budget must be >= 20000 tokens (got {task_budget})"
|
|
||||||
)
|
|
||||||
|
|
||||||
if output_config:
|
|
||||||
create_kwargs["output_config"] = output_config
|
|
||||||
|
|
||||||
return sdk.ClaudeAgentOptions(**create_kwargs)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# Query streaming
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
async def _run_query(self, prompt: str, options: Any) -> QueryResult:
|
|
||||||
"""Drive the SDK query stream and return a QueryResult.
|
|
||||||
|
|
||||||
Prefers ResultMessage.result (the canonical final text — same field
|
|
||||||
the CLI's --output-format json used) and only falls back to the
|
|
||||||
concatenation of AssistantMessage TextBlocks when result is absent.
|
|
||||||
Otherwise pre-tool reasoning and post-tool summary get double-emitted.
|
|
||||||
|
|
||||||
Pure: does not mutate executor state other than setting / clearing
|
|
||||||
`self._active_stream` so cancel() can reach in. The caller decides
|
|
||||||
whether to persist the returned session_id.
|
|
||||||
"""
|
|
||||||
assistant_chunks: list[str] = []
|
|
||||||
result_text: str | None = None
|
|
||||||
session_id: str | None = None
|
|
||||||
self._active_stream = sdk.query(prompt=prompt, options=options)
|
|
||||||
try:
|
|
||||||
async for message in self._active_stream:
|
|
||||||
if isinstance(message, sdk.AssistantMessage):
|
|
||||||
for block in message.content:
|
|
||||||
if isinstance(block, sdk.TextBlock):
|
|
||||||
assistant_chunks.append(block.text)
|
|
||||||
else:
|
|
||||||
# ToolUseBlock / ServerToolUseBlock are present
|
|
||||||
# on the real SDK but not on the conftest stub —
|
|
||||||
# check by class name to avoid an isinstance()
|
|
||||||
# against a class the stub doesn't define.
|
|
||||||
cls = type(block).__name__
|
|
||||||
if cls in ("ToolUseBlock", "ServerToolUseBlock"):
|
|
||||||
await _report_tool_use(block)
|
|
||||||
elif isinstance(message, sdk.ResultMessage):
|
|
||||||
sid = getattr(message, "session_id", None)
|
|
||||||
if sid:
|
|
||||||
session_id = sid
|
|
||||||
result_text = getattr(message, "result", None)
|
|
||||||
finally:
|
|
||||||
self._active_stream = None
|
|
||||||
text = result_text if result_text is not None else "".join(assistant_chunks)
|
|
||||||
# Auto-recover the wedge flag — if a previous query() left this
|
|
||||||
# process in `_sdk_wedged` and THIS query just completed
|
|
||||||
# cleanly, the SDK clearly works again. Clear so the next
|
|
||||||
# heartbeat reports runtime_state empty and the platform flips
|
|
||||||
# status degraded → online without a manual restart.
|
|
||||||
#
|
|
||||||
# Gate on actual content from the stream so a degenerate
|
|
||||||
# "iterator returned without raising but emitted nothing"
|
|
||||||
# case (possible from a partial stream or a stub SDK) doesn't
|
|
||||||
# falsely advertise recovery. A real successful query yields
|
|
||||||
# at least a ResultMessage (sets result_text) or one
|
|
||||||
# AssistantMessage TextBlock (populates assistant_chunks).
|
|
||||||
if result_text is not None or assistant_chunks:
|
|
||||||
_clear_sdk_wedge_on_success()
|
|
||||||
return QueryResult(text=text, session_id=session_id)
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
# AgentExecutor interface
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
|
|
||||||
async def execute(self, context: RequestContext, event_queue: EventQueue):
|
|
||||||
"""Run a turn through the Claude Agent SDK and emit the response.
|
|
||||||
|
|
||||||
Serialized via `self._run_lock` — concurrent A2A messages to the same
|
|
||||||
workspace queue rather than racing on `_session_id` / `_active_stream`.
|
|
||||||
"""
|
|
||||||
user_input = extract_message_text(context.message)
|
|
||||||
# Surface attached files to claude-code via a manifest in the prompt.
|
|
||||||
# Claude Code reads files through its own Read/Glob tools by path —
|
|
||||||
# as long as the prompt names the path, the CLI will open them on
|
|
||||||
# demand. Same contract every platform runtime uses so the UX is
|
|
||||||
# identical across hermes / langgraph / claude-code.
|
|
||||||
attached = extract_attached_files(context.message)
|
|
||||||
if attached:
|
|
||||||
manifest = "\n\nAttached files:\n" + "\n".join(
|
|
||||||
f"- {f['name']} ({f['mime_type'] or 'unknown type'}) at {f['path']}"
|
|
||||||
for f in attached
|
|
||||||
)
|
|
||||||
user_input = (user_input + manifest) if user_input else manifest.lstrip()
|
|
||||||
if not user_input:
|
|
||||||
await event_queue.enqueue_event(new_agent_text_message(_NO_TEXT_MSG))
|
|
||||||
return
|
|
||||||
|
|
||||||
async with self._run_lock:
|
|
||||||
response_text = await self._execute_locked(user_input)
|
|
||||||
|
|
||||||
# Enqueue outside the lock so the next queued turn can start
|
|
||||||
# preparing its prompt while this turn's response ships. Event
|
|
||||||
# ordering is preserved per-queue by the A2A server, so no races.
|
|
||||||
# If the response mentions /workspace/... files, stage each and
|
|
||||||
# emit FileParts alongside the text so the canvas can download.
|
|
||||||
outbound = collect_outbound_files(response_text)
|
|
||||||
if outbound:
|
|
||||||
from a2a.types import FilePart, FileWithUri, Message, Part, Role, TextPart
|
|
||||||
import uuid as _uuid
|
|
||||||
parts: list = [Part(root=TextPart(text=response_text))] if response_text else []
|
|
||||||
for f in outbound:
|
|
||||||
parts.append(Part(root=FilePart(file=FileWithUri(
|
|
||||||
uri="workspace:" + f["path"],
|
|
||||||
name=f["name"],
|
|
||||||
mimeType=f["mime_type"],
|
|
||||||
))))
|
|
||||||
await event_queue.enqueue_event(Message(
|
|
||||||
messageId=_uuid.uuid4().hex,
|
|
||||||
role=Role.agent,
|
|
||||||
parts=parts,
|
|
||||||
))
|
|
||||||
else:
|
|
||||||
await event_queue.enqueue_event(new_agent_text_message(response_text))
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _is_retryable(exc: BaseException) -> bool:
|
|
||||||
"""Check if an SDK exception looks like a transient rate-limit or
|
|
||||||
capacity error that's worth retrying with backoff."""
|
|
||||||
msg = str(exc).lower()
|
|
||||||
return any(p in msg for p in _RETRYABLE_PATTERNS)
|
|
||||||
|
|
||||||
def _reset_session_after_error(self, exc: BaseException) -> None:
|
|
||||||
"""Clear `_session_id` if the exception looks like a subprocess
|
|
||||||
crash (#75). On the next `_build_options()` call `resume=None` is
|
|
||||||
passed to the SDK, so the CLI boots a brand-new session instead of
|
|
||||||
trying to resume one the previous subprocess left in an
|
|
||||||
unrecoverable state.
|
|
||||||
|
|
||||||
Kept in its own method so the policy can evolve (e.g. also clear
|
|
||||||
on MessageParseError) without touching the retry loop. Logs at
|
|
||||||
INFO when a session was actually cleared; silent when there was
|
|
||||||
nothing to reset.
|
|
||||||
"""
|
|
||||||
exc_name = type(exc).__name__
|
|
||||||
# Conservative: reset only on subprocess-level failures. Pure
|
|
||||||
# rate-limit / capacity errors don't leave the session in a bad
|
|
||||||
# state — keep the session_id so the resumed turn preserves
|
|
||||||
# conversational continuity.
|
|
||||||
is_subprocess_error = (
|
|
||||||
exc_name in ("ProcessError", "CLIConnectionError")
|
|
||||||
or getattr(exc, "exit_code", None) is not None
|
|
||||||
or "exit code" in str(exc).lower()
|
|
||||||
)
|
|
||||||
if not is_subprocess_error:
|
|
||||||
return
|
|
||||||
if self._session_id is None:
|
|
||||||
return
|
|
||||||
logger.info(
|
|
||||||
"SDK session reset after %s: clearing session_id so the next "
|
|
||||||
"attempt starts fresh (fixes #75 session contamination)",
|
|
||||||
exc_name,
|
|
||||||
)
|
|
||||||
self._session_id = None
|
|
||||||
|
|
||||||
async def _execute_locked(self, user_input: str) -> str:
|
|
||||||
"""Body of execute() that runs under the run lock.
|
|
||||||
|
|
||||||
Retries transient errors (rate limits, capacity, exit-code-1) up to
|
|
||||||
_MAX_RETRIES times with exponential backoff (5s, 10s, 20s).
|
|
||||||
"""
|
|
||||||
# Keep a clean copy of the user's actual message for the memory record,
|
|
||||||
# BEFORE any delegation or memory injection.
|
|
||||||
original_input = user_input
|
|
||||||
logger.debug("SDK execute [claude-code]: %s", user_input[:200])
|
|
||||||
|
|
||||||
prompt = self._prepare_prompt(user_input)
|
|
||||||
|
|
||||||
response_text: str = ""
|
|
||||||
try:
|
|
||||||
# set_current_task INSIDE the try so active_tasks is always
|
|
||||||
# decremented by the finally block even if CancelledError hits
|
|
||||||
# during the heartbeat HTTP push. Moving it outside the try
|
|
||||||
# created a narrow window where cancellation left active_tasks
|
|
||||||
# stuck at 1 forever, permanently blocking queue drain. (#2026)
|
|
||||||
await set_current_task(self.heartbeat, brief_summary(user_input))
|
|
||||||
prompt = await self._inject_memories_if_first_turn(prompt)
|
|
||||||
for attempt in range(_MAX_RETRIES):
|
|
||||||
options = self._build_options()
|
|
||||||
try:
|
|
||||||
result = await self._run_query(prompt=prompt, options=options)
|
|
||||||
if result.session_id:
|
|
||||||
self._session_id = result.session_id
|
|
||||||
response_text = result.text
|
|
||||||
break # success
|
|
||||||
except Exception as exc:
|
|
||||||
formatted = _format_process_error(exc)
|
|
||||||
# #75: CLI subprocess crashes leave our _session_id
|
|
||||||
# referencing a session the next subprocess can't
|
|
||||||
# resume. Without this reset the next attempt would
|
|
||||||
# crash identically even when the underlying cause
|
|
||||||
# was transient, cascading into "crashed once →
|
|
||||||
# crashes forever until container restart." Clear
|
|
||||||
# the session_id so the next attempt (retry or
|
|
||||||
# next user turn) starts fresh.
|
|
||||||
self._reset_session_after_error(exc)
|
|
||||||
if attempt < _MAX_RETRIES - 1 and self._is_retryable(exc):
|
|
||||||
delay = _BASE_RETRY_DELAY_S * (2 ** attempt)
|
|
||||||
logger.warning(
|
|
||||||
"SDK agent [claude-code] transient error (attempt %d/%d), "
|
|
||||||
"retrying in %ds: %s",
|
|
||||||
attempt + 1, _MAX_RETRIES, delay, formatted,
|
|
||||||
)
|
|
||||||
await asyncio.sleep(delay)
|
|
||||||
continue
|
|
||||||
# Non-retryable or exhausted retries. Log exit_code +
|
|
||||||
# stderr explicitly (fixes #66) so operators don't have
|
|
||||||
# to reproduce the crash manually to find out why the
|
|
||||||
# subprocess died.
|
|
||||||
logger.error("SDK agent error [claude-code]: %s", formatted)
|
|
||||||
logger.exception("SDK agent error [claude-code] — full traceback follows")
|
|
||||||
# Detect the specific claude_agent_sdk init-wedge case
|
|
||||||
# so the heartbeat task can flip the workspace to
|
|
||||||
# `degraded`. Match on the lowercased formatted error;
|
|
||||||
# `formatted` is whatever _format_process_error built,
|
|
||||||
# which already includes both the message and the
|
|
||||||
# exception class name.
|
|
||||||
formatted_lc = formatted.lower()
|
|
||||||
for pat in _WEDGE_ERROR_PATTERNS:
|
|
||||||
if pat in formatted_lc:
|
|
||||||
_mark_sdk_wedged(
|
|
||||||
f"claude_agent_sdk wedge: {formatted[:200]} — restart workspace to recover"
|
|
||||||
)
|
|
||||||
break
|
|
||||||
response_text = sanitize_agent_error(exc)
|
|
||||||
break
|
|
||||||
finally:
|
|
||||||
await set_current_task(self.heartbeat, "")
|
|
||||||
await commit_memory(
|
|
||||||
f"Conversation: {original_input[:MEMORY_CONTENT_MAX_CHARS]}"
|
|
||||||
)
|
|
||||||
# Auto-push unpushed commits and open PR (non-blocking, best-effort).
|
|
||||||
await auto_push_hook()
|
|
||||||
|
|
||||||
return response_text or _NO_RESPONSE_MSG
|
|
||||||
|
|
||||||
async def cancel(self, context: RequestContext, event_queue: EventQueue):
|
|
||||||
"""Cooperatively cancel the currently running turn.
|
|
||||||
|
|
||||||
cancel() targets whatever turn is in flight *right now*, not the
|
|
||||||
specific turn the caller may have been looking at when they sent
|
|
||||||
the cancel request. If turn A has finished and turn B is already
|
|
||||||
running under the run lock by the time cancel arrives, turn B is
|
|
||||||
the one that gets aborted. This matches how a "stop" button in a
|
|
||||||
chat UI typically behaves (stop whatever is running) and is a
|
|
||||||
conscious trade-off against per-turn bookkeeping.
|
|
||||||
|
|
||||||
Implementation: the SDK's query() is an async generator; calling
|
|
||||||
aclose() raises GeneratorExit inside the running turn and unwinds
|
|
||||||
cleanly. We read `self._active_stream` into a local BEFORE calling
|
|
||||||
aclose so the reference can't be reassigned by another turn
|
|
||||||
mid-cancel. Best-effort — if no stream is active (cancel arrived
|
|
||||||
between turns, or the stream has no aclose), this is a no-op.
|
|
||||||
"""
|
|
||||||
stream = self._active_stream
|
|
||||||
if stream is None:
|
|
||||||
return
|
|
||||||
aclose = getattr(stream, "aclose", None)
|
|
||||||
if aclose is None:
|
|
||||||
return
|
|
||||||
try:
|
|
||||||
await aclose()
|
|
||||||
except Exception:
|
|
||||||
logger.exception("SDK cancel: aclose() raised")
|
|
||||||
@ -5,10 +5,11 @@ Supports CLI agents that accept a prompt and output a response:
|
|||||||
- Ollama: ollama run <model> "..."
|
- Ollama: ollama run <model> "..."
|
||||||
- Custom: any command that reads stdin or accepts -p
|
- Custom: any command that reads stdin or accepts -p
|
||||||
|
|
||||||
NOTE: the `claude-code` runtime no longer routes here. It uses
|
NOTE: the `claude-code` runtime no longer routes here — its template
|
||||||
ClaudeSDKExecutor (see claude_sdk_executor.py) which wraps the
|
repo (molecule-ai-workspace-template-claude-code) ships its own
|
||||||
claude-agent-sdk Python package. This executor is reserved for CLI-only
|
ClaudeSDKExecutor wrapping the claude-agent-sdk Python package as of
|
||||||
runtimes that don't yet have a programmatic SDK integration.
|
#87 Phase 2. This executor is reserved for CLI-only runtimes that
|
||||||
|
don't yet have a programmatic SDK integration.
|
||||||
|
|
||||||
The runtime is selected via config.yaml:
|
The runtime is selected via config.yaml:
|
||||||
runtime: codex | ollama | custom
|
runtime: codex | ollama | custom
|
||||||
@ -59,8 +60,8 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
# Built-in runtime presets.
|
# Built-in runtime presets.
|
||||||
# The `claude-code` runtime uses ClaudeSDKExecutor (claude_sdk_executor.py)
|
# The `claude-code` runtime uses ClaudeSDKExecutor in its own template
|
||||||
# and intentionally has no entry here.
|
# repo (post-#87 Phase 2) and intentionally has no entry here.
|
||||||
RUNTIME_PRESETS: dict[str, dict] = {
|
RUNTIME_PRESETS: dict[str, dict] = {
|
||||||
"codex": {
|
"codex": {
|
||||||
"command": "codex",
|
"command": "codex",
|
||||||
@ -117,9 +118,12 @@ class CLIAgentExecutor(AgentExecutor):
|
|||||||
if runtime == "claude-code":
|
if runtime == "claude-code":
|
||||||
# Defensive — the adapter should never construct a CLI executor
|
# Defensive — the adapter should never construct a CLI executor
|
||||||
# for claude-code. Fail loud rather than silently falling back.
|
# for claude-code. Fail loud rather than silently falling back.
|
||||||
|
# The claude-code template owns its own ClaudeSDKExecutor in
|
||||||
|
# molecule-ai-workspace-template-claude-code (post-#87 Phase 2).
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"claude-code runtime is served by ClaudeSDKExecutor, not "
|
"claude-code runtime is served by ClaudeSDKExecutor in its "
|
||||||
"CLIAgentExecutor. Check adapters/claude_code/adapter.py."
|
"template repo, not CLIAgentExecutor. If you're seeing this "
|
||||||
|
"in molecule-runtime, the adapter wiring is wrong."
|
||||||
)
|
)
|
||||||
self.runtime = runtime
|
self.runtime = runtime
|
||||||
self.config = runtime_config
|
self.config = runtime_config
|
||||||
|
|||||||
@ -85,15 +85,6 @@ When wedge is the WRONG primitive: if the failure is per-request (the
|
|||||||
SDK works for some inputs but not others), surface as a normal A2A
|
SDK works for some inputs but not others), surface as a normal A2A
|
||||||
error response, not a wedge. Wedge means "every subsequent request in
|
error response, not a wedge. Wedge means "every subsequent request in
|
||||||
this process will fail until restart."
|
this process will fail until restart."
|
||||||
|
|
||||||
Compatibility shim (will be removed once #87 Phase 2 lands)
|
|
||||||
-----------------------------------------------------------
|
|
||||||
|
|
||||||
claude_sdk_executor.py re-exports the four functions under the historical
|
|
||||||
names (is_wedged, wedge_reason, _mark_sdk_wedged, _clear_sdk_wedge_on_success)
|
|
||||||
for one release cycle. New adapter code should import from runtime_wedge
|
|
||||||
directly; the shim only exists so existing third-party adapters that
|
|
||||||
copied our claude_sdk_executor wedge convention have time to migrate.
|
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|||||||
@ -209,53 +209,14 @@ def _make_tools_mocks():
|
|||||||
sys.modules["builtin_tools.security"] = _sec_mod
|
sys.modules["builtin_tools.security"] = _sec_mod
|
||||||
|
|
||||||
|
|
||||||
def _make_claude_agent_sdk_mock():
|
|
||||||
"""Stub claude_agent_sdk so claude_sdk_executor can be imported without
|
|
||||||
the real SDK installed. Tests that exercise execute() patch query().
|
|
||||||
|
|
||||||
Installed at collection time so a top-level `import claude_agent_sdk`
|
|
||||||
in claude_sdk_executor.py resolves to this stub. Real tests can override
|
|
||||||
individual attributes via patch().
|
|
||||||
"""
|
|
||||||
mod = ModuleType("claude_agent_sdk")
|
|
||||||
|
|
||||||
class _StubTextBlock:
|
|
||||||
def __init__(self, text=""):
|
|
||||||
self.text = text
|
|
||||||
|
|
||||||
class _StubAssistantMessage:
|
|
||||||
def __init__(self, blocks=None):
|
|
||||||
self.content = blocks or []
|
|
||||||
|
|
||||||
class _StubResultMessage:
|
|
||||||
def __init__(self, session_id=None, result=None):
|
|
||||||
self.session_id = session_id
|
|
||||||
self.result = result
|
|
||||||
|
|
||||||
class _StubOptions:
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
self.kwargs = kwargs
|
|
||||||
|
|
||||||
async def _stub_query(prompt, options): # pragma: no cover — overridden in tests
|
|
||||||
yield _StubAssistantMessage([_StubTextBlock("stub")])
|
|
||||||
yield _StubResultMessage(session_id="stub-session")
|
|
||||||
|
|
||||||
mod.TextBlock = _StubTextBlock
|
|
||||||
mod.AssistantMessage = _StubAssistantMessage
|
|
||||||
mod.ResultMessage = _StubResultMessage
|
|
||||||
mod.ClaudeAgentOptions = _StubOptions
|
|
||||||
mod.query = _stub_query
|
|
||||||
sys.modules["claude_agent_sdk"] = mod
|
|
||||||
|
|
||||||
|
|
||||||
# Install mocks before any test collection imports a2a_executor
|
# Install mocks before any test collection imports a2a_executor
|
||||||
if "a2a" not in sys.modules:
|
if "a2a" not in sys.modules:
|
||||||
_make_a2a_mocks()
|
_make_a2a_mocks()
|
||||||
|
|
||||||
# Install claude_agent_sdk stub unconditionally: the real SDK ships with
|
# Note: the claude_agent_sdk stub was removed alongside
|
||||||
# workspace-template:claude-code but tests run outside the container.
|
# workspace/claude_sdk_executor.py (#87 Phase 2). The executor + its
|
||||||
if "claude_agent_sdk" not in sys.modules:
|
# tests now live in the claude-code template repo, where the real SDK
|
||||||
_make_claude_agent_sdk_mock()
|
# IS installed via Dockerfile, so no stub is needed.
|
||||||
|
|
||||||
if "langchain_core" not in sys.modules:
|
if "langchain_core" not in sys.modules:
|
||||||
_make_langchain_mocks()
|
_make_langchain_mocks()
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -66,38 +66,7 @@ class TestRuntimeWedge:
|
|||||||
assert runtime_wedge.wedge_reason() == "second wedge — different reason"
|
assert runtime_wedge.wedge_reason() == "second wedge — different reason"
|
||||||
|
|
||||||
|
|
||||||
class TestClaudeSdkExecutorReExportShim:
|
# TestClaudeSdkExecutorReExportShim removed alongside
|
||||||
"""claude_sdk_executor.py keeps re-exporting the old names for one
|
# workspace/claude_sdk_executor.py — the shim served its one-release-
|
||||||
release cycle so any third-party adapter copying our wedge convention
|
# cycle purpose during the universal-runtime refactor (#87 Phase 2).
|
||||||
has time to migrate. These tests pin the shim — when removed, the
|
# The executor + its shim now live in the claude-code template repo.
|
||||||
test file goes too."""
|
|
||||||
|
|
||||||
def test_is_wedged_re_exported(self):
|
|
||||||
from claude_sdk_executor import is_wedged
|
|
||||||
assert is_wedged is runtime_wedge.is_wedged
|
|
||||||
|
|
||||||
def test_wedge_reason_re_exported(self):
|
|
||||||
from claude_sdk_executor import wedge_reason
|
|
||||||
assert wedge_reason is runtime_wedge.wedge_reason
|
|
||||||
|
|
||||||
def test_internal_helpers_re_exported(self):
|
|
||||||
# Keep the underscore names too — claude_sdk_executor's own
|
|
||||||
# _run_query calls _mark_sdk_wedged / _clear_sdk_wedge_on_success
|
|
||||||
# via these re-exports.
|
|
||||||
from claude_sdk_executor import (
|
|
||||||
_mark_sdk_wedged,
|
|
||||||
_clear_sdk_wedge_on_success,
|
|
||||||
_reset_sdk_wedge_for_test,
|
|
||||||
)
|
|
||||||
assert _mark_sdk_wedged is runtime_wedge.mark_wedged
|
|
||||||
assert _clear_sdk_wedge_on_success is runtime_wedge.clear_wedge
|
|
||||||
assert _reset_sdk_wedge_for_test is runtime_wedge.reset_for_test
|
|
||||||
|
|
||||||
def test_re_export_state_is_shared(self):
|
|
||||||
# The shim isn't a copy — both names refer to the same module
|
|
||||||
# state. Marking via the executor name must be observable via
|
|
||||||
# the runtime_wedge name (and vice versa).
|
|
||||||
from claude_sdk_executor import _mark_sdk_wedged
|
|
||||||
_mark_sdk_wedged("via executor shim")
|
|
||||||
assert runtime_wedge.is_wedged() is True
|
|
||||||
assert runtime_wedge.wedge_reason() == "via executor shim"
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user