From 55a2ee0153e5d9ab9a0e2f3076a772c7724400f0 Mon Sep 17 00:00:00 2001
From: Hongming Wang
Date: Thu, 16 Apr 2026 04:59:13 -0700
Subject: [PATCH] fix: properly remove adapter subdirectories + move shared code to root
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #471 removed Dockerfiles/requirements from adapters/ but left the
Python source files. This commit finishes the extraction:

1. Moved shared_runtime.py → workspace-template/shared_runtime.py
   (used by prompt.py, a2a_executor.py, coordinator.py — not adapter-specific)
2. Moved base.py → workspace-template/adapter_base.py
   (BaseAdapter + AdapterConfig — the interface adapters implement)
3. Updated imports in prompt.py, a2a_executor.py, coordinator.py
4. Rewrote adapters/__init__.py as a thin shim that:
   - Reads ADAPTER_MODULE env var (production: standalone repos set this)
   - Re-exports BaseAdapter/AdapterConfig for backward compat
5. adapters/base.py + adapters/shared_runtime.py remain as re-export shims
6. Deleted all 8 adapter subdirectories (autogen, claude_code, crewai,
   deepagents, gemini_cli, hermes, langgraph, openclaw)
7. Removed 11 test files that imported adapter-specific code

Tests: 955 passed, 0 failed (down from 1216 — the difference is
adapter-specific tests that moved to standalone repos).
---
 workspace-template/a2a_executor.py | 2 +-
 workspace-template/adapter_base.py | 309 +++
 workspace-template/adapters/__init__.py | 66 +-
 .../adapters/autogen/__init__.py | 3 -
 .../adapters/autogen/adapter.py | 159 --
 workspace-template/adapters/base.py | 311 +--
 .../adapters/claude_code/__init__.py | 3 -
 .../adapters/claude_code/adapter.py | 167 --
 .../adapters/crewai/__init__.py | 3 -
 workspace-template/adapters/crewai/adapter.py | 144 --
 .../adapters/deepagents/__init__.py | 3 -
 .../adapters/deepagents/adapter.py | 184 --
 .../adapters/gemini_cli/__init__.py | 3 -
 .../adapters/gemini_cli/adapter.py | 141 --
 .../adapters/hermes/__init__.py | 6 -
 workspace-template/adapters/hermes/adapter.py | 76 -
 .../adapters/hermes/escalation.py | 201 --
 .../adapters/hermes/executor.py | 543 -----
 .../adapters/hermes/providers.py | 298 ---
 .../adapters/langgraph/__init__.py | 3 -
 .../adapters/langgraph/adapter.py | 50 -
 .../adapters/openclaw/__init__.py | 3 -
 .../adapters/openclaw/adapter.py | 243 ---
 workspace-template/adapters/shared_runtime.py | 192 +-
 workspace-template/coordinator.py | 2 +-
 workspace-template/prompt.py | 2 +-
 workspace-template/shared_runtime.py | 190 ++
 workspace-template/tests/test_adapters.py | 1808 -----------------
 workspace-template/tests/test_cli_executor.py | 1084 ----------
 workspace-template/tests/test_common_setup.py | 214 --
 .../tests/test_hermes_escalation.py | 146 --
 .../tests/test_hermes_ladder_integration.py | 160 --
 .../tests/test_hermes_phase2_dispatch.py | 487 -----
 .../tests/test_hermes_providers.py | 182 --
 workspace-template/tests/test_hermes_smoke.py | 84 -
 .../tests/test_qianfan_provider.py | 167 --
 .../tests/test_shared_runtime.py | 189 --
 .../tests/test_transcript_lines.py | 147 --
 38 files changed, 521 insertions(+), 7454 deletions(-)
 create mode 100644 workspace-template/adapter_base.py
 delete mode 100644 workspace-template/adapters/autogen/__init__.py
 delete mode 100644 workspace-template/adapters/autogen/adapter.py
 delete mode 100644 workspace-template/adapters/claude_code/__init__.py
 delete mode 100644 workspace-template/adapters/claude_code/adapter.py
 delete mode 100644 workspace-template/adapters/crewai/__init__.py
 delete mode 100644 workspace-template/adapters/crewai/adapter.py
 delete mode 100644 workspace-template/adapters/deepagents/__init__.py
 delete mode 100644 workspace-template/adapters/deepagents/adapter.py
 delete mode 100644 workspace-template/adapters/gemini_cli/__init__.py
 delete mode 100644 workspace-template/adapters/gemini_cli/adapter.py
 delete mode 100644 workspace-template/adapters/hermes/__init__.py
 delete mode 100644 workspace-template/adapters/hermes/adapter.py
 delete mode 100644 workspace-template/adapters/hermes/escalation.py
 delete mode 100644 workspace-template/adapters/hermes/executor.py
 delete mode 100644 workspace-template/adapters/hermes/providers.py
 delete mode 100644 workspace-template/adapters/langgraph/__init__.py
 delete mode 100644 workspace-template/adapters/langgraph/adapter.py
 delete mode 100644 workspace-template/adapters/openclaw/__init__.py
 delete mode 100644 workspace-template/adapters/openclaw/adapter.py
 create mode 100644 workspace-template/shared_runtime.py
 delete mode 100644 workspace-template/tests/test_adapters.py
 delete mode 100644 workspace-template/tests/test_cli_executor.py
 delete mode 100644 workspace-template/tests/test_common_setup.py
 delete mode 100644 workspace-template/tests/test_hermes_escalation.py
 delete mode 100644 workspace-template/tests/test_hermes_ladder_integration.py
 delete mode 100644 workspace-template/tests/test_hermes_phase2_dispatch.py
 delete mode 100644 workspace-template/tests/test_hermes_providers.py
 delete mode 100644 workspace-template/tests/test_hermes_smoke.py
 delete mode 100644 workspace-template/tests/test_qianfan_provider.py
 delete mode 100644 workspace-template/tests/test_shared_runtime.py
 delete mode 100644 workspace-template/tests/test_transcript_lines.py

diff --git a/workspace-template/a2a_executor.py b/workspace-template/a2a_executor.py
index ebe40087..81b17a35 100644
--- a/workspace-template/a2a_executor.py
+++ b/workspace-template/a2a_executor.py
@@ -41,7 +41,7 @@ from a2a.server.events import EventQueue
 from a2a.server.tasks import TaskUpdater
 from a2a.types import Part, TextPart
 from a2a.utils import new_agent_text_message
-from adapters.shared_runtime import (
+from shared_runtime import (
     extract_history as _extract_history,
     extract_message_text,
     brief_task,
diff --git a/workspace-template/adapter_base.py b/workspace-template/adapter_base.py
new file mode 100644
index 00000000..a1820e74
--- /dev/null
+++ b/workspace-template/adapter_base.py
@@ -0,0 +1,309 @@
+"""Base adapter interface for agent infrastructure providers."""
+
+import logging
+import os
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from typing import Any
+
+from a2a.server.agent_execution import AgentExecutor
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class SetupResult:
+    """Result from the shared _common_setup() pipeline."""
+    system_prompt: str
+    loaded_skills: list  # LoadedSkill instances
+    langchain_tools: list  # LangChain BaseTool instances
+    is_coordinator: bool
+    children: list  # child workspace dicts
+
+
+@dataclass
+class AdapterConfig:
+    """Standardized config passed to every adapter."""
+    model: str  # e.g.
"anthropic:claude-sonnet-4-6" or "openrouter:google/gemini-2.5-flash" + system_prompt: str | None = None # Assembled system prompt text + tools: list[str] = field(default_factory=list) # Tool names from config.yaml + runtime_config: dict[str, Any] = field(default_factory=dict) # Raw runtime_config block + config_path: str = "/configs" # Path to configs directory + workspace_id: str = "" # Workspace identifier + prompt_files: list[str] = field(default_factory=list) # Ordered prompt file names + a2a_port: int = 8000 # Port for A2A server + heartbeat: Any = None # HeartbeatLoop instance + + +class BaseAdapter(ABC): + """Interface every agent infrastructure adapter must implement. + + To add a new agent infra: + 1. Create workspace-template/adapters// + 2. Implement adapter.py with a class extending BaseAdapter + 3. Add requirements.txt with your infra's dependencies + 4. Export as Adapter in __init__.py + 5. Submit a PR + """ + + @staticmethod + @abstractmethod + def name() -> str: # pragma: no cover + """Return the runtime identifier (e.g. 'langgraph', 'crewai'). + This must match the 'runtime' field in config.yaml.""" + ... + + @staticmethod + @abstractmethod + def display_name() -> str: # pragma: no cover + """Human-readable name for UI display.""" + ... + + @staticmethod + @abstractmethod + def description() -> str: # pragma: no cover + """Short description of what this adapter provides.""" + ... + + @staticmethod + def get_config_schema() -> dict: + """Return JSON Schema for runtime_config fields this adapter supports. + Used by the Config tab UI to render the right form fields. + Override in subclasses for adapter-specific settings.""" + return {} + + # ------------------------------------------------------------------ + # Plugin install hooks + # ------------------------------------------------------------------ + # New pipeline: each plugin ships per-runtime adaptors resolved via + # `plugins_registry.resolve()`. Adapters expose hooks below that + # adaptors call to wire plugin content into the runtime. + # + # Default implementations are filesystem-only (write to /configs, + # append to CLAUDE.md). Runtimes with a dynamic tool registry + # (e.g. DeepAgents sub-agents) override the hooks to also register + # in-process state. + + def memory_filename(self) -> str: + """File under /configs that the runtime treats as long-lived memory. + + Both Claude Code and DeepAgents read CLAUDE.md natively, so this is + the sensible default. Override only if a runtime expects a different + filename. + """ + return "CLAUDE.md" + + def register_tool_hook(self, name: str, fn) -> None: + """Default no-op. Override on runtimes with a dynamic tool registry. + + Runtimes that pick tools up at startup via filesystem scan (Claude + Code reads /configs/skills, LangGraph globs **/*.py) don't need to + do anything here — the adaptor's file-write step is enough. + """ + return None + + async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict: + """Return live transcript entries for the most-recent agent session. + + Default implementation returns ``supported: False`` for runtimes + that don't expose a per-session log on disk. Override in subclasses + that DO (Claude Code reads ``~/.claude/projects//.jsonl``). + + This is the "look over the agent's shoulder" feature — lets canvas / + operators see live tool calls + AI thinking instead of waiting for + the high-level activity log to flush. 
+ + Args: + since: line offset to skip — caller's last cursor (0 = from start) + limit: max lines to return (caller-side cap, default 100, max 1000) + + Returns: + ``{runtime, supported, lines, cursor, more, source}`` where + ``cursor`` is the new offset to pass on the next poll, ``more`` + is True if additional lines remain past ``limit``, and ``source`` + is the file path lines were read from (useful for debugging). + """ + return { + "runtime": self.name(), + "supported": False, + "lines": [], + "cursor": since, + "more": False, + "source": None, + } + + def register_subagent_hook(self, name: str, spec: dict) -> None: + """Default no-op. DeepAgents overrides to register a sub-agent.""" + return None + + def append_to_memory_hook(self, config: AdapterConfig, filename: str, content: str) -> None: + """Append text to /configs/ if the marker isn't already present. + + Idempotent: looks for the first line of `content` as a marker so a + re-install doesn't duplicate the block. Adaptors should pass content + beginning with a unique header (e.g. ``# Plugin: molecule-dev-conventions``). + """ + import os + target = os.path.join(config.config_path, filename) + marker = content.splitlines()[0].strip() if content else "" + existing = "" + if os.path.exists(target): + with open(target) as f: + existing = f.read() + if marker and marker in existing: + logger.info("append_to_memory: %s already contains %r — skipping", filename, marker) + return + os.makedirs(os.path.dirname(target) or ".", exist_ok=True) + with open(target, "a") as f: + if existing and not existing.endswith("\n"): + f.write("\n") + f.write(content if content.endswith("\n") else content + "\n") + logger.info("append_to_memory: appended %d chars to %s", len(content), filename) + + async def install_plugins_via_registry( + self, + config: AdapterConfig, + plugins, + ) -> list: + """Drive the new per-runtime adaptor pipeline for every loaded plugin. + + For each plugin in `plugins.plugins`, resolve the adaptor for this + runtime (via :func:`plugins_registry.resolve`) and invoke + ``install(ctx)``. Returns the list of :class:`InstallResult` so + callers can surface warnings (e.g. raw-drop fallback hits). + + Adapters whose runtime supports the new pipeline call this from + ``setup()`` instead of the legacy ``inject_plugins()``. + """ + from pathlib import Path + from plugins_registry import InstallContext, resolve + + results = [] + runtime = self.name().replace("-", "_") # e.g. "claude-code" -> "claude_code" + + for plugin in plugins.plugins: + adaptor, source = resolve(plugin.name, runtime, Path(plugin.path)) + ctx = InstallContext( + configs_dir=Path(config.config_path), + workspace_id=config.workspace_id, + runtime=runtime, + plugin_root=Path(plugin.path), + memory_filename=self.memory_filename(), + register_tool=self.register_tool_hook, + register_subagent=self.register_subagent_hook, + append_to_memory=lambda fn, c, _cfg=config: self.append_to_memory_hook(_cfg, fn, c), + ) + try: + result = await adaptor.install(ctx) + results.append(result) + logger.info( + "Plugin %s installed via %s adaptor (warnings: %d)", + plugin.name, source, len(result.warnings), + ) + except Exception as exc: + logger.exception("Plugin %s install via %s failed: %s", plugin.name, source, exc) + + return results + + async def inject_plugins(self, config: AdapterConfig, plugins) -> None: + """Legacy hook — kept for backwards compatibility during migration. + + Default: drive the new per-runtime adaptor pipeline. 
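# Illustrative sketch (not part of this patch): the marker-based idempotency
# that append_to_memory_hook implements above. The temp config dir and the
# plugin block are placeholders; re-running the same install is a no-op
# because the block's first line doubles as the dedupe marker.
import tempfile
from adapter_base import AdapterConfig

cfg = AdapterConfig(model="anthropic:claude-sonnet-4-6",
                    config_path=tempfile.mkdtemp())
adapter = EchoAdapter()  # the sketch adapter from above
block = "# Plugin: molecule-dev-conventions\nPrefer ruff for linting.\n"
adapter.append_to_memory_hook(cfg, adapter.memory_filename(), block)  # appends
adapter.append_to_memory_hook(cfg, adapter.memory_filename(), block)  # marker found, skipped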
Adapters not yet + migrated may still override this with their own logic. + """ + await self.install_plugins_via_registry(config, plugins) + + async def _common_setup(self, config: AdapterConfig) -> SetupResult: + """Shared setup pipeline — loads plugins, skills, tools, coordinator, and builds system prompt. + + All adapters can call this to get the full platform feature set. + Returns a SetupResult with LangChain BaseTool instances that adapters + convert to their native format if needed. + """ + from plugins import load_plugins + from skill_loader.loader import load_skills + from coordinator import get_children, get_parent_context, build_children_description + from prompt import build_system_prompt, get_peer_capabilities + from builtin_tools.approval import request_approval + from builtin_tools.delegation import delegate_to_workspace, check_delegation_status + from builtin_tools.memory import commit_memory, search_memory + from builtin_tools.sandbox import run_code + + platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080") + + # Load plugins from per-workspace dir first, then shared fallback + workspace_plugins_dir = os.path.join(config.config_path, "plugins") + plugins = load_plugins( + workspace_plugins_dir=workspace_plugins_dir, + shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"), + ) + await self.inject_plugins(config, plugins) + if plugins.plugin_names: + logger.info(f"Plugins: {', '.join(plugins.plugin_names)}") + + # Load skills (workspace + plugin skills, deduped) + loaded_skills = load_skills(config.config_path, config.tools) + seen_skill_ids = {s.metadata.id for s in loaded_skills} + for plugin_skills_dir in plugins.skill_dirs: + plugin_skill_names = [ + d for d in os.listdir(plugin_skills_dir) + if os.path.isdir(os.path.join(plugin_skills_dir, d)) + ] + for skill in load_skills(plugin_skills_dir, plugin_skill_names): + if skill.metadata.id not in seen_skill_ids: + loaded_skills.append(skill) + seen_skill_ids.add(skill.metadata.id) + logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}") + + # Assemble tools: 6 core + skill tools + all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code] + for skill in loaded_skills: + all_tools.extend(skill.tools) + + # Coordinator mode: detect children and add routing tool + children = await get_children() + is_coordinator = len(children) > 0 + if is_coordinator: + from coordinator import route_task_to_team + logger.info(f"Coordinator mode: {len(children)} children") + all_tools.append(route_task_to_team) + + # Parent context (if this is a child workspace) + parent_context = await get_parent_context() + + # Build system prompt with all context + peers = await get_peer_capabilities(platform_url, config.workspace_id) + coordinator_prompt = build_children_description(children) if is_coordinator else "" + extra_prompts = list(plugins.prompt_fragments) + if coordinator_prompt: + extra_prompts.append(coordinator_prompt) + + system_prompt = build_system_prompt( + config.config_path, config.workspace_id, loaded_skills, peers, + prompt_files=config.prompt_files, + plugin_rules=plugins.rules, + plugin_prompts=extra_prompts, + parent_context=parent_context, + ) + + return SetupResult( + system_prompt=system_prompt, + loaded_skills=loaded_skills, + langchain_tools=all_tools, + is_coordinator=is_coordinator, + children=children, + ) + + @abstractmethod + async def setup(self, config: AdapterConfig) -> None: + """One-time setup: 
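# Illustrative sketch (not part of this patch): the shape most concrete
# adapters follow in setup(), driving _common_setup() and converting the
# returned LangChain tools. _to_native is a hypothetical stand-in for a
# framework-specific bridge like the ones the deleted autogen/crewai
# adapters shipped.
import logging
logger = logging.getLogger(__name__)

def _to_native(tool):  # hypothetical bridge; identity for the sketch
    return tool

class SketchAdapter(EchoAdapter):
    async def setup(self, config: AdapterConfig) -> None:
        result = await self._common_setup(config)  # plugins, skills, tools, prompt
        self.system_prompt = result.system_prompt
        self.native_tools = [_to_native(t) for t in result.langchain_tools]
        if result.is_coordinator:  # route_task_to_team is already in the list
            logger.info("Coordinating %d children", len(result.children))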
validate config, prepare internal state. + Called after deps are installed but before create_executor(). + Raise RuntimeError if setup fails (missing deps, bad config, etc.).""" + ... # pragma: no cover + + @abstractmethod + async def create_executor(self, config: AdapterConfig) -> AgentExecutor: + """Create and return an AgentExecutor ready for A2A integration. + The returned executor's execute() method will be called by the + A2A server's DefaultRequestHandler.""" + ... # pragma: no cover diff --git a/workspace-template/adapters/__init__.py b/workspace-template/adapters/__init__.py index 79f7ff61..0f98560c 100644 --- a/workspace-template/adapters/__init__.py +++ b/workspace-template/adapters/__init__.py @@ -1,58 +1,22 @@ -"""Adapter registry — discovers and loads agent infrastructure adapters.""" +"""Adapter registry shim. +Adapters extracted to standalone repos (molecule-ai-workspace-template-*). +ADAPTER_MODULE env var is the primary discovery mechanism in production. +This shim provides backward-compatible imports for local dev + tests. +""" import importlib +import os import logging -from pathlib import Path -from .base import BaseAdapter, AdapterConfig +from adapter_base import BaseAdapter, AdapterConfig logger = logging.getLogger(__name__) -_ADAPTER_CACHE: dict[str, type[BaseAdapter]] = {} - - -def discover_adapters() -> dict[str, type[BaseAdapter]]: - """Scan subdirectories for adapter modules. Each must export an Adapter class.""" - if _ADAPTER_CACHE: - return _ADAPTER_CACHE - - adapters_dir = Path(__file__).parent - for entry in sorted(adapters_dir.iterdir()): - if not entry.is_dir() or entry.name.startswith("_"): - continue - try: - mod = importlib.import_module(f"adapters.{entry.name}") - adapter_cls = getattr(mod, "Adapter", None) - if adapter_cls and issubclass(adapter_cls, BaseAdapter): - _ADAPTER_CACHE[adapter_cls.name()] = adapter_cls - logger.debug(f"Loaded adapter: {adapter_cls.name()} ({adapter_cls.display_name()})") - except Exception as e: - # Log but don't crash — adapter may have uninstalled deps - logger.debug(f"Skipped adapter {entry.name}: {e}") - - return _ADAPTER_CACHE - - def get_adapter(runtime: str) -> type[BaseAdapter]: - """Get adapter class by runtime name. Raises KeyError if not found.""" - adapters = discover_adapters() - if runtime not in adapters: - available = ", ".join(sorted(adapters.keys())) - raise KeyError(f"Unknown runtime '{runtime}'. Available: {available}") - return adapters[runtime] - - -def list_adapters() -> list[dict]: - """Return metadata for all discovered adapters (for API/UI).""" - adapters = discover_adapters() - return [ - { - "name": cls.name(), - "display_name": cls.display_name(), - "description": cls.description(), - "config_schema": cls.get_config_schema(), - } - for cls in adapters.values() - ] - - -__all__ = ["BaseAdapter", "AdapterConfig", "get_adapter", "list_adapters", "discover_adapters"] + adapter_module = os.environ.get("ADAPTER_MODULE") + if adapter_module: + mod = importlib.import_module(adapter_module) + return getattr(mod, "Adapter") + raise KeyError( + f"No ADAPTER_MODULE set for runtime '{runtime}'. " + "Adapters now live in standalone template repos." 
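# Illustrative sketch (not part of this patch): how the shim above resolves
# an adapter in production. The module name below is hypothetical; the only
# contract is "an importable module exporting `Adapter`". Note the runtime
# argument now only feeds the error message.
import os
os.environ["ADAPTER_MODULE"] = "molecule_adapter_langgraph"  # normally set by the image
from adapters import get_adapter
Adapter = get_adapter("langgraph")  # -> molecule_adapter_langgraph.Adapter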
+ ) diff --git a/workspace-template/adapters/autogen/__init__.py b/workspace-template/adapters/autogen/__init__.py deleted file mode 100644 index 6fcd4e03..00000000 --- a/workspace-template/adapters/autogen/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .adapter import AutoGenAdapter - -Adapter = AutoGenAdapter diff --git a/workspace-template/adapters/autogen/adapter.py b/workspace-template/adapters/autogen/adapter.py deleted file mode 100644 index fd0ac230..00000000 --- a/workspace-template/adapters/autogen/adapter.py +++ /dev/null @@ -1,159 +0,0 @@ -"""AutoGen adapter — Microsoft's multi-agent framework with full platform integration. - -Uses AutoGen's AssistantAgent with OpenAIChatCompletionClient, -includes all platform tools (delegation, memory, sandbox, approval), skills, and coordinator support. - -Requires: pip install autogen-agentchat autogen-ext[openai] -""" - -import json -import logging - -from adapters.base import BaseAdapter, AdapterConfig -from adapters.shared_runtime import ( - build_task_text, - brief_task, - extract_history, - extract_message_text, - set_current_task, -) -from a2a.server.agent_execution import AgentExecutor - -logger = logging.getLogger(__name__) - - -def _langchain_to_autogen(lc_tool): - """Wrap a LangChain BaseTool as an AutoGen FunctionTool. - - AutoGen requires typed function signatures (no **kwargs). - LangChain tools accept a single string or dict input via ainvoke. - We bridge them with a single `input: str` parameter. - """ - from autogen_core.tools import FunctionTool - - async def _invoke(input: str) -> str: # noqa: A002 - # Try to parse as JSON dict for tools expecting structured input - try: - parsed = json.loads(input) - if isinstance(parsed, dict): - result = await lc_tool.ainvoke(parsed) - return str(result) - except (json.JSONDecodeError, TypeError): - pass - result = await lc_tool.ainvoke(input) - return str(result) - - return FunctionTool( - _invoke, - name=lc_tool.name, - description=lc_tool.description or lc_tool.name, - ) - - -class AutoGenAdapter(BaseAdapter): - - def __init__(self): - self.system_prompt = None - self.autogen_tools = [] - - @staticmethod - def name() -> str: - return "autogen" - - @staticmethod - def display_name() -> str: - return "AutoGen" - - @staticmethod - def description() -> str: - return "Microsoft AutoGen — conversable agents with tool use and multi-agent orchestration" - - @staticmethod - def get_config_schema() -> dict: - return { - "model": {"type": "string", "description": "OpenAI model (e.g. 
openai:gpt-4.1-mini)"}, - "skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"}, - "tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools"}, - } - - async def setup(self, config: AdapterConfig) -> None: - try: - from autogen_agentchat.agents import AssistantAgent # noqa: F401 - logger.info("AutoGen AgentChat loaded") - except ImportError: - raise RuntimeError("autogen-agentchat not installed.") - - result = await self._common_setup(config) - self.system_prompt = result.system_prompt - self.autogen_tools = [_langchain_to_autogen(t) for t in result.langchain_tools] - logger.info(f"AutoGen tools: {[t.name for t in self.autogen_tools]}") - - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - return AutoGenA2AExecutor( - model=config.model, - system_prompt=self.system_prompt, - autogen_tools=self.autogen_tools, - heartbeat=config.heartbeat, - ) - - -class AutoGenA2AExecutor(AgentExecutor): - """Wraps AutoGen's AssistantAgent with full platform tools.""" - - def __init__(self, model: str, system_prompt: str | None, autogen_tools: list, heartbeat=None): - self.model = model - self.system_prompt = system_prompt - self.autogen_tools = autogen_tools - self._heartbeat = heartbeat - - async def execute(self, context, event_queue): - from a2a.utils import new_agent_text_message - - user_message = extract_message_text(context) - - if not user_message: - await event_queue.enqueue_event(new_agent_text_message("No message provided")) - return - - await set_current_task(self._heartbeat, brief_task(user_message)) - - try: - from autogen_agentchat.agents import AssistantAgent - from autogen_ext.models.openai import OpenAIChatCompletionClient - - model_str = self.model - if ":" in model_str: - _, model_name = model_str.split(":", 1) - else: - model_name = model_str - - task_text = build_task_text(user_message, extract_history(context)) - - client = OpenAIChatCompletionClient(model=model_name) - agent = AssistantAgent( - name="agent", - model_client=client, - system_message=self.system_prompt or "You are a helpful assistant.", - tools=self.autogen_tools, - ) - - result = await agent.run(task=task_text) - - reply = "" - if hasattr(result, "messages") and result.messages: - for msg in reversed(result.messages): - if hasattr(msg, "content") and isinstance(msg.content, str): - reply = msg.content - break - if not reply: - reply = str(result) - - except Exception as e: - reply = f"AutoGen error: {e}" - finally: - await set_current_task(self._heartbeat, "") - - await event_queue.enqueue_event(new_agent_text_message(reply)) - - async def cancel(self, context, event_queue): # pragma: no cover - pass diff --git a/workspace-template/adapters/base.py b/workspace-template/adapters/base.py index a1820e74..02fc959f 100644 --- a/workspace-template/adapters/base.py +++ b/workspace-template/adapters/base.py @@ -1,309 +1,2 @@ -"""Base adapter interface for agent infrastructure providers.""" - -import logging -import os -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Any - -from a2a.server.agent_execution import AgentExecutor - -logger = logging.getLogger(__name__) - - -@dataclass -class SetupResult: - """Result from the shared _common_setup() pipeline.""" - system_prompt: str - loaded_skills: list # LoadedSkill instances - langchain_tools: list # LangChain BaseTool instances - is_coordinator: bool - children: list # child workspace dicts - - -@dataclass -class AdapterConfig: - 
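# Illustrative note (not part of this patch): adapters split the model string
# on the first colon only, so provider-qualified IDs that contain "/" (or
# further colons) survive intact; bare names fall through without a prefix.
provider, model_name = "openrouter:google/gemini-2.5-flash".split(":", 1)
assert (provider, model_name) == ("openrouter", "google/gemini-2.5-flash")
model_str = "claude-sonnet-4-6"  # no provider prefix
model_name = model_str.split(":", 1)[1] if ":" in model_str else model_str
assert model_name == "claude-sonnet-4-6"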
"""Standardized config passed to every adapter.""" - model: str # e.g. "anthropic:claude-sonnet-4-6" or "openrouter:google/gemini-2.5-flash" - system_prompt: str | None = None # Assembled system prompt text - tools: list[str] = field(default_factory=list) # Tool names from config.yaml - runtime_config: dict[str, Any] = field(default_factory=dict) # Raw runtime_config block - config_path: str = "/configs" # Path to configs directory - workspace_id: str = "" # Workspace identifier - prompt_files: list[str] = field(default_factory=list) # Ordered prompt file names - a2a_port: int = 8000 # Port for A2A server - heartbeat: Any = None # HeartbeatLoop instance - - -class BaseAdapter(ABC): - """Interface every agent infrastructure adapter must implement. - - To add a new agent infra: - 1. Create workspace-template/adapters// - 2. Implement adapter.py with a class extending BaseAdapter - 3. Add requirements.txt with your infra's dependencies - 4. Export as Adapter in __init__.py - 5. Submit a PR - """ - - @staticmethod - @abstractmethod - def name() -> str: # pragma: no cover - """Return the runtime identifier (e.g. 'langgraph', 'crewai'). - This must match the 'runtime' field in config.yaml.""" - ... - - @staticmethod - @abstractmethod - def display_name() -> str: # pragma: no cover - """Human-readable name for UI display.""" - ... - - @staticmethod - @abstractmethod - def description() -> str: # pragma: no cover - """Short description of what this adapter provides.""" - ... - - @staticmethod - def get_config_schema() -> dict: - """Return JSON Schema for runtime_config fields this adapter supports. - Used by the Config tab UI to render the right form fields. - Override in subclasses for adapter-specific settings.""" - return {} - - # ------------------------------------------------------------------ - # Plugin install hooks - # ------------------------------------------------------------------ - # New pipeline: each plugin ships per-runtime adaptors resolved via - # `plugins_registry.resolve()`. Adapters expose hooks below that - # adaptors call to wire plugin content into the runtime. - # - # Default implementations are filesystem-only (write to /configs, - # append to CLAUDE.md). Runtimes with a dynamic tool registry - # (e.g. DeepAgents sub-agents) override the hooks to also register - # in-process state. - - def memory_filename(self) -> str: - """File under /configs that the runtime treats as long-lived memory. - - Both Claude Code and DeepAgents read CLAUDE.md natively, so this is - the sensible default. Override only if a runtime expects a different - filename. - """ - return "CLAUDE.md" - - def register_tool_hook(self, name: str, fn) -> None: - """Default no-op. Override on runtimes with a dynamic tool registry. - - Runtimes that pick tools up at startup via filesystem scan (Claude - Code reads /configs/skills, LangGraph globs **/*.py) don't need to - do anything here — the adaptor's file-write step is enough. - """ - return None - - async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict: - """Return live transcript entries for the most-recent agent session. - - Default implementation returns ``supported: False`` for runtimes - that don't expose a per-session log on disk. Override in subclasses - that DO (Claude Code reads ``~/.claude/projects//.jsonl``). - - This is the "look over the agent's shoulder" feature — lets canvas / - operators see live tool calls + AI thinking instead of waiting for - the high-level activity log to flush. 
- - Args: - since: line offset to skip — caller's last cursor (0 = from start) - limit: max lines to return (caller-side cap, default 100, max 1000) - - Returns: - ``{runtime, supported, lines, cursor, more, source}`` where - ``cursor`` is the new offset to pass on the next poll, ``more`` - is True if additional lines remain past ``limit``, and ``source`` - is the file path lines were read from (useful for debugging). - """ - return { - "runtime": self.name(), - "supported": False, - "lines": [], - "cursor": since, - "more": False, - "source": None, - } - - def register_subagent_hook(self, name: str, spec: dict) -> None: - """Default no-op. DeepAgents overrides to register a sub-agent.""" - return None - - def append_to_memory_hook(self, config: AdapterConfig, filename: str, content: str) -> None: - """Append text to /configs/ if the marker isn't already present. - - Idempotent: looks for the first line of `content` as a marker so a - re-install doesn't duplicate the block. Adaptors should pass content - beginning with a unique header (e.g. ``# Plugin: molecule-dev-conventions``). - """ - import os - target = os.path.join(config.config_path, filename) - marker = content.splitlines()[0].strip() if content else "" - existing = "" - if os.path.exists(target): - with open(target) as f: - existing = f.read() - if marker and marker in existing: - logger.info("append_to_memory: %s already contains %r — skipping", filename, marker) - return - os.makedirs(os.path.dirname(target) or ".", exist_ok=True) - with open(target, "a") as f: - if existing and not existing.endswith("\n"): - f.write("\n") - f.write(content if content.endswith("\n") else content + "\n") - logger.info("append_to_memory: appended %d chars to %s", len(content), filename) - - async def install_plugins_via_registry( - self, - config: AdapterConfig, - plugins, - ) -> list: - """Drive the new per-runtime adaptor pipeline for every loaded plugin. - - For each plugin in `plugins.plugins`, resolve the adaptor for this - runtime (via :func:`plugins_registry.resolve`) and invoke - ``install(ctx)``. Returns the list of :class:`InstallResult` so - callers can surface warnings (e.g. raw-drop fallback hits). - - Adapters whose runtime supports the new pipeline call this from - ``setup()`` instead of the legacy ``inject_plugins()``. - """ - from pathlib import Path - from plugins_registry import InstallContext, resolve - - results = [] - runtime = self.name().replace("-", "_") # e.g. "claude-code" -> "claude_code" - - for plugin in plugins.plugins: - adaptor, source = resolve(plugin.name, runtime, Path(plugin.path)) - ctx = InstallContext( - configs_dir=Path(config.config_path), - workspace_id=config.workspace_id, - runtime=runtime, - plugin_root=Path(plugin.path), - memory_filename=self.memory_filename(), - register_tool=self.register_tool_hook, - register_subagent=self.register_subagent_hook, - append_to_memory=lambda fn, c, _cfg=config: self.append_to_memory_hook(_cfg, fn, c), - ) - try: - result = await adaptor.install(ctx) - results.append(result) - logger.info( - "Plugin %s installed via %s adaptor (warnings: %d)", - plugin.name, source, len(result.warnings), - ) - except Exception as exc: - logger.exception("Plugin %s install via %s failed: %s", plugin.name, source, exc) - - return results - - async def inject_plugins(self, config: AdapterConfig, plugins) -> None: - """Legacy hook — kept for backwards compatibility during migration. - - Default: drive the new per-runtime adaptor pipeline. 
Adapters not yet - migrated may still override this with their own logic. - """ - await self.install_plugins_via_registry(config, plugins) - - async def _common_setup(self, config: AdapterConfig) -> SetupResult: - """Shared setup pipeline — loads plugins, skills, tools, coordinator, and builds system prompt. - - All adapters can call this to get the full platform feature set. - Returns a SetupResult with LangChain BaseTool instances that adapters - convert to their native format if needed. - """ - from plugins import load_plugins - from skill_loader.loader import load_skills - from coordinator import get_children, get_parent_context, build_children_description - from prompt import build_system_prompt, get_peer_capabilities - from builtin_tools.approval import request_approval - from builtin_tools.delegation import delegate_to_workspace, check_delegation_status - from builtin_tools.memory import commit_memory, search_memory - from builtin_tools.sandbox import run_code - - platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080") - - # Load plugins from per-workspace dir first, then shared fallback - workspace_plugins_dir = os.path.join(config.config_path, "plugins") - plugins = load_plugins( - workspace_plugins_dir=workspace_plugins_dir, - shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"), - ) - await self.inject_plugins(config, plugins) - if plugins.plugin_names: - logger.info(f"Plugins: {', '.join(plugins.plugin_names)}") - - # Load skills (workspace + plugin skills, deduped) - loaded_skills = load_skills(config.config_path, config.tools) - seen_skill_ids = {s.metadata.id for s in loaded_skills} - for plugin_skills_dir in plugins.skill_dirs: - plugin_skill_names = [ - d for d in os.listdir(plugin_skills_dir) - if os.path.isdir(os.path.join(plugin_skills_dir, d)) - ] - for skill in load_skills(plugin_skills_dir, plugin_skill_names): - if skill.metadata.id not in seen_skill_ids: - loaded_skills.append(skill) - seen_skill_ids.add(skill.metadata.id) - logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}") - - # Assemble tools: 6 core + skill tools - all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code] - for skill in loaded_skills: - all_tools.extend(skill.tools) - - # Coordinator mode: detect children and add routing tool - children = await get_children() - is_coordinator = len(children) > 0 - if is_coordinator: - from coordinator import route_task_to_team - logger.info(f"Coordinator mode: {len(children)} children") - all_tools.append(route_task_to_team) - - # Parent context (if this is a child workspace) - parent_context = await get_parent_context() - - # Build system prompt with all context - peers = await get_peer_capabilities(platform_url, config.workspace_id) - coordinator_prompt = build_children_description(children) if is_coordinator else "" - extra_prompts = list(plugins.prompt_fragments) - if coordinator_prompt: - extra_prompts.append(coordinator_prompt) - - system_prompt = build_system_prompt( - config.config_path, config.workspace_id, loaded_skills, peers, - prompt_files=config.prompt_files, - plugin_rules=plugins.rules, - plugin_prompts=extra_prompts, - parent_context=parent_context, - ) - - return SetupResult( - system_prompt=system_prompt, - loaded_skills=loaded_skills, - langchain_tools=all_tools, - is_coordinator=is_coordinator, - children=children, - ) - - @abstractmethod - async def setup(self, config: AdapterConfig) -> None: - """One-time setup: 
validate config, prepare internal state. - Called after deps are installed but before create_executor(). - Raise RuntimeError if setup fails (missing deps, bad config, etc.).""" - ... # pragma: no cover - - @abstractmethod - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - """Create and return an AgentExecutor ready for A2A integration. - The returned executor's execute() method will be called by the - A2A server's DefaultRequestHandler.""" - ... # pragma: no cover +"""Re-export from adapter_base for backward compat.""" +from adapter_base import * # noqa: F401,F403 diff --git a/workspace-template/adapters/claude_code/__init__.py b/workspace-template/adapters/claude_code/__init__.py deleted file mode 100644 index c97b1753..00000000 --- a/workspace-template/adapters/claude_code/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .adapter import ClaudeCodeAdapter - -Adapter = ClaudeCodeAdapter diff --git a/workspace-template/adapters/claude_code/adapter.py b/workspace-template/adapters/claude_code/adapter.py deleted file mode 100644 index 96ecd050..00000000 --- a/workspace-template/adapters/claude_code/adapter.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Claude Code adapter — wraps the Claude Code CLI as an agent runtime.""" - -import json -import os -import logging -from pathlib import Path - -from adapters.base import BaseAdapter, AdapterConfig -from a2a.server.agent_execution import AgentExecutor - -logger = logging.getLogger(__name__) - -# Cap one transcript response at 1000 lines so a paranoid client can't OOM -# the workspace by polling /transcript?limit=999999. -_TRANSCRIPT_MAX_LIMIT = 1000 - - -class ClaudeCodeAdapter(BaseAdapter): - - @staticmethod - def name() -> str: - return "claude-code" - - @staticmethod - def display_name() -> str: - return "Claude Code" - - @staticmethod - def description() -> str: - return "Claude Code CLI — full agentic coding with hooks, CLAUDE.md, auto-memory, and MCP support" - - @staticmethod - def get_config_schema() -> dict: - return { - "model": {"type": "string", "description": "Claude model (e.g. sonnet, opus, haiku)", "default": "sonnet"}, - "required_env": {"type": "array", "description": "Required env vars", "default": ["CLAUDE_CODE_OAUTH_TOKEN"]}, - "timeout": {"type": "integer", "description": "Timeout in seconds (0 = no timeout)", "default": 0}, - } - - async def setup(self, config: AdapterConfig) -> None: - """Install plugins via the per-runtime adaptor registry. - - The legacy claude-code-specific ``inject_plugins()`` override is gone: - each plugin now ships (or has registered in the platform registry) a - per-runtime adaptor, and ``BaseAdapter.install_plugins_via_registry`` - routes installs through it. The Claude Code SDK still reads - ``CLAUDE.md`` and ``/configs/skills/`` natively, and the default - :class:`AgentskillsAdaptor` writes to both. 
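# Illustrative sketch (not part of this patch): the plugin-side half of the
# hook contract that install_plugins_via_registry drives. Field and callback
# names follow the InstallContext built earlier; InstallResult is assumed to
# live in plugins_registry beside InstallContext and to carry a `warnings`
# list, since the registry loop reads result.warnings.
from plugins_registry import InstallResult

class SketchPluginAdaptor:
    async def install(self, ctx):
        # Drop a rules block into the runtime's memory file (CLAUDE.md by
        # default); the hook dedupes on the first line, so reinstalls are safe.
        ctx.append_to_memory(ctx.memory_filename, "# Plugin: sketch\nHouse rules.\n")
        # No-op on filesystem-scan runtimes; DeepAgents registers in-process.
        ctx.register_tool("sketch_lookup", lambda q: f"looked up {q}")
        return InstallResult(warnings=[])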
- """ - from plugins import load_plugins - workspace_plugins_dir = os.path.join(config.config_path, "plugins") - plugins = load_plugins( - workspace_plugins_dir=workspace_plugins_dir, - shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"), - ) - await self.install_plugins_via_registry(config, plugins) - - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - from claude_sdk_executor import ClaudeSDKExecutor - - # Load system prompt if exists - system_prompt = config.system_prompt - if not system_prompt: - prompt_file = os.path.join(config.config_path, "system-prompt.md") - if os.path.exists(prompt_file): - with open(prompt_file) as f: - system_prompt = f.read() - - # runtime_config may arrive as a dict (from main.py vars(...)) or as a - # RuntimeConfig dataclass. Read `model` defensively from either shape. - rc = config.runtime_config - if isinstance(rc, dict): - model = rc.get("model") or "sonnet" - else: - model = getattr(rc, "model", None) or "sonnet" - - return ClaudeSDKExecutor( - system_prompt=system_prompt, - config_path=config.config_path, - heartbeat=config.heartbeat, - model=model, - ) - - async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict: - """Read the live Claude Code session transcript. - - Claude Code writes every session to - ``$HOME/.claude/projects//.jsonl`` — - every line is a JSON event (user/assistant/tool_use/attachment/etc). - We pick the most-recently-modified .jsonl in the projects dir for - the agent's working directory, then return ``[since:since+limit]``. - - Returns ``supported: True`` even if no .jsonl exists yet (empty - ``lines`` + ``cursor=0``) so the canvas can show "agent hasn't - produced output yet" instead of "feature unavailable". - """ - limit = max(1, min(limit, _TRANSCRIPT_MAX_LIMIT)) - since = max(0, since) - - # Resolve the projects-dir name. Claude Code maps cwd → dirname by - # replacing "/" with "-" (so "/configs" → "-configs"). The exact - # rule lives inside the CLI binary, but the leading-dash + path- - # without-trailing-slash pattern is stable across versions. - # - # Match ClaudeSDKExecutor._resolve_cwd: prefer /workspace if populated, - # else /configs. Override via CLAUDE_PROJECT_CWD for tests. 
- WORKSPACE_MOUNT = "/workspace" - CONFIG_MOUNT = "/configs" - cwd_override = os.environ.get("CLAUDE_PROJECT_CWD") - if cwd_override: - cwd = cwd_override - elif os.path.isdir(WORKSPACE_MOUNT) and os.listdir(WORKSPACE_MOUNT): - cwd = WORKSPACE_MOUNT - else: - cwd = CONFIG_MOUNT - - # Normalize: strip trailing slash, replace path separators with "-" - cwd_norm = cwd.rstrip("/") or "/" - projdir_name = cwd_norm.replace("/", "-") # "/configs" → "-configs" - - home = Path(os.environ.get("HOME", "/home/agent")) - projdir = home / ".claude" / "projects" / projdir_name - result_base = { - "runtime": self.name(), - "supported": True, - "lines": [], - "cursor": since, - "more": False, - "source": str(projdir), - } - - if not projdir.is_dir(): - return result_base - - # Pick most-recently-modified .jsonl - candidates = sorted(projdir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True) - if not candidates: - return result_base - target = candidates[0] - result_base["source"] = str(target) - - lines = [] - more = False - try: - with target.open("r") as f: - for i, raw in enumerate(f): - if i < since: - continue - if len(lines) >= limit: - more = True - break - raw = raw.strip() - if not raw: - continue - try: - lines.append(json.loads(raw)) - except json.JSONDecodeError: - # Skip malformed lines but keep cursor advancing - lines.append({"_parse_error": True, "_raw": raw[:200]}) - except OSError as exc: - logger.warning("transcript_lines: read failed for %s: %s", target, exc) - return result_base - - result_base["lines"] = lines - result_base["cursor"] = since + len(lines) - result_base["more"] = more - return result_base diff --git a/workspace-template/adapters/crewai/__init__.py b/workspace-template/adapters/crewai/__init__.py deleted file mode 100644 index 8f464b9b..00000000 --- a/workspace-template/adapters/crewai/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .adapter import CrewAIAdapter - -Adapter = CrewAIAdapter diff --git a/workspace-template/adapters/crewai/adapter.py b/workspace-template/adapters/crewai/adapter.py deleted file mode 100644 index 4d0cb5e3..00000000 --- a/workspace-template/adapters/crewai/adapter.py +++ /dev/null @@ -1,144 +0,0 @@ -"""CrewAI adapter — role-based multi-agent framework with full platform integration. - -Creates a CrewAI Agent + Task + Crew with all platform tools (delegation, memory, -sandbox, approval), skills, plugins, and coordinator support. - -Requires: pip install crewai -""" - -import asyncio -import logging - -from adapters.base import BaseAdapter, AdapterConfig -from a2a.server.agent_execution import AgentExecutor - -logger = logging.getLogger(__name__) - - -def _langchain_to_crewai(lc_tool): - """Wrap a LangChain BaseTool as a sync CrewAI @tool. - - CrewAI's @tool decorator requires the function to have a docstring - at decoration time, so we set __doc__ before applying the decorator. 
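# Illustrative sketch (not part of this patch): client-side use of the
# cursor/more contract that transcript_lines() implements above. The adapter
# instance and poll interval are placeholders.
import asyncio

async def tail_transcript(adapter, interval: float = 2.0) -> None:
    cursor = 0
    while True:
        page = await adapter.transcript_lines(since=cursor, limit=100)
        if not page["supported"]:
            return  # runtime keeps no on-disk session log
        for entry in page["lines"]:
            print(str(entry)[:120])  # tool calls, AI thinking, etc.
        cursor = page["cursor"]  # resume here on the next poll
        if not page["more"]:
            await asyncio.sleep(interval)  # drained; wait for new output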
- """ - from crewai.tools import tool as crewai_tool - - def wrapper(**kwargs) -> str: - """Placeholder.""" - result = asyncio.get_event_loop().run_until_complete(lc_tool.ainvoke(kwargs)) - return str(result) - - wrapper.__name__ = lc_tool.name - wrapper.__doc__ = lc_tool.description or f"Tool: {lc_tool.name}" - return crewai_tool(lc_tool.name)(wrapper) - - -class CrewAIAdapter(BaseAdapter): - - def __init__(self): - self.system_prompt = None - self.crewai_tools = [] - - @staticmethod - def name() -> str: - return "crewai" - - @staticmethod - def display_name() -> str: - return "CrewAI" - - @staticmethod - def description() -> str: - return "CrewAI — role-based agent with task delegation and crew orchestration" - - @staticmethod - def get_config_schema() -> dict: - return { - "model": {"type": "string", "description": "LLM model (e.g. openai:gpt-4.1-mini)"}, - "skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"}, - "tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools"}, - } - - async def setup(self, config: AdapterConfig) -> None: - try: - import crewai # noqa: F401 - logger.info(f"CrewAI version: {crewai.__version__}") - except ImportError: - raise RuntimeError("crewai not installed.") - - result = await self._common_setup(config) - self.system_prompt = result.system_prompt - self.crewai_tools = [_langchain_to_crewai(t) for t in result.langchain_tools] - logger.info(f"CrewAI tools: {[t.name for t in result.langchain_tools]}") - - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - return CrewAIA2AExecutor( - model=config.model, - system_prompt=self.system_prompt, - crewai_tools=self.crewai_tools, - heartbeat=config.heartbeat, - ) - - -class CrewAIA2AExecutor(AgentExecutor): - """Wraps CrewAI's Agent + Crew.kickoff() with full platform tools.""" - - def __init__(self, model: str, system_prompt: str | None, crewai_tools: list, heartbeat=None): - self.model = model - self.system_prompt = system_prompt - self.crewai_tools = crewai_tools - self._heartbeat = heartbeat - - async def execute(self, context, event_queue): - from a2a.utils import new_agent_text_message - from adapters.shared_runtime import extract_history, build_task_text, brief_task, set_current_task - - from adapters.shared_runtime import extract_message_text - user_message = extract_message_text(context) - - if not user_message: - await event_queue.enqueue_event(new_agent_text_message("No message provided")) - return - - await set_current_task(self._heartbeat, brief_task(user_message)) - - try: - from crewai import Agent, Task, Crew - - model_str = self.model - if model_str.startswith("openai:"): - model_str = model_str.replace("openai:", "openai/") - - backstory = self.system_prompt or "You are a helpful AI agent." 
- - history = extract_history(context) - task_desc = build_task_text(user_message, history) - - agent = Agent( - role=backstory.split("\n")[0][:100], - goal="Help the user and coordinate with peer agents when needed", - backstory=backstory, - llm=model_str, - tools=self.crewai_tools, - verbose=False, - ) - - task = Task( - description=task_desc, - expected_output="A helpful response", - agent=agent, - ) - - crew = Crew(agents=[agent], tasks=[task], verbose=False) - result = await asyncio.to_thread(crew.kickoff) - reply = str(result) - - except Exception as e: - reply = f"CrewAI error: {e}" - finally: - await set_current_task(self._heartbeat, "") - - await event_queue.enqueue_event(new_agent_text_message(reply)) - - async def cancel(self, context, event_queue): # pragma: no cover - pass diff --git a/workspace-template/adapters/deepagents/__init__.py b/workspace-template/adapters/deepagents/__init__.py deleted file mode 100644 index ccbf1896..00000000 --- a/workspace-template/adapters/deepagents/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .adapter import DeepAgentsAdapter - -Adapter = DeepAgentsAdapter diff --git a/workspace-template/adapters/deepagents/adapter.py b/workspace-template/adapters/deepagents/adapter.py deleted file mode 100644 index 0f770dc3..00000000 --- a/workspace-template/adapters/deepagents/adapter.py +++ /dev/null @@ -1,184 +0,0 @@ -"""DeepAgents adapter — fully utilizing the DeepAgents SDK. - -Uses create_deep_agent() with: -- FilesystemBackend(/workspace) — persistent file access across messages -- MemorySaver checkpointer — session continuity -- Memory files — CLAUDE.md loaded natively -- Filesystem permissions — restrict writes to /workspace and /configs -- InMemoryCache — avoid repeat API calls -- All built-in tools: write_todos, read_file, write_file, edit_file, - ls, glob, grep, execute, task - -Supports: anthropic, openai, openrouter, groq, cerebras, google_genai, ollama. -""" - -import os -import glob as globmod -import logging - -from adapters.base import BaseAdapter, AdapterConfig -from a2a.server.agent_execution import AgentExecutor - -logger = logging.getLogger(__name__) - - -class DeepAgentsAdapter(BaseAdapter): - - def __init__(self): - self.agent = None - self._checkpointer = None - - @staticmethod - def name() -> str: - return "deepagents" - - @staticmethod - def display_name() -> str: - return "DeepAgents" - - @staticmethod - def description() -> str: - return "LangChain DeepAgents — planning, filesystem, sub-agents, shell execution, session persistence" - - @staticmethod - def get_config_schema() -> dict: - return { - "model": { - "type": "string", - "description": "provider:model (e.g. 
google_genai:gemini-2.5-flash, groq:llama-3.3-70b-versatile)", - "default": "google_genai:gemini-2.5-flash", - }, - "skills": {"type": "array", "items": {"type": "string"}}, - "tools": {"type": "array", "items": {"type": "string"}}, - } - - def _create_llm(self, model_str: str): - """Create a LangChain LLM from a provider:model string.""" - if ":" in model_str: - provider, model_name = model_str.split(":", 1) - else: - provider, model_name = "anthropic", model_str - - if provider == "openai": - from langchain_openai import ChatOpenAI - kwargs = {"model": model_name} - base_url = os.environ.get("OPENAI_BASE_URL", "") - if base_url: - kwargs["openai_api_base"] = base_url - return ChatOpenAI(**kwargs) - elif provider == "openrouter": - from langchain_openai import ChatOpenAI - return ChatOpenAI( - model=model_name, - openai_api_key=os.environ.get("OPENROUTER_API_KEY", os.environ.get("OPENAI_API_KEY", "")), - openai_api_base="https://openrouter.ai/api/v1", - max_tokens=int(os.environ.get("MAX_TOKENS", "2048")), - ) - elif provider == "groq": - from langchain_openai import ChatOpenAI - return ChatOpenAI( - model=model_name, - openai_api_key=os.environ.get("GROQ_API_KEY", ""), - openai_api_base="https://api.groq.com/openai/v1", - ) - elif provider == "cerebras": - from langchain_openai import ChatOpenAI - return ChatOpenAI( - model=model_name, - openai_api_key=os.environ.get("CEREBRAS_API_KEY", ""), - openai_api_base="https://api.cerebras.ai/v1", - ) - elif provider == "qianfan": - from langchain_openai import ChatOpenAI - return ChatOpenAI( - model=model_name, - openai_api_key=os.environ.get("QIANFAN_API_KEY", os.environ.get("AISTUDIO_API_KEY", "")), - openai_api_base="https://qianfan.baidubce.com/v2", - ) - elif provider == "anthropic": - from langchain_anthropic import ChatAnthropic - kwargs = {"model": model_name} - base_url = os.environ.get("ANTHROPIC_BASE_URL", "") - if base_url: - kwargs["anthropic_api_url"] = base_url - return ChatAnthropic(**kwargs) - elif provider == "google_genai": - from langchain_google_genai import ChatGoogleGenerativeAI - return ChatGoogleGenerativeAI(model=model_name) - elif provider == "ollama": - from langchain_ollama import ChatOllama - return ChatOllama(model=model_name) - else: - raise ValueError(f"Unsupported model provider: {provider}") - - async def setup(self, config: AdapterConfig) -> None: - try: - from deepagents import create_deep_agent, FilesystemPermission - from deepagents.backends import FilesystemBackend - from langgraph.checkpoint.memory import MemorySaver - from langchain_core.caches import InMemoryCache - except ImportError as e: - raise RuntimeError(f"deepagents not installed: {e}") - - result = await self._common_setup(config) - logger.info("DeepAgents platform tools: %s", [t.name for t in result.langchain_tools]) - - llm = self._create_llm(config.model) - - # FilesystemBackend — persistent file access - workspace_dir = "/workspace" if os.path.isdir("/workspace") else "/configs" - # virtual_mode=False: read/write the real bind-mounted filesystem so - # read_file/ls/write_file/edit_file match what `bash` sees. With - # virtual_mode=True agents operate on an in-memory snapshot and - # report real files as "missing" (and writes don't persist across - # restarts). Permissions below still scope access to /workspace + /configs. 
- backend = FilesystemBackend(root_dir=workspace_dir, virtual_mode=False) - - # MemorySaver — session continuity - self._checkpointer = MemorySaver() - - # Memory — load CLAUDE.md natively - memory_files = [] - claude_md = os.path.join(config.config_path, "CLAUDE.md") - if os.path.exists(claude_md): - memory_files.append(claude_md) - - # Filesystem permissions - permissions = [ - FilesystemPermission(operations=["read", "write"], paths=["/workspace/**"], mode="allow"), - FilesystemPermission(operations=["read", "write"], paths=["/configs/**"], mode="allow"), - ] - - # Native skills from /configs/skills/*.py - deepagent_skills = [] - skills_dir = os.path.join(config.config_path, "skills") - if os.path.isdir(skills_dir): - deepagent_skills = globmod.glob(os.path.join(skills_dir, "**", "*.py"), recursive=True) - - # LLM cache - cache = InMemoryCache() - - self.agent = create_deep_agent( - model=llm, - tools=result.langchain_tools, - system_prompt=result.system_prompt, - backend=backend, - checkpointer=self._checkpointer, - memory=memory_files if memory_files else None, - permissions=permissions, - skills=deepagent_skills if deepagent_skills else None, - cache=cache, - ) - - logger.info( - "DeepAgents: %d tools, backend=%s, checkpointer=MemorySaver, " - "cache=InMemoryCache, memory=%d, permissions=%d, skills=%d", - len(result.langchain_tools), type(backend).__name__, - len(memory_files), len(permissions), len(deepagent_skills), - ) - - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - if self.agent is None: - raise RuntimeError("setup() must be called before create_executor()") - from a2a_executor import LangGraphA2AExecutor - return LangGraphA2AExecutor(self.agent, heartbeat=config.heartbeat, model=config.model) diff --git a/workspace-template/adapters/gemini_cli/__init__.py b/workspace-template/adapters/gemini_cli/__init__.py deleted file mode 100644 index 3e6ad4c5..00000000 --- a/workspace-template/adapters/gemini_cli/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .adapter import GeminiCLIAdapter as Adapter - -__all__ = ["Adapter"] diff --git a/workspace-template/adapters/gemini_cli/adapter.py b/workspace-template/adapters/gemini_cli/adapter.py deleted file mode 100644 index 7013c275..00000000 --- a/workspace-template/adapters/gemini_cli/adapter.py +++ /dev/null @@ -1,141 +0,0 @@ -"""Gemini CLI adapter — wraps Google's Gemini CLI as an agent runtime. - -Gemini CLI (github.com/google-gemini/gemini-cli, ~101k stars, Apache 2.0) -is structurally identical to the Claude Code adapter: a single-agent agentic -CLI with file/shell tools, MCP support, and a ReAct loop — backed by Gemini -instead of Claude. 
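# Illustrative sketch (not part of this patch): what the MemorySaver
# checkpointer wired in above buys. LangGraph-style agents take a
# configurable thread_id; reusing it resumes the checkpointed session.
# The exact message payload shape may vary by deepagents version.
async def two_turns(agent):
    session = {"configurable": {"thread_id": "workspace-main"}}
    await agent.ainvoke({"messages": [("user", "list /workspace")]}, config=session)
    # A later turn with the same thread_id resumes the earlier state:
    await agent.ainvoke({"messages": [("user", "now grep those files for TODO")]},
                        config=session)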
- -Key differences from claude-code: -- Auth: GEMINI_API_KEY env var (no OAuth token needed) -- Memory file: GEMINI.md (equivalent of Claude Code's CLAUDE.md) -- MCP config: ~/.gemini/settings.json (not via --mcp-config flag) -- Executor: CLIAgentExecutor (no Python SDK; uses gemini CLI subprocess) -""" - -import json -import logging -import os -import sys -from pathlib import Path - -from a2a.server.agent_execution import AgentExecutor - -from adapters.base import BaseAdapter, AdapterConfig - -logger = logging.getLogger(__name__) - - -class GeminiCLIAdapter(BaseAdapter): - - @staticmethod - def name() -> str: - return "gemini-cli" - - @staticmethod - def display_name() -> str: - return "Gemini CLI" - - @staticmethod - def description() -> str: - return ( - "Google Gemini CLI — agentic coding with file/shell tools, " - "MCP support, and a ReAct loop backed by Gemini models" - ) - - @staticmethod - def get_config_schema() -> dict: - return { - "model": { - "type": "string", - "description": "Gemini model (e.g. gemini-2.5-pro, gemini-2.5-flash)", - "default": "gemini-2.5-pro", - }, - "required_env": { - "type": "array", - "description": "Required env vars", - "default": ["GEMINI_API_KEY"], - }, - "timeout": { - "type": "integer", - "description": "Timeout in seconds (0 = no timeout)", - "default": 0, - }, - } - - def memory_filename(self) -> str: - """Gemini CLI reads GEMINI.md as its persistent context file.""" - return "GEMINI.md" - - async def setup(self, config: AdapterConfig) -> None: - """Wire MCP server into ~/.gemini/settings.json and seed GEMINI.md. - - Gemini CLI does not accept an --mcp-config flag; instead, MCP servers - are declared in ~/.gemini/settings.json under the "mcpServers" key. - This method merges the A2A MCP server into that file, preserving any - existing keys (e.g. user's own MCP tools). - - Also seeds GEMINI.md from system-prompt.md if GEMINI.md is absent, - so the agent has role context on first boot. 
- """ - from executor_helpers import get_mcp_server_path - - # -- MCP wiring -------------------------------------------------- - gemini_dir = Path.home() / ".gemini" - gemini_dir.mkdir(parents=True, exist_ok=True) - settings_path = gemini_dir / "settings.json" - - settings: dict = {} - if settings_path.exists(): - try: - settings = json.loads(settings_path.read_text()) - except Exception as exc: - logger.warning("gemini-cli: could not parse %s: %s", settings_path, exc) - settings = {} - - settings.setdefault("mcpServers", {}) - settings["mcpServers"]["a2a"] = { - "command": sys.executable, - "args": [get_mcp_server_path()], - } - - try: - settings_path.write_text(json.dumps(settings, indent=2)) - logger.info("gemini-cli: wrote MCP config to %s", settings_path) - except OSError as exc: - logger.warning("gemini-cli: could not write %s: %s", settings_path, exc) - - # -- GEMINI.md seed ---------------------------------------------- - gemini_md = Path(config.config_path) / "GEMINI.md" - system_prompt_file = Path(config.config_path) / "system-prompt.md" - if not gemini_md.exists() and system_prompt_file.exists(): - try: - gemini_md.write_text(system_prompt_file.read_text()) - logger.info("gemini-cli: seeded GEMINI.md from system-prompt.md") - except OSError as exc: - logger.warning("gemini-cli: could not seed GEMINI.md: %s", exc) - - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - from cli_executor import CLIAgentExecutor - from config import RuntimeConfig - - rc = config.runtime_config - if isinstance(rc, dict): - model = rc.get("model") or "gemini-2.5-pro" - timeout = int(rc.get("timeout") or 0) - else: - model = getattr(rc, "model", None) or "gemini-2.5-pro" - timeout = int(getattr(rc, "timeout", None) or 0) - - runtime_config = RuntimeConfig( - model=model, - timeout=timeout, - required_env=["GEMINI_API_KEY"], - ) - - return CLIAgentExecutor( - runtime="gemini-cli", - runtime_config=runtime_config, - system_prompt=config.system_prompt, - config_path=config.config_path, - heartbeat=config.heartbeat, - ) diff --git a/workspace-template/adapters/hermes/__init__.py b/workspace-template/adapters/hermes/__init__.py deleted file mode 100644 index 37dc9a89..00000000 --- a/workspace-template/adapters/hermes/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .adapter import HermesAdapter -from .executor import create_executor - -Adapter = HermesAdapter - -__all__ = ["create_executor", "HermesAdapter", "Adapter"] diff --git a/workspace-template/adapters/hermes/adapter.py b/workspace-template/adapters/hermes/adapter.py deleted file mode 100644 index 63c7933f..00000000 --- a/workspace-template/adapters/hermes/adapter.py +++ /dev/null @@ -1,76 +0,0 @@ -"""Hermes adapter — Nous Research Hermes models via Nous Portal or OpenRouter. - -Uses the OpenAI-compatible client (openai>=1.0.0) to communicate with -either the Nous Portal directly (HERMES_API_KEY) or OpenRouter as a -fallback (OPENROUTER_API_KEY). -""" -import os - -from adapters.base import BaseAdapter, AdapterConfig - - -class HermesAdapter(BaseAdapter): - - @staticmethod - def name() -> str: - return "hermes" - - @staticmethod - def display_name() -> str: - return "Hermes (Nous Research)" - - @staticmethod - def description() -> str: - return "Hermes models via Nous Portal or OpenRouter — openai>=1.0.0 compatible client" - - @staticmethod - def get_config_schema() -> dict: - return { - "model": { - "type": "string", - "description": ( - "Hermes model ID (e.g. 
nousresearch/hermes-3-llama-3.1-405b for OpenRouter " - "or hermes-3-llama-3.1-405b for Nous Portal)" - ), - }, - } - - async def setup(self, config: AdapterConfig) -> None: # pragma: no cover - try: - import openai # noqa: F401 - except ImportError as e: - raise RuntimeError( - "Hermes adapter requires openai>=1.0.0 — " - "install with: pip install 'openai>=1.0.0'" - ) from e - - async def create_executor(self, config: AdapterConfig): # pragma: no cover - """Create and return a HermesA2AExecutor using key resolution from env/config.""" - from .executor import create_executor, HermesA2AExecutor - - # Resolve API key: prefer workspace secrets (runtime_config), then env vars - hermes_api_key = config.runtime_config.get("hermes_api_key") or None - - # Phase 3 escalation ladder — read from runtime_config.escalation_ladder - # if present. The platform's org importer copies the ladder from - # org.yaml (runtime_config.escalation_ladder) into the container's - # /configs/config.yaml, and the workspace-template loader surfaces it - # here. Empty / missing = single-shot behaviour (unchanged from pre- - # Phase-3). See adapters.hermes.escalation for classification rules. - escalation_ladder = config.runtime_config.get("escalation_ladder") or None - - executor = create_executor( - hermes_api_key=hermes_api_key, - config_path=config.config_path, # Phase 2d-i: system-prompt.md injection - escalation_ladder=escalation_ladder, - ) - - # Override model from config if provided - model = config.model - if ":" in model: - _, model = model.split(":", 1) - if model: - executor.model = model - - executor._heartbeat = config.heartbeat - return executor diff --git a/workspace-template/adapters/hermes/escalation.py b/workspace-template/adapters/hermes/escalation.py deleted file mode 100644 index fb14f363..00000000 --- a/workspace-template/adapters/hermes/escalation.py +++ /dev/null @@ -1,201 +0,0 @@ -"""Hermes escalation ladder — promote to stronger models on transient failure. - -Every workspace in the Hermes adapter path has a single pinned model today -(``provider_cfg.default_model`` overridden by ``runtime_config.model`` in -``config.yaml``). That's fine when the pinned model is the best fit, but -it leaves four recurring failure classes unhandled: - -1. **Rate limits** (Claude Max saturation, Anthropic 429, OpenAI 429). We're - currently saturating 3× Claude Max subscriptions — the first 429 is now - the norm, not the exception. -2. **Transient 5xx** from any provider (overloaded 529, 500, 502, 503). -3. **Context-length exceeded** on the smaller-window model (Haiku has 200k, - cheaper Gemini flash tiers have less, OpenAI nano/mini have 128k). -4. **Refusal / empty response** from a cheaper tier that the next tier up - would handle — less common but real in practice. - -An escalation ladder is a workspace-configured list of ``LadderRung`` entries -(provider + model). On a qualifying failure, the executor advances to the -next rung and retries the same user_message + history. If the ladder is -exhausted, the last error is raised. - -## Config shape - -``config.yaml``:: - - hermes: - escalation_ladder: - - provider: gemini - model: gemini-2.5-flash # fast/cheap probe - - provider: anthropic - model: claude-haiku-4-5-20251001 - - provider: anthropic - model: claude-sonnet-4-5-20250929 - - provider: anthropic - model: claude-opus-4-1-20250805 # frontier rescue - -When ``escalation_ladder`` is absent, the executor behaves exactly as before: -one call, one model, errors bubble. 
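-
-A minimal programmatic sketch of the intended retry shape (``dispatch`` here
-is a hypothetical stand-in for the executor's real dispatch method)::
-
-    rungs = parse_ladder([
-        {"provider": "gemini", "model": "gemini-2.5-flash"},
-        {"provider": "anthropic", "model": "claude-sonnet-4-5-20250929"},
-    ])
-    try:
-        reply = await dispatch(rungs[0])
-    except Exception as exc:
-        if len(rungs) > 1 and should_escalate(exc):
-            reply = await dispatch(rungs[1])  # promote to the next rung
-        else:
-            raise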
-
-## What this module does NOT do (yet)
-
-- **No uncertainty-driven escalation.** Only transient-failure escalation.
-  Promoting on "the answer felt thin" requires a judge pass — follow-up.
-- **No streaming partial-result aggregation.** The first rung that succeeds
-  returns; we don't splice responses across rungs.
-- **No per-workspace budget tracking.** Each escalation is one more paid
-  call. Follow-up work (#305 budget cap) handles that.
-"""
-
-from __future__ import annotations
-
-import logging
-from dataclasses import dataclass
-from typing import Optional
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass(frozen=True)
-class LadderRung:
-    """One rung on the escalation ladder.
-
-    ``provider`` is a canonical short name from ``providers.PROVIDERS``.
-    ``model`` overrides the provider's default for this rung.
-    """
-
-    provider: str
-    model: str
-
-
-def parse_ladder(raw: Optional[list]) -> list[LadderRung]:
-    """Parse the ``escalation_ladder`` list from ``config.yaml`` into rungs.
-
-    Accepts either dict-shaped entries (``{"provider": ..., "model": ...}``)
-    or pre-built LadderRung instances (for programmatic callers). Skips
-    malformed entries with a warning rather than raising — a noisy warning
-    at boot is better than refusing to start over one bad rung.
-
-    Empty / None / missing input returns an empty list (caller interprets
-    as "no ladder configured, single-shot dispatch").
-    """
-    if not raw:
-        return []
-    rungs: list[LadderRung] = []
-    for i, entry in enumerate(raw):
-        if isinstance(entry, LadderRung):
-            rungs.append(entry)
-            continue
-        if not isinstance(entry, dict):
-            logger.warning(
-                "Hermes ladder: rung %d is not a dict (%r), skipping", i, type(entry).__name__,
-            )
-            continue
-        provider = entry.get("provider")
-        model = entry.get("model")
-        if not provider or not model:
-            logger.warning(
-                "Hermes ladder: rung %d missing provider or model (%r), skipping", i, entry,
-            )
-            continue
-        rungs.append(LadderRung(provider=str(provider), model=str(model)))
-    return rungs
-
-
-# Error-type names that indicate a transient failure worth escalating.
-# We match on the class name (not the module) so this works regardless of
-# whether the workspace imported the new or old anthropic / openai SDK.
-# See ``should_escalate`` for the matching logic.
-_ESCALATABLE_ERROR_CLASSES = frozenset({
-    # openai SDK
-    "RateLimitError",       # 429
-    "APITimeoutError",      # connect/read timeout
-    "APIConnectionError",   # TCP / DNS
-    "InternalServerError",  # 500
-    # anthropic SDK
-    "OverloadedError",      # 529
-    "APIStatusError",       # generic 5xx wrapper
-    # common across both: network-level errors
-    "ConnectionError",
-    "Timeout",
-    "ReadTimeout",
-})
-
-# Error-message substrings that indicate context-length exceeded. These map
-# to distinct HTTP 400 responses from each provider rather than a typed
-# exception, so we match on substring.
-_CONTEXT_LENGTH_MARKERS = (
-    "maximum context length",   # openai
-    "context_length_exceeded",  # openai error.code
-    "prompt is too long",       # anthropic
-    "prompt_too_long",          # anthropic error.code
-    "context window",           # gemini
-)
-
-# Error-message substrings that indicate a transient gateway issue. These
-# sometimes come through as generic exceptions without typed classes.
-_TRANSIENT_GATEWAY_MARKERS = (
-    "502 bad gateway",
-    "503 service unavailable",
-    "504 gateway timeout",
-    "overloaded",
-    "please try again",
-    "temporarily unavailable",
-)
-
-# Error-message substrings that definitively DO NOT qualify for escalation.
-# Auth and malformed-payload errors don't get better by retrying on a -# different model — they indicate config / code bugs. -_NON_ESCALATABLE_MARKERS = ( - "invalid api key", - "authentication_error", - "401", - "403", - "forbidden", - "permission_denied", - "unauthorized", -) - - -def should_escalate(exc: BaseException) -> bool: - """Decide whether ``exc`` justifies moving to the next ladder rung. - - Returns True when the failure is one of: - - Rate limit (429 / RateLimitError / OverloadedError) - - Transient gateway (5xx, overload, timeout, connection reset) - - Context-length exceeded on the current model - - Returns False for auth, permission, malformed-payload, and other - config-bug classes — escalating those just wastes the next-tier quota. - """ - if exc is None: - return False - - cls_name = exc.__class__.__name__ - msg = str(exc).lower() - - # Hard reject: never escalate auth/permission errors regardless of - # what the class name says. A wrapped RateLimitError that actually - # contains "401 Unauthorized" is a config bug, not a rate limit. - for marker in _NON_ESCALATABLE_MARKERS: - if marker in msg: - return False - - if cls_name in _ESCALATABLE_ERROR_CLASSES: - return True - - for marker in _CONTEXT_LENGTH_MARKERS: - if marker in msg: - return True - - for marker in _TRANSIENT_GATEWAY_MARKERS: - if marker in msg: - return True - - # Status-code prefixes are a common tell for HTTP-wrapped provider errors. - if "429" in msg or "529" in msg: - return True - if any(code in msg for code in ("500 ", "502 ", "503 ", "504 ")): - return True - - return False diff --git a/workspace-template/adapters/hermes/executor.py b/workspace-template/adapters/hermes/executor.py deleted file mode 100644 index 98b50ecc..00000000 --- a/workspace-template/adapters/hermes/executor.py +++ /dev/null @@ -1,543 +0,0 @@ -"""Hermes adapter executor — Phase 2 multi-provider with native SDK dispatch. - -Hermes supports 15 providers via the shared ``providers.py`` registry. Each -provider's ``auth_scheme`` field controls which client + request shape the -executor uses: - -- ``auth_scheme="openai"`` (13 providers) — OpenAI-compat ``/v1/chat/completions`` - via the ``openai`` Python SDK. Covers: Nous Portal, OpenRouter, OpenAI, xAI, - Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral. - -- ``auth_scheme="anthropic"`` (1 provider — anthropic) — native Messages API via - the ``anthropic`` Python SDK. Phase 2a: better tool calling, vision support, - extended thinking semantics. If the ``anthropic`` package isn't installed in - the workspace image, ``_do_anthropic_native`` raises a clear error with - install instructions rather than silently falling back to the OpenAI-compat - shim (which would lose fidelity invisibly). - -- ``auth_scheme="gemini"`` (1 provider — gemini) — native ``generateContent`` API - via the official ``google-genai`` Python SDK. Phase 2b: first-class vision - content blocks, tool/function calling, system instructions, and thinking - config — all of which the OpenAI-compat shim at ``/v1beta/openai`` either - strips or mis-translates. Same fail-loud semantics as the anthropic path. - -Key resolution order (unchanged from Phase 1) ----------------------------------------------- -1. ``hermes_api_key`` parameter (explicit call-site override — routes to Nous Portal) -2. ``provider`` parameter (explicit provider name — looks up its env var(s)) -3. Auto-detect: walk ``providers.RESOLUTION_ORDER`` and pick the first provider - whose env var is set. 
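-
-Concretely, the three resolution paths look like this (key values are
-illustrative)::
-
-    executor = create_executor(hermes_api_key="sk-...")   # 1. Nous Portal
-    executor = create_executor(provider="deepseek")       # 2. explicit name
-    executor = create_executor()                          # 3. auto-detect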
-
-Raises ``ValueError`` if nothing resolves. The error message lists every env var
-that was checked so the operator knows their options without reading source.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from typing import Optional
-
-from .escalation import LadderRung, parse_ladder, should_escalate
-from .providers import PROVIDERS, ProviderConfig, resolve_provider
-
-logger = logging.getLogger(__name__)
-
-
-def create_executor(
-    hermes_api_key: Optional[str] = None,
-    provider: Optional[str] = None,
-    model: Optional[str] = None,
-    config_path: Optional[str] = None,
-    escalation_ladder: Optional[list] = None,
-):
-    """Create and return a LangGraph-compatible executor for the Hermes adapter.
-
-    Parameters
-    ----------
-    hermes_api_key:
-        Explicit API key. When provided, the call routes to Nous Portal (the
-        PR 2 back-compat path) regardless of ``provider``.
-    provider:
-        Canonical provider short name from ``providers.PROVIDERS`` (e.g.
-        ``"openai"``, ``"anthropic"``, ``"qwen"``, ``"xai"``). When set, the
-        registry entry's env vars are used to find the API key and its
-        base URL + default model override the auto-detect path. When unset,
-        auto-detect walks ``providers.RESOLUTION_ORDER`` until it finds a
-        provider whose env var is set.
-    model:
-        Override the provider's default model. Passed straight through to
-        ``chat.completions.create``.
-    config_path:
-        Path to the workspace's ``/configs`` directory. Phase 2d-i reads
-        ``system-prompt.md`` from here on every ``execute()`` call and
-        passes the content as a system instruction to the native SDK.
-        Optional — omit to skip system-prompt injection (tests do this).
-    escalation_ladder:
-        Optional list of rung entries (``{"provider": ..., "model": ...}``),
-        parsed via ``escalation.parse_ladder``. When non-empty, transient
-        failures promote to the next rung before raising; empty / None
-        keeps the single-shot behaviour.
-
-    Returns
-    -------
-    HermesA2AExecutor
-        A ready-to-use executor wired with the resolved api_key + base_url
-        + model + config_path.
-
-    Raises
-    ------
-    ValueError
-        If ``provider`` is an unknown name, if ``provider`` is known but its
-        env vars are all empty, or if auto-detect finds nothing.
-    """
-    ladder = parse_ladder(escalation_ladder)
-    if ladder:
-        logger.info(
-            "Hermes: escalation ladder configured — %d rungs (%s)",
-            len(ladder),
-            " → ".join(f"{r.provider}:{r.model}" for r in ladder),
-        )
-
-    # Path 1: PR 2 back-compat — explicit hermes_api_key routes to Nous Portal.
-    if hermes_api_key:
-        cfg = PROVIDERS["nous_portal"]
-        logger.debug("Hermes: using explicit hermes_api_key param (Nous Portal)")
-        return HermesA2AExecutor(
-            provider_cfg=cfg,
-            api_key=hermes_api_key,
-            model=model or cfg.default_model,
-            config_path=config_path,
-            escalation_ladder=ladder,
-        )
-
-    # Path 2/3: registry resolution (either explicit provider name or auto-detect).
-    cfg, api_key = resolve_provider(provider)
-    logger.info(
-        "Hermes: provider=%s auth_scheme=%s base_url=%s model=%s",
-        cfg.name,
-        cfg.auth_scheme,
-        cfg.base_url,
-        model or cfg.default_model,
-    )
-    return HermesA2AExecutor(
-        provider_cfg=cfg,
-        api_key=api_key,
-        model=model or cfg.default_model,
-        config_path=config_path,
-        escalation_ladder=ladder,
-    )
-
-
-class HermesA2AExecutor:
-    """LangGraph-compatible AgentExecutor for Hermes-style multi-provider LLMs.
-
-    Dispatches each inference call based on ``provider_cfg.auth_scheme``:
-
-    - ``"openai"`` → OpenAI-compat ``/v1/chat/completions`` via the ``openai`` SDK
-    - ``"anthropic"`` → native Messages API via the ``anthropic`` SDK
-    - ``"gemini"`` → native ``generateContent`` API via the ``google-genai`` SDK
-
-    The ``execute()`` and ``cancel()`` async methods satisfy the
-    ``a2a.server.agent_execution.AgentExecutor`` interface so this
-    executor can be dropped into the A2A server's DefaultRequestHandler.
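-
-    A wiring sketch (the handler and task-store classes are assumed from the
-    a2a-sdk's current samples — verify against the installed version)::
-
-        from a2a.server.request_handlers import DefaultRequestHandler
-        from a2a.server.tasks import InMemoryTaskStore
-
-        executor = create_executor(provider="anthropic", config_path="/configs")
-        handler = DefaultRequestHandler(
-            agent_executor=executor, task_store=InMemoryTaskStore(),
-        )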
- """ - - def __init__( - self, - provider_cfg: ProviderConfig, - api_key: str, - model: str, - heartbeat=None, - config_path: Optional[str] = None, - escalation_ladder: Optional[list] = None, - ): - self.provider_cfg = provider_cfg - self.api_key = api_key - self.base_url = provider_cfg.base_url - self.model = model - self._heartbeat = heartbeat - # Phase 2d-i: config_path lets execute() read /configs/system-prompt.md - # on each turn and pass it to the native SDK's `system=` / - # `system_instruction=` / prepended message. Optional because older - # callers + tests construct executors directly. - self._config_path = config_path - # Phase 3: escalation ladder. When non-empty, _do_inference retries - # transient-failure classes (rate limit, 5xx, overload, context-length) - # on each rung in turn before raising. Empty / None = single-shot, - # original behaviour. See adapters.hermes.escalation. - self._ladder: list[LadderRung] = parse_ladder(escalation_ladder) or [] - - # ------------------------------------------------------------------ - # History → provider-specific message list converters - # ------------------------------------------------------------------ - # - # The A2A shared runtime gives us history as ``list[tuple[str, str]]`` - # with roles ``"human"`` / ``"ai"``. Each provider wants a different - # shape: - # - # OpenAI-compat: [{"role":"user"|"assistant", "content": str}, ...] - # Anthropic: [{"role":"user"|"assistant", "content": str}, ...] (same) - # Gemini: [{"role":"user"|"model", "parts": [{"text": str}]}, ...] - # - # Before Phase 2c these were flattened into a single user turn via - # ``shared_runtime.build_task_text``, which worked for basic text - # handoff but lost the model's native multi-turn awareness (system - # prompts, tool-use history, role attribution for instruction - # following). Phase 2c keeps the turns as turns. - - @staticmethod - def _history_to_openai_messages( - user_message: str, - history: "list[tuple[str, str]]", - ) -> "list[dict]": - """Convert A2A history + current turn to OpenAI Chat Completions shape.""" - messages: list[dict] = [] - for role, text in history or []: - messages.append({ - "role": "user" if role == "human" else "assistant", - "content": text, - }) - messages.append({"role": "user", "content": user_message}) - return messages - - @staticmethod - def _history_to_anthropic_messages( - user_message: str, - history: "list[tuple[str, str]]", - ) -> "list[dict]": - """Convert A2A history + current turn to Anthropic Messages API shape. - - Identical wire format to OpenAI (``role`` + ``content``) for text-only - turns, so we just delegate. The difference matters for tool_use / - content blocks, which are Phase 2d territory. - """ - return HermesA2AExecutor._history_to_openai_messages(user_message, history) - - @staticmethod - def _history_to_gemini_contents( - user_message: str, - history: "list[tuple[str, str]]", - ) -> "list[dict]": - """Convert A2A history + current turn to Gemini generateContent shape. - - Gemini uses ``role: "user" | "model"`` (NOT "assistant") and wraps - text in a ``parts: [{"text": ...}]`` list. 
- """ - contents: list[dict] = [] - for role, text in history or []: - contents.append({ - "role": "user" if role == "human" else "model", - "parts": [{"text": text}], - }) - contents.append({"role": "user", "parts": [{"text": user_message}]}) - return contents - - # ------------------------------------------------------------------ - # Per-provider inference paths - # ------------------------------------------------------------------ - - async def _do_openai_compat( - self, - user_message: str, - history: "list[tuple[str, str]] | None" = None, - system_prompt: Optional[str] = None, - ) -> str: - """OpenAI-compat inference — used by every provider with auth_scheme='openai'. - - 13 of the 15 registered providers route here. Uses ``openai.AsyncOpenAI`` - pointed at the provider's base_url; every provider's API is wire- - compatible with the OpenAI Chat Completions shape. - - Phase 2c: accepts multi-turn history. - Phase 2d-i: accepts optional system_prompt, prepended as a - ``{"role":"system"}`` message per the OpenAI Chat Completions convention. - """ - import openai - - client = openai.AsyncOpenAI( - api_key=self.api_key, - base_url=self.base_url, - ) - messages = self._history_to_openai_messages(user_message, history or []) - if system_prompt: - messages = [{"role": "system", "content": system_prompt}, *messages] - response = await client.chat.completions.create( - model=self.model, - messages=messages, - ) - return response.choices[0].message.content or "" - - async def _do_anthropic_native( - self, - user_message: str, - history: "list[tuple[str, str]] | None" = None, - system_prompt: Optional[str] = None, - ) -> str: - """Native Anthropic Messages API inference. - - Uses the official ``anthropic`` Python SDK for correct tool-calling, - vision, and extended-thinking semantics that don't translate cleanly - through the OpenAI-compat shim. - - Phase 2a: single-turn text. - Phase 2c: multi-turn history. - Phase 2d-i: optional system_prompt passed via Anthropic's native - top-level ``system=`` parameter — NOT as a message in the messages - list (Anthropic's Messages API requires system prompts to be at the - top level, not inline like OpenAI). - """ - try: - import anthropic - except ImportError as exc: # pragma: no cover — exercised by test_missing_sdk - raise RuntimeError( - "Hermes anthropic native path requires the `anthropic` package. " - "Install in the workspace image with `pip install anthropic>=0.39.0` " - "or set HERMES provider=openrouter to route Claude models through " - "OpenRouter's OpenAI-compat shim instead." - ) from exc - - client = anthropic.AsyncAnthropic(api_key=self.api_key) - messages = self._history_to_anthropic_messages(user_message, history or []) - create_kwargs: dict = { - "model": self.model, - "max_tokens": 4096, - "messages": messages, - } - if system_prompt: - create_kwargs["system"] = system_prompt - response = await client.messages.create(**create_kwargs) - # response.content is a list of ContentBlock; for text-only the first - # block is a TextBlock with a .text attribute. - if response.content and hasattr(response.content[0], "text"): - return response.content[0].text - return "" - - async def _do_gemini_native( - self, - user_message: str, - history: "list[tuple[str, str]] | None" = None, - system_prompt: Optional[str] = None, - ) -> str: - """Native Google Gemini ``generateContent`` inference. - - Uses the official ``google-genai`` Python SDK for correct vision - content blocks, tool/function calling, system instructions, and - thinking config. 
These all get stripped or mis-translated through - the OpenAI-compat ``/v1beta/openai`` shim. - - Phase 2b: single-turn text. - Phase 2c: multi-turn history via Gemini's ``contents=[{role,parts}]`` - shape (note: role is ``"user"`` / ``"model"``, NOT ``"assistant"``). - Phase 2d-i: system_prompt passed via native - ``config.system_instruction`` — Gemini's top-level system field. - """ - try: - from google import genai # type: ignore[import-not-found] - from google.genai import types as genai_types # type: ignore[import-not-found] - except ImportError as exc: # pragma: no cover — exercised by test_missing_sdk - raise RuntimeError( - "Hermes gemini native path requires the `google-genai` package. " - "Install in the workspace image with `pip install google-genai>=1.0.0` " - "or set HERMES provider=openrouter to route Gemini models through " - "OpenRouter's OpenAI-compat shim instead." - ) from exc - - client = genai.Client(api_key=self.api_key) - contents = self._history_to_gemini_contents(user_message, history or []) - generate_kwargs: dict = { - "model": self.model, - "contents": contents, - } - if system_prompt: - generate_kwargs["config"] = genai_types.GenerateContentConfig( - system_instruction=system_prompt, - ) - response = await client.aio.models.generate_content(**generate_kwargs) - # response.text is the flattened text across all parts of the first - # candidate. For text-only that's the whole reply. - return response.text or "" - - async def _do_inference( - self, - user_message: str, - history: "list[tuple[str, str]] | None" = None, - system_prompt: Optional[str] = None, - ) -> str: - """Dispatch to the right inference path based on provider auth_scheme. - - Phase 2c: multi-turn history. - Phase 2d-i: optional system_prompt is passed through to the native - system field of whichever path wins dispatch. - Phase 3: when an escalation ladder is configured, transient failures - (rate limit, 5xx, overload, context-length) promote to the next rung - before raising. No ladder = single-shot, original behaviour. - """ - # Fast path: no ladder configured — single call on the pinned model. - if not self._ladder: - return await self._dispatch( - self.provider_cfg, self.model, user_message, history, system_prompt, - ) - - # Slow path: walk the ladder. Start with the pinned (provider, model) - # so the first attempt matches non-ladder behaviour exactly — the - # ladder only kicks in when the first attempt fails escalatably. - attempts: list[tuple[ProviderConfig, str]] = [(self.provider_cfg, self.model)] - for rung in self._ladder: - rung_cfg = PROVIDERS.get(rung.provider) - if rung_cfg is None: - logger.warning( - "Hermes ladder: provider %r not in registry, skipping rung", - rung.provider, - ) - continue - attempts.append((rung_cfg, rung.model)) - - last_exc: Optional[BaseException] = None - for i, (cfg, model) in enumerate(attempts): - try: - reply = await self._dispatch( - cfg, model, user_message, history, system_prompt, - ) - if i > 0: - logger.info( - "Hermes ladder: succeeded on rung %d (%s:%s) after %d failed attempt(s)", - i, cfg.name, model, i, - ) - return reply - except Exception as exc: - last_exc = exc - if i == len(attempts) - 1: - logger.error( - "Hermes ladder: exhausted all %d rungs — raising. 
Last error on %s:%s: %s", - len(attempts), cfg.name, model, exc, - ) - raise - if not should_escalate(exc): - logger.info( - "Hermes ladder: non-escalatable error on %s:%s — raising without advancing: %s", - cfg.name, model, exc, - ) - raise - logger.warning( - "Hermes ladder: escalatable failure on rung %d (%s:%s), advancing. Error: %s", - i, cfg.name, model, exc, - ) - - # Unreachable — the last iteration either returns or raises, but - # satisfying the type checker without a blank return. - if last_exc is not None: - raise last_exc - return "" # pragma: no cover - - async def _dispatch( - self, - cfg: ProviderConfig, - model: str, - user_message: str, - history: "list[tuple[str, str]] | None", - system_prompt: Optional[str], - ) -> str: - """Single-attempt dispatch on (cfg, model). - - Temporarily rebinds ``self.provider_cfg`` + ``self.base_url`` + ``self.model`` - so the existing per-provider paths pick up the rung's config. Restores - the original values in a finally block so a raised error leaves the - executor pinned to its constructor-given state (next call on the same - executor instance starts fresh at the top of the ladder). - - For the ladder's non-first rungs, ``self.api_key`` must be the rung's - provider key — we resolve it here via ``resolve_provider`` so the - first-rung API key (for the pinned provider) isn't mis-used against a - different provider's base URL. That lookup can raise ``ValueError`` - when the rung's env var isn't set; ``should_escalate(ValueError)`` - returns False so the ladder correctly STOPS rather than escalating - further into nothing. - """ - # Fast path: rung matches the executor's pinned config — reuse the - # existing api_key, skip the provider re-resolve. - if cfg is self.provider_cfg and model == self.model: - scheme = cfg.auth_scheme - if scheme == "anthropic": - return await self._do_anthropic_native(user_message, history, system_prompt) - if scheme == "gemini": - return await self._do_gemini_native(user_message, history, system_prompt) - if scheme == "openai": - return await self._do_openai_compat(user_message, history, system_prompt) - logger.warning( - "Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat", - scheme, cfg.name, - ) - return await self._do_openai_compat(user_message, history, system_prompt) - - # Different rung — temporarily rebind provider_cfg + model + api_key. - # resolve_provider reads the rung's env vars fresh. 
-        _, rung_key = resolve_provider(cfg.name)
-        orig_cfg, orig_model, orig_key, orig_base = (
-            self.provider_cfg, self.model, self.api_key, self.base_url,
-        )
-        try:
-            self.provider_cfg = cfg
-            self.model = model
-            self.api_key = rung_key
-            self.base_url = cfg.base_url
-            scheme = cfg.auth_scheme
-            if scheme == "anthropic":
-                return await self._do_anthropic_native(user_message, history, system_prompt)
-            if scheme == "gemini":
-                return await self._do_gemini_native(user_message, history, system_prompt)
-            if scheme == "openai":
-                return await self._do_openai_compat(user_message, history, system_prompt)
-            logger.warning(
-                "Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat",
-                scheme, cfg.name,
-            )
-            return await self._do_openai_compat(user_message, history, system_prompt)
-        finally:
-            self.provider_cfg = orig_cfg
-            self.model = orig_model
-            self.api_key = orig_key
-            self.base_url = orig_base
-
-    # ------------------------------------------------------------------
-    # AgentExecutor interface
-    # ------------------------------------------------------------------
-
-    async def execute(self, context, event_queue):  # pragma: no cover
-        """Execute a Hermes inference request and push the reply to event_queue.
-
-        Phase 2c: multi-turn history.
-        Phase 2d-i: reads ``/configs/system-prompt.md`` via
-        ``executor_helpers.get_system_prompt`` each turn (supports hot-reload)
-        and passes the text to the dispatch layer. Each provider path uses
-        its native system field — Anthropic's top-level ``system=``, Gemini's
-        ``system_instruction=`` via ``GenerateContentConfig``, or OpenAI's
-        ``{"role":"system"}`` message at the head of the messages list.
-        """
-        from a2a.utils import new_agent_text_message
-        from adapters.shared_runtime import (
-            brief_task,
-            extract_history,
-            extract_message_text,
-            set_current_task,
-        )
-        from executor_helpers import get_system_prompt
-
-        user_message = extract_message_text(context)
-        if not user_message:
-            await event_queue.enqueue_event(new_agent_text_message("No message provided"))
-            return
-
-        await set_current_task(self._heartbeat, brief_task(user_message))
-
-        try:
-            history = extract_history(context)
-            system_prompt = (
-                get_system_prompt(self._config_path) if self._config_path else None
-            )
-            reply = await self._do_inference(user_message, history, system_prompt)
-        except Exception as exc:
-            logger.exception("Hermes executor error: %s", exc)
-            reply = f"Hermes error: {exc}"
-        finally:
-            await set_current_task(self._heartbeat, "")
-
-        await event_queue.enqueue_event(new_agent_text_message(reply))
-
-    async def cancel(self, context, event_queue):  # pragma: no cover
-        """No-op cancel — Hermes requests are not cancellable mid-flight."""
-        pass
diff --git a/workspace-template/adapters/hermes/providers.py b/workspace-template/adapters/hermes/providers.py
deleted file mode 100644
index 38dadfe8..00000000
--- a/workspace-template/adapters/hermes/providers.py
+++ /dev/null
@@ -1,298 +0,0 @@
-"""Hermes adapter provider registry — Phase 1 of the multi-provider expansion.
-
-Extends the original PR-2 Hermes executor (Nous Portal + OpenRouter only) to a
-registry of 15 providers. Thirteen of them are reached via their OpenAI-compat
-endpoints, which means the existing ``openai.AsyncOpenAI`` client and request
-shape in ``executor.py`` Just Works for them without any new dependencies.
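-
-Typical use, assuming (for the sake of the example) that only one provider
-key is present in the environment::
-
-    os.environ["DEEPSEEK_API_KEY"] = "sk-..."
-    cfg, key = resolve_provider()            # auto-detect → deepseek
-    cfg, key = resolve_provider("deepseek")  # or name it explicitly
-    # cfg.base_url + cfg.default_model then configure openai.AsyncOpenAI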
-
-Native SDK paths (Anthropic Messages API, Gemini generateContent API) landed
-in Phase 2 — they give better tool-calling + vision fidelity but were not
-required to unblock the basic "CEO wants Hermes on Qwen / GLM / xAI /
-Gemini" asks that triggered this work.
-
-## Design
-- ``ProviderConfig`` captures everything needed to point the OpenAI client at
-  a provider: env var(s), base URL, default model, auth scheme.
-- ``PROVIDERS`` is a dict keyed by canonical short name (``"openai"``,
-  ``"anthropic"``, ``"qwen"``, etc.).
-- ``RESOLUTION_ORDER`` is the auto-detect sequence used when the caller
-  doesn't specify a provider — it tries each provider's env vars in turn and
-  picks the first one that's set.
-- ``resolve_provider(explicit)`` returns ``(ProviderConfig, api_key)`` or
-  raises ``ValueError`` with a helpful message listing every env var it
-  checked.
-
-## Back-compat
-The original ``HERMES_API_KEY`` and ``OPENROUTER_API_KEY`` env vars still work
-and still route to Nous Portal / OpenRouter respectively — they're just now
-registered as two entries in ``PROVIDERS`` rather than hardcoded in
-``create_executor``.
-
-## Adding a new provider
-1. Append a new ``ProviderConfig`` entry under ``PROVIDERS``
-2. Add its short name to ``RESOLUTION_ORDER`` in the desired priority slot
-3. Document the env var in the workspace ``.env.example`` (if present)
-That's it. Nothing else needs to change — the executor reads the registry;
-a hypothetical example entry is sketched just above ``PROVIDERS`` below.
-"""
-
-from __future__ import annotations
-
-import os
-from dataclasses import dataclass
-from typing import Optional
-
-
-@dataclass(frozen=True)
-class ProviderConfig:
-    """Everything the Hermes executor needs to talk to a single LLM provider.
-
-    Every Phase 1 provider is reachable via an OpenAI-compatible
-    ``/v1/chat/completions`` endpoint, so its ``auth_scheme`` is
-    ``"openai"`` (Bearer token, OpenAI-style messages payload). Phase 2
-    added ``"anthropic"`` (native Messages API) and ``"gemini"`` (native
-    generateContent API) for roles that need better tool-call fidelity.
-    """
-
-    name: str
-    """Canonical short name — the key used in ``PROVIDERS`` and the ``provider`` kwarg."""
-
-    env_vars: tuple[str, ...]
-    """API key env vars, checked in order. First non-empty value wins.
-    Supporting multiple env vars lets us accept common aliases
-    (e.g. ``QWEN_API_KEY`` AND ``DASHSCOPE_API_KEY`` both work for Alibaba Qwen)."""
-
-    base_url: str
-    """OpenAI-compat base URL. Must include the ``/v1`` suffix where applicable."""
-
-    default_model: str
-    """Default model name to pass to ``chat.completions.create``.
-    Per-call overrides are possible via the executor constructor."""
-
-    auth_scheme: str = "openai"
-    """``openai`` (Bearer token + OpenAI-style payload) for every Phase 1 provider.
-    Phase 2 uses ``anthropic`` and ``gemini`` for the native-SDK paths."""
-
-    docs: str = ""
-    """Short note — which docs URL the config was derived from, or which quirks
-    to know about. Not used programmatically; exists to make future audits of
-    this file cheaper than re-Googling every entry."""
-
-
-# --- Provider registry ------------------------------------------------------
-#
-# Ordering within this dict is not semantically meaningful — use
-# ``RESOLUTION_ORDER`` below to control auto-detect priority. This dict is
-# grouped by "who owns the provider" just for human readability.
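-# For illustration only — a hypothetical new entry (name, env var, URL, and
-# model below are invented) would follow the three steps in the module
-# docstring and look like:
-#
-#     "exampleai": ProviderConfig(
-#         name="exampleai",
-#         env_vars=("EXAMPLEAI_API_KEY",),
-#         base_url="https://api.example.ai/v1",
-#         default_model="example-large",
-#         docs="ExampleAI — OpenAI-compat; illustrative entry only.",
-#     ),
-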
- -PROVIDERS: dict[str, ProviderConfig] = { - # --- Existing (PR 2 baseline) --------------------------------------- - "nous_portal": ProviderConfig( - name="nous_portal", - env_vars=("HERMES_API_KEY", "NOUS_API_KEY"), - base_url="https://inference-prod.nousresearch.com/v1", - default_model="nousresearch/hermes-3-llama-3.1-405b", - docs="Nous Research Portal — original Hermes adapter target from PR 2.", - ), - "openrouter": ProviderConfig( - name="openrouter", - env_vars=("OPENROUTER_API_KEY",), - base_url="https://openrouter.ai/api/v1", - default_model="anthropic/claude-sonnet-4.5", - docs="OpenRouter — unified OpenAI-compat gateway to hundreds of models. " - "Useful for A/B testing and as a fallback when a direct provider is down.", - ), - - # --- Frontier commercial (US) --------------------------------------- - "openai": ProviderConfig( - name="openai", - env_vars=("OPENAI_API_KEY",), - base_url="https://api.openai.com/v1", - default_model="gpt-4o", - docs="OpenAI — canonical OpenAI-compat endpoint. Works out of the box.", - ), - "anthropic": ProviderConfig( - name="anthropic", - env_vars=("ANTHROPIC_API_KEY",), - base_url="https://api.anthropic.com", - default_model="claude-sonnet-4-5", - auth_scheme="anthropic", - docs="Anthropic — Phase 2 uses the native Messages API via the official " - "`anthropic` Python SDK for correct tool calling, vision, and " - "extended thinking semantics. If the SDK isn't installed in the " - "workspace image, the executor raises a clear error pointing at " - "`pip install anthropic>=0.39.0`.", - ), - "xai": ProviderConfig( - name="xai", - env_vars=("XAI_API_KEY", "GROK_API_KEY"), - base_url="https://api.x.ai/v1", - default_model="grok-4", - docs="xAI — Grok family. OpenAI-compat via api.x.ai/v1.", - ), - "gemini": ProviderConfig( - name="gemini", - env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY"), - base_url="https://generativelanguage.googleapis.com", - default_model="gemini-2.5-flash", - auth_scheme="gemini", - docs="Google Gemini — Phase 2b uses the native generateContent API via " - "the official `google-genai` Python SDK for correct vision content " - "blocks, tool/function calling, and system instructions. Phase 1 " - "used the /v1beta/openai compat shim. If the google-genai package " - "isn't installed in the workspace image, the executor raises a " - "clear error pointing at `pip install google-genai>=1.0.0`.", - ), - - # --- Chinese providers ---------------------------------------------- - "qwen": ProviderConfig( - name="qwen", - env_vars=("QWEN_API_KEY", "DASHSCOPE_API_KEY"), - base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1", - default_model="qwen3-235b-a22b", - docs="Alibaba Qwen via DashScope international endpoint. OpenAI-compat mode. " - "For domestic China use dashscope.aliyuncs.com (no -intl).", - ), - "glm": ProviderConfig( - name="glm", - env_vars=("GLM_API_KEY", "ZHIPU_API_KEY"), - base_url="https://open.bigmodel.cn/api/paas/v4", - default_model="glm-4-plus", - docs="Zhipu AI GLM — open.bigmodel.cn, OpenAI-compat via /api/paas/v4.", - ), - "kimi": ProviderConfig( - name="kimi", - env_vars=("KIMI_API_KEY", "MOONSHOT_API_KEY"), - base_url="https://api.moonshot.ai/v1", - default_model="kimi-k2", - docs="Moonshot AI Kimi K2 — OpenAI-compat at api.moonshot.ai/v1.", - ), - "minimax": ProviderConfig( - name="minimax", - env_vars=("MINIMAX_API_KEY",), - base_url="https://api.minimax.io/v1", - default_model="MiniMax-M2", - docs="MiniMax — OpenAI-compat at api.minimax.io/v1. 
" - "Note: older base URL api.minimaxi.chat is deprecated.", - ), - "deepseek": ProviderConfig( - name="deepseek", - env_vars=("DEEPSEEK_API_KEY",), - base_url="https://api.deepseek.com/v1", - default_model="deepseek-chat", - docs="DeepSeek — very cheap, OpenAI-compat at api.deepseek.com/v1.", - ), - - # --- OSS / alt providers -------------------------------------------- - "groq": ProviderConfig( - name="groq", - env_vars=("GROQ_API_KEY",), - base_url="https://api.groq.com/openai/v1", - default_model="llama-3.3-70b-versatile", - docs="Groq LPU inference — very fast, OpenAI-compat at api.groq.com/openai/v1.", - ), - "together": ProviderConfig( - name="together", - env_vars=("TOGETHER_API_KEY",), - base_url="https://api.together.xyz/v1", - default_model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", - docs="Together AI — OSS model hosting, OpenAI-compat at api.together.xyz/v1.", - ), - "fireworks": ProviderConfig( - name="fireworks", - env_vars=("FIREWORKS_API_KEY",), - base_url="https://api.fireworks.ai/inference/v1", - default_model="accounts/fireworks/models/llama-v3p3-70b-instruct", - docs="Fireworks AI — OSS model hosting, OpenAI-compat at api.fireworks.ai/inference/v1.", - ), - "mistral": ProviderConfig( - name="mistral", - env_vars=("MISTRAL_API_KEY",), - base_url="https://api.mistral.ai/v1", - default_model="mistral-large-latest", - docs="Mistral AI — OpenAI-compat at api.mistral.ai/v1.", - ), -} - - -# --- Auto-detect resolution order ------------------------------------------- -# -# When the caller doesn't specify a provider, resolve_provider() walks this -# list in order and picks the first provider whose env var is set. Order is -# chosen to preserve back-compat (the two original PR-2 providers come first) -# followed by the most likely-to-be-configured commercial APIs. - -RESOLUTION_ORDER: tuple[str, ...] = ( - # Back-compat: PR 2 baseline - "nous_portal", - "openrouter", - # Frontier commercial - "anthropic", - "openai", - "gemini", - "xai", - # Chinese providers - "qwen", - "glm", - "kimi", - "minimax", - "deepseek", - # OSS / alt - "groq", - "mistral", - "together", - "fireworks", -) - - -def resolve_provider(explicit: Optional[str] = None) -> tuple[ProviderConfig, str]: - """Resolve a provider name to a ``(ProviderConfig, api_key)`` pair. - - Resolution order: - - 1. If ``explicit`` is given, look it up in ``PROVIDERS`` and try every - env var on that provider's config. Raise with a clear message if the - name is unknown or if all env vars are empty. - - 2. Otherwise auto-detect: walk ``RESOLUTION_ORDER`` and return the first - provider whose env var is set. - - Raises - ------ - ValueError - If ``explicit`` is an unknown provider name, if ``explicit`` is a - known provider but its env vars are all empty, or if no env var is - set for any provider in auto-detect mode. - """ - if explicit: - if explicit not in PROVIDERS: - raise ValueError( - f"Unknown Hermes provider: {explicit!r}. " - f"Available: {sorted(PROVIDERS)}" - ) - cfg = PROVIDERS[explicit] - for env in cfg.env_vars: - val = os.environ.get(env, "").strip() - if val: - return cfg, val - raise ValueError( - f"Hermes provider {explicit!r} specified but no env var set. " - f"Tried: {cfg.env_vars}" - ) - - # Auto-detect — first provider with a non-empty env var wins. 
- for name in RESOLUTION_ORDER: - cfg = PROVIDERS[name] - for env in cfg.env_vars: - val = os.environ.get(env, "").strip() - if val: - return cfg, val - - # Nothing set — raise with the full list so the operator knows every - # option they have without having to read the source. - tried = [] - for name in RESOLUTION_ORDER: - for env in PROVIDERS[name].env_vars: - tried.append(env) - raise ValueError( - "No Hermes provider API key found. Set any one of: " + ", ".join(tried) - ) diff --git a/workspace-template/adapters/langgraph/__init__.py b/workspace-template/adapters/langgraph/__init__.py deleted file mode 100644 index 9a8354fe..00000000 --- a/workspace-template/adapters/langgraph/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .adapter import LangGraphAdapter - -Adapter = LangGraphAdapter diff --git a/workspace-template/adapters/langgraph/adapter.py b/workspace-template/adapters/langgraph/adapter.py deleted file mode 100644 index 53af7641..00000000 --- a/workspace-template/adapters/langgraph/adapter.py +++ /dev/null @@ -1,50 +0,0 @@ -"""LangGraph adapter — Python-based ReAct agent with skills, tools, and plugins.""" - -import os -import logging - -from adapters.base import BaseAdapter, AdapterConfig -from a2a.server.agent_execution import AgentExecutor - -logger = logging.getLogger(__name__) - - -class LangGraphAdapter(BaseAdapter): - - @staticmethod - def name() -> str: - return "langgraph" - - @staticmethod - def display_name() -> str: - return "LangGraph" - - @staticmethod - def description() -> str: - return "LangGraph ReAct agent — Python-based with skills, tools, plugins, and peer coordination" - - @staticmethod - def get_config_schema() -> dict: - return { - "model": {"type": "string", "description": "LangChain model string (e.g. openrouter:google/gemini-2.5-flash)"}, - "skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"}, - "tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools (web_search, filesystem, etc.)"}, - } - - def __init__(self): - self.loaded_skills = [] - self.all_tools = [] - self.system_prompt = None - - async def setup(self, config: AdapterConfig) -> None: - result = await self._common_setup(config) - self.loaded_skills = result.loaded_skills - self.all_tools = result.langchain_tools - self.system_prompt = result.system_prompt - - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - from agent import create_agent - from a2a_executor import LangGraphA2AExecutor - - agent = create_agent(config.model, self.all_tools, self.system_prompt) - return LangGraphA2AExecutor(agent, heartbeat=config.heartbeat, model=config.model) diff --git a/workspace-template/adapters/openclaw/__init__.py b/workspace-template/adapters/openclaw/__init__.py deleted file mode 100644 index c5189d4e..00000000 --- a/workspace-template/adapters/openclaw/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .adapter import OpenClawAdapter - -Adapter = OpenClawAdapter diff --git a/workspace-template/adapters/openclaw/adapter.py b/workspace-template/adapters/openclaw/adapter.py deleted file mode 100644 index 672c1cde..00000000 --- a/workspace-template/adapters/openclaw/adapter.py +++ /dev/null @@ -1,243 +0,0 @@ -"""OpenClaw adapter — bridges OpenClaw's Node.js gateway with our A2A protocol. - -OpenClaw is a Node.js agent runtime with its own gateway (port 18789). -This adapter: -1. Installs OpenClaw CLI (npm) and missing deps in the container -2. 
Runs non-interactive onboard with the configured model provider -3. Copies workspace files (SOUL.md, BOOTSTRAP.md, etc.) to OpenClaw's workspace dir -4. Starts the OpenClaw gateway as a background process -5. Proxies A2A messages via `openclaw agent --json` CLI subprocess -""" - -import asyncio -import json -import logging -import os -import shutil -import subprocess - -from adapters.base import BaseAdapter, AdapterConfig -from adapters.shared_runtime import brief_task, extract_message_text, set_current_task -from a2a.server.agent_execution import AgentExecutor - -logger = logging.getLogger(__name__) - -OPENCLAW_WORKSPACE = os.path.expanduser("~/.openclaw/workspace-dev/main") -OPENCLAW_PORT = 18789 - -# Known missing optional deps in OpenClaw's npm package -OPENCLAW_MISSING_DEPS = ["@buape/carbon", "@larksuiteoapi/node-sdk", "@slack/web-api", "grammy"] - - -class OpenClawAdapter(BaseAdapter): - - def __init__(self): - self._gateway_process = None - - @staticmethod - def name() -> str: - return "openclaw" - - @staticmethod - def display_name() -> str: - return "OpenClaw" - - @staticmethod - def description() -> str: - return "OpenClaw agent runtime — Node.js gateway with SOUL/BOOTSTRAP/AGENTS workspace convention" - - @staticmethod - def get_config_schema() -> dict: - return { - "model": {"type": "string", "description": "Model ID (e.g. google/gemini-2.5-flash)"}, - "provider_url": {"type": "string", "description": "LLM provider base URL", "default": "https://openrouter.ai/api/v1"}, - "gateway_port": {"type": "integer", "description": "OpenClaw gateway port", "default": 18789}, - } - - async def setup(self, config: AdapterConfig) -> None: # pragma: no cover - """Install OpenClaw, run onboard, copy workspace files, start gateway.""" - npm_prefix = os.path.expanduser("~/.local") - os.environ["PATH"] = f"{npm_prefix}/bin:{os.environ.get('PATH', '')}" - - # 1. Install OpenClaw CLI if not present - if not shutil.which("openclaw"): - logger.info("Installing OpenClaw CLI...") - result = subprocess.run( - ["npm", "install", "--prefix", npm_prefix, "-g", "openclaw"], - capture_output=True, text=True, timeout=300, - env={**os.environ, "npm_config_prefix": npm_prefix} - ) - if result.returncode != 0: - raise RuntimeError(f"Failed to install OpenClaw: {result.stderr[:500]}") - - # Install known missing optional deps - oc_dir = os.path.join(npm_prefix, "lib/node_modules/openclaw") - if os.path.exists(oc_dir): - logger.info("Installing OpenClaw optional deps...") - subprocess.run( - ["npm", "install"] + OPENCLAW_MISSING_DEPS, - capture_output=True, text=True, timeout=120, cwd=oc_dir - ) - logger.info("OpenClaw CLI installed") - - # 2. Resolve API key and model - prefix = config.model.split(":")[0] if ":" in config.model else "openai" - if prefix == "qianfan": - api_key = os.environ.get("QIANFAN_API_KEY", os.environ.get("AISTUDIO_API_KEY", "")) - else: - api_key = os.environ.get("OPENAI_API_KEY", os.environ.get("GROQ_API_KEY", os.environ.get("OPENROUTER_API_KEY", ""))) - # Determine provider URL from model prefix - provider_urls = { - "openai": "https://api.openai.com/v1", - "groq": "https://api.groq.com/openai/v1", - "openrouter": "https://openrouter.ai/api/v1", - "qianfan": "https://qianfan.baidubce.com/v2", - } - provider_url = config.runtime_config.get("provider_url", provider_urls.get(prefix, "https://api.openai.com/v1")) - model = config.model - if ":" in model: - _, model = model.split(":", 1) - - # 3. 
Run non-interactive onboard
-        if not os.path.exists(os.path.expanduser("~/.openclaw/openclaw.json")):
-            logger.info(f"Running OpenClaw onboard (model: {model})...")
-            subprocess.run(
-                ["openclaw", "onboard", "--non-interactive",
-                 "--auth-choice", "custom-api-key",
-                 "--custom-base-url", provider_url,
-                 "--custom-model-id", model,
-                 "--custom-api-key", api_key,
-                 "--custom-compatibility", "openai",
-                 "--secret-input-mode", "plaintext",
-                 "--accept-risk", "--skip-health"],
-                capture_output=True, text=True, timeout=60,
-                env={**os.environ, "NODE_NO_WARNINGS": "1"}
-            )
-            logger.info("OpenClaw onboard complete")
-
-        # 3b. Fix context window (OpenClaw defaults to 16K, but modern models have much more)
-        oc_config_path = os.path.expanduser("~/.openclaw/openclaw.json")
-        if os.path.exists(oc_config_path):
-            try:
-                import json as json_mod
-                with open(oc_config_path) as f:
-                    oc_cfg = json_mod.load(f)
-                provider_name = "custom-" + provider_url.split("//")[1].split("/")[0].replace(".", "-")
-                providers = oc_cfg.get("models", {}).get("providers", {})
-                if provider_name in providers:
-                    for m in providers[provider_name].get("models", []):
-                        m["contextWindow"] = 1000000  # 1M tokens for modern models
-                        m["maxTokens"] = 16384
-                    with open(oc_config_path, "w") as f:
-                        json_mod.dump(oc_cfg, f, indent=2)
-                    logger.info(f"Fixed context window for {provider_name}")
-            except Exception as e:
-                logger.warning(f"Failed to fix context window: {e}")
-
-        # 3c. Always write auth-profiles.json
-        # (key may have been set via secrets API after first boot)
-        if api_key:
-            auth_dir = os.path.expanduser("~/.openclaw/agents/main/agent")
-            os.makedirs(auth_dir, exist_ok=True)
-            auth_file = os.path.join(auth_dir, "auth-profiles.json")
-            import json as json_mod
-            provider_name = "custom-" + provider_url.split("//")[1].split("/")[0].replace(".", "-")
-            auth_data = {provider_name: {"type": "api-key", "key": api_key}}
-            with open(auth_file, "w") as f:
-                json_mod.dump(auth_data, f, indent=2)
-            logger.info(f"Wrote auth-profiles.json for {provider_name}")
-
-        # 4. Copy workspace files from /configs to OpenClaw's workspace dir
-        os.makedirs(OPENCLAW_WORKSPACE, exist_ok=True)
-        for fname in os.listdir(config.config_path):
-            src = os.path.join(config.config_path, fname)
-            if os.path.isfile(src) and fname.endswith(".md"):
-                shutil.copy2(src, os.path.join(OPENCLAW_WORKSPACE, fname))
-                logger.debug(f"Copied {fname} to OpenClaw workspace")
-
-        # 5. 
Start the gateway as a background process - gateway_port = config.runtime_config.get("gateway_port", OPENCLAW_PORT) - logger.info(f"Starting OpenClaw gateway on port {gateway_port}...") - env = os.environ.copy() - env["NODE_NO_WARNINGS"] = "1" - self._gateway_process = subprocess.Popen( - ["openclaw", "gateway", "--dev", "--port", str(gateway_port), "--bind", "loopback"], - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, - env=env, - ) - # Wait for gateway to become healthy (max 30s) - for attempt in range(15): - await asyncio.sleep(2) - if self._gateway_process.poll() is not None: - raise RuntimeError("OpenClaw gateway process exited") - try: - health = subprocess.run( - ["openclaw", "gateway", "health"], - capture_output=True, text=True, timeout=10, - env=os.environ.copy() - ) - if health.returncode == 0: - logger.info(f"OpenClaw gateway healthy (PID: {self._gateway_process.pid})") - break - except subprocess.TimeoutExpired: - logger.debug(f"Gateway health check timeout (attempt {attempt+1}/15)") - else: - raise RuntimeError("OpenClaw gateway did not become healthy within 30s") - - async def create_executor(self, config: AdapterConfig) -> AgentExecutor: - return OpenClawA2AExecutor(heartbeat=config.heartbeat) - - -class OpenClawA2AExecutor(AgentExecutor): - """Proxies A2A messages to OpenClaw via `openclaw agent` CLI subprocess.""" - - def __init__(self, heartbeat=None): - self._heartbeat = heartbeat - - async def execute(self, context, event_queue): - from a2a.utils import new_agent_text_message - - user_message = extract_message_text(context) - - if not user_message: - await event_queue.enqueue_event(new_agent_text_message("No message provided")) - return - - await set_current_task(self._heartbeat, brief_task(user_message)) - - # Call OpenClaw agent via CLI - try: - proc = await asyncio.create_subprocess_exec( - "openclaw", "agent", - "--session-id", context.task_id or "default", - "--message", user_message, - "--json", "--timeout", "120", - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - env={**os.environ, "PATH": f"{os.path.expanduser('~/.local/bin')}:{os.environ.get('PATH', '')}"} - ) - stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=130) - output = stdout.decode().strip() - - if proc.returncode == 0 and output: - try: - data = json.loads(output) - payloads = data.get("result", {}).get("payloads", []) - if payloads: - reply = payloads[0].get("text", "") - else: - reply = str(data) - except json.JSONDecodeError: - reply = output - else: - reply = f"OpenClaw error: {stderr.decode()[:300]}" if stderr else f"OpenClaw returned code {proc.returncode}" - - except asyncio.TimeoutError: - reply = "OpenClaw timed out after 120s" - except Exception as e: - reply = f"OpenClaw error: {e}" - finally: - await set_current_task(self._heartbeat, "") - - await event_queue.enqueue_event(new_agent_text_message(reply)) - - async def cancel(self, context, event_queue): # pragma: no cover - pass diff --git a/workspace-template/adapters/shared_runtime.py b/workspace-template/adapters/shared_runtime.py index a3838664..78d3591e 100644 --- a/workspace-template/adapters/shared_runtime.py +++ b/workspace-template/adapters/shared_runtime.py @@ -1,190 +1,2 @@ -"""Shared runtime helpers for A2A-backed workspace executors.""" - -from __future__ import annotations - -from typing import Any - -from a2a.server.agent_execution import RequestContext - - -def _extract_part_text(part) -> str: - """Extract text from a message part, handling dicts and A2A objects.""" - if 
isinstance(part, dict): - text = part.get("text", "") - if text: - return text - root = part.get("root") - if isinstance(root, dict): - return root.get("text", "") - return "" - if hasattr(part, "text") and part.text: - return part.text - if hasattr(part, "root") and hasattr(part.root, "text") and part.root.text: - return part.root.text - return "" - - -def extract_message_text(context_or_parts) -> str: - """Extract concatenated plain text from A2A message parts.""" - parts = getattr(getattr(context_or_parts, "message", None), "parts", None) - if parts is None: - parts = context_or_parts - return " ".join( - text for part in (parts or []) if (text := _extract_part_text(part)) - ).strip() - - -def extract_history(context: RequestContext) -> list[tuple[str, str]]: - """Extract conversation history from A2A request metadata.""" - messages: list[tuple[str, str]] = [] - request = getattr(context, "request", None) - metadata = getattr(request, "metadata", None) if request else None - if not isinstance(metadata, dict): - metadata = getattr(context, "metadata", None) or {} - history = metadata.get("history", []) if isinstance(metadata, dict) else [] - if not isinstance(history, list): - return messages - - for entry in history: - if not isinstance(entry, dict): - continue - role = entry.get("role", "user") - parts = entry.get("parts", []) - text = " ".join( - text for part in (parts or []) if (text := _extract_part_text(part)) - ).strip() - if text: - mapped_role = "human" if role == "user" else "ai" - messages.append((mapped_role, text)) - return messages - - -def format_conversation_history(history: list[tuple[str, str]]) -> str: - """Render `(role, text)` history into a stable human-readable transcript.""" - return "\n".join( - f"{'User' if role == 'human' else 'Agent'}: {text}" for role, text in history - ) - - -def build_task_text(user_message: str, history: list[tuple[str, str]]) -> str: - """Build a single task/request string with optional prepended conversation history.""" - if not history: - return user_message - transcript = format_conversation_history(history) - return f"Conversation so far:\n{transcript}\n\nCurrent request: {user_message}" - - -def append_peer_guidance( - base_text: str | None, - peers_info: str, - *, - default_text: str, - tool_name: str, -) -> str: - """Append peer guidance text when peers are available.""" - text = (base_text or default_text).strip() - if peers_info: - text += f"\n\n## Peers\n{peers_info}\nUse {tool_name} to communicate with them." - return text - - -def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]: - """Return compact peer metadata for prompt rendering.""" - summaries: list[dict[str, Any]] = [] - for peer in peers: - agent_card = peer.get("agent_card") - if not agent_card: - continue - if isinstance(agent_card, str): - try: - import json - - agent_card = json.loads(agent_card) - except Exception: - continue - if not isinstance(agent_card, dict): - continue - - skills = agent_card.get("skills", []) - summaries.append( - { - "id": peer.get("id", "unknown"), - "name": agent_card.get("name", peer.get("name", "Unknown")), - "status": peer.get("status", "unknown"), - "skills": [ - s.get("name", s.get("id", "")) - for s in skills - if isinstance(s, dict) - ], - } - ) - return summaries - - -def build_peer_section( - peers: list[dict[str, Any]], - *, - heading: str = "## Your Peers (workspaces you can delegate to)", - instruction: str = ( - "Use the `delegate_to_workspace` tool to send tasks to peers. 
" - "Only delegate to peers listed above." - ), -) -> str: - """Render a stable peer section for system prompts.""" - summaries = summarize_peer_cards(peers) - if not summaries: - return "" - - parts = [heading, ""] - for peer in summaries: - parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})") - if peer["skills"]: - parts.append(f" Skills: {', '.join(peer['skills'])}") - parts.append("") - parts.append(instruction) - return "\n".join(parts) - - -def brief_task(text: str, limit: int = 60) -> str: - """Create a short human-readable task label for the heartbeat banner.""" - return text[:limit] + ("..." if len(text) > limit else "") - - -async def set_current_task(heartbeat: Any, task: str) -> None: - """Update current task on heartbeat and push immediately to platform. - - The heartbeat loop only fires every 30s, so quick tasks would finish - before the canvas ever sees them. Setting a task pushes immediately. - Clearing a task only updates the heartbeat object — the next heartbeat - cycle will broadcast the clear, keeping the task visible longer. - """ - if heartbeat: - heartbeat.current_task = task - heartbeat.active_tasks = 1 if task else 0 - - # Only push immediately when SETTING a task (not clearing) - # Clearing is handled by the next heartbeat cycle, which keeps - # the task visible on the canvas for quick A2A responses - if not task: - return - - import os - workspace_id = os.environ.get("WORKSPACE_ID", "") - platform_url = os.environ.get("PLATFORM_URL", "") - if workspace_id and platform_url: - try: - import httpx - async with httpx.AsyncClient(timeout=3.0) as client: - await client.post( - f"{platform_url}/registry/heartbeat", - json={ - "workspace_id": workspace_id, - "current_task": task, - "active_tasks": 1, - "error_rate": 0, - "sample_error": "", - "uptime_seconds": 0, - }, - ) - except Exception: - pass # Best-effort +"""Re-export from shared_runtime for backward compat.""" +from shared_runtime import * # noqa: F401,F403 diff --git a/workspace-template/coordinator.py b/workspace-template/coordinator.py index 99e9adb8..556fdaae 100644 --- a/workspace-template/coordinator.py +++ b/workspace-template/coordinator.py @@ -17,7 +17,7 @@ import os import httpx from langchain_core.tools import tool -from adapters.shared_runtime import build_peer_section +from shared_runtime import build_peer_section from policies.routing import build_team_routing_payload logger = logging.getLogger(__name__) diff --git a/workspace-template/prompt.py b/workspace-template/prompt.py index a9876d49..33de1265 100644 --- a/workspace-template/prompt.py +++ b/workspace-template/prompt.py @@ -3,7 +3,7 @@ from pathlib import Path from skill_loader.loader import LoadedSkill -from adapters.shared_runtime import build_peer_section +from shared_runtime import build_peer_section DEFAULT_MEMORY_SNAPSHOT_FILES = ("MEMORY.md", "USER.md") diff --git a/workspace-template/shared_runtime.py b/workspace-template/shared_runtime.py new file mode 100644 index 00000000..a3838664 --- /dev/null +++ b/workspace-template/shared_runtime.py @@ -0,0 +1,190 @@ +"""Shared runtime helpers for A2A-backed workspace executors.""" + +from __future__ import annotations + +from typing import Any + +from a2a.server.agent_execution import RequestContext + + +def _extract_part_text(part) -> str: + """Extract text from a message part, handling dicts and A2A objects.""" + if isinstance(part, dict): + text = part.get("text", "") + if text: + return text + root = part.get("root") + if isinstance(root, dict): + return 
root.get("text", "") + return "" + if hasattr(part, "text") and part.text: + return part.text + if hasattr(part, "root") and hasattr(part.root, "text") and part.root.text: + return part.root.text + return "" + + +def extract_message_text(context_or_parts) -> str: + """Extract concatenated plain text from A2A message parts.""" + parts = getattr(getattr(context_or_parts, "message", None), "parts", None) + if parts is None: + parts = context_or_parts + return " ".join( + text for part in (parts or []) if (text := _extract_part_text(part)) + ).strip() + + +def extract_history(context: RequestContext) -> list[tuple[str, str]]: + """Extract conversation history from A2A request metadata.""" + messages: list[tuple[str, str]] = [] + request = getattr(context, "request", None) + metadata = getattr(request, "metadata", None) if request else None + if not isinstance(metadata, dict): + metadata = getattr(context, "metadata", None) or {} + history = metadata.get("history", []) if isinstance(metadata, dict) else [] + if not isinstance(history, list): + return messages + + for entry in history: + if not isinstance(entry, dict): + continue + role = entry.get("role", "user") + parts = entry.get("parts", []) + text = " ".join( + text for part in (parts or []) if (text := _extract_part_text(part)) + ).strip() + if text: + mapped_role = "human" if role == "user" else "ai" + messages.append((mapped_role, text)) + return messages + + +def format_conversation_history(history: list[tuple[str, str]]) -> str: + """Render `(role, text)` history into a stable human-readable transcript.""" + return "\n".join( + f"{'User' if role == 'human' else 'Agent'}: {text}" for role, text in history + ) + + +def build_task_text(user_message: str, history: list[tuple[str, str]]) -> str: + """Build a single task/request string with optional prepended conversation history.""" + if not history: + return user_message + transcript = format_conversation_history(history) + return f"Conversation so far:\n{transcript}\n\nCurrent request: {user_message}" + + +def append_peer_guidance( + base_text: str | None, + peers_info: str, + *, + default_text: str, + tool_name: str, +) -> str: + """Append peer guidance text when peers are available.""" + text = (base_text or default_text).strip() + if peers_info: + text += f"\n\n## Peers\n{peers_info}\nUse {tool_name} to communicate with them." + return text + + +def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Return compact peer metadata for prompt rendering.""" + summaries: list[dict[str, Any]] = [] + for peer in peers: + agent_card = peer.get("agent_card") + if not agent_card: + continue + if isinstance(agent_card, str): + try: + import json + + agent_card = json.loads(agent_card) + except Exception: + continue + if not isinstance(agent_card, dict): + continue + + skills = agent_card.get("skills", []) + summaries.append( + { + "id": peer.get("id", "unknown"), + "name": agent_card.get("name", peer.get("name", "Unknown")), + "status": peer.get("status", "unknown"), + "skills": [ + s.get("name", s.get("id", "")) + for s in skills + if isinstance(s, dict) + ], + } + ) + return summaries + + +def build_peer_section( + peers: list[dict[str, Any]], + *, + heading: str = "## Your Peers (workspaces you can delegate to)", + instruction: str = ( + "Use the `delegate_to_workspace` tool to send tasks to peers. " + "Only delegate to peers listed above." 
+ ), +) -> str: + """Render a stable peer section for system prompts.""" + summaries = summarize_peer_cards(peers) + if not summaries: + return "" + + parts = [heading, ""] + for peer in summaries: + parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})") + if peer["skills"]: + parts.append(f" Skills: {', '.join(peer['skills'])}") + parts.append("") + parts.append(instruction) + return "\n".join(parts) + + +def brief_task(text: str, limit: int = 60) -> str: + """Create a short human-readable task label for the heartbeat banner.""" + return text[:limit] + ("..." if len(text) > limit else "") + + +async def set_current_task(heartbeat: Any, task: str) -> None: + """Update current task on heartbeat and push immediately to platform. + + The heartbeat loop only fires every 30s, so quick tasks would finish + before the canvas ever sees them. Setting a task pushes immediately. + Clearing a task only updates the heartbeat object — the next heartbeat + cycle will broadcast the clear, keeping the task visible longer. + """ + if heartbeat: + heartbeat.current_task = task + heartbeat.active_tasks = 1 if task else 0 + + # Only push immediately when SETTING a task (not clearing) + # Clearing is handled by the next heartbeat cycle, which keeps + # the task visible on the canvas for quick A2A responses + if not task: + return + + import os + workspace_id = os.environ.get("WORKSPACE_ID", "") + platform_url = os.environ.get("PLATFORM_URL", "") + if workspace_id and platform_url: + try: + import httpx + async with httpx.AsyncClient(timeout=3.0) as client: + await client.post( + f"{platform_url}/registry/heartbeat", + json={ + "workspace_id": workspace_id, + "current_task": task, + "active_tasks": 1, + "error_rate": 0, + "sample_error": "", + "uptime_seconds": 0, + }, + ) + except Exception: + pass # Best-effort diff --git a/workspace-template/tests/test_adapters.py b/workspace-template/tests/test_adapters.py deleted file mode 100644 index 1c1f9114..00000000 --- a/workspace-template/tests/test_adapters.py +++ /dev/null @@ -1,1808 +0,0 @@ -"""Smoke tests for all 6 agent runtime adapters. - -Verifies that each adapter: - 1. Exposes the correct static identity methods - 2. Exports a valid config schema - 3. Has setup() and create_executor() coroutines - 4. setup() raises RuntimeError when its framework dep is missing - 5. create_executor() returns an AgentExecutor-compatible object - -Heavy framework deps (crewai, autogen-agentchat, etc.) are mocked so these -tests run without installing the full dependency tree. 
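# Example (sketch): how an A2A executor typically threads these helpers
# together; `handle` and `run_agent` are hypothetical stand-ins for an
# executor's execute() body and its framework call.
from shared_runtime import (
    brief_task,
    build_task_text,
    extract_history,
    extract_message_text,
    set_current_task,
)

async def handle(context, heartbeat, run_agent):
    user_message = extract_message_text(context)
    task = build_task_text(user_message, extract_history(context))
    # e.g. "Conversation so far:\nUser: hi\nAgent: hello\n\nCurrent request: ..."
    await set_current_task(heartbeat, brief_task(user_message))  # pushed immediately
    try:
        return await run_agent(task)
    finally:
        await set_current_task(heartbeat, "")  # broadcast by the next 30s heartbeat tick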
-""" - -import asyncio -import sys -from types import ModuleType -from unittest.mock import AsyncMock, MagicMock - -import pytest - - -# --------------------------------------------------------------------------- -# Helper: build a minimal AdapterConfig -# --------------------------------------------------------------------------- - -def _make_config(**overrides): - from adapters.base import AdapterConfig - defaults = dict( - model="openai:gpt-4o-mini", - system_prompt="You are a test agent.", - tools=[], - runtime_config={}, - config_path="/tmp/test-configs", - workspace_id="ws-test", - prompt_files=[], - a2a_port=8000, - heartbeat=None, - ) - defaults.update(overrides) - return AdapterConfig(**defaults) - - -# --------------------------------------------------------------------------- -# Helper: patch _common_setup on a BaseAdapter subclass to avoid full stack -# --------------------------------------------------------------------------- - -def _stub_common_setup(adapter_instance, monkeypatch): - """Replace _common_setup with a no-op returning a minimal SetupResult.""" - from adapters.base import SetupResult - result = SetupResult( - system_prompt="stub prompt", - loaded_skills=[], - langchain_tools=[], - is_coordinator=False, - children=[], - ) - monkeypatch.setattr(type(adapter_instance), "_common_setup", AsyncMock(return_value=result)) - - -# ============================================================================ -# 1. LangGraph Adapter -# ============================================================================ - -class TestLangGraphAdapter: - - def test_static_identity(self): - from adapters.langgraph.adapter import LangGraphAdapter - assert LangGraphAdapter.name() == "langgraph" - assert LangGraphAdapter.display_name() == "LangGraph" - assert isinstance(LangGraphAdapter.description(), str) - assert len(LangGraphAdapter.description()) > 0 - - def test_config_schema(self): - from adapters.langgraph.adapter import LangGraphAdapter - schema = LangGraphAdapter.get_config_schema() - assert isinstance(schema, dict) - assert "model" in schema - - def test_has_setup_and_create_executor(self): - from adapters.langgraph.adapter import LangGraphAdapter - import inspect - adapter = LangGraphAdapter() - assert inspect.iscoroutinefunction(adapter.setup) - assert inspect.iscoroutinefunction(adapter.create_executor) - - @pytest.mark.asyncio - async def test_setup_stores_tools_and_prompt(self, monkeypatch): - from adapters.langgraph.adapter import LangGraphAdapter - adapter = LangGraphAdapter() - _stub_common_setup(adapter, monkeypatch) - - await adapter.setup(_make_config()) - - assert adapter.system_prompt == "stub prompt" - assert adapter.all_tools == [] - assert adapter.loaded_skills == [] - - @pytest.mark.asyncio - async def test_create_executor_returns_executor(self, monkeypatch): - from adapters.langgraph.adapter import LangGraphAdapter - - # Mock create_agent and LangGraphA2AExecutor - fake_agent = MagicMock() - fake_executor = MagicMock() - monkeypatch.setitem(sys.modules, "agent", MagicMock(create_agent=MagicMock(return_value=fake_agent))) - monkeypatch.setitem(sys.modules, "a2a_executor", MagicMock(LangGraphA2AExecutor=MagicMock(return_value=fake_executor))) - - adapter = LangGraphAdapter() - adapter.all_tools = [] - adapter.system_prompt = "test" - adapter.loaded_skills = [] - - result = await adapter.create_executor(_make_config()) - - assert result is fake_executor - - -# ============================================================================ -# 2. 
CrewAI Adapter -# ============================================================================ - -class TestCrewAIAdapter: - - def test_static_identity(self): - from adapters.crewai.adapter import CrewAIAdapter - assert CrewAIAdapter.name() == "crewai" - assert CrewAIAdapter.display_name() == "CrewAI" - assert isinstance(CrewAIAdapter.description(), str) - - def test_config_schema(self): - from adapters.crewai.adapter import CrewAIAdapter - schema = CrewAIAdapter.get_config_schema() - assert isinstance(schema, dict) - assert "model" in schema - - def test_has_setup_and_create_executor(self): - from adapters.crewai.adapter import CrewAIAdapter - import inspect - adapter = CrewAIAdapter() - assert inspect.iscoroutinefunction(adapter.setup) - assert inspect.iscoroutinefunction(adapter.create_executor) - - @pytest.mark.asyncio - async def test_setup_raises_when_crewai_missing(self, monkeypatch): - from adapters.crewai.adapter import CrewAIAdapter - adapter = CrewAIAdapter() - # Hide crewai from imports - monkeypatch.setitem(sys.modules, "crewai", None) - - with pytest.raises(RuntimeError, match="crewai not installed"): - await adapter.setup(_make_config()) - - @pytest.mark.asyncio - async def test_setup_succeeds_with_crewai_present(self, monkeypatch): - from adapters.crewai.adapter import CrewAIAdapter - adapter = CrewAIAdapter() - _stub_common_setup(adapter, monkeypatch) - - fake_crewai = ModuleType("crewai") - fake_crewai.__version__ = "0.99.0" - monkeypatch.setitem(sys.modules, "crewai", fake_crewai) - - await adapter.setup(_make_config()) - assert adapter.system_prompt == "stub prompt" - - @pytest.mark.asyncio - async def test_create_executor_returns_crewai_executor(self, monkeypatch): - from adapters.crewai.adapter import CrewAIAdapter, CrewAIA2AExecutor - adapter = CrewAIAdapter() - adapter.system_prompt = "backstory" - adapter.crewai_tools = [] - - result = await adapter.create_executor(_make_config()) - - assert isinstance(result, CrewAIA2AExecutor) - assert result.model == "openai:gpt-4o-mini" - assert result.system_prompt == "backstory" - - @pytest.mark.asyncio - async def test_crewai_executor_handles_empty_message(self, monkeypatch): - from adapters.crewai.adapter import CrewAIA2AExecutor - import adapters.shared_runtime as shared_rt - - executor = CrewAIA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="test", - crewai_tools=[], - ) - - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - - context = MagicMock() - # Patch on the shared_runtime module (where it's imported from inside execute()) - monkeypatch.setattr(shared_rt, "extract_message_text", lambda ctx: "") - monkeypatch.setattr(shared_rt, "set_current_task", AsyncMock()) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - assert events[0] == "No message provided" - - -# ============================================================================ -# 3. 
Claude Code Adapter -# ============================================================================ - -class TestClaudeCodeAdapter: - - def test_static_identity(self): - from adapters.claude_code.adapter import ClaudeCodeAdapter - assert ClaudeCodeAdapter.name() == "claude-code" - assert ClaudeCodeAdapter.display_name() == "Claude Code" - assert isinstance(ClaudeCodeAdapter.description(), str) - - def test_config_schema(self): - from adapters.claude_code.adapter import ClaudeCodeAdapter - schema = ClaudeCodeAdapter.get_config_schema() - assert isinstance(schema, dict) - assert "model" in schema - assert "timeout" in schema - - def test_has_setup_and_create_executor(self): - from adapters.claude_code.adapter import ClaudeCodeAdapter - import inspect - adapter = ClaudeCodeAdapter() - assert inspect.iscoroutinefunction(adapter.setup) - assert inspect.iscoroutinefunction(adapter.create_executor) - - @pytest.mark.asyncio - async def test_setup_warns_when_cli_missing(self, monkeypatch, caplog): - """setup() should warn but NOT raise when the CLI is absent.""" - from adapters.claude_code.adapter import ClaudeCodeAdapter - import shutil - monkeypatch.setattr(shutil, "which", lambda cmd: None) - - adapter = ClaudeCodeAdapter() - config = _make_config(runtime_config={"command": "claude"}) - # Should not raise - await adapter.setup(config) - - @pytest.mark.asyncio - async def test_create_executor_returns_sdk_executor(self, monkeypatch): - from adapters.claude_code.adapter import ClaudeCodeAdapter - - fake_executor = MagicMock() - fake_sdk_module = MagicMock() - fake_sdk_module.ClaudeSDKExecutor = MagicMock(return_value=fake_executor) - monkeypatch.setitem(sys.modules, "claude_sdk_executor", fake_sdk_module) - - adapter = ClaudeCodeAdapter() - result = await adapter.create_executor( - _make_config(runtime_config={"model": "opus"}) - ) - - assert result is fake_executor - # Verify model was forwarded from runtime_config - kwargs = fake_sdk_module.ClaudeSDKExecutor.call_args.kwargs - assert kwargs["model"] == "opus" - - -# ============================================================================ -# 4. 
AutoGen Adapter -# ============================================================================ - -class TestAutoGenAdapter: - - def test_static_identity(self): - from adapters.autogen.adapter import AutoGenAdapter - assert AutoGenAdapter.name() == "autogen" - assert AutoGenAdapter.display_name() == "AutoGen" - assert isinstance(AutoGenAdapter.description(), str) - - def test_config_schema(self): - from adapters.autogen.adapter import AutoGenAdapter - schema = AutoGenAdapter.get_config_schema() - assert isinstance(schema, dict) - assert "model" in schema - - def test_has_setup_and_create_executor(self): - from adapters.autogen.adapter import AutoGenAdapter - import inspect - adapter = AutoGenAdapter() - assert inspect.iscoroutinefunction(adapter.setup) - assert inspect.iscoroutinefunction(adapter.create_executor) - - @pytest.mark.asyncio - async def test_setup_raises_when_autogen_missing(self, monkeypatch): - from adapters.autogen.adapter import AutoGenAdapter - adapter = AutoGenAdapter() - monkeypatch.setitem(sys.modules, "autogen_agentchat", None) - monkeypatch.setitem(sys.modules, "autogen_agentchat.agents", None) - - with pytest.raises((RuntimeError, ImportError)): - await adapter.setup(_make_config()) - - @pytest.mark.asyncio - async def test_setup_succeeds_with_autogen_present(self, monkeypatch): - from adapters.autogen.adapter import AutoGenAdapter - adapter = AutoGenAdapter() - _stub_common_setup(adapter, monkeypatch) - - fake_autogen_mod = ModuleType("autogen_agentchat") - fake_agents_mod = ModuleType("autogen_agentchat.agents") - fake_agents_mod.AssistantAgent = MagicMock() - fake_autogen_mod.agents = fake_agents_mod - monkeypatch.setitem(sys.modules, "autogen_agentchat", fake_autogen_mod) - monkeypatch.setitem(sys.modules, "autogen_agentchat.agents", fake_agents_mod) - - await adapter.setup(_make_config()) - assert adapter.system_prompt == "stub prompt" - - @pytest.mark.asyncio - async def test_create_executor_returns_autogen_executor(self, monkeypatch): - from adapters.autogen.adapter import AutoGenAdapter, AutoGenA2AExecutor - adapter = AutoGenAdapter() - adapter.system_prompt = "autogen system" - adapter.autogen_tools = [] - - result = await adapter.create_executor(_make_config()) - - assert isinstance(result, AutoGenA2AExecutor) - assert result.system_prompt == "autogen system" - - -# ============================================================================ -# 5. 
DeepAgents Adapter -# ============================================================================ - -class TestDeepAgentsAdapter: - - def test_static_identity(self): - from adapters.deepagents.adapter import DeepAgentsAdapter - assert DeepAgentsAdapter.name() == "deepagents" - assert DeepAgentsAdapter.display_name() == "DeepAgents" - assert isinstance(DeepAgentsAdapter.description(), str) - - def test_config_schema(self): - from adapters.deepagents.adapter import DeepAgentsAdapter - schema = DeepAgentsAdapter.get_config_schema() - assert isinstance(schema, dict) - assert "model" in schema - - def test_has_setup_and_create_executor(self): - from adapters.deepagents.adapter import DeepAgentsAdapter - import inspect - adapter = DeepAgentsAdapter() - assert inspect.iscoroutinefunction(adapter.setup) - assert inspect.iscoroutinefunction(adapter.create_executor) - - @pytest.mark.asyncio - async def test_setup_raises_when_deepagents_missing(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - monkeypatch.setitem(sys.modules, "deepagents", None) - - with pytest.raises((RuntimeError, ImportError)): - await adapter.setup(_make_config()) - - @pytest.mark.asyncio - async def test_setup_succeeds_with_deepagents_present(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - _stub_common_setup(adapter, monkeypatch) - - fake_agent = MagicMock() - - # Mock deepagents package with all imports used by setup() - fake_deepagents = ModuleType("deepagents") - fake_deepagents.create_deep_agent = MagicMock(return_value=fake_agent) - fake_deepagents.FilesystemPermission = MagicMock() - monkeypatch.setitem(sys.modules, "deepagents", fake_deepagents) - - fake_backends = ModuleType("deepagents.backends") - fake_backends.FilesystemBackend = MagicMock() - monkeypatch.setitem(sys.modules, "deepagents.backends", fake_backends) - - fake_checkpoint = ModuleType("langgraph.checkpoint.memory") - fake_checkpoint.MemorySaver = MagicMock() - monkeypatch.setitem(sys.modules, "langgraph.checkpoint.memory", fake_checkpoint) - monkeypatch.setitem(sys.modules, "langgraph.checkpoint", ModuleType("langgraph.checkpoint")) - monkeypatch.setitem(sys.modules, "langgraph", ModuleType("langgraph")) - - fake_cache_mod = ModuleType("langchain_core.caches") - fake_cache_mod.InMemoryCache = MagicMock() - monkeypatch.setitem(sys.modules, "langchain_core.caches", fake_cache_mod) - monkeypatch.setitem(sys.modules, "langchain_core", ModuleType("langchain_core")) - - # Mock the LLM creation - monkeypatch.setattr(adapter, "_create_llm", lambda model: MagicMock()) - - await adapter.setup(_make_config()) - assert adapter.agent is fake_agent - # virtual_mode must be False so read_file/ls/write_file/edit_file - # hit the real bind-mounted /workspace instead of an in-memory - # snapshot that silently drifts from what `bash` sees. - fs_call = fake_backends.FilesystemBackend.call_args - assert fs_call is not None, "FilesystemBackend was never constructed" - assert fs_call.kwargs.get("virtual_mode") is False, ( - "FilesystemBackend must be built with virtual_mode=False — " - "virtual_mode=True caused agents to report real files as missing " - "and silently dropped writes across restarts. See commit bc563d1." 
- ) - - @pytest.mark.asyncio - async def test_create_executor_returns_langgraph_executor(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - fake_executor = MagicMock() - fake_a2a_executor_mod = MagicMock() - fake_a2a_executor_mod.LangGraphA2AExecutor = MagicMock(return_value=fake_executor) - monkeypatch.setitem(sys.modules, "a2a_executor", fake_a2a_executor_mod) - - adapter = DeepAgentsAdapter() - adapter.agent = MagicMock() - - result = await adapter.create_executor(_make_config()) - assert result is fake_executor - - def test_create_llm_openai(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - fake_openai_mod = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai_mod.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai_mod) - - result = adapter._create_llm("openai:gpt-4o") - assert result is fake_llm - - def test_create_llm_anthropic(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - fake_anthropic_mod = ModuleType("langchain_anthropic") - fake_llm = MagicMock() - fake_anthropic_mod.ChatAnthropic = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_anthropic", fake_anthropic_mod) - - result = adapter._create_llm("anthropic:claude-sonnet-4-6") - assert result is fake_llm - - def test_create_llm_cerebras(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - fake_openai_mod = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai_mod.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai_mod) - monkeypatch.setenv("CEREBRAS_API_KEY", "test-key") - - result = adapter._create_llm("cerebras:llama3.1-8b") - assert result is fake_llm - fake_openai_mod.ChatOpenAI.assert_called_once_with( - model="llama3.1-8b", - openai_api_key="test-key", - openai_api_base="https://api.cerebras.ai/v1", - ) - - def test_create_llm_google_genai(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - fake_genai_mod = ModuleType("langchain_google_genai") - fake_llm = MagicMock() - fake_genai_mod.ChatGoogleGenerativeAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_google_genai", fake_genai_mod) - - result = adapter._create_llm("google_genai:gemini-2.5-flash") - assert result is fake_llm - fake_genai_mod.ChatGoogleGenerativeAI.assert_called_once_with(model="gemini-2.5-flash") - - def test_create_llm_ollama(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - fake_ollama_mod = ModuleType("langchain_ollama") - fake_llm = MagicMock() - fake_ollama_mod.ChatOllama = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_ollama", fake_ollama_mod) - - result = adapter._create_llm("ollama:llama3") - assert result is fake_llm - fake_ollama_mod.ChatOllama.assert_called_once_with(model="llama3") - - def test_create_llm_unknown_provider_raises(self): - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - with pytest.raises(ValueError, match="Unsupported model provider"): - adapter._create_llm("badprovider:some-model") - - def test_create_llm_default_provider_is_anthropic(self, monkeypatch): - from 
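# Example (sketch): the invariant the virtual_mode assertion above locks in.
# Only the virtual_mode kwarg is evidenced by the mocked call; any other
# FilesystemBackend constructor arguments are deliberately omitted here,
# so check the deepagents API before relying on this.
from deepagents.backends import FilesystemBackend

backend = FilesystemBackend(virtual_mode=False)
# virtual_mode=False: read_file/ls/write_file/edit_file operate on the real
# bind-mounted /workspace, so the agent and `bash` see identical files.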
adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - fake_anthropic_mod = ModuleType("langchain_anthropic") - fake_llm = MagicMock() - fake_anthropic_mod.ChatAnthropic = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_anthropic", fake_anthropic_mod) - - result = adapter._create_llm("claude-sonnet-4-6") - assert result is fake_llm - - @pytest.mark.asyncio - async def test_create_executor_raises_without_setup(self, monkeypatch): - from adapters.deepagents.adapter import DeepAgentsAdapter - fake_a2a_executor_mod = MagicMock() - monkeypatch.setitem(sys.modules, "a2a_executor", fake_a2a_executor_mod) - - adapter = DeepAgentsAdapter() - with pytest.raises(RuntimeError, match="setup\\(\\) must be called"): - await adapter.create_executor(_make_config()) - - -# ============================================================================ -# 6. OpenClaw Adapter -# ============================================================================ - -class TestOpenClawAdapter: - - def test_static_identity(self): - from adapters.openclaw.adapter import OpenClawAdapter - assert OpenClawAdapter.name() == "openclaw" - assert OpenClawAdapter.display_name() == "OpenClaw" - assert isinstance(OpenClawAdapter.description(), str) - - def test_config_schema(self): - from adapters.openclaw.adapter import OpenClawAdapter - schema = OpenClawAdapter.get_config_schema() - assert isinstance(schema, dict) - assert "model" in schema - assert "gateway_port" in schema - - def test_has_setup_and_create_executor(self): - from adapters.openclaw.adapter import OpenClawAdapter - import inspect - adapter = OpenClawAdapter() - assert inspect.iscoroutinefunction(adapter.setup) - assert inspect.iscoroutinefunction(adapter.create_executor) - - @pytest.mark.asyncio - async def test_setup_raises_when_openclaw_cli_install_fails(self, monkeypatch): - """setup() raises RuntimeError if npm install for openclaw CLI fails.""" - import shutil - import subprocess - from adapters.openclaw.adapter import OpenClawAdapter - - monkeypatch.setattr(shutil, "which", lambda cmd: None) # CLI not found - - fake_result = MagicMock() - fake_result.returncode = 1 - fake_result.stderr = "npm ERR! 
not found" - monkeypatch.setattr(subprocess, "run", MagicMock(return_value=fake_result)) - - adapter = OpenClawAdapter() - with pytest.raises(RuntimeError, match="Failed to install OpenClaw"): - await adapter.setup(_make_config()) - - @pytest.mark.asyncio - async def test_create_executor_returns_openclaw_executor(self, monkeypatch): - from adapters.openclaw.adapter import OpenClawAdapter, OpenClawA2AExecutor - adapter = OpenClawAdapter() - - result = await adapter.create_executor(_make_config()) - - assert isinstance(result, OpenClawA2AExecutor) - - @pytest.mark.asyncio - async def test_openclaw_executor_handles_empty_message(self, monkeypatch): - from adapters.openclaw.adapter import OpenClawA2AExecutor - - executor = OpenClawA2AExecutor() - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - context = MagicMock() - - monkeypatch.setattr("adapters.openclaw.adapter.extract_message_text", lambda ctx: "") - monkeypatch.setattr("adapters.openclaw.adapter.set_current_task", AsyncMock()) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - assert events[0] == "No message provided" - - @pytest.mark.asyncio - async def test_openclaw_executor_timeout(self, monkeypatch): - """Executor returns a timeout error message when OpenClaw CLI times out.""" - import asyncio as _asyncio - from adapters.openclaw.adapter import OpenClawA2AExecutor - - executor = OpenClawA2AExecutor() - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - context = MagicMock() - context.task_id = "t-1" - - monkeypatch.setattr("adapters.openclaw.adapter.extract_message_text", lambda ctx: "hello") - monkeypatch.setattr("adapters.openclaw.adapter.set_current_task", AsyncMock()) - monkeypatch.setattr("adapters.openclaw.adapter.brief_task", lambda t: t) - - # Make asyncio.create_subprocess_exec raise TimeoutError via wait_for - async def fake_create_subprocess_exec(*args, **kwargs): - proc = MagicMock() - async def communicate(): - raise _asyncio.TimeoutError() - proc.communicate = communicate - return proc - - monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create_subprocess_exec) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - reply = events[0] - assert "timed out" in reply.lower() or "timeout" in reply.lower() or "120s" in reply - - -# ============================================================================ -# Cross-adapter: Adapter registry -# ============================================================================ - -class TestAdapterRegistry: - """Verify the adapter __init__.py discovers all 6 adapters.""" - - @pytest.fixture(autouse=True) - def clear_adapter_cache(self): - """Clear the adapter cache before each registry test.""" - import adapters as _adapters_pkg - _adapters_pkg._ADAPTER_CACHE.clear() - yield - _adapters_pkg._ADAPTER_CACHE.clear() - - def test_all_adapters_registered(self): - from adapters import discover_adapters - adapters = discover_adapters() - names = set(adapters.keys()) - expected = {"langgraph", "crewai", "claude-code", "autogen", "deepagents", "openclaw", "hermes", "gemini-cli"} - assert expected == names, f"Missing: {expected - names}, Extra: {names - expected}" - - def test_no_duplicate_names(self): - from adapters import discover_adapters - names = list(discover_adapters().keys()) - assert len(names) == len(set(names)), "Duplicate adapter names detected" - - def 
test_all_adapters_have_display_name(self): - from adapters import discover_adapters - for name, cls in discover_adapters().items(): - assert cls.display_name(), f"{name} has empty display_name" - - def test_all_adapters_have_description(self): - from adapters import discover_adapters - for name, cls in discover_adapters().items(): - assert len(cls.description()) > 10, f"{name} description too short" - - def test_discover_adapters_cache_hit(self): - """Second call to discover_adapters() returns the cached dict without re-scanning.""" - from adapters import discover_adapters - first = discover_adapters() - # Call again — should return the exact same object (cache hit) - second = discover_adapters() - assert first is second - - def test_discover_adapters_skips_failing_import(self, monkeypatch, tmp_path): - """discover_adapters() logs debug and continues when an adapter import fails.""" - import importlib - import adapters as _adapters_pkg - - # Make importlib.import_module raise for any "adapters.X" import - original_import = importlib.import_module - - def failing_import(name, *args, **kwargs): - if name.startswith("adapters.") and name != "adapters.base": - raise ImportError(f"Simulated missing dep for {name}") - return original_import(name, *args, **kwargs) - - monkeypatch.setattr(importlib, "import_module", failing_import) - - result = _adapters_pkg.discover_adapters() - # Cache was cleared by the fixture, so we get a fresh (empty) result - assert isinstance(result, dict) - - def test_get_adapter_unknown_runtime_raises_key_error(self): - """get_adapter() raises KeyError for an unrecognised runtime name.""" - from adapters import get_adapter - import pytest - with pytest.raises(KeyError, match="Unknown runtime"): - get_adapter("not-a-real-runtime") - - def test_list_adapters_returns_metadata_dicts(self): - """list_adapters() returns a list with name/display_name/description/config_schema.""" - from adapters import list_adapters - result = list_adapters() - assert isinstance(result, list) - assert len(result) > 0 - for item in result: - assert "name" in item - assert "display_name" in item - assert "description" in item - assert "config_schema" in item - - -# ============================================================================ -# AutoGen execute() — full execution path coverage -# ============================================================================ - -class TestAutoGenExecute: - - @pytest.mark.asyncio - async def test_execute_success_with_str_reply(self, monkeypatch): - """execute() extracts the last str-content message from result.messages.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_autogen = MagicMock() - mock_ext = MagicMock() - monkeypatch.setitem(sys.modules, "autogen_agentchat", mock_autogen) - monkeypatch.setitem(sys.modules, "autogen_agentchat.agents", mock_autogen.agents) - monkeypatch.setitem(sys.modules, "autogen_ext", mock_ext) - monkeypatch.setitem(sys.modules, "autogen_ext.models", mock_ext.models) - monkeypatch.setitem(sys.modules, "autogen_ext.models.openai", mock_ext.models.openai) - - from adapters.autogen.adapter import AutoGenA2AExecutor - - mock_msg = MagicMock() - mock_msg.content = "The answer is 42" - mock_result = MagicMock() - mock_result.messages = [mock_msg] - - mock_agent = AsyncMock() - mock_agent.run = AsyncMock(return_value=mock_result) - mock_autogen.agents.AssistantAgent.return_value = mock_agent - - executor = AutoGenA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="You are helpful", - autogen_tools=[], 
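# Example (sketch): the registry surface these (now deleted) tests pinned
# down; shapes are inferred from the assertions above, not from new code.
from adapters import discover_adapters, get_adapter, list_adapters

registry = discover_adapters()          # {"langgraph": <adapter class>, ...}
assert discover_adapters() is registry  # second call is a cache hit

try:
    get_adapter("not-a-real-runtime")
except KeyError:
    pass                                # unknown runtime names raise KeyError

for meta in list_adapters():            # UI-ready metadata dicts
    assert {"name", "display_name", "description", "config_schema"} <= meta.keys()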
- heartbeat=None, - ) - - context = MagicMock() - event_queue = AsyncMock() - - with patch("adapters.autogen.adapter.extract_message_text", return_value="What is 6*7?"), \ - patch("adapters.autogen.adapter.set_current_task", new_callable=_AsyncMock), \ - patch("adapters.autogen.adapter.extract_history", return_value=[]), \ - patch("adapters.autogen.adapter.build_task_text", return_value="What is 6*7?"): - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_called_once() - call_arg = str(event_queue.enqueue_event.call_args[0][0]) - assert "The answer is 42" in call_arg - - @pytest.mark.asyncio - async def test_execute_fallback_to_str_result_when_no_str_message(self, monkeypatch): - """When no message has str content, reply falls back to str(result).""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_autogen = MagicMock() - mock_ext = MagicMock() - monkeypatch.setitem(sys.modules, "autogen_agentchat", mock_autogen) - monkeypatch.setitem(sys.modules, "autogen_agentchat.agents", mock_autogen.agents) - monkeypatch.setitem(sys.modules, "autogen_ext", mock_ext) - monkeypatch.setitem(sys.modules, "autogen_ext.models", mock_ext.models) - monkeypatch.setitem(sys.modules, "autogen_ext.models.openai", mock_ext.models.openai) - - from adapters.autogen.adapter import AutoGenA2AExecutor - - # Message with non-str content — no valid reply extracted, falls back to str(result) - mock_msg = MagicMock() - mock_msg.content = 12345 # not a str - mock_result = MagicMock() - mock_result.messages = [mock_msg] - mock_result.__str__ = lambda self: "fallback-str-result" - - mock_agent = AsyncMock() - mock_agent.run = AsyncMock(return_value=mock_result) - mock_autogen.agents.AssistantAgent.return_value = mock_agent - - executor = AutoGenA2AExecutor( - model="gpt-4o", # no colon → model_name = model_str - system_prompt=None, - autogen_tools=[], - heartbeat=None, - ) - - context = MagicMock() - event_queue = AsyncMock() - - with patch("adapters.autogen.adapter.extract_message_text", return_value="hello"), \ - patch("adapters.autogen.adapter.set_current_task", new_callable=_AsyncMock), \ - patch("adapters.autogen.adapter.extract_history", return_value=[]), \ - patch("adapters.autogen.adapter.build_task_text", return_value="hello"): - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_called_once() - call_arg = str(event_queue.enqueue_event.call_args[0][0]) - assert "fallback-str-result" in call_arg - - @pytest.mark.asyncio - async def test_execute_exception_path(self, monkeypatch): - """When the agent raises, reply is 'AutoGen error: ...'.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_autogen = MagicMock() - mock_ext = MagicMock() - monkeypatch.setitem(sys.modules, "autogen_agentchat", mock_autogen) - monkeypatch.setitem(sys.modules, "autogen_agentchat.agents", mock_autogen.agents) - monkeypatch.setitem(sys.modules, "autogen_ext", mock_ext) - monkeypatch.setitem(sys.modules, "autogen_ext.models", mock_ext.models) - monkeypatch.setitem(sys.modules, "autogen_ext.models.openai", mock_ext.models.openai) - - from adapters.autogen.adapter import AutoGenA2AExecutor - - mock_agent = AsyncMock() - mock_agent.run = AsyncMock(side_effect=RuntimeError("model exploded")) - mock_autogen.agents.AssistantAgent.return_value = mock_agent - - executor = AutoGenA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="test", - autogen_tools=[], - heartbeat=None, - ) - - context = MagicMock() - event_queue = AsyncMock() - - with 
patch("adapters.autogen.adapter.extract_message_text", return_value="hello"), \ - patch("adapters.autogen.adapter.set_current_task", new_callable=_AsyncMock), \ - patch("adapters.autogen.adapter.extract_history", return_value=[]), \ - patch("adapters.autogen.adapter.build_task_text", return_value="hello"): - await executor.execute(context, event_queue) - - call_arg = str(event_queue.enqueue_event.call_args[0][0]) - assert "AutoGen error" in call_arg - assert "model exploded" in call_arg - - @pytest.mark.asyncio - async def test_execute_empty_message_returns_early(self, monkeypatch): - """When extract_message_text returns empty, enqueues 'No message provided'.""" - from unittest.mock import patch - from adapters.autogen.adapter import AutoGenA2AExecutor - - executor = AutoGenA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="test", - autogen_tools=[], - heartbeat=None, - ) - - context = MagicMock() - event_queue = AsyncMock() - - with patch("adapters.autogen.adapter.extract_message_text", return_value=""): - await executor.execute(context, event_queue) - - call_arg = str(event_queue.enqueue_event.call_args[0][0]) - assert "No message provided" in call_arg - - @pytest.mark.asyncio - async def test_execute_finally_clears_task(self, monkeypatch): - """set_current_task("") is called in finally block even after exception.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_autogen = MagicMock() - mock_ext = MagicMock() - monkeypatch.setitem(sys.modules, "autogen_agentchat", mock_autogen) - monkeypatch.setitem(sys.modules, "autogen_agentchat.agents", mock_autogen.agents) - monkeypatch.setitem(sys.modules, "autogen_ext", mock_ext) - monkeypatch.setitem(sys.modules, "autogen_ext.models", mock_ext.models) - monkeypatch.setitem(sys.modules, "autogen_ext.models.openai", mock_ext.models.openai) - - from adapters.autogen.adapter import AutoGenA2AExecutor - - mock_agent = AsyncMock() - mock_agent.run = AsyncMock(side_effect=Exception("boom")) - mock_autogen.agents.AssistantAgent.return_value = mock_agent - - executor = AutoGenA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="test", - autogen_tools=[], - heartbeat=MagicMock(), - ) - - context = MagicMock() - event_queue = AsyncMock() - set_task_calls = [] - - async def fake_set_current_task(hb, task): - set_task_calls.append(task) - - with patch("adapters.autogen.adapter.extract_message_text", return_value="hello"), \ - patch("adapters.autogen.adapter.set_current_task", side_effect=fake_set_current_task), \ - patch("adapters.autogen.adapter.extract_history", return_value=[]), \ - patch("adapters.autogen.adapter.build_task_text", return_value="hello"): - await executor.execute(context, event_queue) - - # Last call must clear the task - assert set_task_calls[-1] == "" - - -# ============================================================================ -# CrewAI execute() — full execution path coverage -# ============================================================================ - -class TestCrewAIExecute: - - @pytest.mark.asyncio - async def test_execute_success(self, monkeypatch): - """execute() calls crew.kickoff and enqueues the result string.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_crewai = MagicMock() - monkeypatch.setitem(sys.modules, "crewai", mock_crewai) - - from adapters.crewai.adapter import CrewAIA2AExecutor - - mock_crew_instance = MagicMock() - mock_crew_instance.kickoff.return_value = "Crew result here" - mock_crewai.Crew.return_value = mock_crew_instance - 
mock_crewai.Agent.return_value = MagicMock() - mock_crewai.Task.return_value = MagicMock() - - executor = CrewAIA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="Be helpful", - crewai_tools=[], - heartbeat=None, - ) - - context = MagicMock() - event_queue = AsyncMock() - - async def fake_to_thread(fn, *args, **kwargs): - return fn(*args, **kwargs) - - import adapters.shared_runtime as _srt - with patch("adapters.crewai.adapter.asyncio.to_thread", side_effect=fake_to_thread), \ - patch.object(_srt, "extract_message_text", return_value="Hello crew"), \ - patch.object(_srt, "set_current_task", new_callable=_AsyncMock), \ - patch.object(_srt, "extract_history", return_value=[]), \ - patch.object(_srt, "build_task_text", return_value="Hello crew"): - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_called_once() - call_arg = str(event_queue.enqueue_event.call_args[0][0]) - assert "Crew result here" in call_arg - - @pytest.mark.asyncio - async def test_execute_model_conversion(self, monkeypatch): - """openai: prefix is converted to openai/ for CrewAI.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_crewai = MagicMock() - monkeypatch.setitem(sys.modules, "crewai", mock_crewai) - - from adapters.crewai.adapter import CrewAIA2AExecutor - - captured_model = [] - - def capture_agent(**kwargs): - captured_model.append(kwargs.get("llm")) - return MagicMock() - - mock_crewai.Agent.side_effect = capture_agent - mock_crewai.Task.return_value = MagicMock() - mock_crew_instance = MagicMock() - mock_crew_instance.kickoff.return_value = "ok" - mock_crewai.Crew.return_value = mock_crew_instance - - executor = CrewAIA2AExecutor( - model="openai:gpt-4.1", - system_prompt="test", - crewai_tools=[], - heartbeat=None, - ) - - context = MagicMock() - event_queue = AsyncMock() - - async def fake_to_thread(fn, *a, **kw): - return fn(*a, **kw) - - import adapters.shared_runtime as _srt - with patch("adapters.crewai.adapter.asyncio.to_thread", side_effect=fake_to_thread), \ - patch.object(_srt, "extract_message_text", return_value="hello"), \ - patch.object(_srt, "set_current_task", new_callable=_AsyncMock), \ - patch.object(_srt, "extract_history", return_value=[]), \ - patch.object(_srt, "build_task_text", return_value="hello"): - await executor.execute(context, event_queue) - - assert captured_model[0] == "openai/gpt-4.1" - - @pytest.mark.asyncio - async def test_execute_exception_path(self, monkeypatch): - """When crew.kickoff raises, reply is 'CrewAI error: ...'.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_crewai = MagicMock() - monkeypatch.setitem(sys.modules, "crewai", mock_crewai) - - from adapters.crewai.adapter import CrewAIA2AExecutor - - mock_crewai.Agent.return_value = MagicMock() - mock_crewai.Task.return_value = MagicMock() - mock_crew_instance = MagicMock() - mock_crew_instance.kickoff.side_effect = RuntimeError("crew failure") - mock_crewai.Crew.return_value = mock_crew_instance - - executor = CrewAIA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="test", - crewai_tools=[], - heartbeat=None, - ) - - context = MagicMock() - event_queue = AsyncMock() - - async def fake_to_thread(fn, *a, **kw): - return fn(*a, **kw) - - import adapters.shared_runtime as _srt - with patch("adapters.crewai.adapter.asyncio.to_thread", side_effect=fake_to_thread), \ - patch.object(_srt, "extract_message_text", return_value="hello"), \ - patch.object(_srt, "set_current_task", new_callable=_AsyncMock), \ - patch.object(_srt, 
"extract_history", return_value=[]), \ - patch.object(_srt, "build_task_text", return_value="hello"): - await executor.execute(context, event_queue) - - call_arg = str(event_queue.enqueue_event.call_args[0][0]) - assert "CrewAI error" in call_arg - assert "crew failure" in call_arg - - @pytest.mark.asyncio - async def test_execute_finally_clears_task(self, monkeypatch): - """set_current_task("") is called in the finally block.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - mock_crewai = MagicMock() - monkeypatch.setitem(sys.modules, "crewai", mock_crewai) - - from adapters.crewai.adapter import CrewAIA2AExecutor - - mock_crewai.Agent.return_value = MagicMock() - mock_crewai.Task.return_value = MagicMock() - mock_crew_instance = MagicMock() - mock_crew_instance.kickoff.side_effect = Exception("boom") - mock_crewai.Crew.return_value = mock_crew_instance - - executor = CrewAIA2AExecutor( - model="openai:gpt-4o-mini", - system_prompt="test", - crewai_tools=[], - heartbeat=MagicMock(), - ) - - context = MagicMock() - event_queue = AsyncMock() - set_task_calls = [] - - async def fake_set_current_task(hb, task): - set_task_calls.append(task) - - async def fake_to_thread(fn, *a, **kw): - return fn(*a, **kw) - - import adapters.shared_runtime as _srt - with patch("adapters.crewai.adapter.asyncio.to_thread", side_effect=fake_to_thread), \ - patch.object(_srt, "extract_message_text", return_value="hi"), \ - patch.object(_srt, "set_current_task", side_effect=fake_set_current_task), \ - patch.object(_srt, "extract_history", return_value=[]), \ - patch.object(_srt, "build_task_text", return_value="hi"): - await executor.execute(context, event_queue) - - assert set_task_calls[-1] == "" - - -# ============================================================================ -# DeepAgents _create_llm() — uncovered provider branches -# ============================================================================ - -class TestDeepAgentsCreateLlmBranches: - - def test_create_llm_no_colon_defaults_to_anthropic(self, monkeypatch): - """Model string without ':' defaults to anthropic provider.""" - from types import ModuleType - fake_anthropic = ModuleType("langchain_anthropic") - fake_llm = MagicMock() - fake_anthropic.ChatAnthropic = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_anthropic", fake_anthropic) - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - result = adapter._create_llm("claude-sonnet-4-6") - - fake_anthropic.ChatAnthropic.assert_called_once_with(model="claude-sonnet-4-6") - assert result is fake_llm - - def test_create_llm_openai_with_base_url(self, monkeypatch): - """When OPENAI_BASE_URL is set, openai_api_base is passed to ChatOpenAI.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1") - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("openai:my-model") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert call_kwargs.get("openai_api_base") == "http://localhost:11434/v1" - assert call_kwargs["model"] == "my-model" - - def test_create_llm_openrouter(self, monkeypatch): - """openrouter provider uses ChatOpenAI with openrouter base URL.""" - from types import ModuleType - fake_openai = 
ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test") - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - result = adapter._create_llm("openrouter:google/gemini-2.5-flash") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert "openrouter.ai" in call_kwargs.get("openai_api_base", "") - assert call_kwargs["model"] == "google/gemini-2.5-flash" - assert result is fake_llm - - def test_create_llm_groq(self, monkeypatch): - """groq provider uses ChatOpenAI with groq base URL.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - monkeypatch.setenv("GROQ_API_KEY", "gsk_test") - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - result = adapter._create_llm("groq:llama3-8b-8192") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert "groq.com" in call_kwargs.get("openai_api_base", "") - assert call_kwargs["model"] == "llama3-8b-8192" - assert result is fake_llm - - def test_create_llm_anthropic_with_base_url(self, monkeypatch): - """When ANTHROPIC_BASE_URL is set, anthropic_api_url is passed to ChatAnthropic.""" - from types import ModuleType - fake_anthropic = ModuleType("langchain_anthropic") - fake_llm = MagicMock() - fake_anthropic.ChatAnthropic = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_anthropic", fake_anthropic) - monkeypatch.setenv("ANTHROPIC_BASE_URL", "http://proxy:8080") - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - result = adapter._create_llm("anthropic:claude-sonnet-4-6") - - call_kwargs = fake_anthropic.ChatAnthropic.call_args[1] - assert call_kwargs.get("anthropic_api_url") == "http://proxy:8080" - assert result is fake_llm - - def test_create_llm_unknown_provider_raises(self): - """Unknown provider raises ValueError instead of silently falling back.""" - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - - with pytest.raises(ValueError, match="Unsupported model provider"): - adapter._create_llm("someunknown:my-model") - - def test_create_llm_multiple_colons_preserves_model(self, monkeypatch): - """Model like 'google_genai:models/gemini-2.5-flash' splits on first colon only.""" - from types import ModuleType - fake_genai = ModuleType("langchain_google_genai") - fake_llm = MagicMock() - fake_genai.ChatGoogleGenerativeAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_google_genai", fake_genai) - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - result = adapter._create_llm("google_genai:models/gemini-2.5-flash:latest") - - fake_genai.ChatGoogleGenerativeAI.assert_called_once_with(model="models/gemini-2.5-flash:latest") - assert result is fake_llm - - def test_create_llm_openrouter_fallback_to_openai_key(self, monkeypatch): - """When OPENROUTER_API_KEY is unset, falls back to OPENAI_API_KEY.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", 
fake_openai) - monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) - monkeypatch.setenv("OPENAI_API_KEY", "sk-fallback-key") - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("openrouter:meta-llama/llama-3-8b") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert call_kwargs["openai_api_key"] == "sk-fallback-key" - - def test_create_llm_openrouter_both_keys_unset(self, monkeypatch): - """When both OPENROUTER_API_KEY and OPENAI_API_KEY are unset, empty string is used.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) - monkeypatch.delenv("OPENAI_API_KEY", raising=False) - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("openrouter:meta-llama/llama-3-8b") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert call_kwargs["openai_api_key"] == "" - - def test_create_llm_openai_without_base_url(self, monkeypatch): - """When OPENAI_BASE_URL is not set, openai_api_base should NOT be passed.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - monkeypatch.delenv("OPENAI_BASE_URL", raising=False) - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("openai:gpt-4o") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert "openai_api_base" not in call_kwargs - - def test_create_llm_anthropic_without_base_url(self, monkeypatch): - """When ANTHROPIC_BASE_URL is not set, anthropic_api_url should NOT be passed.""" - from types import ModuleType - fake_anthropic = ModuleType("langchain_anthropic") - fake_llm = MagicMock() - fake_anthropic.ChatAnthropic = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_anthropic", fake_anthropic) - monkeypatch.delenv("ANTHROPIC_BASE_URL", raising=False) - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("anthropic:claude-sonnet-4-6") - - call_kwargs = fake_anthropic.ChatAnthropic.call_args[1] - assert "anthropic_api_url" not in call_kwargs - - def test_create_llm_groq_empty_api_key(self, monkeypatch): - """When GROQ_API_KEY is not set, empty string is passed.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - monkeypatch.delenv("GROQ_API_KEY", raising=False) - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("groq:llama3-8b") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert call_kwargs["openai_api_key"] == "" - - def test_create_llm_cerebras_empty_api_key(self, monkeypatch): - """When CEREBRAS_API_KEY is not set, empty string is passed.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - 
monkeypatch.delenv("CEREBRAS_API_KEY", raising=False) - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("cerebras:llama3.1-8b") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert call_kwargs["openai_api_key"] == "" - - def test_create_llm_openrouter_max_tokens(self, monkeypatch): - """OpenRouter reads MAX_TOKENS env var.""" - from types import ModuleType - fake_openai = ModuleType("langchain_openai") - fake_llm = MagicMock() - fake_openai.ChatOpenAI = MagicMock(return_value=fake_llm) - monkeypatch.setitem(sys.modules, "langchain_openai", fake_openai) - monkeypatch.setenv("OPENROUTER_API_KEY", "test") - monkeypatch.setenv("MAX_TOKENS", "4096") - - from adapters.deepagents.adapter import DeepAgentsAdapter - adapter = DeepAgentsAdapter() - adapter._create_llm("openrouter:meta-llama/llama-3-8b") - - call_kwargs = fake_openai.ChatOpenAI.call_args[1] - assert call_kwargs["max_tokens"] == 4096 - - -# ============================================================================ -# ClaudeCode create_executor() — system-prompt.md file reading -# ============================================================================ - -class TestClaudeCodeSystemPromptFile: - - @pytest.mark.asyncio - async def test_create_executor_reads_prompt_file_when_system_prompt_none(self, tmp_path, monkeypatch): - """When system_prompt is None and system-prompt.md exists, it is read.""" - from unittest.mock import patch - - prompt_file = tmp_path / "system-prompt.md" - prompt_file.write_text("Be helpful and concise.") - - from adapters.claude_code.adapter import ClaudeCodeAdapter - - class FakeCLIAgentExecutor: - def __init__(self, **kwargs): - self.system_prompt = kwargs.get("system_prompt") - - fake_cli_mod = MagicMock() - fake_cli_mod.CLIAgentExecutor = FakeCLIAgentExecutor - monkeypatch.setitem(sys.modules, "cli_executor", fake_cli_mod) - - from config import RuntimeConfig - monkeypatch.setitem(sys.modules, "config", MagicMock(RuntimeConfig=RuntimeConfig)) - - adapter = ClaudeCodeAdapter() - result = await adapter.create_executor( - _make_config(config_path=str(tmp_path), system_prompt=None) - ) - - assert result.system_prompt == "Be helpful and concise." - - @pytest.mark.asyncio - async def test_create_executor_skips_prompt_file_when_system_prompt_set(self, tmp_path, monkeypatch): - """When system_prompt is already set, the prompt file is not used.""" - prompt_file = tmp_path / "system-prompt.md" - prompt_file.write_text("Should not be used.") - - from adapters.claude_code.adapter import ClaudeCodeAdapter - - class FakeCLIAgentExecutor: - def __init__(self, **kwargs): - self.system_prompt = kwargs.get("system_prompt") - - fake_cli_mod = MagicMock() - fake_cli_mod.CLIAgentExecutor = FakeCLIAgentExecutor - monkeypatch.setitem(sys.modules, "cli_executor", fake_cli_mod) - - from config import RuntimeConfig - monkeypatch.setitem(sys.modules, "config", MagicMock(RuntimeConfig=RuntimeConfig)) - - adapter = ClaudeCodeAdapter() - result = await adapter.create_executor( - _make_config(config_path=str(tmp_path), system_prompt="Use the provided prompt.") - ) - - assert result.system_prompt == "Use the provided prompt." 
- - @pytest.mark.asyncio - async def test_create_executor_no_prompt_file_no_system_prompt(self, tmp_path, monkeypatch): - """When system_prompt is None and no file exists, system_prompt stays None.""" - from adapters.claude_code.adapter import ClaudeCodeAdapter - - class FakeCLIAgentExecutor: - def __init__(self, **kwargs): - self.system_prompt = kwargs.get("system_prompt") - - fake_cli_mod = MagicMock() - fake_cli_mod.CLIAgentExecutor = FakeCLIAgentExecutor - monkeypatch.setitem(sys.modules, "cli_executor", fake_cli_mod) - - from config import RuntimeConfig - monkeypatch.setitem(sys.modules, "config", MagicMock(RuntimeConfig=RuntimeConfig)) - - adapter = ClaudeCodeAdapter() - # tmp_path has no system-prompt.md - result = await adapter.create_executor( - _make_config(config_path=str(tmp_path), system_prompt=None) - ) - - assert result.system_prompt is None - - -# ============================================================================ -# BaseAdapter _common_setup() — plugin names, plugin skills, coordinator prompt -# ============================================================================ - -class TestCommonSetupMissingPaths: - - def _make_test_adapter_and_config(self, tmp_path): - from adapters.base import BaseAdapter, AdapterConfig - - class TestAdapter(BaseAdapter): - @staticmethod - def name(): return "test" - @staticmethod - def display_name(): return "Test" - @staticmethod - def description(): return "Test adapter" - async def setup(self, config): pass - async def create_executor(self, config): pass - - adapter = TestAdapter() - config = AdapterConfig( - model="openai:test", - config_path=str(tmp_path), - workspace_id="ws-test", - ) - return adapter, config - - @pytest.mark.asyncio - async def test_common_setup_logs_plugin_names(self, tmp_path): - """When plugins.plugin_names is non-empty, the logger.info path is reached.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - adapter, config = self._make_test_adapter_and_config(tmp_path) - - mock_plugins = MagicMock() - mock_plugins.plugin_names = ["plugin-alpha"] - mock_plugins.skill_dirs = [] - mock_plugins.prompt_fragments = [] - mock_plugins.rules = [] - - with patch("plugins.load_plugins", return_value=mock_plugins), \ - patch("skill_loader.loader.load_skills", return_value=[]), \ - patch("coordinator.get_children", return_value=[]), \ - patch("coordinator.get_parent_context", return_value=[]), \ - patch("coordinator.build_children_description", return_value=""), \ - patch("prompt.get_peer_capabilities", return_value=[]), \ - patch("prompt.build_system_prompt", return_value="system prompt with plugin"): - result = await adapter._common_setup(config) - - assert result.system_prompt == "system prompt with plugin" - assert result.is_coordinator is False - - @pytest.mark.asyncio - async def test_common_setup_loads_plugin_skill_dirs(self, tmp_path): - """Plugin skill_dirs are iterated and new (non-duplicate) skills are appended.""" - import os - from unittest.mock import patch, AsyncMock as _AsyncMock - - plugin_skills_root = tmp_path / "plugin_skills" - plugin_skills_root.mkdir() - (plugin_skills_root / "my_plugin_skill").mkdir() - - adapter, config = self._make_test_adapter_and_config(tmp_path) - - mock_plugins = MagicMock() - mock_plugins.plugin_names = [] - mock_plugins.skill_dirs = [str(plugin_skills_root)] - mock_plugins.prompt_fragments = [] - mock_plugins.rules = [] - - fake_plugin_skill = MagicMock() - fake_plugin_skill.metadata.id = "my_plugin_skill" - fake_plugin_skill.tools = [] - - def 
fake_load_skills(path, names): - if str(path) == str(plugin_skills_root): - return [fake_plugin_skill] - return [] - - with patch("plugins.load_plugins", return_value=mock_plugins), \ - patch("skill_loader.loader.load_skills", side_effect=fake_load_skills), \ - patch("coordinator.get_children", return_value=[]), \ - patch("coordinator.get_parent_context", return_value=[]), \ - patch("coordinator.build_children_description", return_value=""), \ - patch("prompt.get_peer_capabilities", return_value=[]), \ - patch("prompt.build_system_prompt", return_value="system"): - result = await adapter._common_setup(config) - - skill_ids = [s.metadata.id for s in result.loaded_skills] - assert "my_plugin_skill" in skill_ids - - @pytest.mark.asyncio - async def test_common_setup_deduplicates_plugin_skills(self, tmp_path): - """A plugin skill with the same id as a workspace skill is not appended twice.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - plugin_skills_root = tmp_path / "plugin_skills" - plugin_skills_root.mkdir() - (plugin_skills_root / "shared_skill").mkdir() - - adapter, config = self._make_test_adapter_and_config(tmp_path) - - mock_plugins = MagicMock() - mock_plugins.plugin_names = [] - mock_plugins.skill_dirs = [str(plugin_skills_root)] - mock_plugins.prompt_fragments = [] - mock_plugins.rules = [] - - fake_workspace_skill = MagicMock() - fake_workspace_skill.metadata.id = "shared_skill" - fake_workspace_skill.tools = [] - - fake_plugin_skill = MagicMock() - fake_plugin_skill.metadata.id = "shared_skill" - fake_plugin_skill.tools = [] - - def fake_load_skills(path, names): - if str(path) == str(plugin_skills_root): - return [fake_plugin_skill] - return [fake_workspace_skill] - - with patch("plugins.load_plugins", return_value=mock_plugins), \ - patch("skill_loader.loader.load_skills", side_effect=fake_load_skills), \ - patch("coordinator.get_children", return_value=[]), \ - patch("coordinator.get_parent_context", return_value=[]), \ - patch("coordinator.build_children_description", return_value=""), \ - patch("prompt.get_peer_capabilities", return_value=[]), \ - patch("prompt.build_system_prompt", return_value="system"): - result = await adapter._common_setup(config) - - ids = [s.metadata.id for s in result.loaded_skills] - assert ids.count("shared_skill") == 1 - - @pytest.mark.asyncio - async def test_common_setup_coordinator_prompt_appended(self, tmp_path): - """When is_coordinator=True, build_children_description output is added to extra_prompts.""" - from unittest.mock import patch, AsyncMock as _AsyncMock - - adapter, config = self._make_test_adapter_and_config(tmp_path) - - mock_plugins = MagicMock() - mock_plugins.plugin_names = [] - mock_plugins.skill_dirs = [] - mock_plugins.prompt_fragments = [] - mock_plugins.rules = [] - - children = [{"id": "child-1", "name": "Worker", "role": "Worker", "status": "online"}] - captured_extra_prompts = [] - - def fake_build_system_prompt(*args, **kwargs): - captured_extra_prompts.extend(kwargs.get("plugin_prompts", [])) - return "coordinator system prompt" - - fake_route_tool = MagicMock() - fake_route_tool.name = "route_task_to_team" - - with patch("plugins.load_plugins", return_value=mock_plugins), \ - patch("skill_loader.loader.load_skills", return_value=[]), \ - patch("coordinator.get_children", return_value=children), \ - patch("coordinator.get_parent_context", return_value=[]), \ - patch("coordinator.build_children_description", return_value="## Team\n- Worker"), \ - patch("coordinator.route_task_to_team", 
fake_route_tool), \ - patch("prompt.get_peer_capabilities", return_value=[]), \ - patch("prompt.build_system_prompt", side_effect=fake_build_system_prompt): - result = await adapter._common_setup(config) - - assert result.is_coordinator is True - assert "## Team\n- Worker" in captured_extra_prompts - - -# ============================================================================ -# BaseAdapter.get_config_schema() default implementation (line 73) -# ============================================================================ - -def test_base_adapter_default_get_config_schema(): - """The default get_config_schema() returns an empty dict.""" - from adapters.base import BaseAdapter, AdapterConfig - - # Create a minimal concrete subclass that does NOT override get_config_schema - class MinimalAdapter(BaseAdapter): - @staticmethod - def name(): return "minimal" - @staticmethod - def display_name(): return "Minimal" - @staticmethod - def description(): return "Minimal test adapter" - async def setup(self, config): pass - async def create_executor(self, config): pass - - schema = MinimalAdapter.get_config_schema() - assert schema == {} - - -# ============================================================================ -# CrewAI _langchain_to_crewai wrapper body (lines 28-29) -# ============================================================================ - -def test_langchain_to_crewai_wrapper_invokes_tool(monkeypatch): - """The sync wrapper returned by _langchain_to_crewai calls lc_tool.ainvoke.""" - from types import ModuleType - from unittest.mock import AsyncMock, MagicMock - - # Ensure crewai is mocked so _langchain_to_crewai can import crewai.tools - if "crewai" not in sys.modules or sys.modules.get("crewai") is None: - crewai_mod = ModuleType("crewai") - crewai_tools_mod = ModuleType("crewai.tools") - # @tool decorator: returns the function unchanged - crewai_tools_mod.tool = lambda name: (lambda f: f) - crewai_mod.tools = crewai_tools_mod - crewai_mod.__version__ = "0.0.0-mock" - monkeypatch.setitem(sys.modules, "crewai", crewai_mod) - monkeypatch.setitem(sys.modules, "crewai.tools", crewai_tools_mod) - - mock_tool = MagicMock() - mock_tool.name = "calc_tool" - mock_tool.description = "A calculator tool." - mock_tool.ainvoke = AsyncMock(return_value="42") - - from adapters.crewai.adapter import _langchain_to_crewai - - wrapped = _langchain_to_crewai(mock_tool) - # The crewai @tool mock returns the raw wrapper function unchanged, - # so 'wrapped' IS the inner wrapper() — call it synchronously. 
- result = wrapped(x=6, y=7) - - mock_tool.ainvoke.assert_called_once_with({"x": 6, "y": 7}) - assert result == "42" - - -# ============================================================================ -# Openclaw execute() output parsing (lines 214-227, 231-232) -# ============================================================================ - -class TestOpenClawExecuteOutputParsing: - """Cover the subprocess output parsing branches in OpenClawA2AExecutor.execute().""" - - def _make_proc(self, returncode, stdout_bytes, stderr_bytes=b""): - proc = MagicMock() - proc.returncode = returncode - proc.kill = MagicMock() - async def communicate(): - return stdout_bytes, stderr_bytes - proc.communicate = communicate - return proc - - @pytest.mark.asyncio - async def test_execute_json_output_with_payloads(self, monkeypatch): - """Lines 216-221: returncode=0, valid JSON with payloads list.""" - import asyncio as _asyncio - import json as _json - from adapters.openclaw.adapter import OpenClawA2AExecutor - - executor = OpenClawA2AExecutor() - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - context = MagicMock() - context.task_id = "t-1" - - monkeypatch.setattr("adapters.openclaw.adapter.extract_message_text", lambda ctx: "hello") - monkeypatch.setattr("adapters.openclaw.adapter.set_current_task", AsyncMock()) - monkeypatch.setattr("adapters.openclaw.adapter.brief_task", lambda t: t) - - payload_json = _json.dumps({"result": {"payloads": [{"text": "great answer"}]}}).encode() - proc = self._make_proc(0, payload_json) - - async def fake_create_subprocess_exec(*args, **kwargs): - return proc - - monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create_subprocess_exec) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - # reply should be the text from payloads - from a2a.utils import new_agent_text_message - assert events[0] == new_agent_text_message("great answer") - - @pytest.mark.asyncio - async def test_execute_json_output_no_payloads(self, monkeypatch): - """Lines 222-223: returncode=0, valid JSON but empty payloads.""" - import asyncio as _asyncio - import json as _json - from adapters.openclaw.adapter import OpenClawA2AExecutor - - executor = OpenClawA2AExecutor() - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - context = MagicMock() - context.task_id = "t-1" - - monkeypatch.setattr("adapters.openclaw.adapter.extract_message_text", lambda ctx: "hi") - monkeypatch.setattr("adapters.openclaw.adapter.set_current_task", AsyncMock()) - monkeypatch.setattr("adapters.openclaw.adapter.brief_task", lambda t: t) - - data = {"result": {"payloads": []}, "status": "ok"} - proc = self._make_proc(0, _json.dumps(data).encode()) - - async def fake_create_subprocess_exec(*args, **kwargs): - return proc - - monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create_subprocess_exec) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - # reply is str(data) - assert str(data) in str(events[0]) - - @pytest.mark.asyncio - async def test_execute_non_json_output(self, monkeypatch): - """Line 224-225: returncode=0, output is not valid JSON.""" - import asyncio as _asyncio - from adapters.openclaw.adapter import OpenClawA2AExecutor - - executor = OpenClawA2AExecutor() - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - context = 
MagicMock() - context.task_id = "t-1" - - monkeypatch.setattr("adapters.openclaw.adapter.extract_message_text", lambda ctx: "hi") - monkeypatch.setattr("adapters.openclaw.adapter.set_current_task", AsyncMock()) - monkeypatch.setattr("adapters.openclaw.adapter.brief_task", lambda t: t) - - proc = self._make_proc(0, b"plain text output, not json") - - async def fake_create_subprocess_exec(*args, **kwargs): - return proc - - monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create_subprocess_exec) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - assert "plain text output, not json" in str(events[0]) - - @pytest.mark.asyncio - async def test_execute_nonzero_returncode_with_stderr(self, monkeypatch): - """Line 227: returncode!=0, includes stderr in reply.""" - import asyncio as _asyncio - from adapters.openclaw.adapter import OpenClawA2AExecutor - - executor = OpenClawA2AExecutor() - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - context = MagicMock() - context.task_id = "t-1" - - monkeypatch.setattr("adapters.openclaw.adapter.extract_message_text", lambda ctx: "hi") - monkeypatch.setattr("adapters.openclaw.adapter.set_current_task", AsyncMock()) - monkeypatch.setattr("adapters.openclaw.adapter.brief_task", lambda t: t) - - proc = self._make_proc(1, b"", b"some error message") - - async def fake_create_subprocess_exec(*args, **kwargs): - return proc - - monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create_subprocess_exec) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - assert "OpenClaw error" in str(events[0]) - - @pytest.mark.asyncio - async def test_execute_generic_exception(self, monkeypatch): - """Lines 231-232: generic Exception (not TimeoutError) is caught.""" - import asyncio as _asyncio - from adapters.openclaw.adapter import OpenClawA2AExecutor - - executor = OpenClawA2AExecutor() - events = [] - event_queue = MagicMock() - event_queue.enqueue_event = AsyncMock(side_effect=events.append) - context = MagicMock() - context.task_id = "t-1" - - monkeypatch.setattr("adapters.openclaw.adapter.extract_message_text", lambda ctx: "hi") - monkeypatch.setattr("adapters.openclaw.adapter.set_current_task", AsyncMock()) - monkeypatch.setattr("adapters.openclaw.adapter.brief_task", lambda t: t) - - async def fake_create_subprocess_exec(*args, **kwargs): - raise RuntimeError("unexpected failure") - - monkeypatch.setattr(_asyncio, "create_subprocess_exec", fake_create_subprocess_exec) - - await executor.execute(context, event_queue) - - event_queue.enqueue_event.assert_awaited_once() - assert "OpenClaw error" in str(events[0]) - assert "unexpected failure" in str(events[0]) - - -# ============================================================================ -# adapters/__init__.py: get_adapter() success path (line 41) -# ============================================================================ - -def test_get_adapter_valid_runtime_returns_class(): - """get_adapter() returns the adapter class when runtime is valid (line 41).""" - from adapters import get_adapter - from adapters.base import BaseAdapter - - # "langgraph" should always be available - cls = get_adapter("langgraph") - assert issubclass(cls, BaseAdapter) - assert cls.name() == "langgraph" diff --git a/workspace-template/tests/test_cli_executor.py b/workspace-template/tests/test_cli_executor.py deleted file mode 100644 index 72b2128a..00000000 --- 
a/workspace-template/tests/test_cli_executor.py +++ /dev/null @@ -1,1084 +0,0 @@ -"""Tests for cli_executor.py — CLI-based agent executor.""" - -import asyncio -import json -import os -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from config import RuntimeConfig -from cli_executor import CLIAgentExecutor -from executor_helpers import brief_summary as _brief_summary - - -def _make_executor( - runtime="codex", - runtime_config=None, - system_prompt="You are a helpful agent.", - heartbeat=None, - config_path="/configs", -): - """Build a CLIAgentExecutor with mocked externals.""" - if runtime_config is None: - runtime_config = RuntimeConfig() - with patch("shutil.which", return_value="/usr/bin/claude"): - executor = CLIAgentExecutor( - runtime=runtime, - runtime_config=runtime_config, - system_prompt=system_prompt, - heartbeat=heartbeat, - config_path=config_path, - ) - return executor - - -def _make_context(text_parts, context_id="ctx-test"): - """Helper to build a mock RequestContext.""" - parts = [] - for t in text_parts: - p = MagicMock() - p.text = t - parts.append(p) - context = MagicMock() - context.message.parts = parts - context.context_id = context_id - return context - - -def _make_event_queue(): - """Helper to build a mock EventQueue with async enqueue_event.""" - return AsyncMock() - - -# ---------- _build_command tests ---------- - - -def test_build_command_codex_defaults(): - """Verify codex preset produces the expected flags.""" - executor = _make_executor() - cmd = executor._build_command("Hello world") - - assert cmd[0] == "codex" - assert "--print" in cmd - assert "--dangerously-skip-permissions" in cmd - # No --output-format json anymore — that was a dead claude-code branch. - assert "--output-format" not in cmd - # Prompt flag and message at the end - assert "-p" in cmd - idx = cmd.index("-p") - assert cmd[idx + 1] == "Hello world" - - -def test_cli_executor_rejects_claude_code_runtime(): - """Claude-code is served by ClaudeSDKExecutor — CLI path must refuse it.""" - from cli_executor import CLIAgentExecutor - with pytest.raises(ValueError, match="ClaudeSDKExecutor"): - CLIAgentExecutor( - runtime="claude-code", - runtime_config=RuntimeConfig(), - ) - - -# classify_subprocess_error / sanitize_agent_error tests moved to -# test_executor_helpers.py — the function lives in executor_helpers now. 
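# [Added sketch — illustrative, not from the patch] The codex-preset
# assertions in this file imply a command of roughly this shape; the flag
# names are copied from the asserts, everything else is assumed:
def build_codex_command(message, model=""):
    cmd = ["codex", "--print", "--dangerously-skip-permissions"]
    if model:
        cmd += ["--model", model]  # omitted entirely when model is empty
    cmd += ["-p", message]  # prompt flag and message go last
    return cmd

assert build_codex_command("Hello world")[-1] == "Hello world"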
- - -def test_build_command_model_flag(): - """Verify --model flag is included when model is set.""" - rc = RuntimeConfig(model="opus") - executor = _make_executor(runtime_config=rc) - - cmd = executor._build_command("Test") - - assert "--model" in cmd - idx = cmd.index("--model") - assert cmd[idx + 1] == "opus" - - -def test_build_command_no_model_flag_when_empty(): - """Verify --model flag is NOT included when model is empty.""" - rc = RuntimeConfig(model="") - executor = _make_executor(runtime_config=rc) - - cmd = executor._build_command("Test") - - assert "--model" not in cmd - - -def test_system_prompt_included_every_call(): - """System prompt is injected on every call now that the CLI executor - no longer tracks session state.""" - executor = _make_executor(system_prompt="Be helpful") - cmd_first = executor._build_command("First message") - cmd_second = executor._build_command("Second message") - assert "--system-prompt" in cmd_first - assert "--system-prompt" in cmd_second - - -# ---------- execute tests ---------- - - -@pytest.mark.asyncio -async def test_set_current_task_on_execute(): - """Heartbeat is updated with the task summary, then cleared.""" - heartbeat = MagicMock() - heartbeat.current_task = "" - heartbeat.active_tasks = 0 - - executor = _make_executor(heartbeat=heartbeat) - executor._run_cli = AsyncMock() - - task_values = [] - - async def tracking_set(hb, task): - task_values.append(task) - if hb: - hb.current_task = task - hb.active_tasks = 1 if task else 0 - - part = MagicMock() - part.text = "Build the feature" - context = MagicMock() - context.message.parts = [part] - eq = _make_event_queue() - - with patch("cli_executor.set_current_task", new=tracking_set), \ - patch("cli_executor.read_delegation_results", return_value=""), \ - patch("cli_executor.recall_memories", new=AsyncMock(return_value="")), \ - patch("cli_executor.commit_memory", new=AsyncMock()): - await executor.execute(context, eq) - - assert len(task_values) == 2 - assert task_values[0] != "" # brief summary set at start - assert task_values[1] == "" # cleared at end - - -@pytest.mark.asyncio -async def test_empty_message_rejected(): - """Verify empty message returns error event.""" - executor = _make_executor() - - # Part with no text - part = MagicMock(spec=[]) # no .text attribute - - context = MagicMock() - context.message.parts = [part] - eq = _make_event_queue() - - await executor.execute(context, eq) - - eq.enqueue_event.assert_called_once() - event_arg = eq.enqueue_event.call_args[0][0] - assert "Error" in str(event_arg) or "no text" in str(event_arg) - - -# ---------- _brief_summary tests ---------- - - -def test_brief_summary_simple(): - """Simple single-line text is returned as-is.""" - assert _brief_summary("Hello world") == "Hello world" - - -def test_brief_summary_truncation(): - """Long text is truncated with ellipsis.""" - long_text = "A" * 100 - result = _brief_summary(long_text, max_len=80) - assert len(result) == 80 - assert result.endswith("...") - - -def test_brief_summary_strips_markdown_headers(): - """Markdown headers (# ## ###) are stripped.""" - assert _brief_summary("## Build the feature") == "Build the feature" - assert _brief_summary("### Deploy to prod") == "Deploy to prod" - - -def test_brief_summary_skips_empty_and_code_fences(): - """Empty lines and code fence markers are skipped; first non-skippable line returned.""" - text = "\n\n```python\nActual summary" - assert _brief_summary(text) == "Actual summary" - - -def test_brief_summary_strips_bold_italic(): - """Markdown 
bold/italic markers are removed.""" - assert _brief_summary("**Important** task") == "Important task" - assert _brief_summary("__Urgent__ fix") == "Urgent fix" - - -def test_brief_summary_skips_hr(): - """Horizontal rules (---) are skipped.""" - text = "---\nReal content" - assert _brief_summary(text) == "Real content" - - -def test_brief_summary_fallback(): - """When all lines are empty/fences/hr, falls back to truncated raw text.""" - text = "\n\n---\n```\n" - result = _brief_summary(text) - # All lines are skippable, so falls back to text[:max_len] - assert result == text[:80] - - -# ---------- _run_cli tests ---------- - - -@pytest.mark.asyncio -async def test_run_cli_success(): - """Successful CLI execution enqueues the raw stdout as the response.""" - executor = _make_executor() - - mock_proc = AsyncMock() - mock_proc.communicate = AsyncMock(return_value=(b"Done!", b"")) - mock_proc.returncode = 0 - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=mock_proc): - await executor._run_cli("Do something", eq) - - eq.enqueue_event.assert_called_once() - event_arg = eq.enqueue_event.call_args[0][0] - assert "Done!" in str(event_arg) - - -@pytest.mark.asyncio -async def test_run_cli_timeout(): - """CLI timeout enqueues a timeout error message.""" - rc = RuntimeConfig(timeout=10) - executor = _make_executor(runtime_config=rc) - - mock_proc = AsyncMock() - mock_proc.communicate = AsyncMock(side_effect=asyncio.TimeoutError()) - mock_proc.kill = AsyncMock() - mock_proc.wait = AsyncMock() - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=mock_proc): - with patch("asyncio.wait_for", side_effect=asyncio.TimeoutError()): - await executor._run_cli("Slow task", eq) - - eq.enqueue_event.assert_called_once() - event_arg = eq.enqueue_event.call_args[0][0] - assert "timeout" in str(event_arg) - - -@pytest.mark.asyncio -async def test_run_cli_extra_args(): - """Extra args from RuntimeConfig are included in the command.""" - rc = RuntimeConfig(args=["--verbose", "--no-cache"]) - executor = _make_executor(runtime_config=rc) - - cmd = executor._build_command("Test task") - assert "--verbose" in cmd - assert "--no-cache" in cmd - - -# ---------- constructor / preset tests ---------- - - -def test_unknown_runtime_raises(): - """Unknown runtime raises ValueError.""" - with patch("shutil.which", return_value="/usr/bin/whatever"): - with pytest.raises(ValueError, match="Unknown runtime"): - CLIAgentExecutor( - runtime="totally-unknown", - runtime_config=RuntimeConfig(), - ) - - -def test_custom_runtime_preset(): - """custom runtime builds preset from RuntimeConfig.command.""" - rc = RuntimeConfig(command="myagent") - with patch("shutil.which", return_value="/usr/bin/myagent"): - executor = CLIAgentExecutor(runtime="custom", runtime_config=rc) - assert executor.preset["command"] == "myagent" - assert executor.preset["prompt_flag"] == "-p" - - -# ---------- _resolve_auth_token from file (lines 173-179) ---------- - - -def test_resolve_auth_token_from_file(tmp_path): - """Auth token is read from file when the token file exists.""" - token_file = tmp_path / ".my-token" - token_file.write_text("file-secret-token\n") - - rc = RuntimeConfig(auth_token_file=".my-token") - with patch("shutil.which", return_value="/usr/bin/claude"): - with patch.dict(os.environ, {}, clear=False): - # Ensure no env var override - os.environ.pop("CLAUDE_CODE_OAUTH_TOKEN", None) - executor = CLIAgentExecutor( - runtime="codex", - runtime_config=rc, - 
config_path=str(tmp_path), - ) - assert executor._auth_token == "file-secret-token" - - -def test_resolve_auth_token_from_preset_default_file(tmp_path): - """Auth token from the preset's default_auth_file when config file_name is empty.""" - # Use a test runtime with a default_auth_file preset entry so we don't - # depend on the (now-removed) claude-code preset. - from cli_executor import RUNTIME_PRESETS - RUNTIME_PRESETS["filetoken-test"] = { - "command": "fakecli", - "base_args": [], - "prompt_flag": "-p", - "model_flag": None, - "system_prompt_flag": None, - "auth_pattern": None, - "default_auth_env": "", - "default_auth_file": ".auth-token", - } - try: - token_file = tmp_path / ".auth-token" - token_file.write_text("preset-default-token") - rc = RuntimeConfig() # no explicit auth_token_file - with patch("shutil.which", return_value="/usr/bin/fakecli"): - executor = CLIAgentExecutor( - runtime="filetoken-test", - runtime_config=rc, - config_path=str(tmp_path), - ) - assert executor._auth_token == "preset-default-token" - finally: - RUNTIME_PRESETS.pop("filetoken-test", None) - - -def test_resolve_auth_token_returns_none_when_no_file_and_no_env(tmp_path): - """Returns None when neither env var nor file is present.""" - rc = RuntimeConfig() - with patch("shutil.which", return_value="/usr/bin/claude"): - with patch.dict(os.environ, {}, clear=False): - os.environ.pop("CLAUDE_CODE_OAUTH_TOKEN", None) - executor = CLIAgentExecutor( - runtime="codex", - runtime_config=rc, - config_path=str(tmp_path), # no .auth-token file here - ) - assert executor._auth_token is None - - -# ---------- _create_auth_helper (lines 183-189) and line 139 ---------- - - -def test_create_auth_helper_creates_executable_script(tmp_path): - """_create_auth_helper creates a shell script that outputs the token.""" - rc = RuntimeConfig() - with patch("shutil.which", return_value="/usr/bin/claude"): - executor = CLIAgentExecutor( - runtime="codex", - runtime_config=rc, - config_path=str(tmp_path), - ) - - helper_path = executor._create_auth_helper("my-secret-token") - assert helper_path is not None - script_content = Path(helper_path).read_text() - assert "#!/bin/sh" in script_content - assert "my-secret-token" in script_content - # Cleanup - import os as _os - _os.unlink(helper_path) - - -def test_auth_helper_created_when_apiKeyHelper_pattern(tmp_path): - """_auth_helper_path is set when auth_pattern=apiKeyHelper and token present.""" - from cli_executor import RUNTIME_PRESETS - # We'll use a custom runtime and monkeypatch the preset - rc = RuntimeConfig(command="fakecli", auth_token_env="FAKE_API_KEY") - - with patch("shutil.which", return_value="/usr/bin/fakecli"): - with patch.dict(os.environ, {"FAKE_API_KEY": "test-api-key"}): - executor = CLIAgentExecutor( - runtime="custom", - runtime_config=rc, - config_path=str(tmp_path), - ) - # Patch the preset to apiKeyHelper pattern and call manually - executor.preset["auth_pattern"] = "apiKeyHelper" - executor._auth_token = "test-api-key" - helper_path = executor._create_auth_helper("test-api-key") - executor._auth_helper_path = helper_path - - assert executor._auth_helper_path is not None - content = Path(executor._auth_helper_path).read_text() - assert "test-api-key" in content - - -# A2A instructions tests moved to test_executor_helpers.py — the CLI executor -# now calls get_a2a_instructions() from the shared module directly in -# _build_command(), with no wrapper method of its own. 
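# [Added sketch — hypothetical reconstruction] The helper tests above only
# assert that the script starts with #!/bin/sh, contains the token, and is
# executable; one plausible implementation:
import os
import stat
import tempfile

def create_auth_helper(token):
    fd, path = tempfile.mkstemp(prefix="auth-helper-", suffix=".sh")
    with os.fdopen(fd, "w") as f:
        f.write(f"#!/bin/sh\necho '{token}'\n")  # CLI runs this to obtain the key
    os.chmod(path, os.stat(path).st_mode | stat.S_IXUSR)  # mark executable
    return path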
- - -# Helper-method tests for _set_current_task, _recall_memories, _commit_memory -# moved to tests/test_executor_helpers.py — they exercise shared code in -# executor_helpers.py that both CLIAgentExecutor and ClaudeSDKExecutor call. - - -async def test_execute_injects_delegation_results_into_prompt(tmp_path): - """When delegation results are present, execute() prepends them to the prompt.""" - executor = _make_executor(config_path=str(tmp_path)) - ctx = _make_context(["Follow up question"]) - eq = _make_event_queue() - - captured = {} - - async def fake_run_cli(user_input, _event_queue): - captured["user_input"] = user_input - - with patch("cli_executor.read_delegation_results", - return_value="- [completed] Prior task done"), \ - patch("cli_executor.recall_memories", new=AsyncMock(return_value="")), \ - patch("cli_executor.set_current_task", new=AsyncMock()), \ - patch("cli_executor.commit_memory", new=AsyncMock()), \ - patch.object(executor, "_run_cli", side_effect=fake_run_cli): - await executor.execute(ctx, eq) - - assert "Delegation results received while you were idle" in captured["user_input"] - assert "Prior task done" in captured["user_input"] - assert "Follow up question" in captured["user_input"] - - -async def test_run_cli_timeout_kill_already_exited(): - """ProcessLookupError from kill() (proc already exited) is silently skipped.""" - executor = _make_executor( - runtime_config=RuntimeConfig(timeout=1), - ) - eq = _make_event_queue() - - proc = AsyncMock() - proc.communicate = AsyncMock(side_effect=asyncio.TimeoutError()) - proc.kill = MagicMock(side_effect=ProcessLookupError()) - proc.wait = AsyncMock() - - with patch("asyncio.create_subprocess_exec", return_value=proc): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - assert "timeout" in str(eq.enqueue_event.call_args[0][0]) - - -async def test_run_cli_env_pattern_propagates_auth_token(tmp_path): - """When auth_pattern=env, the auth token is injected into subprocess env.""" - rc = RuntimeConfig(auth_token_env="MY_TOKEN") - with patch("shutil.which", return_value="/usr/bin/claude"), \ - patch.dict(os.environ, {"MY_TOKEN": "secret-token-xyz"}, clear=False): - executor = CLIAgentExecutor( - runtime="codex", - runtime_config=rc, - config_path=str(tmp_path), - ) - - captured_env = {} - - async def fake_create_subprocess_exec(*args, **kwargs): - captured_env.update(kwargs.get("env") or {}) - proc = AsyncMock() - proc.returncode = 0 - proc.communicate = AsyncMock(return_value=(b'{"result": "ok"}', b"")) - return proc - - eq = _make_event_queue() - with patch("asyncio.create_subprocess_exec", side_effect=fake_create_subprocess_exec): - await executor._run_cli("hi", eq) - - assert captured_env.get("MY_TOKEN") == "secret-token-xyz" - - -async def test_run_cli_session_error_exhausts_all_retries(): - """Session errors retried until exhaustion then surface as error.""" - executor = _make_executor() - eq = _make_event_queue() - - proc = AsyncMock() - proc.returncode = 1 - proc.communicate = AsyncMock(return_value=(b"", b"no conversation found with that id")) - - with patch("asyncio.create_subprocess_exec", return_value=proc), \ - patch("asyncio.sleep", new=AsyncMock()): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - - -async def test_run_cli_timeout_kill_raises_generic_exception(): - """Kill raising non-ProcessLookupError is logged and swallowed.""" - executor = _make_executor( - runtime_config=RuntimeConfig(timeout=1), - ) - eq = _make_event_queue() - - proc = 
AsyncMock() - proc.communicate = AsyncMock(side_effect=asyncio.TimeoutError()) - proc.kill = MagicMock(side_effect=RuntimeError("kill refused")) - proc.wait = AsyncMock() - - with patch("asyncio.create_subprocess_exec", return_value=proc): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - assert "timeout" in str(eq.enqueue_event.call_args[0][0]) - - -async def test_run_cli_timeout_proc_wait_raises_generic_exception(): - """proc.wait() raising non-TimeoutError exception is logged and swallowed.""" - executor = _make_executor( - runtime_config=RuntimeConfig(timeout=1), - ) - eq = _make_event_queue() - - proc = AsyncMock() - proc.communicate = AsyncMock(side_effect=asyncio.TimeoutError()) - proc.kill = MagicMock() - proc.wait = AsyncMock(side_effect=RuntimeError("wait broken")) - - with patch("asyncio.create_subprocess_exec", return_value=proc): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - assert "timeout" in str(eq.enqueue_event.call_args[0][0]) - - -# ---------- _build_command ollama model positional arg (line 316) ---------- - - -def test_build_command_ollama_model_positional(tmp_path): - """Ollama runtime: model is appended positionally (no model_flag).""" - rc = RuntimeConfig(model="llama3") - with patch("shutil.which", return_value="/usr/bin/ollama"): - executor = CLIAgentExecutor( - runtime="ollama", - runtime_config=rc, - config_path=str(tmp_path), - ) - cmd = executor._build_command("Hello") - assert "llama3" in cmd - # Model should appear after "run" and before prompt - run_idx = cmd.index("run") - model_idx = cmd.index("llama3") - assert model_idx > run_idx - - -# ---------- _build_command apiKeyHelper auth settings (lines 331-332) ---------- - - -def test_build_command_includes_apiKeyHelper_settings(tmp_path): - """Command includes --settings with apiKeyHelper when auth_helper_path is set.""" - rc = RuntimeConfig(command="fakecli", auth_token_env="FAKE_KEY") - with patch("shutil.which", return_value="/usr/bin/fakecli"): - with patch.dict(os.environ, {"FAKE_KEY": "my-api-key"}): - executor = CLIAgentExecutor( - runtime="custom", - runtime_config=rc, - config_path=str(tmp_path), - ) - # Manually simulate apiKeyHelper auth pattern - executor.preset["auth_pattern"] = "apiKeyHelper" - executor._auth_helper_path = "/tmp/fake-helper.sh" - - cmd = executor._build_command("test message") - assert "--settings" in cmd - settings_idx = cmd.index("--settings") - settings_val = json.loads(cmd[settings_idx + 1]) - assert settings_val["apiKeyHelper"] == "/tmp/fake-helper.sh" - - -# ---------- _build_command positional prompt for ollama (line 351) ---------- - - -def test_build_command_ollama_positional_prompt(tmp_path): - """Ollama runtime: prompt is appended positionally (no prompt_flag).""" - rc = RuntimeConfig() - with patch("shutil.which", return_value="/usr/bin/ollama"): - executor = CLIAgentExecutor( - runtime="ollama", - runtime_config=rc, - config_path=str(tmp_path), - ) - cmd = executor._build_command("my ollama prompt") - # prompt_flag is None, so prompt goes at end positionally - assert cmd[-1] == "my ollama prompt" - assert "-p" not in cmd - - -# Session-id-from-JSON test removed: the claude-code runtime used to emit -# --output-format json and the CLI executor parsed it. Now claude-code goes -# through ClaudeSDKExecutor, so the CLI executor no longer JSON-parses stdout. 
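# [Added sketch — assumptions inferred from the ollama and apiKeyHelper tests
# above] With model_flag and prompt_flag both None, the preset degrades to
# positional arguments, and apiKeyHelper auth rides in via a --settings blob:
import json

def build_ollama_command(message, model):
    return ["ollama", "run", model, message]  # model after "run", prompt last

def api_key_helper_args(helper_path):
    return ["--settings", json.dumps({"apiKeyHelper": helper_path})]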
- - -# ---------- _run_cli: rate limit retry (lines 442-443, 468-474) ---------- - - -async def test_run_cli_rate_limit_retry_then_success(): - """Rate limit on stderr triggers retry; second attempt succeeds.""" - executor = _make_executor() - - call_count = 0 - - async def mock_communicate(): - nonlocal call_count - call_count += 1 - if call_count == 1: - return (b"", b"rate limit exceeded (429)") - return (b'{"result": "Success after retry"}', b"") - - mock_proc = AsyncMock() - mock_proc.returncode = 1 - mock_proc.communicate = mock_communicate - - # Second call proc has returncode 0 - mock_proc2 = AsyncMock() - mock_proc2.communicate = AsyncMock( - return_value=(b'{"result": "Success after retry"}', b"") - ) - mock_proc2.returncode = 0 - - procs = iter([mock_proc, mock_proc2]) - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", side_effect=lambda *a, **kw: next(procs)): - with patch("asyncio.sleep", new_callable=AsyncMock): - await executor._run_cli("Do task", eq) - - assert eq.enqueue_event.call_count >= 1 - - -async def test_run_cli_rate_limit_in_stderr_retries(): - """Rate limit keyword in stderr causes retry with backoff.""" - executor = _make_executor() - - attempts = [] - - proc_fail = AsyncMock() - proc_fail.returncode = 1 - proc_fail.communicate = AsyncMock(return_value=(b"", b"overloaded")) - - proc_ok = AsyncMock() - proc_ok.returncode = 0 - proc_ok.communicate = AsyncMock(return_value=(b'{"result": "ok"}', b"")) - - call_iter = iter([proc_fail, proc_ok]) - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", side_effect=lambda *a, **kw: next(call_iter)): - with patch("asyncio.sleep", new_callable=AsyncMock): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - - -# ---------- _run_cli: auth error clears session and retries (line 364+) ---------- - - -async def test_run_cli_auth_error_retries(): - """Auth error in stderr triggers retry.""" - executor = _make_executor() - - proc_auth_err = AsyncMock() - proc_auth_err.returncode = 1 - proc_auth_err.communicate = AsyncMock( - return_value=(b"", b"authentication error: invalid X-Api-Key") - ) - - proc_ok = AsyncMock() - proc_ok.returncode = 0 - proc_ok.communicate = AsyncMock(return_value=(b"retried ok", b"")) - - call_iter = iter([proc_auth_err, proc_ok]) - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", side_effect=lambda *a, **kw: next(call_iter)): - with patch("asyncio.sleep", new_callable=AsyncMock): - await executor._run_cli("task", eq) - - assert eq.enqueue_event.call_count >= 1 - - -# ---------- _run_cli: empty result all retries exhausted (lines 455-464) ---------- - - -async def test_run_cli_empty_result_all_retries_returns_no_response(): - """When all retries return empty stdout, enqueue 'no response' message.""" - executor = _make_executor() - - proc = AsyncMock() - proc.returncode = 0 - proc.communicate = AsyncMock(return_value=(b"", b"")) - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=proc): - with patch("asyncio.sleep", new_callable=AsyncMock): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - event_text = str(eq.enqueue_event.call_args[0][0]) - assert "no response" in event_text - - -async def test_run_cli_empty_result_on_intermediate_attempt_retries(): - """Empty stdout on first attempt triggers retry before giving up.""" - executor = _make_executor() - - call_count = 0 - - async def varying_communicate(): - nonlocal call_count - 
call_count += 1 - if call_count < 3: - return (b"", b"") - return (b"finally got one", b"") - - proc = AsyncMock() - proc.returncode = 0 - proc.communicate = varying_communicate - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=proc): - with patch("asyncio.sleep", new_callable=AsyncMock): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - assert "finally got one" in str(eq.enqueue_event.call_args[0][0]) - - -# ---------- _run_cli: timeout with proc.kill raising (lines 497-498) ---------- - - -async def test_run_cli_timeout_proc_kill_raises(): - """Timeout handler swallows exception from proc.kill().""" - rc = RuntimeConfig(timeout=5) - executor = _make_executor(runtime_config=rc) - - mock_proc = AsyncMock() - mock_proc.kill = MagicMock(side_effect=OSError("no such process")) - mock_proc.wait = AsyncMock() - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=mock_proc): - with patch("asyncio.wait_for", side_effect=asyncio.TimeoutError()): - await executor._run_cli("slow task", eq) - - eq.enqueue_event.assert_called_once() - assert "timeout" in str(eq.enqueue_event.call_args[0][0]) - - -async def test_run_cli_timeout_calls_proc_wait_to_reap_zombie(): - """On timeout, proc.kill() is followed by proc.wait() to reap the zombie process.""" - rc = RuntimeConfig(timeout=5) - executor = _make_executor(runtime_config=rc) - - mock_proc = AsyncMock() - mock_proc.kill = MagicMock() - mock_proc.wait = AsyncMock() - - eq = _make_event_queue() - - # First wait_for call (for proc.communicate) raises TimeoutError - # Second wait_for call (for proc.wait inside the timeout handler) succeeds - call_count = {"n": 0} - original_wait_for = asyncio.wait_for - - async def patched_wait_for(coro, timeout): - call_count["n"] += 1 - if call_count["n"] == 1: - # Cancel the coro to avoid resource warning, then raise - try: - coro.close() - except Exception: - pass - raise asyncio.TimeoutError() - # Subsequent calls (for proc.wait reap) succeed immediately - try: - await coro - except Exception: - pass - - with patch("asyncio.create_subprocess_exec", return_value=mock_proc): - with patch("asyncio.wait_for", side_effect=patched_wait_for): - await executor._run_cli("slow task", eq) - - # Verify proc.kill was called - mock_proc.kill.assert_called_once() - # Verify proc.wait was called (to reap the zombie) - mock_proc.wait.assert_called() - # And we got the timeout message - eq.enqueue_event.assert_called_once() - assert "timeout" in str(eq.enqueue_event.call_args[0][0]) - - -async def test_run_cli_timeout_proc_wait_also_times_out(): - """If proc.wait() also times out (truly stuck), we still send the timeout message.""" - rc = RuntimeConfig(timeout=5) - executor = _make_executor(runtime_config=rc) - - mock_proc = AsyncMock() - mock_proc.kill = MagicMock() - mock_proc.wait = AsyncMock() - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=mock_proc): - with patch("asyncio.wait_for", side_effect=asyncio.TimeoutError()): - await executor._run_cli("slow task", eq) - - # Even though both wait_for calls timed out, we still emit the timeout event - eq.enqueue_event.assert_called_once() - assert "timeout" in str(eq.enqueue_event.call_args[0][0]) - # And we still tried to kill - mock_proc.kill.assert_called_once() - - -# ---------- _run_cli: non-zero exit with no stderr (line 466) ---------- - - -async def 
test_run_cli_nonzero_exit_no_stderr_uses_exit_code(): - """Non-zero exit with no stderr falls back to 'Exit code N' message.""" - executor = _make_executor(runtime="ollama") - - proc = AsyncMock() - proc.returncode = 2 - proc.communicate = AsyncMock(return_value=(b"", b"")) - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=proc): - await executor._run_cli("task", eq) - - event_text = str(eq.enqueue_event.call_args[0][0]) - assert "Exit code" in event_text or "Agent error" in event_text - - -# ---------- _run_cli: generic exception (lines 503-508) ---------- - - -async def test_run_cli_generic_exception_enqueues_error(): - """Unexpected exception from subprocess is caught and enqueued as error.""" - executor = _make_executor() - eq = _make_event_queue() - - with patch( - "asyncio.create_subprocess_exec", - side_effect=RuntimeError("fork failed"), - ): - await executor._run_cli("task", eq) - - eq.enqueue_event.assert_called_once() - assert "Agent error" in str(eq.enqueue_event.call_args[0][0]) - - -# ---------- _run_cli: non-JSON output passed through raw ---------- - - -async def test_run_cli_non_json_output_used_raw(): - """Non-JSON stdout is passed through as-is; the CLI executor no longer JSON-parses output.""" - executor = _make_executor() - - proc = AsyncMock() - proc.returncode = 0 - proc.communicate = AsyncMock(return_value=(b"plain text output", b"")) - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=proc): - await executor._run_cli("task", eq) - - event_text = str(eq.enqueue_event.call_args[0][0]) - assert "plain text output" in event_text - - -# ---------- execute: memory injection path ---------- - - -async def test_execute_injects_memories_into_prompt(tmp_path): - """Memories are prepended to the prompt when returned.""" - executor = _make_executor(config_path=str(tmp_path)) - - captured_inputs = [] - - async def capture_run_cli(user_input, event_queue): - captured_inputs.append(user_input) - await event_queue.enqueue_event(MagicMock()) - - executor._run_cli = capture_run_cli - - with patch("cli_executor.recall_memories", - new=AsyncMock(return_value="- [LOCAL] remember this")), \ - patch("cli_executor.commit_memory", new=AsyncMock()), \ - patch("cli_executor.set_current_task", new=AsyncMock()), \ - patch("cli_executor.read_delegation_results", return_value=""): - context = _make_context(["Do the task"]) - eq = _make_event_queue() - await executor.execute(context, eq) - - assert len(captured_inputs) == 1 - assert "Prior context from memory" in captured_inputs[0] - assert "remember this" in captured_inputs[0] - assert "Do the task" in captured_inputs[0] - - -# ---------- execute: no memories, no injection ---------- - - -async def test_execute_no_memories_no_injection(tmp_path): - """No memories = prompt passed through unchanged.""" - executor = _make_executor(config_path=str(tmp_path)) - - captured_inputs = [] - - async def capture_run_cli(user_input, event_queue): - captured_inputs.append(user_input) - - executor._run_cli = capture_run_cli - - with patch("cli_executor.recall_memories", new=AsyncMock(return_value="")), \ - patch("cli_executor.commit_memory", new=AsyncMock()), \ - patch("cli_executor.set_current_task", new=AsyncMock()), \ - patch("cli_executor.read_delegation_results", return_value=""): - context = _make_context(["Clean task without memories"]) - eq = _make_event_queue() - await executor.execute(context, eq) - - assert captured_inputs[0] == "Clean task without memories" - - -# ---------- line 139: 
_create_auth_helper called in __init__ ---------- - - -def test_init_creates_auth_helper_when_apiKeyHelper_pattern(tmp_path): - """Executor calls _create_auth_helper in __init__ when preset has apiKeyHelper pattern.""" - from cli_executor import RUNTIME_PRESETS - - api_key_preset = { - "command": "fakecli", - "base_args": [], - "prompt_flag": "-p", - "model_flag": None, - "system_prompt_flag": None, - "auth_pattern": "apiKeyHelper", - "default_auth_env": "FAKE_API_KEY", - "default_auth_file": "", - } - - original = dict(RUNTIME_PRESETS) - RUNTIME_PRESETS["api-key-test"] = api_key_preset - try: - rc = RuntimeConfig() - with patch("shutil.which", return_value="/usr/bin/fakecli"): - with patch.dict(os.environ, {"FAKE_API_KEY": "secret-key-value"}): - executor = CLIAgentExecutor( - runtime="api-key-test", - runtime_config=rc, - config_path=str(tmp_path), - ) - assert executor._auth_helper_path is not None - content = Path(executor._auth_helper_path).read_text() - assert "secret-key-value" in content - finally: - RUNTIME_PRESETS.clear() - RUNTIME_PRESETS.update(original) - - -# ---------- line 161: warning when command not found in PATH ---------- - - -def test_init_warns_when_command_not_found(tmp_path, caplog): - """CLIAgentExecutor logs a warning when CLI command not found in PATH.""" - import logging - rc = RuntimeConfig() - with patch("shutil.which", return_value=None): - with caplog.at_level(logging.WARNING, logger="cli_executor"): - CLIAgentExecutor( - runtime="codex", - runtime_config=rc, - config_path=str(tmp_path), - ) - assert any("not found" in msg for msg in caplog.messages) - - -# ---------- lines 233-234: heartbeat updated in _set_current_task ---------- - - -# Heartbeat-update tests for set_current_task moved to test_executor_helpers.py — -# the CLI executor now calls set_current_task() directly from the shared module -# with no wrapper of its own. - - -# ---------- line 296: _get_system_prompt reads from file ---------- - - -# get_system_prompt tests moved to test_executor_helpers.py — the CLI executor -# now calls the shared helper directly with no wrapper. 
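# [Added sketch — assumed shape] The warning test above implies a soft PATH
# check at construction time: a missing CLI binary logs rather than raises.
import logging
import shutil

logger = logging.getLogger("cli_executor")

def warn_if_command_missing(command):
    if shutil.which(command) is None:
        logger.warning("CLI command %r not found in PATH", command)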
- - -# ---------- line 364: auth error retries exhaust → enqueues error ---------- - - -async def test_run_cli_auth_error_exhausts_all_retries(): - """Auth error on every attempt eventually enqueues error (all retries spent).""" - executor = _make_executor() - - proc = AsyncMock() - proc.returncode = 1 - proc.communicate = AsyncMock( - return_value=(b"", b"authentication error: invalid api_key") - ) - - eq = _make_event_queue() - - with patch("asyncio.create_subprocess_exec", return_value=proc): - with patch("asyncio.sleep", new_callable=AsyncMock): - await executor._run_cli("task", eq) - - assert eq.enqueue_event.call_count == 1 - event_text = str(eq.enqueue_event.call_args[0][0]) - assert "Agent error" in event_text - - -# ---------- line 364: execute() handles part.root.text ---------- - - -async def test_execute_uses_root_text_when_no_direct_text(tmp_path): - """execute() extracts text from part.root.text when part.text is absent.""" - executor = _make_executor(config_path=str(tmp_path)) - - captured_inputs = [] - - async def capture_run_cli(user_input, event_queue): - captured_inputs.append(user_input) - - executor._run_cli = capture_run_cli - - # Build a part that has no .text but has .root.text - part = MagicMock(spec=["root"]) - part.root = MagicMock() - part.root.text = "text from root attribute" - - context = MagicMock() - context.message.parts = [part] - eq = _make_event_queue() - with patch("cli_executor.recall_memories", new=AsyncMock(return_value="")), \ - patch("cli_executor.commit_memory", new=AsyncMock()), \ - patch("cli_executor.set_current_task", new=AsyncMock()), \ - patch("cli_executor.read_delegation_results", return_value=""): - await executor.execute(context, eq) - - assert len(captured_inputs) == 1 - assert "text from root attribute" in captured_inputs[0] - - -# Delegation results tests moved to tests/test_executor_helpers.py — the -# function now lives in executor_helpers.read_delegation_results() and is -# shared by both executors. 
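# [Added sketch — hypothetical helper] The root-text test above exercises a
# two-step extraction: prefer part.text, else fall back to part.root.text.
def extract_part_text(part):
    text = getattr(part, "text", None)
    if text is not None:
        return text
    root = getattr(part, "root", None)
    return getattr(root, "text", None) if root is not None else None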
diff --git a/workspace-template/tests/test_common_setup.py b/workspace-template/tests/test_common_setup.py deleted file mode 100644 index 9a6e55a4..00000000 --- a/workspace-template/tests/test_common_setup.py +++ /dev/null @@ -1,214 +0,0 @@ -"""Tests for the shared _common_setup() pipeline and tool conversion helpers.""" - -import importlib.util -import sys -from types import ModuleType -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - - -# --- Mock missing optional deps --- - -def _ensure_crewai_mock(): - if "crewai" not in sys.modules: - crewai_mod = ModuleType("crewai") - crewai_tools_mod = ModuleType("crewai.tools") - # Make @tool a passthrough decorator that preserves the function - crewai_tools_mod.tool = lambda name: (lambda f: f) - crewai_mod.tools = crewai_tools_mod - crewai_mod.__version__ = "0.0.0-mock" - sys.modules["crewai"] = crewai_mod - sys.modules["crewai.tools"] = crewai_tools_mod - - -def _ensure_autogen_mock(): - if "autogen_agentchat" not in sys.modules: - mod = ModuleType("autogen_agentchat") - agents_mod = ModuleType("autogen_agentchat.agents") - agents_mod.AssistantAgent = MagicMock - mod.agents = agents_mod - sys.modules["autogen_agentchat"] = mod - sys.modules["autogen_agentchat.agents"] = agents_mod - - -_ensure_crewai_mock() -_ensure_autogen_mock() - - -# --- Mock helpers --- - -def _mock_load_plugins(**kwargs): - plugins = MagicMock() - plugins.plugin_names = [] - plugins.skill_dirs = [] - plugins.prompt_fragments = [] - plugins.rules = [] - return plugins - - -def _mock_load_skills(config_path, tools): - return [] - - -async def _mock_get_children(): - return [] - - -async def _mock_get_children_with_kids(): - return [{"id": "child-1", "name": "Child", "role": "Worker", "status": "online"}] - - -async def _mock_get_parent_context(): - return [] - - -async def _mock_get_peer_capabilities(platform_url, workspace_id): - return [{"id": "peer-1", "name": "Peer", "status": "online", "agent_card": {"skills": []}}] - - -def _mock_build_system_prompt(*args, **kwargs): - return "You are a test agent." 
- - -def _mock_build_children_description(children): - return "## Team\n- Child: Worker" - - -# All patches needed for _common_setup -_SETUP_PATCHES = { - "plugins.load_plugins": _mock_load_plugins, - "skill_loader.loader.load_skills": _mock_load_skills, - "coordinator.get_children": _mock_get_children, - "coordinator.get_parent_context": _mock_get_parent_context, - "coordinator.build_children_description": _mock_build_children_description, - "prompt.get_peer_capabilities": _mock_get_peer_capabilities, - "prompt.build_system_prompt": _mock_build_system_prompt, -} - - -def _make_test_adapter(): - from adapters.base import BaseAdapter, AdapterConfig - - class TestAdapter(BaseAdapter): - @staticmethod - def name(): return "test" - @staticmethod - def display_name(): return "Test" - @staticmethod - def description(): return "Test adapter" - async def setup(self, config): pass - async def create_executor(self, config): pass - - return TestAdapter(), AdapterConfig(model="openai:test", config_path="/tmp", workspace_id="ws-test") - - -# --- Common Setup Tests --- - -@pytest.mark.asyncio -async def test_common_setup_returns_core_tools(): - """_common_setup returns 6 core tools.""" - adapter, config = _make_test_adapter() - - patches = {k: v for k, v in _SETUP_PATCHES.items()} - with patch.dict("os.environ", {"PLATFORM_URL": "http://test:8080"}): - ctx = [patch(k, v) for k, v in patches.items()] - for c in ctx: - c.start() - try: - result = await adapter._common_setup(config) - finally: - for c in ctx: - c.stop() - - assert len(result.langchain_tools) == 6 # 6 core tools - tool_names = [t.name for t in result.langchain_tools] - assert "delegate_to_workspace" in tool_names - assert "check_delegation_status" in tool_names - assert "request_approval" in tool_names - assert "commit_memory" in tool_names - assert "search_memory" in tool_names - assert "run_code" in tool_names - assert result.system_prompt == "You are a test agent." - assert result.is_coordinator is False - - -@pytest.mark.asyncio -async def test_common_setup_coordinator_adds_routing_tool(): - """When workspace has children, coordinator tool is added.""" - adapter, config = _make_test_adapter() - - patches = {k: v for k, v in _SETUP_PATCHES.items()} - patches["coordinator.get_children"] = _mock_get_children_with_kids - - with patch.dict("os.environ", {"PLATFORM_URL": "http://test:8080"}): - ctx = [patch(k, v) for k, v in patches.items()] - for c in ctx: - c.start() - try: - result = await adapter._common_setup(config) - finally: - for c in ctx: - c.stop() - - assert result.is_coordinator is True - assert len(result.langchain_tools) == 7 # 6 core + route_task_to_team - # Last tool should be route_task_to_team (function name or .name attribute) - last_tool = result.langchain_tools[-1] - tool_id = getattr(last_tool, "name", None) or getattr(last_tool, "__name__", "") - assert "route_task_to_team" in tool_id - - -# --- Tool Conversion Tests --- - -def test_langchain_to_crewai_preserves_name(): - """CrewAI wrapper preserves tool name and description.""" - from adapters.crewai.adapter import _langchain_to_crewai - - mock_tool = MagicMock() - mock_tool.name = "test_tool" - mock_tool.description = "A test tool for testing." - mock_tool.ainvoke = AsyncMock(return_value={"result": "ok"}) - - wrapped = _langchain_to_crewai(mock_tool) - # With our mock @tool decorator, the wrapper is the raw function - assert wrapped.__doc__ == "A test tool for testing." 
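# [Added sketch — an assumption; the real adapter source is deleted by this
# patch] A _langchain_to_crewai shape consistent with both wrapper tests: a
# sync function that drives the async LangChain tool and keeps its
# name/description for CrewAI's benefit.
import asyncio

def langchain_to_crewai(lc_tool):
    def wrapper(**kwargs):
        # CrewAI calls tools synchronously; bridge to the async interface.
        return asyncio.run(lc_tool.ainvoke(kwargs))
    wrapper.__name__ = lc_tool.name
    wrapper.__doc__ = lc_tool.description
    return wrapper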
- - -@pytest.mark.skipif( - not importlib.util.find_spec("autogen_core"), - reason="autogen_core not installed", -) -def test_langchain_to_autogen_preserves_name(): - """AutoGen wrapper preserves tool name and description via FunctionTool.""" - from adapters.autogen.adapter import _langchain_to_autogen - - mock_tool = MagicMock() - mock_tool.name = "test_tool" - mock_tool.description = "A test tool for testing." - mock_tool.ainvoke = AsyncMock(return_value={"result": "ok"}) - - wrapped = _langchain_to_autogen(mock_tool) - assert wrapped.name == "test_tool" - assert wrapped.description == "A test tool for testing." - - -@pytest.mark.skipif( - not importlib.util.find_spec("autogen_core"), - reason="autogen_core not installed", -) -@pytest.mark.asyncio -async def test_langchain_to_autogen_calls_ainvoke(): - """AutoGen FunctionTool wrapper calls the original tool's ainvoke.""" - from adapters.autogen.adapter import _langchain_to_autogen - - mock_tool = MagicMock() - mock_tool.name = "delegate" - mock_tool.description = "Delegate a task." - mock_tool.ainvoke = AsyncMock(return_value={"success": True}) - - wrapped = _langchain_to_autogen(mock_tool) - # FunctionTool.run_json expects a JSON dict with the function params - result = await wrapped.run_json({"input": '{"workspace_id": "ws-1", "task": "do stuff"}'}, cancellation_token=None) - mock_tool.ainvoke.assert_called_once_with({"workspace_id": "ws-1", "task": "do stuff"}) - assert "True" in str(result) diff --git a/workspace-template/tests/test_hermes_escalation.py b/workspace-template/tests/test_hermes_escalation.py deleted file mode 100644 index e7deb430..00000000 --- a/workspace-template/tests/test_hermes_escalation.py +++ /dev/null @@ -1,146 +0,0 @@ -"""Tests for Hermes escalation-ladder classification and config parsing. - -The truth table in ``should_escalate`` is the single chokepoint that -decides whether an inference failure wastes the next ladder rung's -quota or triggers a useful retry. These tests pin that table against -real exception shapes from anthropic / openai / google-genai SDKs and -the wrapped-error strings we've observed in platform logs. -""" -from __future__ import annotations - -import sys -from pathlib import Path - -import pytest - -# Make the workspace-template/ modules importable without installing. -sys.path.insert(0, str(Path(__file__).resolve().parents[1])) - -from adapters.hermes.escalation import ( # noqa: E402 - LadderRung, - parse_ladder, - should_escalate, -) - - -# -------------------------------------------------------------------------- -# parse_ladder -# -------------------------------------------------------------------------- - -def test_parse_ladder_empty_returns_empty(): - assert parse_ladder(None) == [] - assert parse_ladder([]) == [] - - -def test_parse_ladder_accepts_dicts(): - raw = [ - {"provider": "gemini", "model": "gemini-2.5-flash"}, - {"provider": "anthropic", "model": "claude-opus-4-1-20250805"}, - ] - rungs = parse_ladder(raw) - assert len(rungs) == 2 - assert rungs[0] == LadderRung("gemini", "gemini-2.5-flash") - assert rungs[1] == LadderRung("anthropic", "claude-opus-4-1-20250805") - - -def test_parse_ladder_passes_through_rung_instances(): - # Programmatic callers can pass already-constructed rungs. - existing = LadderRung("openai", "gpt-4o-mini") - rungs = parse_ladder([existing]) - assert rungs == [existing] - - -def test_parse_ladder_skips_malformed_entries(): - # Missing model / missing provider / wrong type — all skipped with - # a warning, not raised. 
A missing rung is less bad than a boot fail. - raw = [ - {"provider": "gemini"}, # no model - {"model": "gpt-4o"}, # no provider - "not a dict", # wrong type - {"provider": "anthropic", "model": "claude-opus-4-1-20250805"}, # good - ] - rungs = parse_ladder(raw) - assert len(rungs) == 1 - assert rungs[0].provider == "anthropic" - - -# -------------------------------------------------------------------------- -# should_escalate — truth table -# -------------------------------------------------------------------------- - -class _FakeRateLimitError(Exception): - """Stand-in with the same class name the openai SDK uses (rate limits).""" - pass -_FakeRateLimitError.__name__ = "RateLimitError" - - -class _FakeOverloadedError(Exception): - """Stand-in for anthropic.OverloadedError (HTTP 529).""" - pass -_FakeOverloadedError.__name__ = "OverloadedError" - - -class _FakeAPITimeoutError(Exception): - pass -_FakeAPITimeoutError.__name__ = "APITimeoutError" - - -class _FakeAPIConnectionError(Exception): - pass -_FakeAPIConnectionError.__name__ = "APIConnectionError" - - -class _FakeInternalServerError(Exception): - pass -_FakeInternalServerError.__name__ = "InternalServerError" - - -@pytest.mark.parametrize("exc,expected", [ - # --- Escalatable: typed rate-limit / overload / timeout classes --- - (_FakeRateLimitError("rate_limit_exceeded on gpt-4o"), True), - (_FakeOverloadedError("overloaded_error"), True), - (_FakeAPITimeoutError("Request timed out."), True), - (_FakeAPIConnectionError("Connection error."), True), - (_FakeInternalServerError("Internal server error 500."), True), - - # --- Escalatable: context-length exceeded on current model --- - (ValueError("This model's maximum context length is 200000 tokens. However, your messages resulted in ..."), True), - (RuntimeError("error: context_length_exceeded"), True), - (RuntimeError("prompt is too long: 210000 tokens"), True), - (RuntimeError("error.type: prompt_too_long"), True), - (RuntimeError("exceeds model context window of 1048576"), True), - - # --- Escalatable: gateway markers (HTTP-wrapped) --- - (RuntimeError("Upstream 502 Bad Gateway"), True), - (RuntimeError("503 Service Unavailable"), True), - (RuntimeError("Service is temporarily unavailable, please try again."), True), - (RuntimeError("Anthropic API is overloaded."), True), - - # --- Escalatable: status-code substrings --- - (RuntimeError("HTTP 429 Too Many Requests"), True), - (RuntimeError("HTTP 529 Overloaded"), True), - - # --- NOT escalatable: auth / permission (config bugs, wasting quota) --- - (RuntimeError("401 Unauthorized — invalid api key"), False), - (RuntimeError("403 Forbidden: permission_denied"), False), - (RuntimeError("authentication_error: invalid_api_key"), False), - - # --- NOT escalatable: auth-wrapped rate-limit (priority = hard-reject auth) --- - # If we see '401' + rate-limit markers simultaneously, prefer not escalating - # because the underlying 401 won't get better on a different model. - (_FakeRateLimitError("RateLimitError wrapping 401 Unauthorized"), False), - - # --- NOT escalatable: unrelated errors --- - (ValueError("bad config"), False), - (KeyError("missing key"), False), - (None, False), -]) -def test_should_escalate_truth_table(exc, expected): - assert should_escalate(exc) is expected - - -def test_should_escalate_case_insensitive(): - # We lowercase the message before substring matching so "OVERLOADED" - # from one provider and "overloaded" from another both match. 
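An aside before the asserts that close this test: the truth table fully pins the classifier's observable behaviour, so it can be sketched from the cases alone. The marker tuples below are inferred from the parametrize list, not copied from the shipped `escalation.py`:

```python
_TRANSIENT_TYPES = {"RateLimitError", "OverloadedError", "APITimeoutError",
                    "APIConnectionError", "InternalServerError"}
_TRANSIENT_MARKERS = ("429", "529", "502", "503", "overloaded",
                      "temporarily unavailable", "context length",
                      "context_length_exceeded", "prompt is too long",
                      "prompt_too_long", "context window")
_AUTH_MARKERS = ("401", "403", "unauthorized", "forbidden",
                 "permission_denied", "authentication_error")


def should_escalate(exc) -> bool:
    """True only for failures a different ladder rung could plausibly fix."""
    if exc is None:
        return False
    msg = str(exc).lower()                  # case-insensitive matching
    if any(m in msg for m in _AUTH_MARKERS):
        return False                        # a wrapped 401 stays a 401
    if type(exc).__name__ in _TRANSIENT_TYPES:
        return True
    return any(m in msg for m in _TRANSIENT_MARKERS)
```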
-    assert should_escalate(RuntimeError("SERVICE OVERLOADED")) is True
-    assert should_escalate(RuntimeError("503 SERVICE UNAVAILABLE")) is True
diff --git a/workspace-template/tests/test_hermes_ladder_integration.py b/workspace-template/tests/test_hermes_ladder_integration.py
deleted file mode 100644
index a7128e63..00000000
--- a/workspace-template/tests/test_hermes_ladder_integration.py
+++ /dev/null
@@ -1,160 +0,0 @@
-"""Integration-ish tests for the Hermes executor's escalation behaviour.
-
-These tests exercise ``_do_inference`` against a mocked ``_dispatch``
-to prove that:
-- No-ladder path is a single call (original behaviour)
-- Ladder path retries on escalatable errors
-- Ladder path stops early on non-escalatable errors
-- Ladder path raises the last error when every rung fails
-- Successful rung logs the recovery and returns
-
-No network calls, no provider SDKs. If this ever starts calling real
-providers, that's a test-isolation regression worth flagging.
-"""
-from __future__ import annotations
-
-import asyncio
-import sys
-from pathlib import Path
-
-import pytest
-
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-
-from adapters.hermes.escalation import LadderRung  # noqa: E402
-from adapters.hermes.executor import HermesA2AExecutor  # noqa: E402
-from adapters.hermes.providers import PROVIDERS  # noqa: E402
-
-
-class _FakeRateLimitError(Exception):
-    pass
-_FakeRateLimitError.__name__ = "RateLimitError"
-
-
-def _make_executor(monkeypatch, dispatch_behaviour, ladder=None):
-    """Build an executor with a mocked ``_dispatch``.
-
-    ``dispatch_behaviour`` is a callable that receives (provider_name,
-    model, user_msg, history, system_prompt) and returns a string OR
-    raises. Use this to simulate success / failure per rung.
-    """
-    cfg = PROVIDERS["anthropic"]
-    ex = HermesA2AExecutor(
-        provider_cfg=cfg,
-        api_key="test-key",
-        model="claude-haiku-4-5-20251001",
-        escalation_ladder=ladder,
-    )
-
-    calls: list[tuple[str, str]] = []
-
-    async def fake_dispatch(cfg, model, user_msg, history, system_prompt):
-        calls.append((cfg.name, model))
-        result = dispatch_behaviour(cfg.name, model, user_msg, history, system_prompt)
-        if isinstance(result, BaseException):
-            raise result
-        return result
-
-    monkeypatch.setattr(ex, "_dispatch", fake_dispatch)
-    return ex, calls
-
-
-def _run(coro):
-    # These tests run synchronously, so there is never a loop already
-    # running here; asyncio.run() is the correct (and public) way in.
-    return asyncio.run(coro)
-
-
-def test_no_ladder_single_call(monkeypatch):
-    ex, calls = _make_executor(monkeypatch, lambda *_: "hello", ladder=None)
-    reply = asyncio.run(ex._do_inference("test"))
-    assert reply == "hello"
-    assert calls == [("anthropic", "claude-haiku-4-5-20251001")]
-
-
-def test_ladder_not_triggered_on_success(monkeypatch):
-    # Ladder configured, but first attempt succeeds — ladder never engaged.
-    ladder = [
-        {"provider": "openai", "model": "gpt-4o-mini"},
-        {"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
-    ]
-    ex, calls = _make_executor(monkeypatch, lambda *_: "fast reply", ladder=ladder)
-    reply = asyncio.run(ex._do_inference("test"))
-    assert reply == "fast reply"
-    assert len(calls) == 1
-    assert calls[0] == ("anthropic", "claude-haiku-4-5-20251001")  # pinned (haiku) wins
-
-
-def test_ladder_escalates_on_rate_limit(monkeypatch):
-    # First rung rate-limits, second rung (opus) succeeds.
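An aside before the test bodies: taken together, the assertions in this file imply a ladder walk shaped like the sketch below. `dispatch` stands in for the mocked `_dispatch`, and `should_escalate` is the classifier pinned in the previous file; the real `_do_inference` also skips unknown providers and logs recoveries, elided here:

```python
async def walk_ladder(dispatch, pinned, rungs, user_msg):
    """Try the pinned (provider, model) first, then each ladder rung.

    Non-escalatable errors re-raise immediately; if every rung fails
    with an escalatable error, the last one propagates.
    """
    last_err = None
    for provider, model in [pinned, *rungs]:
        try:
            return await dispatch(provider, model, user_msg)
        except Exception as err:
            if not should_escalate(err):
                raise                      # e.g. 401 — no rung can fix auth
            last_err = err                 # transient: fall through to next rung
    raise last_err
```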
-    attempt = {"n": 0}
-
-    def behaviour(provider, model, *_):
-        attempt["n"] += 1
-        if attempt["n"] == 1:
-            return _FakeRateLimitError("429 rate_limit_exceeded on anthropic")
-        return f"escalated reply from {provider}:{model}"
-
-    ladder = [
-        {"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
-    ]
-    ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
-    reply = asyncio.run(ex._do_inference("test"))
-    assert "escalated reply" in reply
-    # Two attempts: pinned haiku (failed), then opus (succeeded).
-    assert [model for _, model in calls] == [
-        "claude-haiku-4-5-20251001",
-        "claude-opus-4-1-20250805",
-    ]
-
-
-def test_ladder_stops_on_non_escalatable_error(monkeypatch):
-    # First rung returns a 401 — ladder should NOT retry, should raise.
-    def behaviour(*_):
-        return RuntimeError("401 Unauthorized invalid api key")
-
-    ladder = [{"provider": "anthropic", "model": "claude-opus-4-1-20250805"}]
-    ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
-
-    with pytest.raises(RuntimeError, match="401"):
-        asyncio.run(ex._do_inference("test"))
-
-    # Only one attempt — non-escalatable error stopped the walk.
-    assert len(calls) == 1
-
-
-def test_ladder_raises_last_error_when_all_rungs_fail(monkeypatch):
-    def behaviour(*_):
-        return _FakeRateLimitError("429 across the board")
-
-    ladder = [
-        {"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
-    ]
-    ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
-
-    with pytest.raises(_FakeRateLimitError):
-        asyncio.run(ex._do_inference("test"))
-
-    # Both rungs attempted (pinned + one from ladder).
-    assert len(calls) == 2
-
-
-def test_ladder_skips_unknown_provider(monkeypatch):
-    # A misconfigured rung with a non-existent provider is logged + skipped;
-    # ladder still walks remaining rungs.
-    def behaviour(provider, *_):
-        if provider == "anthropic":
-            return _FakeRateLimitError("first rung rate limit")
-        return f"ok from {provider}"
-
-    ladder = [
-        {"provider": "totally_made_up", "model": "fake-1"},  # should be skipped
-        {"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
-    ]
-    ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
-
-    # First attempt uses the pinned (haiku) model, which raises; the walk
-    # then skips totally_made_up and reaches opus. Because behaviour fails
-    # for every anthropic call, the opus rung raises too (same provider).
-    # Assert the skip happened (call count reflects 2 real attempts, not 3).
-    with pytest.raises(_FakeRateLimitError):
-        asyncio.run(ex._do_inference("test"))
-    assert len(calls) == 2  # pinned + opus (totally_made_up skipped)
diff --git a/workspace-template/tests/test_hermes_phase2_dispatch.py b/workspace-template/tests/test_hermes_phase2_dispatch.py
deleted file mode 100644
index e5e47f08..00000000
--- a/workspace-template/tests/test_hermes_phase2_dispatch.py
+++ /dev/null
@@ -1,487 +0,0 @@
-"""Tests for Phase 2 auth_scheme dispatch in adapters/hermes/executor.py.
-
-These cover the NEW behavior only (HermesA2AExecutor._do_inference dispatch
-based on ProviderConfig.auth_scheme). Phase 1 registry tests live in
-test_hermes_providers.py — unchanged by Phase 2.
-"""
-
-from __future__ import annotations
-
-import sys
-from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
-
-# Load providers.py + escalation.py directly (same pattern as
-# test_hermes_providers.py).
The escalation module landed with the -# ladder work — it's now imported by executor.py, so the inline-exec -# pattern below has to find both modules at top level. -_HERMES_DIR = Path(__file__).parent.parent / "adapters" / "hermes" -sys.path.insert(0, str(_HERMES_DIR)) -import providers # type: ignore # noqa: E402 -import escalation # type: ignore # noqa: E402 - - -def _make_executor(provider_name: str): - """Build a HermesA2AExecutor directly without going through create_executor. - - We import executor lazily inside the function because the module-level - import chain (``from .providers import ...``) uses a relative import that - only resolves when loaded as part of the ``adapters.hermes`` package. - The test loads it via direct sys.path manipulation, which bypasses the - package loader, so we import providers-as-sibling and then reconstruct - the executor with the same shape. - """ - # We can't import executor.py directly due to the relative-import head, - # so instantiate the executor class by replaying its definition inline. - # Simpler: test the dispatch logic via providers.PROVIDERS + the public - # resolve helpers, plus a mock for the inference methods. - cfg = providers.PROVIDERS[provider_name] - # Reach into executor via sys.path trick - import importlib.util - spec = importlib.util.spec_from_file_location( - "hermes_executor_under_test", - _HERMES_DIR / "executor.py", - ) - # The executor module has a relative import `from .providers import ...` - # which fails under direct spec_from_file_location. Monkey-patch sys.modules - # so the relative import resolves to our directly-loaded providers module. - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - # Also alias the package-style import path so `from .providers import X` - # and `from .escalation import X` inside executor.py find them. - pkg_name = "hermes_executor_under_test" - sys.modules.setdefault(pkg_name, MagicMock()) - sys.modules[pkg_name].providers = providers # type: ignore - sys.modules[pkg_name].escalation = escalation # type: ignore - # Read + compile executor.py with relative imports rewritten to match - # the sibling-import setup above. - src = (_HERMES_DIR / "executor.py").read_text() - src = src.replace("from .providers import", "from providers import") - src = src.replace("from .escalation import", "from escalation import") - # The exec'd module needs `__name__` in its globals because executor.py - # calls ``logging.getLogger(__name__)`` at import time. Without this the - # exec fails with `KeyError: "'__name__' not in globals"`. - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - HermesA2AExecutor = ns["HermesA2AExecutor"] - return HermesA2AExecutor( - provider_cfg=cfg, - api_key="test-key", - model=cfg.default_model, - ) - - -def test_anthropic_entry_has_anthropic_scheme(): - """Phase 2a: anthropic's auth_scheme is 'anthropic'.""" - cfg = providers.PROVIDERS["anthropic"] - assert cfg.auth_scheme == "anthropic" - - -def test_gemini_entry_has_gemini_scheme(): - """Phase 2b: gemini's auth_scheme is 'gemini'.""" - cfg = providers.PROVIDERS["gemini"] - assert cfg.auth_scheme == "gemini" - # Base URL no longer has the /v1beta/openai suffix — native SDK uses bare host. 
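A note on the `_make_executor` machinery above, since the same trick recurs throughout this file: loading a module that uses relative imports outside its package takes three moves — rewrite the relative imports, pre-seed `sys.modules` with the siblings, and exec with `__name__` set. Generalized as a sketch (`load_flat` is a name coined here, not part of the suite):

```python
import sys
from pathlib import Path
from types import ModuleType


def load_flat(path: Path, alias: str, siblings: dict[str, ModuleType]) -> dict:
    """Exec a module that does `from .sibling import ...` outside its package.

    Test-harness trick only: relative imports are rewritten to absolute
    sibling imports, which resolve because the siblings are pre-seeded
    into sys.modules.
    """
    src = path.read_text()
    for name, mod in siblings.items():
        sys.modules[name] = mod            # `import name` hits sys.modules first
        src = src.replace(f"from .{name} import", f"from {name} import")
    ns = {"__name__": alias}               # module calls logging.getLogger(__name__)
    exec(compile(src, str(path), "exec"), ns)
    return ns
```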
- assert "/openai" not in cfg.base_url - assert cfg.base_url.startswith("https://generativelanguage.googleapis.com") - - -def test_all_other_providers_still_openai_scheme(): - """Phase 2 changes only anthropic + gemini. Every other provider keeps auth_scheme='openai'.""" - native_providers = {"anthropic", "gemini"} - for name, cfg in providers.PROVIDERS.items(): - if name in native_providers: - continue - assert cfg.auth_scheme == "openai", ( - f"{name} unexpectedly has auth_scheme={cfg.auth_scheme!r}" - ) - - -@pytest.mark.asyncio -async def test_dispatch_openai_scheme_calls_openai_compat(): - """auth_scheme='openai' → _do_openai_compat runs, native paths do not.""" - executor = _make_executor("openai") - executor._do_openai_compat = AsyncMock(return_value="openai-result") - executor._do_anthropic_native = AsyncMock(return_value="should-not-run") - executor._do_gemini_native = AsyncMock(return_value="should-not-run") - - result = await executor._do_inference("hello") - - # Phase 2c: _do_inference passes (user_message, history) to the path; - # when no history supplied, second arg is None. - executor._do_openai_compat.assert_awaited_once_with("hello", None, None) - executor._do_anthropic_native.assert_not_awaited() - executor._do_gemini_native.assert_not_awaited() - assert result == "openai-result" - - -@pytest.mark.asyncio -async def test_dispatch_anthropic_scheme_calls_anthropic_native(): - """auth_scheme='anthropic' → _do_anthropic_native runs, others do not.""" - executor = _make_executor("anthropic") - executor._do_openai_compat = AsyncMock(return_value="should-not-run") - executor._do_anthropic_native = AsyncMock(return_value="anthropic-result") - executor._do_gemini_native = AsyncMock(return_value="should-not-run") - - result = await executor._do_inference("hello") - - executor._do_anthropic_native.assert_awaited_once_with("hello", None, None) - executor._do_openai_compat.assert_not_awaited() - executor._do_gemini_native.assert_not_awaited() - assert result == "anthropic-result" - - -@pytest.mark.asyncio -async def test_dispatch_gemini_scheme_calls_gemini_native(): - """auth_scheme='gemini' → _do_gemini_native runs, others do not. Phase 2b.""" - executor = _make_executor("gemini") - executor._do_openai_compat = AsyncMock(return_value="should-not-run") - executor._do_anthropic_native = AsyncMock(return_value="should-not-run") - executor._do_gemini_native = AsyncMock(return_value="gemini-result") - - result = await executor._do_inference("hello") - - executor._do_gemini_native.assert_awaited_once_with("hello", None, None) - executor._do_openai_compat.assert_not_awaited() - executor._do_anthropic_native.assert_not_awaited() - assert result == "gemini-result" - - -# --------------------------------------------------------------------------- -# Phase 2c — history-to-message conversion tests -# --------------------------------------------------------------------------- - - -def test_history_to_openai_messages_empty_history(): - """No history → single user message (back-compat with pre-2c single-turn shape).""" - import importlib.util - src = (_HERMES_DIR / "executor.py").read_text().replace( - "from .providers import", "from providers import" - ).replace( - "from .escalation import", "from escalation import" - ) - # `__name__` needed because executor.py does logging.getLogger(__name__) - # at import time. `escalation` + `providers` must also be importable - # at the top level — the caller handles sys.path for that. 
- sys.modules.setdefault("hermes_executor_under_test", MagicMock()) - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - HermesA2AExecutor = ns["HermesA2AExecutor"] - - msgs = HermesA2AExecutor._history_to_openai_messages("current turn", []) - assert msgs == [{"role": "user", "content": "current turn"}] - - -def test_history_to_openai_messages_multi_turn(): - """A2A history roles map: human→user, ai→assistant. Current turn appended as user.""" - import importlib.util - src = (_HERMES_DIR / "executor.py").read_text().replace( - "from .providers import", "from providers import" - ).replace( - "from .escalation import", "from escalation import" - ) - # `__name__` needed because executor.py does logging.getLogger(__name__) - # at import time. `escalation` + `providers` must also be importable - # at the top level — the caller handles sys.path for that. - sys.modules.setdefault("hermes_executor_under_test", MagicMock()) - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - HermesA2AExecutor = ns["HermesA2AExecutor"] - - history = [("human", "first question"), ("ai", "first answer"), ("human", "follow-up")] - msgs = HermesA2AExecutor._history_to_openai_messages("current turn", history) - assert msgs == [ - {"role": "user", "content": "first question"}, - {"role": "assistant", "content": "first answer"}, - {"role": "user", "content": "follow-up"}, - {"role": "user", "content": "current turn"}, - ] - - -def test_history_to_anthropic_messages_same_as_openai(): - """Anthropic Messages API uses the same wire shape as OpenAI for text-only turns.""" - import importlib.util - src = (_HERMES_DIR / "executor.py").read_text().replace( - "from .providers import", "from providers import" - ).replace( - "from .escalation import", "from escalation import" - ) - # `__name__` needed because executor.py does logging.getLogger(__name__) - # at import time. `escalation` + `providers` must also be importable - # at the top level — the caller handles sys.path for that. - sys.modules.setdefault("hermes_executor_under_test", MagicMock()) - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - HermesA2AExecutor = ns["HermesA2AExecutor"] - - history = [("human", "hello"), ("ai", "hi")] - openai_msgs = HermesA2AExecutor._history_to_openai_messages("how are you?", history) - anth_msgs = HermesA2AExecutor._history_to_anthropic_messages("how are you?", history) - assert openai_msgs == anth_msgs - - -def test_history_to_gemini_contents_uses_model_role_and_parts_wrapper(): - """Gemini uses role='user'|'model' (NOT 'assistant') and wraps text in parts=[{text}].""" - import importlib.util - src = (_HERMES_DIR / "executor.py").read_text().replace( - "from .providers import", "from providers import" - ).replace( - "from .escalation import", "from escalation import" - ) - # `__name__` needed because executor.py does logging.getLogger(__name__) - # at import time. 
`escalation` + `providers` must also be importable - # at the top level — the caller handles sys.path for that. - sys.modules.setdefault("hermes_executor_under_test", MagicMock()) - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - HermesA2AExecutor = ns["HermesA2AExecutor"] - - history = [("human", "hi"), ("ai", "hello back")] - contents = HermesA2AExecutor._history_to_gemini_contents("follow-up?", history) - assert contents == [ - {"role": "user", "parts": [{"text": "hi"}]}, - {"role": "model", "parts": [{"text": "hello back"}]}, - {"role": "user", "parts": [{"text": "follow-up?"}]}, - ] - - -@pytest.mark.asyncio -async def test_dispatch_passes_history_through(): - """When _do_inference is called with history, it flows through to the provider path.""" - executor = _make_executor("anthropic") - executor._do_anthropic_native = AsyncMock(return_value="reply-with-history") - executor._do_openai_compat = AsyncMock() - executor._do_gemini_native = AsyncMock() - - history = [("human", "prior q"), ("ai", "prior a")] - result = await executor._do_inference("current", history) - - executor._do_anthropic_native.assert_awaited_once_with("current", history, None) - assert result == "reply-with-history" - - -# --------------------------------------------------------------------------- -# Phase 2d-i — system_prompt dispatch tests -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_dispatch_passes_system_prompt_to_anthropic(): - """system_prompt flows through _do_inference → _do_anthropic_native as third arg.""" - executor = _make_executor("anthropic") - executor._do_anthropic_native = AsyncMock(return_value="reply") - executor._do_openai_compat = AsyncMock() - executor._do_gemini_native = AsyncMock() - - await executor._do_inference("user msg", None, "you are a helpful assistant") - executor._do_anthropic_native.assert_awaited_once_with( - "user msg", None, "you are a helpful assistant" - ) - - -@pytest.mark.asyncio -async def test_dispatch_passes_system_prompt_to_gemini(): - """system_prompt flows through _do_inference → _do_gemini_native as third arg.""" - executor = _make_executor("gemini") - executor._do_gemini_native = AsyncMock(return_value="reply") - executor._do_openai_compat = AsyncMock() - executor._do_anthropic_native = AsyncMock() - - await executor._do_inference("user msg", None, "system instruction") - executor._do_gemini_native.assert_awaited_once_with( - "user msg", None, "system instruction" - ) - - -@pytest.mark.asyncio -async def test_dispatch_passes_system_prompt_to_openai(): - """system_prompt flows through _do_inference → _do_openai_compat as third arg.""" - executor = _make_executor("openai") - executor._do_openai_compat = AsyncMock(return_value="reply") - executor._do_anthropic_native = AsyncMock() - executor._do_gemini_native = AsyncMock() - - await executor._do_inference("user msg", None, "system prompt") - executor._do_openai_compat.assert_awaited_once_with( - "user msg", None, "system prompt" - ) - - -def test_executor_accepts_config_path_kwarg(): - """HermesA2AExecutor.__init__ accepts config_path and stores it on _config_path.""" - import importlib.util - src = (_HERMES_DIR / "executor.py").read_text().replace( - "from .providers import", "from providers import" - ).replace( - "from .escalation 
import", "from escalation import" - ) - # `__name__` needed because executor.py does logging.getLogger(__name__) - # at import time. `escalation` + `providers` must also be importable - # at the top level — the caller handles sys.path for that. - sys.modules.setdefault("hermes_executor_under_test", MagicMock()) - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - HermesA2AExecutor = ns["HermesA2AExecutor"] - cfg = providers.PROVIDERS["openai"] - - # Without config_path — default None - e1 = HermesA2AExecutor(provider_cfg=cfg, api_key="k", model="m") - assert e1._config_path is None - - # With config_path - e2 = HermesA2AExecutor( - provider_cfg=cfg, api_key="k", model="m", config_path="/configs" - ) - assert e2._config_path == "/configs" - - -def test_create_executor_forwards_config_path(): - """create_executor(config_path=...) → executor._config_path gets set. - - Exercises both the hermes_api_key back-compat path AND the registry - resolution path to make sure config_path threads through both. - """ - import importlib.util - src = (_HERMES_DIR / "executor.py").read_text().replace( - "from .providers import", "from providers import" - ).replace( - "from .escalation import", "from escalation import" - ) - # `__name__` needed because executor.py does logging.getLogger(__name__) - # at import time. `escalation` + `providers` must also be importable - # at the top level — the caller handles sys.path for that. - sys.modules.setdefault("hermes_executor_under_test", MagicMock()) - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - create_executor = ns["create_executor"] - - # Path 1: hermes_api_key - e1 = create_executor(hermes_api_key="k", config_path="/path/a") - assert e1._config_path == "/path/a" - - # Path 2: registry resolution - import os - os.environ["OPENAI_API_KEY"] = "openai-test" - try: - e2 = create_executor(provider="openai", config_path="/path/b") - assert e2._config_path == "/path/b" - finally: - os.environ.pop("OPENAI_API_KEY", None) - - -@pytest.mark.asyncio -async def test_dispatch_unknown_scheme_falls_back_to_openai_compat(): - """Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat).""" - executor = _make_executor("openai") - # Mutate the cfg field to simulate an unknown scheme (testing the dispatch, not the registry) - executor.provider_cfg = providers.ProviderConfig( - name="futureprovider", - env_vars=("FOO",), - base_url="https://example.com/v1", - default_model="foo", - auth_scheme="some_future_scheme", - ) - executor._do_openai_compat = AsyncMock(return_value="fallback-result") - executor._do_anthropic_native = AsyncMock() - executor._do_gemini_native = AsyncMock() - - result = await executor._do_inference("hello") - - executor._do_openai_compat.assert_awaited_once() - executor._do_anthropic_native.assert_not_awaited() - executor._do_gemini_native.assert_not_awaited() - assert result == "fallback-result" - - -@pytest.mark.asyncio -async def test_anthropic_native_raises_clear_error_when_sdk_missing(monkeypatch): - """If the anthropic package is not installed, _do_anthropic_native raises - a clear RuntimeError with install instructions — 
it does NOT silently - fall back to the OpenAI-compat shim (which would lose tool-calling + - vision fidelity invisibly). - """ - executor = _make_executor("anthropic") - - # Simulate ImportError on `import anthropic`. We do this by clobbering - # the name in sys.modules so the import statement inside - # _do_anthropic_native hits an ImportError. - monkeypatch.setitem(sys.modules, "anthropic", None) - - with pytest.raises(RuntimeError, match="anthropic"): - await executor._do_anthropic_native("hello") - - -@pytest.mark.asyncio -async def test_gemini_native_raises_clear_error_when_sdk_missing(monkeypatch): - """If the google-genai package is not installed, _do_gemini_native raises - a clear RuntimeError with install instructions — same fail-loud semantics - as the anthropic native path.""" - executor = _make_executor("gemini") - - # Simulate ImportError on `from google import genai`. Clobbering - # sys.modules["google"] forces the submodule import to fail. - monkeypatch.setitem(sys.modules, "google", None) - - with pytest.raises(RuntimeError, match="google-genai"): - await executor._do_gemini_native("hello") - - -def test_create_executor_passes_provider_cfg(): - """create_executor's back-compat paths should set .provider_cfg on the - returned executor so dispatch has auth_scheme available at runtime.""" - # Direct-load executor module same way _make_executor does - import importlib.util - src = (_HERMES_DIR / "executor.py").read_text().replace( - "from .providers import", "from providers import" - ).replace( - "from .escalation import", "from escalation import" - ) - # `__name__` needed because executor.py does logging.getLogger(__name__) - # at import time. `escalation` + `providers` must also be importable - # at the top level — the caller handles sys.path for that. - sys.modules.setdefault("hermes_executor_under_test", MagicMock()) - sys.modules["hermes_executor_under_test.providers"] = providers - sys.modules["hermes_executor_under_test.escalation"] = escalation - ns: dict = {"__name__": "hermes_executor_under_test"} - exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns) - create_executor = ns["create_executor"] - - # Path 1: hermes_api_key back-compat → nous_portal cfg - exec1 = create_executor(hermes_api_key="test-key") - assert exec1.provider_cfg.name == "nous_portal" - assert exec1.provider_cfg.auth_scheme == "openai" - - # Path 2: explicit provider name → that cfg (anthropic has the new scheme) - import os - os.environ["ANTHROPIC_API_KEY"] = "ant-test" - try: - exec2 = create_executor(provider="anthropic") - assert exec2.provider_cfg.name == "anthropic" - assert exec2.provider_cfg.auth_scheme == "anthropic" - assert exec2.model == "claude-sonnet-4-5" - finally: - os.environ.pop("ANTHROPIC_API_KEY", None) - - # Path 3: Phase 2b — gemini explicit resolution - os.environ["GEMINI_API_KEY"] = "gem-test" - try: - exec3 = create_executor(provider="gemini") - assert exec3.provider_cfg.name == "gemini" - assert exec3.provider_cfg.auth_scheme == "gemini" - assert exec3.model == "gemini-2.5-flash" - finally: - os.environ.pop("GEMINI_API_KEY", None) diff --git a/workspace-template/tests/test_hermes_providers.py b/workspace-template/tests/test_hermes_providers.py deleted file mode 100644 index e8be47c6..00000000 --- a/workspace-template/tests/test_hermes_providers.py +++ /dev/null @@ -1,182 +0,0 @@ -"""Tests for workspace-template/adapters/hermes/providers.py. 
- -These tests exercise resolve_provider() in isolation — they do not import -anything from adapters/__init__.py so they don't need the a2a runtime deps. -""" - -from __future__ import annotations - -import importlib -import os -import sys -from pathlib import Path - -import pytest - -# Make the hermes package importable without pulling in adapters/__init__.py -# (which imports the a2a SDK). We load providers.py directly from its file path. -_HERMES_DIR = Path(__file__).parent.parent / "adapters" / "hermes" -sys.path.insert(0, str(_HERMES_DIR)) -import providers # type: ignore # noqa: E402 - - -_ALL_PROVIDER_ENV_VARS = ( - "HERMES_API_KEY", - "NOUS_API_KEY", - "OPENROUTER_API_KEY", - "OPENAI_API_KEY", - "ANTHROPIC_API_KEY", - "XAI_API_KEY", - "GROK_API_KEY", - "GEMINI_API_KEY", - "GOOGLE_API_KEY", - "QWEN_API_KEY", - "DASHSCOPE_API_KEY", - "GLM_API_KEY", - "ZHIPU_API_KEY", - "KIMI_API_KEY", - "MOONSHOT_API_KEY", - "MINIMAX_API_KEY", - "DEEPSEEK_API_KEY", - "GROQ_API_KEY", - "TOGETHER_API_KEY", - "FIREWORKS_API_KEY", - "MISTRAL_API_KEY", -) - - -@pytest.fixture(autouse=True) -def _clean_env(): - """Clear every provider env var before each test and restore to the - exact pre-test state on teardown. - - Implementation note: earlier version used pytest's monkeypatch fixture, - which tracks deltas from the state at fixture entry. That was buggy - because several tests in this file mutate os.environ directly - (os.environ["HERMES_API_KEY"] = ...), bypassing monkeypatch's - tracking. The direct mutations leaked into the NEXT test file - (test_hermes_smoke.py::test_create_executor_raises_without_keys), - causing a file-order-dependent failure. Pure snapshot/restore - avoids all the delta-tracking edge cases. - """ - saved = {k: os.environ.get(k) for k in _ALL_PROVIDER_ENV_VARS} - for k in _ALL_PROVIDER_ENV_VARS: - os.environ.pop(k, None) - try: - yield - finally: - for k, v in saved.items(): - if v is None: - os.environ.pop(k, None) - else: - os.environ[k] = v - - -def test_registry_is_populated(): - """Phase 1 ships at least 15 providers and every entry is self-consistent.""" - assert len(providers.PROVIDERS) >= 15 - assert len(providers.RESOLUTION_ORDER) == len(providers.PROVIDERS) - for name, cfg in providers.PROVIDERS.items(): - assert cfg.name == name, f"{name}: config.name should match dict key" - assert cfg.env_vars, f"{name}: must declare at least one env var" - assert cfg.base_url.startswith("http"), f"{name}: base_url must be http(s)" - assert cfg.default_model, f"{name}: must declare a default model" - assert name in providers.RESOLUTION_ORDER, f"{name}: missing from resolution order" - - -def test_resolution_order_has_no_duplicates(): - assert len(providers.RESOLUTION_ORDER) == len(set(providers.RESOLUTION_ORDER)) - - -def test_backcompat_hermes_api_key_first(): - """PR 2 back-compat — HERMES_API_KEY auto-detect still routes to Nous Portal.""" - os.environ["HERMES_API_KEY"] = "hermes-test-key" - cfg, key = providers.resolve_provider() - assert cfg.name == "nous_portal" - assert key == "hermes-test-key" - - -def test_backcompat_openrouter_api_key_second(): - """PR 2 back-compat — OPENROUTER_API_KEY still routes to OpenRouter when HERMES_API_KEY is absent.""" - os.environ["OPENROUTER_API_KEY"] = "or-test-key" - cfg, key = providers.resolve_provider() - assert cfg.name == "openrouter" - - -def test_auto_detect_openai(): - os.environ["OPENAI_API_KEY"] = "sk-test" - cfg, key = providers.resolve_provider() - assert cfg.name == "openai" - assert cfg.base_url == "https://api.openai.com/v1" - - 
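The behaviours pinned in this file amount to a first-match walk; a hedged reconstruction of `resolve_provider()` under those constraints (`PROVIDERS` / `RESOLUTION_ORDER` as exercised by the tests — the shipped wording may differ):

```python
import os


def resolve_provider(name: str | None = None):
    """Explicit name wins (no silent fallback); else first env var match."""
    if name is not None:
        cfg = PROVIDERS.get(name)          # registry assumed in scope
        if cfg is None:
            raise ValueError(f"Unknown Hermes provider: {name!r}")
        for var in cfg.env_vars:
            if os.environ.get(var):
                return cfg, os.environ[var]
        raise ValueError(f"{name}: no env var set ({', '.join(cfg.env_vars)})")
    for pname in RESOLUTION_ORDER:         # HERMES_API_KEY routes first
        cfg = PROVIDERS[pname]
        for var in cfg.env_vars:
            if os.environ.get(var):
                return cfg, os.environ[var]
    options = ", ".join(v for c in PROVIDERS.values() for v in c.env_vars)
    raise ValueError(f"No provider API key found; set one of: {options}")
```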
-def test_auto_detect_anthropic(): - os.environ["ANTHROPIC_API_KEY"] = "ant-test" - cfg, key = providers.resolve_provider() - assert cfg.name == "anthropic" - - -@pytest.mark.parametrize( - "env_var,expected", - [ - ("XAI_API_KEY", "xai"), - ("GROK_API_KEY", "xai"), - ("QWEN_API_KEY", "qwen"), - ("DASHSCOPE_API_KEY", "qwen"), - ("GLM_API_KEY", "glm"), - ("ZHIPU_API_KEY", "glm"), - ("KIMI_API_KEY", "kimi"), - ("MOONSHOT_API_KEY", "kimi"), - ("GROQ_API_KEY", "groq"), - ("DEEPSEEK_API_KEY", "deepseek"), - ("MISTRAL_API_KEY", "mistral"), - ("TOGETHER_API_KEY", "together"), - ("FIREWORKS_API_KEY", "fireworks"), - ("MINIMAX_API_KEY", "minimax"), - ("GEMINI_API_KEY", "gemini"), - ("GOOGLE_API_KEY", "gemini"), - ], -) -def test_every_provider_env_var_resolves(env_var, expected): - """Every env var listed in PROVIDERS resolves to the right provider - — this guards against typos in the registry dict.""" - os.environ[env_var] = "test-key" - cfg, _ = providers.resolve_provider() - assert cfg.name == expected, ( - f"{env_var} should route to {expected}, got {cfg.name}" - ) - - -def test_explicit_provider_wins_over_auto_detect(): - """When `provider=` is given, auto-detect is bypassed.""" - os.environ["HERMES_API_KEY"] = "hermes-key" # would auto-detect - os.environ["OPENAI_API_KEY"] = "openai-key" - cfg, key = providers.resolve_provider("openai") - assert cfg.name == "openai" - assert key == "openai-key" - - -def test_unknown_provider_raises(): - with pytest.raises(ValueError, match="Unknown Hermes provider"): - providers.resolve_provider("this_provider_does_not_exist") - - -def test_explicit_provider_with_missing_env_raises(): - """If the operator asks for a specific provider but its env var is empty, - we raise — we do NOT fall back to auto-detect because that would be - surprising ("why is my openai config talking to anthropic?").""" - os.environ["HERMES_API_KEY"] = "some-value" # auto-detect would succeed - with pytest.raises(ValueError, match="no env var set"): - providers.resolve_provider("anthropic") - - -def test_auto_detect_with_no_env_lists_all_options(): - """The error message should list every env var the caller could set, - so operators don't have to read the source.""" - # No env vars set (autouse fixture clears them all) - with pytest.raises(ValueError) as exc_info: - providers.resolve_provider() - msg = str(exc_info.value) - # Spot-check: the message names at least a few providers - for env_var in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "QWEN_API_KEY"): - assert env_var in msg, f"error message should mention {env_var}" diff --git a/workspace-template/tests/test_hermes_smoke.py b/workspace-template/tests/test_hermes_smoke.py deleted file mode 100644 index fed10a92..00000000 --- a/workspace-template/tests/test_hermes_smoke.py +++ /dev/null @@ -1,84 +0,0 @@ -"""Smoke tests for adapters.hermes.create_executor(). - -Verifies key resolution order and ValueError on missing keys. -No real network calls are made — the executor object is just instantiated. 
-""" -import os -import pytest -from unittest.mock import patch - -from adapters.hermes import create_executor - - -def test_create_executor_with_param(): - """create_executor() works when key passed directly as param.""" - executor = create_executor(hermes_api_key="test-key-direct") - assert executor is not None - - -def test_create_executor_with_hermes_env(): - """create_executor() works when HERMES_API_KEY env var is set.""" - with patch.dict(os.environ, {"HERMES_API_KEY": "test-hermes-key"}, clear=False): - os.environ.pop("OPENROUTER_API_KEY", None) - executor = create_executor() - assert executor is not None - - -def test_create_executor_falls_back_to_openrouter(): - """create_executor() falls back to OPENROUTER_API_KEY when HERMES_API_KEY absent.""" - env = {"OPENROUTER_API_KEY": "test-openrouter-key"} - with patch.dict(os.environ, env, clear=False): - os.environ.pop("HERMES_API_KEY", None) - executor = create_executor() - assert executor is not None - - -def test_create_executor_raises_without_keys(): - """create_executor() raises ValueError when no keys available.""" - with patch.dict(os.environ, {}, clear=False): - os.environ.pop("HERMES_API_KEY", None) - os.environ.pop("OPENROUTER_API_KEY", None) - with pytest.raises(ValueError): - create_executor() - - -# --------------------------------------------------------------------------- -# Additional assertions — verify key routing is correct -# --------------------------------------------------------------------------- - -def test_param_key_uses_nous_base_url(): - """When called with explicit key, base_url points at Nous Portal.""" - executor = create_executor(hermes_api_key="nous-key") - assert "nousresearch.com" in executor.base_url - - -def test_hermes_env_uses_nous_base_url(): - """HERMES_API_KEY maps to Nous Portal base URL.""" - with patch.dict(os.environ, {"HERMES_API_KEY": "nous-key"}, clear=False): - os.environ.pop("OPENROUTER_API_KEY", None) - executor = create_executor() - assert "nousresearch.com" in executor.base_url - - -def test_openrouter_fallback_uses_openrouter_base_url(): - """OPENROUTER_API_KEY fallback maps to OpenRouter base URL.""" - with patch.dict(os.environ, {"OPENROUTER_API_KEY": "or-key"}, clear=False): - os.environ.pop("HERMES_API_KEY", None) - executor = create_executor() - assert "openrouter.ai" in executor.base_url - - -def test_param_takes_priority_over_hermes_env(): - """Explicit param overrides HERMES_API_KEY env var.""" - with patch.dict(os.environ, {"HERMES_API_KEY": "env-key"}, clear=False): - executor = create_executor(hermes_api_key="param-key") - assert executor.api_key == "param-key" - - -def test_hermes_env_takes_priority_over_openrouter(): - """HERMES_API_KEY overrides OPENROUTER_API_KEY fallback.""" - env = {"HERMES_API_KEY": "hermes-key", "OPENROUTER_API_KEY": "or-key"} - with patch.dict(os.environ, env, clear=False): - executor = create_executor() - assert executor.api_key == "hermes-key" - assert "nousresearch.com" in executor.base_url diff --git a/workspace-template/tests/test_qianfan_provider.py b/workspace-template/tests/test_qianfan_provider.py deleted file mode 100644 index 2c018fc7..00000000 --- a/workspace-template/tests/test_qianfan_provider.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Tests for Baidu Qianfan provider support across agent.py, deepagents, and openclaw.""" - -import importlib -import sys -from types import ModuleType - -import pytest - -QIANFAN_BASE_URL = "https://qianfan.baidubce.com/v2" - - -# --------------------------------------------------------------------------- 
-# Helpers -# --------------------------------------------------------------------------- - -def _install_langgraph_mocks(monkeypatch, captured: dict): - """Inject lightweight langgraph + langchain_openai stubs into sys.modules.""" - prebuilt_mod = ModuleType("langgraph.prebuilt") - - def fake_create_react_agent(*, model, tools, prompt): - captured["react_agent"] = model - return {"model": model} - - prebuilt_mod.create_react_agent = fake_create_react_agent - langgraph_mod = ModuleType("langgraph") - monkeypatch.setitem(sys.modules, "langgraph", langgraph_mod) - monkeypatch.setitem(sys.modules, "langgraph.prebuilt", prebuilt_mod) - - openai_mod = ModuleType("langchain_openai") - - class FakeChatOpenAI: - def __init__(self, **kwargs): - captured["llm_kwargs"] = kwargs - - openai_mod.ChatOpenAI = FakeChatOpenAI - monkeypatch.setitem(sys.modules, "langchain_openai", openai_mod) - - -# --------------------------------------------------------------------------- -# Track D-1: agent.py qianfan dispatch -# --------------------------------------------------------------------------- - -class TestQianfanInAgent: - """agent.py create_agent() correctly wires Qianfan provider.""" - - def _load_agent(self, monkeypatch, captured): - _install_langgraph_mocks(monkeypatch, captured) - sys.modules.pop("agent", None) - return importlib.import_module("agent") - - def test_uses_qianfan_api_key(self, monkeypatch): - """QIANFAN_API_KEY is used when set.""" - captured = {} - monkeypatch.setenv("QIANFAN_API_KEY", "qf-key-123") - monkeypatch.delenv("AISTUDIO_API_KEY", raising=False) - agent_mod = self._load_agent(monkeypatch, captured) - agent_mod.create_agent("qianfan:ernie-4.5", [], "sys") - assert captured["llm_kwargs"]["openai_api_key"] == "qf-key-123" - - def test_falls_back_to_aistudio_api_key(self, monkeypatch): - """Falls back to AISTUDIO_API_KEY when QIANFAN_API_KEY is absent.""" - captured = {} - monkeypatch.delenv("QIANFAN_API_KEY", raising=False) - monkeypatch.setenv("AISTUDIO_API_KEY", "ai-studio-456") - agent_mod = self._load_agent(monkeypatch, captured) - agent_mod.create_agent("qianfan:ernie-speed", [], "sys") - assert captured["llm_kwargs"]["openai_api_key"] == "ai-studio-456" - - def test_uses_qianfan_base_url(self, monkeypatch): - """openai_api_base is always the Qianfan endpoint.""" - captured = {} - monkeypatch.setenv("QIANFAN_API_KEY", "any-key") - agent_mod = self._load_agent(monkeypatch, captured) - agent_mod.create_agent("qianfan:ernie-lite", [], "sys") - assert captured["llm_kwargs"]["openai_api_base"] == QIANFAN_BASE_URL - - def test_model_name_stripped_of_prefix(self, monkeypatch): - """The model kwarg contains only the bare model name, not the prefix.""" - captured = {} - monkeypatch.setenv("QIANFAN_API_KEY", "k") - agent_mod = self._load_agent(monkeypatch, captured) - agent_mod.create_agent("qianfan:ernie-4.5-turbo", [], "sys") - assert captured["llm_kwargs"]["model"] == "ernie-4.5-turbo" - - -# --------------------------------------------------------------------------- -# Track D-2: adapters/deepagents _create_llm qianfan dispatch -# --------------------------------------------------------------------------- - -class TestQianfanInDeepAgents: - """DeepAgents adapter._create_llm() correctly wires Qianfan provider.""" - - def _make_adapter(self, monkeypatch, captured): - openai_mod = ModuleType("langchain_openai") - - class FakeChatOpenAI: - def __init__(self, **kwargs): - captured["llm_kwargs"] = kwargs - - openai_mod.ChatOpenAI = FakeChatOpenAI - monkeypatch.setitem(sys.modules, 
"langchain_openai", openai_mod) - from adapters.deepagents.adapter import DeepAgentsAdapter - return DeepAgentsAdapter() - - def test_uses_qianfan_api_key(self, monkeypatch): - captured = {} - monkeypatch.setenv("QIANFAN_API_KEY", "qf-deep-999") - monkeypatch.delenv("AISTUDIO_API_KEY", raising=False) - adapter = self._make_adapter(monkeypatch, captured) - adapter._create_llm("qianfan:ernie-4.5") - assert captured["llm_kwargs"]["openai_api_key"] == "qf-deep-999" - - def test_falls_back_to_aistudio_api_key(self, monkeypatch): - captured = {} - monkeypatch.delenv("QIANFAN_API_KEY", raising=False) - monkeypatch.setenv("AISTUDIO_API_KEY", "aistudio-deep-777") - adapter = self._make_adapter(monkeypatch, captured) - adapter._create_llm("qianfan:ernie-speed") - assert captured["llm_kwargs"]["openai_api_key"] == "aistudio-deep-777" - - def test_uses_qianfan_base_url(self, monkeypatch): - captured = {} - monkeypatch.setenv("QIANFAN_API_KEY", "k") - adapter = self._make_adapter(monkeypatch, captured) - adapter._create_llm("qianfan:ernie-lite") - assert captured["llm_kwargs"]["openai_api_base"] == QIANFAN_BASE_URL - - -# --------------------------------------------------------------------------- -# Track D-3: adapters/openclaw provider_urls + key resolution -# --------------------------------------------------------------------------- - -class TestQianfanInOpenClaw: - """OpenClaw adapter exposes Qianfan URL and resolves the correct API key.""" - - def _provider_urls(self): - """Return a copy of the provider_urls dict defined in the adapter.""" - return { - "openai": "https://api.openai.com/v1", - "groq": "https://api.groq.com/openai/v1", - "openrouter": "https://openrouter.ai/api/v1", - "qianfan": QIANFAN_BASE_URL, - } - - def _select_key(self, prefix: str, env: dict) -> str: - """Mirror the prefix-aware key selection added to openclaw/adapter.py.""" - if prefix == "qianfan": - return env.get("QIANFAN_API_KEY", env.get("AISTUDIO_API_KEY", "")) - return env.get("OPENAI_API_KEY", env.get("GROQ_API_KEY", env.get("OPENROUTER_API_KEY", ""))) - - def test_qianfan_url_in_provider_map(self): - urls = self._provider_urls() - assert "qianfan" in urls - assert urls["qianfan"] == QIANFAN_BASE_URL - - def test_qianfan_key_resolution_primary(self): - key = self._select_key("qianfan", {"QIANFAN_API_KEY": "qf-oc-111"}) - assert key == "qf-oc-111" - - def test_qianfan_key_resolution_fallback(self): - key = self._select_key("qianfan", {"AISTUDIO_API_KEY": "as-oc-222"}) - assert key == "as-oc-222" - - def test_non_qianfan_prefix_not_affected(self): - """Existing providers still resolve via OPENAI_API_KEY chain.""" - key = self._select_key("openai", {"OPENAI_API_KEY": "sk-test"}) - assert key == "sk-test" diff --git a/workspace-template/tests/test_shared_runtime.py b/workspace-template/tests/test_shared_runtime.py deleted file mode 100644 index 4423e755..00000000 --- a/workspace-template/tests/test_shared_runtime.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Tests for shared runtime helpers used by A2A-backed executors.""" - -from types import SimpleNamespace -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from adapters.shared_runtime import ( - append_peer_guidance, - build_peer_section, - build_task_text, - brief_task, - extract_history, - extract_message_text, - format_conversation_history, - summarize_peer_cards, - set_current_task, -) - - -def _make_context(parts=None, metadata=None): - context = MagicMock() - context.message.parts = parts or [] - context.metadata = metadata or {} - return 
context - - -def test_extract_message_text_prefers_text_then_root_text(): - part1 = MagicMock() - part1.text = "Hello" - part2 = MagicMock(spec=[]) - part2.root = SimpleNamespace(text="World") - assert extract_message_text(_make_context([part1, part2])) == "Hello World" - - -def test_extract_message_text_supports_dict_parts(): - parts = [{"text": "Hello"}, {"root": {"text": "World"}}] - assert extract_message_text(parts) == "Hello World" - - -def test_extract_history_and_formatting(): - ctx = _make_context( - metadata={ - "history": [ - {"role": "user", "parts": [{"text": "First"}]}, - {"role": "agent", "parts": [{"text": "Second"}]}, - ] - } - ) - - history = extract_history(ctx) - - assert history == [("human", "First"), ("ai", "Second")] - assert format_conversation_history(history) == "User: First\nAgent: Second" - assert ( - build_task_text("Current request", history) - == "Conversation so far:\nUser: First\nAgent: Second\n\nCurrent request: Current request" - ) - - -def test_append_peer_guidance_is_optional(): - assert append_peer_guidance(None, "", default_text="Base", tool_name="delegate") == "Base" - assert ( - append_peer_guidance("Base", "Peer A", default_text="Base", tool_name="delegate") - == "Base\n\n## Peers\nPeer A\nUse delegate to communicate with them." - ) - - -def test_summarize_peer_cards_and_render_section(): - peers = [ - { - "id": "peer-1", - "status": "online", - "agent_card": { - "name": "Alpha", - "skills": [{"name": "research"}, {"id": "write"}], - }, - }, - {"id": "peer-2", "status": "offline", "agent_card": None}, - ] - - assert summarize_peer_cards(peers) == [ - { - "id": "peer-1", - "name": "Alpha", - "status": "online", - "skills": ["research", "write"], - } - ] - - section = build_peer_section(peers) - assert "## Your Peers" in section - assert "**Alpha** (id: `peer-1`, status: online)" in section - assert "Skills: research, write" in section - assert "delegate_to_workspace" in section - - -def test_brief_task_truncates_at_sixty_chars(): - assert brief_task("x" * 59) == "x" * 59 - assert brief_task("x" * 60) == "x" * 60 - assert brief_task("x" * 61) == ("x" * 60) + "..." - - -@pytest.mark.asyncio -async def test_set_current_task_updates_heartbeat(): - heartbeat = SimpleNamespace(current_task="", active_tasks=0) - - await set_current_task(heartbeat, "Working") - assert heartbeat.current_task == "Working" - assert heartbeat.active_tasks == 1 - - await set_current_task(heartbeat, "") - assert heartbeat.current_task == "" - assert heartbeat.active_tasks == 0 - - -@pytest.mark.asyncio -async def test_set_current_task_is_noop_for_none(): - await set_current_task(None, "Working") - - -# --------------------------------------------------------------------------- -# build_task_text() with no history -# --------------------------------------------------------------------------- - -def test_build_task_text_no_history_returns_user_message(): - """When history is empty, build_task_text() returns the user_message directly.""" - result = build_task_text("What is the weather?", []) - assert result == "What is the weather?" 
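These assertions, plus the edge cases just below, pin `summarize_peer_cards()` closely enough to sketch; a reconstruction rather than the shipped `shared_runtime.py`:

```python
import json


def summarize_peer_cards(peers: list) -> list:
    """Keep only peers with a usable dict agent_card; flatten skill names."""
    out = []
    for peer in peers:
        card = peer.get("agent_card")
        if isinstance(card, str):
            try:
                card = json.loads(card)     # cards may arrive JSON-encoded
            except ValueError:
                continue                    # invalid JSON: drop the peer
        if not isinstance(card, dict):
            continue                        # None, list, etc.: drop the peer
        skills = [s.get("name") or s.get("id")
                  for s in card.get("skills", []) if isinstance(s, dict)]
        out.append({
            "id": peer.get("id"),
            "name": card.get("name"),
            "status": peer.get("status"),
            "skills": [s for s in skills if s],
        })
    return out
```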
- - -# --------------------------------------------------------------------------- -# summarize_peer_cards() edge cases -# --------------------------------------------------------------------------- - -def test_summarize_peer_cards_invalid_json_string_skipped(): - """A peer whose agent_card is an invalid JSON string is skipped entirely.""" - peers = [ - {"id": "peer-bad", "status": "online", "agent_card": "{not valid json}"}, - { - "id": "peer-good", - "status": "online", - "agent_card": {"name": "Good Peer", "skills": []}, - }, - ] - result = summarize_peer_cards(peers) - assert len(result) == 1 - assert result[0]["id"] == "peer-good" - - -def test_summarize_peer_cards_json_string_not_dict_skipped(): - """A peer whose agent_card is a JSON-encoded list (not a dict) is skipped.""" - import json - peers = [ - {"id": "peer-list", "status": "online", "agent_card": json.dumps(["skill1"])}, - { - "id": "peer-dict", - "status": "online", - "agent_card": {"name": "Dict Peer", "skills": []}, - }, - ] - result = summarize_peer_cards(peers) - assert len(result) == 1 - assert result[0]["id"] == "peer-dict" - - -# --------------------------------------------------------------------------- -# set_current_task() httpx exception is swallowed -# --------------------------------------------------------------------------- - -@pytest.mark.asyncio -async def test_set_current_task_httpx_exception_is_silenced(monkeypatch): - """set_current_task() silently ignores exceptions from the httpx heartbeat push.""" - monkeypatch.setenv("WORKSPACE_ID", "ws-test") - monkeypatch.setenv("PLATFORM_URL", "http://platform:8080") - - mock_client = AsyncMock() - mock_client.__aenter__ = AsyncMock(return_value=mock_client) - mock_client.__aexit__ = AsyncMock(return_value=False) - mock_client.post = AsyncMock(side_effect=Exception("Connection refused")) - - # httpx is imported lazily inside the function, so patch at the httpx module level - with patch("httpx.AsyncClient", return_value=mock_client): - # Should not raise — exception is swallowed with pass - heartbeat = SimpleNamespace(current_task="", active_tasks=0) - await set_current_task(heartbeat, "Doing work") - - assert heartbeat.current_task == "Doing work" - assert heartbeat.active_tasks == 1 diff --git a/workspace-template/tests/test_transcript_lines.py b/workspace-template/tests/test_transcript_lines.py deleted file mode 100644 index 552af6ee..00000000 --- a/workspace-template/tests/test_transcript_lines.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Tests for the new BaseAdapter.transcript_lines() method + claude-code override.""" - -import asyncio -import json -import os -import tempfile -from pathlib import Path - -import pytest - - -# ── Default (BaseAdapter) ─────────────────────────────────────────────────── - - -def test_base_adapter_returns_unsupported(): - """Adapters that don't override return supported:False.""" - from adapters.langgraph.adapter import LangGraphAdapter - a = LangGraphAdapter() - r = asyncio.run(a.transcript_lines()) - assert r["supported"] is False - assert r["lines"] == [] - assert r["cursor"] == 0 - assert r["runtime"] == "langgraph" - assert r["more"] is False - - -# ── Claude Code override ──────────────────────────────────────────────────── - - -def _write_jsonl(path: Path, entries: list[dict]) -> None: - with path.open("w") as f: - for e in entries: - f.write(json.dumps(e) + "\n") - - -def test_claude_code_no_projects_dir(): - """Returns supported:True with empty lines when projects dir missing.""" - from adapters.claude_code.adapter import 
ClaudeCodeAdapter
-    with tempfile.TemporaryDirectory() as tmp:
-        os.environ["HOME"] = tmp
-        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
-        try:
-            r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
-            assert r["supported"] is True
-            assert r["lines"] == []
-            assert r["cursor"] == 0
-            assert "-configs" in r["source"]
-        finally:
-            del os.environ["CLAUDE_PROJECT_CWD"]
-
-
-def test_claude_code_reads_jsonl_with_pagination():
-    from adapters.claude_code.adapter import ClaudeCodeAdapter
-    with tempfile.TemporaryDirectory() as tmp:
-        os.environ["HOME"] = tmp
-        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
-        try:
-            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
-            projdir.mkdir(parents=True)
-            _write_jsonl(projdir / "abc.jsonl", [
-                {"type": "user", "n": 1},
-                {"type": "assistant", "n": 2},
-                {"type": "user", "n": 3},
-                {"type": "assistant", "n": 4},
-                {"type": "user", "n": 5},
-            ])
-            a = ClaudeCodeAdapter()
-            # First page (limit=2)
-            r1 = asyncio.run(a.transcript_lines(since=0, limit=2))
-            assert r1["supported"] is True
-            assert [l["n"] for l in r1["lines"]] == [1, 2]
-            assert r1["cursor"] == 2
-            assert r1["more"] is True
-            # Second page (since=2, limit=2)
-            r2 = asyncio.run(a.transcript_lines(since=2, limit=2))
-            assert [l["n"] for l in r2["lines"]] == [3, 4]
-            assert r2["cursor"] == 4
-            assert r2["more"] is True
-            # Third page exhausts
-            r3 = asyncio.run(a.transcript_lines(since=4, limit=2))
-            assert [l["n"] for l in r3["lines"]] == [5]
-            assert r3["cursor"] == 5
-            assert r3["more"] is False
-        finally:
-            del os.environ["CLAUDE_PROJECT_CWD"]
-
-
-def test_claude_code_picks_most_recent_jsonl():
-    """When multiple .jsonl files exist, picks the most-recently-modified."""
-    from adapters.claude_code.adapter import ClaudeCodeAdapter
-    with tempfile.TemporaryDirectory() as tmp:
-        os.environ["HOME"] = tmp
-        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
-        try:
-            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
-            projdir.mkdir(parents=True)
-            old = projdir / "old.jsonl"
-            new = projdir / "new.jsonl"
-            _write_jsonl(old, [{"src": "old"}])
-            _write_jsonl(new, [{"src": "new"}])
-            # Force new to be more recent
-            os.utime(old, (1000, 1000))
-            os.utime(new, (2000, 2000))
-            r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
-            assert r["lines"] == [{"src": "new"}]
-            assert r["source"].endswith("new.jsonl")
-        finally:
-            del os.environ["CLAUDE_PROJECT_CWD"]
-
-
-def test_claude_code_surfaces_malformed_lines():
-    """Malformed JSON lines surface as ``_parse_error: True`` entries instead of failing the request with a 500."""
-    from adapters.claude_code.adapter import ClaudeCodeAdapter
-    with tempfile.TemporaryDirectory() as tmp:
-        os.environ["HOME"] = tmp
-        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
-        try:
-            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
-            projdir.mkdir(parents=True)
-            with (projdir / "x.jsonl").open("w") as f:
-                f.write('{"good": 1}\n')
-                f.write("not-json garbage\n")
-                f.write('{"good": 2}\n')
-            r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
-            assert r["lines"][0] == {"good": 1}
-            assert r["lines"][1].get("_parse_error") is True
-            assert r["lines"][2] == {"good": 2}
-        finally:
-            del os.environ["CLAUDE_PROJECT_CWD"]
-
-
-def test_claude_code_caps_limit():
-    """Limit is capped at 1000 so an oversized client limit cannot trigger OOM."""
-    from adapters.claude_code.adapter import ClaudeCodeAdapter
-    with tempfile.TemporaryDirectory() as tmp:
-        os.environ["HOME"] = tmp
-        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
-        try:
-            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
-            projdir.mkdir(parents=True)
-            _write_jsonl(projdir / "x.jsonl", [{"i": i} for i in range(1500)])
-            r = asyncio.run(ClaudeCodeAdapter().transcript_lines(limit=999999))
-            assert len(r["lines"]) == 1000  # capped
-            assert r["more"] is True
-            assert r["cursor"] == 1000
-        finally:
-            del os.environ["CLAUDE_PROJECT_CWD"]
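
For anyone porting the deleted coverage into the standalone adapter repos, the
shared_runtime tests above pin down a small contract for set_current_task().
Below is a minimal illustrative sketch inferred purely from those assertions;
the heartbeat endpoint path and JSON payload are assumptions, not the shipped
shared_runtime code, and httpx is imported lazily inside the function because
the test patches it at module level.

import os


async def set_current_task(heartbeat, task: str) -> None:
    if heartbeat is None:
        return  # the tests require a silent no-op when no heartbeat exists

    heartbeat.current_task = task
    heartbeat.active_tasks = 1 if task else 0

    workspace_id = os.environ.get("WORKSPACE_ID")
    platform_url = os.environ.get("PLATFORM_URL")
    if not (workspace_id and platform_url):
        return  # nothing to push without a platform to push to

    import httpx  # lazy import, matching the test's patch target

    try:
        async with httpx.AsyncClient() as client:
            # Hypothetical endpoint; only "push failures are swallowed"
            # is actually asserted by the tests.
            await client.post(
                f"{platform_url}/workspaces/{workspace_id}/heartbeat",
                json={
                    "current_task": heartbeat.current_task,
                    "active_tasks": heartbeat.active_tasks,
                },
            )
    except Exception:
        pass  # heartbeats are advisory; a dead platform must not fail the task

Swallowing the exception is the design choice the dedicated regression test
guards: telemetry pushes must never take an agent task down with them.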
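The transcript_lines() tests likewise fix the whole pagination contract, so a
compact sketch of the claude-code override can be reconstructed from them.
This is illustrative only: the default limit and the "claude-code" runtime
string are assumptions (the tests only assert the runtime value for the
langgraph adapter), but the cursor arithmetic, the 1000-entry cap, the
newest-file selection, and the _parse_error fallback are all asserted
behaviour.

import json
import os
from pathlib import Path

MAX_LIMIT = 1000  # hard cap asserted by test_claude_code_caps_limit


async def transcript_lines(since: int = 0, limit: int = 100) -> dict:
    limit = min(limit, MAX_LIMIT)
    # CLAUDE_PROJECT_CWD="/configs" maps to the "-configs" project folder.
    project = os.environ.get("CLAUDE_PROJECT_CWD", "").replace("/", "-")
    projdir = Path.home() / ".claude" / "projects" / project

    files = []
    if projdir.is_dir():
        files = sorted(projdir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime)
    if not files:
        # A missing projects dir is not an error: supported stays True.
        return {"supported": True, "lines": [], "cursor": 0, "more": False,
                "runtime": "claude-code", "source": str(projdir)}

    newest = files[-1]  # the most-recently-modified transcript wins
    entries = []
    for raw in newest.read_text().splitlines():
        try:
            entries.append(json.loads(raw))
        except json.JSONDecodeError:
            entries.append({"_parse_error": True})  # surface, don't crash

    page = entries[since:since + limit]
    cursor = since + len(page)
    return {"supported": True, "lines": page, "cursor": cursor,
            "more": cursor < len(entries), "runtime": "claude-code",
            "source": str(newest)}

Returning cursor = since + len(page) is what lets a client poll incrementally:
it feeds the cursor back as the next call's since and stops once more is False.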