Merge pull request #473 from Molecule-AI/fix/remove-adapters-dir

fix: remove adapter subdirectories from workspace-template
Hongming Wang 2026-04-16 04:59:34 -07:00 committed by GitHub
commit 73865ee164
38 changed files with 521 additions and 7454 deletions

View File

@@ -41,7 +41,7 @@ from a2a.server.events import EventQueue
from a2a.server.tasks import TaskUpdater
from a2a.types import Part, TextPart
from a2a.utils import new_agent_text_message
from adapters.shared_runtime import (
from shared_runtime import (
extract_history as _extract_history,
extract_message_text,
brief_task,

View File

@@ -0,0 +1,309 @@
"""Base adapter interface for agent infrastructure providers."""
import logging
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
@dataclass
class SetupResult:
"""Result from the shared _common_setup() pipeline."""
system_prompt: str
loaded_skills: list # LoadedSkill instances
langchain_tools: list # LangChain BaseTool instances
is_coordinator: bool
children: list # child workspace dicts
@dataclass
class AdapterConfig:
"""Standardized config passed to every adapter."""
model: str # e.g. "anthropic:claude-sonnet-4-6" or "openrouter:google/gemini-2.5-flash"
system_prompt: str | None = None # Assembled system prompt text
tools: list[str] = field(default_factory=list) # Tool names from config.yaml
runtime_config: dict[str, Any] = field(default_factory=dict) # Raw runtime_config block
config_path: str = "/configs" # Path to configs directory
workspace_id: str = "" # Workspace identifier
prompt_files: list[str] = field(default_factory=list) # Ordered prompt file names
a2a_port: int = 8000 # Port for A2A server
heartbeat: Any = None # HeartbeatLoop instance
class BaseAdapter(ABC):
"""Interface every agent infrastructure adapter must implement.
To add a new agent infra:
1. Create workspace-template/adapters/<your_infra>/
2. Implement adapter.py with a class extending BaseAdapter
3. Add requirements.txt with your infra's dependencies
4. Export as Adapter in __init__.py
5. Submit a PR
"""
@staticmethod
@abstractmethod
def name() -> str: # pragma: no cover
"""Return the runtime identifier (e.g. 'langgraph', 'crewai').
This must match the 'runtime' field in config.yaml."""
...
@staticmethod
@abstractmethod
def display_name() -> str: # pragma: no cover
"""Human-readable name for UI display."""
...
@staticmethod
@abstractmethod
def description() -> str: # pragma: no cover
"""Short description of what this adapter provides."""
...
@staticmethod
def get_config_schema() -> dict:
"""Return JSON Schema for runtime_config fields this adapter supports.
Used by the Config tab UI to render the right form fields.
Override in subclasses for adapter-specific settings."""
return {}
# ------------------------------------------------------------------
# Plugin install hooks
# ------------------------------------------------------------------
# New pipeline: each plugin ships per-runtime adaptors resolved via
# `plugins_registry.resolve()`. Adapters expose hooks below that
# adaptors call to wire plugin content into the runtime.
#
# Default implementations are filesystem-only (write to /configs,
# append to CLAUDE.md). Runtimes with a dynamic tool registry
# (e.g. DeepAgents sub-agents) override the hooks to also register
# in-process state.
def memory_filename(self) -> str:
"""File under /configs that the runtime treats as long-lived memory.
Both Claude Code and DeepAgents read CLAUDE.md natively, so this is
the sensible default. Override only if a runtime expects a different
filename.
"""
return "CLAUDE.md"
def register_tool_hook(self, name: str, fn) -> None:
"""Default no-op. Override on runtimes with a dynamic tool registry.
Runtimes that pick tools up at startup via filesystem scan (Claude
Code reads /configs/skills, LangGraph globs **/*.py) don't need to
do anything here; the adaptor's file-write step is enough.
"""
return None
async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict:
"""Return live transcript entries for the most-recent agent session.
Default implementation returns ``supported: False`` for runtimes
that don't expose a per-session log on disk. Override in subclasses
that DO (Claude Code reads ``~/.claude/projects/<cwd>/<session>.jsonl``).
This is the "look over the agent's shoulder" feature lets canvas /
operators see live tool calls + AI thinking instead of waiting for
the high-level activity log to flush.
Args:
since: line offset to skip (the caller's last cursor; 0 = from start)
limit: max lines to return (caller-side cap, default 100, max 1000)
Returns:
``{runtime, supported, lines, cursor, more, source}`` where
``cursor`` is the new offset to pass on the next poll, ``more``
is True if additional lines remain past ``limit``, and ``source``
is the file path lines were read from (useful for debugging).
"""
return {
"runtime": self.name(),
"supported": False,
"lines": [],
"cursor": since,
"more": False,
"source": None,
}
def register_subagent_hook(self, name: str, spec: dict) -> None:
"""Default no-op. DeepAgents overrides to register a sub-agent."""
return None
def append_to_memory_hook(self, config: AdapterConfig, filename: str, content: str) -> None:
"""Append text to /configs/<filename> if the marker isn't already present.
Idempotent: looks for the first line of `content` as a marker so a
re-install doesn't duplicate the block. Adaptors should pass content
beginning with a unique header (e.g. ``# Plugin: molecule-dev-conventions``).
"""
import os
target = os.path.join(config.config_path, filename)
marker = content.splitlines()[0].strip() if content else ""
existing = ""
if os.path.exists(target):
with open(target) as f:
existing = f.read()
if marker and marker in existing:
logger.info("append_to_memory: %s already contains %r — skipping", filename, marker)
return
os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
with open(target, "a") as f:
if existing and not existing.endswith("\n"):
f.write("\n")
f.write(content if content.endswith("\n") else content + "\n")
logger.info("append_to_memory: appended %d chars to %s", len(content), filename)
async def install_plugins_via_registry(
self,
config: AdapterConfig,
plugins,
) -> list:
"""Drive the new per-runtime adaptor pipeline for every loaded plugin.
For each plugin in `plugins.plugins`, resolve the adaptor for this
runtime (via :func:`plugins_registry.resolve`) and invoke
``install(ctx)``. Returns the list of :class:`InstallResult` so
callers can surface warnings (e.g. raw-drop fallback hits).
Adapters whose runtime supports the new pipeline call this from
``setup()`` instead of the legacy ``inject_plugins()``.
"""
from pathlib import Path
from plugins_registry import InstallContext, resolve
results = []
runtime = self.name().replace("-", "_") # e.g. "claude-code" -> "claude_code"
for plugin in plugins.plugins:
adaptor, source = resolve(plugin.name, runtime, Path(plugin.path))
ctx = InstallContext(
configs_dir=Path(config.config_path),
workspace_id=config.workspace_id,
runtime=runtime,
plugin_root=Path(plugin.path),
memory_filename=self.memory_filename(),
register_tool=self.register_tool_hook,
register_subagent=self.register_subagent_hook,
append_to_memory=lambda fn, c, _cfg=config: self.append_to_memory_hook(_cfg, fn, c),
)
try:
result = await adaptor.install(ctx)
results.append(result)
logger.info(
"Plugin %s installed via %s adaptor (warnings: %d)",
plugin.name, source, len(result.warnings),
)
except Exception as exc:
logger.exception("Plugin %s install via %s failed: %s", plugin.name, source, exc)
return results
async def inject_plugins(self, config: AdapterConfig, plugins) -> None:
"""Legacy hook — kept for backwards compatibility during migration.
Default: drive the new per-runtime adaptor pipeline. Adapters not yet
migrated may still override this with their own logic.
"""
await self.install_plugins_via_registry(config, plugins)
async def _common_setup(self, config: AdapterConfig) -> SetupResult:
"""Shared setup pipeline — loads plugins, skills, tools, coordinator, and builds system prompt.
All adapters can call this to get the full platform feature set.
Returns a SetupResult with LangChain BaseTool instances that adapters
convert to their native format if needed.
"""
from plugins import load_plugins
from skill_loader.loader import load_skills
from coordinator import get_children, get_parent_context, build_children_description
from prompt import build_system_prompt, get_peer_capabilities
from builtin_tools.approval import request_approval
from builtin_tools.delegation import delegate_to_workspace, check_delegation_status
from builtin_tools.memory import commit_memory, search_memory
from builtin_tools.sandbox import run_code
platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080")
# Load plugins from per-workspace dir first, then shared fallback
workspace_plugins_dir = os.path.join(config.config_path, "plugins")
plugins = load_plugins(
workspace_plugins_dir=workspace_plugins_dir,
shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"),
)
await self.inject_plugins(config, plugins)
if plugins.plugin_names:
logger.info(f"Plugins: {', '.join(plugins.plugin_names)}")
# Load skills (workspace + plugin skills, deduped)
loaded_skills = load_skills(config.config_path, config.tools)
seen_skill_ids = {s.metadata.id for s in loaded_skills}
for plugin_skills_dir in plugins.skill_dirs:
plugin_skill_names = [
d for d in os.listdir(plugin_skills_dir)
if os.path.isdir(os.path.join(plugin_skills_dir, d))
]
for skill in load_skills(plugin_skills_dir, plugin_skill_names):
if skill.metadata.id not in seen_skill_ids:
loaded_skills.append(skill)
seen_skill_ids.add(skill.metadata.id)
logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}")
# Assemble tools: 6 core + skill tools
all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code]
for skill in loaded_skills:
all_tools.extend(skill.tools)
# Coordinator mode: detect children and add routing tool
children = await get_children()
is_coordinator = len(children) > 0
if is_coordinator:
from coordinator import route_task_to_team
logger.info(f"Coordinator mode: {len(children)} children")
all_tools.append(route_task_to_team)
# Parent context (if this is a child workspace)
parent_context = await get_parent_context()
# Build system prompt with all context
peers = await get_peer_capabilities(platform_url, config.workspace_id)
coordinator_prompt = build_children_description(children) if is_coordinator else ""
extra_prompts = list(plugins.prompt_fragments)
if coordinator_prompt:
extra_prompts.append(coordinator_prompt)
system_prompt = build_system_prompt(
config.config_path, config.workspace_id, loaded_skills, peers,
prompt_files=config.prompt_files,
plugin_rules=plugins.rules,
plugin_prompts=extra_prompts,
parent_context=parent_context,
)
return SetupResult(
system_prompt=system_prompt,
loaded_skills=loaded_skills,
langchain_tools=all_tools,
is_coordinator=is_coordinator,
children=children,
)
@abstractmethod
async def setup(self, config: AdapterConfig) -> None:
"""One-time setup: validate config, prepare internal state.
Called after deps are installed but before create_executor().
Raise RuntimeError if setup fails (missing deps, bad config, etc.)."""
... # pragma: no cover
@abstractmethod
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
"""Create and return an AgentExecutor ready for A2A integration.
The returned executor's execute() method will be called by the
A2A server's DefaultRequestHandler."""
... # pragma: no cover
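
A minimal sketch of a standalone adapter built against this interface, following the numbered steps in the BaseAdapter docstring. The "echo" runtime and EchoExecutor are hypothetical stand-ins for a real framework integration; the _common_setup() call is the piece a real adapter would keep.

    from adapter_base import BaseAdapter, AdapterConfig
    from a2a.server.agent_execution import AgentExecutor
    from a2a.utils import new_agent_text_message

    class EchoExecutor(AgentExecutor):
        """Hypothetical executor: replies with the first system-prompt line."""
        def __init__(self, system_prompt: str | None):
            self.system_prompt = system_prompt
        async def execute(self, context, event_queue):
            first = (self.system_prompt or "hello").splitlines()[0]
            await event_queue.enqueue_event(new_agent_text_message(f"echo: {first}"))
        async def cancel(self, context, event_queue):
            pass

    class Adapter(BaseAdapter):  # exported as `Adapter` (docstring step 4)
        @staticmethod
        def name() -> str:
            return "echo"  # must match the `runtime` field in config.yaml
        @staticmethod
        def display_name() -> str:
            return "Echo"
        @staticmethod
        def description() -> str:
            return "Minimal example adapter that echoes its system prompt"
        async def setup(self, config: AdapterConfig) -> None:
            result = await self._common_setup(config)  # plugins, skills, tools, prompt
            self._system_prompt = result.system_prompt
        async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
            return EchoExecutor(self._system_prompt)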

View File

@@ -1,58 +1,22 @@
"""Adapter registry — discovers and loads agent infrastructure adapters."""
"""Adapter registry shim.
Adapters extracted to standalone repos (molecule-ai-workspace-template-*).
ADAPTER_MODULE env var is the primary discovery mechanism in production.
This shim provides backward-compatible imports for local dev + tests.
"""
import importlib
import os
import logging
from pathlib import Path
from .base import BaseAdapter, AdapterConfig
from adapter_base import BaseAdapter, AdapterConfig
logger = logging.getLogger(__name__)
_ADAPTER_CACHE: dict[str, type[BaseAdapter]] = {}
def discover_adapters() -> dict[str, type[BaseAdapter]]:
"""Scan subdirectories for adapter modules. Each must export an Adapter class."""
if _ADAPTER_CACHE:
return _ADAPTER_CACHE
adapters_dir = Path(__file__).parent
for entry in sorted(adapters_dir.iterdir()):
if not entry.is_dir() or entry.name.startswith("_"):
continue
try:
mod = importlib.import_module(f"adapters.{entry.name}")
adapter_cls = getattr(mod, "Adapter", None)
if adapter_cls and issubclass(adapter_cls, BaseAdapter):
_ADAPTER_CACHE[adapter_cls.name()] = adapter_cls
logger.debug(f"Loaded adapter: {adapter_cls.name()} ({adapter_cls.display_name()})")
except Exception as e:
# Log but don't crash — adapter may have uninstalled deps
logger.debug(f"Skipped adapter {entry.name}: {e}")
return _ADAPTER_CACHE
def get_adapter(runtime: str) -> type[BaseAdapter]:
"""Get adapter class by runtime name. Raises KeyError if not found."""
adapters = discover_adapters()
if runtime not in adapters:
available = ", ".join(sorted(adapters.keys()))
raise KeyError(f"Unknown runtime '{runtime}'. Available: {available}")
return adapters[runtime]
def list_adapters() -> list[dict]:
"""Return metadata for all discovered adapters (for API/UI)."""
adapters = discover_adapters()
return [
{
"name": cls.name(),
"display_name": cls.display_name(),
"description": cls.description(),
"config_schema": cls.get_config_schema(),
}
for cls in adapters.values()
]
__all__ = ["BaseAdapter", "AdapterConfig", "get_adapter", "list_adapters", "discover_adapters"]
adapter_module = os.environ.get("ADAPTER_MODULE")
if adapter_module:
mod = importlib.import_module(adapter_module)
return getattr(mod, "Adapter")
raise KeyError(
f"No ADAPTER_MODULE set for runtime '{runtime}'. "
"Adapters now live in standalone template repos."
)
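
A hedged sketch of the new discovery path, assuming the shim still exports get_adapter. The package name is hypothetical, and in production ADAPTER_MODULE would be baked into the workspace image's environment rather than set in code.

    import os

    # Point the shim at a standalone adapter repo's installed package.
    os.environ["ADAPTER_MODULE"] = "molecule_adapter_langgraph"  # hypothetical name

    from adapters import get_adapter
    Adapter = get_adapter("langgraph")  # imports $ADAPTER_MODULE, returns its `Adapter`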

View File

@@ -1,3 +0,0 @@
from .adapter import AutoGenAdapter
Adapter = AutoGenAdapter

View File

@@ -1,159 +0,0 @@
"""AutoGen adapter — Microsoft's multi-agent framework with full platform integration.
Uses AutoGen's AssistantAgent with OpenAIChatCompletionClient,
includes all platform tools (delegation, memory, sandbox, approval), skills, and coordinator support.
Requires: pip install autogen-agentchat autogen-ext[openai]
"""
import json
import logging
from adapters.base import BaseAdapter, AdapterConfig
from adapters.shared_runtime import (
build_task_text,
brief_task,
extract_history,
extract_message_text,
set_current_task,
)
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
def _langchain_to_autogen(lc_tool):
"""Wrap a LangChain BaseTool as an AutoGen FunctionTool.
AutoGen requires typed function signatures (no **kwargs).
LangChain tools accept a single string or dict input via ainvoke.
We bridge them with a single `input: str` parameter.
"""
from autogen_core.tools import FunctionTool
async def _invoke(input: str) -> str: # noqa: A002
# Try to parse as JSON dict for tools expecting structured input
try:
parsed = json.loads(input)
if isinstance(parsed, dict):
result = await lc_tool.ainvoke(parsed)
return str(result)
except (json.JSONDecodeError, TypeError):
pass
result = await lc_tool.ainvoke(input)
return str(result)
return FunctionTool(
_invoke,
name=lc_tool.name,
description=lc_tool.description or lc_tool.name,
)
class AutoGenAdapter(BaseAdapter):
def __init__(self):
self.system_prompt = None
self.autogen_tools = []
@staticmethod
def name() -> str:
return "autogen"
@staticmethod
def display_name() -> str:
return "AutoGen"
@staticmethod
def description() -> str:
return "Microsoft AutoGen — conversable agents with tool use and multi-agent orchestration"
@staticmethod
def get_config_schema() -> dict:
return {
"model": {"type": "string", "description": "OpenAI model (e.g. openai:gpt-4.1-mini)"},
"skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"},
"tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools"},
}
async def setup(self, config: AdapterConfig) -> None:
try:
from autogen_agentchat.agents import AssistantAgent # noqa: F401
logger.info("AutoGen AgentChat loaded")
except ImportError:
raise RuntimeError("autogen-agentchat not installed.")
result = await self._common_setup(config)
self.system_prompt = result.system_prompt
self.autogen_tools = [_langchain_to_autogen(t) for t in result.langchain_tools]
logger.info(f"AutoGen tools: {[t.name for t in self.autogen_tools]}")
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
return AutoGenA2AExecutor(
model=config.model,
system_prompt=self.system_prompt,
autogen_tools=self.autogen_tools,
heartbeat=config.heartbeat,
)
class AutoGenA2AExecutor(AgentExecutor):
"""Wraps AutoGen's AssistantAgent with full platform tools."""
def __init__(self, model: str, system_prompt: str | None, autogen_tools: list, heartbeat=None):
self.model = model
self.system_prompt = system_prompt
self.autogen_tools = autogen_tools
self._heartbeat = heartbeat
async def execute(self, context, event_queue):
from a2a.utils import new_agent_text_message
user_message = extract_message_text(context)
if not user_message:
await event_queue.enqueue_event(new_agent_text_message("No message provided"))
return
await set_current_task(self._heartbeat, brief_task(user_message))
try:
from autogen_agentchat.agents import AssistantAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
model_str = self.model
if ":" in model_str:
_, model_name = model_str.split(":", 1)
else:
model_name = model_str
task_text = build_task_text(user_message, extract_history(context))
client = OpenAIChatCompletionClient(model=model_name)
agent = AssistantAgent(
name="agent",
model_client=client,
system_message=self.system_prompt or "You are a helpful assistant.",
tools=self.autogen_tools,
)
result = await agent.run(task=task_text)
reply = ""
if hasattr(result, "messages") and result.messages:
for msg in reversed(result.messages):
if hasattr(msg, "content") and isinstance(msg.content, str):
reply = msg.content
break
if not reply:
reply = str(result)
except Exception as e:
reply = f"AutoGen error: {e}"
finally:
await set_current_task(self._heartbeat, "")
await event_queue.enqueue_event(new_agent_text_message(reply))
async def cancel(self, context, event_queue): # pragma: no cover
pass
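
Because _langchain_to_autogen flattens every tool signature to a single input: str parameter, structured tools receive their arguments as a JSON object string. A small self-contained mirror of the bridge's dispatch rule (function and argument names are illustrative, not from this commit):

    import json

    def route_input(raw: str):
        """Mirror of _invoke's rule: JSON objects become kwargs dicts,
        everything else is passed through to the tool as a plain string."""
        try:
            parsed = json.loads(raw)
            if isinstance(parsed, dict):
                return ("dict", parsed)
        except (json.JSONDecodeError, TypeError):
            pass
        return ("str", raw)

    assert route_input('{"limit": 5}') == ("dict", {"limit": 5})
    assert route_input("plain text request") == ("str", "plain text request")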

View File

@@ -1,309 +1,2 @@
"""Base adapter interface for agent infrastructure providers."""
import logging
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
@dataclass
class SetupResult:
"""Result from the shared _common_setup() pipeline."""
system_prompt: str
loaded_skills: list # LoadedSkill instances
langchain_tools: list # LangChain BaseTool instances
is_coordinator: bool
children: list # child workspace dicts
@dataclass
class AdapterConfig:
"""Standardized config passed to every adapter."""
model: str # e.g. "anthropic:claude-sonnet-4-6" or "openrouter:google/gemini-2.5-flash"
system_prompt: str | None = None # Assembled system prompt text
tools: list[str] = field(default_factory=list) # Tool names from config.yaml
runtime_config: dict[str, Any] = field(default_factory=dict) # Raw runtime_config block
config_path: str = "/configs" # Path to configs directory
workspace_id: str = "" # Workspace identifier
prompt_files: list[str] = field(default_factory=list) # Ordered prompt file names
a2a_port: int = 8000 # Port for A2A server
heartbeat: Any = None # HeartbeatLoop instance
class BaseAdapter(ABC):
"""Interface every agent infrastructure adapter must implement.
To add a new agent infra:
1. Create workspace-template/adapters/<your_infra>/
2. Implement adapter.py with a class extending BaseAdapter
3. Add requirements.txt with your infra's dependencies
4. Export as Adapter in __init__.py
5. Submit a PR
"""
@staticmethod
@abstractmethod
def name() -> str: # pragma: no cover
"""Return the runtime identifier (e.g. 'langgraph', 'crewai').
This must match the 'runtime' field in config.yaml."""
...
@staticmethod
@abstractmethod
def display_name() -> str: # pragma: no cover
"""Human-readable name for UI display."""
...
@staticmethod
@abstractmethod
def description() -> str: # pragma: no cover
"""Short description of what this adapter provides."""
...
@staticmethod
def get_config_schema() -> dict:
"""Return JSON Schema for runtime_config fields this adapter supports.
Used by the Config tab UI to render the right form fields.
Override in subclasses for adapter-specific settings."""
return {}
# ------------------------------------------------------------------
# Plugin install hooks
# ------------------------------------------------------------------
# New pipeline: each plugin ships per-runtime adaptors resolved via
# `plugins_registry.resolve()`. Adapters expose hooks below that
# adaptors call to wire plugin content into the runtime.
#
# Default implementations are filesystem-only (write to /configs,
# append to CLAUDE.md). Runtimes with a dynamic tool registry
# (e.g. DeepAgents sub-agents) override the hooks to also register
# in-process state.
def memory_filename(self) -> str:
"""File under /configs that the runtime treats as long-lived memory.
Both Claude Code and DeepAgents read CLAUDE.md natively, so this is
the sensible default. Override only if a runtime expects a different
filename.
"""
return "CLAUDE.md"
def register_tool_hook(self, name: str, fn) -> None:
"""Default no-op. Override on runtimes with a dynamic tool registry.
Runtimes that pick tools up at startup via filesystem scan (Claude
Code reads /configs/skills, LangGraph globs **/*.py) don't need to
do anything here; the adaptor's file-write step is enough.
"""
return None
async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict:
"""Return live transcript entries for the most-recent agent session.
Default implementation returns ``supported: False`` for runtimes
that don't expose a per-session log on disk. Override in subclasses
that DO (Claude Code reads ``~/.claude/projects/<cwd>/<session>.jsonl``).
This is the "look over the agent's shoulder" feature lets canvas /
operators see live tool calls + AI thinking instead of waiting for
the high-level activity log to flush.
Args:
since: line offset to skip (the caller's last cursor; 0 = from start)
limit: max lines to return (caller-side cap, default 100, max 1000)
Returns:
``{runtime, supported, lines, cursor, more, source}`` where
``cursor`` is the new offset to pass on the next poll, ``more``
is True if additional lines remain past ``limit``, and ``source``
is the file path lines were read from (useful for debugging).
"""
return {
"runtime": self.name(),
"supported": False,
"lines": [],
"cursor": since,
"more": False,
"source": None,
}
def register_subagent_hook(self, name: str, spec: dict) -> None:
"""Default no-op. DeepAgents overrides to register a sub-agent."""
return None
def append_to_memory_hook(self, config: AdapterConfig, filename: str, content: str) -> None:
"""Append text to /configs/<filename> if the marker isn't already present.
Idempotent: looks for the first line of `content` as a marker so a
re-install doesn't duplicate the block. Adaptors should pass content
beginning with a unique header (e.g. ``# Plugin: molecule-dev-conventions``).
"""
import os
target = os.path.join(config.config_path, filename)
marker = content.splitlines()[0].strip() if content else ""
existing = ""
if os.path.exists(target):
with open(target) as f:
existing = f.read()
if marker and marker in existing:
logger.info("append_to_memory: %s already contains %r — skipping", filename, marker)
return
os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
with open(target, "a") as f:
if existing and not existing.endswith("\n"):
f.write("\n")
f.write(content if content.endswith("\n") else content + "\n")
logger.info("append_to_memory: appended %d chars to %s", len(content), filename)
async def install_plugins_via_registry(
self,
config: AdapterConfig,
plugins,
) -> list:
"""Drive the new per-runtime adaptor pipeline for every loaded plugin.
For each plugin in `plugins.plugins`, resolve the adaptor for this
runtime (via :func:`plugins_registry.resolve`) and invoke
``install(ctx)``. Returns the list of :class:`InstallResult` so
callers can surface warnings (e.g. raw-drop fallback hits).
Adapters whose runtime supports the new pipeline call this from
``setup()`` instead of the legacy ``inject_plugins()``.
"""
from pathlib import Path
from plugins_registry import InstallContext, resolve
results = []
runtime = self.name().replace("-", "_") # e.g. "claude-code" -> "claude_code"
for plugin in plugins.plugins:
adaptor, source = resolve(plugin.name, runtime, Path(plugin.path))
ctx = InstallContext(
configs_dir=Path(config.config_path),
workspace_id=config.workspace_id,
runtime=runtime,
plugin_root=Path(plugin.path),
memory_filename=self.memory_filename(),
register_tool=self.register_tool_hook,
register_subagent=self.register_subagent_hook,
append_to_memory=lambda fn, c, _cfg=config: self.append_to_memory_hook(_cfg, fn, c),
)
try:
result = await adaptor.install(ctx)
results.append(result)
logger.info(
"Plugin %s installed via %s adaptor (warnings: %d)",
plugin.name, source, len(result.warnings),
)
except Exception as exc:
logger.exception("Plugin %s install via %s failed: %s", plugin.name, source, exc)
return results
async def inject_plugins(self, config: AdapterConfig, plugins) -> None:
"""Legacy hook — kept for backwards compatibility during migration.
Default: drive the new per-runtime adaptor pipeline. Adapters not yet
migrated may still override this with their own logic.
"""
await self.install_plugins_via_registry(config, plugins)
async def _common_setup(self, config: AdapterConfig) -> SetupResult:
"""Shared setup pipeline — loads plugins, skills, tools, coordinator, and builds system prompt.
All adapters can call this to get the full platform feature set.
Returns a SetupResult with LangChain BaseTool instances that adapters
convert to their native format if needed.
"""
from plugins import load_plugins
from skill_loader.loader import load_skills
from coordinator import get_children, get_parent_context, build_children_description
from prompt import build_system_prompt, get_peer_capabilities
from builtin_tools.approval import request_approval
from builtin_tools.delegation import delegate_to_workspace, check_delegation_status
from builtin_tools.memory import commit_memory, search_memory
from builtin_tools.sandbox import run_code
platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080")
# Load plugins from per-workspace dir first, then shared fallback
workspace_plugins_dir = os.path.join(config.config_path, "plugins")
plugins = load_plugins(
workspace_plugins_dir=workspace_plugins_dir,
shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"),
)
await self.inject_plugins(config, plugins)
if plugins.plugin_names:
logger.info(f"Plugins: {', '.join(plugins.plugin_names)}")
# Load skills (workspace + plugin skills, deduped)
loaded_skills = load_skills(config.config_path, config.tools)
seen_skill_ids = {s.metadata.id for s in loaded_skills}
for plugin_skills_dir in plugins.skill_dirs:
plugin_skill_names = [
d for d in os.listdir(plugin_skills_dir)
if os.path.isdir(os.path.join(plugin_skills_dir, d))
]
for skill in load_skills(plugin_skills_dir, plugin_skill_names):
if skill.metadata.id not in seen_skill_ids:
loaded_skills.append(skill)
seen_skill_ids.add(skill.metadata.id)
logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}")
# Assemble tools: 6 core + skill tools
all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code]
for skill in loaded_skills:
all_tools.extend(skill.tools)
# Coordinator mode: detect children and add routing tool
children = await get_children()
is_coordinator = len(children) > 0
if is_coordinator:
from coordinator import route_task_to_team
logger.info(f"Coordinator mode: {len(children)} children")
all_tools.append(route_task_to_team)
# Parent context (if this is a child workspace)
parent_context = await get_parent_context()
# Build system prompt with all context
peers = await get_peer_capabilities(platform_url, config.workspace_id)
coordinator_prompt = build_children_description(children) if is_coordinator else ""
extra_prompts = list(plugins.prompt_fragments)
if coordinator_prompt:
extra_prompts.append(coordinator_prompt)
system_prompt = build_system_prompt(
config.config_path, config.workspace_id, loaded_skills, peers,
prompt_files=config.prompt_files,
plugin_rules=plugins.rules,
plugin_prompts=extra_prompts,
parent_context=parent_context,
)
return SetupResult(
system_prompt=system_prompt,
loaded_skills=loaded_skills,
langchain_tools=all_tools,
is_coordinator=is_coordinator,
children=children,
)
@abstractmethod
async def setup(self, config: AdapterConfig) -> None:
"""One-time setup: validate config, prepare internal state.
Called after deps are installed but before create_executor().
Raise RuntimeError if setup fails (missing deps, bad config, etc.)."""
... # pragma: no cover
@abstractmethod
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
"""Create and return an AgentExecutor ready for A2A integration.
The returned executor's execute() method will be called by the
A2A server's DefaultRequestHandler."""
... # pragma: no cover
"""Re-export from adapter_base for backward compat."""
from adapter_base import * # noqa: F401,F403

View File

@@ -1,3 +0,0 @@
from .adapter import ClaudeCodeAdapter
Adapter = ClaudeCodeAdapter

View File

@@ -1,167 +0,0 @@
"""Claude Code adapter — wraps the Claude Code CLI as an agent runtime."""
import json
import os
import logging
from pathlib import Path
from adapters.base import BaseAdapter, AdapterConfig
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
# Cap one transcript response at 1000 lines so a paranoid client can't OOM
# the workspace by polling /transcript?limit=999999.
_TRANSCRIPT_MAX_LIMIT = 1000
class ClaudeCodeAdapter(BaseAdapter):
@staticmethod
def name() -> str:
return "claude-code"
@staticmethod
def display_name() -> str:
return "Claude Code"
@staticmethod
def description() -> str:
return "Claude Code CLI — full agentic coding with hooks, CLAUDE.md, auto-memory, and MCP support"
@staticmethod
def get_config_schema() -> dict:
return {
"model": {"type": "string", "description": "Claude model (e.g. sonnet, opus, haiku)", "default": "sonnet"},
"required_env": {"type": "array", "description": "Required env vars", "default": ["CLAUDE_CODE_OAUTH_TOKEN"]},
"timeout": {"type": "integer", "description": "Timeout in seconds (0 = no timeout)", "default": 0},
}
async def setup(self, config: AdapterConfig) -> None:
"""Install plugins via the per-runtime adaptor registry.
The legacy claude-code-specific ``inject_plugins()`` override is gone:
each plugin now ships (or has registered in the platform registry) a
per-runtime adaptor, and ``BaseAdapter.install_plugins_via_registry``
routes installs through it. The Claude Code SDK still reads
``CLAUDE.md`` and ``/configs/skills/`` natively, and the default
:class:`AgentskillsAdaptor` writes to both.
"""
from plugins import load_plugins
workspace_plugins_dir = os.path.join(config.config_path, "plugins")
plugins = load_plugins(
workspace_plugins_dir=workspace_plugins_dir,
shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"),
)
await self.install_plugins_via_registry(config, plugins)
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
from claude_sdk_executor import ClaudeSDKExecutor
# Load system prompt if exists
system_prompt = config.system_prompt
if not system_prompt:
prompt_file = os.path.join(config.config_path, "system-prompt.md")
if os.path.exists(prompt_file):
with open(prompt_file) as f:
system_prompt = f.read()
# runtime_config may arrive as a dict (from main.py vars(...)) or as a
# RuntimeConfig dataclass. Read `model` defensively from either shape.
rc = config.runtime_config
if isinstance(rc, dict):
model = rc.get("model") or "sonnet"
else:
model = getattr(rc, "model", None) or "sonnet"
return ClaudeSDKExecutor(
system_prompt=system_prompt,
config_path=config.config_path,
heartbeat=config.heartbeat,
model=model,
)
async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict:
"""Read the live Claude Code session transcript.
Claude Code writes every session to
``$HOME/.claude/projects/<cwd-as-dirname>/<session-uuid>.jsonl``
where every line is a JSON event (user/assistant/tool_use/attachment/etc.).
We pick the most-recently-modified .jsonl in the projects dir for
the agent's working directory, then return ``[since:since+limit]``.
Returns ``supported: True`` even if no .jsonl exists yet (empty
``lines`` + ``cursor=0``) so the canvas can show "agent hasn't
produced output yet" instead of "feature unavailable".
"""
limit = max(1, min(limit, _TRANSCRIPT_MAX_LIMIT))
since = max(0, since)
# Resolve the projects-dir name. Claude Code maps cwd → dirname by
# replacing "/" with "-" (so "/configs" → "-configs"). The exact
# rule lives inside the CLI binary, but the leading-dash + path-
# without-trailing-slash pattern is stable across versions.
#
# Match ClaudeSDKExecutor._resolve_cwd: prefer /workspace if populated,
# else /configs. Override via CLAUDE_PROJECT_CWD for tests.
WORKSPACE_MOUNT = "/workspace"
CONFIG_MOUNT = "/configs"
cwd_override = os.environ.get("CLAUDE_PROJECT_CWD")
if cwd_override:
cwd = cwd_override
elif os.path.isdir(WORKSPACE_MOUNT) and os.listdir(WORKSPACE_MOUNT):
cwd = WORKSPACE_MOUNT
else:
cwd = CONFIG_MOUNT
# Normalize: strip trailing slash, replace path separators with "-"
cwd_norm = cwd.rstrip("/") or "/"
projdir_name = cwd_norm.replace("/", "-") # "/configs" → "-configs"
home = Path(os.environ.get("HOME", "/home/agent"))
projdir = home / ".claude" / "projects" / projdir_name
result_base = {
"runtime": self.name(),
"supported": True,
"lines": [],
"cursor": since,
"more": False,
"source": str(projdir),
}
if not projdir.is_dir():
return result_base
# Pick most-recently-modified .jsonl
candidates = sorted(projdir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True)
if not candidates:
return result_base
target = candidates[0]
result_base["source"] = str(target)
lines = []
more = False
try:
with target.open("r") as f:
for i, raw in enumerate(f):
if i < since:
continue
if len(lines) >= limit:
more = True
break
raw = raw.strip()
if not raw:
continue
try:
lines.append(json.loads(raw))
except json.JSONDecodeError:
# Keep malformed lines as parse-error records so the cursor advances
lines.append({"_parse_error": True, "_raw": raw[:200]})
except OSError as exc:
logger.warning("transcript_lines: read failed for %s: %s", target, exc)
return result_base
result_base["lines"] = lines
result_base["cursor"] = since + len(lines)
result_base["more"] = more
return result_base
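
A hedged sketch of a caller-side poll loop over the cursor/more contract described above; follow_transcript and the two-second sleep are illustrative, not part of this commit.

    import asyncio

    async def follow_transcript(adapter):
        cursor = 0
        while True:
            page = await adapter.transcript_lines(since=cursor, limit=100)
            if not page["supported"]:
                return  # runtime exposes no on-disk session log
            for event in page["lines"]:
                print(event.get("type", "event"), str(event)[:120])
            cursor = page["cursor"]  # resume after the lines just read
            if not page["more"]:
                await asyncio.sleep(2)  # caught up; wait before polling again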

View File

@@ -1,3 +0,0 @@
from .adapter import CrewAIAdapter
Adapter = CrewAIAdapter

View File

@@ -1,144 +0,0 @@
"""CrewAI adapter — role-based multi-agent framework with full platform integration.
Creates a CrewAI Agent + Task + Crew with all platform tools (delegation, memory,
sandbox, approval), skills, plugins, and coordinator support.
Requires: pip install crewai
"""
import asyncio
import logging
from adapters.base import BaseAdapter, AdapterConfig
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
def _langchain_to_crewai(lc_tool):
"""Wrap a LangChain BaseTool as a sync CrewAI @tool.
CrewAI's @tool decorator requires the function to have a docstring
at decoration time, so we set __doc__ before applying the decorator.
"""
from crewai.tools import tool as crewai_tool
def wrapper(**kwargs) -> str:
"""Placeholder."""
result = asyncio.get_event_loop().run_until_complete(lc_tool.ainvoke(kwargs))
return str(result)
wrapper.__name__ = lc_tool.name
wrapper.__doc__ = lc_tool.description or f"Tool: {lc_tool.name}"
return crewai_tool(lc_tool.name)(wrapper)
class CrewAIAdapter(BaseAdapter):
def __init__(self):
self.system_prompt = None
self.crewai_tools = []
@staticmethod
def name() -> str:
return "crewai"
@staticmethod
def display_name() -> str:
return "CrewAI"
@staticmethod
def description() -> str:
return "CrewAI — role-based agent with task delegation and crew orchestration"
@staticmethod
def get_config_schema() -> dict:
return {
"model": {"type": "string", "description": "LLM model (e.g. openai:gpt-4.1-mini)"},
"skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"},
"tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools"},
}
async def setup(self, config: AdapterConfig) -> None:
try:
import crewai # noqa: F401
logger.info(f"CrewAI version: {crewai.__version__}")
except ImportError:
raise RuntimeError("crewai not installed.")
result = await self._common_setup(config)
self.system_prompt = result.system_prompt
self.crewai_tools = [_langchain_to_crewai(t) for t in result.langchain_tools]
logger.info(f"CrewAI tools: {[t.name for t in result.langchain_tools]}")
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
return CrewAIA2AExecutor(
model=config.model,
system_prompt=self.system_prompt,
crewai_tools=self.crewai_tools,
heartbeat=config.heartbeat,
)
class CrewAIA2AExecutor(AgentExecutor):
"""Wraps CrewAI's Agent + Crew.kickoff() with full platform tools."""
def __init__(self, model: str, system_prompt: str | None, crewai_tools: list, heartbeat=None):
self.model = model
self.system_prompt = system_prompt
self.crewai_tools = crewai_tools
self._heartbeat = heartbeat
async def execute(self, context, event_queue):
from a2a.utils import new_agent_text_message
from adapters.shared_runtime import extract_history, build_task_text, brief_task, set_current_task
from adapters.shared_runtime import extract_message_text
user_message = extract_message_text(context)
if not user_message:
await event_queue.enqueue_event(new_agent_text_message("No message provided"))
return
await set_current_task(self._heartbeat, brief_task(user_message))
try:
from crewai import Agent, Task, Crew
model_str = self.model
if model_str.startswith("openai:"):
model_str = model_str.replace("openai:", "openai/")
backstory = self.system_prompt or "You are a helpful AI agent."
history = extract_history(context)
task_desc = build_task_text(user_message, history)
agent = Agent(
role=backstory.split("\n")[0][:100],
goal="Help the user and coordinate with peer agents when needed",
backstory=backstory,
llm=model_str,
tools=self.crewai_tools,
verbose=False,
)
task = Task(
description=task_desc,
expected_output="A helpful response",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task], verbose=False)
result = await asyncio.to_thread(crew.kickoff)
reply = str(result)
except Exception as e:
reply = f"CrewAI error: {e}"
finally:
await set_current_task(self._heartbeat, "")
await event_queue.enqueue_event(new_agent_text_message(reply))
async def cancel(self, context, event_queue): # pragma: no cover
pass

View File

@@ -1,3 +0,0 @@
from .adapter import DeepAgentsAdapter
Adapter = DeepAgentsAdapter

View File

@@ -1,184 +0,0 @@
"""DeepAgents adapter — fully utilizing the DeepAgents SDK.
Uses create_deep_agent() with:
- FilesystemBackend(/workspace): persistent file access across messages
- MemorySaver checkpointer: session continuity
- Memory files: CLAUDE.md loaded natively
- Filesystem permissions: restrict writes to /workspace and /configs
- InMemoryCache: avoid repeat API calls
- All built-in tools: write_todos, read_file, write_file, edit_file,
ls, glob, grep, execute, task
Supports: anthropic, openai, openrouter, groq, cerebras, qianfan, google_genai, ollama.
"""
import os
import glob as globmod
import logging
from adapters.base import BaseAdapter, AdapterConfig
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
class DeepAgentsAdapter(BaseAdapter):
def __init__(self):
self.agent = None
self._checkpointer = None
@staticmethod
def name() -> str:
return "deepagents"
@staticmethod
def display_name() -> str:
return "DeepAgents"
@staticmethod
def description() -> str:
return "LangChain DeepAgents — planning, filesystem, sub-agents, shell execution, session persistence"
@staticmethod
def get_config_schema() -> dict:
return {
"model": {
"type": "string",
"description": "provider:model (e.g. google_genai:gemini-2.5-flash, groq:llama-3.3-70b-versatile)",
"default": "google_genai:gemini-2.5-flash",
},
"skills": {"type": "array", "items": {"type": "string"}},
"tools": {"type": "array", "items": {"type": "string"}},
}
def _create_llm(self, model_str: str):
"""Create a LangChain LLM from a provider:model string."""
if ":" in model_str:
provider, model_name = model_str.split(":", 1)
else:
provider, model_name = "anthropic", model_str
if provider == "openai":
from langchain_openai import ChatOpenAI
kwargs = {"model": model_name}
base_url = os.environ.get("OPENAI_BASE_URL", "")
if base_url:
kwargs["openai_api_base"] = base_url
return ChatOpenAI(**kwargs)
elif provider == "openrouter":
from langchain_openai import ChatOpenAI
return ChatOpenAI(
model=model_name,
openai_api_key=os.environ.get("OPENROUTER_API_KEY", os.environ.get("OPENAI_API_KEY", "")),
openai_api_base="https://openrouter.ai/api/v1",
max_tokens=int(os.environ.get("MAX_TOKENS", "2048")),
)
elif provider == "groq":
from langchain_openai import ChatOpenAI
return ChatOpenAI(
model=model_name,
openai_api_key=os.environ.get("GROQ_API_KEY", ""),
openai_api_base="https://api.groq.com/openai/v1",
)
elif provider == "cerebras":
from langchain_openai import ChatOpenAI
return ChatOpenAI(
model=model_name,
openai_api_key=os.environ.get("CEREBRAS_API_KEY", ""),
openai_api_base="https://api.cerebras.ai/v1",
)
elif provider == "qianfan":
from langchain_openai import ChatOpenAI
return ChatOpenAI(
model=model_name,
openai_api_key=os.environ.get("QIANFAN_API_KEY", os.environ.get("AISTUDIO_API_KEY", "")),
openai_api_base="https://qianfan.baidubce.com/v2",
)
elif provider == "anthropic":
from langchain_anthropic import ChatAnthropic
kwargs = {"model": model_name}
base_url = os.environ.get("ANTHROPIC_BASE_URL", "")
if base_url:
kwargs["anthropic_api_url"] = base_url
return ChatAnthropic(**kwargs)
elif provider == "google_genai":
from langchain_google_genai import ChatGoogleGenerativeAI
return ChatGoogleGenerativeAI(model=model_name)
elif provider == "ollama":
from langchain_ollama import ChatOllama
return ChatOllama(model=model_name)
else:
raise ValueError(f"Unsupported model provider: {provider}")
async def setup(self, config: AdapterConfig) -> None:
try:
from deepagents import create_deep_agent, FilesystemPermission
from deepagents.backends import FilesystemBackend
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.caches import InMemoryCache
except ImportError as e:
raise RuntimeError(f"deepagents not installed: {e}")
result = await self._common_setup(config)
logger.info("DeepAgents platform tools: %s", [t.name for t in result.langchain_tools])
llm = self._create_llm(config.model)
# FilesystemBackend — persistent file access
workspace_dir = "/workspace" if os.path.isdir("/workspace") else "/configs"
# virtual_mode=False: read/write the real bind-mounted filesystem so
# read_file/ls/write_file/edit_file match what `bash` sees. With
# virtual_mode=True agents operate on an in-memory snapshot and
# report real files as "missing" (and writes don't persist across
# restarts). Permissions below still scope access to /workspace + /configs.
backend = FilesystemBackend(root_dir=workspace_dir, virtual_mode=False)
# MemorySaver — session continuity
self._checkpointer = MemorySaver()
# Memory — load CLAUDE.md natively
memory_files = []
claude_md = os.path.join(config.config_path, "CLAUDE.md")
if os.path.exists(claude_md):
memory_files.append(claude_md)
# Filesystem permissions
permissions = [
FilesystemPermission(operations=["read", "write"], paths=["/workspace/**"], mode="allow"),
FilesystemPermission(operations=["read", "write"], paths=["/configs/**"], mode="allow"),
]
# Native skills from /configs/skills/*.py
deepagent_skills = []
skills_dir = os.path.join(config.config_path, "skills")
if os.path.isdir(skills_dir):
deepagent_skills = globmod.glob(os.path.join(skills_dir, "**", "*.py"), recursive=True)
# LLM cache
cache = InMemoryCache()
self.agent = create_deep_agent(
model=llm,
tools=result.langchain_tools,
system_prompt=result.system_prompt,
backend=backend,
checkpointer=self._checkpointer,
memory=memory_files if memory_files else None,
permissions=permissions,
skills=deepagent_skills if deepagent_skills else None,
cache=cache,
)
logger.info(
"DeepAgents: %d tools, backend=%s, checkpointer=MemorySaver, "
"cache=InMemoryCache, memory=%d, permissions=%d, skills=%d",
len(result.langchain_tools), type(backend).__name__,
len(memory_files), len(permissions), len(deepagent_skills),
)
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
if self.agent is None:
raise RuntimeError("setup() must be called before create_executor()")
from a2a_executor import LangGraphA2AExecutor
return LangGraphA2AExecutor(self.agent, heartbeat=config.heartbeat, model=config.model)
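
A hypothetical config.yaml block selecting this adapter; the field names follow get_config_schema() above, but the exact nesting is an assumption, not taken from this commit.

    runtime: deepagents
    runtime_config:
      model: google_genai:gemini-2.5-flash
      skills:
        - web-research   # hypothetical skill folder under /configs/skills
      tools:
        - run_code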

View File

@@ -1,3 +0,0 @@
from .adapter import GeminiCLIAdapter as Adapter
__all__ = ["Adapter"]

View File

@@ -1,141 +0,0 @@
"""Gemini CLI adapter — wraps Google's Gemini CLI as an agent runtime.
Gemini CLI (github.com/google-gemini/gemini-cli, ~101k stars, Apache 2.0)
is structurally identical to the Claude Code adapter: a single-agent agentic
CLI with file/shell tools, MCP support, and a ReAct loop backed by Gemini
instead of Claude.
Key differences from claude-code:
- Auth: GEMINI_API_KEY env var (no OAuth token needed)
- Memory file: GEMINI.md (equivalent of Claude Code's CLAUDE.md)
- MCP config: ~/.gemini/settings.json (not via --mcp-config flag)
- Executor: CLIAgentExecutor (no Python SDK; uses gemini CLI subprocess)
"""
import json
import logging
import os
import sys
from pathlib import Path
from a2a.server.agent_execution import AgentExecutor
from adapters.base import BaseAdapter, AdapterConfig
logger = logging.getLogger(__name__)
class GeminiCLIAdapter(BaseAdapter):
@staticmethod
def name() -> str:
return "gemini-cli"
@staticmethod
def display_name() -> str:
return "Gemini CLI"
@staticmethod
def description() -> str:
return (
"Google Gemini CLI — agentic coding with file/shell tools, "
"MCP support, and a ReAct loop backed by Gemini models"
)
@staticmethod
def get_config_schema() -> dict:
return {
"model": {
"type": "string",
"description": "Gemini model (e.g. gemini-2.5-pro, gemini-2.5-flash)",
"default": "gemini-2.5-pro",
},
"required_env": {
"type": "array",
"description": "Required env vars",
"default": ["GEMINI_API_KEY"],
},
"timeout": {
"type": "integer",
"description": "Timeout in seconds (0 = no timeout)",
"default": 0,
},
}
def memory_filename(self) -> str:
"""Gemini CLI reads GEMINI.md as its persistent context file."""
return "GEMINI.md"
async def setup(self, config: AdapterConfig) -> None:
"""Wire MCP server into ~/.gemini/settings.json and seed GEMINI.md.
Gemini CLI does not accept an --mcp-config flag; instead, MCP servers
are declared in ~/.gemini/settings.json under the "mcpServers" key.
This method merges the A2A MCP server into that file, preserving any
existing keys (e.g. user's own MCP tools).
Also seeds GEMINI.md from system-prompt.md if GEMINI.md is absent,
so the agent has role context on first boot.
"""
from executor_helpers import get_mcp_server_path
# -- MCP wiring --------------------------------------------------
gemini_dir = Path.home() / ".gemini"
gemini_dir.mkdir(parents=True, exist_ok=True)
settings_path = gemini_dir / "settings.json"
settings: dict = {}
if settings_path.exists():
try:
settings = json.loads(settings_path.read_text())
except Exception as exc:
logger.warning("gemini-cli: could not parse %s: %s", settings_path, exc)
settings = {}
settings.setdefault("mcpServers", {})
settings["mcpServers"]["a2a"] = {
"command": sys.executable,
"args": [get_mcp_server_path()],
}
try:
settings_path.write_text(json.dumps(settings, indent=2))
logger.info("gemini-cli: wrote MCP config to %s", settings_path)
except OSError as exc:
logger.warning("gemini-cli: could not write %s: %s", settings_path, exc)
# -- GEMINI.md seed ----------------------------------------------
gemini_md = Path(config.config_path) / "GEMINI.md"
system_prompt_file = Path(config.config_path) / "system-prompt.md"
if not gemini_md.exists() and system_prompt_file.exists():
try:
gemini_md.write_text(system_prompt_file.read_text())
logger.info("gemini-cli: seeded GEMINI.md from system-prompt.md")
except OSError as exc:
logger.warning("gemini-cli: could not seed GEMINI.md: %s", exc)
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
from cli_executor import CLIAgentExecutor
from config import RuntimeConfig
rc = config.runtime_config
if isinstance(rc, dict):
model = rc.get("model") or "gemini-2.5-pro"
timeout = int(rc.get("timeout") or 0)
else:
model = getattr(rc, "model", None) or "gemini-2.5-pro"
timeout = int(getattr(rc, "timeout", None) or 0)
runtime_config = RuntimeConfig(
model=model,
timeout=timeout,
required_env=["GEMINI_API_KEY"],
)
return CLIAgentExecutor(
runtime="gemini-cli",
runtime_config=runtime_config,
system_prompt=config.system_prompt,
config_path=config.config_path,
heartbeat=config.heartbeat,
)
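
After setup(), ~/.gemini/settings.json contains an entry along these lines; the interpreter and script paths are illustrative, since the real values come from sys.executable and get_mcp_server_path() at runtime, and any pre-existing keys in the file are preserved.

    {
      "mcpServers": {
        "a2a": {
          "command": "/usr/local/bin/python3",
          "args": ["/app/mcp_server.py"]
        }
      }
    }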

View File

@@ -1,6 +0,0 @@
from .adapter import HermesAdapter
from .executor import create_executor
Adapter = HermesAdapter
__all__ = ["create_executor", "HermesAdapter", "Adapter"]

View File

@@ -1,76 +0,0 @@
"""Hermes adapter — Nous Research Hermes models via Nous Portal or OpenRouter.
Uses the OpenAI-compatible client (openai>=1.0.0) to communicate with
either the Nous Portal directly (HERMES_API_KEY) or OpenRouter as a
fallback (OPENROUTER_API_KEY).
"""
import os
from adapters.base import BaseAdapter, AdapterConfig
class HermesAdapter(BaseAdapter):
@staticmethod
def name() -> str:
return "hermes"
@staticmethod
def display_name() -> str:
return "Hermes (Nous Research)"
@staticmethod
def description() -> str:
return "Hermes models via Nous Portal or OpenRouter — openai>=1.0.0 compatible client"
@staticmethod
def get_config_schema() -> dict:
return {
"model": {
"type": "string",
"description": (
"Hermes model ID (e.g. nousresearch/hermes-3-llama-3.1-405b for OpenRouter "
"or hermes-3-llama-3.1-405b for Nous Portal)"
),
},
}
async def setup(self, config: AdapterConfig) -> None: # pragma: no cover
try:
import openai # noqa: F401
except ImportError as e:
raise RuntimeError(
"Hermes adapter requires openai>=1.0.0 — "
"install with: pip install 'openai>=1.0.0'"
) from e
async def create_executor(self, config: AdapterConfig): # pragma: no cover
"""Create and return a HermesA2AExecutor using key resolution from env/config."""
from .executor import create_executor, HermesA2AExecutor
# Resolve API key: prefer workspace secrets (runtime_config), then env vars
hermes_api_key = config.runtime_config.get("hermes_api_key") or None
# Phase 3 escalation ladder — read from runtime_config.escalation_ladder
# if present. The platform's org importer copies the ladder from
# org.yaml (runtime_config.escalation_ladder) into the container's
# /configs/config.yaml, and the workspace-template loader surfaces it
# here. Empty / missing = single-shot behaviour (unchanged from pre-
# Phase-3). See adapters.hermes.escalation for classification rules.
escalation_ladder = config.runtime_config.get("escalation_ladder") or None
executor = create_executor(
hermes_api_key=hermes_api_key,
config_path=config.config_path, # Phase 2d-i: system-prompt.md injection
escalation_ladder=escalation_ladder,
)
# Override model from config if provided
model = config.model
if ":" in model:
_, model = model.split(":", 1)
if model:
executor.model = model
executor._heartbeat = config.heartbeat
return executor

View File

@@ -1,201 +0,0 @@
"""Hermes escalation ladder — promote to stronger models on transient failure.
Every workspace in the Hermes adapter path has a single pinned model today
(``provider_cfg.default_model`` overridden by ``runtime_config.model`` in
``config.yaml``). That's fine when the pinned model is the best fit, but
it leaves four recurring failure classes unhandled:
1. **Rate limits** (Claude Max saturation, Anthropic 429, OpenAI 429). We're
currently saturating 3× Claude Max subscriptions; the first 429 is now
the norm, not the exception.
2. **Transient 5xx** from any provider (overloaded 529, 500, 502, 503).
3. **Context-length exceeded** on the smaller-window model (Haiku has 200k,
cheaper Gemini flash tiers have less, OpenAI nano/mini have 128k).
4. **Refusal / empty response** from a cheaper tier that the next tier up
would handle; less common but real in practice.
An escalation ladder is a workspace-configured list of ``LadderRung`` entries
(provider + model). On a qualifying failure, the executor advances to the
next rung and retries the same user_message + history. If the ladder is
exhausted, the last error is raised.
## Config shape
``config.yaml``::
hermes:
escalation_ladder:
- provider: gemini
model: gemini-2.5-flash # fast/cheap probe
- provider: anthropic
model: claude-haiku-4-5-20251001
- provider: anthropic
model: claude-sonnet-4-5-20250929
- provider: anthropic
model: claude-opus-4-1-20250805 # frontier rescue
When ``escalation_ladder`` is absent, the executor behaves exactly as before:
one call, one model, errors bubble.
## What this module does NOT do (yet)
- **No uncertainty-driven escalation.** Only transient-failure escalation.
Promoting on "the answer felt thin" requires a judge pass; that's follow-up work.
- **No streaming partial-result aggregation.** The first rung that succeeds
returns; we don't splice responses across rungs.
- **No per-workspace budget tracking.** Each escalation is one more paid
call. Follow-up work (#305 budget cap) handles that.
"""
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Optional
logger = logging.getLogger(__name__)
@dataclass(frozen=True)
class LadderRung:
"""One rung on the escalation ladder.
``provider`` is a canonical short name from ``providers.PROVIDERS``.
``model`` overrides the provider's default for this rung.
"""
provider: str
model: str
def parse_ladder(raw: Optional[list]) -> list[LadderRung]:
"""Parse the ``escalation_ladder`` list from ``config.yaml`` into rungs.
Accepts either dict-shaped entries (``{"provider": ..., "model": ...}``)
or pre-built LadderRung instances (for programmatic callers). Skips
malformed entries with a warning rather than raising; a missing rung
is worse than a noisy one during boot.
Empty / None / missing input returns an empty list (caller interprets
as "no ladder configured, single-shot dispatch").
"""
if not raw:
return []
rungs: list[LadderRung] = []
for i, entry in enumerate(raw):
if isinstance(entry, LadderRung):
rungs.append(entry)
continue
if not isinstance(entry, dict):
logger.warning(
"Hermes ladder: rung %d is not a dict (%r), skipping", i, type(entry).__name__,
)
continue
provider = entry.get("provider")
model = entry.get("model")
if not provider or not model:
logger.warning(
"Hermes ladder: rung %d missing provider or model (%r), skipping", i, entry,
)
continue
rungs.append(LadderRung(provider=str(provider), model=str(model)))
return rungs
# Error-type names that indicate a transient failure worth escalating.
# We match on the class name (not the module) so this works regardless of
# whether the workspace imported the new or old anthropic / openai SDK.
# See ``should_escalate`` for the matching logic.
_ESCALATABLE_ERROR_CLASSES = frozenset({
# openai SDK
"RateLimitError", # 429
"APITimeoutError", # connect/read timeout
"APIConnectionError", # TCP / DNS
"InternalServerError", # 500
# anthropic SDK
"OverloadedError", # 529
"APIStatusError", # generic 5xx wrapper
# common across both: network-level errors
"ConnectionError",
"Timeout",
"ReadTimeout",
})
# Error-message substrings that indicate context-length exceeded. These map
# to distinct HTTP 400 responses from each provider rather than a typed
# exception, so we match on substring.
_CONTEXT_LENGTH_MARKERS = (
"maximum context length", # openai
"context_length_exceeded", # openai error.code
"prompt is too long", # anthropic
"prompt_too_long", # anthropic error.code
"context window", # gemini
)
# Error-message substrings that indicate a transient gateway issue. These
# sometimes come through as generic exceptions without typed classes.
_TRANSIENT_GATEWAY_MARKERS = (
"502 bad gateway",
"503 service unavailable",
"504 gateway timeout",
"overloaded",
"please try again",
"temporarily unavailable",
)
# Error-message substrings that definitively DO NOT qualify for escalation.
# Auth and malformed-payload errors don't get better by retrying on a
# different model — they indicate config / code bugs.
_NON_ESCALATABLE_MARKERS = (
"invalid api key",
"authentication_error",
"401",
"403",
"forbidden",
"permission_denied",
"unauthorized",
)
def should_escalate(exc: BaseException) -> bool:
"""Decide whether ``exc`` justifies moving to the next ladder rung.
Returns True when the failure is one of:
- Rate limit (429 / RateLimitError / OverloadedError)
- Transient gateway (5xx, overload, timeout, connection reset)
- Context-length exceeded on the current model
Returns False for auth, permission, malformed-payload, and other
config-bug classes; escalating those just wastes the next-tier quota.
"""
if exc is None:
return False
cls_name = exc.__class__.__name__
msg = str(exc).lower()
# Hard reject: never escalate auth/permission errors regardless of
# what the class name says. A wrapped RateLimitError that actually
# contains "401 Unauthorized" is a config bug, not a rate limit.
for marker in _NON_ESCALATABLE_MARKERS:
if marker in msg:
return False
if cls_name in _ESCALATABLE_ERROR_CLASSES:
return True
for marker in _CONTEXT_LENGTH_MARKERS:
if marker in msg:
return True
for marker in _TRANSIENT_GATEWAY_MARKERS:
if marker in msg:
return True
# Status-code prefixes are a common tell for HTTP-wrapped provider errors.
if "429" in msg or "529" in msg:
return True
if any(code in msg for code in ("500 ", "502 ", "503 ", "504 ")):
return True
return False
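# Illustrative sketch (not shipped code): how the truth table above classifies
# a few representative failures. The exception class is a local stand-in with
# the same name as the openai SDK's class; matching is by class *name*, so the
# behaviour is identical to the real SDK error.
def _example_should_escalate() -> None:
    class RateLimitError(Exception):  # same class name as the openai SDK error
        pass

    assert should_escalate(RateLimitError("429 Too Many Requests"))
    assert should_escalate(RuntimeError("upstream 502 bad gateway"))
    assert should_escalate(ValueError("maximum context length exceeded"))
    # Auth failures never escalate, even when wrapped in an "escalatable" class.
    assert not should_escalate(RateLimitError("401 Unauthorized"))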

View File

@ -1,543 +0,0 @@
"""Hermes adapter executor — Phase 2 multi-provider with native SDK dispatch.
Hermes supports 15 providers via the shared ``providers.py`` registry. Each
provider's ``auth_scheme`` field controls which client + request shape the
executor uses:
- ``auth_scheme="openai"`` (13 providers) OpenAI-compat ``/v1/chat/completions``
via the ``openai`` Python SDK. Covers: Nous Portal, OpenRouter, OpenAI, xAI,
Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
- ``auth_scheme="anthropic"`` (1 provider anthropic) native Messages API via
the ``anthropic`` Python SDK. Phase 2a: better tool calling, vision support,
extended thinking semantics. If the ``anthropic`` package isn't installed in
the workspace image, ``_do_anthropic_native`` raises a clear error with
install instructions rather than silently falling back to the OpenAI-compat
shim (which would lose fidelity invisibly).
- ``auth_scheme="gemini"`` (1 provider gemini) native ``generateContent`` API
via the official ``google-genai`` Python SDK. Phase 2b: first-class vision
content blocks, tool/function calling, system instructions, and thinking
config, all of which the OpenAI-compat shim at ``/v1beta/openai`` either
strips or mis-translates. Same fail-loud semantics as the anthropic path.
Key resolution order (unchanged from Phase 1)
----------------------------------------------
1. ``hermes_api_key`` parameter (explicit call-site override; routes to Nous Portal)
2. ``provider`` parameter (explicit provider name; looks up its env var(s))
3. Auto-detect: walk ``providers.RESOLUTION_ORDER`` and pick the first provider
whose env var is set.
Raises ``ValueError`` if nothing resolves. The error message lists every env var
that was checked so the operator knows their options without reading source.
"""
from __future__ import annotations
import logging
import os
from typing import Optional
from .escalation import LadderRung, parse_ladder, should_escalate
from .providers import PROVIDERS, ProviderConfig, resolve_provider
logger = logging.getLogger(__name__)
def create_executor(
hermes_api_key: Optional[str] = None,
provider: Optional[str] = None,
model: Optional[str] = None,
config_path: Optional[str] = None,
escalation_ladder: Optional[list] = None,
):
"""Create and return a LangGraph-compatible executor for the Hermes adapter.
Parameters
----------
hermes_api_key:
Explicit API key. When provided, the call routes to Nous Portal (the
PR 2 back-compat path) regardless of ``provider``.
provider:
Canonical provider short name from ``providers.PROVIDERS`` (e.g.
``"openai"``, ``"anthropic"``, ``"qwen"``, ``"xai"``). When set, the
registry entry's env vars are used to find the API key and its
base URL + default model override the auto-detect path. When unset,
auto-detect walks ``providers.RESOLUTION_ORDER`` until it finds a
provider whose env var is set.
model:
Override the provider's default model. Passed straight through to
``chat.completions.create``.
config_path:
Path to the workspace's ``/configs`` directory. Phase 2d-i reads
``system-prompt.md`` from here on every ``execute()`` call and
passes the content as a system instruction to the native SDK.
Optional; omit to skip system-prompt injection (tests do this).
Returns
-------
HermesA2AExecutor
A ready-to-use executor wired with the resolved api_key + base_url
+ model + config_path.
Raises
------
ValueError
If ``provider`` is an unknown name, if ``provider`` is known but its
env vars are all empty, or if auto-detect finds nothing.
"""
ladder = parse_ladder(escalation_ladder)
if ladder:
logger.info(
"Hermes: escalation ladder configured — %d rungs (%s)",
len(ladder),
"".join(f"{r.provider}:{r.model}" for r in ladder),
)
# Path 1: PR 2 back-compat — explicit hermes_api_key routes to Nous Portal.
if hermes_api_key:
cfg = PROVIDERS["nous_portal"]
logger.debug("Hermes: using explicit hermes_api_key param (Nous Portal)")
return HermesA2AExecutor(
provider_cfg=cfg,
api_key=hermes_api_key,
model=model or cfg.default_model,
config_path=config_path,
escalation_ladder=ladder,
)
# Path 2/3: registry resolution (either explicit provider name or auto-detect).
cfg, api_key = resolve_provider(provider)
logger.info(
"Hermes: provider=%s auth_scheme=%s base_url=%s model=%s",
cfg.name,
cfg.auth_scheme,
cfg.base_url,
model or cfg.default_model,
)
return HermesA2AExecutor(
provider_cfg=cfg,
api_key=api_key,
model=model or cfg.default_model,
config_path=config_path,
escalation_ladder=ladder,
)
class HermesA2AExecutor:
"""LangGraph-compatible AgentExecutor for Hermes-style multi-provider LLMs.
Dispatches each inference call based on ``provider_cfg.auth_scheme``:
- ``"openai"`` OpenAI-compat ``/v1/chat/completions`` via the ``openai`` SDK
- ``"anthropic"`` native Messages API via the ``anthropic`` SDK
The ``execute()`` and ``cancel()`` async methods satisfy the
``a2a.server.agent_execution.AgentExecutor`` interface so this
executor can be dropped into the A2A server's DefaultRequestHandler.
"""
def __init__(
self,
provider_cfg: ProviderConfig,
api_key: str,
model: str,
heartbeat=None,
config_path: Optional[str] = None,
escalation_ladder: Optional[list] = None,
):
self.provider_cfg = provider_cfg
self.api_key = api_key
self.base_url = provider_cfg.base_url
self.model = model
self._heartbeat = heartbeat
# Phase 2d-i: config_path lets execute() read /configs/system-prompt.md
# on each turn and pass it to the native SDK's `system=` /
# `system_instruction=` / prepended message. Optional because older
# callers + tests construct executors directly.
self._config_path = config_path
# Phase 3: escalation ladder. When non-empty, _do_inference retries
# transient-failure classes (rate limit, 5xx, overload, context-length)
# on each rung in turn before raising. Empty / None = single-shot,
# original behaviour. See adapters.hermes.escalation.
self._ladder: list[LadderRung] = parse_ladder(escalation_ladder) or []
# ------------------------------------------------------------------
# History → provider-specific message list converters
# ------------------------------------------------------------------
#
# The A2A shared runtime gives us history as ``list[tuple[str, str]]``
# with roles ``"human"`` / ``"ai"``. Each provider wants a different
# shape:
#
# OpenAI-compat: [{"role":"user"|"assistant", "content": str}, ...]
# Anthropic: [{"role":"user"|"assistant", "content": str}, ...] (same)
# Gemini: [{"role":"user"|"model", "parts": [{"text": str}]}, ...]
#
# Before Phase 2c these were flattened into a single user turn via
# ``shared_runtime.build_task_text``, which worked for basic text
# handoff but lost the model's native multi-turn awareness (system
# prompts, tool-use history, role attribution for instruction
# following). Phase 2c keeps the turns as turns.
@staticmethod
def _history_to_openai_messages(
user_message: str,
history: "list[tuple[str, str]]",
) -> "list[dict]":
"""Convert A2A history + current turn to OpenAI Chat Completions shape."""
messages: list[dict] = []
for role, text in history or []:
messages.append({
"role": "user" if role == "human" else "assistant",
"content": text,
})
messages.append({"role": "user", "content": user_message})
return messages
@staticmethod
def _history_to_anthropic_messages(
user_message: str,
history: "list[tuple[str, str]]",
) -> "list[dict]":
"""Convert A2A history + current turn to Anthropic Messages API shape.
Identical wire format to OpenAI (``role`` + ``content``) for text-only
turns, so we just delegate. The difference matters for tool_use /
content blocks, which are Phase 2d territory.
"""
return HermesA2AExecutor._history_to_openai_messages(user_message, history)
@staticmethod
def _history_to_gemini_contents(
user_message: str,
history: "list[tuple[str, str]]",
) -> "list[dict]":
"""Convert A2A history + current turn to Gemini generateContent shape.
Gemini uses ``role: "user" | "model"`` (NOT "assistant") and wraps
text in a ``parts: [{"text": ...}]`` list.
"""
contents: list[dict] = []
for role, text in history or []:
contents.append({
"role": "user" if role == "human" else "model",
"parts": [{"text": text}],
})
contents.append({"role": "user", "parts": [{"text": user_message}]})
return contents
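# Illustrative sketch of the provider shapes for one two-turn history. This is
# reading material, not shipped code; its natural home would be the test suite.
def _example_history_shapes() -> None:
    history = [("human", "hi"), ("ai", "hello!")]
    assert HermesA2AExecutor._history_to_openai_messages("status?", history) == [
        {"role": "user", "content": "hi"},
        {"role": "assistant", "content": "hello!"},
        {"role": "user", "content": "status?"},
    ]
    assert HermesA2AExecutor._history_to_gemini_contents("status?", history) == [
        {"role": "user", "parts": [{"text": "hi"}]},
        {"role": "model", "parts": [{"text": "hello!"}]},
        {"role": "user", "parts": [{"text": "status?"}]},
    ]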
# ------------------------------------------------------------------
# Per-provider inference paths
# ------------------------------------------------------------------
async def _do_openai_compat(
self,
user_message: str,
history: "list[tuple[str, str]] | None" = None,
system_prompt: Optional[str] = None,
) -> str:
"""OpenAI-compat inference — used by every provider with auth_scheme='openai'.
13 of the 15 registered providers route here. Uses ``openai.AsyncOpenAI``
pointed at the provider's base_url; every provider's API is wire-
compatible with the OpenAI Chat Completions shape.
Phase 2c: accepts multi-turn history.
Phase 2d-i: accepts optional system_prompt, prepended as a
``{"role":"system"}`` message per the OpenAI Chat Completions convention.
"""
import openai
client = openai.AsyncOpenAI(
api_key=self.api_key,
base_url=self.base_url,
)
messages = self._history_to_openai_messages(user_message, history or [])
if system_prompt:
messages = [{"role": "system", "content": system_prompt}, *messages]
response = await client.chat.completions.create(
model=self.model,
messages=messages,
)
return response.choices[0].message.content or ""
async def _do_anthropic_native(
self,
user_message: str,
history: "list[tuple[str, str]] | None" = None,
system_prompt: Optional[str] = None,
) -> str:
"""Native Anthropic Messages API inference.
Uses the official ``anthropic`` Python SDK for correct tool-calling,
vision, and extended-thinking semantics that don't translate cleanly
through the OpenAI-compat shim.
Phase 2a: single-turn text.
Phase 2c: multi-turn history.
Phase 2d-i: optional system_prompt passed via Anthropic's native
top-level ``system=`` parameter, NOT as a message in the messages
list (Anthropic's Messages API requires system prompts to be at the
top level, not inline like OpenAI).
"""
try:
import anthropic
except ImportError as exc: # pragma: no cover — exercised by test_missing_sdk
raise RuntimeError(
"Hermes anthropic native path requires the `anthropic` package. "
"Install in the workspace image with `pip install anthropic>=0.39.0` "
"or set HERMES provider=openrouter to route Claude models through "
"OpenRouter's OpenAI-compat shim instead."
) from exc
client = anthropic.AsyncAnthropic(api_key=self.api_key)
messages = self._history_to_anthropic_messages(user_message, history or [])
create_kwargs: dict = {
"model": self.model,
"max_tokens": 4096,
"messages": messages,
}
if system_prompt:
create_kwargs["system"] = system_prompt
response = await client.messages.create(**create_kwargs)
# response.content is a list of ContentBlock; for text-only the first
# block is a TextBlock with a .text attribute.
if response.content and hasattr(response.content[0], "text"):
return response.content[0].text
return ""
async def _do_gemini_native(
self,
user_message: str,
history: "list[tuple[str, str]] | None" = None,
system_prompt: Optional[str] = None,
) -> str:
"""Native Google Gemini ``generateContent`` inference.
Uses the official ``google-genai`` Python SDK for correct vision
content blocks, tool/function calling, system instructions, and
thinking config. These all get stripped or mis-translated through
the OpenAI-compat ``/v1beta/openai`` shim.
Phase 2b: single-turn text.
Phase 2c: multi-turn history via Gemini's ``contents=[{role,parts}]``
shape (note: role is ``"user"`` / ``"model"``, NOT ``"assistant"``).
Phase 2d-i: system_prompt passed via native
``config.system_instruction``, Gemini's top-level system field.
"""
try:
from google import genai # type: ignore[import-not-found]
from google.genai import types as genai_types # type: ignore[import-not-found]
except ImportError as exc: # pragma: no cover — exercised by test_missing_sdk
raise RuntimeError(
"Hermes gemini native path requires the `google-genai` package. "
"Install in the workspace image with `pip install google-genai>=1.0.0` "
"or set HERMES provider=openrouter to route Gemini models through "
"OpenRouter's OpenAI-compat shim instead."
) from exc
client = genai.Client(api_key=self.api_key)
contents = self._history_to_gemini_contents(user_message, history or [])
generate_kwargs: dict = {
"model": self.model,
"contents": contents,
}
if system_prompt:
generate_kwargs["config"] = genai_types.GenerateContentConfig(
system_instruction=system_prompt,
)
response = await client.aio.models.generate_content(**generate_kwargs)
# response.text is the flattened text across all parts of the first
# candidate. For text-only that's the whole reply.
return response.text or ""
async def _do_inference(
self,
user_message: str,
history: "list[tuple[str, str]] | None" = None,
system_prompt: Optional[str] = None,
) -> str:
"""Dispatch to the right inference path based on provider auth_scheme.
Phase 2c: multi-turn history.
Phase 2d-i: optional system_prompt is passed through to the native
system field of whichever path wins dispatch.
Phase 3: when an escalation ladder is configured, transient failures
(rate limit, 5xx, overload, context-length) promote to the next rung
before raising. No ladder = single-shot, original behaviour.
"""
# Fast path: no ladder configured — single call on the pinned model.
if not self._ladder:
return await self._dispatch(
self.provider_cfg, self.model, user_message, history, system_prompt,
)
# Slow path: walk the ladder. Start with the pinned (provider, model)
# so the first attempt matches non-ladder behaviour exactly — the
# ladder only kicks in when the first attempt fails escalatably.
attempts: list[tuple[ProviderConfig, str]] = [(self.provider_cfg, self.model)]
for rung in self._ladder:
rung_cfg = PROVIDERS.get(rung.provider)
if rung_cfg is None:
logger.warning(
"Hermes ladder: provider %r not in registry, skipping rung",
rung.provider,
)
continue
attempts.append((rung_cfg, rung.model))
last_exc: Optional[BaseException] = None
for i, (cfg, model) in enumerate(attempts):
try:
reply = await self._dispatch(
cfg, model, user_message, history, system_prompt,
)
if i > 0:
logger.info(
"Hermes ladder: succeeded on rung %d (%s:%s) after %d failed attempt(s)",
i, cfg.name, model, i,
)
return reply
except Exception as exc:
last_exc = exc
if i == len(attempts) - 1:
logger.error(
"Hermes ladder: exhausted all %d rungs — raising. Last error on %s:%s: %s",
len(attempts), cfg.name, model, exc,
)
raise
if not should_escalate(exc):
logger.info(
"Hermes ladder: non-escalatable error on %s:%s — raising without advancing: %s",
cfg.name, model, exc,
)
raise
logger.warning(
"Hermes ladder: escalatable failure on rung %d (%s:%s), advancing. Error: %s",
i, cfg.name, model, exc,
)
# Unreachable in practice: the last iteration either returns or raises.
# This tail only exists to satisfy the type checker without a bare return.
if last_exc is not None:
raise last_exc
return "" # pragma: no cover
async def _dispatch(
self,
cfg: ProviderConfig,
model: str,
user_message: str,
history: "list[tuple[str, str]] | None",
system_prompt: Optional[str],
) -> str:
"""Single-attempt dispatch on (cfg, model).
Temporarily rebinds ``self.provider_cfg`` + ``self.base_url`` + ``self.model``
so the existing per-provider paths pick up the rung's config. Restores
the original values in a finally block so a raised error leaves the
executor pinned to its constructor-given state (next call on the same
executor instance starts fresh at the top of the ladder).
For the ladder's non-first rungs, ``self.api_key`` must be the rung's
provider key; we resolve it here via ``resolve_provider`` so the
first-rung API key (for the pinned provider) isn't mis-used against a
different provider's base URL. That lookup can raise ``ValueError``
when the rung's env var isn't set; ``should_escalate(ValueError)``
returns False so the ladder correctly STOPS rather than escalating
further into nothing.
"""
# Fast path: rung matches the executor's pinned config — reuse the
# existing api_key, skip the provider re-resolve.
if cfg is self.provider_cfg and model == self.model:
scheme = cfg.auth_scheme
if scheme == "anthropic":
return await self._do_anthropic_native(user_message, history, system_prompt)
if scheme == "gemini":
return await self._do_gemini_native(user_message, history, system_prompt)
if scheme == "openai":
return await self._do_openai_compat(user_message, history, system_prompt)
logger.warning(
"Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat",
scheme, cfg.name,
)
return await self._do_openai_compat(user_message, history, system_prompt)
# Different rung — temporarily rebind provider_cfg + model + api_key.
# resolve_provider reads the rung's env vars fresh.
_, rung_key = resolve_provider(cfg.name)
orig_cfg, orig_model, orig_key, orig_base = (
self.provider_cfg, self.model, self.api_key, self.base_url,
)
try:
self.provider_cfg = cfg
self.model = model
self.api_key = rung_key
self.base_url = cfg.base_url
scheme = cfg.auth_scheme
if scheme == "anthropic":
return await self._do_anthropic_native(user_message, history, system_prompt)
if scheme == "gemini":
return await self._do_gemini_native(user_message, history, system_prompt)
if scheme == "openai":
return await self._do_openai_compat(user_message, history, system_prompt)
logger.warning(
"Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat",
scheme, cfg.name,
)
return await self._do_openai_compat(user_message, history, system_prompt)
finally:
self.provider_cfg = orig_cfg
self.model = orig_model
self.api_key = orig_key
self.base_url = orig_base
# ------------------------------------------------------------------
# AgentExecutor interface
# ------------------------------------------------------------------
async def execute(self, context, event_queue): # pragma: no cover
"""Execute a Hermes inference request and push the reply to event_queue.
Phase 2c: multi-turn history.
Phase 2d-i: reads ``/configs/system-prompt.md`` via
``executor_helpers.get_system_prompt`` each turn (supports hot-reload)
and passes the text to the dispatch layer. Each provider path uses
its native system field: Anthropic's top-level ``system=``, Gemini's
``system_instruction=`` via ``GenerateContentConfig``, or OpenAI's
``{"role":"system"}`` message at the head of the messages list.
"""
from a2a.utils import new_agent_text_message
from adapters.shared_runtime import (
brief_task,
extract_history,
extract_message_text,
set_current_task,
)
from executor_helpers import get_system_prompt
user_message = extract_message_text(context)
if not user_message:
await event_queue.enqueue_event(new_agent_text_message("No message provided"))
return
await set_current_task(self._heartbeat, brief_task(user_message))
try:
history = extract_history(context)
system_prompt = (
get_system_prompt(self._config_path) if self._config_path else None
)
reply = await self._do_inference(user_message, history, system_prompt)
except Exception as exc:
logger.exception("Hermes executor error: %s", exc)
reply = f"Hermes error: {exc}"
finally:
await set_current_task(self._heartbeat, "")
await event_queue.enqueue_event(new_agent_text_message(reply))
async def cancel(self, context, event_queue): # pragma: no cover
"""No-op cancel — Hermes requests are not cancellable mid-flight."""
pass
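# Illustrative wiring sketch (assumptions: OPENAI_API_KEY is set, and each
# rung provider's key is set for any rung you expect to actually fire).
# Model names here are examples, not recommendations.
def _example_wiring():
    return create_executor(
        provider="openai",
        model="gpt-4o",
        config_path="/configs",
        escalation_ladder=[
            {"provider": "anthropic", "model": "claude-sonnet-4-5"},
            {"provider": "gemini", "model": "gemini-2.5-flash"},
        ],
    )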

View File

@ -1,298 +0,0 @@
"""Hermes adapter provider registry — Phase 1 of the multi-provider expansion.
Extends the original PR-2 Hermes executor (Nous Portal + OpenRouter only) to a
registry of 15 providers. Thirteen of the fifteen are reached via an
OpenAI-compat endpoint, which means the existing ``openai.AsyncOpenAI`` client
and request shape in ``executor.py`` Just Works without any new dependencies.
Native SDK paths (Anthropic Messages API, Gemini generateContent API) are
Phase 2; they give better tool-calling + vision fidelity but are not
required to unblock the basic "CEO wants Hermes on Qwen / GLM / xAI /
Gemini" asks that triggered this work.
## Design
- ``ProviderConfig`` captures everything needed to point the OpenAI client at
a provider: env var(s), base URL, default model, auth scheme.
- ``PROVIDERS`` is a dict keyed by canonical short name (``"openai"``,
``"anthropic"``, ``"qwen"``, etc.).
- ``RESOLUTION_ORDER`` is the auto-detect sequence used when the caller
doesn't specify a provider — it tries each provider's env vars in turn and
picks the first one that's set.
- ``resolve_provider(explicit)`` returns ``(ProviderConfig, api_key)`` or
raises ``ValueError`` with a helpful message listing every env var it
checked.
## Back-compat
The original ``HERMES_API_KEY`` and ``OPENROUTER_API_KEY`` env vars still work
and still route to Nous Portal / OpenRouter respectively; they're just now
registered as two entries in ``PROVIDERS`` rather than hardcoded in
``create_executor``.
## Adding a new provider
1. Append a new ``ProviderConfig`` entry under ``PROVIDERS``
2. Add its short name to ``RESOLUTION_ORDER`` in the desired priority slot
3. Document the env var in the workspace ``.env.example`` (if present)
That's it. Nothing else needs to change — the executor reads the registry.
"""
from __future__ import annotations
import os
from dataclasses import dataclass
from typing import Optional
@dataclass(frozen=True)
class ProviderConfig:
"""Everything the Hermes executor needs to talk to a single LLM provider.
Every provider in Phase 1 is reachable via an OpenAI-compatible
``/v1/chat/completions`` endpoint, so ``auth_scheme`` is always
``"openai"`` (Bearer token, OpenAI-style messages payload). Phase 2
will add ``"anthropic"`` (native Messages API) and ``"gemini"`` (native
generateContent API) for roles that need better tool-call fidelity.
"""
name: str
"""Canonical short name — the key used in ``PROVIDERS`` and the ``provider`` kwarg."""
env_vars: tuple[str, ...]
"""API key env vars, checked in order. First non-empty value wins.
Supporting multiple env vars lets us accept common aliases
(e.g. ``QWEN_API_KEY`` AND ``DASHSCOPE_API_KEY`` both work for Alibaba Qwen)."""
base_url: str
"""OpenAI-compat base URL. Must include the ``/v1`` suffix where applicable."""
default_model: str
"""Default model name to pass to ``chat.completions.create``.
Per-call overrides are possible via the executor constructor."""
auth_scheme: str = "openai"
"""``openai`` (Bearer token + OpenAI-style payload) for every Phase 1 provider.
Phase 2 reserves ``anthropic`` and ``gemini`` for native-SDK paths."""
docs: str = ""
"""Short note — which docs URL the config was derived from, or which quirks
to know about. Not used programmatically; exists to make future audits of
this file cheaper than re-Googling every entry."""
# --- Provider registry ------------------------------------------------------
#
# Ordering within this dict is not semantically meaningful — use
# ``RESOLUTION_ORDER`` below to control auto-detect priority. This dict is
# grouped by "who owns the provider" just for human readability.
PROVIDERS: dict[str, ProviderConfig] = {
# --- Existing (PR 2 baseline) ---------------------------------------
"nous_portal": ProviderConfig(
name="nous_portal",
env_vars=("HERMES_API_KEY", "NOUS_API_KEY"),
base_url="https://inference-prod.nousresearch.com/v1",
default_model="nousresearch/hermes-3-llama-3.1-405b",
docs="Nous Research Portal — original Hermes adapter target from PR 2.",
),
"openrouter": ProviderConfig(
name="openrouter",
env_vars=("OPENROUTER_API_KEY",),
base_url="https://openrouter.ai/api/v1",
default_model="anthropic/claude-sonnet-4.5",
docs="OpenRouter — unified OpenAI-compat gateway to hundreds of models. "
"Useful for A/B testing and as a fallback when a direct provider is down.",
),
# --- Frontier commercial (US) ---------------------------------------
"openai": ProviderConfig(
name="openai",
env_vars=("OPENAI_API_KEY",),
base_url="https://api.openai.com/v1",
default_model="gpt-4o",
docs="OpenAI — canonical OpenAI-compat endpoint. Works out of the box.",
),
"anthropic": ProviderConfig(
name="anthropic",
env_vars=("ANTHROPIC_API_KEY",),
base_url="https://api.anthropic.com",
default_model="claude-sonnet-4-5",
auth_scheme="anthropic",
docs="Anthropic — Phase 2 uses the native Messages API via the official "
"`anthropic` Python SDK for correct tool calling, vision, and "
"extended thinking semantics. If the SDK isn't installed in the "
"workspace image, the executor raises a clear error pointing at "
"`pip install anthropic>=0.39.0`.",
),
"xai": ProviderConfig(
name="xai",
env_vars=("XAI_API_KEY", "GROK_API_KEY"),
base_url="https://api.x.ai/v1",
default_model="grok-4",
docs="xAI — Grok family. OpenAI-compat via api.x.ai/v1.",
),
"gemini": ProviderConfig(
name="gemini",
env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY"),
base_url="https://generativelanguage.googleapis.com",
default_model="gemini-2.5-flash",
auth_scheme="gemini",
docs="Google Gemini — Phase 2b uses the native generateContent API via "
"the official `google-genai` Python SDK for correct vision content "
"blocks, tool/function calling, and system instructions. Phase 1 "
"used the /v1beta/openai compat shim. If the google-genai package "
"isn't installed in the workspace image, the executor raises a "
"clear error pointing at `pip install google-genai>=1.0.0`.",
),
# --- Chinese providers ----------------------------------------------
"qwen": ProviderConfig(
name="qwen",
env_vars=("QWEN_API_KEY", "DASHSCOPE_API_KEY"),
base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
default_model="qwen3-235b-a22b",
docs="Alibaba Qwen via DashScope international endpoint. OpenAI-compat mode. "
"For domestic China use dashscope.aliyuncs.com (no -intl).",
),
"glm": ProviderConfig(
name="glm",
env_vars=("GLM_API_KEY", "ZHIPU_API_KEY"),
base_url="https://open.bigmodel.cn/api/paas/v4",
default_model="glm-4-plus",
docs="Zhipu AI GLM — open.bigmodel.cn, OpenAI-compat via /api/paas/v4.",
),
"kimi": ProviderConfig(
name="kimi",
env_vars=("KIMI_API_KEY", "MOONSHOT_API_KEY"),
base_url="https://api.moonshot.ai/v1",
default_model="kimi-k2",
docs="Moonshot AI Kimi K2 — OpenAI-compat at api.moonshot.ai/v1.",
),
"minimax": ProviderConfig(
name="minimax",
env_vars=("MINIMAX_API_KEY",),
base_url="https://api.minimax.io/v1",
default_model="MiniMax-M2",
docs="MiniMax — OpenAI-compat at api.minimax.io/v1. "
"Note: older base URL api.minimaxi.chat is deprecated.",
),
"deepseek": ProviderConfig(
name="deepseek",
env_vars=("DEEPSEEK_API_KEY",),
base_url="https://api.deepseek.com/v1",
default_model="deepseek-chat",
docs="DeepSeek — very cheap, OpenAI-compat at api.deepseek.com/v1.",
),
# --- OSS / alt providers --------------------------------------------
"groq": ProviderConfig(
name="groq",
env_vars=("GROQ_API_KEY",),
base_url="https://api.groq.com/openai/v1",
default_model="llama-3.3-70b-versatile",
docs="Groq LPU inference — very fast, OpenAI-compat at api.groq.com/openai/v1.",
),
"together": ProviderConfig(
name="together",
env_vars=("TOGETHER_API_KEY",),
base_url="https://api.together.xyz/v1",
default_model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
docs="Together AI — OSS model hosting, OpenAI-compat at api.together.xyz/v1.",
),
"fireworks": ProviderConfig(
name="fireworks",
env_vars=("FIREWORKS_API_KEY",),
base_url="https://api.fireworks.ai/inference/v1",
default_model="accounts/fireworks/models/llama-v3p3-70b-instruct",
docs="Fireworks AI — OSS model hosting, OpenAI-compat at api.fireworks.ai/inference/v1.",
),
"mistral": ProviderConfig(
name="mistral",
env_vars=("MISTRAL_API_KEY",),
base_url="https://api.mistral.ai/v1",
default_model="mistral-large-latest",
docs="Mistral AI — OpenAI-compat at api.mistral.ai/v1.",
),
}
# --- Auto-detect resolution order -------------------------------------------
#
# When the caller doesn't specify a provider, resolve_provider() walks this
# list in order and picks the first provider whose env var is set. Order is
# chosen to preserve back-compat (the two original PR-2 providers come first)
# followed by the most likely-to-be-configured commercial APIs.
RESOLUTION_ORDER: tuple[str, ...] = (
# Back-compat: PR 2 baseline
"nous_portal",
"openrouter",
# Frontier commercial
"anthropic",
"openai",
"gemini",
"xai",
# Chinese providers
"qwen",
"glm",
"kimi",
"minimax",
"deepseek",
# OSS / alt
"groq",
"mistral",
"together",
"fireworks",
)
def resolve_provider(explicit: Optional[str] = None) -> tuple[ProviderConfig, str]:
"""Resolve a provider name to a ``(ProviderConfig, api_key)`` pair.
Resolution order:
1. If ``explicit`` is given, look it up in ``PROVIDERS`` and try every
env var on that provider's config. Raise with a clear message if the
name is unknown or if all env vars are empty.
2. Otherwise auto-detect: walk ``RESOLUTION_ORDER`` and return the first
provider whose env var is set.
Raises
------
ValueError
If ``explicit`` is an unknown provider name, if ``explicit`` is a
known provider but its env vars are all empty, or if no env var is
set for any provider in auto-detect mode.
"""
if explicit:
if explicit not in PROVIDERS:
raise ValueError(
f"Unknown Hermes provider: {explicit!r}. "
f"Available: {sorted(PROVIDERS)}"
)
cfg = PROVIDERS[explicit]
for env in cfg.env_vars:
val = os.environ.get(env, "").strip()
if val:
return cfg, val
raise ValueError(
f"Hermes provider {explicit!r} specified but no env var set. "
f"Tried: {cfg.env_vars}"
)
# Auto-detect — first provider with a non-empty env var wins.
for name in RESOLUTION_ORDER:
cfg = PROVIDERS[name]
for env in cfg.env_vars:
val = os.environ.get(env, "").strip()
if val:
return cfg, val
# Nothing set — raise with the full list so the operator knows every
# option they have without having to read the source.
tried = []
for name in RESOLUTION_ORDER:
for env in PROVIDERS[name].env_vars:
tried.append(env)
raise ValueError(
"No Hermes provider API key found. Set any one of: " + ", ".join(tried)
)
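# Illustrative sketch of both resolution paths (not shipped code; the env var
# value is a fake set only for the demonstration, and the auto-detect assert
# assumes no other provider key is set in the environment).
def _example_resolution() -> None:
    os.environ["GROQ_API_KEY"] = "gsk-demo"
    cfg, key = resolve_provider("groq")  # path 1: explicit provider name
    assert cfg.base_url == "https://api.groq.com/openai/v1" and key == "gsk-demo"
    cfg, key = resolve_provider()  # path 2: auto-detect walks RESOLUTION_ORDER
    assert cfg.name == "groq"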

View File

@ -1,3 +0,0 @@
from .adapter import LangGraphAdapter
Adapter = LangGraphAdapter

View File

@ -1,50 +0,0 @@
"""LangGraph adapter — Python-based ReAct agent with skills, tools, and plugins."""
import os
import logging
from adapters.base import BaseAdapter, AdapterConfig
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
class LangGraphAdapter(BaseAdapter):
@staticmethod
def name() -> str:
return "langgraph"
@staticmethod
def display_name() -> str:
return "LangGraph"
@staticmethod
def description() -> str:
return "LangGraph ReAct agent — Python-based with skills, tools, plugins, and peer coordination"
@staticmethod
def get_config_schema() -> dict:
return {
"model": {"type": "string", "description": "LangChain model string (e.g. openrouter:google/gemini-2.5-flash)"},
"skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"},
"tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools (web_search, filesystem, etc.)"},
}
def __init__(self):
self.loaded_skills = []
self.all_tools = []
self.system_prompt = None
async def setup(self, config: AdapterConfig) -> None:
result = await self._common_setup(config)
self.loaded_skills = result.loaded_skills
self.all_tools = result.langchain_tools
self.system_prompt = result.system_prompt
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
from agent import create_agent
from a2a_executor import LangGraphA2AExecutor
agent = create_agent(config.model, self.all_tools, self.system_prompt)
return LangGraphA2AExecutor(agent, heartbeat=config.heartbeat, model=config.model)

View File

@ -1,3 +0,0 @@
from .adapter import OpenClawAdapter
Adapter = OpenClawAdapter

View File

@ -1,243 +0,0 @@
"""OpenClaw adapter — bridges OpenClaw's Node.js gateway with our A2A protocol.
OpenClaw is a Node.js agent runtime with its own gateway (port 18789).
This adapter:
1. Installs OpenClaw CLI (npm) and missing deps in the container
2. Runs non-interactive onboard with the configured model provider
3. Copies workspace files (SOUL.md, BOOTSTRAP.md, etc.) to OpenClaw's workspace dir
4. Starts the OpenClaw gateway as a background process
5. Proxies A2A messages via `openclaw agent --json` CLI subprocess
"""
import asyncio
import json
import logging
import os
import shutil
import subprocess
from adapters.base import BaseAdapter, AdapterConfig
from adapters.shared_runtime import brief_task, extract_message_text, set_current_task
from a2a.server.agent_execution import AgentExecutor
logger = logging.getLogger(__name__)
OPENCLAW_WORKSPACE = os.path.expanduser("~/.openclaw/workspace-dev/main")
OPENCLAW_PORT = 18789
# Known missing optional deps in OpenClaw's npm package
OPENCLAW_MISSING_DEPS = ["@buape/carbon", "@larksuiteoapi/node-sdk", "@slack/web-api", "grammy"]
class OpenClawAdapter(BaseAdapter):
def __init__(self):
self._gateway_process = None
@staticmethod
def name() -> str:
return "openclaw"
@staticmethod
def display_name() -> str:
return "OpenClaw"
@staticmethod
def description() -> str:
return "OpenClaw agent runtime — Node.js gateway with SOUL/BOOTSTRAP/AGENTS workspace convention"
@staticmethod
def get_config_schema() -> dict:
return {
"model": {"type": "string", "description": "Model ID (e.g. google/gemini-2.5-flash)"},
"provider_url": {"type": "string", "description": "LLM provider base URL", "default": "https://openrouter.ai/api/v1"},
"gateway_port": {"type": "integer", "description": "OpenClaw gateway port", "default": 18789},
}
async def setup(self, config: AdapterConfig) -> None: # pragma: no cover
"""Install OpenClaw, run onboard, copy workspace files, start gateway."""
npm_prefix = os.path.expanduser("~/.local")
os.environ["PATH"] = f"{npm_prefix}/bin:{os.environ.get('PATH', '')}"
# 1. Install OpenClaw CLI if not present
if not shutil.which("openclaw"):
logger.info("Installing OpenClaw CLI...")
result = subprocess.run(
["npm", "install", "--prefix", npm_prefix, "-g", "openclaw"],
capture_output=True, text=True, timeout=300,
env={**os.environ, "npm_config_prefix": npm_prefix}
)
if result.returncode != 0:
raise RuntimeError(f"Failed to install OpenClaw: {result.stderr[:500]}")
# Install known missing optional deps
oc_dir = os.path.join(npm_prefix, "lib/node_modules/openclaw")
if os.path.exists(oc_dir):
logger.info("Installing OpenClaw optional deps...")
subprocess.run(
["npm", "install"] + OPENCLAW_MISSING_DEPS,
capture_output=True, text=True, timeout=120, cwd=oc_dir
)
logger.info("OpenClaw CLI installed")
# 2. Resolve API key and model
prefix = config.model.split(":")[0] if ":" in config.model else "openai"
if prefix == "qianfan":
api_key = os.environ.get("QIANFAN_API_KEY", os.environ.get("AISTUDIO_API_KEY", ""))
else:
api_key = os.environ.get("OPENAI_API_KEY", os.environ.get("GROQ_API_KEY", os.environ.get("OPENROUTER_API_KEY", "")))
# Determine provider URL from model prefix
provider_urls = {
"openai": "https://api.openai.com/v1",
"groq": "https://api.groq.com/openai/v1",
"openrouter": "https://openrouter.ai/api/v1",
"qianfan": "https://qianfan.baidubce.com/v2",
}
provider_url = config.runtime_config.get("provider_url", provider_urls.get(prefix, "https://api.openai.com/v1"))
model = config.model
if ":" in model:
_, model = model.split(":", 1)
# 3. Run non-interactive onboard
if not os.path.exists(os.path.expanduser("~/.openclaw/openclaw.json")):
logger.info(f"Running OpenClaw onboard (model: {model})...")
subprocess.run(
["openclaw", "onboard", "--non-interactive",
"--auth-choice", "custom-api-key",
"--custom-base-url", provider_url,
"--custom-model-id", model,
"--custom-api-key", api_key,
"--custom-compatibility", "openai",
"--secret-input-mode", "plaintext",
"--accept-risk", "--skip-health"],
capture_output=True, text=True, timeout=60,
env={**os.environ, "NODE_NO_WARNINGS": "1"}
)
logger.info("OpenClaw onboard complete")
# 3b. Fix context window (OpenClaw defaults to 16K, but modern models have much more)
oc_config_path = os.path.expanduser("~/.openclaw/openclaw.json")
if os.path.exists(oc_config_path):
try:
import json as json_mod
with open(oc_config_path) as fh: oc_cfg = json_mod.load(fh)
provider_name = "custom-" + provider_url.split("//")[1].split("/")[0].replace(".", "-")
providers = oc_cfg.get("models", {}).get("providers", {})
if provider_name in providers:
for m in providers[provider_name].get("models", []):
m["contextWindow"] = 1000000 # 1M tokens for modern models
m["maxTokens"] = 16384
with open(oc_config_path, "w") as fh: json_mod.dump(oc_cfg, fh, indent=2)
logger.info(f"Fixed context window for {provider_name}")
except Exception as e:
logger.warning(f"Failed to fix context window: {e}")
# 3c. Always write auth-profiles.json
# (key may have been set via secrets API after first boot)
if api_key:
auth_dir = os.path.expanduser("~/.openclaw/agents/main/agent")
os.makedirs(auth_dir, exist_ok=True)
auth_file = os.path.join(auth_dir, "auth-profiles.json")
import json as json_mod
provider_name = "custom-" + provider_url.split("//")[1].split("/")[0].replace(".", "-")
auth_data = {provider_name: {"type": "api-key", "key": api_key}}
with open(auth_file, "w") as f:
json_mod.dump(auth_data, f, indent=2)
logger.info(f"Wrote auth-profiles.json for {provider_name}")
# 4. Copy workspace files from /configs to OpenClaw's workspace dir
os.makedirs(OPENCLAW_WORKSPACE, exist_ok=True)
for fname in os.listdir(config.config_path):
src = os.path.join(config.config_path, fname)
if os.path.isfile(src) and fname.endswith(".md"):
shutil.copy2(src, os.path.join(OPENCLAW_WORKSPACE, fname))
logger.debug(f"Copied {fname} to OpenClaw workspace")
# 5. Start the gateway as a background process
gateway_port = config.runtime_config.get("gateway_port", OPENCLAW_PORT)
logger.info(f"Starting OpenClaw gateway on port {gateway_port}...")
env = os.environ.copy()
env["NODE_NO_WARNINGS"] = "1"
self._gateway_process = subprocess.Popen(
["openclaw", "gateway", "--dev", "--port", str(gateway_port), "--bind", "loopback"],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
env=env,
)
# Wait for gateway to become healthy (max 30s)
for attempt in range(15):
await asyncio.sleep(2)
if self._gateway_process.poll() is not None:
raise RuntimeError("OpenClaw gateway process exited")
try:
health = subprocess.run(
["openclaw", "gateway", "health"],
capture_output=True, text=True, timeout=10,
env=os.environ.copy()
)
if health.returncode == 0:
logger.info(f"OpenClaw gateway healthy (PID: {self._gateway_process.pid})")
break
except subprocess.TimeoutExpired:
logger.debug(f"Gateway health check timeout (attempt {attempt+1}/15)")
else:
raise RuntimeError("OpenClaw gateway did not become healthy within 30s")
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
return OpenClawA2AExecutor(heartbeat=config.heartbeat)
class OpenClawA2AExecutor(AgentExecutor):
"""Proxies A2A messages to OpenClaw via `openclaw agent` CLI subprocess."""
def __init__(self, heartbeat=None):
self._heartbeat = heartbeat
async def execute(self, context, event_queue):
from a2a.utils import new_agent_text_message
user_message = extract_message_text(context)
if not user_message:
await event_queue.enqueue_event(new_agent_text_message("No message provided"))
return
await set_current_task(self._heartbeat, brief_task(user_message))
# Call OpenClaw agent via CLI
try:
proc = await asyncio.create_subprocess_exec(
"openclaw", "agent",
"--session-id", context.task_id or "default",
"--message", user_message,
"--json", "--timeout", "120",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env={**os.environ, "PATH": f"{os.path.expanduser('~/.local/bin')}:{os.environ.get('PATH', '')}"}
)
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=130)
output = stdout.decode().strip()
if proc.returncode == 0 and output:
try:
data = json.loads(output)
payloads = data.get("result", {}).get("payloads", [])
if payloads:
reply = payloads[0].get("text", "")
else:
reply = str(data)
except json.JSONDecodeError:
reply = output
else:
reply = f"OpenClaw error: {stderr.decode()[:300]}" if stderr else f"OpenClaw returned code {proc.returncode}"
except asyncio.TimeoutError:
reply = "OpenClaw timed out after 120s"
except Exception as e:
reply = f"OpenClaw error: {e}"
finally:
await set_current_task(self._heartbeat, "")
await event_queue.enqueue_event(new_agent_text_message(reply))
async def cancel(self, context, event_queue): # pragma: no cover
pass
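# Illustrative sketch (not shipped code): the JSON envelope the executor
# expects back from `openclaw agent --json`, reduced to a reply string the
# same way execute() does above. The payload shape is inferred from this
# parser, not from OpenClaw's documentation.
def _example_parse_openclaw_output() -> str:
    output = '{"result": {"payloads": [{"text": "Hello from OpenClaw"}]}}'
    data = json.loads(output)
    payloads = data.get("result", {}).get("payloads", [])
    return payloads[0].get("text", "") if payloads else str(data)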

View File

@ -1,190 +1,2 @@
"""Shared runtime helpers for A2A-backed workspace executors."""
from __future__ import annotations
from typing import Any
from a2a.server.agent_execution import RequestContext
def _extract_part_text(part) -> str:
"""Extract text from a message part, handling dicts and A2A objects."""
if isinstance(part, dict):
text = part.get("text", "")
if text:
return text
root = part.get("root")
if isinstance(root, dict):
return root.get("text", "")
return ""
if hasattr(part, "text") and part.text:
return part.text
if hasattr(part, "root") and hasattr(part.root, "text") and part.root.text:
return part.root.text
return ""
def extract_message_text(context_or_parts) -> str:
"""Extract concatenated plain text from A2A message parts."""
parts = getattr(getattr(context_or_parts, "message", None), "parts", None)
if parts is None:
parts = context_or_parts
return " ".join(
text for part in (parts or []) if (text := _extract_part_text(part))
).strip()
def extract_history(context: RequestContext) -> list[tuple[str, str]]:
"""Extract conversation history from A2A request metadata."""
messages: list[tuple[str, str]] = []
request = getattr(context, "request", None)
metadata = getattr(request, "metadata", None) if request else None
if not isinstance(metadata, dict):
metadata = getattr(context, "metadata", None) or {}
history = metadata.get("history", []) if isinstance(metadata, dict) else []
if not isinstance(history, list):
return messages
for entry in history:
if not isinstance(entry, dict):
continue
role = entry.get("role", "user")
parts = entry.get("parts", [])
text = " ".join(
text for part in (parts or []) if (text := _extract_part_text(part))
).strip()
if text:
mapped_role = "human" if role == "user" else "ai"
messages.append((mapped_role, text))
return messages
def format_conversation_history(history: list[tuple[str, str]]) -> str:
"""Render `(role, text)` history into a stable human-readable transcript."""
return "\n".join(
f"{'User' if role == 'human' else 'Agent'}: {text}" for role, text in history
)
def build_task_text(user_message: str, history: list[tuple[str, str]]) -> str:
"""Build a single task/request string with optional prepended conversation history."""
if not history:
return user_message
transcript = format_conversation_history(history)
return f"Conversation so far:\n{transcript}\n\nCurrent request: {user_message}"
def append_peer_guidance(
base_text: str | None,
peers_info: str,
*,
default_text: str,
tool_name: str,
) -> str:
"""Append peer guidance text when peers are available."""
text = (base_text or default_text).strip()
if peers_info:
text += f"\n\n## Peers\n{peers_info}\nUse {tool_name} to communicate with them."
return text
def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Return compact peer metadata for prompt rendering."""
summaries: list[dict[str, Any]] = []
for peer in peers:
agent_card = peer.get("agent_card")
if not agent_card:
continue
if isinstance(agent_card, str):
try:
import json
agent_card = json.loads(agent_card)
except Exception:
continue
if not isinstance(agent_card, dict):
continue
skills = agent_card.get("skills", [])
summaries.append(
{
"id": peer.get("id", "unknown"),
"name": agent_card.get("name", peer.get("name", "Unknown")),
"status": peer.get("status", "unknown"),
"skills": [
s.get("name", s.get("id", ""))
for s in skills
if isinstance(s, dict)
],
}
)
return summaries
def build_peer_section(
peers: list[dict[str, Any]],
*,
heading: str = "## Your Peers (workspaces you can delegate to)",
instruction: str = (
"Use the `delegate_to_workspace` tool to send tasks to peers. "
"Only delegate to peers listed above."
),
) -> str:
"""Render a stable peer section for system prompts."""
summaries = summarize_peer_cards(peers)
if not summaries:
return ""
parts = [heading, ""]
for peer in summaries:
parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})")
if peer["skills"]:
parts.append(f" Skills: {', '.join(peer['skills'])}")
parts.append("")
parts.append(instruction)
return "\n".join(parts)
def brief_task(text: str, limit: int = 60) -> str:
"""Create a short human-readable task label for the heartbeat banner."""
return text[:limit] + ("..." if len(text) > limit else "")
async def set_current_task(heartbeat: Any, task: str) -> None:
"""Update current task on heartbeat and push immediately to platform.
The heartbeat loop only fires every 30s, so quick tasks would finish
before the canvas ever sees them. Setting a task pushes immediately.
Clearing a task only updates the heartbeat object; the next heartbeat
cycle will broadcast the clear, keeping the task visible longer.
"""
if heartbeat:
heartbeat.current_task = task
heartbeat.active_tasks = 1 if task else 0
# Only push immediately when SETTING a task (not clearing)
# Clearing is handled by the next heartbeat cycle, which keeps
# the task visible on the canvas for quick A2A responses
if not task:
return
import os
workspace_id = os.environ.get("WORKSPACE_ID", "")
platform_url = os.environ.get("PLATFORM_URL", "")
if workspace_id and platform_url:
try:
import httpx
async with httpx.AsyncClient(timeout=3.0) as client:
await client.post(
f"{platform_url}/registry/heartbeat",
json={
"workspace_id": workspace_id,
"current_task": task,
"active_tasks": 1,
"error_rate": 0,
"sample_error": "",
"uptime_seconds": 0,
},
)
except Exception:
pass # Best-effort
"""Re-export from shared_runtime for backward compat."""
from shared_runtime import * # noqa: F401,F403

View File

@ -17,7 +17,7 @@ import os
import httpx
from langchain_core.tools import tool
from adapters.shared_runtime import build_peer_section
from shared_runtime import build_peer_section
from policies.routing import build_team_routing_payload
logger = logging.getLogger(__name__)

View File

@ -3,7 +3,7 @@
from pathlib import Path
from skill_loader.loader import LoadedSkill
from adapters.shared_runtime import build_peer_section
from shared_runtime import build_peer_section
DEFAULT_MEMORY_SNAPSHOT_FILES = ("MEMORY.md", "USER.md")

View File

@ -0,0 +1,190 @@
"""Shared runtime helpers for A2A-backed workspace executors."""
from __future__ import annotations
from typing import Any
from a2a.server.agent_execution import RequestContext
def _extract_part_text(part) -> str:
"""Extract text from a message part, handling dicts and A2A objects."""
if isinstance(part, dict):
text = part.get("text", "")
if text:
return text
root = part.get("root")
if isinstance(root, dict):
return root.get("text", "")
return ""
if hasattr(part, "text") and part.text:
return part.text
if hasattr(part, "root") and hasattr(part.root, "text") and part.root.text:
return part.root.text
return ""
def extract_message_text(context_or_parts) -> str:
"""Extract concatenated plain text from A2A message parts."""
parts = getattr(getattr(context_or_parts, "message", None), "parts", None)
if parts is None:
parts = context_or_parts
return " ".join(
text for part in (parts or []) if (text := _extract_part_text(part))
).strip()
def extract_history(context: RequestContext) -> list[tuple[str, str]]:
"""Extract conversation history from A2A request metadata."""
messages: list[tuple[str, str]] = []
request = getattr(context, "request", None)
metadata = getattr(request, "metadata", None) if request else None
if not isinstance(metadata, dict):
metadata = getattr(context, "metadata", None) or {}
history = metadata.get("history", []) if isinstance(metadata, dict) else []
if not isinstance(history, list):
return messages
for entry in history:
if not isinstance(entry, dict):
continue
role = entry.get("role", "user")
parts = entry.get("parts", [])
text = " ".join(
text for part in (parts or []) if (text := _extract_part_text(part))
).strip()
if text:
mapped_role = "human" if role == "user" else "ai"
messages.append((mapped_role, text))
return messages
def format_conversation_history(history: list[tuple[str, str]]) -> str:
"""Render `(role, text)` history into a stable human-readable transcript."""
return "\n".join(
f"{'User' if role == 'human' else 'Agent'}: {text}" for role, text in history
)
def build_task_text(user_message: str, history: list[tuple[str, str]]) -> str:
"""Build a single task/request string with optional prepended conversation history."""
if not history:
return user_message
transcript = format_conversation_history(history)
return f"Conversation so far:\n{transcript}\n\nCurrent request: {user_message}"
def append_peer_guidance(
base_text: str | None,
peers_info: str,
*,
default_text: str,
tool_name: str,
) -> str:
"""Append peer guidance text when peers are available."""
text = (base_text or default_text).strip()
if peers_info:
text += f"\n\n## Peers\n{peers_info}\nUse {tool_name} to communicate with them."
return text
def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Return compact peer metadata for prompt rendering."""
summaries: list[dict[str, Any]] = []
for peer in peers:
agent_card = peer.get("agent_card")
if not agent_card:
continue
if isinstance(agent_card, str):
try:
import json
agent_card = json.loads(agent_card)
except Exception:
continue
if not isinstance(agent_card, dict):
continue
skills = agent_card.get("skills", [])
summaries.append(
{
"id": peer.get("id", "unknown"),
"name": agent_card.get("name", peer.get("name", "Unknown")),
"status": peer.get("status", "unknown"),
"skills": [
s.get("name", s.get("id", ""))
for s in skills
if isinstance(s, dict)
],
}
)
return summaries
def build_peer_section(
peers: list[dict[str, Any]],
*,
heading: str = "## Your Peers (workspaces you can delegate to)",
instruction: str = (
"Use the `delegate_to_workspace` tool to send tasks to peers. "
"Only delegate to peers listed above."
),
) -> str:
"""Render a stable peer section for system prompts."""
summaries = summarize_peer_cards(peers)
if not summaries:
return ""
parts = [heading, ""]
for peer in summaries:
parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})")
if peer["skills"]:
parts.append(f" Skills: {', '.join(peer['skills'])}")
parts.append("")
parts.append(instruction)
return "\n".join(parts)
def brief_task(text: str, limit: int = 60) -> str:
"""Create a short human-readable task label for the heartbeat banner."""
return text[:limit] + ("..." if len(text) > limit else "")
async def set_current_task(heartbeat: Any, task: str) -> None:
"""Update current task on heartbeat and push immediately to platform.
The heartbeat loop only fires every 30s, so quick tasks would finish
before the canvas ever sees them. Setting a task pushes immediately.
Clearing a task only updates the heartbeat object; the next heartbeat
cycle will broadcast the clear, keeping the task visible longer.
"""
if heartbeat:
heartbeat.current_task = task
heartbeat.active_tasks = 1 if task else 0
# Only push immediately when SETTING a task (not clearing)
# Clearing is handled by the next heartbeat cycle, which keeps
# the task visible on the canvas for quick A2A responses
if not task:
return
import os
workspace_id = os.environ.get("WORKSPACE_ID", "")
platform_url = os.environ.get("PLATFORM_URL", "")
if workspace_id and platform_url:
try:
import httpx
async with httpx.AsyncClient(timeout=3.0) as client:
await client.post(
f"{platform_url}/registry/heartbeat",
json={
"workspace_id": workspace_id,
"current_task": task,
"active_tasks": 1,
"error_rate": 0,
"sample_error": "",
"uptime_seconds": 0,
},
)
except Exception:
pass # Best-effort

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,214 +0,0 @@
"""Tests for the shared _common_setup() pipeline and tool conversion helpers."""
import importlib.util
import sys
from types import ModuleType
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# --- Mock missing optional deps ---
def _ensure_crewai_mock():
if "crewai" not in sys.modules:
crewai_mod = ModuleType("crewai")
crewai_tools_mod = ModuleType("crewai.tools")
# Make @tool a passthrough decorator that preserves the function
crewai_tools_mod.tool = lambda name: (lambda f: f)
crewai_mod.tools = crewai_tools_mod
crewai_mod.__version__ = "0.0.0-mock"
sys.modules["crewai"] = crewai_mod
sys.modules["crewai.tools"] = crewai_tools_mod
def _ensure_autogen_mock():
if "autogen_agentchat" not in sys.modules:
mod = ModuleType("autogen_agentchat")
agents_mod = ModuleType("autogen_agentchat.agents")
agents_mod.AssistantAgent = MagicMock
mod.agents = agents_mod
sys.modules["autogen_agentchat"] = mod
sys.modules["autogen_agentchat.agents"] = agents_mod
_ensure_crewai_mock()
_ensure_autogen_mock()
# --- Mock helpers ---
def _mock_load_plugins(**kwargs):
plugins = MagicMock()
plugins.plugin_names = []
plugins.skill_dirs = []
plugins.prompt_fragments = []
plugins.rules = []
return plugins
def _mock_load_skills(config_path, tools):
return []
async def _mock_get_children():
return []
async def _mock_get_children_with_kids():
return [{"id": "child-1", "name": "Child", "role": "Worker", "status": "online"}]
async def _mock_get_parent_context():
return []
async def _mock_get_peer_capabilities(platform_url, workspace_id):
return [{"id": "peer-1", "name": "Peer", "status": "online", "agent_card": {"skills": []}}]
def _mock_build_system_prompt(*args, **kwargs):
return "You are a test agent."
def _mock_build_children_description(children):
return "## Team\n- Child: Worker"
# All patches needed for _common_setup
_SETUP_PATCHES = {
"plugins.load_plugins": _mock_load_plugins,
"skill_loader.loader.load_skills": _mock_load_skills,
"coordinator.get_children": _mock_get_children,
"coordinator.get_parent_context": _mock_get_parent_context,
"coordinator.build_children_description": _mock_build_children_description,
"prompt.get_peer_capabilities": _mock_get_peer_capabilities,
"prompt.build_system_prompt": _mock_build_system_prompt,
}
def _make_test_adapter():
from adapters.base import BaseAdapter, AdapterConfig
class TestAdapter(BaseAdapter):
@staticmethod
def name(): return "test"
@staticmethod
def display_name(): return "Test"
@staticmethod
def description(): return "Test adapter"
async def setup(self, config): pass
async def create_executor(self, config): pass
return TestAdapter(), AdapterConfig(model="openai:test", config_path="/tmp", workspace_id="ws-test")
# --- Common Setup Tests ---
@pytest.mark.asyncio
async def test_common_setup_returns_core_tools():
"""_common_setup returns 5 core tools."""
adapter, config = _make_test_adapter()
patches = {k: v for k, v in _SETUP_PATCHES.items()}
with patch.dict("os.environ", {"PLATFORM_URL": "http://test:8080"}):
ctx = [patch(k, v) for k, v in patches.items()]
for c in ctx:
c.start()
try:
result = await adapter._common_setup(config)
finally:
for c in ctx:
c.stop()
assert len(result.langchain_tools) == 6 # 6 core tools
tool_names = [t.name for t in result.langchain_tools]
assert "delegate_to_workspace" in tool_names
assert "check_delegation_status" in tool_names
assert "request_approval" in tool_names
assert "commit_memory" in tool_names
assert "search_memory" in tool_names
assert "run_code" in tool_names
assert result.system_prompt == "You are a test agent."
assert result.is_coordinator is False
@pytest.mark.asyncio
async def test_common_setup_coordinator_adds_routing_tool():
"""When workspace has children, coordinator tool is added."""
adapter, config = _make_test_adapter()
patches = dict(_SETUP_PATCHES)
patches["coordinator.get_children"] = _mock_get_children_with_kids
with patch.dict("os.environ", {"PLATFORM_URL": "http://test:8080"}), _apply_patches(patches):
result = await adapter._common_setup(config)
assert result.is_coordinator is True
assert len(result.langchain_tools) == 7 # 6 core + route_task_to_team
# Last tool should be route_task_to_team (function name or .name attribute)
last_tool = result.langchain_tools[-1]
tool_id = getattr(last_tool, "name", None) or getattr(last_tool, "__name__", "")
assert "route_task_to_team" in tool_id
# --- Tool Conversion Tests ---
def test_langchain_to_crewai_preserves_name():
"""CrewAI wrapper preserves tool name and description."""
from adapters.crewai.adapter import _langchain_to_crewai
mock_tool = MagicMock()
mock_tool.name = "test_tool"
mock_tool.description = "A test tool for testing."
mock_tool.ainvoke = AsyncMock(return_value={"result": "ok"})
wrapped = _langchain_to_crewai(mock_tool)
# With our mock @tool decorator, the wrapper is the raw function
assert wrapped.__doc__ == "A test tool for testing."
@pytest.mark.skipif(
not importlib.util.find_spec("autogen_core"),
reason="autogen_core not installed",
)
def test_langchain_to_autogen_preserves_name():
"""AutoGen wrapper preserves tool name and description via FunctionTool."""
from adapters.autogen.adapter import _langchain_to_autogen
mock_tool = MagicMock()
mock_tool.name = "test_tool"
mock_tool.description = "A test tool for testing."
mock_tool.ainvoke = AsyncMock(return_value={"result": "ok"})
wrapped = _langchain_to_autogen(mock_tool)
assert wrapped.name == "test_tool"
assert wrapped.description == "A test tool for testing."
@pytest.mark.skipif(
not importlib.util.find_spec("autogen_core"),
reason="autogen_core not installed",
)
@pytest.mark.asyncio
async def test_langchain_to_autogen_calls_ainvoke():
"""AutoGen FunctionTool wrapper calls the original tool's ainvoke."""
from adapters.autogen.adapter import _langchain_to_autogen
mock_tool = MagicMock()
mock_tool.name = "delegate"
mock_tool.description = "Delegate a task."
mock_tool.ainvoke = AsyncMock(return_value={"success": True})
wrapped = _langchain_to_autogen(mock_tool)
# FunctionTool.run_json expects a JSON dict with the function params
result = await wrapped.run_json({"input": '{"workspace_id": "ws-1", "task": "do stuff"}'}, cancellation_token=None)
mock_tool.ainvoke.assert_called_once_with({"workspace_id": "ws-1", "task": "do stuff"})
assert "True" in str(result)

View File

@ -1,146 +0,0 @@
"""Tests for Hermes escalation-ladder classification and config parsing.
The truth table in ``should_escalate`` is the single chokepoint that
decides whether an inference failure wastes the next ladder rung's
quota or triggers a useful retry. These tests pin that table against
real exception shapes from anthropic / openai / google-genai SDKs and
the wrapped-error strings we've observed in platform logs.
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
# Make the workspace-template/ modules importable without installing.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from adapters.hermes.escalation import ( # noqa: E402
LadderRung,
parse_ladder,
should_escalate,
)
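# A minimal sketch (not from the source) of how a caller combines these two
# helpers; `call_model` stands in for a hypothetical per-rung inference callable:
def _ladder_walk_sketch(rungs: list[LadderRung], call_model):
last_exc = None
for rung in rungs:
try:
return call_model(rung.provider, rung.model)
except Exception as exc:
if not should_escalate(exc):
raise  # auth/config failures won't improve on another rung
last_exc = exc
raise last_exc if last_exc is not None else RuntimeError("empty ladder")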
# --------------------------------------------------------------------------
# parse_ladder
# --------------------------------------------------------------------------
def test_parse_ladder_empty_returns_empty():
assert parse_ladder(None) == []
assert parse_ladder([]) == []
def test_parse_ladder_accepts_dicts():
raw = [
{"provider": "gemini", "model": "gemini-2.5-flash"},
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
]
rungs = parse_ladder(raw)
assert len(rungs) == 2
assert rungs[0] == LadderRung("gemini", "gemini-2.5-flash")
assert rungs[1] == LadderRung("anthropic", "claude-opus-4-1-20250805")
def test_parse_ladder_passes_through_rung_instances():
# Programmatic callers can pass already-constructed rungs.
existing = LadderRung("openai", "gpt-4o-mini")
rungs = parse_ladder([existing])
assert rungs == [existing]
def test_parse_ladder_skips_malformed_entries():
# Missing model / missing provider / wrong type — all skipped with
# a warning, not raised. A missing rung is less bad than a boot fail.
raw = [
{"provider": "gemini"}, # no model
{"model": "gpt-4o"}, # no provider
"not a dict", # wrong type
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"}, # good
]
rungs = parse_ladder(raw)
assert len(rungs) == 1
assert rungs[0].provider == "anthropic"
# --------------------------------------------------------------------------
# should_escalate — truth table
# --------------------------------------------------------------------------
class _FakeRateLimitError(Exception):
"""Stand-in with the same class name the openai SDK uses (rate limits)."""
pass
_FakeRateLimitError.__name__ = "RateLimitError"
class _FakeOverloadedError(Exception):
"""Stand-in for anthropic.OverloadedError (HTTP 529)."""
pass
_FakeOverloadedError.__name__ = "OverloadedError"
class _FakeAPITimeoutError(Exception):
pass
_FakeAPITimeoutError.__name__ = "APITimeoutError"
class _FakeAPIConnectionError(Exception):
pass
_FakeAPIConnectionError.__name__ = "APIConnectionError"
class _FakeInternalServerError(Exception):
pass
_FakeInternalServerError.__name__ = "InternalServerError"
@pytest.mark.parametrize("exc,expected", [
# --- Escalatable: typed rate-limit / overload / timeout classes ---
(_FakeRateLimitError("rate_limit_exceeded on gpt-4o"), True),
(_FakeOverloadedError("overloaded_error"), True),
(_FakeAPITimeoutError("Request timed out."), True),
(_FakeAPIConnectionError("Connection error."), True),
(_FakeInternalServerError("Internal server error 500."), True),
# --- Escalatable: context-length exceeded on current model ---
(ValueError("This model's maximum context length is 200000 tokens. However, your messages resulted in ..."), True),
(RuntimeError("error: context_length_exceeded"), True),
(RuntimeError("prompt is too long: 210000 tokens"), True),
(RuntimeError("error.type: prompt_too_long"), True),
(RuntimeError("exceeds model context window of 1048576"), True),
# --- Escalatable: gateway markers (HTTP-wrapped) ---
(RuntimeError("Upstream 502 Bad Gateway"), True),
(RuntimeError("503 Service Unavailable"), True),
(RuntimeError("Service is temporarily unavailable, please try again."), True),
(RuntimeError("Anthropic API is overloaded."), True),
# --- Escalatable: status-code substrings ---
(RuntimeError("HTTP 429 Too Many Requests"), True),
(RuntimeError("HTTP 529 Overloaded"), True),
# --- NOT escalatable: auth / permission (config bugs, wasting quota) ---
(RuntimeError("401 Unauthorized — invalid api key"), False),
(RuntimeError("403 Forbidden: permission_denied"), False),
(RuntimeError("authentication_error: invalid_api_key"), False),
# --- NOT escalatable: auth-wrapped rate-limit (priority = hard-reject auth) ---
# If we see '401' + rate-limit markers simultaneously, prefer not escalating
# because the underlying 401 won't get better on a different model.
(_FakeRateLimitError("RateLimitError wrapping 401 Unauthorized"), False),
# --- NOT escalatable: unrelated errors ---
(ValueError("bad config"), False),
(KeyError("missing key"), False),
(None, False),
])
def test_should_escalate_truth_table(exc, expected):
assert should_escalate(exc) is expected
def test_should_escalate_case_insensitive():
# We lowercase the message before substring matching so "OVERLOADED"
# from one provider and "overloaded" from another both match.
assert should_escalate(RuntimeError("SERVICE OVERLOADED")) is True
assert should_escalate(RuntimeError("503 SERVICE UNAVAILABLE")) is True

View File

@ -1,160 +0,0 @@
"""Integration-ish tests for the Hermes executor's escalation behaviour.
These tests exercise ``_do_inference`` against a mocked ``_dispatch``
to prove that:
- No-ladder path is a single call (original behaviour)
- Ladder path retries on escalatable errors
- Ladder path stops early on non-escalatable errors
- Ladder path raises the last error when every rung fails
- Successful rung logs the recovery and returns
No network calls, no provider SDKs. If this ever starts calling real
providers, that's a test-isolation regression worth flagging.
"""
from __future__ import annotations
import asyncio
import sys
from pathlib import Path
import pytest
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from adapters.hermes.escalation import LadderRung # noqa: E402
from adapters.hermes.executor import HermesA2AExecutor # noqa: E402
from adapters.hermes.providers import PROVIDERS # noqa: E402
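# Expected walk in sketch form (inferred from the assertions below, not taken
# from the executor source):
#   attempts = [pinned (provider, model)] + [each resolvable ladder rung]
#   for provider, model in attempts:
#       try: return await self._dispatch(cfg, model, ...)
#       except exc: re-raise unless should_escalate(exc) and rungs remain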
class _FakeRateLimitError(Exception):
pass
_FakeRateLimitError.__name__ = "RateLimitError"
def _make_executor(monkeypatch, dispatch_behaviour, ladder=None):
"""Build an executor with a mocked ``_dispatch``.
``dispatch_behaviour`` is a callable that receives (cfg, model, user_msg,
history, system_prompt) and returns a string OR raises. Use this to
simulate success / failure per rung.
"""
cfg = PROVIDERS["anthropic"]
ex = HermesA2AExecutor(
provider_cfg=cfg,
api_key="test-key",
model="claude-haiku-4-5-20251001",
escalation_ladder=ladder,
)
calls: list[tuple[str, str]] = []
async def fake_dispatch(cfg, model, user_msg, history, system_prompt):
calls.append((cfg.name, model))
result = dispatch_behaviour(cfg.name, model, user_msg, history, system_prompt)
if isinstance(result, BaseException):
raise result
return result
monkeypatch.setattr(ex, "_dispatch", fake_dispatch)
return ex, calls
def _run(coro):
# Run a coroutine to completion on a fresh event loop; asyncio.run() both
# creates and closes the loop, which is all these sync tests need.
return asyncio.run(coro)
def test_no_ladder_single_call(monkeypatch):
ex, calls = _make_executor(monkeypatch, lambda *_: "hello", ladder=None)
reply = asyncio.run(ex._do_inference("test"))
assert reply == "hello"
assert calls == [("anthropic", "claude-haiku-4-5-20251001")]
def test_ladder_not_triggered_on_success(monkeypatch):
# Ladder configured, but first attempt succeeds — ladder never engaged.
ladder = [
{"provider": "openai", "model": "gpt-4o-mini"},
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
]
ex, calls = _make_executor(monkeypatch, lambda *_: "fast reply", ladder=ladder)
reply = asyncio.run(ex._do_inference("test"))
assert reply == "fast reply"
assert len(calls) == 1
assert calls[0] == ("anthropic", "claude-haiku-4-5-20251001") # pinned (haiku) wins
def test_ladder_escalates_on_rate_limit(monkeypatch):
# First rung rate-limits, second rung (opus) succeeds.
attempt = {"n": 0}
def behaviour(provider, model, *_):
attempt["n"] += 1
if attempt["n"] == 1:
return _FakeRateLimitError("429 rate_limit_exceeded on anthropic")
return f"escalated reply from {provider}:{model}"
ladder = [
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
]
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
reply = asyncio.run(ex._do_inference("test"))
assert "escalated reply" in reply
# Two attempts: pinned haiku (failed), then opus (succeeded).
assert [model for _, model in calls] == [
"claude-haiku-4-5-20251001",
"claude-opus-4-1-20250805",
]
def test_ladder_stops_on_non_escalatable_error(monkeypatch):
# First rung returns a 401 — ladder should NOT retry, should raise.
def behaviour(*_):
return RuntimeError("401 Unauthorized invalid api key")
ladder = [{"provider": "anthropic", "model": "claude-opus-4-1-20250805"}]
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
with pytest.raises(RuntimeError, match="401"):
asyncio.run(ex._do_inference("test"))
# Only one attempt — non-escalatable error stopped the walk.
assert len(calls) == 1
def test_ladder_raises_last_error_when_all_rungs_fail(monkeypatch):
def behaviour(*_):
return _FakeRateLimitError("429 across the board")
ladder = [
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
]
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
with pytest.raises(_FakeRateLimitError):
asyncio.run(ex._do_inference("test"))
# Both rungs attempted (pinned + one from ladder).
assert len(calls) == 2
def test_ladder_skips_unknown_provider(monkeypatch):
# A misconfigured rung with a non-existent provider is logged + skipped;
# ladder still walks remaining rungs.
def behaviour(provider, *_):
if provider == "anthropic":
return _FakeRateLimitError("first rung rate limit")
return f"ok from {provider}"
ladder = [
{"provider": "totally_made_up", "model": "fake-1"}, # should be skipped
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
]
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
# First attempt uses the pinned (haiku) which raises, then skips
# totally_made_up, then reaches opus. Because behaviour raises for
# provider == "anthropic", the opus rung also fails (same provider). Assert
# the skip happened (call count reflects 2 real attempts, not 3).
with pytest.raises(_FakeRateLimitError):
asyncio.run(ex._do_inference("test"))
assert len(calls) == 2 # pinned + opus (totally_made_up skipped)

View File

@ -1,487 +0,0 @@
"""Tests for Phase 2 auth_scheme dispatch in adapters/hermes/executor.py.
These cover the NEW behavior only (HermesA2AExecutor._do_inference dispatch
based on ProviderConfig.auth_scheme). Phase 1 registry tests live in
test_hermes_providers.py unchanged by Phase 2.
"""
from __future__ import annotations
import sys
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# Load providers.py + escalation.py directly (same pattern as
# test_hermes_providers.py). The escalation module landed with the
# ladder work — it's now imported by executor.py, so the inline-exec
# pattern below has to find both modules at top level.
_HERMES_DIR = Path(__file__).parent.parent / "adapters" / "hermes"
sys.path.insert(0, str(_HERMES_DIR))
import providers # type: ignore # noqa: E402
import escalation # type: ignore # noqa: E402
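# Dispatch rule these tests pin down, as a sketch (hypothetical helper; the
# real logic is a method on HermesA2AExecutor):
def _pick_inference_path(auth_scheme: str) -> str:
# Unknown schemes deliberately fall back to the OpenAI-compat path.
return {
"anthropic": "_do_anthropic_native",
"gemini": "_do_gemini_native",
}.get(auth_scheme, "_do_openai_compat")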
def _load_executor_ns() -> dict:
"""Exec executor.py into a fresh namespace with relative imports rewritten.
executor.py's relative-import head (``from .providers import ...``) only
resolves when it's loaded as part of the ``adapters.hermes`` package. These
tests load providers/escalation via sys.path instead, so we register them
under a synthetic package name, rewrite the relative imports to sibling
imports, and exec the source. ``__name__`` must be present in the exec
globals because executor.py calls ``logging.getLogger(__name__)`` at import
time; without it the exec fails with a KeyError.
"""
pkg = sys.modules.setdefault("hermes_executor_under_test", MagicMock())
pkg.providers = providers  # type: ignore[attr-defined]
pkg.escalation = escalation  # type: ignore[attr-defined]
sys.modules["hermes_executor_under_test.providers"] = providers
sys.modules["hermes_executor_under_test.escalation"] = escalation
src = (_HERMES_DIR / "executor.py").read_text()
src = src.replace("from .providers import", "from providers import")
src = src.replace("from .escalation import", "from escalation import")
ns: dict = {"__name__": "hermes_executor_under_test"}
exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
return ns
def _make_executor(provider_name: str):
"""Build a HermesA2AExecutor directly without going through create_executor."""
cfg = providers.PROVIDERS[provider_name]
HermesA2AExecutor = _load_executor_ns()["HermesA2AExecutor"]
return HermesA2AExecutor(
provider_cfg=cfg,
api_key="test-key",
model=cfg.default_model,
)
def test_anthropic_entry_has_anthropic_scheme():
"""Phase 2a: anthropic's auth_scheme is 'anthropic'."""
cfg = providers.PROVIDERS["anthropic"]
assert cfg.auth_scheme == "anthropic"
def test_gemini_entry_has_gemini_scheme():
"""Phase 2b: gemini's auth_scheme is 'gemini'."""
cfg = providers.PROVIDERS["gemini"]
assert cfg.auth_scheme == "gemini"
# Base URL no longer has the /v1beta/openai suffix — native SDK uses bare host.
assert "/openai" not in cfg.base_url
assert cfg.base_url.startswith("https://generativelanguage.googleapis.com")
def test_all_other_providers_still_openai_scheme():
"""Phase 2 changes only anthropic + gemini. Every other provider keeps auth_scheme='openai'."""
native_providers = {"anthropic", "gemini"}
for name, cfg in providers.PROVIDERS.items():
if name in native_providers:
continue
assert cfg.auth_scheme == "openai", (
f"{name} unexpectedly has auth_scheme={cfg.auth_scheme!r}"
)
@pytest.mark.asyncio
async def test_dispatch_openai_scheme_calls_openai_compat():
"""auth_scheme='openai' → _do_openai_compat runs, native paths do not."""
executor = _make_executor("openai")
executor._do_openai_compat = AsyncMock(return_value="openai-result")
executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
executor._do_gemini_native = AsyncMock(return_value="should-not-run")
result = await executor._do_inference("hello")
# Phase 2c: _do_inference passes (user_message, history) to the path;
# when no history supplied, second arg is None.
executor._do_openai_compat.assert_awaited_once_with("hello", None, None)
executor._do_anthropic_native.assert_not_awaited()
executor._do_gemini_native.assert_not_awaited()
assert result == "openai-result"
@pytest.mark.asyncio
async def test_dispatch_anthropic_scheme_calls_anthropic_native():
"""auth_scheme='anthropic' → _do_anthropic_native runs, others do not."""
executor = _make_executor("anthropic")
executor._do_openai_compat = AsyncMock(return_value="should-not-run")
executor._do_anthropic_native = AsyncMock(return_value="anthropic-result")
executor._do_gemini_native = AsyncMock(return_value="should-not-run")
result = await executor._do_inference("hello")
executor._do_anthropic_native.assert_awaited_once_with("hello", None, None)
executor._do_openai_compat.assert_not_awaited()
executor._do_gemini_native.assert_not_awaited()
assert result == "anthropic-result"
@pytest.mark.asyncio
async def test_dispatch_gemini_scheme_calls_gemini_native():
"""auth_scheme='gemini' → _do_gemini_native runs, others do not. Phase 2b."""
executor = _make_executor("gemini")
executor._do_openai_compat = AsyncMock(return_value="should-not-run")
executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
executor._do_gemini_native = AsyncMock(return_value="gemini-result")
result = await executor._do_inference("hello")
executor._do_gemini_native.assert_awaited_once_with("hello", None, None)
executor._do_openai_compat.assert_not_awaited()
executor._do_anthropic_native.assert_not_awaited()
assert result == "gemini-result"
# ---------------------------------------------------------------------------
# Phase 2c — history-to-message conversion tests
# ---------------------------------------------------------------------------
def test_history_to_openai_messages_empty_history():
"""No history → single user message (back-compat with pre-2c single-turn shape)."""
HermesA2AExecutor = _load_executor_ns()["HermesA2AExecutor"]
msgs = HermesA2AExecutor._history_to_openai_messages("current turn", [])
assert msgs == [{"role": "user", "content": "current turn"}]
def test_history_to_openai_messages_multi_turn():
"""A2A history roles map: human→user, ai→assistant. Current turn appended as user."""
HermesA2AExecutor = _load_executor_ns()["HermesA2AExecutor"]
history = [("human", "first question"), ("ai", "first answer"), ("human", "follow-up")]
msgs = HermesA2AExecutor._history_to_openai_messages("current turn", history)
assert msgs == [
{"role": "user", "content": "first question"},
{"role": "assistant", "content": "first answer"},
{"role": "user", "content": "follow-up"},
{"role": "user", "content": "current turn"},
]
def test_history_to_anthropic_messages_same_as_openai():
"""Anthropic Messages API uses the same wire shape as OpenAI for text-only turns."""
HermesA2AExecutor = _load_executor_ns()["HermesA2AExecutor"]
history = [("human", "hello"), ("ai", "hi")]
openai_msgs = HermesA2AExecutor._history_to_openai_messages("how are you?", history)
anth_msgs = HermesA2AExecutor._history_to_anthropic_messages("how are you?", history)
assert openai_msgs == anth_msgs
def test_history_to_gemini_contents_uses_model_role_and_parts_wrapper():
"""Gemini uses role='user'|'model' (NOT 'assistant') and wraps text in parts=[{text}]."""
HermesA2AExecutor = _load_executor_ns()["HermesA2AExecutor"]
history = [("human", "hi"), ("ai", "hello back")]
contents = HermesA2AExecutor._history_to_gemini_contents("follow-up?", history)
assert contents == [
{"role": "user", "parts": [{"text": "hi"}]},
{"role": "model", "parts": [{"text": "hello back"}]},
{"role": "user", "parts": [{"text": "follow-up?"}]},
]
@pytest.mark.asyncio
async def test_dispatch_passes_history_through():
"""When _do_inference is called with history, it flows through to the provider path."""
executor = _make_executor("anthropic")
executor._do_anthropic_native = AsyncMock(return_value="reply-with-history")
executor._do_openai_compat = AsyncMock()
executor._do_gemini_native = AsyncMock()
history = [("human", "prior q"), ("ai", "prior a")]
result = await executor._do_inference("current", history)
executor._do_anthropic_native.assert_awaited_once_with("current", history, None)
assert result == "reply-with-history"
# ---------------------------------------------------------------------------
# Phase 2d-i — system_prompt dispatch tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_dispatch_passes_system_prompt_to_anthropic():
"""system_prompt flows through _do_inference → _do_anthropic_native as third arg."""
executor = _make_executor("anthropic")
executor._do_anthropic_native = AsyncMock(return_value="reply")
executor._do_openai_compat = AsyncMock()
executor._do_gemini_native = AsyncMock()
await executor._do_inference("user msg", None, "you are a helpful assistant")
executor._do_anthropic_native.assert_awaited_once_with(
"user msg", None, "you are a helpful assistant"
)
@pytest.mark.asyncio
async def test_dispatch_passes_system_prompt_to_gemini():
"""system_prompt flows through _do_inference → _do_gemini_native as third arg."""
executor = _make_executor("gemini")
executor._do_gemini_native = AsyncMock(return_value="reply")
executor._do_openai_compat = AsyncMock()
executor._do_anthropic_native = AsyncMock()
await executor._do_inference("user msg", None, "system instruction")
executor._do_gemini_native.assert_awaited_once_with(
"user msg", None, "system instruction"
)
@pytest.mark.asyncio
async def test_dispatch_passes_system_prompt_to_openai():
"""system_prompt flows through _do_inference → _do_openai_compat as third arg."""
executor = _make_executor("openai")
executor._do_openai_compat = AsyncMock(return_value="reply")
executor._do_anthropic_native = AsyncMock()
executor._do_gemini_native = AsyncMock()
await executor._do_inference("user msg", None, "system prompt")
executor._do_openai_compat.assert_awaited_once_with(
"user msg", None, "system prompt"
)
def test_executor_accepts_config_path_kwarg():
"""HermesA2AExecutor.__init__ accepts config_path and stores it on _config_path."""
HermesA2AExecutor = _load_executor_ns()["HermesA2AExecutor"]
cfg = providers.PROVIDERS["openai"]
# Without config_path — default None
e1 = HermesA2AExecutor(provider_cfg=cfg, api_key="k", model="m")
assert e1._config_path is None
# With config_path
e2 = HermesA2AExecutor(
provider_cfg=cfg, api_key="k", model="m", config_path="/configs"
)
assert e2._config_path == "/configs"
def test_create_executor_forwards_config_path():
"""create_executor(config_path=...) → executor._config_path gets set.
Exercises both the hermes_api_key back-compat path AND the registry
resolution path to make sure config_path threads through both.
"""
create_executor = _load_executor_ns()["create_executor"]
# Path 1: hermes_api_key
e1 = create_executor(hermes_api_key="k", config_path="/path/a")
assert e1._config_path == "/path/a"
# Path 2: registry resolution
import os
os.environ["OPENAI_API_KEY"] = "openai-test"
try:
e2 = create_executor(provider="openai", config_path="/path/b")
assert e2._config_path == "/path/b"
finally:
os.environ.pop("OPENAI_API_KEY", None)
@pytest.mark.asyncio
async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
"""Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""
executor = _make_executor("openai")
# Mutate the cfg field to simulate an unknown scheme (testing the dispatch, not the registry)
executor.provider_cfg = providers.ProviderConfig(
name="futureprovider",
env_vars=("FOO",),
base_url="https://example.com/v1",
default_model="foo",
auth_scheme="some_future_scheme",
)
executor._do_openai_compat = AsyncMock(return_value="fallback-result")
executor._do_anthropic_native = AsyncMock()
executor._do_gemini_native = AsyncMock()
result = await executor._do_inference("hello")
executor._do_openai_compat.assert_awaited_once()
executor._do_anthropic_native.assert_not_awaited()
executor._do_gemini_native.assert_not_awaited()
assert result == "fallback-result"
@pytest.mark.asyncio
async def test_anthropic_native_raises_clear_error_when_sdk_missing(monkeypatch):
"""If the anthropic package is not installed, _do_anthropic_native raises
a clear RuntimeError with install instructions it does NOT silently
fall back to the OpenAI-compat shim (which would lose tool-calling +
vision fidelity invisibly).
"""
executor = _make_executor("anthropic")
# Simulate ImportError on `import anthropic`. We do this by clobbering
# the name in sys.modules so the import statement inside
# _do_anthropic_native hits an ImportError.
monkeypatch.setitem(sys.modules, "anthropic", None)
with pytest.raises(RuntimeError, match="anthropic"):
await executor._do_anthropic_native("hello")
@pytest.mark.asyncio
async def test_gemini_native_raises_clear_error_when_sdk_missing(monkeypatch):
"""If the google-genai package is not installed, _do_gemini_native raises
a clear RuntimeError with install instructions same fail-loud semantics
as the anthropic native path."""
executor = _make_executor("gemini")
# Simulate ImportError on `from google import genai`. Clobbering
# sys.modules["google"] forces the submodule import to fail.
monkeypatch.setitem(sys.modules, "google", None)
with pytest.raises(RuntimeError, match="google-genai"):
await executor._do_gemini_native("hello")
def test_create_executor_passes_provider_cfg():
"""create_executor's back-compat paths should set .provider_cfg on the
returned executor so dispatch has auth_scheme available at runtime."""
create_executor = _load_executor_ns()["create_executor"]
# Path 1: hermes_api_key back-compat → nous_portal cfg
exec1 = create_executor(hermes_api_key="test-key")
assert exec1.provider_cfg.name == "nous_portal"
assert exec1.provider_cfg.auth_scheme == "openai"
# Path 2: explicit provider name → that cfg (anthropic has the new scheme)
import os
os.environ["ANTHROPIC_API_KEY"] = "ant-test"
try:
exec2 = create_executor(provider="anthropic")
assert exec2.provider_cfg.name == "anthropic"
assert exec2.provider_cfg.auth_scheme == "anthropic"
assert exec2.model == "claude-sonnet-4-5"
finally:
os.environ.pop("ANTHROPIC_API_KEY", None)
# Path 3: Phase 2b — gemini explicit resolution
os.environ["GEMINI_API_KEY"] = "gem-test"
try:
exec3 = create_executor(provider="gemini")
assert exec3.provider_cfg.name == "gemini"
assert exec3.provider_cfg.auth_scheme == "gemini"
assert exec3.model == "gemini-2.5-flash"
finally:
os.environ.pop("GEMINI_API_KEY", None)

View File

@ -1,182 +0,0 @@
"""Tests for workspace-template/adapters/hermes/providers.py.
These tests exercise resolve_provider() in isolation; they do not import
anything from adapters/__init__.py, so they don't need the a2a runtime deps.
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
import pytest
# Make the hermes package importable without pulling in adapters/__init__.py
# (which imports the a2a SDK). We load providers.py directly from its file path.
_HERMES_DIR = Path(__file__).parent.parent / "adapters" / "hermes"
sys.path.insert(0, str(_HERMES_DIR))
import providers # type: ignore # noqa: E402
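# Usage sketch of the contract pinned below (assumes no other provider key is
# set in the environment; only the public resolve_provider API is used):
def _resolve_provider_sketch():
os.environ["OPENAI_API_KEY"] = "sk-example"
try:
cfg, key = providers.resolve_provider()          # auto-detect
assert cfg.name == "openai" and key == "sk-example"
cfg, key = providers.resolve_provider("openai")  # explicit, skips auto-detect
finally:
os.environ.pop("OPENAI_API_KEY", None)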
_ALL_PROVIDER_ENV_VARS = (
"HERMES_API_KEY",
"NOUS_API_KEY",
"OPENROUTER_API_KEY",
"OPENAI_API_KEY",
"ANTHROPIC_API_KEY",
"XAI_API_KEY",
"GROK_API_KEY",
"GEMINI_API_KEY",
"GOOGLE_API_KEY",
"QWEN_API_KEY",
"DASHSCOPE_API_KEY",
"GLM_API_KEY",
"ZHIPU_API_KEY",
"KIMI_API_KEY",
"MOONSHOT_API_KEY",
"MINIMAX_API_KEY",
"DEEPSEEK_API_KEY",
"GROQ_API_KEY",
"TOGETHER_API_KEY",
"FIREWORKS_API_KEY",
"MISTRAL_API_KEY",
)
@pytest.fixture(autouse=True)
def _clean_env():
"""Clear every provider env var before each test and restore to the
exact pre-test state on teardown.
Implementation note: earlier version used pytest's monkeypatch fixture,
which tracks deltas from the state at fixture entry. That was buggy
because several tests in this file mutate os.environ directly
(os.environ["HERMES_API_KEY"] = ...), bypassing monkeypatch's
tracking. The direct mutations leaked into the NEXT test file
(test_hermes_smoke.py::test_create_executor_raises_without_keys),
causing a file-order-dependent failure. Pure snapshot/restore
avoids all the delta-tracking edge cases.
"""
saved = {k: os.environ.get(k) for k in _ALL_PROVIDER_ENV_VARS}
for k in _ALL_PROVIDER_ENV_VARS:
os.environ.pop(k, None)
try:
yield
finally:
for k, v in saved.items():
if v is None:
os.environ.pop(k, None)
else:
os.environ[k] = v
def test_registry_is_populated():
"""Phase 1 ships at least 15 providers and every entry is self-consistent."""
assert len(providers.PROVIDERS) >= 15
assert len(providers.RESOLUTION_ORDER) == len(providers.PROVIDERS)
for name, cfg in providers.PROVIDERS.items():
assert cfg.name == name, f"{name}: config.name should match dict key"
assert cfg.env_vars, f"{name}: must declare at least one env var"
assert cfg.base_url.startswith("http"), f"{name}: base_url must be http(s)"
assert cfg.default_model, f"{name}: must declare a default model"
assert name in providers.RESOLUTION_ORDER, f"{name}: missing from resolution order"
def test_resolution_order_has_no_duplicates():
assert len(providers.RESOLUTION_ORDER) == len(set(providers.RESOLUTION_ORDER))
def test_backcompat_hermes_api_key_first():
"""PR 2 back-compat — HERMES_API_KEY auto-detect still routes to Nous Portal."""
os.environ["HERMES_API_KEY"] = "hermes-test-key"
cfg, key = providers.resolve_provider()
assert cfg.name == "nous_portal"
assert key == "hermes-test-key"
def test_backcompat_openrouter_api_key_second():
"""PR 2 back-compat — OPENROUTER_API_KEY still routes to OpenRouter when HERMES_API_KEY is absent."""
os.environ["OPENROUTER_API_KEY"] = "or-test-key"
cfg, key = providers.resolve_provider()
assert cfg.name == "openrouter"
def test_auto_detect_openai():
os.environ["OPENAI_API_KEY"] = "sk-test"
cfg, key = providers.resolve_provider()
assert cfg.name == "openai"
assert cfg.base_url == "https://api.openai.com/v1"
def test_auto_detect_anthropic():
os.environ["ANTHROPIC_API_KEY"] = "ant-test"
cfg, key = providers.resolve_provider()
assert cfg.name == "anthropic"
@pytest.mark.parametrize(
"env_var,expected",
[
("XAI_API_KEY", "xai"),
("GROK_API_KEY", "xai"),
("QWEN_API_KEY", "qwen"),
("DASHSCOPE_API_KEY", "qwen"),
("GLM_API_KEY", "glm"),
("ZHIPU_API_KEY", "glm"),
("KIMI_API_KEY", "kimi"),
("MOONSHOT_API_KEY", "kimi"),
("GROQ_API_KEY", "groq"),
("DEEPSEEK_API_KEY", "deepseek"),
("MISTRAL_API_KEY", "mistral"),
("TOGETHER_API_KEY", "together"),
("FIREWORKS_API_KEY", "fireworks"),
("MINIMAX_API_KEY", "minimax"),
("GEMINI_API_KEY", "gemini"),
("GOOGLE_API_KEY", "gemini"),
],
)
def test_every_provider_env_var_resolves(env_var, expected):
"""Every env var listed in PROVIDERS resolves to the right provider
this guards against typos in the registry dict."""
os.environ[env_var] = "test-key"
cfg, _ = providers.resolve_provider()
assert cfg.name == expected, (
f"{env_var} should route to {expected}, got {cfg.name}"
)
def test_explicit_provider_wins_over_auto_detect():
"""When `provider=` is given, auto-detect is bypassed."""
os.environ["HERMES_API_KEY"] = "hermes-key" # would auto-detect
os.environ["OPENAI_API_KEY"] = "openai-key"
cfg, key = providers.resolve_provider("openai")
assert cfg.name == "openai"
assert key == "openai-key"
def test_unknown_provider_raises():
with pytest.raises(ValueError, match="Unknown Hermes provider"):
providers.resolve_provider("this_provider_does_not_exist")
def test_explicit_provider_with_missing_env_raises():
"""If the operator asks for a specific provider but its env var is empty,
we raise; we do NOT fall back to auto-detect because that would be
surprising ("why is my openai config talking to anthropic?")."""
os.environ["HERMES_API_KEY"] = "some-value" # auto-detect would succeed
with pytest.raises(ValueError, match="no env var set"):
providers.resolve_provider("anthropic")
def test_auto_detect_with_no_env_lists_all_options():
"""The error message should list every env var the caller could set,
so operators don't have to read the source."""
# No env vars set (autouse fixture clears them all)
with pytest.raises(ValueError) as exc_info:
providers.resolve_provider()
msg = str(exc_info.value)
# Spot-check: the message names at least a few providers
for env_var in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "QWEN_API_KEY"):
assert env_var in msg, f"error message should mention {env_var}"

View File

@ -1,84 +0,0 @@
"""Smoke tests for adapters.hermes.create_executor().
Verifies key resolution order and ValueError on missing keys.
No real network calls are made; the executor object is just instantiated.
"""
import os
import pytest
from unittest.mock import patch
from adapters.hermes import create_executor
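# Resolution order pinned below, as a tiny usage sketch: explicit param beats
# HERMES_API_KEY, which beats OPENROUTER_API_KEY; with none of them, ValueError.
def _key_resolution_sketch():
executor = create_executor(hermes_api_key="example-key")  # param wins
assert executor.api_key == "example-key"
assert "nousresearch.com" in executor.base_url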
def test_create_executor_with_param():
"""create_executor() works when key passed directly as param."""
executor = create_executor(hermes_api_key="test-key-direct")
assert executor is not None
def test_create_executor_with_hermes_env():
"""create_executor() works when HERMES_API_KEY env var is set."""
with patch.dict(os.environ, {"HERMES_API_KEY": "test-hermes-key"}, clear=False):
os.environ.pop("OPENROUTER_API_KEY", None)
executor = create_executor()
assert executor is not None
def test_create_executor_falls_back_to_openrouter():
"""create_executor() falls back to OPENROUTER_API_KEY when HERMES_API_KEY absent."""
env = {"OPENROUTER_API_KEY": "test-openrouter-key"}
with patch.dict(os.environ, env, clear=False):
os.environ.pop("HERMES_API_KEY", None)
executor = create_executor()
assert executor is not None
def test_create_executor_raises_without_keys():
"""create_executor() raises ValueError when no keys available."""
with patch.dict(os.environ, {}, clear=False):
os.environ.pop("HERMES_API_KEY", None)
os.environ.pop("OPENROUTER_API_KEY", None)
with pytest.raises(ValueError):
create_executor()
# ---------------------------------------------------------------------------
# Additional assertions — verify key routing is correct
# ---------------------------------------------------------------------------
def test_param_key_uses_nous_base_url():
"""When called with explicit key, base_url points at Nous Portal."""
executor = create_executor(hermes_api_key="nous-key")
assert "nousresearch.com" in executor.base_url
def test_hermes_env_uses_nous_base_url():
"""HERMES_API_KEY maps to Nous Portal base URL."""
with patch.dict(os.environ, {"HERMES_API_KEY": "nous-key"}, clear=False):
os.environ.pop("OPENROUTER_API_KEY", None)
executor = create_executor()
assert "nousresearch.com" in executor.base_url
def test_openrouter_fallback_uses_openrouter_base_url():
"""OPENROUTER_API_KEY fallback maps to OpenRouter base URL."""
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "or-key"}, clear=False):
os.environ.pop("HERMES_API_KEY", None)
executor = create_executor()
assert "openrouter.ai" in executor.base_url
def test_param_takes_priority_over_hermes_env():
"""Explicit param overrides HERMES_API_KEY env var."""
with patch.dict(os.environ, {"HERMES_API_KEY": "env-key"}, clear=False):
executor = create_executor(hermes_api_key="param-key")
assert executor.api_key == "param-key"
def test_hermes_env_takes_priority_over_openrouter():
"""HERMES_API_KEY overrides OPENROUTER_API_KEY fallback."""
env = {"HERMES_API_KEY": "hermes-key", "OPENROUTER_API_KEY": "or-key"}
with patch.dict(os.environ, env, clear=False):
executor = create_executor()
assert executor.api_key == "hermes-key"
assert "nousresearch.com" in executor.base_url

View File

@ -1,167 +0,0 @@
"""Tests for Baidu Qianfan provider support across agent.py, deepagents, and openclaw."""
import importlib
import sys
from types import ModuleType
import pytest
QIANFAN_BASE_URL = "https://qianfan.baidubce.com/v2"
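# The "provider:model" string convention these tests exercise, as a tiny
# sketch (hypothetical helper; the adapters do this split inline):
def _split_model_ref(ref: str) -> tuple[str, str]:
provider, _, model = ref.partition(":")
return provider, model  # "qianfan:ernie-4.5" -> ("qianfan", "ernie-4.5")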
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _install_langgraph_mocks(monkeypatch, captured: dict):
"""Inject lightweight langgraph + langchain_openai stubs into sys.modules."""
prebuilt_mod = ModuleType("langgraph.prebuilt")
def fake_create_react_agent(*, model, tools, prompt):
captured["react_agent"] = model
return {"model": model}
prebuilt_mod.create_react_agent = fake_create_react_agent
langgraph_mod = ModuleType("langgraph")
monkeypatch.setitem(sys.modules, "langgraph", langgraph_mod)
monkeypatch.setitem(sys.modules, "langgraph.prebuilt", prebuilt_mod)
openai_mod = ModuleType("langchain_openai")
class FakeChatOpenAI:
def __init__(self, **kwargs):
captured["llm_kwargs"] = kwargs
openai_mod.ChatOpenAI = FakeChatOpenAI
monkeypatch.setitem(sys.modules, "langchain_openai", openai_mod)
# ---------------------------------------------------------------------------
# Track D-1: agent.py qianfan dispatch
# ---------------------------------------------------------------------------
class TestQianfanInAgent:
"""agent.py create_agent() correctly wires Qianfan provider."""
def _load_agent(self, monkeypatch, captured):
_install_langgraph_mocks(monkeypatch, captured)
sys.modules.pop("agent", None)
return importlib.import_module("agent")
def test_uses_qianfan_api_key(self, monkeypatch):
"""QIANFAN_API_KEY is used when set."""
captured = {}
monkeypatch.setenv("QIANFAN_API_KEY", "qf-key-123")
monkeypatch.delenv("AISTUDIO_API_KEY", raising=False)
agent_mod = self._load_agent(monkeypatch, captured)
agent_mod.create_agent("qianfan:ernie-4.5", [], "sys")
assert captured["llm_kwargs"]["openai_api_key"] == "qf-key-123"
def test_falls_back_to_aistudio_api_key(self, monkeypatch):
"""Falls back to AISTUDIO_API_KEY when QIANFAN_API_KEY is absent."""
captured = {}
monkeypatch.delenv("QIANFAN_API_KEY", raising=False)
monkeypatch.setenv("AISTUDIO_API_KEY", "ai-studio-456")
agent_mod = self._load_agent(monkeypatch, captured)
agent_mod.create_agent("qianfan:ernie-speed", [], "sys")
assert captured["llm_kwargs"]["openai_api_key"] == "ai-studio-456"
def test_uses_qianfan_base_url(self, monkeypatch):
"""openai_api_base is always the Qianfan endpoint."""
captured = {}
monkeypatch.setenv("QIANFAN_API_KEY", "any-key")
agent_mod = self._load_agent(monkeypatch, captured)
agent_mod.create_agent("qianfan:ernie-lite", [], "sys")
assert captured["llm_kwargs"]["openai_api_base"] == QIANFAN_BASE_URL
def test_model_name_stripped_of_prefix(self, monkeypatch):
"""The model kwarg contains only the bare model name, not the prefix."""
captured = {}
monkeypatch.setenv("QIANFAN_API_KEY", "k")
agent_mod = self._load_agent(monkeypatch, captured)
agent_mod.create_agent("qianfan:ernie-4.5-turbo", [], "sys")
assert captured["llm_kwargs"]["model"] == "ernie-4.5-turbo"
# ---------------------------------------------------------------------------
# Track D-2: adapters/deepagents _create_llm qianfan dispatch
# ---------------------------------------------------------------------------
class TestQianfanInDeepAgents:
"""DeepAgents adapter._create_llm() correctly wires Qianfan provider."""
def _make_adapter(self, monkeypatch, captured):
openai_mod = ModuleType("langchain_openai")
class FakeChatOpenAI:
def __init__(self, **kwargs):
captured["llm_kwargs"] = kwargs
openai_mod.ChatOpenAI = FakeChatOpenAI
monkeypatch.setitem(sys.modules, "langchain_openai", openai_mod)
from adapters.deepagents.adapter import DeepAgentsAdapter
return DeepAgentsAdapter()
def test_uses_qianfan_api_key(self, monkeypatch):
captured = {}
monkeypatch.setenv("QIANFAN_API_KEY", "qf-deep-999")
monkeypatch.delenv("AISTUDIO_API_KEY", raising=False)
adapter = self._make_adapter(monkeypatch, captured)
adapter._create_llm("qianfan:ernie-4.5")
assert captured["llm_kwargs"]["openai_api_key"] == "qf-deep-999"
def test_falls_back_to_aistudio_api_key(self, monkeypatch):
captured = {}
monkeypatch.delenv("QIANFAN_API_KEY", raising=False)
monkeypatch.setenv("AISTUDIO_API_KEY", "aistudio-deep-777")
adapter = self._make_adapter(monkeypatch, captured)
adapter._create_llm("qianfan:ernie-speed")
assert captured["llm_kwargs"]["openai_api_key"] == "aistudio-deep-777"
def test_uses_qianfan_base_url(self, monkeypatch):
captured = {}
monkeypatch.setenv("QIANFAN_API_KEY", "k")
adapter = self._make_adapter(monkeypatch, captured)
adapter._create_llm("qianfan:ernie-lite")
assert captured["llm_kwargs"]["openai_api_base"] == QIANFAN_BASE_URL
# ---------------------------------------------------------------------------
# Track D-3: adapters/openclaw provider_urls + key resolution
# ---------------------------------------------------------------------------
class TestQianfanInOpenClaw:
"""OpenClaw adapter exposes Qianfan URL and resolves the correct API key."""
def _provider_urls(self):
"""Return a copy of the provider_urls dict defined in the adapter."""
return {
"openai": "https://api.openai.com/v1",
"groq": "https://api.groq.com/openai/v1",
"openrouter": "https://openrouter.ai/api/v1",
"qianfan": QIANFAN_BASE_URL,
}
def _select_key(self, prefix: str, env: dict) -> str:
"""Mirror the prefix-aware key selection added to openclaw/adapter.py."""
if prefix == "qianfan":
return env.get("QIANFAN_API_KEY", env.get("AISTUDIO_API_KEY", ""))
return env.get("OPENAI_API_KEY", env.get("GROQ_API_KEY", env.get("OPENROUTER_API_KEY", "")))
def test_qianfan_url_in_provider_map(self):
urls = self._provider_urls()
assert "qianfan" in urls
assert urls["qianfan"] == QIANFAN_BASE_URL
def test_qianfan_key_resolution_primary(self):
key = self._select_key("qianfan", {"QIANFAN_API_KEY": "qf-oc-111"})
assert key == "qf-oc-111"
def test_qianfan_key_resolution_fallback(self):
key = self._select_key("qianfan", {"AISTUDIO_API_KEY": "as-oc-222"})
assert key == "as-oc-222"
def test_non_qianfan_prefix_not_affected(self):
"""Existing providers still resolve via OPENAI_API_KEY chain."""
key = self._select_key("openai", {"OPENAI_API_KEY": "sk-test"})
assert key == "sk-test"

View File

@ -1,189 +0,0 @@
"""Tests for shared runtime helpers used by A2A-backed executors."""
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from adapters.shared_runtime import (
append_peer_guidance,
build_peer_section,
build_task_text,
brief_task,
extract_history,
extract_message_text,
format_conversation_history,
summarize_peer_cards,
set_current_task,
)
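# Shapes accepted by extract_message_text, per the tests below: objects with a
# .text attribute, objects exposing .root.text, and plain dicts of either shape.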
def _make_context(parts=None, metadata=None):
context = MagicMock()
context.message.parts = parts or []
context.metadata = metadata or {}
return context
def test_extract_message_text_prefers_text_then_root_text():
part1 = MagicMock()
part1.text = "Hello"
part2 = MagicMock(spec=["root"])  # spec omits .text, so the .root fallback is exercised
part2.root = SimpleNamespace(text="World")
assert extract_message_text(_make_context([part1, part2])) == "Hello World"
def test_extract_message_text_supports_dict_parts():
parts = [{"text": "Hello"}, {"root": {"text": "World"}}]
assert extract_message_text(parts) == "Hello World"
def test_extract_history_and_formatting():
ctx = _make_context(
metadata={
"history": [
{"role": "user", "parts": [{"text": "First"}]},
{"role": "agent", "parts": [{"text": "Second"}]},
]
}
)
history = extract_history(ctx)
assert history == [("human", "First"), ("ai", "Second")]
assert format_conversation_history(history) == "User: First\nAgent: Second"
assert (
build_task_text("Current request", history)
== "Conversation so far:\nUser: First\nAgent: Second\n\nCurrent request: Current request"
)
def test_append_peer_guidance_is_optional():
assert append_peer_guidance(None, "", default_text="Base", tool_name="delegate") == "Base"
assert (
append_peer_guidance("Base", "Peer A", default_text="Base", tool_name="delegate")
== "Base\n\n## Peers\nPeer A\nUse delegate to communicate with them."
)
def test_summarize_peer_cards_and_render_section():
peers = [
{
"id": "peer-1",
"status": "online",
"agent_card": {
"name": "Alpha",
"skills": [{"name": "research"}, {"id": "write"}],
},
},
{"id": "peer-2", "status": "offline", "agent_card": None},
]
assert summarize_peer_cards(peers) == [
{
"id": "peer-1",
"name": "Alpha",
"status": "online",
"skills": ["research", "write"],
}
]
section = build_peer_section(peers)
assert "## Your Peers" in section
assert "**Alpha** (id: `peer-1`, status: online)" in section
assert "Skills: research, write" in section
assert "delegate_to_workspace" in section
def test_brief_task_truncates_at_sixty_chars():
assert brief_task("x" * 59) == "x" * 59
assert brief_task("x" * 60) == "x" * 60
assert brief_task("x" * 61) == ("x" * 60) + "..."
@pytest.mark.asyncio
async def test_set_current_task_updates_heartbeat():
heartbeat = SimpleNamespace(current_task="", active_tasks=0)
await set_current_task(heartbeat, "Working")
assert heartbeat.current_task == "Working"
assert heartbeat.active_tasks == 1
await set_current_task(heartbeat, "")
assert heartbeat.current_task == ""
assert heartbeat.active_tasks == 0
@pytest.mark.asyncio
async def test_set_current_task_is_noop_for_none():
await set_current_task(None, "Working")
# ---------------------------------------------------------------------------
# build_task_text() with no history
# ---------------------------------------------------------------------------
def test_build_task_text_no_history_returns_user_message():
"""When history is empty, build_task_text() returns the user_message directly."""
result = build_task_text("What is the weather?", [])
assert result == "What is the weather?"
# ---------------------------------------------------------------------------
# summarize_peer_cards() edge cases
# ---------------------------------------------------------------------------
def test_summarize_peer_cards_invalid_json_string_skipped():
"""A peer whose agent_card is an invalid JSON string is skipped entirely."""
peers = [
{"id": "peer-bad", "status": "online", "agent_card": "{not valid json}"},
{
"id": "peer-good",
"status": "online",
"agent_card": {"name": "Good Peer", "skills": []},
},
]
result = summarize_peer_cards(peers)
assert len(result) == 1
assert result[0]["id"] == "peer-good"
def test_summarize_peer_cards_json_string_not_dict_skipped():
"""A peer whose agent_card is a JSON-encoded list (not a dict) is skipped."""
import json
peers = [
{"id": "peer-list", "status": "online", "agent_card": json.dumps(["skill1"])},
{
"id": "peer-dict",
"status": "online",
"agent_card": {"name": "Dict Peer", "skills": []},
},
]
result = summarize_peer_cards(peers)
assert len(result) == 1
assert result[0]["id"] == "peer-dict"
# ---------------------------------------------------------------------------
# set_current_task() httpx exception is swallowed
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_set_current_task_httpx_exception_is_silenced(monkeypatch):
"""set_current_task() silently ignores exceptions from the httpx heartbeat push."""
monkeypatch.setenv("WORKSPACE_ID", "ws-test")
monkeypatch.setenv("PLATFORM_URL", "http://platform:8080")
mock_client = AsyncMock()
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=False)
mock_client.post = AsyncMock(side_effect=Exception("Connection refused"))
# httpx is imported lazily inside the function, so patch at the httpx module level
with patch("httpx.AsyncClient", return_value=mock_client):
# Should not raise — exception is swallowed with pass
heartbeat = SimpleNamespace(current_task="", active_tasks=0)
await set_current_task(heartbeat, "Doing work")
assert heartbeat.current_task == "Doing work"
assert heartbeat.active_tasks == 1

View File

@ -1,147 +0,0 @@
"""Tests for the new BaseAdapter.transcript_lines() method + claude-code override."""
import asyncio
import json
import os
import tempfile
from pathlib import Path
import pytest
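# Return-shape contract pinned by these tests (sketch): transcript_lines()
# yields {"supported": bool, "lines": list[dict], "cursor": int, "more": bool,
# "runtime": str, ...}; the claude-code override also reports "source" (the
# path of the .jsonl transcript it read).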
# ── Default (BaseAdapter) ───────────────────────────────────────────────────
def test_base_adapter_returns_unsupported():
"""Adapters that don't override return supported:False."""
from adapters.langgraph.adapter import LangGraphAdapter
a = LangGraphAdapter()
r = asyncio.run(a.transcript_lines())
assert r["supported"] is False
assert r["lines"] == []
assert r["cursor"] == 0
assert r["runtime"] == "langgraph"
assert r["more"] is False
# ── Claude Code override ────────────────────────────────────────────────────
def _write_jsonl(path: Path, entries: list[dict]) -> None:
with path.open("w") as f:
for e in entries:
f.write(json.dumps(e) + "\n")
def test_claude_code_no_projects_dir():
"""Returns supported:True with empty lines when projects dir missing."""
from adapters.claude_code.adapter import ClaudeCodeAdapter
with tempfile.TemporaryDirectory() as tmp:
os.environ["HOME"] = tmp
os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
try:
r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
assert r["supported"] is True
assert r["lines"] == []
assert r["cursor"] == 0
assert "-configs" in r["source"]
finally:
del os.environ["CLAUDE_PROJECT_CWD"]
def test_claude_code_reads_jsonl_with_pagination():
from adapters.claude_code.adapter import ClaudeCodeAdapter
with tempfile.TemporaryDirectory() as tmp:
os.environ["HOME"] = tmp
os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
try:
projdir = Path(tmp) / ".claude" / "projects" / "-configs"
projdir.mkdir(parents=True)
_write_jsonl(projdir / "abc.jsonl", [
{"type": "user", "n": 1},
{"type": "assistant", "n": 2},
{"type": "user", "n": 3},
{"type": "assistant", "n": 4},
{"type": "user", "n": 5},
])
a = ClaudeCodeAdapter()
# First page (limit=2)
r1 = asyncio.run(a.transcript_lines(since=0, limit=2))
assert r1["supported"] is True
assert [l["n"] for l in r1["lines"]] == [1, 2]
assert r1["cursor"] == 2
assert r1["more"] is True
# Second page (since=2, limit=2)
r2 = asyncio.run(a.transcript_lines(since=2, limit=2))
assert [l["n"] for l in r2["lines"]] == [3, 4]
assert r2["cursor"] == 4
assert r2["more"] is True
# Third page exhausts
r3 = asyncio.run(a.transcript_lines(since=4, limit=2))
assert [l["n"] for l in r3["lines"]] == [5]
assert r3["cursor"] == 5
assert r3["more"] is False
finally:
del os.environ["CLAUDE_PROJECT_CWD"]
def test_claude_code_picks_most_recent_jsonl():
"""When multiple .jsonl files exist, picks the most-recently-modified."""
from adapters.claude_code.adapter import ClaudeCodeAdapter
with tempfile.TemporaryDirectory() as tmp:
os.environ["HOME"] = tmp
os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
try:
projdir = Path(tmp) / ".claude" / "projects" / "-configs"
projdir.mkdir(parents=True)
old = projdir / "old.jsonl"
new = projdir / "new.jsonl"
_write_jsonl(old, [{"src": "old"}])
_write_jsonl(new, [{"src": "new"}])
# Force new to be more recent
os.utime(old, (1000, 1000))
os.utime(new, (2000, 2000))
r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
assert r["lines"] == [{"src": "new"}]
assert r["source"].endswith("new.jsonl")
finally:
del os.environ["CLAUDE_PROJECT_CWD"]
def test_claude_code_skips_malformed_lines():
"""Bad JSON lines surface as ``_parse_error: True`` rather than 500'ing."""
from adapters.claude_code.adapter import ClaudeCodeAdapter
with tempfile.TemporaryDirectory() as tmp:
os.environ["HOME"] = tmp
os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
try:
projdir = Path(tmp) / ".claude" / "projects" / "-configs"
projdir.mkdir(parents=True)
with (projdir / "x.jsonl").open("w") as f:
f.write('{"good": 1}\n')
f.write("not-json garbage\n")
f.write('{"good": 2}\n')
r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
assert r["lines"][0] == {"good": 1}
assert r["lines"][1].get("_parse_error") is True
assert r["lines"][2] == {"good": 2}
finally:
del os.environ["CLAUDE_PROJECT_CWD"]
def test_claude_code_caps_limit():
"""Limit is capped at 1000 to prevent OOM via paranoid client."""
from adapters.claude_code.adapter import ClaudeCodeAdapter
with tempfile.TemporaryDirectory() as tmp:
os.environ["HOME"] = tmp
os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
try:
projdir = Path(tmp) / ".claude" / "projects" / "-configs"
projdir.mkdir(parents=True)
_write_jsonl(projdir / "x.jsonl", [{"i": i} for i in range(1500)])
r = asyncio.run(ClaudeCodeAdapter().transcript_lines(limit=999999))
assert len(r["lines"]) == 1000 # capped
assert r["more"] is True
assert r["cursor"] == 1000
finally:
del os.environ["CLAUDE_PROJECT_CWD"]