Merge pull request #473 from Molecule-AI/fix/remove-adapters-dir
fix: remove adapter subdirectories from workspace-template
commit 73865ee164
@@ -41,7 +41,7 @@ from a2a.server.events import EventQueue
from a2a.server.tasks import TaskUpdater
from a2a.types import Part, TextPart
from a2a.utils import new_agent_text_message
-from adapters.shared_runtime import (
+from shared_runtime import (
    extract_history as _extract_history,
    extract_message_text,
    brief_task,

workspace-template/adapter_base.py (new file, 309 lines)
@@ -0,0 +1,309 @@
"""Base adapter interface for agent infrastructure providers."""

import logging
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any

from a2a.server.agent_execution import AgentExecutor

logger = logging.getLogger(__name__)


@dataclass
class SetupResult:
    """Result from the shared _common_setup() pipeline."""
    system_prompt: str
    loaded_skills: list  # LoadedSkill instances
    langchain_tools: list  # LangChain BaseTool instances
    is_coordinator: bool
    children: list  # child workspace dicts


@dataclass
class AdapterConfig:
    """Standardized config passed to every adapter."""
    model: str  # e.g. "anthropic:claude-sonnet-4-6" or "openrouter:google/gemini-2.5-flash"
    system_prompt: str | None = None  # Assembled system prompt text
    tools: list[str] = field(default_factory=list)  # Tool names from config.yaml
    runtime_config: dict[str, Any] = field(default_factory=dict)  # Raw runtime_config block
    config_path: str = "/configs"  # Path to configs directory
    workspace_id: str = ""  # Workspace identifier
    prompt_files: list[str] = field(default_factory=list)  # Ordered prompt file names
    a2a_port: int = 8000  # Port for A2A server
    heartbeat: Any = None  # HeartbeatLoop instance


class BaseAdapter(ABC):
    """Interface every agent infrastructure adapter must implement.

    To add a new agent infra:
    1. Create workspace-template/adapters/<your_infra>/
    2. Implement adapter.py with a class extending BaseAdapter
    3. Add requirements.txt with your infra's dependencies
    4. Export as Adapter in __init__.py
    5. Submit a PR
    """

    @staticmethod
    @abstractmethod
    def name() -> str:  # pragma: no cover
        """Return the runtime identifier (e.g. 'langgraph', 'crewai').
        This must match the 'runtime' field in config.yaml."""
        ...

    @staticmethod
    @abstractmethod
    def display_name() -> str:  # pragma: no cover
        """Human-readable name for UI display."""
        ...

    @staticmethod
    @abstractmethod
    def description() -> str:  # pragma: no cover
        """Short description of what this adapter provides."""
        ...

    @staticmethod
    def get_config_schema() -> dict:
        """Return JSON Schema for runtime_config fields this adapter supports.
        Used by the Config tab UI to render the right form fields.
        Override in subclasses for adapter-specific settings."""
        return {}

    # ------------------------------------------------------------------
    # Plugin install hooks
    # ------------------------------------------------------------------
    # New pipeline: each plugin ships per-runtime adaptors resolved via
    # `plugins_registry.resolve()`. Adapters expose hooks below that
    # adaptors call to wire plugin content into the runtime.
    #
    # Default implementations are filesystem-only (write to /configs,
    # append to CLAUDE.md). Runtimes with a dynamic tool registry
    # (e.g. DeepAgents sub-agents) override the hooks to also register
    # in-process state.

    def memory_filename(self) -> str:
        """File under /configs that the runtime treats as long-lived memory.

        Both Claude Code and DeepAgents read CLAUDE.md natively, so this is
        the sensible default. Override only if a runtime expects a different
        filename.
        """
        return "CLAUDE.md"

    def register_tool_hook(self, name: str, fn) -> None:
        """Default no-op. Override on runtimes with a dynamic tool registry.

        Runtimes that pick tools up at startup via filesystem scan (Claude
        Code reads /configs/skills, LangGraph globs **/*.py) don't need to
        do anything here — the adaptor's file-write step is enough.
        """
        return None

    async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict:
        """Return live transcript entries for the most-recent agent session.

        Default implementation returns ``supported: False`` for runtimes
        that don't expose a per-session log on disk. Override in subclasses
        that DO (Claude Code reads ``~/.claude/projects/<cwd>/<session>.jsonl``).

        This is the "look over the agent's shoulder" feature — lets canvas /
        operators see live tool calls + AI thinking instead of waiting for
        the high-level activity log to flush.

        Args:
            since: line offset to skip — caller's last cursor (0 = from start)
            limit: max lines to return (caller-side cap, default 100, max 1000)

        Returns:
            ``{runtime, supported, lines, cursor, more, source}`` where
            ``cursor`` is the new offset to pass on the next poll, ``more``
            is True if additional lines remain past ``limit``, and ``source``
            is the file path lines were read from (useful for debugging).
        """
        return {
            "runtime": self.name(),
            "supported": False,
            "lines": [],
            "cursor": since,
            "more": False,
            "source": None,
        }

    def register_subagent_hook(self, name: str, spec: dict) -> None:
        """Default no-op. DeepAgents overrides to register a sub-agent."""
        return None

    def append_to_memory_hook(self, config: AdapterConfig, filename: str, content: str) -> None:
        """Append text to /configs/<filename> if the marker isn't already present.

        Idempotent: looks for the first line of `content` as a marker so a
        re-install doesn't duplicate the block. Adaptors should pass content
        beginning with a unique header (e.g. ``# Plugin: molecule-dev-conventions``).
        """
        import os
        target = os.path.join(config.config_path, filename)
        marker = content.splitlines()[0].strip() if content else ""
        existing = ""
        if os.path.exists(target):
            with open(target) as f:
                existing = f.read()
        if marker and marker in existing:
            logger.info("append_to_memory: %s already contains %r — skipping", filename, marker)
            return
        os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
        with open(target, "a") as f:
            if existing and not existing.endswith("\n"):
                f.write("\n")
            f.write(content if content.endswith("\n") else content + "\n")
        logger.info("append_to_memory: appended %d chars to %s", len(content), filename)

    async def install_plugins_via_registry(
        self,
        config: AdapterConfig,
        plugins,
    ) -> list:
        """Drive the new per-runtime adaptor pipeline for every loaded plugin.

        For each plugin in `plugins.plugins`, resolve the adaptor for this
        runtime (via :func:`plugins_registry.resolve`) and invoke
        ``install(ctx)``. Returns the list of :class:`InstallResult` so
        callers can surface warnings (e.g. raw-drop fallback hits).

        Adapters whose runtime supports the new pipeline call this from
        ``setup()`` instead of the legacy ``inject_plugins()``.
        """
        from pathlib import Path
        from plugins_registry import InstallContext, resolve

        results = []
        runtime = self.name().replace("-", "_")  # e.g. "claude-code" -> "claude_code"

        for plugin in plugins.plugins:
            adaptor, source = resolve(plugin.name, runtime, Path(plugin.path))
            ctx = InstallContext(
                configs_dir=Path(config.config_path),
                workspace_id=config.workspace_id,
                runtime=runtime,
                plugin_root=Path(plugin.path),
                memory_filename=self.memory_filename(),
                register_tool=self.register_tool_hook,
                register_subagent=self.register_subagent_hook,
                append_to_memory=lambda fn, c, _cfg=config: self.append_to_memory_hook(_cfg, fn, c),
            )
            try:
                result = await adaptor.install(ctx)
                results.append(result)
                logger.info(
                    "Plugin %s installed via %s adaptor (warnings: %d)",
                    plugin.name, source, len(result.warnings),
                )
            except Exception as exc:
                logger.exception("Plugin %s install via %s failed: %s", plugin.name, source, exc)

        return results

    async def inject_plugins(self, config: AdapterConfig, plugins) -> None:
        """Legacy hook — kept for backwards compatibility during migration.

        Default: drive the new per-runtime adaptor pipeline. Adapters not yet
        migrated may still override this with their own logic.
        """
        await self.install_plugins_via_registry(config, plugins)

    async def _common_setup(self, config: AdapterConfig) -> SetupResult:
        """Shared setup pipeline — loads plugins, skills, tools, coordinator, and builds system prompt.

        All adapters can call this to get the full platform feature set.
        Returns a SetupResult with LangChain BaseTool instances that adapters
        convert to their native format if needed.
        """
        from plugins import load_plugins
        from skill_loader.loader import load_skills
        from coordinator import get_children, get_parent_context, build_children_description
        from prompt import build_system_prompt, get_peer_capabilities
        from builtin_tools.approval import request_approval
        from builtin_tools.delegation import delegate_to_workspace, check_delegation_status
        from builtin_tools.memory import commit_memory, search_memory
        from builtin_tools.sandbox import run_code

        platform_url = os.environ.get("PLATFORM_URL", "http://platform:8080")

        # Load plugins from per-workspace dir first, then shared fallback
        workspace_plugins_dir = os.path.join(config.config_path, "plugins")
        plugins = load_plugins(
            workspace_plugins_dir=workspace_plugins_dir,
            shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"),
        )
        await self.inject_plugins(config, plugins)
        if plugins.plugin_names:
            logger.info(f"Plugins: {', '.join(plugins.plugin_names)}")

        # Load skills (workspace + plugin skills, deduped)
        loaded_skills = load_skills(config.config_path, config.tools)
        seen_skill_ids = {s.metadata.id for s in loaded_skills}
        for plugin_skills_dir in plugins.skill_dirs:
            plugin_skill_names = [
                d for d in os.listdir(plugin_skills_dir)
                if os.path.isdir(os.path.join(plugin_skills_dir, d))
            ]
            for skill in load_skills(plugin_skills_dir, plugin_skill_names):
                if skill.metadata.id not in seen_skill_ids:
                    loaded_skills.append(skill)
                    seen_skill_ids.add(skill.metadata.id)
        logger.info(f"Loaded {len(loaded_skills)} skills: {[s.metadata.id for s in loaded_skills]}")

        # Assemble tools: 6 core + skill tools
        all_tools = [delegate_to_workspace, check_delegation_status, request_approval, commit_memory, search_memory, run_code]
        for skill in loaded_skills:
            all_tools.extend(skill.tools)

        # Coordinator mode: detect children and add routing tool
        children = await get_children()
        is_coordinator = len(children) > 0
        if is_coordinator:
            from coordinator import route_task_to_team
            logger.info(f"Coordinator mode: {len(children)} children")
            all_tools.append(route_task_to_team)

        # Parent context (if this is a child workspace)
        parent_context = await get_parent_context()

        # Build system prompt with all context
        peers = await get_peer_capabilities(platform_url, config.workspace_id)
        coordinator_prompt = build_children_description(children) if is_coordinator else ""
        extra_prompts = list(plugins.prompt_fragments)
        if coordinator_prompt:
            extra_prompts.append(coordinator_prompt)

        system_prompt = build_system_prompt(
            config.config_path, config.workspace_id, loaded_skills, peers,
            prompt_files=config.prompt_files,
            plugin_rules=plugins.rules,
            plugin_prompts=extra_prompts,
            parent_context=parent_context,
        )

        return SetupResult(
            system_prompt=system_prompt,
            loaded_skills=loaded_skills,
            langchain_tools=all_tools,
            is_coordinator=is_coordinator,
            children=children,
        )

    @abstractmethod
    async def setup(self, config: AdapterConfig) -> None:
        """One-time setup: validate config, prepare internal state.
        Called after deps are installed but before create_executor().
        Raise RuntimeError if setup fails (missing deps, bad config, etc.)."""
        ...  # pragma: no cover

    @abstractmethod
    async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
        """Create and return an AgentExecutor ready for A2A integration.
        The returned executor's execute() method will be called by the
        A2A server's DefaultRequestHandler."""
        ...  # pragma: no cover
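For orientation, a minimal adapter built against the BaseAdapter interface above might look like this sketch (hypothetical EchoAdapter, not part of this commit; the executor wiring is deliberately left out):

# Hypothetical example, not part of this commit: a minimal BaseAdapter implementation.
from a2a.server.agent_execution import AgentExecutor
from adapter_base import AdapterConfig, BaseAdapter


class EchoAdapter(BaseAdapter):
    @staticmethod
    def name() -> str:
        return "echo"  # must match the 'runtime' field in config.yaml

    @staticmethod
    def display_name() -> str:
        return "Echo"

    @staticmethod
    def description() -> str:
        return "Toy adapter that replies with the incoming message"

    async def setup(self, config: AdapterConfig) -> None:
        # _common_setup() loads plugins, skills, tools, and builds the system prompt.
        self._setup = await self._common_setup(config)

    async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
        # A real adapter returns an AgentExecutor whose execute() answers A2A requests.
        raise NotImplementedError("wire up an AgentExecutor here")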
@@ -1,58 +1,22 @@
"""Adapter registry — discovers and loads agent infrastructure adapters."""
"""Adapter registry shim.

Adapters extracted to standalone repos (molecule-ai-workspace-template-*).
ADAPTER_MODULE env var is the primary discovery mechanism in production.
This shim provides backward-compatible imports for local dev + tests.
"""
import importlib
import os
import logging
from pathlib import Path
from .base import BaseAdapter, AdapterConfig
from adapter_base import BaseAdapter, AdapterConfig

logger = logging.getLogger(__name__)

_ADAPTER_CACHE: dict[str, type[BaseAdapter]] = {}


def discover_adapters() -> dict[str, type[BaseAdapter]]:
    """Scan subdirectories for adapter modules. Each must export an Adapter class."""
    if _ADAPTER_CACHE:
        return _ADAPTER_CACHE

    adapters_dir = Path(__file__).parent
    for entry in sorted(adapters_dir.iterdir()):
        if not entry.is_dir() or entry.name.startswith("_"):
            continue
        try:
            mod = importlib.import_module(f"adapters.{entry.name}")
            adapter_cls = getattr(mod, "Adapter", None)
            if adapter_cls and issubclass(adapter_cls, BaseAdapter):
                _ADAPTER_CACHE[adapter_cls.name()] = adapter_cls
                logger.debug(f"Loaded adapter: {adapter_cls.name()} ({adapter_cls.display_name()})")
        except Exception as e:
            # Log but don't crash — adapter may have uninstalled deps
            logger.debug(f"Skipped adapter {entry.name}: {e}")

    return _ADAPTER_CACHE


def get_adapter(runtime: str) -> type[BaseAdapter]:
    """Get adapter class by runtime name. Raises KeyError if not found."""
    adapters = discover_adapters()
    if runtime not in adapters:
        available = ", ".join(sorted(adapters.keys()))
        raise KeyError(f"Unknown runtime '{runtime}'. Available: {available}")
    return adapters[runtime]


def list_adapters() -> list[dict]:
    """Return metadata for all discovered adapters (for API/UI)."""
    adapters = discover_adapters()
    return [
        {
            "name": cls.name(),
            "display_name": cls.display_name(),
            "description": cls.description(),
            "config_schema": cls.get_config_schema(),
        }
        for cls in adapters.values()
    ]


__all__ = ["BaseAdapter", "AdapterConfig", "get_adapter", "list_adapters", "discover_adapters"]
    adapter_module = os.environ.get("ADAPTER_MODULE")
    if adapter_module:
        mod = importlib.import_module(adapter_module)
        return getattr(mod, "Adapter")
    raise KeyError(
        f"No ADAPTER_MODULE set for runtime '{runtime}'. "
        "Adapters now live in standalone template repos."
    )
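In production the shim resolves the adapter class from ADAPTER_MODULE rather than scanning subdirectories; a hedged usage sketch (the module name below is a placeholder, not a real package):

# Illustrative only: ADAPTER_MODULE names an importable module that exports `Adapter`.
import os

os.environ["ADAPTER_MODULE"] = "my_runtime_adapter"  # hypothetical module name

from adapters import get_adapter

AdapterCls = get_adapter("my-runtime")  # returns the module's exported Adapter class
adapter = AdapterCls()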
@@ -1,3 +0,0 @@
from .adapter import AutoGenAdapter

Adapter = AutoGenAdapter
@@ -1,159 +0,0 @@
"""AutoGen adapter — Microsoft's multi-agent framework with full platform integration.

Uses AutoGen's AssistantAgent with OpenAIChatCompletionClient,
includes all platform tools (delegation, memory, sandbox, approval), skills, and coordinator support.

Requires: pip install autogen-agentchat autogen-ext[openai]
"""

import json
import logging

from adapters.base import BaseAdapter, AdapterConfig
from adapters.shared_runtime import (
    build_task_text,
    brief_task,
    extract_history,
    extract_message_text,
    set_current_task,
)
from a2a.server.agent_execution import AgentExecutor

logger = logging.getLogger(__name__)


def _langchain_to_autogen(lc_tool):
    """Wrap a LangChain BaseTool as an AutoGen FunctionTool.

    AutoGen requires typed function signatures (no **kwargs).
    LangChain tools accept a single string or dict input via ainvoke.
    We bridge them with a single `input: str` parameter.
    """
    from autogen_core.tools import FunctionTool

    async def _invoke(input: str) -> str:  # noqa: A002
        # Try to parse as JSON dict for tools expecting structured input
        try:
            parsed = json.loads(input)
            if isinstance(parsed, dict):
                result = await lc_tool.ainvoke(parsed)
                return str(result)
        except (json.JSONDecodeError, TypeError):
            pass
        result = await lc_tool.ainvoke(input)
        return str(result)

    return FunctionTool(
        _invoke,
        name=lc_tool.name,
        description=lc_tool.description or lc_tool.name,
    )


class AutoGenAdapter(BaseAdapter):

    def __init__(self):
        self.system_prompt = None
        self.autogen_tools = []

    @staticmethod
    def name() -> str:
        return "autogen"

    @staticmethod
    def display_name() -> str:
        return "AutoGen"

    @staticmethod
    def description() -> str:
        return "Microsoft AutoGen — conversable agents with tool use and multi-agent orchestration"

    @staticmethod
    def get_config_schema() -> dict:
        return {
            "model": {"type": "string", "description": "OpenAI model (e.g. openai:gpt-4.1-mini)"},
            "skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"},
            "tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools"},
        }

    async def setup(self, config: AdapterConfig) -> None:
        try:
            from autogen_agentchat.agents import AssistantAgent  # noqa: F401
            logger.info("AutoGen AgentChat loaded")
        except ImportError:
            raise RuntimeError("autogen-agentchat not installed.")

        result = await self._common_setup(config)
        self.system_prompt = result.system_prompt
        self.autogen_tools = [_langchain_to_autogen(t) for t in result.langchain_tools]
        logger.info(f"AutoGen tools: {[t.name for t in self.autogen_tools]}")

    async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
        return AutoGenA2AExecutor(
            model=config.model,
            system_prompt=self.system_prompt,
            autogen_tools=self.autogen_tools,
            heartbeat=config.heartbeat,
        )


class AutoGenA2AExecutor(AgentExecutor):
    """Wraps AutoGen's AssistantAgent with full platform tools."""

    def __init__(self, model: str, system_prompt: str | None, autogen_tools: list, heartbeat=None):
        self.model = model
        self.system_prompt = system_prompt
        self.autogen_tools = autogen_tools
        self._heartbeat = heartbeat

    async def execute(self, context, event_queue):
        from a2a.utils import new_agent_text_message

        user_message = extract_message_text(context)

        if not user_message:
            await event_queue.enqueue_event(new_agent_text_message("No message provided"))
            return

        await set_current_task(self._heartbeat, brief_task(user_message))

        try:
            from autogen_agentchat.agents import AssistantAgent
            from autogen_ext.models.openai import OpenAIChatCompletionClient

            model_str = self.model
            if ":" in model_str:
                _, model_name = model_str.split(":", 1)
            else:
                model_name = model_str

            task_text = build_task_text(user_message, extract_history(context))

            client = OpenAIChatCompletionClient(model=model_name)
            agent = AssistantAgent(
                name="agent",
                model_client=client,
                system_message=self.system_prompt or "You are a helpful assistant.",
                tools=self.autogen_tools,
            )

            result = await agent.run(task=task_text)

            reply = ""
            if hasattr(result, "messages") and result.messages:
                for msg in reversed(result.messages):
                    if hasattr(msg, "content") and isinstance(msg.content, str):
                        reply = msg.content
                        break
            if not reply:
                reply = str(result)

        except Exception as e:
            reply = f"AutoGen error: {e}"
        finally:
            await set_current_task(self._heartbeat, "")

        await event_queue.enqueue_event(new_agent_text_message(reply))

    async def cancel(self, context, event_queue):  # pragma: no cover
        pass
@@ -1,309 +1,2 @@
[removed: the old 309-line adapters/base.py, duplicated verbatim in the new workspace-template/adapter_base.py above]
"""Re-export from adapter_base for backward compat."""
from adapter_base import *  # noqa: F401,F403
@@ -1,3 +0,0 @@
from .adapter import ClaudeCodeAdapter

Adapter = ClaudeCodeAdapter
@@ -1,167 +0,0 @@
"""Claude Code adapter — wraps the Claude Code CLI as an agent runtime."""

import json
import os
import logging
from pathlib import Path

from adapters.base import BaseAdapter, AdapterConfig
from a2a.server.agent_execution import AgentExecutor

logger = logging.getLogger(__name__)

# Cap one transcript response at 1000 lines so a paranoid client can't OOM
# the workspace by polling /transcript?limit=999999.
_TRANSCRIPT_MAX_LIMIT = 1000


class ClaudeCodeAdapter(BaseAdapter):

    @staticmethod
    def name() -> str:
        return "claude-code"

    @staticmethod
    def display_name() -> str:
        return "Claude Code"

    @staticmethod
    def description() -> str:
        return "Claude Code CLI — full agentic coding with hooks, CLAUDE.md, auto-memory, and MCP support"

    @staticmethod
    def get_config_schema() -> dict:
        return {
            "model": {"type": "string", "description": "Claude model (e.g. sonnet, opus, haiku)", "default": "sonnet"},
            "required_env": {"type": "array", "description": "Required env vars", "default": ["CLAUDE_CODE_OAUTH_TOKEN"]},
            "timeout": {"type": "integer", "description": "Timeout in seconds (0 = no timeout)", "default": 0},
        }

    async def setup(self, config: AdapterConfig) -> None:
        """Install plugins via the per-runtime adaptor registry.

        The legacy claude-code-specific ``inject_plugins()`` override is gone:
        each plugin now ships (or has registered in the platform registry) a
        per-runtime adaptor, and ``BaseAdapter.install_plugins_via_registry``
        routes installs through it. The Claude Code SDK still reads
        ``CLAUDE.md`` and ``/configs/skills/`` natively, and the default
        :class:`AgentskillsAdaptor` writes to both.
        """
        from plugins import load_plugins
        workspace_plugins_dir = os.path.join(config.config_path, "plugins")
        plugins = load_plugins(
            workspace_plugins_dir=workspace_plugins_dir,
            shared_plugins_dir=os.environ.get("PLUGINS_DIR", "/plugins"),
        )
        await self.install_plugins_via_registry(config, plugins)

    async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
        from claude_sdk_executor import ClaudeSDKExecutor

        # Load system prompt if exists
        system_prompt = config.system_prompt
        if not system_prompt:
            prompt_file = os.path.join(config.config_path, "system-prompt.md")
            if os.path.exists(prompt_file):
                with open(prompt_file) as f:
                    system_prompt = f.read()

        # runtime_config may arrive as a dict (from main.py vars(...)) or as a
        # RuntimeConfig dataclass. Read `model` defensively from either shape.
        rc = config.runtime_config
        if isinstance(rc, dict):
            model = rc.get("model") or "sonnet"
        else:
            model = getattr(rc, "model", None) or "sonnet"

        return ClaudeSDKExecutor(
            system_prompt=system_prompt,
            config_path=config.config_path,
            heartbeat=config.heartbeat,
            model=model,
        )

    async def transcript_lines(self, since: int = 0, limit: int = 100) -> dict:
        """Read the live Claude Code session transcript.

        Claude Code writes every session to
        ``$HOME/.claude/projects/<cwd-as-dirname>/<session-uuid>.jsonl`` —
        every line is a JSON event (user/assistant/tool_use/attachment/etc).
        We pick the most-recently-modified .jsonl in the projects dir for
        the agent's working directory, then return ``[since:since+limit]``.

        Returns ``supported: True`` even if no .jsonl exists yet (empty
        ``lines`` + ``cursor=0``) so the canvas can show "agent hasn't
        produced output yet" instead of "feature unavailable".
        """
        limit = max(1, min(limit, _TRANSCRIPT_MAX_LIMIT))
        since = max(0, since)

        # Resolve the projects-dir name. Claude Code maps cwd → dirname by
        # replacing "/" with "-" (so "/configs" → "-configs"). The exact
        # rule lives inside the CLI binary, but the leading-dash + path-
        # without-trailing-slash pattern is stable across versions.
        #
        # Match ClaudeSDKExecutor._resolve_cwd: prefer /workspace if populated,
        # else /configs. Override via CLAUDE_PROJECT_CWD for tests.
        WORKSPACE_MOUNT = "/workspace"
        CONFIG_MOUNT = "/configs"
        cwd_override = os.environ.get("CLAUDE_PROJECT_CWD")
        if cwd_override:
            cwd = cwd_override
        elif os.path.isdir(WORKSPACE_MOUNT) and os.listdir(WORKSPACE_MOUNT):
            cwd = WORKSPACE_MOUNT
        else:
            cwd = CONFIG_MOUNT

        # Normalize: strip trailing slash, replace path separators with "-"
        cwd_norm = cwd.rstrip("/") or "/"
        projdir_name = cwd_norm.replace("/", "-")  # "/configs" → "-configs"

        home = Path(os.environ.get("HOME", "/home/agent"))
        projdir = home / ".claude" / "projects" / projdir_name
        result_base = {
            "runtime": self.name(),
            "supported": True,
            "lines": [],
            "cursor": since,
            "more": False,
            "source": str(projdir),
        }

        if not projdir.is_dir():
            return result_base

        # Pick most-recently-modified .jsonl
        candidates = sorted(projdir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True)
        if not candidates:
            return result_base
        target = candidates[0]
        result_base["source"] = str(target)

        lines = []
        more = False
        try:
            with target.open("r") as f:
                for i, raw in enumerate(f):
                    if i < since:
                        continue
                    if len(lines) >= limit:
                        more = True
                        break
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        lines.append(json.loads(raw))
                    except json.JSONDecodeError:
                        # Skip malformed lines but keep cursor advancing
                        lines.append({"_parse_error": True, "_raw": raw[:200]})
        except OSError as exc:
            logger.warning("transcript_lines: read failed for %s: %s", target, exc)
            return result_base

        result_base["lines"] = lines
        result_base["cursor"] = since + len(lines)
        result_base["more"] = more
        return result_base
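The cursor/limit contract documented on transcript_lines() lends itself to a simple polling loop; a sketch under the assumption that an adapter instance has already been constructed:

# Sketch only: poll transcript_lines() on a constructed adapter and print each event.
import asyncio


async def follow_transcript(adapter, interval: float = 2.0) -> None:
    cursor = 0
    while True:
        page = await adapter.transcript_lines(since=cursor, limit=100)
        for entry in page["lines"]:
            print(entry)               # one parsed .jsonl event per transcript line
        cursor = page["cursor"]        # new offset to pass on the next poll
        if not page["more"]:
            await asyncio.sleep(interval)  # caught up; wait before polling again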
@@ -1,3 +0,0 @@
from .adapter import CrewAIAdapter

Adapter = CrewAIAdapter
@@ -1,144 +0,0 @@
"""CrewAI adapter — role-based multi-agent framework with full platform integration.

Creates a CrewAI Agent + Task + Crew with all platform tools (delegation, memory,
sandbox, approval), skills, plugins, and coordinator support.

Requires: pip install crewai
"""

import asyncio
import logging

from adapters.base import BaseAdapter, AdapterConfig
from a2a.server.agent_execution import AgentExecutor

logger = logging.getLogger(__name__)


def _langchain_to_crewai(lc_tool):
    """Wrap a LangChain BaseTool as a sync CrewAI @tool.

    CrewAI's @tool decorator requires the function to have a docstring
    at decoration time, so we set __doc__ before applying the decorator.
    """
    from crewai.tools import tool as crewai_tool

    def wrapper(**kwargs) -> str:
        """Placeholder."""
        result = asyncio.get_event_loop().run_until_complete(lc_tool.ainvoke(kwargs))
        return str(result)

    wrapper.__name__ = lc_tool.name
    wrapper.__doc__ = lc_tool.description or f"Tool: {lc_tool.name}"
    return crewai_tool(lc_tool.name)(wrapper)


class CrewAIAdapter(BaseAdapter):

    def __init__(self):
        self.system_prompt = None
        self.crewai_tools = []

    @staticmethod
    def name() -> str:
        return "crewai"

    @staticmethod
    def display_name() -> str:
        return "CrewAI"

    @staticmethod
    def description() -> str:
        return "CrewAI — role-based agent with task delegation and crew orchestration"

    @staticmethod
    def get_config_schema() -> dict:
        return {
            "model": {"type": "string", "description": "LLM model (e.g. openai:gpt-4.1-mini)"},
            "skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"},
            "tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools"},
        }

    async def setup(self, config: AdapterConfig) -> None:
        try:
            import crewai  # noqa: F401
            logger.info(f"CrewAI version: {crewai.__version__}")
        except ImportError:
            raise RuntimeError("crewai not installed.")

        result = await self._common_setup(config)
        self.system_prompt = result.system_prompt
        self.crewai_tools = [_langchain_to_crewai(t) for t in result.langchain_tools]
        logger.info(f"CrewAI tools: {[t.name for t in result.langchain_tools]}")

    async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
        return CrewAIA2AExecutor(
            model=config.model,
            system_prompt=self.system_prompt,
            crewai_tools=self.crewai_tools,
            heartbeat=config.heartbeat,
        )


class CrewAIA2AExecutor(AgentExecutor):
    """Wraps CrewAI's Agent + Crew.kickoff() with full platform tools."""

    def __init__(self, model: str, system_prompt: str | None, crewai_tools: list, heartbeat=None):
        self.model = model
        self.system_prompt = system_prompt
        self.crewai_tools = crewai_tools
        self._heartbeat = heartbeat

    async def execute(self, context, event_queue):
        from a2a.utils import new_agent_text_message
        from adapters.shared_runtime import extract_history, build_task_text, brief_task, set_current_task

        from adapters.shared_runtime import extract_message_text
        user_message = extract_message_text(context)

        if not user_message:
            await event_queue.enqueue_event(new_agent_text_message("No message provided"))
            return

        await set_current_task(self._heartbeat, brief_task(user_message))

        try:
            from crewai import Agent, Task, Crew

            model_str = self.model
            if model_str.startswith("openai:"):
                model_str = model_str.replace("openai:", "openai/")

            backstory = self.system_prompt or "You are a helpful AI agent."

            history = extract_history(context)
            task_desc = build_task_text(user_message, history)

            agent = Agent(
                role=backstory.split("\n")[0][:100],
                goal="Help the user and coordinate with peer agents when needed",
                backstory=backstory,
                llm=model_str,
                tools=self.crewai_tools,
                verbose=False,
            )

            task = Task(
                description=task_desc,
                expected_output="A helpful response",
                agent=agent,
            )

            crew = Crew(agents=[agent], tasks=[task], verbose=False)
            result = await asyncio.to_thread(crew.kickoff)
            reply = str(result)

        except Exception as e:
            reply = f"CrewAI error: {e}"
        finally:
            await set_current_task(self._heartbeat, "")

        await event_queue.enqueue_event(new_agent_text_message(reply))

    async def cancel(self, context, event_queue):  # pragma: no cover
        pass
@@ -1,3 +0,0 @@
from .adapter import DeepAgentsAdapter

Adapter = DeepAgentsAdapter
@@ -1,184 +0,0 @@
"""DeepAgents adapter — fully utilizing the DeepAgents SDK.

Uses create_deep_agent() with:
- FilesystemBackend(/workspace) — persistent file access across messages
- MemorySaver checkpointer — session continuity
- Memory files — CLAUDE.md loaded natively
- Filesystem permissions — restrict writes to /workspace and /configs
- InMemoryCache — avoid repeat API calls
- All built-in tools: write_todos, read_file, write_file, edit_file,
  ls, glob, grep, execute, task

Supports: anthropic, openai, openrouter, groq, cerebras, google_genai, ollama.
"""

import os
import glob as globmod
import logging

from adapters.base import BaseAdapter, AdapterConfig
from a2a.server.agent_execution import AgentExecutor

logger = logging.getLogger(__name__)


class DeepAgentsAdapter(BaseAdapter):

    def __init__(self):
        self.agent = None
        self._checkpointer = None

    @staticmethod
    def name() -> str:
        return "deepagents"

    @staticmethod
    def display_name() -> str:
        return "DeepAgents"

    @staticmethod
    def description() -> str:
        return "LangChain DeepAgents — planning, filesystem, sub-agents, shell execution, session persistence"

    @staticmethod
    def get_config_schema() -> dict:
        return {
            "model": {
                "type": "string",
                "description": "provider:model (e.g. google_genai:gemini-2.5-flash, groq:llama-3.3-70b-versatile)",
                "default": "google_genai:gemini-2.5-flash",
            },
            "skills": {"type": "array", "items": {"type": "string"}},
            "tools": {"type": "array", "items": {"type": "string"}},
        }

    def _create_llm(self, model_str: str):
        """Create a LangChain LLM from a provider:model string."""
        if ":" in model_str:
            provider, model_name = model_str.split(":", 1)
        else:
            provider, model_name = "anthropic", model_str

        if provider == "openai":
            from langchain_openai import ChatOpenAI
            kwargs = {"model": model_name}
            base_url = os.environ.get("OPENAI_BASE_URL", "")
            if base_url:
                kwargs["openai_api_base"] = base_url
            return ChatOpenAI(**kwargs)
        elif provider == "openrouter":
            from langchain_openai import ChatOpenAI
            return ChatOpenAI(
                model=model_name,
                openai_api_key=os.environ.get("OPENROUTER_API_KEY", os.environ.get("OPENAI_API_KEY", "")),
                openai_api_base="https://openrouter.ai/api/v1",
                max_tokens=int(os.environ.get("MAX_TOKENS", "2048")),
            )
        elif provider == "groq":
            from langchain_openai import ChatOpenAI
            return ChatOpenAI(
                model=model_name,
                openai_api_key=os.environ.get("GROQ_API_KEY", ""),
                openai_api_base="https://api.groq.com/openai/v1",
            )
        elif provider == "cerebras":
            from langchain_openai import ChatOpenAI
            return ChatOpenAI(
                model=model_name,
                openai_api_key=os.environ.get("CEREBRAS_API_KEY", ""),
                openai_api_base="https://api.cerebras.ai/v1",
            )
        elif provider == "qianfan":
            from langchain_openai import ChatOpenAI
            return ChatOpenAI(
                model=model_name,
                openai_api_key=os.environ.get("QIANFAN_API_KEY", os.environ.get("AISTUDIO_API_KEY", "")),
                openai_api_base="https://qianfan.baidubce.com/v2",
            )
        elif provider == "anthropic":
            from langchain_anthropic import ChatAnthropic
            kwargs = {"model": model_name}
            base_url = os.environ.get("ANTHROPIC_BASE_URL", "")
            if base_url:
                kwargs["anthropic_api_url"] = base_url
            return ChatAnthropic(**kwargs)
        elif provider == "google_genai":
            from langchain_google_genai import ChatGoogleGenerativeAI
            return ChatGoogleGenerativeAI(model=model_name)
        elif provider == "ollama":
            from langchain_ollama import ChatOllama
            return ChatOllama(model=model_name)
        else:
            raise ValueError(f"Unsupported model provider: {provider}")

    async def setup(self, config: AdapterConfig) -> None:
        try:
            from deepagents import create_deep_agent, FilesystemPermission
            from deepagents.backends import FilesystemBackend
            from langgraph.checkpoint.memory import MemorySaver
            from langchain_core.caches import InMemoryCache
        except ImportError as e:
            raise RuntimeError(f"deepagents not installed: {e}")

        result = await self._common_setup(config)
        logger.info("DeepAgents platform tools: %s", [t.name for t in result.langchain_tools])

        llm = self._create_llm(config.model)

        # FilesystemBackend — persistent file access
        workspace_dir = "/workspace" if os.path.isdir("/workspace") else "/configs"
        # virtual_mode=False: read/write the real bind-mounted filesystem so
        # read_file/ls/write_file/edit_file match what `bash` sees. With
        # virtual_mode=True agents operate on an in-memory snapshot and
        # report real files as "missing" (and writes don't persist across
        # restarts). Permissions below still scope access to /workspace + /configs.
        backend = FilesystemBackend(root_dir=workspace_dir, virtual_mode=False)

        # MemorySaver — session continuity
        self._checkpointer = MemorySaver()

        # Memory — load CLAUDE.md natively
        memory_files = []
        claude_md = os.path.join(config.config_path, "CLAUDE.md")
        if os.path.exists(claude_md):
            memory_files.append(claude_md)

        # Filesystem permissions
        permissions = [
            FilesystemPermission(operations=["read", "write"], paths=["/workspace/**"], mode="allow"),
            FilesystemPermission(operations=["read", "write"], paths=["/configs/**"], mode="allow"),
        ]

        # Native skills from /configs/skills/*.py
        deepagent_skills = []
        skills_dir = os.path.join(config.config_path, "skills")
        if os.path.isdir(skills_dir):
            deepagent_skills = globmod.glob(os.path.join(skills_dir, "**", "*.py"), recursive=True)

        # LLM cache
        cache = InMemoryCache()

        self.agent = create_deep_agent(
            model=llm,
            tools=result.langchain_tools,
            system_prompt=result.system_prompt,
            backend=backend,
            checkpointer=self._checkpointer,
            memory=memory_files if memory_files else None,
            permissions=permissions,
            skills=deepagent_skills if deepagent_skills else None,
            cache=cache,
        )

        logger.info(
            "DeepAgents: %d tools, backend=%s, checkpointer=MemorySaver, "
            "cache=InMemoryCache, memory=%d, permissions=%d, skills=%d",
            len(result.langchain_tools), type(backend).__name__,
            len(memory_files), len(permissions), len(deepagent_skills),
        )

    async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
        if self.agent is None:
            raise RuntimeError("setup() must be called before create_executor()")
        from a2a_executor import LangGraphA2AExecutor
        return LangGraphA2AExecutor(self.agent, heartbeat=config.heartbeat, model=config.model)
@ -1,3 +0,0 @@
|
||||
from .adapter import GeminiCLIAdapter as Adapter
|
||||
|
||||
__all__ = ["Adapter"]
|
||||
@ -1,141 +0,0 @@
|
||||
"""Gemini CLI adapter — wraps Google's Gemini CLI as an agent runtime.
|
||||
|
||||
Gemini CLI (github.com/google-gemini/gemini-cli, ~101k stars, Apache 2.0)
|
||||
is structurally identical to the Claude Code adapter: a single-agent agentic
|
||||
CLI with file/shell tools, MCP support, and a ReAct loop — backed by Gemini
|
||||
instead of Claude.
|
||||
|
||||
Key differences from claude-code:
|
||||
- Auth: GEMINI_API_KEY env var (no OAuth token needed)
|
||||
- Memory file: GEMINI.md (equivalent of Claude Code's CLAUDE.md)
|
||||
- MCP config: ~/.gemini/settings.json (not via --mcp-config flag)
|
||||
- Executor: CLIAgentExecutor (no Python SDK; uses gemini CLI subprocess)
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from a2a.server.agent_execution import AgentExecutor
|
||||
|
||||
from adapters.base import BaseAdapter, AdapterConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class GeminiCLIAdapter(BaseAdapter):
|
||||
|
||||
@staticmethod
|
||||
def name() -> str:
|
||||
return "gemini-cli"
|
||||
|
||||
@staticmethod
|
||||
def display_name() -> str:
|
||||
return "Gemini CLI"
|
||||
|
||||
@staticmethod
|
||||
def description() -> str:
|
||||
return (
|
||||
"Google Gemini CLI — agentic coding with file/shell tools, "
|
||||
"MCP support, and a ReAct loop backed by Gemini models"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def get_config_schema() -> dict:
|
||||
return {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "Gemini model (e.g. gemini-2.5-pro, gemini-2.5-flash)",
|
||||
"default": "gemini-2.5-pro",
|
||||
},
|
||||
"required_env": {
|
||||
"type": "array",
|
||||
"description": "Required env vars",
|
||||
"default": ["GEMINI_API_KEY"],
|
||||
},
|
||||
"timeout": {
|
||||
"type": "integer",
|
||||
"description": "Timeout in seconds (0 = no timeout)",
|
||||
"default": 0,
|
||||
},
|
||||
}
|
||||
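# A matching runtime_config block in config.yaml might look like this
# (values are illustrative; create_executor() below reads "model" and
# "timeout", and GEMINI_API_KEY must be exported in the container env):
#
#   runtime_config:
#     model: gemini-2.5-flash
#     timeout: 0            # 0 = no timeout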
|
||||
def memory_filename(self) -> str:
|
||||
"""Gemini CLI reads GEMINI.md as its persistent context file."""
|
||||
return "GEMINI.md"
|
||||
|
||||
async def setup(self, config: AdapterConfig) -> None:
|
||||
"""Wire MCP server into ~/.gemini/settings.json and seed GEMINI.md.
|
||||
|
||||
Gemini CLI does not accept an --mcp-config flag; instead, MCP servers
|
||||
are declared in ~/.gemini/settings.json under the "mcpServers" key.
|
||||
This method merges the A2A MCP server into that file, preserving any
|
||||
existing keys (e.g. user's own MCP tools).
|
||||
|
||||
Also seeds GEMINI.md from system-prompt.md if GEMINI.md is absent,
|
||||
so the agent has role context on first boot.
|
||||
"""
|
||||
from executor_helpers import get_mcp_server_path
|
||||
|
||||
# -- MCP wiring --------------------------------------------------
|
||||
gemini_dir = Path.home() / ".gemini"
|
||||
gemini_dir.mkdir(parents=True, exist_ok=True)
|
||||
settings_path = gemini_dir / "settings.json"
|
||||
|
||||
settings: dict = {}
|
||||
if settings_path.exists():
|
||||
try:
|
||||
settings = json.loads(settings_path.read_text())
|
||||
except Exception as exc:
|
||||
logger.warning("gemini-cli: could not parse %s: %s", settings_path, exc)
|
||||
settings = {}
|
||||
|
||||
settings.setdefault("mcpServers", {})
|
||||
settings["mcpServers"]["a2a"] = {
|
||||
"command": sys.executable,
|
||||
"args": [get_mcp_server_path()],
|
||||
}
|
||||
|
||||
try:
|
||||
settings_path.write_text(json.dumps(settings, indent=2))
|
||||
logger.info("gemini-cli: wrote MCP config to %s", settings_path)
|
||||
except OSError as exc:
|
||||
logger.warning("gemini-cli: could not write %s: %s", settings_path, exc)
|
||||
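# After the merge above, ~/.gemini/settings.json ends up shaped roughly like
# this (paths are placeholders — "command" is whatever sys.executable resolves
# to in the container, "args" is the get_mcp_server_path() result); any keys
# the user already had are preserved:
#
#   {
#     "mcpServers": {
#       "a2a": {
#         "command": "/usr/local/bin/python3",
#         "args": ["/workspace/mcp_server.py"]
#       }
#     }
#   }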
|
||||
# -- GEMINI.md seed ----------------------------------------------
|
||||
gemini_md = Path(config.config_path) / "GEMINI.md"
|
||||
system_prompt_file = Path(config.config_path) / "system-prompt.md"
|
||||
if not gemini_md.exists() and system_prompt_file.exists():
|
||||
try:
|
||||
gemini_md.write_text(system_prompt_file.read_text())
|
||||
logger.info("gemini-cli: seeded GEMINI.md from system-prompt.md")
|
||||
except OSError as exc:
|
||||
logger.warning("gemini-cli: could not seed GEMINI.md: %s", exc)
|
||||
|
||||
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
|
||||
from cli_executor import CLIAgentExecutor
|
||||
from config import RuntimeConfig
|
||||
|
||||
rc = config.runtime_config
|
||||
if isinstance(rc, dict):
|
||||
model = rc.get("model") or "gemini-2.5-pro"
|
||||
timeout = int(rc.get("timeout") or 0)
|
||||
else:
|
||||
model = getattr(rc, "model", None) or "gemini-2.5-pro"
|
||||
timeout = int(getattr(rc, "timeout", None) or 0)
|
||||
|
||||
runtime_config = RuntimeConfig(
|
||||
model=model,
|
||||
timeout=timeout,
|
||||
required_env=["GEMINI_API_KEY"],
|
||||
)
|
||||
|
||||
return CLIAgentExecutor(
|
||||
runtime="gemini-cli",
|
||||
runtime_config=runtime_config,
|
||||
system_prompt=config.system_prompt,
|
||||
config_path=config.config_path,
|
||||
heartbeat=config.heartbeat,
|
||||
)
|
||||
@ -1,6 +0,0 @@
|
||||
from .adapter import HermesAdapter
|
||||
from .executor import create_executor
|
||||
|
||||
Adapter = HermesAdapter
|
||||
|
||||
__all__ = ["create_executor", "HermesAdapter", "Adapter"]
|
||||
@ -1,76 +0,0 @@
|
||||
"""Hermes adapter — Nous Research Hermes models via Nous Portal or OpenRouter.
|
||||
|
||||
Uses the OpenAI-compatible client (openai>=1.0.0) to communicate with
|
||||
either the Nous Portal directly (HERMES_API_KEY) or OpenRouter as a
|
||||
fallback (OPENROUTER_API_KEY).
|
||||
"""
|
||||
import os
|
||||
|
||||
from adapters.base import BaseAdapter, AdapterConfig
|
||||
|
||||
|
||||
class HermesAdapter(BaseAdapter):
|
||||
|
||||
@staticmethod
|
||||
def name() -> str:
|
||||
return "hermes"
|
||||
|
||||
@staticmethod
|
||||
def display_name() -> str:
|
||||
return "Hermes (Nous Research)"
|
||||
|
||||
@staticmethod
|
||||
def description() -> str:
|
||||
return "Hermes models via Nous Portal or OpenRouter — openai>=1.0.0 compatible client"
|
||||
|
||||
@staticmethod
|
||||
def get_config_schema() -> dict:
|
||||
return {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Hermes model ID (e.g. nousresearch/hermes-3-llama-3.1-405b for OpenRouter "
|
||||
"or hermes-3-llama-3.1-405b for Nous Portal)"
|
||||
),
|
||||
},
|
||||
}
|
||||
|
||||
async def setup(self, config: AdapterConfig) -> None: # pragma: no cover
|
||||
try:
|
||||
import openai # noqa: F401
|
||||
except ImportError as e:
|
||||
raise RuntimeError(
|
||||
"Hermes adapter requires openai>=1.0.0 — "
|
||||
"install with: pip install 'openai>=1.0.0'"
|
||||
) from e
|
||||
|
||||
async def create_executor(self, config: AdapterConfig): # pragma: no cover
|
||||
"""Create and return a HermesA2AExecutor using key resolution from env/config."""
|
||||
from .executor import create_executor, HermesA2AExecutor
|
||||
|
||||
# Resolve API key: prefer workspace secrets (runtime_config), then env vars
|
||||
hermes_api_key = config.runtime_config.get("hermes_api_key") or None
|
||||
|
||||
# Phase 3 escalation ladder — read from runtime_config.escalation_ladder
|
||||
# if present. The platform's org importer copies the ladder from
|
||||
# org.yaml (runtime_config.escalation_ladder) into the container's
|
||||
# /configs/config.yaml, and the workspace-template loader surfaces it
|
||||
# here. Empty / missing = single-shot behaviour (unchanged from pre-
|
||||
# Phase-3). See adapters.hermes.escalation for classification rules.
|
||||
escalation_ladder = config.runtime_config.get("escalation_ladder") or None
|
||||
|
||||
executor = create_executor(
|
||||
hermes_api_key=hermes_api_key,
|
||||
config_path=config.config_path, # Phase 2d-i: system-prompt.md injection
|
||||
escalation_ladder=escalation_ladder,
|
||||
)
|
||||
|
||||
# Override model from config if provided
|
||||
model = config.model
|
||||
if ":" in model:
|
||||
_, model = model.split(":", 1)
|
||||
if model:
|
||||
executor.model = model
|
||||
|
||||
executor._heartbeat = config.heartbeat
|
||||
return executor
|
||||
@ -1,201 +0,0 @@
|
||||
"""Hermes escalation ladder — promote to stronger models on transient failure.
|
||||
|
||||
Every workspace in the Hermes adapter path has a single pinned model today
|
||||
(``provider_cfg.default_model`` overridden by ``runtime_config.model`` in
|
||||
``config.yaml``). That's fine when the pinned model is the best fit, but
|
||||
it leaves four recurring failure classes unhandled:
|
||||
|
||||
1. **Rate limits** (Claude Max saturation, Anthropic 429, OpenAI 429). We're
|
||||
currently saturating 3× Claude Max subscriptions — the first 429 is now
|
||||
the norm, not the exception.
|
||||
2. **Transient 5xx** from any provider (overloaded 529, 500, 502, 503).
|
||||
3. **Context-length exceeded** on the smaller-window model (Haiku has 200k,
|
||||
cheaper Gemini flash tiers have less, OpenAI nano/mini have 128k).
|
||||
4. **Refusal / empty response** from a cheaper tier that the next tier up
|
||||
would handle — less common but real in practice.
|
||||
|
||||
An escalation ladder is a workspace-configured list of ``LadderRung`` entries
|
||||
(provider + model). On a qualifying failure, the executor advances to the
|
||||
next rung and retries the same user_message + history. If the ladder is
|
||||
exhausted, the last error is raised.
|
||||
|
||||
## Config shape
|
||||
|
||||
``config.yaml``::
|
||||
|
||||
hermes:
|
||||
escalation_ladder:
|
||||
- provider: gemini
|
||||
model: gemini-2.5-flash # fast/cheap probe
|
||||
- provider: anthropic
|
||||
model: claude-haiku-4-5-20251001
|
||||
- provider: anthropic
|
||||
model: claude-sonnet-4-5-20250929
|
||||
- provider: anthropic
|
||||
model: claude-opus-4-1-20250805 # frontier rescue
|
||||
|
||||
When ``escalation_ladder`` is absent, the executor behaves exactly as before:
|
||||
one call, one model, errors bubble.
|
||||
|
||||
## What this module does NOT do (yet)
|
||||
|
||||
- **No uncertainty-driven escalation.** Only transient-failure escalation.
|
||||
Promoting on "the answer felt thin" requires a judge pass — follow-up.
|
||||
- **No streaming partial-result aggregation.** The first rung that succeeds
|
||||
returns; we don't splice responses across rungs.
|
||||
- **No per-workspace budget tracking.** Each escalation is one more paid
|
||||
call. Follow-up work (#305 budget cap) handles that.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class LadderRung:
|
||||
"""One rung on the escalation ladder.
|
||||
|
||||
``provider`` is a canonical short name from ``providers.PROVIDERS``.
|
||||
``model`` overrides the provider's default for this rung.
|
||||
"""
|
||||
|
||||
provider: str
|
||||
model: str
|
||||
|
||||
|
||||
def parse_ladder(raw: Optional[list]) -> list[LadderRung]:
|
||||
"""Parse the ``escalation_ladder`` list from ``config.yaml`` into rungs.
|
||||
|
||||
Accepts either dict-shaped entries (``{"provider": ..., "model": ...}``)
|
||||
or pre-built LadderRung instances (for programmatic callers). Skips
|
||||
malformed entries with a warning rather than raising — dropping one rung
with a loud warning beats failing the whole boot.
|
||||
|
||||
Empty / None / missing input returns an empty list (caller interprets
|
||||
as "no ladder configured, single-shot dispatch").
|
||||
"""
|
||||
if not raw:
|
||||
return []
|
||||
rungs: list[LadderRung] = []
|
||||
for i, entry in enumerate(raw):
|
||||
if isinstance(entry, LadderRung):
|
||||
rungs.append(entry)
|
||||
continue
|
||||
if not isinstance(entry, dict):
|
||||
logger.warning(
|
||||
"Hermes ladder: rung %d is not a dict (%r), skipping", i, type(entry).__name__,
|
||||
)
|
||||
continue
|
||||
provider = entry.get("provider")
|
||||
model = entry.get("model")
|
||||
if not provider or not model:
|
||||
logger.warning(
|
||||
"Hermes ladder: rung %d missing provider or model (%r), skipping", i, entry,
|
||||
)
|
||||
continue
|
||||
rungs.append(LadderRung(provider=str(provider), model=str(model)))
|
||||
return rungs
|
||||
|
||||
|
||||
# Error-type names that indicate a transient failure worth escalating.
|
||||
# We match on the class name (not the module) so this works regardless of
|
||||
# whether the workspace imported the new or old anthropic / openai SDK.
|
||||
# See ``should_escalate`` for the matching logic.
|
||||
_ESCALATABLE_ERROR_CLASSES = frozenset({
|
||||
# openai SDK
|
||||
"RateLimitError", # 429
|
||||
"APITimeoutError", # connect/read timeout
|
||||
"APIConnectionError", # TCP / DNS
|
||||
"InternalServerError", # 500
|
||||
# anthropic SDK
|
||||
"OverloadedError", # 529
|
||||
"APIStatusError", # generic 5xx wrapper
|
||||
# common across both: network-level errors
|
||||
"ConnectionError",
|
||||
"Timeout",
|
||||
"ReadTimeout",
|
||||
})
|
||||
|
||||
# Error-message substrings that indicate context-length exceeded. These map
|
||||
# to distinct HTTP 400 responses from each provider rather than a typed
|
||||
# exception, so we match on substring.
|
||||
_CONTEXT_LENGTH_MARKERS = (
|
||||
"maximum context length", # openai
|
||||
"context_length_exceeded", # openai error.code
|
||||
"prompt is too long", # anthropic
|
||||
"prompt_too_long", # anthropic error.code
|
||||
"context window", # gemini
|
||||
)
|
||||
|
||||
# Error-message substrings that indicate a transient gateway issue. These
|
||||
# sometimes come through as generic exceptions without typed classes.
|
||||
_TRANSIENT_GATEWAY_MARKERS = (
|
||||
"502 bad gateway",
|
||||
"503 service unavailable",
|
||||
"504 gateway timeout",
|
||||
"overloaded",
|
||||
"please try again",
|
||||
"temporarily unavailable",
|
||||
)
|
||||
|
||||
# Error-message substrings that definitively DO NOT qualify for escalation.
|
||||
# Auth and malformed-payload errors don't get better by retrying on a
|
||||
# different model — they indicate config / code bugs.
|
||||
_NON_ESCALATABLE_MARKERS = (
|
||||
"invalid api key",
|
||||
"authentication_error",
|
||||
"401",
|
||||
"403",
|
||||
"forbidden",
|
||||
"permission_denied",
|
||||
"unauthorized",
|
||||
)
|
||||
|
||||
|
||||
def should_escalate(exc: BaseException) -> bool:
|
||||
"""Decide whether ``exc`` justifies moving to the next ladder rung.
|
||||
|
||||
Returns True when the failure is one of:
|
||||
- Rate limit (429 / RateLimitError / OverloadedError)
|
||||
- Transient gateway (5xx, overload, timeout, connection reset)
|
||||
- Context-length exceeded on the current model
|
||||
|
||||
Returns False for auth, permission, malformed-payload, and other
|
||||
config-bug classes — escalating those just wastes the next-tier quota.
|
||||
"""
|
||||
if exc is None:
|
||||
return False
|
||||
|
||||
cls_name = exc.__class__.__name__
|
||||
msg = str(exc).lower()
|
||||
|
||||
# Hard reject: never escalate auth/permission errors regardless of
|
||||
# what the class name says. A wrapped RateLimitError that actually
|
||||
# contains "401 Unauthorized" is a config bug, not a rate limit.
|
||||
for marker in _NON_ESCALATABLE_MARKERS:
|
||||
if marker in msg:
|
||||
return False
|
||||
|
||||
if cls_name in _ESCALATABLE_ERROR_CLASSES:
|
||||
return True
|
||||
|
||||
for marker in _CONTEXT_LENGTH_MARKERS:
|
||||
if marker in msg:
|
||||
return True
|
||||
|
||||
for marker in _TRANSIENT_GATEWAY_MARKERS:
|
||||
if marker in msg:
|
||||
return True
|
||||
|
||||
# Status-code prefixes are a common tell for HTTP-wrapped provider errors.
|
||||
if "429" in msg or "529" in msg:
|
||||
return True
|
||||
if any(code in msg for code in ("500 ", "502 ", "503 ", "504 ")):
|
||||
return True
|
||||
|
||||
return False
|
||||
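# Minimal usage sketch: parse the ladder shape documented above and classify
# two representative failures. Values are illustrative only.
if __name__ == "__main__":
    sample = [
        {"provider": "gemini", "model": "gemini-2.5-flash"},
        {"provider": "anthropic", "model": "claude-sonnet-4-5-20250929"},
        {"model": "no-provider-given"},  # malformed — skipped with a warning
    ]
    for rung in parse_ladder(sample):
        print(rung.provider, rung.model)
    print(should_escalate(RuntimeError("429 Too Many Requests")))  # True — rate limit
    print(should_escalate(ValueError("invalid api key")))          # False — config bug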
@ -1,543 +0,0 @@
|
||||
"""Hermes adapter executor — Phase 2 multi-provider with native SDK dispatch.
|
||||
|
||||
Hermes supports 15 providers via the shared ``providers.py`` registry. Each
|
||||
provider's ``auth_scheme`` field controls which client + request shape the
|
||||
executor uses:
|
||||
|
||||
- ``auth_scheme="openai"`` (13 providers) — OpenAI-compat ``/v1/chat/completions``
|
||||
via the ``openai`` Python SDK. Covers: Nous Portal, OpenRouter, OpenAI, xAI,
|
||||
Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, Fireworks, Mistral.
|
||||
|
||||
- ``auth_scheme="anthropic"`` (1 provider — anthropic) — native Messages API via
|
||||
the ``anthropic`` Python SDK. Phase 2a: better tool calling, vision support,
|
||||
extended thinking semantics. If the ``anthropic`` package isn't installed in
|
||||
the workspace image, ``_do_anthropic_native`` raises a clear error with
|
||||
install instructions rather than silently falling back to the OpenAI-compat
|
||||
shim (which would lose fidelity invisibly).
|
||||
|
||||
- ``auth_scheme="gemini"`` (1 provider — gemini) — native ``generateContent`` API
|
||||
via the official ``google-genai`` Python SDK. Phase 2b: first-class vision
|
||||
content blocks, tool/function calling, system instructions, and thinking
|
||||
config — all of which the OpenAI-compat shim at ``/v1beta/openai`` either
|
||||
strips or mis-translates. Same fail-loud semantics as the anthropic path.
|
||||
|
||||
Key resolution order (unchanged from Phase 1)
|
||||
----------------------------------------------
|
||||
1. ``hermes_api_key`` parameter (explicit call-site override — routes to Nous Portal)
|
||||
2. ``provider`` parameter (explicit provider name — looks up its env var(s))
|
||||
3. Auto-detect: walk ``providers.RESOLUTION_ORDER`` and pick the first provider
|
||||
whose env var is set.
|
||||
|
||||
Raises ``ValueError`` if nothing resolves. The error message lists every env var
|
||||
that was checked so the operator knows their options without reading source.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from .escalation import LadderRung, parse_ladder, should_escalate
|
||||
from .providers import PROVIDERS, ProviderConfig, resolve_provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_executor(
|
||||
hermes_api_key: Optional[str] = None,
|
||||
provider: Optional[str] = None,
|
||||
model: Optional[str] = None,
|
||||
config_path: Optional[str] = None,
|
||||
escalation_ladder: Optional[list] = None,
|
||||
):
|
||||
"""Create and return a LangGraph-compatible executor for the Hermes adapter.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
hermes_api_key:
|
||||
Explicit API key. When provided, the call routes to Nous Portal (the
|
||||
PR 2 back-compat path) regardless of ``provider``.
|
||||
provider:
|
||||
Canonical provider short name from ``providers.PROVIDERS`` (e.g.
|
||||
``"openai"``, ``"anthropic"``, ``"qwen"``, ``"xai"``). When set, the
|
||||
registry entry's env vars are used to find the API key and its
|
||||
base URL + default model override the auto-detect path. When unset,
|
||||
auto-detect walks ``providers.RESOLUTION_ORDER`` until it finds a
|
||||
provider whose env var is set.
|
||||
model:
|
||||
Override the provider's default model. Passed straight through to
|
||||
``chat.completions.create``.
|
||||
config_path:
|
||||
Path to the workspace's ``/configs`` directory. Phase 2d-i reads
|
||||
``system-prompt.md`` from here on every ``execute()`` call and
|
||||
passes the content as a system instruction to the native SDK.
|
||||
Optional — omit to skip system-prompt injection (tests do this).
|
||||
|
||||
Returns
|
||||
-------
|
||||
HermesA2AExecutor
|
||||
A ready-to-use executor wired with the resolved api_key + base_url
|
||||
+ model + config_path.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If ``provider`` is an unknown name, if ``provider`` is known but its
|
||||
env vars are all empty, or if auto-detect finds nothing.
|
||||
"""
|
||||
ladder = parse_ladder(escalation_ladder)
|
||||
if ladder:
|
||||
logger.info(
|
||||
"Hermes: escalation ladder configured — %d rungs (%s)",
|
||||
len(ladder),
|
||||
" → ".join(f"{r.provider}:{r.model}" for r in ladder),
|
||||
)
|
||||
|
||||
# Path 1: PR 2 back-compat — explicit hermes_api_key routes to Nous Portal.
|
||||
if hermes_api_key:
|
||||
cfg = PROVIDERS["nous_portal"]
|
||||
logger.debug("Hermes: using explicit hermes_api_key param (Nous Portal)")
|
||||
return HermesA2AExecutor(
|
||||
provider_cfg=cfg,
|
||||
api_key=hermes_api_key,
|
||||
model=model or cfg.default_model,
|
||||
config_path=config_path,
|
||||
escalation_ladder=ladder,
|
||||
)
|
||||
|
||||
# Path 2/3: registry resolution (either explicit provider name or auto-detect).
|
||||
cfg, api_key = resolve_provider(provider)
|
||||
logger.info(
|
||||
"Hermes: provider=%s auth_scheme=%s base_url=%s model=%s",
|
||||
cfg.name,
|
||||
cfg.auth_scheme,
|
||||
cfg.base_url,
|
||||
model or cfg.default_model,
|
||||
)
|
||||
return HermesA2AExecutor(
|
||||
provider_cfg=cfg,
|
||||
api_key=api_key,
|
||||
model=model or cfg.default_model,
|
||||
config_path=config_path,
|
||||
escalation_ladder=ladder,
|
||||
)
|
||||
|
||||
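# Typical wiring (values illustrative — any provider/model pair from the
# registry works): an explicit provider name resolves that provider's env
# var(s); passing provider=None auto-detects via providers.RESOLUTION_ORDER.
#
#   executor = create_executor(
#       provider="openrouter",                # or None for auto-detect
#       model="anthropic/claude-sonnet-4.5",  # overrides the provider default
#       config_path="/configs",               # enables system-prompt.md injection
#       escalation_ladder=[
#           {"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
#       ],
#   )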
|
||||
class HermesA2AExecutor:
|
||||
"""LangGraph-compatible AgentExecutor for Hermes-style multi-provider LLMs.
|
||||
|
||||
Dispatches each inference call based on ``provider_cfg.auth_scheme``:
|
||||
|
||||
- ``"openai"`` → OpenAI-compat ``/v1/chat/completions`` via the ``openai`` SDK
|
||||
- ``"anthropic"`` → native Messages API via the ``anthropic`` SDK
|
||||
|
||||
The ``execute()`` and ``cancel()`` async methods satisfy the
|
||||
``a2a.server.agent_execution.AgentExecutor`` interface so this
|
||||
executor can be dropped into the A2A server's DefaultRequestHandler.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
provider_cfg: ProviderConfig,
|
||||
api_key: str,
|
||||
model: str,
|
||||
heartbeat=None,
|
||||
config_path: Optional[str] = None,
|
||||
escalation_ladder: Optional[list] = None,
|
||||
):
|
||||
self.provider_cfg = provider_cfg
|
||||
self.api_key = api_key
|
||||
self.base_url = provider_cfg.base_url
|
||||
self.model = model
|
||||
self._heartbeat = heartbeat
|
||||
# Phase 2d-i: config_path lets execute() read /configs/system-prompt.md
|
||||
# on each turn and pass it to the native SDK's `system=` /
|
||||
# `system_instruction=` / prepended message. Optional because older
|
||||
# callers + tests construct executors directly.
|
||||
self._config_path = config_path
|
||||
# Phase 3: escalation ladder. When non-empty, _do_inference retries
|
||||
# transient-failure classes (rate limit, 5xx, overload, context-length)
|
||||
# on each rung in turn before raising. Empty / None = single-shot,
|
||||
# original behaviour. See adapters.hermes.escalation.
|
||||
self._ladder: list[LadderRung] = parse_ladder(escalation_ladder) or []
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# History → provider-specific message list converters
|
||||
# ------------------------------------------------------------------
|
||||
#
|
||||
# The A2A shared runtime gives us history as ``list[tuple[str, str]]``
|
||||
# with roles ``"human"`` / ``"ai"``. Each provider wants a different
|
||||
# shape:
|
||||
#
|
||||
# OpenAI-compat: [{"role":"user"|"assistant", "content": str}, ...]
|
||||
# Anthropic: [{"role":"user"|"assistant", "content": str}, ...] (same)
|
||||
# Gemini: [{"role":"user"|"model", "parts": [{"text": str}]}, ...]
|
||||
#
|
||||
# Before Phase 2c these were flattened into a single user turn via
|
||||
# ``shared_runtime.build_task_text``, which worked for basic text
|
||||
# handoff but lost the model's native multi-turn awareness (system
|
||||
# prompts, tool-use history, role attribution for instruction
|
||||
# following). Phase 2c keeps the turns as turns.
|
||||
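# Worked example (illustrative strings): history = [("human", "hi"),
# ("ai", "hello")] with user_message = "how are you?" converts to
#
#   OpenAI / Anthropic:
#     [{"role": "user", "content": "hi"},
#      {"role": "assistant", "content": "hello"},
#      {"role": "user", "content": "how are you?"}]
#
#   Gemini:
#     [{"role": "user", "parts": [{"text": "hi"}]},
#      {"role": "model", "parts": [{"text": "hello"}]},
#      {"role": "user", "parts": [{"text": "how are you?"}]}]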
|
||||
@staticmethod
|
||||
def _history_to_openai_messages(
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]]",
|
||||
) -> "list[dict]":
|
||||
"""Convert A2A history + current turn to OpenAI Chat Completions shape."""
|
||||
messages: list[dict] = []
|
||||
for role, text in history or []:
|
||||
messages.append({
|
||||
"role": "user" if role == "human" else "assistant",
|
||||
"content": text,
|
||||
})
|
||||
messages.append({"role": "user", "content": user_message})
|
||||
return messages
|
||||
|
||||
@staticmethod
|
||||
def _history_to_anthropic_messages(
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]]",
|
||||
) -> "list[dict]":
|
||||
"""Convert A2A history + current turn to Anthropic Messages API shape.
|
||||
|
||||
Identical wire format to OpenAI (``role`` + ``content``) for text-only
|
||||
turns, so we just delegate. The difference matters for tool_use /
|
||||
content blocks, which are Phase 2d territory.
|
||||
"""
|
||||
return HermesA2AExecutor._history_to_openai_messages(user_message, history)
|
||||
|
||||
@staticmethod
|
||||
def _history_to_gemini_contents(
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]]",
|
||||
) -> "list[dict]":
|
||||
"""Convert A2A history + current turn to Gemini generateContent shape.
|
||||
|
||||
Gemini uses ``role: "user" | "model"`` (NOT "assistant") and wraps
|
||||
text in a ``parts: [{"text": ...}]`` list.
|
||||
"""
|
||||
contents: list[dict] = []
|
||||
for role, text in history or []:
|
||||
contents.append({
|
||||
"role": "user" if role == "human" else "model",
|
||||
"parts": [{"text": text}],
|
||||
})
|
||||
contents.append({"role": "user", "parts": [{"text": user_message}]})
|
||||
return contents
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Per-provider inference paths
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def _do_openai_compat(
|
||||
self,
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]] | None" = None,
|
||||
system_prompt: Optional[str] = None,
|
||||
) -> str:
|
||||
"""OpenAI-compat inference — used by every provider with auth_scheme='openai'.
|
||||
|
||||
13 of the 15 registered providers route here. Uses ``openai.AsyncOpenAI``
|
||||
pointed at the provider's base_url; every provider's API is wire-
|
||||
compatible with the OpenAI Chat Completions shape.
|
||||
|
||||
Phase 2c: accepts multi-turn history.
|
||||
Phase 2d-i: accepts optional system_prompt, prepended as a
|
||||
``{"role":"system"}`` message per the OpenAI Chat Completions convention.
|
||||
"""
|
||||
import openai
|
||||
|
||||
client = openai.AsyncOpenAI(
|
||||
api_key=self.api_key,
|
||||
base_url=self.base_url,
|
||||
)
|
||||
messages = self._history_to_openai_messages(user_message, history or [])
|
||||
if system_prompt:
|
||||
messages = [{"role": "system", "content": system_prompt}, *messages]
|
||||
response = await client.chat.completions.create(
|
||||
model=self.model,
|
||||
messages=messages,
|
||||
)
|
||||
return response.choices[0].message.content or ""
|
||||
|
||||
async def _do_anthropic_native(
|
||||
self,
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]] | None" = None,
|
||||
system_prompt: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Native Anthropic Messages API inference.
|
||||
|
||||
Uses the official ``anthropic`` Python SDK for correct tool-calling,
|
||||
vision, and extended-thinking semantics that don't translate cleanly
|
||||
through the OpenAI-compat shim.
|
||||
|
||||
Phase 2a: single-turn text.
|
||||
Phase 2c: multi-turn history.
|
||||
Phase 2d-i: optional system_prompt passed via Anthropic's native
|
||||
top-level ``system=`` parameter — NOT as a message in the messages
|
||||
list (Anthropic's Messages API requires system prompts to be at the
|
||||
top level, not inline like OpenAI).
|
||||
"""
|
||||
try:
|
||||
import anthropic
|
||||
except ImportError as exc: # pragma: no cover — exercised by test_missing_sdk
|
||||
raise RuntimeError(
|
||||
"Hermes anthropic native path requires the `anthropic` package. "
|
||||
"Install in the workspace image with `pip install anthropic>=0.39.0` "
|
||||
"or set HERMES provider=openrouter to route Claude models through "
|
||||
"OpenRouter's OpenAI-compat shim instead."
|
||||
) from exc
|
||||
|
||||
client = anthropic.AsyncAnthropic(api_key=self.api_key)
|
||||
messages = self._history_to_anthropic_messages(user_message, history or [])
|
||||
create_kwargs: dict = {
|
||||
"model": self.model,
|
||||
"max_tokens": 4096,
|
||||
"messages": messages,
|
||||
}
|
||||
if system_prompt:
|
||||
create_kwargs["system"] = system_prompt
|
||||
response = await client.messages.create(**create_kwargs)
|
||||
# response.content is a list of ContentBlock; for text-only the first
|
||||
# block is a TextBlock with a .text attribute.
|
||||
if response.content and hasattr(response.content[0], "text"):
|
||||
return response.content[0].text
|
||||
return ""
|
||||
|
||||
async def _do_gemini_native(
|
||||
self,
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]] | None" = None,
|
||||
system_prompt: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Native Google Gemini ``generateContent`` inference.
|
||||
|
||||
Uses the official ``google-genai`` Python SDK for correct vision
|
||||
content blocks, tool/function calling, system instructions, and
|
||||
thinking config. These all get stripped or mis-translated through
|
||||
the OpenAI-compat ``/v1beta/openai`` shim.
|
||||
|
||||
Phase 2b: single-turn text.
|
||||
Phase 2c: multi-turn history via Gemini's ``contents=[{role,parts}]``
|
||||
shape (note: role is ``"user"`` / ``"model"``, NOT ``"assistant"``).
|
||||
Phase 2d-i: system_prompt passed via native
|
||||
``config.system_instruction`` — Gemini's top-level system field.
|
||||
"""
|
||||
try:
|
||||
from google import genai # type: ignore[import-not-found]
|
||||
from google.genai import types as genai_types # type: ignore[import-not-found]
|
||||
except ImportError as exc: # pragma: no cover — exercised by test_missing_sdk
|
||||
raise RuntimeError(
|
||||
"Hermes gemini native path requires the `google-genai` package. "
|
||||
"Install in the workspace image with `pip install google-genai>=1.0.0` "
|
||||
"or set HERMES provider=openrouter to route Gemini models through "
|
||||
"OpenRouter's OpenAI-compat shim instead."
|
||||
) from exc
|
||||
|
||||
client = genai.Client(api_key=self.api_key)
|
||||
contents = self._history_to_gemini_contents(user_message, history or [])
|
||||
generate_kwargs: dict = {
|
||||
"model": self.model,
|
||||
"contents": contents,
|
||||
}
|
||||
if system_prompt:
|
||||
generate_kwargs["config"] = genai_types.GenerateContentConfig(
|
||||
system_instruction=system_prompt,
|
||||
)
|
||||
response = await client.aio.models.generate_content(**generate_kwargs)
|
||||
# response.text is the flattened text across all parts of the first
|
||||
# candidate. For text-only that's the whole reply.
|
||||
return response.text or ""
|
||||
|
||||
async def _do_inference(
|
||||
self,
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]] | None" = None,
|
||||
system_prompt: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Dispatch to the right inference path based on provider auth_scheme.
|
||||
|
||||
Phase 2c: multi-turn history.
|
||||
Phase 2d-i: optional system_prompt is passed through to the native
|
||||
system field of whichever path wins dispatch.
|
||||
Phase 3: when an escalation ladder is configured, transient failures
|
||||
(rate limit, 5xx, overload, context-length) promote to the next rung
|
||||
before raising. No ladder = single-shot, original behaviour.
|
||||
"""
|
||||
# Fast path: no ladder configured — single call on the pinned model.
|
||||
if not self._ladder:
|
||||
return await self._dispatch(
|
||||
self.provider_cfg, self.model, user_message, history, system_prompt,
|
||||
)
|
||||
|
||||
# Slow path: walk the ladder. Start with the pinned (provider, model)
|
||||
# so the first attempt matches non-ladder behaviour exactly — the
|
||||
# ladder only kicks in when the first attempt fails escalatably.
|
||||
attempts: list[tuple[ProviderConfig, str]] = [(self.provider_cfg, self.model)]
|
||||
for rung in self._ladder:
|
||||
rung_cfg = PROVIDERS.get(rung.provider)
|
||||
if rung_cfg is None:
|
||||
logger.warning(
|
||||
"Hermes ladder: provider %r not in registry, skipping rung",
|
||||
rung.provider,
|
||||
)
|
||||
continue
|
||||
attempts.append((rung_cfg, rung.model))
|
||||
|
||||
last_exc: Optional[BaseException] = None
|
||||
for i, (cfg, model) in enumerate(attempts):
|
||||
try:
|
||||
reply = await self._dispatch(
|
||||
cfg, model, user_message, history, system_prompt,
|
||||
)
|
||||
if i > 0:
|
||||
logger.info(
|
||||
"Hermes ladder: succeeded on rung %d (%s:%s) after %d failed attempt(s)",
|
||||
i, cfg.name, model, i,
|
||||
)
|
||||
return reply
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
if i == len(attempts) - 1:
|
||||
logger.error(
|
||||
"Hermes ladder: exhausted all %d rungs — raising. Last error on %s:%s: %s",
|
||||
len(attempts), cfg.name, model, exc,
|
||||
)
|
||||
raise
|
||||
if not should_escalate(exc):
|
||||
logger.info(
|
||||
"Hermes ladder: non-escalatable error on %s:%s — raising without advancing: %s",
|
||||
cfg.name, model, exc,
|
||||
)
|
||||
raise
|
||||
logger.warning(
|
||||
"Hermes ladder: escalatable failure on rung %d (%s:%s), advancing. Error: %s",
|
||||
i, cfg.name, model, exc,
|
||||
)
|
||||
|
||||
# Unreachable in practice — the last iteration either returns or raises —
# kept only to satisfy the type checker without a bare return.
|
||||
if last_exc is not None:
|
||||
raise last_exc
|
||||
return "" # pragma: no cover
|
||||
|
||||
async def _dispatch(
|
||||
self,
|
||||
cfg: ProviderConfig,
|
||||
model: str,
|
||||
user_message: str,
|
||||
history: "list[tuple[str, str]] | None",
|
||||
system_prompt: Optional[str],
|
||||
) -> str:
|
||||
"""Single-attempt dispatch on (cfg, model).
|
||||
|
||||
Temporarily rebinds ``self.provider_cfg`` + ``self.base_url`` + ``self.model``
|
||||
so the existing per-provider paths pick up the rung's config. Restores
|
||||
the original values in a finally block so a raised error leaves the
|
||||
executor pinned to its constructor-given state (next call on the same
|
||||
executor instance starts fresh at the top of the ladder).
|
||||
|
||||
For the ladder's non-first rungs, ``self.api_key`` must be the rung's
|
||||
provider key — we resolve it here via ``resolve_provider`` so the
|
||||
first-rung API key (for the pinned provider) isn't mis-used against a
|
||||
different provider's base URL. That lookup can raise ``ValueError``
|
||||
when the rung's env var isn't set; ``should_escalate(ValueError)``
|
||||
returns False so the ladder correctly STOPS rather than escalating
|
||||
further into nothing.
|
||||
"""
|
||||
# Fast path: rung matches the executor's pinned config — reuse the
|
||||
# existing api_key, skip the provider re-resolve.
|
||||
if cfg is self.provider_cfg and model == self.model:
|
||||
scheme = cfg.auth_scheme
|
||||
if scheme == "anthropic":
|
||||
return await self._do_anthropic_native(user_message, history, system_prompt)
|
||||
if scheme == "gemini":
|
||||
return await self._do_gemini_native(user_message, history, system_prompt)
|
||||
if scheme == "openai":
|
||||
return await self._do_openai_compat(user_message, history, system_prompt)
|
||||
logger.warning(
|
||||
"Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat",
|
||||
scheme, cfg.name,
|
||||
)
|
||||
return await self._do_openai_compat(user_message, history, system_prompt)
|
||||
|
||||
# Different rung — temporarily rebind provider_cfg + model + api_key.
|
||||
# resolve_provider reads the rung's env vars fresh.
|
||||
_, rung_key = resolve_provider(cfg.name)
|
||||
orig_cfg, orig_model, orig_key, orig_base = (
|
||||
self.provider_cfg, self.model, self.api_key, self.base_url,
|
||||
)
|
||||
try:
|
||||
self.provider_cfg = cfg
|
||||
self.model = model
|
||||
self.api_key = rung_key
|
||||
self.base_url = cfg.base_url
|
||||
scheme = cfg.auth_scheme
|
||||
if scheme == "anthropic":
|
||||
return await self._do_anthropic_native(user_message, history, system_prompt)
|
||||
if scheme == "gemini":
|
||||
return await self._do_gemini_native(user_message, history, system_prompt)
|
||||
if scheme == "openai":
|
||||
return await self._do_openai_compat(user_message, history, system_prompt)
|
||||
logger.warning(
|
||||
"Hermes: unknown auth_scheme=%r for provider=%s — falling back to openai-compat",
|
||||
scheme, cfg.name,
|
||||
)
|
||||
return await self._do_openai_compat(user_message, history, system_prompt)
|
||||
finally:
|
||||
self.provider_cfg = orig_cfg
|
||||
self.model = orig_model
|
||||
self.api_key = orig_key
|
||||
self.base_url = orig_base
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# AgentExecutor interface
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def execute(self, context, event_queue): # pragma: no cover
|
||||
"""Execute a Hermes inference request and push the reply to event_queue.
|
||||
|
||||
Phase 2c: multi-turn history.
|
||||
Phase 2d-i: reads ``/configs/system-prompt.md`` via
|
||||
``executor_helpers.get_system_prompt`` each turn (supports hot-reload)
|
||||
and passes the text to the dispatch layer. Each provider path uses
|
||||
its native system field — Anthropic's top-level ``system=``, Gemini's
|
||||
``system_instruction=`` via ``GenerateContentConfig``, or OpenAI's
|
||||
``{"role":"system"}`` message at the head of the messages list.
|
||||
"""
|
||||
from a2a.utils import new_agent_text_message
|
||||
from adapters.shared_runtime import (
|
||||
brief_task,
|
||||
extract_history,
|
||||
extract_message_text,
|
||||
set_current_task,
|
||||
)
|
||||
from executor_helpers import get_system_prompt
|
||||
|
||||
user_message = extract_message_text(context)
|
||||
if not user_message:
|
||||
await event_queue.enqueue_event(new_agent_text_message("No message provided"))
|
||||
return
|
||||
|
||||
await set_current_task(self._heartbeat, brief_task(user_message))
|
||||
|
||||
try:
|
||||
history = extract_history(context)
|
||||
system_prompt = (
|
||||
get_system_prompt(self._config_path) if self._config_path else None
|
||||
)
|
||||
reply = await self._do_inference(user_message, history, system_prompt)
|
||||
except Exception as exc:
|
||||
logger.exception("Hermes executor error: %s", exc)
|
||||
reply = f"Hermes error: {exc}"
|
||||
finally:
|
||||
await set_current_task(self._heartbeat, "")
|
||||
|
||||
await event_queue.enqueue_event(new_agent_text_message(reply))
|
||||
|
||||
async def cancel(self, context, event_queue): # pragma: no cover
|
||||
"""No-op cancel — Hermes requests are not cancellable mid-flight."""
|
||||
pass
|
||||
@ -1,298 +0,0 @@
|
||||
"""Hermes adapter provider registry — Phase 1 of the multi-provider expansion.
|
||||
|
||||
Extends the original PR-2 Hermes executor (Nous Portal + OpenRouter only) to a
|
||||
registry of 15 providers. Most providers in this registry are reached via an
|
||||
OpenAI-compat endpoint, which means the existing ``openai.AsyncOpenAI`` client
|
||||
and request shape in ``executor.py`` Just Works without any new dependencies.
|
||||
|
||||
Native SDK paths (Anthropic Messages API, Gemini generateContent API) are
|
||||
Phase 2 — they give better tool-calling + vision fidelity but are not
|
||||
required to unblock the basic "CEO wants Hermes on Qwen / GLM / xAI /
|
||||
Gemini" asks that triggered this work.
|
||||
|
||||
## Design
|
||||
- ``ProviderConfig`` captures everything needed to point the OpenAI client at
|
||||
a provider: env var(s), base URL, default model, auth scheme.
|
||||
- ``PROVIDERS`` is a dict keyed by canonical short name (``"openai"``,
|
||||
``"anthropic"``, ``"qwen"``, etc.).
|
||||
- ``RESOLUTION_ORDER`` is the auto-detect sequence used when the caller
|
||||
doesn't specify a provider — it tries each provider's env vars in turn and
|
||||
picks the first one that's set.
|
||||
- ``resolve_provider(explicit)`` returns ``(ProviderConfig, api_key)`` or
|
||||
raises ``ValueError`` with a helpful message listing every env var it
|
||||
checked.
|
||||
|
||||
## Back-compat
|
||||
The original ``HERMES_API_KEY`` and ``OPENROUTER_API_KEY`` env vars still work
|
||||
and still route to Nous Portal / OpenRouter respectively — they're just now
|
||||
registered as two entries in ``PROVIDERS`` rather than hardcoded in
|
||||
``create_executor``.
|
||||
|
||||
## Adding a new provider
|
||||
1. Append a new ``ProviderConfig`` entry under ``PROVIDERS``
|
||||
2. Add its short name to ``RESOLUTION_ORDER`` in the desired priority slot
|
||||
3. Document the env var in the workspace ``.env.example`` (if present)
|
||||
That's it. Nothing else needs to change — the executor reads the registry. A
hypothetical example entry is sketched just after this docstring.
|
||||
"""
|
||||
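# A hypothetical new entry for the three-step recipe above (name, env var,
# URL, and model are placeholders, not a registered provider):
#
#   "exampleai": ProviderConfig(
#       name="exampleai",
#       env_vars=("EXAMPLEAI_API_KEY",),
#       base_url="https://api.example.ai/v1",
#       default_model="example-large",
#       docs="Placeholder entry illustrating the add-a-provider recipe.",
#   ),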
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProviderConfig:
|
||||
"""Everything the Hermes executor needs to talk to a single LLM provider.
|
||||
|
||||
Every provider in Phase 1 is reachable via an OpenAI-compatible
|
||||
``/v1/chat/completions`` endpoint, so ``auth_scheme`` is always
|
||||
``"openai"`` (Bearer token, OpenAI-style messages payload). Phase 2
|
||||
will add ``"anthropic"`` (native Messages API) and ``"gemini"`` (native
|
||||
generateContent API) for roles that need better tool-call fidelity.
|
||||
"""
|
||||
|
||||
name: str
|
||||
"""Canonical short name — the key used in ``PROVIDERS`` and the ``provider`` kwarg."""
|
||||
|
||||
env_vars: tuple[str, ...]
|
||||
"""API key env vars, checked in order. First non-empty value wins.
|
||||
Supporting multiple env vars lets us accept common aliases
|
||||
(e.g. ``QWEN_API_KEY`` AND ``DASHSCOPE_API_KEY`` both work for Alibaba Qwen)."""
|
||||
|
||||
base_url: str
|
||||
"""OpenAI-compat base URL. Must include the ``/v1`` suffix where applicable."""
|
||||
|
||||
default_model: str
|
||||
"""Default model name to pass to ``chat.completions.create``.
|
||||
Per-call overrides are possible via the executor constructor."""
|
||||
|
||||
auth_scheme: str = "openai"
|
||||
"""``openai`` (Bearer token + OpenAI-style payload) for every Phase 1 provider.
|
||||
Phase 2 reserves ``anthropic`` and ``gemini`` for native-SDK paths."""
|
||||
|
||||
docs: str = ""
|
||||
"""Short note — which docs URL the config was derived from, or which quirks
|
||||
to know about. Not used programmatically; exists to make future audits of
|
||||
this file cheaper than re-Googling every entry."""
|
||||
|
||||
|
||||
# --- Provider registry ------------------------------------------------------
|
||||
#
|
||||
# Ordering within this dict is not semantically meaningful — use
|
||||
# ``RESOLUTION_ORDER`` below to control auto-detect priority. This dict is
|
||||
# grouped by "who owns the provider" just for human readability.
|
||||
|
||||
PROVIDERS: dict[str, ProviderConfig] = {
|
||||
# --- Existing (PR 2 baseline) ---------------------------------------
|
||||
"nous_portal": ProviderConfig(
|
||||
name="nous_portal",
|
||||
env_vars=("HERMES_API_KEY", "NOUS_API_KEY"),
|
||||
base_url="https://inference-prod.nousresearch.com/v1",
|
||||
default_model="nousresearch/hermes-3-llama-3.1-405b",
|
||||
docs="Nous Research Portal — original Hermes adapter target from PR 2.",
|
||||
),
|
||||
"openrouter": ProviderConfig(
|
||||
name="openrouter",
|
||||
env_vars=("OPENROUTER_API_KEY",),
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
default_model="anthropic/claude-sonnet-4.5",
|
||||
docs="OpenRouter — unified OpenAI-compat gateway to hundreds of models. "
|
||||
"Useful for A/B testing and as a fallback when a direct provider is down.",
|
||||
),
|
||||
|
||||
# --- Frontier commercial (US) ---------------------------------------
|
||||
"openai": ProviderConfig(
|
||||
name="openai",
|
||||
env_vars=("OPENAI_API_KEY",),
|
||||
base_url="https://api.openai.com/v1",
|
||||
default_model="gpt-4o",
|
||||
docs="OpenAI — canonical OpenAI-compat endpoint. Works out of the box.",
|
||||
),
|
||||
"anthropic": ProviderConfig(
|
||||
name="anthropic",
|
||||
env_vars=("ANTHROPIC_API_KEY",),
|
||||
base_url="https://api.anthropic.com",
|
||||
default_model="claude-sonnet-4-5",
|
||||
auth_scheme="anthropic",
|
||||
docs="Anthropic — Phase 2 uses the native Messages API via the official "
|
||||
"`anthropic` Python SDK for correct tool calling, vision, and "
|
||||
"extended thinking semantics. If the SDK isn't installed in the "
|
||||
"workspace image, the executor raises a clear error pointing at "
|
||||
"`pip install anthropic>=0.39.0`.",
|
||||
),
|
||||
"xai": ProviderConfig(
|
||||
name="xai",
|
||||
env_vars=("XAI_API_KEY", "GROK_API_KEY"),
|
||||
base_url="https://api.x.ai/v1",
|
||||
default_model="grok-4",
|
||||
docs="xAI — Grok family. OpenAI-compat via api.x.ai/v1.",
|
||||
),
|
||||
"gemini": ProviderConfig(
|
||||
name="gemini",
|
||||
env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY"),
|
||||
base_url="https://generativelanguage.googleapis.com",
|
||||
default_model="gemini-2.5-flash",
|
||||
auth_scheme="gemini",
|
||||
docs="Google Gemini — Phase 2b uses the native generateContent API via "
|
||||
"the official `google-genai` Python SDK for correct vision content "
|
||||
"blocks, tool/function calling, and system instructions. Phase 1 "
|
||||
"used the /v1beta/openai compat shim. If the google-genai package "
|
||||
"isn't installed in the workspace image, the executor raises a "
|
||||
"clear error pointing at `pip install google-genai>=1.0.0`.",
|
||||
),
|
||||
|
||||
# --- Chinese providers ----------------------------------------------
|
||||
"qwen": ProviderConfig(
|
||||
name="qwen",
|
||||
env_vars=("QWEN_API_KEY", "DASHSCOPE_API_KEY"),
|
||||
base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
|
||||
default_model="qwen3-235b-a22b",
|
||||
docs="Alibaba Qwen via DashScope international endpoint. OpenAI-compat mode. "
|
||||
"For domestic China use dashscope.aliyuncs.com (no -intl).",
|
||||
),
|
||||
"glm": ProviderConfig(
|
||||
name="glm",
|
||||
env_vars=("GLM_API_KEY", "ZHIPU_API_KEY"),
|
||||
base_url="https://open.bigmodel.cn/api/paas/v4",
|
||||
default_model="glm-4-plus",
|
||||
docs="Zhipu AI GLM — open.bigmodel.cn, OpenAI-compat via /api/paas/v4.",
|
||||
),
|
||||
"kimi": ProviderConfig(
|
||||
name="kimi",
|
||||
env_vars=("KIMI_API_KEY", "MOONSHOT_API_KEY"),
|
||||
base_url="https://api.moonshot.ai/v1",
|
||||
default_model="kimi-k2",
|
||||
docs="Moonshot AI Kimi K2 — OpenAI-compat at api.moonshot.ai/v1.",
|
||||
),
|
||||
"minimax": ProviderConfig(
|
||||
name="minimax",
|
||||
env_vars=("MINIMAX_API_KEY",),
|
||||
base_url="https://api.minimax.io/v1",
|
||||
default_model="MiniMax-M2",
|
||||
docs="MiniMax — OpenAI-compat at api.minimax.io/v1. "
|
||||
"Note: older base URL api.minimaxi.chat is deprecated.",
|
||||
),
|
||||
"deepseek": ProviderConfig(
|
||||
name="deepseek",
|
||||
env_vars=("DEEPSEEK_API_KEY",),
|
||||
base_url="https://api.deepseek.com/v1",
|
||||
default_model="deepseek-chat",
|
||||
docs="DeepSeek — very cheap, OpenAI-compat at api.deepseek.com/v1.",
|
||||
),
|
||||
|
||||
# --- OSS / alt providers --------------------------------------------
|
||||
"groq": ProviderConfig(
|
||||
name="groq",
|
||||
env_vars=("GROQ_API_KEY",),
|
||||
base_url="https://api.groq.com/openai/v1",
|
||||
default_model="llama-3.3-70b-versatile",
|
||||
docs="Groq LPU inference — very fast, OpenAI-compat at api.groq.com/openai/v1.",
|
||||
),
|
||||
"together": ProviderConfig(
|
||||
name="together",
|
||||
env_vars=("TOGETHER_API_KEY",),
|
||||
base_url="https://api.together.xyz/v1",
|
||||
default_model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
|
||||
docs="Together AI — OSS model hosting, OpenAI-compat at api.together.xyz/v1.",
|
||||
),
|
||||
"fireworks": ProviderConfig(
|
||||
name="fireworks",
|
||||
env_vars=("FIREWORKS_API_KEY",),
|
||||
base_url="https://api.fireworks.ai/inference/v1",
|
||||
default_model="accounts/fireworks/models/llama-v3p3-70b-instruct",
|
||||
docs="Fireworks AI — OSS model hosting, OpenAI-compat at api.fireworks.ai/inference/v1.",
|
||||
),
|
||||
"mistral": ProviderConfig(
|
||||
name="mistral",
|
||||
env_vars=("MISTRAL_API_KEY",),
|
||||
base_url="https://api.mistral.ai/v1",
|
||||
default_model="mistral-large-latest",
|
||||
docs="Mistral AI — OpenAI-compat at api.mistral.ai/v1.",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
# --- Auto-detect resolution order -------------------------------------------
|
||||
#
|
||||
# When the caller doesn't specify a provider, resolve_provider() walks this
|
||||
# list in order and picks the first provider whose env var is set. Order is
|
||||
# chosen to preserve back-compat (the two original PR-2 providers come first)
|
||||
# followed by the most likely-to-be-configured commercial APIs.
|
||||
|
||||
RESOLUTION_ORDER: tuple[str, ...] = (
|
||||
# Back-compat: PR 2 baseline
|
||||
"nous_portal",
|
||||
"openrouter",
|
||||
# Frontier commercial
|
||||
"anthropic",
|
||||
"openai",
|
||||
"gemini",
|
||||
"xai",
|
||||
# Chinese providers
|
||||
"qwen",
|
||||
"glm",
|
||||
"kimi",
|
||||
"minimax",
|
||||
"deepseek",
|
||||
# OSS / alt
|
||||
"groq",
|
||||
"mistral",
|
||||
"together",
|
||||
"fireworks",
|
||||
)
|
||||
|
||||
|
||||
def resolve_provider(explicit: Optional[str] = None) -> tuple[ProviderConfig, str]:
|
||||
"""Resolve a provider name to a ``(ProviderConfig, api_key)`` pair.
|
||||
|
||||
Resolution order:
|
||||
|
||||
1. If ``explicit`` is given, look it up in ``PROVIDERS`` and try every
|
||||
env var on that provider's config. Raise with a clear message if the
|
||||
name is unknown or if all env vars are empty.
|
||||
|
||||
2. Otherwise auto-detect: walk ``RESOLUTION_ORDER`` and return the first
|
||||
provider whose env var is set.
|
||||
|
||||
Raises
|
||||
------
|
||||
ValueError
|
||||
If ``explicit`` is an unknown provider name, if ``explicit`` is a
|
||||
known provider but its env vars are all empty, or if no env var is
|
||||
set for any provider in auto-detect mode.
|
||||
"""
|
||||
if explicit:
|
||||
if explicit not in PROVIDERS:
|
||||
raise ValueError(
|
||||
f"Unknown Hermes provider: {explicit!r}. "
|
||||
f"Available: {sorted(PROVIDERS)}"
|
||||
)
|
||||
cfg = PROVIDERS[explicit]
|
||||
for env in cfg.env_vars:
|
||||
val = os.environ.get(env, "").strip()
|
||||
if val:
|
||||
return cfg, val
|
||||
raise ValueError(
|
||||
f"Hermes provider {explicit!r} specified but no env var set. "
|
||||
f"Tried: {cfg.env_vars}"
|
||||
)
|
||||
|
||||
# Auto-detect — first provider with a non-empty env var wins.
|
||||
for name in RESOLUTION_ORDER:
|
||||
cfg = PROVIDERS[name]
|
||||
for env in cfg.env_vars:
|
||||
val = os.environ.get(env, "").strip()
|
||||
if val:
|
||||
return cfg, val
|
||||
|
||||
# Nothing set — raise with the full list so the operator knows every
|
||||
# option they have without having to read the source.
|
||||
tried = []
|
||||
for name in RESOLUTION_ORDER:
|
||||
for env in PROVIDERS[name].env_vars:
|
||||
tried.append(env)
|
||||
raise ValueError(
|
||||
"No Hermes provider API key found. Set any one of: " + ", ".join(tried)
|
||||
)
|
||||
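# Minimal usage sketch (the key below is a placeholder, not a real credential):
# auto-detect returns the first provider in RESOLUTION_ORDER whose env var is
# set; an explicit name raises unless one of its env vars is non-empty.
if __name__ == "__main__":
    os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")
    cfg, key = resolve_provider()  # e.g. "openai" when only OPENAI_API_KEY is set
    print(cfg.name, cfg.base_url, cfg.default_model)
    try:
        resolve_provider("qwen")   # no QWEN_API_KEY / DASHSCOPE_API_KEY set → ValueError
    except ValueError as exc:
        print(exc)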
@ -1,3 +0,0 @@
|
||||
from .adapter import LangGraphAdapter
|
||||
|
||||
Adapter = LangGraphAdapter
|
||||
@ -1,50 +0,0 @@
|
||||
"""LangGraph adapter — Python-based ReAct agent with skills, tools, and plugins."""
|
||||
|
||||
import os
|
||||
import logging
|
||||
|
||||
from adapters.base import BaseAdapter, AdapterConfig
|
||||
from a2a.server.agent_execution import AgentExecutor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LangGraphAdapter(BaseAdapter):
|
||||
|
||||
@staticmethod
|
||||
def name() -> str:
|
||||
return "langgraph"
|
||||
|
||||
@staticmethod
|
||||
def display_name() -> str:
|
||||
return "LangGraph"
|
||||
|
||||
@staticmethod
|
||||
def description() -> str:
|
||||
return "LangGraph ReAct agent — Python-based with skills, tools, plugins, and peer coordination"
|
||||
|
||||
@staticmethod
|
||||
def get_config_schema() -> dict:
|
||||
return {
|
||||
"model": {"type": "string", "description": "LangChain model string (e.g. openrouter:google/gemini-2.5-flash)"},
|
||||
"skills": {"type": "array", "items": {"type": "string"}, "description": "Skill folder names to load"},
|
||||
"tools": {"type": "array", "items": {"type": "string"}, "description": "Built-in tools (web_search, filesystem, etc.)"},
|
||||
}
|
||||
|
||||
def __init__(self):
|
||||
self.loaded_skills = []
|
||||
self.all_tools = []
|
||||
self.system_prompt = None
|
||||
|
||||
async def setup(self, config: AdapterConfig) -> None:
|
||||
result = await self._common_setup(config)
|
||||
self.loaded_skills = result.loaded_skills
|
||||
self.all_tools = result.langchain_tools
|
||||
self.system_prompt = result.system_prompt
|
||||
|
||||
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
|
||||
from agent import create_agent
|
||||
from a2a_executor import LangGraphA2AExecutor
|
||||
|
||||
agent = create_agent(config.model, self.all_tools, self.system_prompt)
|
||||
return LangGraphA2AExecutor(agent, heartbeat=config.heartbeat, model=config.model)
|
||||
@ -1,3 +0,0 @@
|
||||
from .adapter import OpenClawAdapter
|
||||
|
||||
Adapter = OpenClawAdapter
|
||||
@ -1,243 +0,0 @@
|
||||
"""OpenClaw adapter — bridges OpenClaw's Node.js gateway with our A2A protocol.
|
||||
|
||||
OpenClaw is a Node.js agent runtime with its own gateway (port 18789).
|
||||
This adapter:
|
||||
1. Installs OpenClaw CLI (npm) and missing deps in the container
|
||||
2. Runs non-interactive onboard with the configured model provider
|
||||
3. Copies workspace files (SOUL.md, BOOTSTRAP.md, etc.) to OpenClaw's workspace dir
|
||||
4. Starts the OpenClaw gateway as a background process
|
||||
5. Proxies A2A messages via `openclaw agent --json` CLI subprocess
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from adapters.base import BaseAdapter, AdapterConfig
|
||||
from adapters.shared_runtime import brief_task, extract_message_text, set_current_task
|
||||
from a2a.server.agent_execution import AgentExecutor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
OPENCLAW_WORKSPACE = os.path.expanduser("~/.openclaw/workspace-dev/main")
|
||||
OPENCLAW_PORT = 18789
|
||||
|
||||
# Known missing optional deps in OpenClaw's npm package
|
||||
OPENCLAW_MISSING_DEPS = ["@buape/carbon", "@larksuiteoapi/node-sdk", "@slack/web-api", "grammy"]
|
||||
|
||||
|
||||
class OpenClawAdapter(BaseAdapter):
|
||||
|
||||
def __init__(self):
|
||||
self._gateway_process = None
|
||||
|
||||
@staticmethod
|
||||
def name() -> str:
|
||||
return "openclaw"
|
||||
|
||||
@staticmethod
|
||||
def display_name() -> str:
|
||||
return "OpenClaw"
|
||||
|
||||
@staticmethod
|
||||
def description() -> str:
|
||||
return "OpenClaw agent runtime — Node.js gateway with SOUL/BOOTSTRAP/AGENTS workspace convention"
|
||||
|
||||
@staticmethod
|
||||
def get_config_schema() -> dict:
|
||||
return {
|
||||
"model": {"type": "string", "description": "Model ID (e.g. google/gemini-2.5-flash)"},
|
||||
"provider_url": {"type": "string", "description": "LLM provider base URL", "default": "https://openrouter.ai/api/v1"},
|
||||
"gateway_port": {"type": "integer", "description": "OpenClaw gateway port", "default": 18789},
|
||||
}
|
||||
|
||||
async def setup(self, config: AdapterConfig) -> None: # pragma: no cover
|
||||
"""Install OpenClaw, run onboard, copy workspace files, start gateway."""
|
||||
npm_prefix = os.path.expanduser("~/.local")
|
||||
os.environ["PATH"] = f"{npm_prefix}/bin:{os.environ.get('PATH', '')}"
|
||||
|
||||
# 1. Install OpenClaw CLI if not present
|
||||
if not shutil.which("openclaw"):
|
||||
logger.info("Installing OpenClaw CLI...")
|
||||
result = subprocess.run(
|
||||
["npm", "install", "--prefix", npm_prefix, "-g", "openclaw"],
|
||||
capture_output=True, text=True, timeout=300,
|
||||
env={**os.environ, "npm_config_prefix": npm_prefix}
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"Failed to install OpenClaw: {result.stderr[:500]}")
|
||||
|
||||
# Install known missing optional deps
|
||||
oc_dir = os.path.join(npm_prefix, "lib/node_modules/openclaw")
|
||||
if os.path.exists(oc_dir):
|
||||
logger.info("Installing OpenClaw optional deps...")
|
||||
subprocess.run(
|
||||
["npm", "install"] + OPENCLAW_MISSING_DEPS,
|
||||
capture_output=True, text=True, timeout=120, cwd=oc_dir
|
||||
)
|
||||
logger.info("OpenClaw CLI installed")
|
||||
|
||||
# 2. Resolve API key and model
|
||||
prefix = config.model.split(":")[0] if ":" in config.model else "openai"
|
||||
if prefix == "qianfan":
|
||||
api_key = os.environ.get("QIANFAN_API_KEY", os.environ.get("AISTUDIO_API_KEY", ""))
|
||||
else:
|
||||
api_key = os.environ.get("OPENAI_API_KEY", os.environ.get("GROQ_API_KEY", os.environ.get("OPENROUTER_API_KEY", "")))
|
||||
# Determine provider URL from model prefix
|
||||
provider_urls = {
|
||||
"openai": "https://api.openai.com/v1",
|
||||
"groq": "https://api.groq.com/openai/v1",
|
||||
"openrouter": "https://openrouter.ai/api/v1",
|
||||
"qianfan": "https://qianfan.baidubce.com/v2",
|
||||
}
|
||||
provider_url = config.runtime_config.get("provider_url", provider_urls.get(prefix, "https://api.openai.com/v1"))
|
||||
model = config.model
|
||||
if ":" in model:
|
||||
_, model = model.split(":", 1)
|
||||
|
||||
# 3. Run non-interactive onboard
|
||||
if not os.path.exists(os.path.expanduser("~/.openclaw/openclaw.json")):
|
||||
logger.info(f"Running OpenClaw onboard (model: {model})...")
|
||||
subprocess.run(
|
||||
["openclaw", "onboard", "--non-interactive",
|
||||
"--auth-choice", "custom-api-key",
|
||||
"--custom-base-url", provider_url,
|
||||
"--custom-model-id", model,
|
||||
"--custom-api-key", api_key,
|
||||
"--custom-compatibility", "openai",
|
||||
"--secret-input-mode", "plaintext",
|
||||
"--accept-risk", "--skip-health"],
|
||||
capture_output=True, text=True, timeout=60,
|
||||
env={**os.environ, "NODE_NO_WARNINGS": "1"}
|
||||
)
|
||||
logger.info("OpenClaw onboard complete")
|
||||
|
||||
# 3b. Fix context window (OpenClaw defaults to 16K, but modern models have much more)
|
||||
oc_config_path = os.path.expanduser("~/.openclaw/openclaw.json")
|
||||
if os.path.exists(oc_config_path):
|
||||
try:
|
||||
import json as json_mod
|
||||
oc_cfg = json_mod.load(open(oc_config_path))
|
||||
provider_name = "custom-" + provider_url.split("//")[1].split("/")[0].replace(".", "-")
|
||||
providers = oc_cfg.get("models", {}).get("providers", {})
|
||||
if provider_name in providers:
|
||||
for m in providers[provider_name].get("models", []):
|
||||
m["contextWindow"] = 1000000 # 1M tokens for modern models
|
||||
m["maxTokens"] = 16384
|
||||
json_mod.dump(oc_cfg, open(oc_config_path, "w"), indent=2)
|
||||
logger.info(f"Fixed context window for {provider_name}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fix context window: {e}")
|
||||
|
||||
# 3c. Always write auth-profiles.json
|
||||
# (key may have been set via secrets API after first boot)
|
||||
if api_key:
|
||||
auth_dir = os.path.expanduser("~/.openclaw/agents/main/agent")
|
||||
os.makedirs(auth_dir, exist_ok=True)
|
||||
auth_file = os.path.join(auth_dir, "auth-profiles.json")
|
||||
import json as json_mod
|
||||
provider_name = "custom-" + provider_url.split("//")[1].split("/")[0].replace(".", "-")
|
||||
auth_data = {provider_name: {"type": "api-key", "key": api_key}}
|
||||
with open(auth_file, "w") as f:
|
||||
json_mod.dump(auth_data, f, indent=2)
|
||||
logger.info(f"Wrote auth-profiles.json for {provider_name}")
|
||||
|
||||
# 4. Copy workspace files from /configs to OpenClaw's workspace dir
|
||||
os.makedirs(OPENCLAW_WORKSPACE, exist_ok=True)
|
||||
for fname in os.listdir(config.config_path):
|
||||
src = os.path.join(config.config_path, fname)
|
||||
if os.path.isfile(src) and fname.endswith(".md"):
|
||||
shutil.copy2(src, os.path.join(OPENCLAW_WORKSPACE, fname))
|
||||
logger.debug(f"Copied {fname} to OpenClaw workspace")
|
||||
|
||||
# 5. Start the gateway as a background process
|
||||
gateway_port = config.runtime_config.get("gateway_port", OPENCLAW_PORT)
|
||||
logger.info(f"Starting OpenClaw gateway on port {gateway_port}...")
|
||||
env = os.environ.copy()
|
||||
env["NODE_NO_WARNINGS"] = "1"
|
||||
self._gateway_process = subprocess.Popen(
|
||||
["openclaw", "gateway", "--dev", "--port", str(gateway_port), "--bind", "loopback"],
|
||||
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
||||
env=env,
|
||||
)
|
||||
# Wait for gateway to become healthy (max 30s)
|
||||
for attempt in range(15):
|
||||
await asyncio.sleep(2)
|
||||
if self._gateway_process.poll() is not None:
|
||||
raise RuntimeError("OpenClaw gateway process exited")
|
||||
try:
|
||||
health = subprocess.run(
|
||||
["openclaw", "gateway", "health"],
|
||||
capture_output=True, text=True, timeout=10,
|
||||
env=os.environ.copy()
|
||||
)
|
||||
if health.returncode == 0:
|
||||
logger.info(f"OpenClaw gateway healthy (PID: {self._gateway_process.pid})")
|
||||
break
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.debug(f"Gateway health check timeout (attempt {attempt+1}/15)")
|
||||
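        # for/else: this else branch only runs if the loop exhausted all 15
        # attempts without hitting `break`, i.e. the gateway never reported healthy.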
else:
|
||||
raise RuntimeError("OpenClaw gateway did not become healthy within 30s")
|
||||
|
||||
async def create_executor(self, config: AdapterConfig) -> AgentExecutor:
|
||||
return OpenClawA2AExecutor(heartbeat=config.heartbeat)
|
||||
|
||||
|
||||
class OpenClawA2AExecutor(AgentExecutor):
|
||||
"""Proxies A2A messages to OpenClaw via `openclaw agent` CLI subprocess."""
|
||||
|
||||
def __init__(self, heartbeat=None):
|
||||
self._heartbeat = heartbeat
|
||||
|
||||
async def execute(self, context, event_queue):
|
||||
from a2a.utils import new_agent_text_message
|
||||
|
||||
user_message = extract_message_text(context)
|
||||
|
||||
if not user_message:
|
||||
await event_queue.enqueue_event(new_agent_text_message("No message provided"))
|
||||
return
|
||||
|
||||
await set_current_task(self._heartbeat, brief_task(user_message))
|
||||
|
||||
# Call OpenClaw agent via CLI
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
"openclaw", "agent",
|
||||
"--session-id", context.task_id or "default",
|
||||
"--message", user_message,
|
||||
"--json", "--timeout", "120",
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
env={**os.environ, "PATH": f"{os.path.expanduser('~/.local/bin')}:{os.environ.get('PATH', '')}"}
|
||||
)
|
||||
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=130)
|
||||
output = stdout.decode().strip()
|
||||
|
||||
if proc.returncode == 0 and output:
|
||||
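                # JSON shape expected from `openclaw agent --json` (inferred from the
                # parsing below; illustrative): {"result": {"payloads": [{"text": "..."}]}}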
try:
|
||||
data = json.loads(output)
|
||||
payloads = data.get("result", {}).get("payloads", [])
|
||||
if payloads:
|
||||
reply = payloads[0].get("text", "")
|
||||
else:
|
||||
reply = str(data)
|
||||
except json.JSONDecodeError:
|
||||
reply = output
|
||||
else:
|
||||
reply = f"OpenClaw error: {stderr.decode()[:300]}" if stderr else f"OpenClaw returned code {proc.returncode}"
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
reply = "OpenClaw timed out after 120s"
|
||||
except Exception as e:
|
||||
reply = f"OpenClaw error: {e}"
|
||||
finally:
|
||||
await set_current_task(self._heartbeat, "")
|
||||
|
||||
await event_queue.enqueue_event(new_agent_text_message(reply))
|
||||
|
||||
async def cancel(self, context, event_queue): # pragma: no cover
|
||||
pass
|
||||
@ -1,190 +1,2 @@
|
||||
"""Shared runtime helpers for A2A-backed workspace executors."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from a2a.server.agent_execution import RequestContext
|
||||
|
||||
|
||||
def _extract_part_text(part) -> str:
|
||||
"""Extract text from a message part, handling dicts and A2A objects."""
|
||||
if isinstance(part, dict):
|
||||
text = part.get("text", "")
|
||||
if text:
|
||||
return text
|
||||
root = part.get("root")
|
||||
if isinstance(root, dict):
|
||||
return root.get("text", "")
|
||||
return ""
|
||||
if hasattr(part, "text") and part.text:
|
||||
return part.text
|
||||
if hasattr(part, "root") and hasattr(part.root, "text") and part.root.text:
|
||||
return part.root.text
|
||||
return ""
|
||||
|
||||
|
||||
def extract_message_text(context_or_parts) -> str:
|
||||
"""Extract concatenated plain text from A2A message parts."""
|
||||
parts = getattr(getattr(context_or_parts, "message", None), "parts", None)
|
||||
if parts is None:
|
||||
parts = context_or_parts
|
||||
return " ".join(
|
||||
text for part in (parts or []) if (text := _extract_part_text(part))
|
||||
).strip()
|
||||
|
||||
|
||||
def extract_history(context: RequestContext) -> list[tuple[str, str]]:
|
||||
"""Extract conversation history from A2A request metadata."""
|
||||
messages: list[tuple[str, str]] = []
|
||||
request = getattr(context, "request", None)
|
||||
metadata = getattr(request, "metadata", None) if request else None
|
||||
if not isinstance(metadata, dict):
|
||||
metadata = getattr(context, "metadata", None) or {}
|
||||
history = metadata.get("history", []) if isinstance(metadata, dict) else []
|
||||
if not isinstance(history, list):
|
||||
return messages
|
||||
|
||||
for entry in history:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
role = entry.get("role", "user")
|
||||
parts = entry.get("parts", [])
|
||||
text = " ".join(
|
||||
text for part in (parts or []) if (text := _extract_part_text(part))
|
||||
).strip()
|
||||
if text:
|
||||
mapped_role = "human" if role == "user" else "ai"
|
||||
messages.append((mapped_role, text))
|
||||
return messages
|
||||
|
||||
|
||||
def format_conversation_history(history: list[tuple[str, str]]) -> str:
|
||||
"""Render `(role, text)` history into a stable human-readable transcript."""
|
||||
return "\n".join(
|
||||
f"{'User' if role == 'human' else 'Agent'}: {text}" for role, text in history
|
||||
)
|
||||
|
||||
|
||||
def build_task_text(user_message: str, history: list[tuple[str, str]]) -> str:
|
||||
"""Build a single task/request string with optional prepended conversation history."""
|
||||
if not history:
|
||||
return user_message
|
||||
transcript = format_conversation_history(history)
|
||||
return f"Conversation so far:\n{transcript}\n\nCurrent request: {user_message}"
|
||||
|
||||
|
||||
def append_peer_guidance(
|
||||
base_text: str | None,
|
||||
peers_info: str,
|
||||
*,
|
||||
default_text: str,
|
||||
tool_name: str,
|
||||
) -> str:
|
||||
"""Append peer guidance text when peers are available."""
|
||||
text = (base_text or default_text).strip()
|
||||
if peers_info:
|
||||
text += f"\n\n## Peers\n{peers_info}\nUse {tool_name} to communicate with them."
|
||||
return text
|
||||
|
||||
|
||||
def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Return compact peer metadata for prompt rendering."""
|
||||
summaries: list[dict[str, Any]] = []
|
||||
for peer in peers:
|
||||
agent_card = peer.get("agent_card")
|
||||
if not agent_card:
|
||||
continue
|
||||
if isinstance(agent_card, str):
|
||||
try:
|
||||
import json
|
||||
|
||||
agent_card = json.loads(agent_card)
|
||||
except Exception:
|
||||
continue
|
||||
if not isinstance(agent_card, dict):
|
||||
continue
|
||||
|
||||
skills = agent_card.get("skills", [])
|
||||
summaries.append(
|
||||
{
|
||||
"id": peer.get("id", "unknown"),
|
||||
"name": agent_card.get("name", peer.get("name", "Unknown")),
|
||||
"status": peer.get("status", "unknown"),
|
||||
"skills": [
|
||||
s.get("name", s.get("id", ""))
|
||||
for s in skills
|
||||
if isinstance(s, dict)
|
||||
],
|
||||
}
|
||||
)
|
||||
return summaries
|
||||
|
||||
|
||||
def build_peer_section(
|
||||
peers: list[dict[str, Any]],
|
||||
*,
|
||||
heading: str = "## Your Peers (workspaces you can delegate to)",
|
||||
instruction: str = (
|
||||
"Use the `delegate_to_workspace` tool to send tasks to peers. "
|
||||
"Only delegate to peers listed above."
|
||||
),
|
||||
) -> str:
|
||||
"""Render a stable peer section for system prompts."""
|
||||
summaries = summarize_peer_cards(peers)
|
||||
if not summaries:
|
||||
return ""
|
||||
|
||||
parts = [heading, ""]
|
||||
for peer in summaries:
|
||||
parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})")
|
||||
if peer["skills"]:
|
||||
parts.append(f" Skills: {', '.join(peer['skills'])}")
|
||||
parts.append("")
|
||||
parts.append(instruction)
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def brief_task(text: str, limit: int = 60) -> str:
|
||||
"""Create a short human-readable task label for the heartbeat banner."""
|
||||
return text[:limit] + ("..." if len(text) > limit else "")
|
||||
|
||||
|
||||
async def set_current_task(heartbeat: Any, task: str) -> None:
|
||||
"""Update current task on heartbeat and push immediately to platform.
|
||||
|
||||
The heartbeat loop only fires every 30s, so quick tasks would finish
|
||||
before the canvas ever sees them. Setting a task pushes immediately.
|
||||
Clearing a task only updates the heartbeat object — the next heartbeat
|
||||
cycle will broadcast the clear, keeping the task visible longer.
|
||||
"""
|
||||
if heartbeat:
|
||||
heartbeat.current_task = task
|
||||
heartbeat.active_tasks = 1 if task else 0
|
||||
|
||||
# Only push immediately when SETTING a task (not clearing)
|
||||
# Clearing is handled by the next heartbeat cycle, which keeps
|
||||
# the task visible on the canvas for quick A2A responses
|
||||
if not task:
|
||||
return
|
||||
|
||||
import os
|
||||
workspace_id = os.environ.get("WORKSPACE_ID", "")
|
||||
platform_url = os.environ.get("PLATFORM_URL", "")
|
||||
if workspace_id and platform_url:
|
||||
try:
|
||||
import httpx
|
||||
async with httpx.AsyncClient(timeout=3.0) as client:
|
||||
await client.post(
|
||||
f"{platform_url}/registry/heartbeat",
|
||||
json={
|
||||
"workspace_id": workspace_id,
|
||||
"current_task": task,
|
||||
"active_tasks": 1,
|
||||
"error_rate": 0,
|
||||
"sample_error": "",
|
||||
"uptime_seconds": 0,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass # Best-effort
|
||||
"""Re-export from shared_runtime for backward compat."""
|
||||
from shared_runtime import * # noqa: F401,F403
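# Assuming this shim sits at the old adapters/shared_runtime.py path, legacy
# imports such as `from adapters.shared_runtime import build_peer_section`
# keep resolving to the same helpers as `from shared_runtime import build_peer_section`.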
|
||||
|
||||
@ -17,7 +17,7 @@ import os
|
||||
|
||||
import httpx
|
||||
from langchain_core.tools import tool
|
||||
from adapters.shared_runtime import build_peer_section
|
||||
from shared_runtime import build_peer_section
|
||||
from policies.routing import build_team_routing_payload
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
from pathlib import Path
|
||||
|
||||
from skill_loader.loader import LoadedSkill
|
||||
from adapters.shared_runtime import build_peer_section
|
||||
from shared_runtime import build_peer_section
|
||||
|
||||
DEFAULT_MEMORY_SNAPSHOT_FILES = ("MEMORY.md", "USER.md")
|
||||
|
||||
|
||||
190
workspace-template/shared_runtime.py
Normal file
@ -0,0 +1,190 @@
|
||||
"""Shared runtime helpers for A2A-backed workspace executors."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from a2a.server.agent_execution import RequestContext
|
||||
|
||||
|
||||
def _extract_part_text(part) -> str:
|
||||
"""Extract text from a message part, handling dicts and A2A objects."""
|
||||
if isinstance(part, dict):
|
||||
text = part.get("text", "")
|
||||
if text:
|
||||
return text
|
||||
root = part.get("root")
|
||||
if isinstance(root, dict):
|
||||
return root.get("text", "")
|
||||
return ""
|
||||
if hasattr(part, "text") and part.text:
|
||||
return part.text
|
||||
if hasattr(part, "root") and hasattr(part.root, "text") and part.root.text:
|
||||
return part.root.text
|
||||
return ""
|
||||
|
||||
|
||||
def extract_message_text(context_or_parts) -> str:
|
||||
"""Extract concatenated plain text from A2A message parts."""
|
||||
parts = getattr(getattr(context_or_parts, "message", None), "parts", None)
|
||||
if parts is None:
|
||||
parts = context_or_parts
|
||||
return " ".join(
|
||||
text for part in (parts or []) if (text := _extract_part_text(part))
|
||||
).strip()
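# Example (illustrative): parts may be plain dicts or A2A Part objects.
# extract_message_text([{"text": "hello"}, {"root": {"text": "world"}}]) -> "hello world"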
|
||||
|
||||
|
||||
def extract_history(context: RequestContext) -> list[tuple[str, str]]:
|
||||
"""Extract conversation history from A2A request metadata."""
|
||||
messages: list[tuple[str, str]] = []
|
||||
request = getattr(context, "request", None)
|
||||
metadata = getattr(request, "metadata", None) if request else None
|
||||
if not isinstance(metadata, dict):
|
||||
metadata = getattr(context, "metadata", None) or {}
|
||||
history = metadata.get("history", []) if isinstance(metadata, dict) else []
|
||||
if not isinstance(history, list):
|
||||
return messages
|
||||
|
||||
for entry in history:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
role = entry.get("role", "user")
|
||||
parts = entry.get("parts", [])
|
||||
text = " ".join(
|
||||
text for part in (parts or []) if (text := _extract_part_text(part))
|
||||
).strip()
|
||||
if text:
|
||||
mapped_role = "human" if role == "user" else "ai"
|
||||
messages.append((mapped_role, text))
|
||||
return messages
|
||||
|
||||
|
||||
def format_conversation_history(history: list[tuple[str, str]]) -> str:
|
||||
"""Render `(role, text)` history into a stable human-readable transcript."""
|
||||
return "\n".join(
|
||||
f"{'User' if role == 'human' else 'Agent'}: {text}" for role, text in history
|
||||
)
|
||||
|
||||
|
||||
def build_task_text(user_message: str, history: list[tuple[str, str]]) -> str:
|
||||
"""Build a single task/request string with optional prepended conversation history."""
|
||||
if not history:
|
||||
return user_message
|
||||
transcript = format_conversation_history(history)
|
||||
return f"Conversation so far:\n{transcript}\n\nCurrent request: {user_message}"
|
||||
|
||||
|
||||
def append_peer_guidance(
|
||||
base_text: str | None,
|
||||
peers_info: str,
|
||||
*,
|
||||
default_text: str,
|
||||
tool_name: str,
|
||||
) -> str:
|
||||
"""Append peer guidance text when peers are available."""
|
||||
text = (base_text or default_text).strip()
|
||||
if peers_info:
|
||||
text += f"\n\n## Peers\n{peers_info}\nUse {tool_name} to communicate with them."
|
||||
return text
|
||||
|
||||
|
||||
def summarize_peer_cards(peers: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
"""Return compact peer metadata for prompt rendering."""
|
||||
summaries: list[dict[str, Any]] = []
|
||||
for peer in peers:
|
||||
agent_card = peer.get("agent_card")
|
||||
if not agent_card:
|
||||
continue
|
||||
if isinstance(agent_card, str):
|
||||
try:
|
||||
import json
|
||||
|
||||
agent_card = json.loads(agent_card)
|
||||
except Exception:
|
||||
continue
|
||||
if not isinstance(agent_card, dict):
|
||||
continue
|
||||
|
||||
skills = agent_card.get("skills", [])
|
||||
summaries.append(
|
||||
{
|
||||
"id": peer.get("id", "unknown"),
|
||||
"name": agent_card.get("name", peer.get("name", "Unknown")),
|
||||
"status": peer.get("status", "unknown"),
|
||||
"skills": [
|
||||
s.get("name", s.get("id", ""))
|
||||
for s in skills
|
||||
if isinstance(s, dict)
|
||||
],
|
||||
}
|
||||
)
|
||||
return summaries
|
||||
|
||||
|
||||
def build_peer_section(
|
||||
peers: list[dict[str, Any]],
|
||||
*,
|
||||
heading: str = "## Your Peers (workspaces you can delegate to)",
|
||||
instruction: str = (
|
||||
"Use the `delegate_to_workspace` tool to send tasks to peers. "
|
||||
"Only delegate to peers listed above."
|
||||
),
|
||||
) -> str:
|
||||
"""Render a stable peer section for system prompts."""
|
||||
summaries = summarize_peer_cards(peers)
|
||||
if not summaries:
|
||||
return ""
|
||||
|
||||
parts = [heading, ""]
|
||||
for peer in summaries:
|
||||
parts.append(f"- **{peer['name']}** (id: `{peer['id']}`, status: {peer['status']})")
|
||||
if peer["skills"]:
|
||||
parts.append(f" Skills: {', '.join(peer['skills'])}")
|
||||
parts.append("")
|
||||
parts.append(instruction)
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def brief_task(text: str, limit: int = 60) -> str:
|
||||
"""Create a short human-readable task label for the heartbeat banner."""
|
||||
return text[:limit] + ("..." if len(text) > limit else "")
|
||||
|
||||
|
||||
async def set_current_task(heartbeat: Any, task: str) -> None:
|
||||
"""Update current task on heartbeat and push immediately to platform.
|
||||
|
||||
The heartbeat loop only fires every 30s, so quick tasks would finish
|
||||
before the canvas ever sees them. Setting a task pushes immediately.
|
||||
Clearing a task only updates the heartbeat object — the next heartbeat
|
||||
cycle will broadcast the clear, keeping the task visible longer.
|
||||
"""
|
||||
if heartbeat:
|
||||
heartbeat.current_task = task
|
||||
heartbeat.active_tasks = 1 if task else 0
|
||||
|
||||
# Only push immediately when SETTING a task (not clearing)
|
||||
# Clearing is handled by the next heartbeat cycle, which keeps
|
||||
# the task visible on the canvas for quick A2A responses
|
||||
if not task:
|
||||
return
|
||||
|
||||
import os
|
||||
workspace_id = os.environ.get("WORKSPACE_ID", "")
|
||||
platform_url = os.environ.get("PLATFORM_URL", "")
|
||||
if workspace_id and platform_url:
|
||||
try:
|
||||
import httpx
|
||||
async with httpx.AsyncClient(timeout=3.0) as client:
|
||||
await client.post(
|
||||
f"{platform_url}/registry/heartbeat",
|
||||
json={
|
||||
"workspace_id": workspace_id,
|
||||
"current_task": task,
|
||||
"active_tasks": 1,
|
||||
"error_rate": 0,
|
||||
"sample_error": "",
|
||||
"uptime_seconds": 0,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass # Best-effort
|
||||
File diff suppressed because it is too large
@ -1,214 +0,0 @@
|
||||
"""Tests for the shared _common_setup() pipeline and tool conversion helpers."""
|
||||
|
||||
import importlib.util
|
||||
import sys
|
||||
from types import ModuleType
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# --- Mock missing optional deps ---
|
||||
|
||||
def _ensure_crewai_mock():
|
||||
if "crewai" not in sys.modules:
|
||||
crewai_mod = ModuleType("crewai")
|
||||
crewai_tools_mod = ModuleType("crewai.tools")
|
||||
# Make @tool a passthrough decorator that preserves the function
|
||||
crewai_tools_mod.tool = lambda name: (lambda f: f)
|
||||
crewai_mod.tools = crewai_tools_mod
|
||||
crewai_mod.__version__ = "0.0.0-mock"
|
||||
sys.modules["crewai"] = crewai_mod
|
||||
sys.modules["crewai.tools"] = crewai_tools_mod
|
||||
|
||||
|
||||
def _ensure_autogen_mock():
|
||||
if "autogen_agentchat" not in sys.modules:
|
||||
mod = ModuleType("autogen_agentchat")
|
||||
agents_mod = ModuleType("autogen_agentchat.agents")
|
||||
agents_mod.AssistantAgent = MagicMock
|
||||
mod.agents = agents_mod
|
||||
sys.modules["autogen_agentchat"] = mod
|
||||
sys.modules["autogen_agentchat.agents"] = agents_mod
|
||||
|
||||
|
||||
_ensure_crewai_mock()
|
||||
_ensure_autogen_mock()
|
||||
|
||||
|
||||
# --- Mock helpers ---
|
||||
|
||||
def _mock_load_plugins(**kwargs):
|
||||
plugins = MagicMock()
|
||||
plugins.plugin_names = []
|
||||
plugins.skill_dirs = []
|
||||
plugins.prompt_fragments = []
|
||||
plugins.rules = []
|
||||
return plugins
|
||||
|
||||
|
||||
def _mock_load_skills(config_path, tools):
|
||||
return []
|
||||
|
||||
|
||||
async def _mock_get_children():
|
||||
return []
|
||||
|
||||
|
||||
async def _mock_get_children_with_kids():
|
||||
return [{"id": "child-1", "name": "Child", "role": "Worker", "status": "online"}]
|
||||
|
||||
|
||||
async def _mock_get_parent_context():
|
||||
return []
|
||||
|
||||
|
||||
async def _mock_get_peer_capabilities(platform_url, workspace_id):
|
||||
return [{"id": "peer-1", "name": "Peer", "status": "online", "agent_card": {"skills": []}}]
|
||||
|
||||
|
||||
def _mock_build_system_prompt(*args, **kwargs):
|
||||
return "You are a test agent."
|
||||
|
||||
|
||||
def _mock_build_children_description(children):
|
||||
return "## Team\n- Child: Worker"
|
||||
|
||||
|
||||
# All patches needed for _common_setup
|
||||
_SETUP_PATCHES = {
|
||||
"plugins.load_plugins": _mock_load_plugins,
|
||||
"skill_loader.loader.load_skills": _mock_load_skills,
|
||||
"coordinator.get_children": _mock_get_children,
|
||||
"coordinator.get_parent_context": _mock_get_parent_context,
|
||||
"coordinator.build_children_description": _mock_build_children_description,
|
||||
"prompt.get_peer_capabilities": _mock_get_peer_capabilities,
|
||||
"prompt.build_system_prompt": _mock_build_system_prompt,
|
||||
}
|
||||
|
||||
|
||||
def _make_test_adapter():
|
||||
from adapters.base import BaseAdapter, AdapterConfig
|
||||
|
||||
class TestAdapter(BaseAdapter):
|
||||
@staticmethod
|
||||
def name(): return "test"
|
||||
@staticmethod
|
||||
def display_name(): return "Test"
|
||||
@staticmethod
|
||||
def description(): return "Test adapter"
|
||||
async def setup(self, config): pass
|
||||
async def create_executor(self, config): pass
|
||||
|
||||
return TestAdapter(), AdapterConfig(model="openai:test", config_path="/tmp", workspace_id="ws-test")
|
||||
|
||||
|
||||
# --- Common Setup Tests ---
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_common_setup_returns_core_tools():
|
||||
"""_common_setup returns 5 core tools."""
|
||||
adapter, config = _make_test_adapter()
|
||||
|
||||
patches = {k: v for k, v in _SETUP_PATCHES.items()}
|
||||
with patch.dict("os.environ", {"PLATFORM_URL": "http://test:8080"}):
|
||||
ctx = [patch(k, v) for k, v in patches.items()]
|
||||
for c in ctx:
|
||||
c.start()
|
||||
try:
|
||||
result = await adapter._common_setup(config)
|
||||
finally:
|
||||
for c in ctx:
|
||||
c.stop()
|
||||
|
||||
assert len(result.langchain_tools) == 6 # 6 core tools
|
||||
tool_names = [t.name for t in result.langchain_tools]
|
||||
assert "delegate_to_workspace" in tool_names
|
||||
assert "check_delegation_status" in tool_names
|
||||
assert "request_approval" in tool_names
|
||||
assert "commit_memory" in tool_names
|
||||
assert "search_memory" in tool_names
|
||||
assert "run_code" in tool_names
|
||||
assert result.system_prompt == "You are a test agent."
|
||||
assert result.is_coordinator is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_common_setup_coordinator_adds_routing_tool():
|
||||
"""When workspace has children, coordinator tool is added."""
|
||||
adapter, config = _make_test_adapter()
|
||||
|
||||
patches = {k: v for k, v in _SETUP_PATCHES.items()}
|
||||
patches["coordinator.get_children"] = _mock_get_children_with_kids
|
||||
|
||||
with patch.dict("os.environ", {"PLATFORM_URL": "http://test:8080"}):
|
||||
ctx = [patch(k, v) for k, v in patches.items()]
|
||||
for c in ctx:
|
||||
c.start()
|
||||
try:
|
||||
result = await adapter._common_setup(config)
|
||||
finally:
|
||||
for c in ctx:
|
||||
c.stop()
|
||||
|
||||
assert result.is_coordinator is True
|
||||
assert len(result.langchain_tools) == 7 # 6 core + route_task_to_team
|
||||
# Last tool should be route_task_to_team (function name or .name attribute)
|
||||
last_tool = result.langchain_tools[-1]
|
||||
tool_id = getattr(last_tool, "name", None) or getattr(last_tool, "__name__", "")
|
||||
assert "route_task_to_team" in tool_id
|
||||
|
||||
|
||||
# --- Tool Conversion Tests ---
|
||||
|
||||
def test_langchain_to_crewai_preserves_name():
|
||||
"""CrewAI wrapper preserves tool name and description."""
|
||||
from adapters.crewai.adapter import _langchain_to_crewai
|
||||
|
||||
mock_tool = MagicMock()
|
||||
mock_tool.name = "test_tool"
|
||||
mock_tool.description = "A test tool for testing."
|
||||
mock_tool.ainvoke = AsyncMock(return_value={"result": "ok"})
|
||||
|
||||
wrapped = _langchain_to_crewai(mock_tool)
|
||||
# With our mock @tool decorator, the wrapper is the raw function
|
||||
assert wrapped.__doc__ == "A test tool for testing."
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not importlib.util.find_spec("autogen_core"),
|
||||
reason="autogen_core not installed",
|
||||
)
|
||||
def test_langchain_to_autogen_preserves_name():
|
||||
"""AutoGen wrapper preserves tool name and description via FunctionTool."""
|
||||
from adapters.autogen.adapter import _langchain_to_autogen
|
||||
|
||||
mock_tool = MagicMock()
|
||||
mock_tool.name = "test_tool"
|
||||
mock_tool.description = "A test tool for testing."
|
||||
mock_tool.ainvoke = AsyncMock(return_value={"result": "ok"})
|
||||
|
||||
wrapped = _langchain_to_autogen(mock_tool)
|
||||
assert wrapped.name == "test_tool"
|
||||
assert wrapped.description == "A test tool for testing."
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not importlib.util.find_spec("autogen_core"),
|
||||
reason="autogen_core not installed",
|
||||
)
|
||||
@pytest.mark.asyncio
|
||||
async def test_langchain_to_autogen_calls_ainvoke():
|
||||
"""AutoGen FunctionTool wrapper calls the original tool's ainvoke."""
|
||||
from adapters.autogen.adapter import _langchain_to_autogen
|
||||
|
||||
mock_tool = MagicMock()
|
||||
mock_tool.name = "delegate"
|
||||
mock_tool.description = "Delegate a task."
|
||||
mock_tool.ainvoke = AsyncMock(return_value={"success": True})
|
||||
|
||||
wrapped = _langchain_to_autogen(mock_tool)
|
||||
# FunctionTool.run_json expects a JSON dict with the function params
|
||||
result = await wrapped.run_json({"input": '{"workspace_id": "ws-1", "task": "do stuff"}'}, cancellation_token=None)
|
||||
mock_tool.ainvoke.assert_called_once_with({"workspace_id": "ws-1", "task": "do stuff"})
|
||||
assert "True" in str(result)
|
||||
@ -1,146 +0,0 @@
|
||||
"""Tests for Hermes escalation-ladder classification and config parsing.
|
||||
|
||||
The truth table in ``should_escalate`` is the single chokepoint that
|
||||
decides whether an inference failure wastes the next ladder rung's
|
||||
quota or triggers a useful retry. These tests pin that table against
|
||||
real exception shapes from anthropic / openai / google-genai SDKs and
|
||||
the wrapped-error strings we've observed in platform logs.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# Make the workspace-template/ modules importable without installing.
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||
|
||||
from adapters.hermes.escalation import ( # noqa: E402
|
||||
LadderRung,
|
||||
parse_ladder,
|
||||
should_escalate,
|
||||
)
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# parse_ladder
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
def test_parse_ladder_empty_returns_empty():
|
||||
assert parse_ladder(None) == []
|
||||
assert parse_ladder([]) == []
|
||||
|
||||
|
||||
def test_parse_ladder_accepts_dicts():
|
||||
raw = [
|
||||
{"provider": "gemini", "model": "gemini-2.5-flash"},
|
||||
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
|
||||
]
|
||||
rungs = parse_ladder(raw)
|
||||
assert len(rungs) == 2
|
||||
assert rungs[0] == LadderRung("gemini", "gemini-2.5-flash")
|
||||
assert rungs[1] == LadderRung("anthropic", "claude-opus-4-1-20250805")
|
||||
|
||||
|
||||
def test_parse_ladder_passes_through_rung_instances():
|
||||
# Programmatic callers can pass already-constructed rungs.
|
||||
existing = LadderRung("openai", "gpt-4o-mini")
|
||||
rungs = parse_ladder([existing])
|
||||
assert rungs == [existing]
|
||||
|
||||
|
||||
def test_parse_ladder_skips_malformed_entries():
|
||||
# Missing model / missing provider / wrong type — all skipped with
|
||||
# a warning, not raised. A missing rung is less bad than a boot fail.
|
||||
raw = [
|
||||
{"provider": "gemini"}, # no model
|
||||
{"model": "gpt-4o"}, # no provider
|
||||
"not a dict", # wrong type
|
||||
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"}, # good
|
||||
]
|
||||
rungs = parse_ladder(raw)
|
||||
assert len(rungs) == 1
|
||||
assert rungs[0].provider == "anthropic"
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------
|
||||
# should_escalate — truth table
|
||||
# --------------------------------------------------------------------------
|
||||
|
||||
class _FakeRateLimitError(Exception):
|
||||
"""Stand-in with the same class name the openai SDK uses (rate limits)."""
|
||||
pass
|
||||
_FakeRateLimitError.__name__ = "RateLimitError"
|
||||
|
||||
|
||||
class _FakeOverloadedError(Exception):
|
||||
"""Stand-in for anthropic.OverloadedError (HTTP 529)."""
|
||||
pass
|
||||
_FakeOverloadedError.__name__ = "OverloadedError"
|
||||
|
||||
|
||||
class _FakeAPITimeoutError(Exception):
|
||||
pass
|
||||
_FakeAPITimeoutError.__name__ = "APITimeoutError"
|
||||
|
||||
|
||||
class _FakeAPIConnectionError(Exception):
|
||||
pass
|
||||
_FakeAPIConnectionError.__name__ = "APIConnectionError"
|
||||
|
||||
|
||||
class _FakeInternalServerError(Exception):
|
||||
pass
|
||||
_FakeInternalServerError.__name__ = "InternalServerError"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("exc,expected", [
|
||||
# --- Escalatable: typed rate-limit / overload / timeout classes ---
|
||||
(_FakeRateLimitError("rate_limit_exceeded on gpt-4o"), True),
|
||||
(_FakeOverloadedError("overloaded_error"), True),
|
||||
(_FakeAPITimeoutError("Request timed out."), True),
|
||||
(_FakeAPIConnectionError("Connection error."), True),
|
||||
(_FakeInternalServerError("Internal server error 500."), True),
|
||||
|
||||
# --- Escalatable: context-length exceeded on current model ---
|
||||
(ValueError("This model's maximum context length is 200000 tokens. However, your messages resulted in ..."), True),
|
||||
(RuntimeError("error: context_length_exceeded"), True),
|
||||
(RuntimeError("prompt is too long: 210000 tokens"), True),
|
||||
(RuntimeError("error.type: prompt_too_long"), True),
|
||||
(RuntimeError("exceeds model context window of 1048576"), True),
|
||||
|
||||
# --- Escalatable: gateway markers (HTTP-wrapped) ---
|
||||
(RuntimeError("Upstream 502 Bad Gateway"), True),
|
||||
(RuntimeError("503 Service Unavailable"), True),
|
||||
(RuntimeError("Service is temporarily unavailable, please try again."), True),
|
||||
(RuntimeError("Anthropic API is overloaded."), True),
|
||||
|
||||
# --- Escalatable: status-code substrings ---
|
||||
(RuntimeError("HTTP 429 Too Many Requests"), True),
|
||||
(RuntimeError("HTTP 529 Overloaded"), True),
|
||||
|
||||
# --- NOT escalatable: auth / permission (config bugs, wasting quota) ---
|
||||
(RuntimeError("401 Unauthorized — invalid api key"), False),
|
||||
(RuntimeError("403 Forbidden: permission_denied"), False),
|
||||
(RuntimeError("authentication_error: invalid_api_key"), False),
|
||||
|
||||
# --- NOT escalatable: auth-wrapped rate-limit (priority = hard-reject auth) ---
|
||||
# If we see '401' + rate-limit markers simultaneously, prefer not escalating
|
||||
# because the underlying 401 won't get better on a different model.
|
||||
(_FakeRateLimitError("RateLimitError wrapping 401 Unauthorized"), False),
|
||||
|
||||
# --- NOT escalatable: unrelated errors ---
|
||||
(ValueError("bad config"), False),
|
||||
(KeyError("missing key"), False),
|
||||
(None, False),
|
||||
])
|
||||
def test_should_escalate_truth_table(exc, expected):
|
||||
assert should_escalate(exc) is expected
|
||||
|
||||
|
||||
def test_should_escalate_case_insensitive():
|
||||
# We lowercase the message before substring matching so "OVERLOADED"
|
||||
# from one provider and "overloaded" from another both match.
|
||||
assert should_escalate(RuntimeError("SERVICE OVERLOADED")) is True
|
||||
assert should_escalate(RuntimeError("503 SERVICE UNAVAILABLE")) is True
|
||||
@ -1,160 +0,0 @@
|
||||
"""Integration-ish tests for the Hermes executor's escalation behaviour.
|
||||
|
||||
These tests exercise ``_do_inference`` against a mocked ``_dispatch``
|
||||
to prove that:
|
||||
- No-ladder path is a single call (original behaviour)
|
||||
- Ladder path retries on escalatable errors
|
||||
- Ladder path stops early on non-escalatable errors
|
||||
- Ladder path raises the last error when every rung fails
|
||||
- Successful rung logs the recovery and returns
|
||||
|
||||
No network calls, no provider SDKs. If this ever starts calling real
|
||||
providers, that's a test-isolation regression worth flagging.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
||||
|
||||
from adapters.hermes.escalation import LadderRung # noqa: E402
|
||||
from adapters.hermes.executor import HermesA2AExecutor # noqa: E402
|
||||
from adapters.hermes.providers import PROVIDERS # noqa: E402
|
||||
|
||||
|
||||
class _FakeRateLimitError(Exception):
|
||||
pass
|
||||
_FakeRateLimitError.__name__ = "RateLimitError"
|
||||
|
||||
|
||||
def _make_executor(monkeypatch, dispatch_behaviour, ladder=None):
|
||||
"""Build an executor with a mocked ``_dispatch``.
|
||||
|
||||
``dispatch_behaviour`` is a callable that receives (cfg, model, user_msg,
|
||||
history, system_prompt) and returns a string OR raises. Use this to
|
||||
simulate success / failure per rung.
|
||||
"""
|
||||
cfg = PROVIDERS["anthropic"]
|
||||
ex = HermesA2AExecutor(
|
||||
provider_cfg=cfg,
|
||||
api_key="test-key",
|
||||
model="claude-haiku-4-5-20251001",
|
||||
escalation_ladder=ladder,
|
||||
)
|
||||
|
||||
calls: list[tuple[str, str]] = []
|
||||
|
||||
async def fake_dispatch(cfg, model, user_msg, history, system_prompt):
|
||||
calls.append((cfg.name, model))
|
||||
result = dispatch_behaviour(cfg.name, model, user_msg, history, system_prompt)
|
||||
if isinstance(result, BaseException):
|
||||
raise result
|
||||
return result
|
||||
|
||||
monkeypatch.setattr(ex, "_dispatch", fake_dispatch)
|
||||
return ex, calls
|
||||
|
||||
|
||||
def _run(coro):
    # asyncio.run() requires that no event loop is already running; it creates
    # and closes a fresh loop per call, which is what these sync tests need.
    return asyncio.run(coro)
|
||||
|
||||
|
||||
def test_no_ladder_single_call(monkeypatch):
|
||||
ex, calls = _make_executor(monkeypatch, lambda *_: "hello", ladder=None)
|
||||
reply = asyncio.run(ex._do_inference("test"))
|
||||
assert reply == "hello"
|
||||
assert calls == [("anthropic", "claude-haiku-4-5-20251001")]
|
||||
|
||||
|
||||
def test_ladder_not_triggered_on_success(monkeypatch):
|
||||
# Ladder configured, but first attempt succeeds — ladder never engaged.
|
||||
ladder = [
|
||||
{"provider": "openai", "model": "gpt-4o-mini"},
|
||||
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
|
||||
]
|
||||
ex, calls = _make_executor(monkeypatch, lambda *_: "fast reply", ladder=ladder)
|
||||
reply = asyncio.run(ex._do_inference("test"))
|
||||
assert reply == "fast reply"
|
||||
assert len(calls) == 1
|
||||
assert calls[0] == ("anthropic", "claude-haiku-4-5-20251001") # pinned (haiku) wins
|
||||
|
||||
|
||||
def test_ladder_escalates_on_rate_limit(monkeypatch):
|
||||
# First rung rate-limits, second rung (opus) succeeds.
|
||||
attempt = {"n": 0}
|
||||
|
||||
def behaviour(provider, model, *_):
|
||||
attempt["n"] += 1
|
||||
if attempt["n"] == 1:
|
||||
return _FakeRateLimitError("429 rate_limit_exceeded on anthropic")
|
||||
return f"escalated reply from {provider}:{model}"
|
||||
|
||||
ladder = [
|
||||
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
|
||||
]
|
||||
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
|
||||
reply = asyncio.run(ex._do_inference("test"))
|
||||
assert "escalated reply" in reply
|
||||
# Two attempts: pinned haiku (failed), then opus (succeeded).
|
||||
assert [model for _, model in calls] == [
|
||||
"claude-haiku-4-5-20251001",
|
||||
"claude-opus-4-1-20250805",
|
||||
]
|
||||
|
||||
|
||||
def test_ladder_stops_on_non_escalatable_error(monkeypatch):
|
||||
# First rung returns a 401 — ladder should NOT retry, should raise.
|
||||
def behaviour(*_):
|
||||
return RuntimeError("401 Unauthorized invalid api key")
|
||||
|
||||
ladder = [{"provider": "anthropic", "model": "claude-opus-4-1-20250805"}]
|
||||
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
|
||||
|
||||
with pytest.raises(RuntimeError, match="401"):
|
||||
asyncio.run(ex._do_inference("test"))
|
||||
|
||||
# Only one attempt — non-escalatable error stopped the walk.
|
||||
assert len(calls) == 1
|
||||
|
||||
|
||||
def test_ladder_raises_last_error_when_all_rungs_fail(monkeypatch):
|
||||
def behaviour(*_):
|
||||
return _FakeRateLimitError("429 across the board")
|
||||
|
||||
ladder = [
|
||||
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
|
||||
]
|
||||
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
|
||||
|
||||
with pytest.raises(_FakeRateLimitError):
|
||||
asyncio.run(ex._do_inference("test"))
|
||||
|
||||
# Both rungs attempted (pinned + one from ladder).
|
||||
assert len(calls) == 2
|
||||
|
||||
|
||||
def test_ladder_skips_unknown_provider(monkeypatch):
|
||||
# A misconfigured rung with a non-existent provider is logged + skipped;
|
||||
# ladder still walks remaining rungs.
|
||||
def behaviour(provider, *_):
|
||||
if provider == "anthropic":
|
||||
return _FakeRateLimitError("first rung rate limit")
|
||||
return f"ok from {provider}"
|
||||
|
||||
ladder = [
|
||||
{"provider": "totally_made_up", "model": "fake-1"}, # should be skipped
|
||||
{"provider": "anthropic", "model": "claude-opus-4-1-20250805"},
|
||||
]
|
||||
ex, calls = _make_executor(monkeypatch, behaviour, ladder=ladder)
|
||||
|
||||
# First attempt uses the pinned (haiku) which raises, then skips
|
||||
# totally_made_up, then reaches opus. Because behaviour raises a rate-limit
# error for provider==anthropic, the opus rung also fails (same provider). Assert
|
||||
# the skip happened (call count reflects 2 real attempts, not 3).
|
||||
with pytest.raises(_FakeRateLimitError):
|
||||
asyncio.run(ex._do_inference("test"))
|
||||
assert len(calls) == 2 # pinned + opus (totally_made_up skipped)
|
||||
@ -1,487 +0,0 @@
|
||||
"""Tests for Phase 2 auth_scheme dispatch in adapters/hermes/executor.py.
|
||||
|
||||
These cover the NEW behavior only (HermesA2AExecutor._do_inference dispatch
|
||||
based on ProviderConfig.auth_scheme). Phase 1 registry tests live in
|
||||
test_hermes_providers.py — unchanged by Phase 2.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
# Load providers.py + escalation.py directly (same pattern as
|
||||
# test_hermes_providers.py). The escalation module landed with the
|
||||
# ladder work — it's now imported by executor.py, so the inline-exec
|
||||
# pattern below has to find both modules at top level.
|
||||
_HERMES_DIR = Path(__file__).parent.parent / "adapters" / "hermes"
|
||||
sys.path.insert(0, str(_HERMES_DIR))
|
||||
import providers # type: ignore # noqa: E402
|
||||
import escalation # type: ignore # noqa: E402
|
||||
|
||||
|
||||
def _make_executor(provider_name: str):
|
||||
"""Build a HermesA2AExecutor directly without going through create_executor.
|
||||
|
||||
We import executor lazily inside the function because the module-level
|
||||
import chain (``from .providers import ...``) uses a relative import that
|
||||
only resolves when loaded as part of the ``adapters.hermes`` package.
|
||||
The test loads it via direct sys.path manipulation, which bypasses the
|
||||
package loader, so we import providers-as-sibling and then reconstruct
|
||||
the executor with the same shape.
|
||||
"""
|
||||
# We can't import executor.py directly due to the relative-import head,
|
||||
# so instantiate the executor class by replaying its definition inline.
|
||||
# Simpler: test the dispatch logic via providers.PROVIDERS + the public
|
||||
# resolve helpers, plus a mock for the inference methods.
|
||||
cfg = providers.PROVIDERS[provider_name]
|
||||
# Reach into executor via sys.path trick
|
||||
import importlib.util
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"hermes_executor_under_test",
|
||||
_HERMES_DIR / "executor.py",
|
||||
)
|
||||
# The executor module has a relative import `from .providers import ...`
|
||||
# which fails under direct spec_from_file_location. Monkey-patch sys.modules
|
||||
# so the relative import resolves to our directly-loaded providers module.
|
||||
sys.modules["hermes_executor_under_test.providers"] = providers
|
||||
sys.modules["hermes_executor_under_test.escalation"] = escalation
|
||||
# Also alias the package-style import path so `from .providers import X`
|
||||
# and `from .escalation import X` inside executor.py find them.
|
||||
pkg_name = "hermes_executor_under_test"
|
||||
sys.modules.setdefault(pkg_name, MagicMock())
|
||||
sys.modules[pkg_name].providers = providers # type: ignore
|
||||
sys.modules[pkg_name].escalation = escalation # type: ignore
|
||||
# Read + compile executor.py with relative imports rewritten to match
|
||||
# the sibling-import setup above.
|
||||
src = (_HERMES_DIR / "executor.py").read_text()
|
||||
src = src.replace("from .providers import", "from providers import")
|
||||
src = src.replace("from .escalation import", "from escalation import")
|
||||
# The exec'd module needs `__name__` in its globals because executor.py
|
||||
# calls ``logging.getLogger(__name__)`` at import time. Without this the
|
||||
# exec fails with `KeyError: "'__name__' not in globals"`.
|
||||
ns: dict = {"__name__": "hermes_executor_under_test"}
|
||||
exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
|
||||
HermesA2AExecutor = ns["HermesA2AExecutor"]
|
||||
return HermesA2AExecutor(
|
||||
provider_cfg=cfg,
|
||||
api_key="test-key",
|
||||
model=cfg.default_model,
|
||||
)
|
||||
|
||||
|
||||
def test_anthropic_entry_has_anthropic_scheme():
|
||||
"""Phase 2a: anthropic's auth_scheme is 'anthropic'."""
|
||||
cfg = providers.PROVIDERS["anthropic"]
|
||||
assert cfg.auth_scheme == "anthropic"
|
||||
|
||||
|
||||
def test_gemini_entry_has_gemini_scheme():
|
||||
"""Phase 2b: gemini's auth_scheme is 'gemini'."""
|
||||
cfg = providers.PROVIDERS["gemini"]
|
||||
assert cfg.auth_scheme == "gemini"
|
||||
# Base URL no longer has the /v1beta/openai suffix — native SDK uses bare host.
|
||||
assert "/openai" not in cfg.base_url
|
||||
assert cfg.base_url.startswith("https://generativelanguage.googleapis.com")
|
||||
|
||||
|
||||
def test_all_other_providers_still_openai_scheme():
|
||||
"""Phase 2 changes only anthropic + gemini. Every other provider keeps auth_scheme='openai'."""
|
||||
native_providers = {"anthropic", "gemini"}
|
||||
for name, cfg in providers.PROVIDERS.items():
|
||||
if name in native_providers:
|
||||
continue
|
||||
assert cfg.auth_scheme == "openai", (
|
||||
f"{name} unexpectedly has auth_scheme={cfg.auth_scheme!r}"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_openai_scheme_calls_openai_compat():
|
||||
"""auth_scheme='openai' → _do_openai_compat runs, native paths do not."""
|
||||
executor = _make_executor("openai")
|
||||
executor._do_openai_compat = AsyncMock(return_value="openai-result")
|
||||
executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
|
||||
executor._do_gemini_native = AsyncMock(return_value="should-not-run")
|
||||
|
||||
result = await executor._do_inference("hello")
|
||||
|
||||
# Phase 2c: _do_inference passes (user_message, history) to the path;
|
||||
# when no history supplied, second arg is None.
|
||||
executor._do_openai_compat.assert_awaited_once_with("hello", None, None)
|
||||
executor._do_anthropic_native.assert_not_awaited()
|
||||
executor._do_gemini_native.assert_not_awaited()
|
||||
assert result == "openai-result"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_anthropic_scheme_calls_anthropic_native():
|
||||
"""auth_scheme='anthropic' → _do_anthropic_native runs, others do not."""
|
||||
executor = _make_executor("anthropic")
|
||||
executor._do_openai_compat = AsyncMock(return_value="should-not-run")
|
||||
executor._do_anthropic_native = AsyncMock(return_value="anthropic-result")
|
||||
executor._do_gemini_native = AsyncMock(return_value="should-not-run")
|
||||
|
||||
result = await executor._do_inference("hello")
|
||||
|
||||
executor._do_anthropic_native.assert_awaited_once_with("hello", None, None)
|
||||
executor._do_openai_compat.assert_not_awaited()
|
||||
executor._do_gemini_native.assert_not_awaited()
|
||||
assert result == "anthropic-result"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dispatch_gemini_scheme_calls_gemini_native():
|
||||
"""auth_scheme='gemini' → _do_gemini_native runs, others do not. Phase 2b."""
|
||||
executor = _make_executor("gemini")
|
||||
executor._do_openai_compat = AsyncMock(return_value="should-not-run")
|
||||
executor._do_anthropic_native = AsyncMock(return_value="should-not-run")
|
||||
executor._do_gemini_native = AsyncMock(return_value="gemini-result")
|
||||
|
||||
result = await executor._do_inference("hello")
|
||||
|
||||
executor._do_gemini_native.assert_awaited_once_with("hello", None, None)
|
||||
executor._do_openai_compat.assert_not_awaited()
|
||||
executor._do_anthropic_native.assert_not_awaited()
|
||||
assert result == "gemini-result"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 2c — history-to-message conversion tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_history_to_openai_messages_empty_history():
|
||||
"""No history → single user message (back-compat with pre-2c single-turn shape)."""
|
||||
import importlib.util
|
||||
src = (_HERMES_DIR / "executor.py").read_text().replace(
|
||||
"from .providers import", "from providers import"
|
||||
).replace(
|
||||
"from .escalation import", "from escalation import"
|
||||
)
|
||||
# `__name__` needed because executor.py does logging.getLogger(__name__)
|
||||
# at import time. `escalation` + `providers` must also be importable
|
||||
# at the top level — the caller handles sys.path for that.
|
||||
sys.modules.setdefault("hermes_executor_under_test", MagicMock())
|
||||
sys.modules["hermes_executor_under_test.providers"] = providers
|
||||
sys.modules["hermes_executor_under_test.escalation"] = escalation
|
||||
ns: dict = {"__name__": "hermes_executor_under_test"}
|
||||
exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
|
||||
HermesA2AExecutor = ns["HermesA2AExecutor"]
|
||||
|
||||
msgs = HermesA2AExecutor._history_to_openai_messages("current turn", [])
|
||||
assert msgs == [{"role": "user", "content": "current turn"}]
|
||||
|
||||
|
||||
def test_history_to_openai_messages_multi_turn():
|
||||
"""A2A history roles map: human→user, ai→assistant. Current turn appended as user."""
|
||||
import importlib.util
|
||||
src = (_HERMES_DIR / "executor.py").read_text().replace(
|
||||
"from .providers import", "from providers import"
|
||||
).replace(
|
||||
"from .escalation import", "from escalation import"
|
||||
)
|
||||
# `__name__` needed because executor.py does logging.getLogger(__name__)
|
||||
# at import time. `escalation` + `providers` must also be importable
|
||||
# at the top level — the caller handles sys.path for that.
|
||||
sys.modules.setdefault("hermes_executor_under_test", MagicMock())
|
||||
sys.modules["hermes_executor_under_test.providers"] = providers
|
||||
sys.modules["hermes_executor_under_test.escalation"] = escalation
|
||||
ns: dict = {"__name__": "hermes_executor_under_test"}
|
||||
exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
|
||||
HermesA2AExecutor = ns["HermesA2AExecutor"]
|
||||
|
||||
history = [("human", "first question"), ("ai", "first answer"), ("human", "follow-up")]
|
||||
msgs = HermesA2AExecutor._history_to_openai_messages("current turn", history)
|
||||
assert msgs == [
|
||||
{"role": "user", "content": "first question"},
|
||||
{"role": "assistant", "content": "first answer"},
|
||||
{"role": "user", "content": "follow-up"},
|
||||
{"role": "user", "content": "current turn"},
|
||||
]
|
||||
|
||||
|
||||
def test_history_to_anthropic_messages_same_as_openai():
|
||||
"""Anthropic Messages API uses the same wire shape as OpenAI for text-only turns."""
|
||||
import importlib.util
|
||||
src = (_HERMES_DIR / "executor.py").read_text().replace(
|
||||
"from .providers import", "from providers import"
|
||||
).replace(
|
||||
"from .escalation import", "from escalation import"
|
||||
)
|
||||
# `__name__` needed because executor.py does logging.getLogger(__name__)
|
||||
# at import time. `escalation` + `providers` must also be importable
|
||||
# at the top level — the caller handles sys.path for that.
|
||||
sys.modules.setdefault("hermes_executor_under_test", MagicMock())
|
||||
sys.modules["hermes_executor_under_test.providers"] = providers
|
||||
sys.modules["hermes_executor_under_test.escalation"] = escalation
|
||||
ns: dict = {"__name__": "hermes_executor_under_test"}
|
||||
exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
|
||||
HermesA2AExecutor = ns["HermesA2AExecutor"]
|
||||
|
||||
history = [("human", "hello"), ("ai", "hi")]
|
||||
openai_msgs = HermesA2AExecutor._history_to_openai_messages("how are you?", history)
|
||||
anth_msgs = HermesA2AExecutor._history_to_anthropic_messages("how are you?", history)
|
||||
assert openai_msgs == anth_msgs
|
||||
|
||||
|
||||
def test_history_to_gemini_contents_uses_model_role_and_parts_wrapper():
|
||||
"""Gemini uses role='user'|'model' (NOT 'assistant') and wraps text in parts=[{text}]."""
|
||||
import importlib.util
|
||||
src = (_HERMES_DIR / "executor.py").read_text().replace(
|
||||
"from .providers import", "from providers import"
|
||||
).replace(
|
||||
"from .escalation import", "from escalation import"
|
||||
)
|
||||
# `__name__` needed because executor.py does logging.getLogger(__name__)
|
||||
# at import time. `escalation` + `providers` must also be importable
|
||||
# at the top level — the caller handles sys.path for that.
|
||||
sys.modules.setdefault("hermes_executor_under_test", MagicMock())
|
||||
sys.modules["hermes_executor_under_test.providers"] = providers
|
||||
sys.modules["hermes_executor_under_test.escalation"] = escalation
|
||||
ns: dict = {"__name__": "hermes_executor_under_test"}
|
||||
exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
|
||||
HermesA2AExecutor = ns["HermesA2AExecutor"]
|
||||
|
||||
history = [("human", "hi"), ("ai", "hello back")]
|
||||
contents = HermesA2AExecutor._history_to_gemini_contents("follow-up?", history)
|
||||
assert contents == [
|
        {"role": "user", "parts": [{"text": "hi"}]},
        {"role": "model", "parts": [{"text": "hello back"}]},
        {"role": "user", "parts": [{"text": "follow-up?"}]},
    ]


@pytest.mark.asyncio
async def test_dispatch_passes_history_through():
    """When _do_inference is called with history, it flows through to the provider path."""
    executor = _make_executor("anthropic")
    executor._do_anthropic_native = AsyncMock(return_value="reply-with-history")
    executor._do_openai_compat = AsyncMock()
    executor._do_gemini_native = AsyncMock()

    history = [("human", "prior q"), ("ai", "prior a")]
    result = await executor._do_inference("current", history)

    executor._do_anthropic_native.assert_awaited_once_with("current", history, None)
    assert result == "reply-with-history"


# ---------------------------------------------------------------------------
# Phase 2d-i — system_prompt dispatch tests
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_dispatch_passes_system_prompt_to_anthropic():
    """system_prompt flows through _do_inference → _do_anthropic_native as third arg."""
    executor = _make_executor("anthropic")
    executor._do_anthropic_native = AsyncMock(return_value="reply")
    executor._do_openai_compat = AsyncMock()
    executor._do_gemini_native = AsyncMock()

    await executor._do_inference("user msg", None, "you are a helpful assistant")
    executor._do_anthropic_native.assert_awaited_once_with(
        "user msg", None, "you are a helpful assistant"
    )


@pytest.mark.asyncio
async def test_dispatch_passes_system_prompt_to_gemini():
    """system_prompt flows through _do_inference → _do_gemini_native as third arg."""
    executor = _make_executor("gemini")
    executor._do_gemini_native = AsyncMock(return_value="reply")
    executor._do_openai_compat = AsyncMock()
    executor._do_anthropic_native = AsyncMock()

    await executor._do_inference("user msg", None, "system instruction")
    executor._do_gemini_native.assert_awaited_once_with(
        "user msg", None, "system instruction"
    )


@pytest.mark.asyncio
async def test_dispatch_passes_system_prompt_to_openai():
    """system_prompt flows through _do_inference → _do_openai_compat as third arg."""
    executor = _make_executor("openai")
    executor._do_openai_compat = AsyncMock(return_value="reply")
    executor._do_anthropic_native = AsyncMock()
    executor._do_gemini_native = AsyncMock()

    await executor._do_inference("user msg", None, "system prompt")
    executor._do_openai_compat.assert_awaited_once_with(
        "user msg", None, "system prompt"
    )


def test_executor_accepts_config_path_kwarg():
    """HermesA2AExecutor.__init__ accepts config_path and stores it on _config_path."""
    import importlib.util
    src = (_HERMES_DIR / "executor.py").read_text().replace(
        "from .providers import", "from providers import"
    ).replace(
        "from .escalation import", "from escalation import"
    )
    # `__name__` needed because executor.py does logging.getLogger(__name__)
    # at import time. `escalation` + `providers` must also be importable
    # at the top level — the caller handles sys.path for that.
    sys.modules.setdefault("hermes_executor_under_test", MagicMock())
    sys.modules["hermes_executor_under_test.providers"] = providers
    sys.modules["hermes_executor_under_test.escalation"] = escalation
    ns: dict = {"__name__": "hermes_executor_under_test"}
    exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
    HermesA2AExecutor = ns["HermesA2AExecutor"]
    cfg = providers.PROVIDERS["openai"]

    # Without config_path — default None
    e1 = HermesA2AExecutor(provider_cfg=cfg, api_key="k", model="m")
    assert e1._config_path is None

    # With config_path
    e2 = HermesA2AExecutor(
        provider_cfg=cfg, api_key="k", model="m", config_path="/configs"
    )
    assert e2._config_path == "/configs"


def test_create_executor_forwards_config_path():
    """create_executor(config_path=...) → executor._config_path gets set.

    Exercises both the hermes_api_key back-compat path AND the registry
    resolution path to make sure config_path threads through both.
    """
    import importlib.util
    src = (_HERMES_DIR / "executor.py").read_text().replace(
        "from .providers import", "from providers import"
    ).replace(
        "from .escalation import", "from escalation import"
    )
    # `__name__` needed because executor.py does logging.getLogger(__name__)
    # at import time. `escalation` + `providers` must also be importable
    # at the top level — the caller handles sys.path for that.
    sys.modules.setdefault("hermes_executor_under_test", MagicMock())
    sys.modules["hermes_executor_under_test.providers"] = providers
    sys.modules["hermes_executor_under_test.escalation"] = escalation
    ns: dict = {"__name__": "hermes_executor_under_test"}
    exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
    create_executor = ns["create_executor"]

    # Path 1: hermes_api_key
    e1 = create_executor(hermes_api_key="k", config_path="/path/a")
    assert e1._config_path == "/path/a"

    # Path 2: registry resolution
    import os
    os.environ["OPENAI_API_KEY"] = "openai-test"
    try:
        e2 = create_executor(provider="openai", config_path="/path/b")
        assert e2._config_path == "/path/b"
    finally:
        os.environ.pop("OPENAI_API_KEY", None)


@pytest.mark.asyncio
async def test_dispatch_unknown_scheme_falls_back_to_openai_compat():
    """Unknown auth_scheme → log a warning + fall back to openai-compat (forward-compat)."""
    executor = _make_executor("openai")
    # Mutate the cfg field to simulate an unknown scheme (testing the dispatch, not the registry)
    executor.provider_cfg = providers.ProviderConfig(
        name="futureprovider",
        env_vars=("FOO",),
        base_url="https://example.com/v1",
        default_model="foo",
        auth_scheme="some_future_scheme",
    )
    executor._do_openai_compat = AsyncMock(return_value="fallback-result")
    executor._do_anthropic_native = AsyncMock()
    executor._do_gemini_native = AsyncMock()

    result = await executor._do_inference("hello")

    executor._do_openai_compat.assert_awaited_once()
    executor._do_anthropic_native.assert_not_awaited()
    executor._do_gemini_native.assert_not_awaited()
    assert result == "fallback-result"


@pytest.mark.asyncio
async def test_anthropic_native_raises_clear_error_when_sdk_missing(monkeypatch):
    """If the anthropic package is not installed, _do_anthropic_native raises
    a clear RuntimeError with install instructions — it does NOT silently
    fall back to the OpenAI-compat shim (which would lose tool-calling +
    vision fidelity invisibly).
    """
    executor = _make_executor("anthropic")

    # Simulate ImportError on `import anthropic`. We do this by clobbering
    # the name in sys.modules so the import statement inside
    # _do_anthropic_native hits an ImportError.
    monkeypatch.setitem(sys.modules, "anthropic", None)

    with pytest.raises(RuntimeError, match="anthropic"):
        await executor._do_anthropic_native("hello")


@pytest.mark.asyncio
async def test_gemini_native_raises_clear_error_when_sdk_missing(monkeypatch):
    """If the google-genai package is not installed, _do_gemini_native raises
    a clear RuntimeError with install instructions — same fail-loud semantics
    as the anthropic native path."""
    executor = _make_executor("gemini")

    # Simulate ImportError on `from google import genai`. Clobbering
    # sys.modules["google"] forces the submodule import to fail.
    monkeypatch.setitem(sys.modules, "google", None)

    with pytest.raises(RuntimeError, match="google-genai"):
        await executor._do_gemini_native("hello")


def test_create_executor_passes_provider_cfg():
    """create_executor's back-compat paths should set .provider_cfg on the
    returned executor so dispatch has auth_scheme available at runtime."""
    # Direct-load executor module same way _make_executor does
    import importlib.util
    src = (_HERMES_DIR / "executor.py").read_text().replace(
        "from .providers import", "from providers import"
    ).replace(
        "from .escalation import", "from escalation import"
    )
    # `__name__` needed because executor.py does logging.getLogger(__name__)
    # at import time. `escalation` + `providers` must also be importable
    # at the top level — the caller handles sys.path for that.
    sys.modules.setdefault("hermes_executor_under_test", MagicMock())
    sys.modules["hermes_executor_under_test.providers"] = providers
    sys.modules["hermes_executor_under_test.escalation"] = escalation
    ns: dict = {"__name__": "hermes_executor_under_test"}
    exec(compile(src, str(_HERMES_DIR / "executor.py"), "exec"), ns)
    create_executor = ns["create_executor"]

    # Path 1: hermes_api_key back-compat → nous_portal cfg
    exec1 = create_executor(hermes_api_key="test-key")
    assert exec1.provider_cfg.name == "nous_portal"
    assert exec1.provider_cfg.auth_scheme == "openai"

    # Path 2: explicit provider name → that cfg (anthropic has the new scheme)
    import os
    os.environ["ANTHROPIC_API_KEY"] = "ant-test"
    try:
        exec2 = create_executor(provider="anthropic")
        assert exec2.provider_cfg.name == "anthropic"
        assert exec2.provider_cfg.auth_scheme == "anthropic"
        assert exec2.model == "claude-sonnet-4-5"
    finally:
        os.environ.pop("ANTHROPIC_API_KEY", None)

    # Path 3: Phase 2b — gemini explicit resolution
    os.environ["GEMINI_API_KEY"] = "gem-test"
    try:
        exec3 = create_executor(provider="gemini")
        assert exec3.provider_cfg.name == "gemini"
        assert exec3.provider_cfg.auth_scheme == "gemini"
        assert exec3.model == "gemini-2.5-flash"
    finally:
        os.environ.pop("GEMINI_API_KEY", None)
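For reference, a minimal sketch of the dispatch behaviour these tests pin down: route on provider_cfg.auth_scheme, thread history and system_prompt through unchanged, and fall back to the OpenAI-compat path for unknown schemes. This is an assumed shape reconstructed from the assertions above, not the shipped executor.py; the logger name is likewise assumed.

# Illustrative sketch only; method names mirror the mocks patched in the tests.
async def _do_inference(self, user_message, history=None, system_prompt=None):
    scheme = getattr(self.provider_cfg, "auth_scheme", "openai")
    if scheme == "anthropic":
        return await self._do_anthropic_native(user_message, history, system_prompt)
    if scheme == "gemini":
        return await self._do_gemini_native(user_message, history, system_prompt)
    if scheme != "openai":
        logger.warning("Unknown auth_scheme %r, falling back to OpenAI-compat", scheme)
    return await self._do_openai_compat(user_message, history, system_prompt)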
@ -1,182 +0,0 @@
"""Tests for workspace-template/adapters/hermes/providers.py.

These tests exercise resolve_provider() in isolation — they do not import
anything from adapters/__init__.py so they don't need the a2a runtime deps.
"""

from __future__ import annotations

import importlib
import os
import sys
from pathlib import Path

import pytest

# Make the hermes package importable without pulling in adapters/__init__.py
# (which imports the a2a SDK). We load providers.py directly from its file path.
_HERMES_DIR = Path(__file__).parent.parent / "adapters" / "hermes"
sys.path.insert(0, str(_HERMES_DIR))
import providers  # type: ignore  # noqa: E402


_ALL_PROVIDER_ENV_VARS = (
    "HERMES_API_KEY",
    "NOUS_API_KEY",
    "OPENROUTER_API_KEY",
    "OPENAI_API_KEY",
    "ANTHROPIC_API_KEY",
    "XAI_API_KEY",
    "GROK_API_KEY",
    "GEMINI_API_KEY",
    "GOOGLE_API_KEY",
    "QWEN_API_KEY",
    "DASHSCOPE_API_KEY",
    "GLM_API_KEY",
    "ZHIPU_API_KEY",
    "KIMI_API_KEY",
    "MOONSHOT_API_KEY",
    "MINIMAX_API_KEY",
    "DEEPSEEK_API_KEY",
    "GROQ_API_KEY",
    "TOGETHER_API_KEY",
    "FIREWORKS_API_KEY",
    "MISTRAL_API_KEY",
)


@pytest.fixture(autouse=True)
def _clean_env():
    """Clear every provider env var before each test and restore to the
    exact pre-test state on teardown.

    Implementation note: earlier version used pytest's monkeypatch fixture,
    which tracks deltas from the state at fixture entry. That was buggy
    because several tests in this file mutate os.environ directly
    (os.environ["HERMES_API_KEY"] = ...), bypassing monkeypatch's
    tracking. The direct mutations leaked into the NEXT test file
    (test_hermes_smoke.py::test_create_executor_raises_without_keys),
    causing a file-order-dependent failure. Pure snapshot/restore
    avoids all the delta-tracking edge cases.
    """
    saved = {k: os.environ.get(k) for k in _ALL_PROVIDER_ENV_VARS}
    for k in _ALL_PROVIDER_ENV_VARS:
        os.environ.pop(k, None)
    try:
        yield
    finally:
        for k, v in saved.items():
            if v is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = v


def test_registry_is_populated():
    """Phase 1 ships at least 15 providers and every entry is self-consistent."""
    assert len(providers.PROVIDERS) >= 15
    assert len(providers.RESOLUTION_ORDER) == len(providers.PROVIDERS)
    for name, cfg in providers.PROVIDERS.items():
        assert cfg.name == name, f"{name}: config.name should match dict key"
        assert cfg.env_vars, f"{name}: must declare at least one env var"
        assert cfg.base_url.startswith("http"), f"{name}: base_url must be http(s)"
        assert cfg.default_model, f"{name}: must declare a default model"
        assert name in providers.RESOLUTION_ORDER, f"{name}: missing from resolution order"


def test_resolution_order_has_no_duplicates():
    assert len(providers.RESOLUTION_ORDER) == len(set(providers.RESOLUTION_ORDER))


def test_backcompat_hermes_api_key_first():
    """PR 2 back-compat — HERMES_API_KEY auto-detect still routes to Nous Portal."""
    os.environ["HERMES_API_KEY"] = "hermes-test-key"
    cfg, key = providers.resolve_provider()
    assert cfg.name == "nous_portal"
    assert key == "hermes-test-key"


def test_backcompat_openrouter_api_key_second():
    """PR 2 back-compat — OPENROUTER_API_KEY still routes to OpenRouter when HERMES_API_KEY is absent."""
    os.environ["OPENROUTER_API_KEY"] = "or-test-key"
    cfg, key = providers.resolve_provider()
    assert cfg.name == "openrouter"


def test_auto_detect_openai():
    os.environ["OPENAI_API_KEY"] = "sk-test"
    cfg, key = providers.resolve_provider()
    assert cfg.name == "openai"
    assert cfg.base_url == "https://api.openai.com/v1"


def test_auto_detect_anthropic():
    os.environ["ANTHROPIC_API_KEY"] = "ant-test"
    cfg, key = providers.resolve_provider()
    assert cfg.name == "anthropic"


@pytest.mark.parametrize(
    "env_var,expected",
    [
        ("XAI_API_KEY", "xai"),
        ("GROK_API_KEY", "xai"),
        ("QWEN_API_KEY", "qwen"),
        ("DASHSCOPE_API_KEY", "qwen"),
        ("GLM_API_KEY", "glm"),
        ("ZHIPU_API_KEY", "glm"),
        ("KIMI_API_KEY", "kimi"),
        ("MOONSHOT_API_KEY", "kimi"),
        ("GROQ_API_KEY", "groq"),
        ("DEEPSEEK_API_KEY", "deepseek"),
        ("MISTRAL_API_KEY", "mistral"),
        ("TOGETHER_API_KEY", "together"),
        ("FIREWORKS_API_KEY", "fireworks"),
        ("MINIMAX_API_KEY", "minimax"),
        ("GEMINI_API_KEY", "gemini"),
        ("GOOGLE_API_KEY", "gemini"),
    ],
)
def test_every_provider_env_var_resolves(env_var, expected):
    """Every env var listed in PROVIDERS resolves to the right provider
    — this guards against typos in the registry dict."""
    os.environ[env_var] = "test-key"
    cfg, _ = providers.resolve_provider()
    assert cfg.name == expected, (
        f"{env_var} should route to {expected}, got {cfg.name}"
    )


def test_explicit_provider_wins_over_auto_detect():
    """When `provider=` is given, auto-detect is bypassed."""
    os.environ["HERMES_API_KEY"] = "hermes-key"  # would auto-detect
    os.environ["OPENAI_API_KEY"] = "openai-key"
    cfg, key = providers.resolve_provider("openai")
    assert cfg.name == "openai"
    assert key == "openai-key"


def test_unknown_provider_raises():
    with pytest.raises(ValueError, match="Unknown Hermes provider"):
        providers.resolve_provider("this_provider_does_not_exist")


def test_explicit_provider_with_missing_env_raises():
    """If the operator asks for a specific provider but its env var is empty,
    we raise — we do NOT fall back to auto-detect because that would be
    surprising ("why is my openai config talking to anthropic?")."""
    os.environ["HERMES_API_KEY"] = "some-value"  # auto-detect would succeed
    with pytest.raises(ValueError, match="no env var set"):
        providers.resolve_provider("anthropic")


def test_auto_detect_with_no_env_lists_all_options():
    """The error message should list every env var the caller could set,
    so operators don't have to read the source."""
    # No env vars set (autouse fixture clears them all)
    with pytest.raises(ValueError) as exc_info:
        providers.resolve_provider()
    msg = str(exc_info.value)
    # Spot-check: the message names at least a few providers
    for env_var in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "QWEN_API_KEY"):
        assert env_var in msg, f"error message should mention {env_var}"
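A sketch of the resolution contract these tests describe: an explicit provider name bypasses auto-detect and fails loudly, auto-detect walks RESOLUTION_ORDER, and the no-key error lists every env var. Assumed shape only, reconstructed from the assertions; the real providers.py may differ in wording and structure.

# Sketch under the assumptions above; PROVIDERS and RESOLUTION_ORDER are the
# registry structures the tests import from providers.py.
import os

def resolve_provider(name: str | None = None):
    if name is not None:
        if name not in PROVIDERS:
            raise ValueError(f"Unknown Hermes provider: {name}")
        cfg = PROVIDERS[name]
        for var in cfg.env_vars:
            if os.environ.get(var):
                return cfg, os.environ[var]
        raise ValueError(f"Provider {name!r}: no env var set (expected one of {cfg.env_vars})")
    for candidate in RESOLUTION_ORDER:
        cfg = PROVIDERS[candidate]
        for var in cfg.env_vars:
            if os.environ.get(var):
                return cfg, os.environ[var]
    every_var = ", ".join(v for c in PROVIDERS.values() for v in c.env_vars)
    raise ValueError(f"No provider API key found; set one of: {every_var}")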
@ -1,84 +0,0 @@
"""Smoke tests for adapters.hermes.create_executor().

Verifies key resolution order and ValueError on missing keys.
No real network calls are made — the executor object is just instantiated.
"""
import os
import pytest
from unittest.mock import patch

from adapters.hermes import create_executor


def test_create_executor_with_param():
    """create_executor() works when key passed directly as param."""
    executor = create_executor(hermes_api_key="test-key-direct")
    assert executor is not None


def test_create_executor_with_hermes_env():
    """create_executor() works when HERMES_API_KEY env var is set."""
    with patch.dict(os.environ, {"HERMES_API_KEY": "test-hermes-key"}, clear=False):
        os.environ.pop("OPENROUTER_API_KEY", None)
        executor = create_executor()
        assert executor is not None


def test_create_executor_falls_back_to_openrouter():
    """create_executor() falls back to OPENROUTER_API_KEY when HERMES_API_KEY absent."""
    env = {"OPENROUTER_API_KEY": "test-openrouter-key"}
    with patch.dict(os.environ, env, clear=False):
        os.environ.pop("HERMES_API_KEY", None)
        executor = create_executor()
        assert executor is not None


def test_create_executor_raises_without_keys():
    """create_executor() raises ValueError when no keys available."""
    with patch.dict(os.environ, {}, clear=False):
        os.environ.pop("HERMES_API_KEY", None)
        os.environ.pop("OPENROUTER_API_KEY", None)
        with pytest.raises(ValueError):
            create_executor()


# ---------------------------------------------------------------------------
# Additional assertions — verify key routing is correct
# ---------------------------------------------------------------------------

def test_param_key_uses_nous_base_url():
    """When called with explicit key, base_url points at Nous Portal."""
    executor = create_executor(hermes_api_key="nous-key")
    assert "nousresearch.com" in executor.base_url


def test_hermes_env_uses_nous_base_url():
    """HERMES_API_KEY maps to Nous Portal base URL."""
    with patch.dict(os.environ, {"HERMES_API_KEY": "nous-key"}, clear=False):
        os.environ.pop("OPENROUTER_API_KEY", None)
        executor = create_executor()
        assert "nousresearch.com" in executor.base_url


def test_openrouter_fallback_uses_openrouter_base_url():
    """OPENROUTER_API_KEY fallback maps to OpenRouter base URL."""
    with patch.dict(os.environ, {"OPENROUTER_API_KEY": "or-key"}, clear=False):
        os.environ.pop("HERMES_API_KEY", None)
        executor = create_executor()
        assert "openrouter.ai" in executor.base_url


def test_param_takes_priority_over_hermes_env():
    """Explicit param overrides HERMES_API_KEY env var."""
    with patch.dict(os.environ, {"HERMES_API_KEY": "env-key"}, clear=False):
        executor = create_executor(hermes_api_key="param-key")
        assert executor.api_key == "param-key"


def test_hermes_env_takes_priority_over_openrouter():
    """HERMES_API_KEY overrides OPENROUTER_API_KEY fallback."""
    env = {"HERMES_API_KEY": "hermes-key", "OPENROUTER_API_KEY": "or-key"}
    with patch.dict(os.environ, env, clear=False):
        executor = create_executor()
        assert executor.api_key == "hermes-key"
        assert "nousresearch.com" in executor.base_url
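The smoke tests above imply roughly the following back-compat priority inside create_executor(): explicit param, then HERMES_API_KEY, then OPENROUTER_API_KEY, else ValueError. The helper name and the full base URLs below are placeholders introduced for illustration; the tests only assert the "nousresearch.com" and "openrouter.ai" substrings.

# Hedged sketch of the key/base-url priority the smoke tests assert.
import os

NOUS_BASE_URL = "https://inference-api.nousresearch.com/v1"   # placeholder; only "nousresearch.com" is asserted
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"          # placeholder; only "openrouter.ai" is asserted

def _resolve_key_and_base_url(hermes_api_key: str | None = None) -> tuple[str, str]:
    """Mirror the priority asserted above: param > HERMES_API_KEY > OPENROUTER_API_KEY."""
    if hermes_api_key:
        return hermes_api_key, NOUS_BASE_URL
    if os.environ.get("HERMES_API_KEY"):
        return os.environ["HERMES_API_KEY"], NOUS_BASE_URL
    if os.environ.get("OPENROUTER_API_KEY"):
        return os.environ["OPENROUTER_API_KEY"], OPENROUTER_BASE_URL
    raise ValueError("No API key available: set HERMES_API_KEY or OPENROUTER_API_KEY")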
@ -1,167 +0,0 @@
"""Tests for Baidu Qianfan provider support across agent.py, deepagents, and openclaw."""

import importlib
import sys
from types import ModuleType

import pytest

QIANFAN_BASE_URL = "https://qianfan.baidubce.com/v2"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _install_langgraph_mocks(monkeypatch, captured: dict):
    """Inject lightweight langgraph + langchain_openai stubs into sys.modules."""
    prebuilt_mod = ModuleType("langgraph.prebuilt")

    def fake_create_react_agent(*, model, tools, prompt):
        captured["react_agent"] = model
        return {"model": model}

    prebuilt_mod.create_react_agent = fake_create_react_agent
    langgraph_mod = ModuleType("langgraph")
    monkeypatch.setitem(sys.modules, "langgraph", langgraph_mod)
    monkeypatch.setitem(sys.modules, "langgraph.prebuilt", prebuilt_mod)

    openai_mod = ModuleType("langchain_openai")

    class FakeChatOpenAI:
        def __init__(self, **kwargs):
            captured["llm_kwargs"] = kwargs

    openai_mod.ChatOpenAI = FakeChatOpenAI
    monkeypatch.setitem(sys.modules, "langchain_openai", openai_mod)


# ---------------------------------------------------------------------------
# Track D-1: agent.py qianfan dispatch
# ---------------------------------------------------------------------------

class TestQianfanInAgent:
    """agent.py create_agent() correctly wires Qianfan provider."""

    def _load_agent(self, monkeypatch, captured):
        _install_langgraph_mocks(monkeypatch, captured)
        sys.modules.pop("agent", None)
        return importlib.import_module("agent")

    def test_uses_qianfan_api_key(self, monkeypatch):
        """QIANFAN_API_KEY is used when set."""
        captured = {}
        monkeypatch.setenv("QIANFAN_API_KEY", "qf-key-123")
        monkeypatch.delenv("AISTUDIO_API_KEY", raising=False)
        agent_mod = self._load_agent(monkeypatch, captured)
        agent_mod.create_agent("qianfan:ernie-4.5", [], "sys")
        assert captured["llm_kwargs"]["openai_api_key"] == "qf-key-123"

    def test_falls_back_to_aistudio_api_key(self, monkeypatch):
        """Falls back to AISTUDIO_API_KEY when QIANFAN_API_KEY is absent."""
        captured = {}
        monkeypatch.delenv("QIANFAN_API_KEY", raising=False)
        monkeypatch.setenv("AISTUDIO_API_KEY", "ai-studio-456")
        agent_mod = self._load_agent(monkeypatch, captured)
        agent_mod.create_agent("qianfan:ernie-speed", [], "sys")
        assert captured["llm_kwargs"]["openai_api_key"] == "ai-studio-456"

    def test_uses_qianfan_base_url(self, monkeypatch):
        """openai_api_base is always the Qianfan endpoint."""
        captured = {}
        monkeypatch.setenv("QIANFAN_API_KEY", "any-key")
        agent_mod = self._load_agent(monkeypatch, captured)
        agent_mod.create_agent("qianfan:ernie-lite", [], "sys")
        assert captured["llm_kwargs"]["openai_api_base"] == QIANFAN_BASE_URL

    def test_model_name_stripped_of_prefix(self, monkeypatch):
        """The model kwarg contains only the bare model name, not the prefix."""
        captured = {}
        monkeypatch.setenv("QIANFAN_API_KEY", "k")
        agent_mod = self._load_agent(monkeypatch, captured)
        agent_mod.create_agent("qianfan:ernie-4.5-turbo", [], "sys")
        assert captured["llm_kwargs"]["model"] == "ernie-4.5-turbo"


# ---------------------------------------------------------------------------
# Track D-2: adapters/deepagents _create_llm qianfan dispatch
# ---------------------------------------------------------------------------

class TestQianfanInDeepAgents:
    """DeepAgents adapter._create_llm() correctly wires Qianfan provider."""

    def _make_adapter(self, monkeypatch, captured):
        openai_mod = ModuleType("langchain_openai")

        class FakeChatOpenAI:
            def __init__(self, **kwargs):
                captured["llm_kwargs"] = kwargs

        openai_mod.ChatOpenAI = FakeChatOpenAI
        monkeypatch.setitem(sys.modules, "langchain_openai", openai_mod)
        from adapters.deepagents.adapter import DeepAgentsAdapter
        return DeepAgentsAdapter()

    def test_uses_qianfan_api_key(self, monkeypatch):
        captured = {}
        monkeypatch.setenv("QIANFAN_API_KEY", "qf-deep-999")
        monkeypatch.delenv("AISTUDIO_API_KEY", raising=False)
        adapter = self._make_adapter(monkeypatch, captured)
        adapter._create_llm("qianfan:ernie-4.5")
        assert captured["llm_kwargs"]["openai_api_key"] == "qf-deep-999"

    def test_falls_back_to_aistudio_api_key(self, monkeypatch):
        captured = {}
        monkeypatch.delenv("QIANFAN_API_KEY", raising=False)
        monkeypatch.setenv("AISTUDIO_API_KEY", "aistudio-deep-777")
        adapter = self._make_adapter(monkeypatch, captured)
        adapter._create_llm("qianfan:ernie-speed")
        assert captured["llm_kwargs"]["openai_api_key"] == "aistudio-deep-777"

    def test_uses_qianfan_base_url(self, monkeypatch):
        captured = {}
        monkeypatch.setenv("QIANFAN_API_KEY", "k")
        adapter = self._make_adapter(monkeypatch, captured)
        adapter._create_llm("qianfan:ernie-lite")
        assert captured["llm_kwargs"]["openai_api_base"] == QIANFAN_BASE_URL


# ---------------------------------------------------------------------------
# Track D-3: adapters/openclaw provider_urls + key resolution
# ---------------------------------------------------------------------------

class TestQianfanInOpenClaw:
    """OpenClaw adapter exposes Qianfan URL and resolves the correct API key."""

    def _provider_urls(self):
        """Return a copy of the provider_urls dict defined in the adapter."""
        return {
            "openai": "https://api.openai.com/v1",
            "groq": "https://api.groq.com/openai/v1",
            "openrouter": "https://openrouter.ai/api/v1",
            "qianfan": QIANFAN_BASE_URL,
        }

    def _select_key(self, prefix: str, env: dict) -> str:
        """Mirror the prefix-aware key selection added to openclaw/adapter.py."""
        if prefix == "qianfan":
            return env.get("QIANFAN_API_KEY", env.get("AISTUDIO_API_KEY", ""))
        return env.get("OPENAI_API_KEY", env.get("GROQ_API_KEY", env.get("OPENROUTER_API_KEY", "")))

    def test_qianfan_url_in_provider_map(self):
        urls = self._provider_urls()
        assert "qianfan" in urls
        assert urls["qianfan"] == QIANFAN_BASE_URL

    def test_qianfan_key_resolution_primary(self):
        key = self._select_key("qianfan", {"QIANFAN_API_KEY": "qf-oc-111"})
        assert key == "qf-oc-111"

    def test_qianfan_key_resolution_fallback(self):
        key = self._select_key("qianfan", {"AISTUDIO_API_KEY": "as-oc-222"})
        assert key == "as-oc-222"

    def test_non_qianfan_prefix_not_affected(self):
        """Existing providers still resolve via OPENAI_API_KEY chain."""
        key = self._select_key("openai", {"OPENAI_API_KEY": "sk-test"})
        assert key == "sk-test"
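Taken together, these tests constrain the qianfan branch to roughly the following shape: strip the "qianfan:" prefix, prefer QIANFAN_API_KEY over AISTUDIO_API_KEY, and point the OpenAI-compatible client at the Qianfan endpoint. The helper name below is invented for illustration; only the captured ChatOpenAI kwargs are taken from the tests.

# Hypothetical sketch of the qianfan dispatch the tests above capture.
import os

QIANFAN_BASE_URL = "https://qianfan.baidubce.com/v2"

def _qianfan_chat_model(model_spec: str):
    from langchain_openai import ChatOpenAI
    bare_model = model_spec.split(":", 1)[1]          # "qianfan:ernie-4.5" -> "ernie-4.5"
    api_key = os.environ.get("QIANFAN_API_KEY") or os.environ.get("AISTUDIO_API_KEY", "")
    return ChatOpenAI(
        model=bare_model,
        openai_api_key=api_key,
        openai_api_base=QIANFAN_BASE_URL,
    )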
@ -1,189 +0,0 @@
"""Tests for shared runtime helpers used by A2A-backed executors."""

from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from adapters.shared_runtime import (
    append_peer_guidance,
    build_peer_section,
    build_task_text,
    brief_task,
    extract_history,
    extract_message_text,
    format_conversation_history,
    summarize_peer_cards,
    set_current_task,
)


def _make_context(parts=None, metadata=None):
    context = MagicMock()
    context.message.parts = parts or []
    context.metadata = metadata or {}
    return context


def test_extract_message_text_prefers_text_then_root_text():
    part1 = MagicMock()
    part1.text = "Hello"
    part2 = MagicMock(spec=[])
    part2.root = SimpleNamespace(text="World")
    assert extract_message_text(_make_context([part1, part2])) == "Hello World"


def test_extract_message_text_supports_dict_parts():
    parts = [{"text": "Hello"}, {"root": {"text": "World"}}]
    assert extract_message_text(parts) == "Hello World"


def test_extract_history_and_formatting():
    ctx = _make_context(
        metadata={
            "history": [
                {"role": "user", "parts": [{"text": "First"}]},
                {"role": "agent", "parts": [{"text": "Second"}]},
            ]
        }
    )

    history = extract_history(ctx)

    assert history == [("human", "First"), ("ai", "Second")]
    assert format_conversation_history(history) == "User: First\nAgent: Second"
    assert (
        build_task_text("Current request", history)
        == "Conversation so far:\nUser: First\nAgent: Second\n\nCurrent request: Current request"
    )


def test_append_peer_guidance_is_optional():
    assert append_peer_guidance(None, "", default_text="Base", tool_name="delegate") == "Base"
    assert (
        append_peer_guidance("Base", "Peer A", default_text="Base", tool_name="delegate")
        == "Base\n\n## Peers\nPeer A\nUse delegate to communicate with them."
    )


def test_summarize_peer_cards_and_render_section():
    peers = [
        {
            "id": "peer-1",
            "status": "online",
            "agent_card": {
                "name": "Alpha",
                "skills": [{"name": "research"}, {"id": "write"}],
            },
        },
        {"id": "peer-2", "status": "offline", "agent_card": None},
    ]

    assert summarize_peer_cards(peers) == [
        {
            "id": "peer-1",
            "name": "Alpha",
            "status": "online",
            "skills": ["research", "write"],
        }
    ]

    section = build_peer_section(peers)
    assert "## Your Peers" in section
    assert "**Alpha** (id: `peer-1`, status: online)" in section
    assert "Skills: research, write" in section
    assert "delegate_to_workspace" in section


def test_brief_task_truncates_at_sixty_chars():
    assert brief_task("x" * 59) == "x" * 59
    assert brief_task("x" * 60) == "x" * 60
    assert brief_task("x" * 61) == ("x" * 60) + "..."


@pytest.mark.asyncio
async def test_set_current_task_updates_heartbeat():
    heartbeat = SimpleNamespace(current_task="", active_tasks=0)

    await set_current_task(heartbeat, "Working")
    assert heartbeat.current_task == "Working"
    assert heartbeat.active_tasks == 1

    await set_current_task(heartbeat, "")
    assert heartbeat.current_task == ""
    assert heartbeat.active_tasks == 0


@pytest.mark.asyncio
async def test_set_current_task_is_noop_for_none():
    await set_current_task(None, "Working")


# ---------------------------------------------------------------------------
# build_task_text() with no history
# ---------------------------------------------------------------------------

def test_build_task_text_no_history_returns_user_message():
    """When history is empty, build_task_text() returns the user_message directly."""
    result = build_task_text("What is the weather?", [])
    assert result == "What is the weather?"


# ---------------------------------------------------------------------------
# summarize_peer_cards() edge cases
# ---------------------------------------------------------------------------

def test_summarize_peer_cards_invalid_json_string_skipped():
    """A peer whose agent_card is an invalid JSON string is skipped entirely."""
    peers = [
        {"id": "peer-bad", "status": "online", "agent_card": "{not valid json}"},
        {
            "id": "peer-good",
            "status": "online",
            "agent_card": {"name": "Good Peer", "skills": []},
        },
    ]
    result = summarize_peer_cards(peers)
    assert len(result) == 1
    assert result[0]["id"] == "peer-good"


def test_summarize_peer_cards_json_string_not_dict_skipped():
    """A peer whose agent_card is a JSON-encoded list (not a dict) is skipped."""
    import json
    peers = [
        {"id": "peer-list", "status": "online", "agent_card": json.dumps(["skill1"])},
        {
            "id": "peer-dict",
            "status": "online",
            "agent_card": {"name": "Dict Peer", "skills": []},
        },
    ]
    result = summarize_peer_cards(peers)
    assert len(result) == 1
    assert result[0]["id"] == "peer-dict"


# ---------------------------------------------------------------------------
# set_current_task() httpx exception is swallowed
# ---------------------------------------------------------------------------

@pytest.mark.asyncio
async def test_set_current_task_httpx_exception_is_silenced(monkeypatch):
    """set_current_task() silently ignores exceptions from the httpx heartbeat push."""
    monkeypatch.setenv("WORKSPACE_ID", "ws-test")
    monkeypatch.setenv("PLATFORM_URL", "http://platform:8080")

    mock_client = AsyncMock()
    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
    mock_client.__aexit__ = AsyncMock(return_value=False)
    mock_client.post = AsyncMock(side_effect=Exception("Connection refused"))

    # httpx is imported lazily inside the function, so patch at the httpx module level
    with patch("httpx.AsyncClient", return_value=mock_client):
        # Should not raise — exception is swallowed with pass
        heartbeat = SimpleNamespace(current_task="", active_tasks=0)
        await set_current_task(heartbeat, "Doing work")

    assert heartbeat.current_task == "Doing work"
    assert heartbeat.active_tasks == 1
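The string assertions above fully determine the history-formatting helpers. A minimal sketch consistent with them (the shipped shared_runtime.py may handle extra roles or trimming) looks like:

# Sketch reconstructed from the asserted strings, not the shipped module.
def format_conversation_history(history: list[tuple[str, str]]) -> str:
    labels = {"human": "User", "ai": "Agent"}
    return "\n".join(f"{labels.get(role, role)}: {text}" for role, text in history)

def build_task_text(user_message: str, history: list[tuple[str, str]]) -> str:
    if not history:
        return user_message
    return (
        "Conversation so far:\n"
        + format_conversation_history(history)
        + f"\n\nCurrent request: {user_message}"
    )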
@ -1,147 +0,0 @@
"""Tests for the new BaseAdapter.transcript_lines() method + claude-code override."""

import asyncio
import json
import os
import tempfile
from pathlib import Path

import pytest


# ── Default (BaseAdapter) ───────────────────────────────────────────────────


def test_base_adapter_returns_unsupported():
    """Adapters that don't override return supported:False."""
    from adapters.langgraph.adapter import LangGraphAdapter
    a = LangGraphAdapter()
    r = asyncio.run(a.transcript_lines())
    assert r["supported"] is False
    assert r["lines"] == []
    assert r["cursor"] == 0
    assert r["runtime"] == "langgraph"
    assert r["more"] is False


# ── Claude Code override ────────────────────────────────────────────────────


def _write_jsonl(path: Path, entries: list[dict]) -> None:
    with path.open("w") as f:
        for e in entries:
            f.write(json.dumps(e) + "\n")


def test_claude_code_no_projects_dir():
    """Returns supported:True with empty lines when projects dir missing."""
    from adapters.claude_code.adapter import ClaudeCodeAdapter
    with tempfile.TemporaryDirectory() as tmp:
        os.environ["HOME"] = tmp
        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
        try:
            r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
            assert r["supported"] is True
            assert r["lines"] == []
            assert r["cursor"] == 0
            assert "-configs" in r["source"]
        finally:
            del os.environ["CLAUDE_PROJECT_CWD"]


def test_claude_code_reads_jsonl_with_pagination():
    from adapters.claude_code.adapter import ClaudeCodeAdapter
    with tempfile.TemporaryDirectory() as tmp:
        os.environ["HOME"] = tmp
        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
        try:
            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
            projdir.mkdir(parents=True)
            _write_jsonl(projdir / "abc.jsonl", [
                {"type": "user", "n": 1},
                {"type": "assistant", "n": 2},
                {"type": "user", "n": 3},
                {"type": "assistant", "n": 4},
                {"type": "user", "n": 5},
            ])
            a = ClaudeCodeAdapter()
            # First page (limit=2)
            r1 = asyncio.run(a.transcript_lines(since=0, limit=2))
            assert r1["supported"] is True
            assert [l["n"] for l in r1["lines"]] == [1, 2]
            assert r1["cursor"] == 2
            assert r1["more"] is True
            # Second page (since=2, limit=2)
            r2 = asyncio.run(a.transcript_lines(since=2, limit=2))
            assert [l["n"] for l in r2["lines"]] == [3, 4]
            assert r2["cursor"] == 4
            assert r2["more"] is True
            # Third page exhausts
            r3 = asyncio.run(a.transcript_lines(since=4, limit=2))
            assert [l["n"] for l in r3["lines"]] == [5]
            assert r3["cursor"] == 5
            assert r3["more"] is False
        finally:
            del os.environ["CLAUDE_PROJECT_CWD"]


def test_claude_code_picks_most_recent_jsonl():
    """When multiple .jsonl files exist, picks the most-recently-modified."""
    from adapters.claude_code.adapter import ClaudeCodeAdapter
    with tempfile.TemporaryDirectory() as tmp:
        os.environ["HOME"] = tmp
        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
        try:
            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
            projdir.mkdir(parents=True)
            old = projdir / "old.jsonl"
            new = projdir / "new.jsonl"
            _write_jsonl(old, [{"src": "old"}])
            _write_jsonl(new, [{"src": "new"}])
            # Force new to be more recent
            os.utime(old, (1000, 1000))
            os.utime(new, (2000, 2000))
            r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
            assert r["lines"] == [{"src": "new"}]
            assert r["source"].endswith("new.jsonl")
        finally:
            del os.environ["CLAUDE_PROJECT_CWD"]


def test_claude_code_skips_malformed_lines():
    """Bad JSON lines surface as ``_parse_error: True`` rather than 500'ing."""
    from adapters.claude_code.adapter import ClaudeCodeAdapter
    with tempfile.TemporaryDirectory() as tmp:
        os.environ["HOME"] = tmp
        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
        try:
            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
            projdir.mkdir(parents=True)
            with (projdir / "x.jsonl").open("w") as f:
                f.write('{"good": 1}\n')
                f.write("not-json garbage\n")
                f.write('{"good": 2}\n')
            r = asyncio.run(ClaudeCodeAdapter().transcript_lines())
            assert r["lines"][0] == {"good": 1}
            assert r["lines"][1].get("_parse_error") is True
            assert r["lines"][2] == {"good": 2}
        finally:
            del os.environ["CLAUDE_PROJECT_CWD"]


def test_claude_code_caps_limit():
    """Limit is capped at 1000 to prevent OOM via paranoid client."""
    from adapters.claude_code.adapter import ClaudeCodeAdapter
    with tempfile.TemporaryDirectory() as tmp:
        os.environ["HOME"] = tmp
        os.environ["CLAUDE_PROJECT_CWD"] = "/configs"
        try:
            projdir = Path(tmp) / ".claude" / "projects" / "-configs"
            projdir.mkdir(parents=True)
            _write_jsonl(projdir / "x.jsonl", [{"i": i} for i in range(1500)])
            r = asyncio.run(ClaudeCodeAdapter().transcript_lines(limit=999999))
            assert len(r["lines"]) == 1000  # capped
            assert r["more"] is True
            assert r["cursor"] == 1000
        finally:
            del os.environ["CLAUDE_PROJECT_CWD"]
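The pagination contract these tests describe can be summarised in a small slice-and-cap helper: start at `since`, cap `limit` at 1000, and report the new cursor plus whether more lines remain. A sketch only; the real ClaudeCodeAdapter also resolves the newest .jsonl under the projects directory and tags unparseable lines with _parse_error.

# Illustrative helper matching the cursor/more arithmetic asserted above.
def _paginate(all_lines: list[dict], since: int = 0, limit: int = 100) -> dict:
    limit = min(limit, 1000)          # cap to keep one response bounded
    page = all_lines[since:since + limit]
    cursor = since + len(page)
    return {
        "supported": True,
        "lines": page,
        "cursor": cursor,
        "more": cursor < len(all_lines),
    }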