From 37bca9176e829125c99c222d91a46cbeba85e82b Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Wed, 15 Apr 2026 11:09:43 -0700 Subject: [PATCH 1/7] feat(workspace): add idle-loop reflection pattern (Hermes/Letta shape) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today's multi-framework research (Hermes, Letta, Trigger.dev, Inngest, AG2, Rivet, n8n, Composio, SWE-agent — see docs/ecosystem-watch.md) confirmed that nobody runs while(true) per agent. The working patterns are: (a) event-driven + hibernation (Hermes, Letta, Trigger.dev, Inngest) (b) cron/user-triggered ephemeral runs (AG2, Rivet, n8n, SWE-agent) Molecule AI is currently 100% in category (b). Observed team utilization: ~0.5% — agents idle 99.5% of the time because cron fires and CEO-typed A2A are the only initiating signals. CEO's north-star is 24/7 iteration, current cadence falls short. This PR closes the gap by adding an in-workspace idle loop that wakes the agent periodically ONLY when it has no active task. The shape is the Hermes reflection-on-completion pattern combined with the Letta backlog-pull pattern, collapsed into a ~60 LOC change in the workspace-template. Zero new Go code. Zero new DB tables. Zero new API endpoints. ## How it works 1. `config.py` gets two new fields on WorkspaceConfig: - `idle_prompt: str = ""` — the prompt to self-send when idle - `idle_interval_seconds: int = 600` — how often to check (default 10 min) Both support inline or file ref (matching the initial_prompt pattern). 2. `main.py` spawns an `_run_idle_loop()` asyncio task alongside the existing initial_prompt task (same lifecycle hooks — cancelled in the `finally:` of the server.serve() block). 3. The loop body: a. Sleep interval b. Check `heartbeat.active_tasks == 0` LOCALLY (no LLM call, no HTTP) c. If idle → self-POST the idle_prompt via the existing /workspaces/{id}/a2a proxy d. 
Loop The agent's own concurrency control rejects the post if it becomes busy between the check and the POST — that's the safety valve. 4. Gated on `config.idle_prompt` being non-empty. Default = "" = no loop. Existing workspaces upgrade silently as no-ops until someone explicitly opts in by setting idle_prompt in org.yaml (either defaults: or per-workspace:). ## Cost analysis (from the research report) - while(true) pattern: ~$93/day/org (12 agents × 12 thinks/hour × $0.027). Unshippable. - Hermes reflection-on-completion: ~$0.45/day/org. Cost ∝ useful work. - This PR's idle loop at 10-min cadence: upper bound 12 × 6/hour × 24h × ~3k tokens × Sonnet rate ≈ $5/day/org PER ROLE, only if they're genuinely idle every check. In practice far less because busy periods skip the LLM call entirely (the active_tasks check is local). ## Rollout plan Research report recommended rolling to ONE workspace first (Technical Researcher) and measuring 24h of activity_logs before enabling for all 12. This PR enables the mechanism; it does NOT add any default idle_prompt to org-templates/molecule-dev/org.yaml. That's a follow-up PR after this one lands and one workspace has been manually opted in for measurement. 
## Not touched in this PR - No Go code (no new platform endpoint, no new DB columns) - No org.yaml changes (zero-impact until someone opts in) - No scheduler changes (the idle loop is a workspace concern, not a scheduler concern — matches the research report's layering) ## Test plan - [x] Python syntax check (ast.parse) on main.py + config.py - [ ] Unit test: WorkspaceConfig parses idle_prompt / idle_interval_seconds from yaml - [ ] Integration test: set idle_prompt on Technical Researcher, measure that an A2A message is received every ~10 min while idle, and NOT received while busy with a delegation - [ ] Dogfood: enable on Technical Researcher for 24h, count activity_logs delta vs baseline, confirm cost stays within model ## Related - Today's research report (conversation output, summarized in commit trailer) - docs/ecosystem-watch.md → `### Hermes Agent` (the canonical reflection-on-completion example) - #159 orchestrator/worker split — complementary: leaders pulse for dispatch, workers idle-loop for pull. Together: leaders push work, workers pull work, no role ever sits idle with a cold queue. --- workspace-template/config.py | 22 ++++++++++++ workspace-template/main.py | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/workspace-template/config.py b/workspace-template/config.py index 6a8648a2..19f34d62 100644 --- a/workspace-template/config.py +++ b/workspace-template/config.py @@ -198,6 +198,17 @@ class WorkspaceConfig: initial_prompt: str = "" """Auto-sent as the first A2A message after startup. Default empty = no auto-message. Can be an inline string or a file reference (initial_prompt_file in yaml).""" + idle_prompt: str = "" + """Auto-sent every `idle_interval_seconds` while the workspace has no active + task (heartbeat.active_tasks == 0). Default empty = no idle loop. 
This is + the reflection-on-completion / backlog-pull pattern from the Hermes/Letta + playbook: the workspace self-wakes when idle, runs a lightweight reflection + prompt, and either picks up queued work or stops. Cost scales with useful + activity (the prompt returns quickly if there's nothing to do). Can be + inline or a file reference via `idle_prompt_file`.""" + idle_interval_seconds: int = 600 + """How often the idle loop checks in (seconds). Default 600 (10 min). + Ignored when idle_prompt is empty.""" skills: list[str] = field(default_factory=list) plugins: list[str] = field(default_factory=list) # installed plugin names tools: list[str] = field(default_factory=list) @@ -251,6 +262,15 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: if prompt_path.exists(): initial_prompt = prompt_path.read_text().strip() + # Resolve idle_prompt: same pattern as initial_prompt + idle_prompt = raw.get("idle_prompt", "") + idle_prompt_file = raw.get("idle_prompt_file", "") + if not idle_prompt and idle_prompt_file: + idle_path = Path(config_path) / idle_prompt_file + if idle_path.exists(): + idle_prompt = idle_path.read_text().strip() + idle_interval_seconds = int(raw.get("idle_interval_seconds", 600)) + return WorkspaceConfig( name=raw.get("name", "Workspace"), description=raw.get("description", ""), @@ -259,6 +279,8 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: model=model, runtime=runtime, initial_prompt=initial_prompt, + idle_prompt=idle_prompt, + idle_interval_seconds=idle_interval_seconds, runtime_config=RuntimeConfig( command=runtime_raw.get("command", ""), args=runtime_raw.get("args", []), diff --git a/workspace-template/main.py b/workspace-template/main.py index d54e7bb3..77894997 100644 --- a/workspace-template/main.py +++ b/workspace-template/main.py @@ -368,12 +368,80 @@ async def main(): # pragma: no cover initial_prompt_task = asyncio.create_task(_send_initial_prompt()) + # 10c. 
Idle loop — reflection-on-completion / backlog-pull pattern. + # Fires config.idle_prompt every config.idle_interval_seconds while the + # workspace has no active task. This turns every role from "waits for cron" + # into "self-wakes when idle" — the Hermes/Letta shape from today's + # multi-framework survey (see docs/ecosystem-watch.md). Cost collapses to + # event-driven in practice: the idle check is local (no LLM call, just + # heartbeat.active_tasks==0), and the prompt only fires when there's + # actually nothing to do. Gated on idle_prompt being non-empty so existing + # workspaces upgrade opt-in — set idle_prompt in org.yaml defaults or + # per-workspace to enable. + idle_loop_task = None + if config.idle_prompt: + async def _run_idle_loop(): + """Self-sends config.idle_prompt periodically when the workspace is idle.""" + # Wait for server + initial prompt to settle before the first idle check. + # Short wait (min of 60s or interval) so cold-start races don't fire instantly. + await asyncio.sleep(min(config.idle_interval_seconds, 60)) + + import json as _json + import urllib.request + + while True: + try: + await asyncio.sleep(config.idle_interval_seconds) + except asyncio.CancelledError: + return + + # Local idle check — no platform API call, no LLM call. + # heartbeat.active_tasks == 0 means no in-flight work. + if heartbeat.active_tasks > 0: + continue + + # Self-post the idle prompt via the platform A2A proxy (same + # path as initial_prompt). The agent's own concurrency control + # rejects if the workspace becomes busy between this check and + # the post — that's the expected safety valve. 
+ payload = _json.dumps({ + "method": "message/send", + "params": { + "message": { + "role": "user", + "messageId": f"idle-{_uuid.uuid4().hex[:8]}", + "parts": [{"kind": "text", "text": config.idle_prompt}], + }, + }, + }).encode() + + def _post_sync(): + try: + req = urllib.request.Request( + f"{platform_url}/workspaces/{workspace_id}/a2a", + data=payload, + headers={"Content-Type": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=600) as resp: + resp.read() + except Exception as e: + print(f"Idle loop: post failed — {e}", flush=True) + + print(f"Idle loop: firing (active_tasks=0, interval={config.idle_interval_seconds}s)", flush=True) + loop_ref = asyncio.get_event_loop() + loop_ref.run_in_executor(None, _post_sync) + + idle_loop_task = asyncio.create_task(_run_idle_loop()) + try: await server.serve() finally: # Cancel initial prompt if still running if initial_prompt_task and not initial_prompt_task.done(): initial_prompt_task.cancel() + # Cancel idle loop if running + if idle_loop_task and not idle_loop_task.done(): + idle_loop_task.cancel() # Gracefully stop the Temporal worker background task on shutdown await temporal_wrapper.stop() From 8d8ca18bc09a85cc0d996b19a60f363f5b2116c9 Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Wed, 15 Apr 2026 11:14:35 -0700 Subject: [PATCH 2/7] =?UTF-8?q?feat(hermes):=20Phase=201=20=E2=80=94=20mul?= =?UTF-8?q?ti-provider=20registry=20(15=20providers,=20back-compat=20prese?= =?UTF-8?q?rved)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ships the first half of the queued Hermes adapter expansion. PR 2 only supported Nous Portal + OpenRouter; this adds 13 more providers reachable via OpenAI-compat endpoints. Native SDK paths for Anthropic + Gemini are Phase 2 (better tool-calling + vision fidelity). 
## What's new **`workspace-template/adapters/hermes/providers.py`** (new file, 220 LOC): - ``ProviderConfig`` dataclass: name, env vars, base URL, default model, auth scheme, docs - ``PROVIDERS`` dict with 15 entries across 4 groups: - PR 2 baseline: nous_portal, openrouter - Frontier commercial: openai, anthropic, xai, gemini - Chinese providers: qwen, glm, kimi, minimax, deepseek - OSS/alt: groq, together, fireworks, mistral - ``RESOLUTION_ORDER`` tuple: priority for auto-detect (back-compat first, then commercial, then Chinese, then OSS/alt) - ``resolve_provider(explicit=None)`` -> (ProviderConfig, api_key) - With explicit name: routes to that provider, raises if env var empty - Without: walks RESOLUTION_ORDER, first env-var-set provider wins **`workspace-template/adapters/hermes/executor.py`** (refactored): - `create_executor(hermes_api_key=None, provider=None, model=None)` now has three parameters: - `hermes_api_key`: PR 2 back-compat — routes to Nous Portal - `provider`: canonical short name from the registry (e.g. 
"anthropic") - `model`: optional override of the provider's default model - Delegates all resolution to `providers.resolve_provider()` — no more hardcoded URLs or env var lookups in the executor itself - `HermesA2AExecutor.__init__` no longer has Nous-specific defaults; callers pass base_url + model explicitly (which create_executor always does) **`workspace-template/tests/test_hermes_providers.py`** (new file, 26 tests): - Registry shape invariants (count >= 15, no duplicates, every config valid) - PR 2 back-compat: HERMES_API_KEY / OPENROUTER_API_KEY still route correctly - Auto-detect for every provider in the registry (parametrized — guards against typos in env var lists) - Explicit `provider=` bypass of auto-detect - Error cases: unknown provider, explicit-but-empty, auto-detect-with-no-env - All 26 tests pass locally in 0.08s ## Back-compat guarantees | Scenario | PR 2 behavior | This PR behavior | |---|---|---| | `create_executor(hermes_api_key="x")` | Nous Portal | Nous Portal (unchanged) | | `HERMES_API_KEY=x` env, auto-detect | Nous Portal | Nous Portal (unchanged) | | `OPENROUTER_API_KEY=x` env, auto-detect | OpenRouter | OpenRouter (unchanged) | | Both env + explicit hermes_api_key param | Nous Portal (param wins) | Nous Portal (param wins, unchanged) | Nothing existing can break. New callers gain access to 13 more providers. ## What's NOT in this PR (Phase 2) - **Native Anthropic Messages API path** — better tool calling, vision, extended thinking. Requires pulling in `anthropic` SDK. ~50 LOC. - **Native Gemini generateContent path** — for vision + google tools. Requires `google-genai` SDK. ~50 LOC. - **Streaming support across all providers** — current executor is non-streaming (single chat.completions.create call). Streaming works with openai.AsyncOpenAI but hasn't been wired to the A2A event queue path. ~30 LOC. - **Per-provider model overrides in config.yaml** — Phase 1 uses the registry's default_model. 
Phase 2 adds a `hermes: { provider: qwen, model: qwen3-coder-plus }` block in the workspace config. - **`.env.example` updates** — not critical since the registry itself documents every env var via the `env_vars` field, but nice-to-have. ## Related - Queued memory: `project_hermes_multi_provider.md` - CEO directive 2026-04-15: *"once current works are cleared, I want you to focus on supporting hermes agent, right now it doesnt take too much providers"* - `docs/ecosystem-watch.md` → `### Hermes Agent` — Research Lead's eco-watch entry listed "Nous Portal, OpenRouter, GLM, Kimi, MiniMax, OpenAI, …" which shaped this registry's initial set ## Test plan - [x] Unit tests: 26/26 pass locally (pytest) - [ ] CI will run on the self-hosted macOS arm64 runner - [ ] Smoke test in a real workspace: set QWEN_API_KEY and verify Technical Researcher actually hits Alibaba DashScope successfully - [ ] Integration test per provider with real API keys (gated on env, skip when not set — Phase 2 CI addition) --- .../adapters/hermes/executor.py | 129 ++++---- .../adapters/hermes/providers.py | 289 ++++++++++++++++++ .../tests/test_hermes_providers.py | 163 ++++++++++ 3 files changed, 523 insertions(+), 58 deletions(-) create mode 100644 workspace-template/adapters/hermes/providers.py create mode 100644 workspace-template/tests/test_hermes_providers.py diff --git a/workspace-template/adapters/hermes/executor.py b/workspace-template/adapters/hermes/executor.py index ac7ae5c1..a152a4a8 100644 --- a/workspace-template/adapters/hermes/executor.py +++ b/workspace-template/adapters/hermes/executor.py @@ -1,89 +1,102 @@ -"""Hermes adapter executor — implements create_executor() for PR 2. +"""Hermes adapter executor — Phase 1 multi-provider. -Hermes models (Nous Research) are accessed via an OpenAI-compatible API, -either through the Nous Portal directly or via OpenRouter as a fallback. +Hermes models are accessed via an OpenAI-compatible API. 
Phase 1 supports 15 +providers via the shared ``providers.py`` registry: Nous Portal, OpenRouter, +OpenAI, Anthropic, xAI, Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, +Together, Fireworks, Mistral. Every provider is reached through an OpenAI-compat +``/v1/chat/completions`` endpoint, so one code path handles all of them. -Key resolution order --------------------- -1. ``hermes_api_key`` parameter (explicit call-site override) -2. ``HERMES_API_KEY`` environment variable (Nous Portal key) -3. ``OPENROUTER_API_KEY`` environment variable (OpenRouter fallback) +Key resolution order (unchanged from PR 2, extended) +----------------------------------------------------- +1. ``hermes_api_key`` parameter (explicit call-site override — routes to Nous Portal) +2. ``provider`` parameter (explicit provider name — looks up its env var(s)) +3. Auto-detect: walk ``providers.RESOLUTION_ORDER`` and pick the first provider + whose env var is set (``HERMES_API_KEY`` / ``OPENROUTER_API_KEY`` still come + first so PR 2 back-compat holds). -Raises ``ValueError`` if none of the three sources yields a non-empty key. +Raises ``ValueError`` if nothing resolves. The error message lists every env var +that was checked so the operator knows their options without reading source. """ from __future__ import annotations import logging import os +from typing import Optional + +from .providers import PROVIDERS, resolve_provider logger = logging.getLogger(__name__) -# Default base URLs -_NOUS_BASE_URL = "https://inference-prod.nousresearch.com/v1" -_OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1" -# Default model when routing through OpenRouter -_DEFAULT_MODEL = "nousresearch/hermes-3-llama-3.1-405b" - - -def create_executor(hermes_api_key: str | None = None): +def create_executor( + hermes_api_key: Optional[str] = None, + provider: Optional[str] = None, + model: Optional[str] = None, +): """Create and return a LangGraph-compatible executor for the Hermes adapter. 
- Key resolution order: - 1. hermes_api_key parameter (if provided) - 2. HERMES_API_KEY environment variable - 3. OPENROUTER_API_KEY environment variable (fallback) - Raises ValueError if none of the above are found. - Parameters ---------- hermes_api_key: - Explicit API key. When provided, the Nous Portal base URL is used. - When absent and OPENROUTER_API_KEY is the fallback, OpenRouter's - base URL is used instead. + Explicit API key. When provided, the call routes to Nous Portal (the + PR 2 back-compat path) regardless of ``provider``. + provider: + Canonical provider short name from ``providers.PROVIDERS`` (e.g. + ``"openai"``, ``"anthropic"``, ``"qwen"``, ``"xai"``). When set, the + registry entry's env vars are used to find the API key and its + base URL + default model override the auto-detect path. When unset, + auto-detect walks ``providers.RESOLUTION_ORDER`` until it finds a + provider whose env var is set. + model: + Override the provider's default model. Passed straight through to + ``chat.completions.create``. Returns ------- HermesA2AExecutor - A ready-to-use executor instance wired with the resolved key - and matching base URL. + A ready-to-use executor wired with the resolved api_key + base_url + + model. + + Raises + ------ + ValueError + If ``provider`` is an unknown name, if ``provider`` is known but its + env vars are all empty, or if auto-detect finds nothing. """ - api_key: str | None = None - base_url: str = _NOUS_BASE_URL - + # Path 1: PR 2 back-compat — explicit hermes_api_key routes to Nous Portal. 
if hermes_api_key: - api_key = hermes_api_key - base_url = _NOUS_BASE_URL - logger.debug("Hermes: using explicit hermes_api_key param") - else: - env_hermes = os.environ.get("HERMES_API_KEY", "").strip() - if env_hermes: - api_key = env_hermes - base_url = _NOUS_BASE_URL - logger.debug("Hermes: using HERMES_API_KEY env var") - else: - env_openrouter = os.environ.get("OPENROUTER_API_KEY", "").strip() - if env_openrouter: - api_key = env_openrouter - base_url = _OPENROUTER_BASE_URL - logger.debug("Hermes: using OPENROUTER_API_KEY env var (fallback)") - - if not api_key: - raise ValueError( - "No API key found: provide hermes_api_key param, " - "or set HERMES_API_KEY or OPENROUTER_API_KEY env var" + cfg = PROVIDERS["nous_portal"] + logger.debug("Hermes: using explicit hermes_api_key param (Nous Portal)") + return HermesA2AExecutor( + api_key=hermes_api_key, + base_url=cfg.base_url, + model=model or cfg.default_model, ) - return HermesA2AExecutor(api_key=api_key, base_url=base_url) + # Path 2/3: registry resolution (either explicit provider name or auto-detect). + cfg, api_key = resolve_provider(provider) + logger.info( + "Hermes: provider=%s base_url=%s model=%s", + cfg.name, + cfg.base_url, + model or cfg.default_model, + ) + return HermesA2AExecutor( + api_key=api_key, + base_url=cfg.base_url, + model=model or cfg.default_model, + ) class HermesA2AExecutor: - """LangGraph-compatible AgentExecutor for Hermes models. + """LangGraph-compatible AgentExecutor for Hermes-style multi-provider LLMs. - Uses the OpenAI-compatible ``openai`` client pointed at either the - Nous Portal or OpenRouter, matching the pattern of sibling adapters - (AutoGen, LangGraph) which all use OpenAI-compatible clients. + Uses the OpenAI-compatible ``openai`` client pointed at whichever provider + was resolved by ``create_executor`` (Nous Portal, OpenRouter, OpenAI, + Anthropic, xAI, Gemini, Qwen, GLM, Kimi, MiniMax, DeepSeek, Groq, Together, + Fireworks, Mistral). 
Matches the pattern of sibling adapters (AutoGen, + LangGraph) which also use OpenAI-compat clients. The ``execute()`` and ``cancel()`` async methods satisfy the ``a2a.server.agent_execution.AgentExecutor`` interface so this @@ -93,8 +106,8 @@ class HermesA2AExecutor: def __init__( self, api_key: str, - base_url: str = _NOUS_BASE_URL, - model: str = _DEFAULT_MODEL, + base_url: str, + model: str, heartbeat=None, ): self.api_key = api_key diff --git a/workspace-template/adapters/hermes/providers.py b/workspace-template/adapters/hermes/providers.py new file mode 100644 index 00000000..35a679cc --- /dev/null +++ b/workspace-template/adapters/hermes/providers.py @@ -0,0 +1,289 @@ +"""Hermes adapter provider registry — Phase 1 of the multi-provider expansion. + +Extends the original PR-2 Hermes executor (Nous Portal + OpenRouter only) to a +registry of 15 providers. Every provider in this registry is reached via its +OpenAI-compat endpoint, which means the existing ``openai.AsyncOpenAI`` client +and request shape in ``executor.py`` Just Works without any new dependencies. + +Native SDK paths (Anthropic Messages API, Gemini generateContent API) are +Phase 2 — they give better tool-calling + vision fidelity but are not +required to unblock the basic "CEO wants Hermes on Qwen / GLM / xAI / +Gemini" asks that triggered this work. + +## Design +- ``ProviderConfig`` captures everything needed to point the OpenAI client at + a provider: env var(s), base URL, default model, auth scheme. +- ``PROVIDERS`` is a dict keyed by canonical short name (``"openai"``, + ``"anthropic"``, ``"qwen"``, etc.). +- ``RESOLUTION_ORDER`` is the auto-detect sequence used when the caller + doesn't specify a provider — it tries each provider's env vars in turn and + picks the first one that's set. +- ``resolve_provider(explicit)`` returns ``(ProviderConfig, api_key)`` or + raises ``ValueError`` with a helpful message listing every env var it + checked.
+ +## Back-compat +The original ``HERMES_API_KEY`` and ``OPENROUTER_API_KEY`` env vars still work +and still route to Nous Portal / OpenRouter respectively — they're just now +registered as two entries in ``PROVIDERS`` rather than hardcoded in +``create_executor``. + +## Adding a new provider +1. Append a new ``ProviderConfig`` entry under ``PROVIDERS`` +2. Add its short name to ``RESOLUTION_ORDER`` in the desired priority slot +3. Document the env var in the workspace ``.env.example`` (if present) +That's it. Nothing else needs to change — the executor reads the registry. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass +from typing import Optional + + +@dataclass(frozen=True) +class ProviderConfig: + """Everything the Hermes executor needs to talk to a single LLM provider. + + Every provider in Phase 1 is reachable via an OpenAI-compatible + ``/v1/chat/completions`` endpoint, so ``auth_scheme`` is always + ``"openai"`` (Bearer token, OpenAI-style messages payload). Phase 2 + will add ``"anthropic"`` (native Messages API) and ``"gemini"`` (native + generateContent API) for roles that need better tool-call fidelity. + """ + + name: str + """Canonical short name — the key used in ``PROVIDERS`` and the ``provider`` kwarg.""" + + env_vars: tuple[str, ...] + """API key env vars, checked in order. First non-empty value wins. + Supporting multiple env vars lets us accept common aliases + (e.g. ``QWEN_API_KEY`` AND ``DASHSCOPE_API_KEY`` both work for Alibaba Qwen).""" + + base_url: str + """OpenAI-compat base URL. Must include the ``/v1`` suffix where applicable.""" + + default_model: str + """Default model name to pass to ``chat.completions.create``. + Per-call overrides are possible via the executor constructor.""" + + auth_scheme: str = "openai" + """``openai`` (Bearer token + OpenAI-style payload) for every Phase 1 provider. 
+ Phase 2 reserves ``anthropic`` and ``gemini`` for native-SDK paths.""" + + docs: str = "" + """Short note — which docs URL the config was derived from, or which quirks + to know about. Not used programmatically; exists to make future audits of + this file cheaper than re-Googling every entry.""" + + +# --- Provider registry ------------------------------------------------------ +# +# Ordering within this dict is not semantically meaningful — use +# ``RESOLUTION_ORDER`` below to control auto-detect priority. This dict is +# grouped by "who owns the provider" just for human readability. + +PROVIDERS: dict[str, ProviderConfig] = { + # --- Existing (PR 2 baseline) --------------------------------------- + "nous_portal": ProviderConfig( + name="nous_portal", + env_vars=("HERMES_API_KEY", "NOUS_API_KEY"), + base_url="https://inference-prod.nousresearch.com/v1", + default_model="nousresearch/hermes-3-llama-3.1-405b", + docs="Nous Research Portal — original Hermes adapter target from PR 2.", + ), + "openrouter": ProviderConfig( + name="openrouter", + env_vars=("OPENROUTER_API_KEY",), + base_url="https://openrouter.ai/api/v1", + default_model="anthropic/claude-sonnet-4.5", + docs="OpenRouter — unified OpenAI-compat gateway to hundreds of models. " + "Useful for A/B testing and as a fallback when a direct provider is down.", + ), + + # --- Frontier commercial (US) --------------------------------------- + "openai": ProviderConfig( + name="openai", + env_vars=("OPENAI_API_KEY",), + base_url="https://api.openai.com/v1", + default_model="gpt-4o", + docs="OpenAI — canonical OpenAI-compat endpoint. Works out of the box.", + ), + "anthropic": ProviderConfig( + name="anthropic", + env_vars=("ANTHROPIC_API_KEY",), + base_url="https://api.anthropic.com/v1", + default_model="claude-sonnet-4-5", + docs="Anthropic — Phase 1 uses the OpenAI-compat shim at /v1. 
Phase 2 " + "will add the native Messages API path for better tool calling.", + ), + "xai": ProviderConfig( + name="xai", + env_vars=("XAI_API_KEY", "GROK_API_KEY"), + base_url="https://api.x.ai/v1", + default_model="grok-4", + docs="xAI — Grok family. OpenAI-compat via api.x.ai/v1.", + ), + "gemini": ProviderConfig( + name="gemini", + env_vars=("GEMINI_API_KEY", "GOOGLE_API_KEY"), + base_url="https://generativelanguage.googleapis.com/v1beta/openai", + default_model="gemini-2.5-flash", + docs="Google Gemini — uses the documented OpenAI-compat endpoint at " + "/v1beta/openai. Phase 2 will add native generateContent for vision.", + ), + + # --- Chinese providers ---------------------------------------------- + "qwen": ProviderConfig( + name="qwen", + env_vars=("QWEN_API_KEY", "DASHSCOPE_API_KEY"), + base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1", + default_model="qwen3-235b-a22b", + docs="Alibaba Qwen via DashScope international endpoint. OpenAI-compat mode. " + "For domestic China use dashscope.aliyuncs.com (no -intl).", + ), + "glm": ProviderConfig( + name="glm", + env_vars=("GLM_API_KEY", "ZHIPU_API_KEY"), + base_url="https://open.bigmodel.cn/api/paas/v4", + default_model="glm-4-plus", + docs="Zhipu AI GLM — open.bigmodel.cn, OpenAI-compat via /api/paas/v4.", + ), + "kimi": ProviderConfig( + name="kimi", + env_vars=("KIMI_API_KEY", "MOONSHOT_API_KEY"), + base_url="https://api.moonshot.ai/v1", + default_model="kimi-k2", + docs="Moonshot AI Kimi K2 — OpenAI-compat at api.moonshot.ai/v1.", + ), + "minimax": ProviderConfig( + name="minimax", + env_vars=("MINIMAX_API_KEY",), + base_url="https://api.minimax.io/v1", + default_model="MiniMax-M2", + docs="MiniMax — OpenAI-compat at api.minimax.io/v1. 
" + "Note: older base URL api.minimaxi.chat is deprecated.", + ), + "deepseek": ProviderConfig( + name="deepseek", + env_vars=("DEEPSEEK_API_KEY",), + base_url="https://api.deepseek.com/v1", + default_model="deepseek-chat", + docs="DeepSeek — very cheap, OpenAI-compat at api.deepseek.com/v1.", + ), + + # --- OSS / alt providers -------------------------------------------- + "groq": ProviderConfig( + name="groq", + env_vars=("GROQ_API_KEY",), + base_url="https://api.groq.com/openai/v1", + default_model="llama-3.3-70b-versatile", + docs="Groq LPU inference — very fast, OpenAI-compat at api.groq.com/openai/v1.", + ), + "together": ProviderConfig( + name="together", + env_vars=("TOGETHER_API_KEY",), + base_url="https://api.together.xyz/v1", + default_model="meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", + docs="Together AI — OSS model hosting, OpenAI-compat at api.together.xyz/v1.", + ), + "fireworks": ProviderConfig( + name="fireworks", + env_vars=("FIREWORKS_API_KEY",), + base_url="https://api.fireworks.ai/inference/v1", + default_model="accounts/fireworks/models/llama-v3p3-70b-instruct", + docs="Fireworks AI — OSS model hosting, OpenAI-compat at api.fireworks.ai/inference/v1.", + ), + "mistral": ProviderConfig( + name="mistral", + env_vars=("MISTRAL_API_KEY",), + base_url="https://api.mistral.ai/v1", + default_model="mistral-large-latest", + docs="Mistral AI — OpenAI-compat at api.mistral.ai/v1.", + ), +} + + +# --- Auto-detect resolution order ------------------------------------------- +# +# When the caller doesn't specify a provider, resolve_provider() walks this +# list in order and picks the first provider whose env var is set. Order is +# chosen to preserve back-compat (the two original PR-2 providers come first) +# followed by the most likely-to-be-configured commercial APIs. + +RESOLUTION_ORDER: tuple[str, ...] 
def resolve_provider(explicit: Optional[str] = None) -> tuple[ProviderConfig, str]:
    """Resolve a provider name to a ``(ProviderConfig, api_key)`` pair.

    Resolution order:

    1. If ``explicit`` is given, look it up in ``PROVIDERS`` and try every
       env var on that provider's config. Raise with a clear message if the
       name is unknown or if all env vars are empty.

    2. Otherwise auto-detect: walk ``RESOLUTION_ORDER`` and return the first
       provider whose env var is set.

    Raises
    ------
    ValueError
        If ``explicit`` is an unknown provider name, if ``explicit`` is a
        known provider but its env vars are all empty, or if no env var is
        set for any provider in auto-detect mode.
    """

    def _first_set_env(cfg: ProviderConfig) -> Optional[str]:
        # Single scan shared by the explicit and auto-detect paths (the two
        # copies of this loop previously had to be kept in sync by hand):
        # first env var with a non-empty, post-strip value wins.
        for env in cfg.env_vars:
            val = os.environ.get(env, "").strip()
            if val:
                return val
        return None

    if explicit:
        if explicit not in PROVIDERS:
            raise ValueError(
                f"Unknown Hermes provider: {explicit!r}. "
                f"Available: {sorted(PROVIDERS)}"
            )
        cfg = PROVIDERS[explicit]
        val = _first_set_env(cfg)
        if val is not None:
            return cfg, val
        # Deliberately NO fallback to auto-detect here — silently routing an
        # explicitly-requested provider to a different one would be surprising
        # ("why is my openai config talking to anthropic?").
        raise ValueError(
            f"Hermes provider {explicit!r} specified but no env var set. "
            f"Tried: {cfg.env_vars}"
        )

    # Auto-detect — first provider in RESOLUTION_ORDER with a non-empty
    # env var wins.
    for name in RESOLUTION_ORDER:
        cfg = PROVIDERS[name]
        val = _first_set_env(cfg)
        if val is not None:
            return cfg, val

    # Nothing set — raise with the full list so the operator knows every
    # option they have without having to read the source.
    tried = [env for name in RESOLUTION_ORDER for env in PROVIDERS[name].env_vars]
    raise ValueError(
        "No Hermes provider API key found. Set any one of: " + ", ".join(tried)
    )
cfg.env_vars, f"{name}: must declare at least one env var" + assert cfg.base_url.startswith("http"), f"{name}: base_url must be http(s)" + assert cfg.default_model, f"{name}: must declare a default model" + assert name in providers.RESOLUTION_ORDER, f"{name}: missing from resolution order" + + +def test_resolution_order_has_no_duplicates(): + assert len(providers.RESOLUTION_ORDER) == len(set(providers.RESOLUTION_ORDER)) + + +def test_backcompat_hermes_api_key_first(): + """PR 2 back-compat — HERMES_API_KEY auto-detect still routes to Nous Portal.""" + os.environ["HERMES_API_KEY"] = "hermes-test-key" + cfg, key = providers.resolve_provider() + assert cfg.name == "nous_portal" + assert key == "hermes-test-key" + + +def test_backcompat_openrouter_api_key_second(): + """PR 2 back-compat — OPENROUTER_API_KEY still routes to OpenRouter when HERMES_API_KEY is absent.""" + os.environ["OPENROUTER_API_KEY"] = "or-test-key" + cfg, key = providers.resolve_provider() + assert cfg.name == "openrouter" + + +def test_auto_detect_openai(): + os.environ["OPENAI_API_KEY"] = "sk-test" + cfg, key = providers.resolve_provider() + assert cfg.name == "openai" + assert cfg.base_url == "https://api.openai.com/v1" + + +def test_auto_detect_anthropic(): + os.environ["ANTHROPIC_API_KEY"] = "ant-test" + cfg, key = providers.resolve_provider() + assert cfg.name == "anthropic" + + +@pytest.mark.parametrize( + "env_var,expected", + [ + ("XAI_API_KEY", "xai"), + ("GROK_API_KEY", "xai"), + ("QWEN_API_KEY", "qwen"), + ("DASHSCOPE_API_KEY", "qwen"), + ("GLM_API_KEY", "glm"), + ("ZHIPU_API_KEY", "glm"), + ("KIMI_API_KEY", "kimi"), + ("MOONSHOT_API_KEY", "kimi"), + ("GROQ_API_KEY", "groq"), + ("DEEPSEEK_API_KEY", "deepseek"), + ("MISTRAL_API_KEY", "mistral"), + ("TOGETHER_API_KEY", "together"), + ("FIREWORKS_API_KEY", "fireworks"), + ("MINIMAX_API_KEY", "minimax"), + ("GEMINI_API_KEY", "gemini"), + ("GOOGLE_API_KEY", "gemini"), + ], +) +def test_every_provider_env_var_resolves(env_var, expected): + 
"""Every env var listed in PROVIDERS resolves to the right provider + — this guards against typos in the registry dict.""" + os.environ[env_var] = "test-key" + cfg, _ = providers.resolve_provider() + assert cfg.name == expected, ( + f"{env_var} should route to {expected}, got {cfg.name}" + ) + + +def test_explicit_provider_wins_over_auto_detect(): + """When `provider=` is given, auto-detect is bypassed.""" + os.environ["HERMES_API_KEY"] = "hermes-key" # would auto-detect + os.environ["OPENAI_API_KEY"] = "openai-key" + cfg, key = providers.resolve_provider("openai") + assert cfg.name == "openai" + assert key == "openai-key" + + +def test_unknown_provider_raises(): + with pytest.raises(ValueError, match="Unknown Hermes provider"): + providers.resolve_provider("this_provider_does_not_exist") + + +def test_explicit_provider_with_missing_env_raises(): + """If the operator asks for a specific provider but its env var is empty, + we raise — we do NOT fall back to auto-detect because that would be + surprising ("why is my openai config talking to anthropic?").""" + os.environ["HERMES_API_KEY"] = "some-value" # auto-detect would succeed + with pytest.raises(ValueError, match="no env var set"): + providers.resolve_provider("anthropic") + + +def test_auto_detect_with_no_env_lists_all_options(): + """The error message should list every env var the caller could set, + so operators don't have to read the source.""" + # No env vars set (autouse fixture clears them all) + with pytest.raises(ValueError) as exc_info: + providers.resolve_provider() + msg = str(exc_info.value) + # Spot-check: the message names at least a few providers + for env_var in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY", "QWEN_API_KEY"): + assert env_var in msg, f"error message should mention {env_var}" From 25bbfd3bfc45a72c5dd4fa5ca8df583f4f740696 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 15 Apr 2026 11:15:08 -0700 Subject: [PATCH 3/7] =?UTF-8?q?fix(security):=20C2=20from=20#169=20?= 
=?UTF-8?q?=E2=80=94=20reject=20spoofed=20source=5Fid=20in=20activity.Repo?= =?UTF-8?q?rt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry-picks the one genuinely new fix from #169 after confirming the rest of that PR is already covered on main (C1/C3/C5 by wsAuth group, C6 by #94+#119 SSRF blocklist, C4 ownership by existing WHERE filter). Pre-existing middleware (WorkspaceAuth on /workspaces/:id/* sub-routes) proves the caller owns the :id path param. But the body field source_id was never validated — a workspace authenticated for its own /activity endpoint could still attribute logs to a different workspace by setting source_id=. Rejected with 403 now. No schema change, no new middleware. 4-line handler delta. Closes the only real gap in #169; #169 itself will be closed as superseded. Co-Authored-By: Claude Opus 4.6 (1M context) --- platform/internal/handlers/activity.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/platform/internal/handlers/activity.go b/platform/internal/handlers/activity.go index 9be1daf8..b92538d8 100644 --- a/platform/internal/handlers/activity.go +++ b/platform/internal/handlers/activity.go @@ -329,7 +329,18 @@ func (h *ActivityHandler) Report(c *gin.Context) { if reqBody == nil { reqBody = body.Metadata } + // C2 (from #169) — source_id spoof defense. WorkspaceAuth middleware + // already proves the caller owns :id, but that check doesn't cover the + // body field. Without this guard, workspace A authenticated for its own + // /activity endpoint could still set source_id= in + // the payload and attribute the log to B. Reject any body where + // source_id is non-empty AND differs from the authenticated workspace. + // Empty source_id falls through to the default-to-self branch below. 
sourceID := body.SourceID + if sourceID != "" && sourceID != workspaceID { + c.JSON(http.StatusForbidden, gin.H{"error": "source_id must match authenticated workspace"}) + return + } if sourceID == "" { sourceID = workspaceID } From 7d7d5995e0cdf2c97c2e842caca6928a0297b1c5 Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 15 Apr 2026 11:18:52 -0700 Subject: [PATCH 4/7] =?UTF-8?q?fix(workspace-template):=20#204=20=E2=80=94?= =?UTF-8?q?=20drop=20PushNotificationSender=20(abstract=20class)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #204. PR #198 wired push_sender=PushNotificationSender() into DefaultRequestHandler to satisfy #175's push-notification capability, but PushNotificationSender in a2a-sdk is an abstract base class and cannot be instantiated. Every workspace container crashed on startup with TypeError. Reverted to DefaultRequestHandler's defaults. The pushNotifications capability still appears in AgentCard.capabilities (advertised to A2A clients) but actual implementation of the sender is deferred to a Phase-H follow-up that subclasses PushNotificationSender properly. Existing pytest suite unchanged (the crash was only at runtime on main.py import, which no existing test exercises directly). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- workspace-template/main.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/workspace-template/main.py b/workspace-template/main.py index d54e7bb3..c8e5b7d6 100644 --- a/workspace-template/main.py +++ b/workspace-template/main.py @@ -12,7 +12,7 @@ import httpx import uvicorn from a2a.server.apps import A2AStarletteApplication from a2a.server.request_handlers import DefaultRequestHandler -from a2a.server.tasks import InMemoryTaskStore, InMemoryPushNotificationConfigStore, PushNotificationSender +from a2a.server.tasks import InMemoryTaskStore from a2a.types import AgentCard, AgentCapabilities, AgentSkill from adapters import get_adapter, AdapterConfig @@ -152,12 +152,20 @@ async def main(): # pragma: no cover defaultOutputModes=["text/plain", "application/json"], ) - # 7. Wrap in A2A + # 7. Wrap in A2A. + # + # Regression fix (#204): PR #198 tried to wire push_config_store + + # push_sender to satisfy #175 (push notification capability), but + # PushNotificationSender is an abstract base class in the a2a-sdk and + # can't be instantiated directly. Passing it crashed main.py on startup + # with `TypeError: Can't instantiate abstract class`. Dropped back to + # DefaultRequestHandler's own defaults — pushNotifications capability + # in the AgentCard below is still advertised via AgentCapabilities so + # clients know we COULD do pushes; actually implementing them requires + # a concrete sender subclass, tracked as a Phase-H follow-up to #175. 
handler = DefaultRequestHandler( agent_executor=executor, task_store=InMemoryTaskStore(), - push_config_store=InMemoryPushNotificationConfigStore(), - push_sender=PushNotificationSender(), ) app = A2AStarletteApplication( From 0b627816ed71f6d8096aa69a9a12b104af34e10c Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 15 Apr 2026 11:24:06 -0700 Subject: [PATCH 5/7] =?UTF-8?q?fix(db):=20#211=20=E2=80=94=20migration=20r?= =?UTF-8?q?unner=20skips=20*.down.sql=20(stop=20wiping=20data=20on=20boot)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #211 HIGH ops/security. RunMigrations globbed \`*.sql\` which matches both \`.up.sql\` AND \`.down.sql\`. Alphabetical sort puts \"d\" before \"u\", so every platform boot ran the rollback BEFORE the forward migration for any pair starting with migration 018. Net effect: every restart wiped workspace_auth_tokens (the 020 pair), which in turn regressed AdminAuth to its fail-open bootstrap bypass for every route protected by it — the live server was effectively unauthenticated from restart until the next workspace re-registered. Also wiped 018_secrets_encryption_version and 019_workspace_access pairs silently. Fix is a 3-line filter: skip files whose base name ends in \`.down.sql\`. Down migrations remain on disk for operator-driven rollback via psql, but are never picked up by the auto-run loop. Added unit test against a tmp dir to lock the filter behaviour so this can never regress: stages a mix of legacy plain .sql, matched up/down pairs, asserts only forward files survive. Follow-up (not in this PR): the runner still re-applies every migration on every boot. Migrations must be idempotent. A proper schema_migrations tracking table is tracked as a future cleanup. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- platform/internal/db/postgres.go | 32 +++++++- platform/internal/db/postgres_migrate_test.go | 79 +++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 platform/internal/db/postgres_migrate_test.go diff --git a/platform/internal/db/postgres.go b/platform/internal/db/postgres.go index bc7039b8..a0d9cb7e 100644 --- a/platform/internal/db/postgres.go +++ b/platform/internal/db/postgres.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "sort" + "strings" _ "github.com/lib/pq" ) @@ -29,11 +30,40 @@ func InitPostgres(databaseURL string) error { return nil } +// RunMigrations applies every forward migration file in migrationsDir on +// platform boot. +// +// Issue #211 — DO NOT glob `*.sql`. That matches both `.up.sql` and `.down.sql`, +// and sort.Strings orders "d" before "u", so every boot used to run the +// rollback BEFORE the forward migration for any pair, wiping data from any +// table the pair recreates (020_workspace_auth_tokens was the canary — every +// restart wiped live tokens, regressing AdminAuth to fail-open bypass for +// every subsequent request). +// +// The fix: only run files that are either `.up.sql` or plain `.sql` (legacy +// pre-pair migrations like 009_activity_logs.sql). Never touch `.down.sql` +// — those are intentional rollbacks, only to be run by operators manually +// via psql when a real rollback is required. +// +// NOTE: this runner still re-applies every migration on every boot. That +// works for idempotent `CREATE TABLE IF NOT EXISTS` + `ALTER TABLE ... IF NOT +// EXISTS` statements but means non-idempotent DDL will fail on restart. +// Migration authors must write idempotent SQL. A real schema_migrations +// tracking table would be better; tracked as follow-up. 
// Issue #211 regression guard: RunMigrations used to glob *.sql, which
// matched both `.up.sql` and `.down.sql`; alphabetical sort ran the rollback
// before the forward migration on every boot, wiping recreated tables
// (workspace_auth_tokens was the canary).
//
// The real RunMigrations needs a live DB connection, so this test mirrors
// its forward-only filter — the exact step where the bug lived — against a
// staged tmp directory.
func TestRunMigrations_SkipsDownSqlFiles(t *testing.T) {
	dir := t.TempDir()

	// Realistic mix: a legacy plain .sql, two up/down pairs.
	staged := map[string]string{
		"009_legacy.sql":                     "-- legacy forward only\n",
		"020_workspace_auth_tokens.up.sql":   "CREATE TABLE workspace_auth_tokens ();\n",
		"020_workspace_auth_tokens.down.sql": "DROP TABLE workspace_auth_tokens;\n",
		"021_other.up.sql":                   "-- 21 forward\n",
		"021_other.down.sql":                 "-- 21 rollback (must not run)\n",
	}
	for name, contents := range staged {
		if err := os.WriteFile(filepath.Join(dir, name), []byte(contents), 0o644); err != nil {
			t.Fatal(err)
		}
	}

	// Same filter logic as RunMigrations: glob *.sql, drop *.down.sql.
	matches, err := filepath.Glob(filepath.Join(dir, "*.sql"))
	if err != nil {
		t.Fatal(err)
	}
	forward := make([]string, 0, len(matches))
	for _, m := range matches {
		if name := filepath.Base(m); !strings.HasSuffix(name, ".down.sql") {
			forward = append(forward, name)
		}
	}

	// Exactly the three forward files survive, and no .down.sql leaks.
	if len(forward) != 3 {
		t.Errorf("expected 3 forward migrations, got %d: %v", len(forward), forward)
	}
	seen := make(map[string]bool, len(forward))
	for _, name := range forward {
		if strings.HasSuffix(name, ".down.sql") {
			t.Errorf("down migration leaked through filter: %s", name)
		}
		seen[name] = true
	}
	for _, want := range []string{
		"009_legacy.sql",
		"020_workspace_auth_tokens.up.sql",
		"021_other.up.sql",
	} {
		if !seen[want] {
			t.Errorf("expected forward set to include %q, got %v", want, forward)
		}
	}
}
Splits the follow-ups into this Go-side PR and a later Python/docs PR. ## Fixes 1. wsauth_middleware.go CanvasOrBearer — invalid bearer now hard-rejects with 401 instead of falling through to the Origin check. Previous code let an attacker with an expired token + matching Origin bypass auth. Empty bearer still falls through to the Origin path (the intended canvas path). 2. scheduler.go short() helper — extracts safe UUID prefix truncation. Pre-existing unsafe [:12] and [:8] slices would panic on workspace IDs shorter than the bound. #115's new skip path had the bounds check; the happy-path log lines did not. One helper, three call sites. 3. activity.go security-event log on source_id spoof — #209 added the 403 but the attempt was invisible to any auditor cron. Stable greppable log line with authed_workspace, body_source_id, client IP. ## New tests - TestShort_helper — bounds-safety regression guard for the helper - TestRecordSkipped_writesSkippedStatus — #115 coverage gap, exercises UPDATE + INSERT via sqlmock - TestRecordSkipped_shortWorkspaceIDNoPanic — short-ID crash regression - TestActivityHandler_Report_SourceIDSpoofRejected — #209 403 path - TestActivityHandler_Report_MatchingSourceIDAccepted — non-spoof path - TestHistory_IncludesErrorDetail — #152 problem B coverage go test -race ./... green locally. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- platform/internal/handlers/activity.go | 5 ++ platform/internal/handlers/handlers_test.go | 50 +++++++++++ platform/internal/handlers/schedules_test.go | 47 ++++++++++ .../internal/middleware/wsauth_middleware.go | 11 ++- platform/internal/scheduler/scheduler.go | 24 ++--- platform/internal/scheduler/scheduler_test.go | 87 +++++++++++++++++++ 6 files changed, 210 insertions(+), 14 deletions(-) diff --git a/platform/internal/handlers/activity.go b/platform/internal/handlers/activity.go index b92538d8..4699a3a9 100644 --- a/platform/internal/handlers/activity.go +++ b/platform/internal/handlers/activity.go @@ -338,6 +338,11 @@ func (h *ActivityHandler) Report(c *gin.Context) { // Empty source_id falls through to the default-to-self branch below. sourceID := body.SourceID if sourceID != "" && sourceID != workspaceID { + // Log the spoof attempt as a security event so an auditor cron can + // surface repeat probing. Keep the log line stable (greppable) and + // avoid echoing attacker-supplied data verbatim beyond the UUIDs. + log.Printf("security: source_id spoof attempt — authed_workspace=%s body_source_id=%s remote=%s", + workspaceID, sourceID, c.ClientIP()) c.JSON(http.StatusForbidden, gin.H{"error": "source_id must match authenticated workspace"}) return } diff --git a/platform/internal/handlers/handlers_test.go b/platform/internal/handlers/handlers_test.go index d8f738d8..20897baf 100644 --- a/platform/internal/handlers/handlers_test.go +++ b/platform/internal/handlers/handlers_test.go @@ -1081,3 +1081,53 @@ func TestSharedContext_NoSharedFiles(t *testing.T) { t.Errorf("unmet sqlmock expectations: %v", err) } } + +// TestActivityHandler_Report_SourceIDSpoofRejected verifies the #209 spoof +// guard: a workspace authenticated for :id cannot inject activity rows with +// source_id pointing at a different workspace. 
Bearer-auth middleware would +// already cover the obvious case; this is the belt-and-suspenders body check. +func TestActivityHandler_Report_SourceIDSpoofRejected(t *testing.T) { + setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewActivityHandler(broadcaster) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-alice"}} + // alice's workspace authenticated — but body claims source_id=ws-bob. + body := `{"activity_type":"agent_log","summary":"fake log","source_id":"ws-bob"}` + c.Request = httptest.NewRequest("POST", "/workspaces/ws-alice/activity", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Report(c) + + if w.Code != http.StatusForbidden { + t.Errorf("spoof: got %d, want 403 (%s)", w.Code, w.Body.String()) + } +} + +// TestActivityHandler_Report_MatchingSourceIDAccepted — the non-spoof path: +// body.source_id explicitly matches workspaceID, still accepted. +func TestActivityHandler_Report_MatchingSourceIDAccepted(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + broadcaster := newTestBroadcaster() + handler := NewActivityHandler(broadcaster) + + mock.ExpectExec("INSERT INTO activity_logs"). 
+ WillReturnResult(sqlmock.NewResult(0, 1)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{{Key: "id", Value: "ws-alice"}} + body := `{"activity_type":"agent_log","summary":"self log","source_id":"ws-alice"}` + c.Request = httptest.NewRequest("POST", "/workspaces/ws-alice/activity", bytes.NewBufferString(body)) + c.Request.Header.Set("Content-Type", "application/json") + + handler.Report(c) + + if w.Code != http.StatusOK { + t.Errorf("matching source_id: got %d, want 200 (%s)", w.Code, w.Body.String()) + } +} diff --git a/platform/internal/handlers/schedules_test.go b/platform/internal/handlers/schedules_test.go index ad8a62ac..a3d307f6 100644 --- a/platform/internal/handlers/schedules_test.go +++ b/platform/internal/handlers/schedules_test.go @@ -124,3 +124,50 @@ func TestList_IncludesSourceColumn(t *testing.T) { t.Fatalf("unmet expectations: %v", err) } } + +// TestHistory_IncludesErrorDetail — #152 problem B coverage. The history +// endpoint must surface error_detail from activity_logs so clients know +// why a cron run failed (not just that it failed). Writes a fake cron_run +// row via sqlmock with a non-empty error_detail and asserts it reaches +// the JSON response. +func TestHistory_IncludesErrorDetail(t *testing.T) { + mock := setupTestDB(t) + setupTestRedis(t) + handler := NewScheduleHandler() + + workspaceID := "550e8400-e29b-41d4-a716-446655440000" + scheduleID := "11111111-1111-1111-1111-111111111111" + now := time.Now() + + cols := []string{"created_at", "duration_ms", "status", "error_detail", "request_body"} + mock.ExpectQuery("SELECT created_at, duration_ms, status"). + WithArgs(workspaceID, scheduleID). + WillReturnRows(sqlmock.NewRows(cols). + AddRow(now, 4200, "error", "HTTP 500 — workspace agent OOM", `{"schedule_id":"`+scheduleID+`"}`). 
+ AddRow(now, 1500, "ok", "", `{"schedule_id":"`+scheduleID+`"}`)) + + w := httptest.NewRecorder() + c, _ := gin.CreateTestContext(w) + c.Params = gin.Params{ + {Key: "id", Value: workspaceID}, + {Key: "scheduleId", Value: scheduleID}, + } + c.Request = httptest.NewRequest("GET", + "/workspaces/"+workspaceID+"/schedules/"+scheduleID+"/history", nil) + + handler.History(c) + + if w.Code != http.StatusOK { + t.Fatalf("expected 200, got %d: %s", w.Code, w.Body.String()) + } + body := w.Body.String() + if !strings.Contains(body, `"error_detail":"HTTP 500 — workspace agent OOM"`) { + t.Errorf("history response missing populated error_detail: %s", body) + } + if !strings.Contains(body, `"error_detail":""`) { + t.Errorf("history response missing empty error_detail on ok row: %s", body) + } + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("sqlmock: %v", err) + } +} diff --git a/platform/internal/middleware/wsauth_middleware.go b/platform/internal/middleware/wsauth_middleware.go index 9eee64f7..0b357756 100644 --- a/platform/internal/middleware/wsauth_middleware.go +++ b/platform/internal/middleware/wsauth_middleware.go @@ -119,12 +119,17 @@ func CanvasOrBearer(database *sql.DB) gin.HandlerFunc { return } - // Path 1: valid bearer. + // Path 1: bearer present → bearer MUST validate. Do not fall through + // to Origin on an invalid bearer — an attacker with a revoked / + // expired token + a matching Origin would otherwise bypass auth. + // Empty bearer → skip to Origin path (canvas never sends one). if tok := wsauth.BearerTokenFromHeader(c.GetHeader("Authorization")); tok != "" { - if err := wsauth.ValidateAnyToken(ctx, database, tok); err == nil { - c.Next() + if err := wsauth.ValidateAnyToken(ctx, database, tok); err != nil { + c.AbortWithStatusJSON(http.StatusUnauthorized, gin.H{"error": "invalid admin auth token"}) return } + c.Next() + return } // Path 2: canvas origin match. 
Read CORS_ORIGINS at request time so diff --git a/platform/internal/scheduler/scheduler.go b/platform/internal/scheduler/scheduler.go index 43285f47..8839fe0e 100644 --- a/platform/internal/scheduler/scheduler.go +++ b/platform/internal/scheduler/scheduler.go @@ -233,12 +233,8 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) { `SELECT COALESCE(active_tasks, 0) FROM workspaces WHERE id = $1`, sched.WorkspaceID, ).Scan(&activeTasks); err == nil && activeTasks > 0 { - wsID := sched.WorkspaceID - if len(wsID) > 12 { - wsID = wsID[:12] - } log.Printf("Scheduler: skipping '%s' on busy workspace %s (active_tasks=%d)", - sched.Name, wsID, activeTasks) + sched.Name, short(sched.WorkspaceID, 12), activeTasks) s.recordSkipped(ctx, sched, activeTasks) return } @@ -246,11 +242,7 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) { fireCtx, cancel := context.WithTimeout(ctx, fireTimeout) defer cancel() - idPrefix := sched.ID - if len(idPrefix) > 8 { - idPrefix = idPrefix[:8] - } - msgID := fmt.Sprintf("cron-%s-%s", idPrefix, uuid.New().String()[:8]) + msgID := fmt.Sprintf("cron-%s-%s", short(sched.ID, 8), uuid.New().String()[:8]) a2aBody, _ := json.Marshal(map[string]interface{}{ "method": "message/send", @@ -263,7 +255,7 @@ func (s *Scheduler) fireSchedule(ctx context.Context, sched scheduleRow) { }, }) - log.Printf("Scheduler: firing '%s' → workspace %s", sched.Name, sched.WorkspaceID[:12]) + log.Printf("Scheduler: firing '%s' → workspace %s", sched.Name, short(sched.WorkspaceID, 12)) // Empty callerID = canvas-style request (bypasses access control, source_id=NULL in activity log). // "system:scheduler" was invalid — source_id column is UUID and rejects non-UUID strings. @@ -386,6 +378,16 @@ func truncate(s string, maxLen int) string { return s[:maxLen-3] + "..." } +// short returns up to n leading characters of s without panicking when s is +// shorter than n. 
// short returns at most n leading characters of s, never panicking when s
// is shorter than n. It exists so log lines can show compact UUID prefixes
// without repeating the bounds check at every call site.
func short(s string, n int) string {
	if len(s) > n {
		return s[:n]
	}
	return s
}
+ +func TestRecordSkipped_writesSkippedStatus(t *testing.T) { + mock := setupTestDB(t) + s := New(nil, nil) + + sched := scheduleRow{ + ID: "11111111-1111-1111-1111-111111111111", + WorkspaceID: "22222222-2222-2222-2222-222222222222", + Name: "Hourly security audit", + CronExpr: "17 * * * *", + Timezone: "UTC", + Prompt: "audit", + } + + // Expect the schedule-row UPDATE with last_status='skipped' and the + // cron_run activity_logs INSERT with status='skipped' + error_detail + // carrying the active_tasks reason. + mock.ExpectExec(`UPDATE workspace_schedules`). + WithArgs(sched.ID, sqlmock.AnyArg(), sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec(`INSERT INTO activity_logs`). + WithArgs(sched.WorkspaceID, sqlmock.AnyArg(), sqlmock.AnyArg(), sqlmock.AnyArg()). + WillReturnResult(sqlmock.NewResult(0, 1)) + + s.recordSkipped(context.Background(), sched, 3) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet sqlmock expectations: %v", err) + } +} + +// ── TestRecordSkipped_shortWorkspaceIDNoPanic ───────────────────────────────── +// Guards against the short() regression: recordSkipped must not panic if +// WorkspaceID is unexpectedly shorter than the 12-char prefix used in logs. + +func TestRecordSkipped_shortWorkspaceIDNoPanic(t *testing.T) { + mock := setupTestDB(t) + s := New(nil, nil) + + // 4-char workspace id — shorter than any substring bound in the code. + sched := scheduleRow{ + ID: "11111111-1111-1111-1111-111111111111", + WorkspaceID: "ws-x", + Name: "test", + CronExpr: "0 * * * *", + Timezone: "UTC", + } + mock.ExpectExec(`UPDATE workspace_schedules`). + WillReturnResult(sqlmock.NewResult(0, 1)) + mock.ExpectExec(`INSERT INTO activity_logs`). 
+ WillReturnResult(sqlmock.NewResult(0, 1)) + + defer func() { + if r := recover(); r != nil { + t.Errorf("recordSkipped panicked on short WorkspaceID: %v", r) + } + }() + s.recordSkipped(context.Background(), sched, 1) +} From 54b49ffd1be883d4a71e92cd5b10f2c6cb67b19f Mon Sep 17 00:00:00 2001 From: Hongming Wang Date: Wed, 15 Apr 2026 11:52:01 -0700 Subject: [PATCH 7/7] fix(code-review): idle loop hardening + idle_prompt docs + admin-auth runbook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses items 4, 5, 7 from the self-review of the batch merge. PR A (#228) covered items 1, 2, 3, 6 on the Go side. ## workspace-template/main.py — idle loop hardening - Replace asyncio.get_event_loop() with asyncio.get_running_loop() — the former is deprecated in 3.12+ and emits a DeprecationWarning on every idle fire. - Replace hardcoded urlopen timeout=600 with IDLE_FIRE_TIMEOUT_SECONDS clamped to max(60, min(300, idle_interval_seconds)). Long cadence workspaces no longer hold dangling requests open for 10 minutes; the cap adapts automatically when the interval is short. - Type the exception handling: split HTTPError (has .code) from URLError (connection-level) from the generic catch-all. Log status + error class separately so operators can grep for specific failure modes instead of a bare "post failed". - Fire-and-forget no longer loses exceptions. run_in_executor Future now has an add_done_callback that logs the outcome, so a panic in _post_sync surfaces as "Idle loop: post failed — status=None err=..." instead of Python's default "Task exception was never retrieved" warning buried in stderr. ## org-templates/molecule-dev/org.yaml — discoverability Added idle_prompt + idle_interval_seconds to the defaults: block with explanatory comments. Without this, users had to read main.py to discover the feature.
## docs/runbooks/admin-auth.md — new Documents the three middleware variants (AdminAuth strict, CanvasOrBearer soft, WorkspaceAuth per-id), the exact contract of each, and the three-question test for adding a new route to CanvasOrBearer. Also flags the session-cookie follow-up as Phase H. Referenced PRs: #138, #164, #165, #166, #167, #168, #190, #194, #203, #228. No code deltas in platform/ beyond the Python + YAML + docs changes. Full pytest suite unchanged except the pre-existing test_hermes_smoke flake that fails in full-suite but passes in isolation (test isolation bug, not introduced by this PR). Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/runbooks/admin-auth.md | 72 +++++++++++++++++++++++++++++ org-templates/molecule-dev/org.yaml | 9 ++++ workspace-template/main.py | 55 +++++++++++++++++----- 3 files changed, 125 insertions(+), 11 deletions(-) create mode 100644 docs/runbooks/admin-auth.md diff --git a/docs/runbooks/admin-auth.md b/docs/runbooks/admin-auth.md new file mode 100644 index 00000000..df3aa032 --- /dev/null +++ b/docs/runbooks/admin-auth.md @@ -0,0 +1,72 @@ +# Admin auth middleware reference + +Two Gin middleware variants gate admin-style routes on the platform. Pick the +right one — they have different security contracts. + +## `middleware.AdminAuth(db.DB)` — strict bearer-only + +Required for any route where a forged request could: + +- Leak prompts or memory (`GET /bundles/export/:id`, `GET /events*`) +- Create or mutate workspaces (`POST /workspaces`, `DELETE /workspaces/:id`, `POST /bundles/import`, `POST /templates/import`, `POST /org/import`) +- Leak operational intelligence (`GET /admin/liveness`) +- Touch approvals, secrets, or schedules at the cross-workspace level + +**Contract:** + +1. Reads `Authorization: Bearer <token>` and validates against `workspace_auth_tokens` via `wsauth.ValidateAnyToken` +2. **No fallback.** Missing or invalid bearer → 401 +3. 
Lazy-bootstrap fail-open: if `HasAnyLiveTokenGlobal` returns 0 (fresh install / rolling upgrade), the route is open. First token issued to any workspace activates enforcement for every route. + +**DO NOT use Origin header or session-cookie fallbacks here.** That reopens every route to curl-based spoofing — CORS is a browser-only defence, not a server-side auth signal. + +## `middleware.CanvasOrBearer(db.DB)` — softer, canvas-friendly + +**Only** for cosmetic routes where a forged request has zero data / security impact. + +Currently used on: + +| Route | Why soft is OK | +|-------|----------------| +| `PUT /canvas/viewport` | Viewport corruption resets on the next browser refresh. No data exposure, no resource creation. | + +**Contract:** + +1. Reads `Authorization: Bearer <token>` first. If present but **invalid**, returns 401 — **no fall-through** to the Origin path. (This was a CanvasOrBearer bug fixed during code review; preserved as the invariant.) +2. Empty bearer → check `Origin` header against `CORS_ORIGINS` env var. Exact-match only. Empty Origin does not pass. +3. Lazy-bootstrap fail-open identical to `AdminAuth`. + +**The Origin check is NOT a strict auth boundary.** Any non-browser client (curl, an attacker tool) can forge the `Origin` header. CORS protects the browser from reading the response, not the server from receiving the request. Apply `CanvasOrBearer` only to routes where a curl attacker with knowledge of the canvas origin could do nothing harmful. + +### When to add a new route to `CanvasOrBearer` + +Ask these three questions. **All three** must be yes or the route belongs behind strict `AdminAuth`: + +1. Does a browser at `https://<org>.moleculesai.app` need this route without a bearer token? (If not, just use `AdminAuth` — browsers can send bearers via the session-cookie auth flow once that lands.) +2. 
If a non-browser attacker forged `Origin: https://<org>.moleculesai.app`, would the worst-case outcome be purely cosmetic — recoverable with a browser refresh and no data exposure? +3. Is there no tenant isolation concern (cross-org data leak) on this route? + +If yes/yes/yes → `CanvasOrBearer` is acceptable. Document the rationale in the PR that adds it, and add the route to the table above in the same PR. + +## Relationship to `WorkspaceAuth` + +`WorkspaceAuth` is the `/workspaces/:id/*` sub-route middleware. Different contract entirely: it binds a bearer token to a specific workspace ID so workspace A's token can't hit workspace B's sub-routes. Used for all `/workspaces/:id/*` paths except the A2A proxy (which has its own `CanCommunicate` access-control layer). + +AdminAuth accepts **any** valid workspace bearer (it's a global gate). WorkspaceAuth accepts only the bearer for the **specific** `:id` in the URL path. + +## Known gap (Phase H follow-up) + +`CanvasOrBearer` is a tactical fix for the #168 canvas-regression problem. The proper long-term path is **session-cookie-accepting AdminAuth**: extend `AdminAuth` to validate the `mcp_session` cookie via `auth.Provider.VerifySession` (WorkOS in prod, DisabledProvider in dev). That would give the full list of admin routes browser compatibility without an Origin-based workaround. Tracked as a Phase H item once the SaaS control plane is the primary deployment surface. 
+ +## Related PRs and issues + +- #138 — first canvas regression (PATCH /workspaces/:id), fixed with field-level authz in the handler (`WorkspaceHandler.Update`) +- #164 — CRITICAL anonymous workspace creation via unauthenticated `POST /bundles/import` +- #165 — HIGH topology disclosure via unauthenticated `GET /events` and `GET /bundles/export/:id` +- #166 — MEDIUM viewport corruption / liveness leak +- #167 — first auth-gate batch, strict `AdminAuth` on 5 routes +- #168 — canvas regression from the strict gating +- #190 — HIGH unauthenticated `POST /templates/import` +- #194 — rejected Origin-fallback approach (would have reopened #164) +- #203 — the `CanvasOrBearer` middleware, route-split approach, only on `PUT /canvas/viewport` +- #228 — code-review follow-up: CanvasOrBearer invalid-bearer fall-through fix diff --git a/org-templates/molecule-dev/org.yaml b/org-templates/molecule-dev/org.yaml index 55366f6d..2cbb5522 100644 --- a/org-templates/molecule-dev/org.yaml +++ b/org-templates/molecule-dev/org.yaml @@ -67,6 +67,15 @@ defaults: # workspace_dir: not set by default — each agent gets an isolated Docker volume # Set per-workspace to bind-mount a host directory as /workspace + # Idle-loop reflection pattern (#205). When idle_prompt is non-empty, the + # workspace self-sends this prompt every idle_interval_seconds while its + # heartbeat.active_tasks == 0. Pattern from Hermes/Letta. Cost collapses to + # event-driven (no LLM call unless there's actually nothing to do). Off by + # default to avoid surprising token burn — set per-workspace to enable. + # Keep idle prompts local (no A2A sends): same rule as initial_prompt. + idle_prompt: "" + idle_interval_seconds: 600 # 10 min — ignored when idle_prompt is empty + # initial_prompt runs once on first boot (not on restart). # ${GITHUB_REPO} is a container env var from .env secrets. 
# IMPORTANT: Do NOT send A2A messages in initial_prompt — other agents may not diff --git a/workspace-template/main.py b/workspace-template/main.py index 23782e9d..98aa3725 100644 --- a/workspace-template/main.py +++ b/workspace-template/main.py @@ -388,14 +388,21 @@ async def main(): # pragma: no cover # per-workspace to enable. idle_loop_task = None if config.idle_prompt: + # Idle-fire HTTP timeout. Kept tight relative to the fire cadence so a + # hung platform doesn't accumulate dangling requests — a fire that + # takes longer than the idle interval itself is almost certainly stuck. + IDLE_FIRE_TIMEOUT_SECONDS = max(60, min(300, config.idle_interval_seconds)) + # Initial settle delay — capped at 60s so cold-start races don't fire + # instantly, and never longer than the configured interval itself + # (short-cadence workspaces shouldn't wait a full minute to first check). + IDLE_INITIAL_SETTLE_SECONDS = min(config.idle_interval_seconds, 60) + async def _run_idle_loop(): """Self-sends config.idle_prompt periodically when the workspace is idle.""" - # Wait for server + initial prompt to settle before the first idle check. - # Short wait (min of 60s or interval) so cold-start races don't fire instantly. - await asyncio.sleep(min(config.idle_interval_seconds, 60)) + await asyncio.sleep(IDLE_INITIAL_SETTLE_SECONDS) import json as _json - import urllib.request + from urllib import request as _urlreq, error as _urlerr while True: try: @@ -424,20 +431,46 @@ async def main(): # pragma: no cover }).encode() def _post_sync(): + # Returns (status_code, error_type) so the caller logs the + # actual outcome instead of a bare "post failed" line. 
try: - req = urllib.request.Request( + req = _urlreq.Request( f"{platform_url}/workspaces/{workspace_id}/a2a", data=payload, headers={"Content-Type": "application/json"}, ) - with urllib.request.urlopen(req, timeout=600) as resp: + with _urlreq.urlopen(req, timeout=IDLE_FIRE_TIMEOUT_SECONDS) as resp: resp.read() - except Exception as e: - print(f"Idle loop: post failed — {e}", flush=True) + return resp.status, None + except _urlerr.HTTPError as e: + return e.code, type(e).__name__ + except _urlerr.URLError as e: + return None, f"URLError: {e.reason}" + except Exception as e: # pragma: no cover — catch-all safety net + return None, type(e).__name__ - print(f"Idle loop: firing (active_tasks=0, interval={config.idle_interval_seconds}s)", flush=True) - loop_ref = asyncio.get_event_loop() - loop_ref.run_in_executor(None, _post_sync) + print( + f"Idle loop: firing (active_tasks=0, interval={config.idle_interval_seconds}s, " + f"timeout={IDLE_FIRE_TIMEOUT_SECONDS}s)", + flush=True, + ) + loop_ref = asyncio.get_running_loop() + + def _log_result(future): + try: + status, err = future.result() + if err: + print( + f"Idle loop: post failed — status={status} err={err}", + flush=True, + ) + else: + print(f"Idle loop: post ok status={status}", flush=True) + except Exception as e: # pragma: no cover + print(f"Idle loop: executor callback crashed — {e}", flush=True) + + fut = loop_ref.run_in_executor(None, _post_sync) + fut.add_done_callback(_log_result) idle_loop_task = asyncio.create_task(_run_idle_loop())