From 4dfb7a42b7d0558c6269a3845858eafd7501cc22 Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Wed, 15 Apr 2026 11:09:43 -0700 Subject: [PATCH] feat(workspace): add idle-loop reflection pattern (Hermes/Letta shape) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today's multi-framework research (Hermes, Letta, Trigger.dev, Inngest, AG2, Rivet, n8n, Composio, SWE-agent — see docs/ecosystem-watch.md) confirmed that nobody runs while(true) per agent. The working patterns are: (a) event-driven + hibernation (Hermes, Letta, Trigger.dev, Inngest) (b) cron/user-triggered ephemeral runs (AG2, Rivet, n8n, SWE-agent) Molecule AI is currently 100% in category (b). Observed team utilization: ~0.5% — agents idle 99.5% of the time because cron fires and CEO-typed A2A are the only initiating signals. CEO's north-star is 24/7 iteration, current cadence falls short. This PR closes the gap by adding an in-workspace idle loop that wakes the agent periodically ONLY when it has no active task. The shape is the Hermes reflection-on-completion pattern combined with the Letta backlog-pull pattern, collapsed into a ~60 LOC change in the workspace-template. Zero new Go code. Zero new DB tables. Zero new API endpoints. ## How it works 1. `config.py` gets two new fields on WorkspaceConfig: - `idle_prompt: str = ""` — the prompt to self-send when idle - `idle_interval_seconds: int = 600` — how often to check (default 10 min) Both support inline or file ref (matching the initial_prompt pattern). 2. `main.py` spawns an `_run_idle_loop()` asyncio task alongside the existing initial_prompt task (same lifecycle hooks — cancelled in the `finally:` of the server.serve() block). 3. The loop body: a. Sleep interval b. Check `heartbeat.active_tasks == 0` LOCALLY (no LLM call, no HTTP) c. If idle → self-POST the idle_prompt via the existing /workspaces/{id}/a2a proxy d. 
Loop. The agent's own concurrency control rejects the post if it becomes busy between the check and the POST — that's the safety valve. 4. Gated on `config.idle_prompt` being non-empty. Default = "" = no loop. Existing workspaces upgrade silently as no-ops until someone explicitly opts in by setting idle_prompt in org.yaml (either defaults: or per-workspace:). ## Cost analysis (from the research report) - while(true) pattern: ~$93/day/org (12 agents × 12 thinks/hour × $0.027). Unshippable. - Hermes reflection-on-completion: ~$0.45/day/org. Cost ∝ useful work. - This PR's idle loop at 10-min cadence: upper bound 12 × 6/hour × 24h × ~3k tokens × Sonnet rate ≈ $5/day/org PER ROLE, only if they're genuinely idle every check. In practice far less because busy periods skip the LLM call entirely (the active_tasks check is local). ## Rollout plan The research report recommended rolling to ONE workspace first (Technical Researcher) and measuring 24h of activity_logs before enabling for all 12. This PR enables the mechanism; it does NOT add any default idle_prompt to org-templates/molecule-dev/org.yaml. That's a follow-up PR after this one lands and one workspace has been manually opted in for measurement. 
## Not touched in this PR - No Go code (no new platform endpoint, no new DB columns) - No org.yaml changes (zero-impact until someone opts in) - No scheduler changes (the idle loop is a workspace concern, not a scheduler concern — matches the research report's layering) ## Test plan - [x] Python syntax check (ast.parse) on main.py + config.py - [ ] Unit test: WorkspaceConfig parses idle_prompt / idle_interval_seconds from yaml - [ ] Integration test: set idle_prompt on Technical Researcher, measure that an A2A message is received every ~10 min while idle, and NOT received while busy with a delegation - [ ] Dogfood: enable on Technical Researcher for 24h, count activity_logs delta vs baseline, confirm cost stays within model ## Related - Today's research report (conversation output, summarized in commit trailer) - docs/ecosystem-watch.md → `### Hermes Agent` (the canonical reflection-on-completion example) - #159 orchestrator/worker split — complementary: leaders pulse for dispatch, workers idle-loop for pull. Together: leaders push work, workers pull work, no role ever sits idle with a cold queue. --- workspace-template/config.py | 22 ++++++++++++ workspace-template/main.py | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/workspace-template/config.py b/workspace-template/config.py index 6a8648a2..19f34d62 100644 --- a/workspace-template/config.py +++ b/workspace-template/config.py @@ -198,6 +198,17 @@ class WorkspaceConfig: initial_prompt: str = "" """Auto-sent as the first A2A message after startup. Default empty = no auto-message. Can be an inline string or a file reference (initial_prompt_file in yaml).""" + idle_prompt: str = "" + """Auto-sent every `idle_interval_seconds` while the workspace has no active + task (heartbeat.active_tasks == 0). Default empty = no idle loop. 
This is + the reflection-on-completion / backlog-pull pattern from the Hermes/Letta + playbook: the workspace self-wakes when idle, runs a lightweight reflection + prompt, and either picks up queued work or stops. Cost scales with useful + activity (the prompt returns quickly if there's nothing to do). Can be + inline or a file reference via `idle_prompt_file`.""" + idle_interval_seconds: int = 600 + """How often the idle loop checks in (seconds). Default 600 (10 min). + Ignored when idle_prompt is empty.""" skills: list[str] = field(default_factory=list) plugins: list[str] = field(default_factory=list) # installed plugin names tools: list[str] = field(default_factory=list) @@ -251,6 +262,15 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: if prompt_path.exists(): initial_prompt = prompt_path.read_text().strip() + # Resolve idle_prompt: same pattern as initial_prompt + idle_prompt = raw.get("idle_prompt", "") + idle_prompt_file = raw.get("idle_prompt_file", "") + if not idle_prompt and idle_prompt_file: + idle_path = Path(config_path) / idle_prompt_file + if idle_path.exists(): + idle_prompt = idle_path.read_text().strip() + idle_interval_seconds = int(raw.get("idle_interval_seconds", 600)) + return WorkspaceConfig( name=raw.get("name", "Workspace"), description=raw.get("description", ""), @@ -259,6 +279,8 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: model=model, runtime=runtime, initial_prompt=initial_prompt, + idle_prompt=idle_prompt, + idle_interval_seconds=idle_interval_seconds, runtime_config=RuntimeConfig( command=runtime_raw.get("command", ""), args=runtime_raw.get("args", []), diff --git a/workspace-template/main.py b/workspace-template/main.py index d54e7bb3..77894997 100644 --- a/workspace-template/main.py +++ b/workspace-template/main.py @@ -368,12 +368,80 @@ async def main(): # pragma: no cover initial_prompt_task = asyncio.create_task(_send_initial_prompt()) + # 10c. 
Idle loop — reflection-on-completion / backlog-pull pattern. + # Fires config.idle_prompt every config.idle_interval_seconds while the + # workspace has no active task. This turns every role from "waits for cron" + # into "self-wakes when idle" — the Hermes/Letta shape from today's + # multi-framework survey (see docs/ecosystem-watch.md). Cost collapses to + # event-driven in practice: the idle check is local (no LLM call, just + # heartbeat.active_tasks==0), and the prompt only fires when there's + # actually nothing to do. Gated on idle_prompt being non-empty so existing + # workspaces upgrade opt-in — set idle_prompt in org.yaml defaults or + # per-workspace to enable. + idle_loop_task = None + if config.idle_prompt: + async def _run_idle_loop(): + """Self-sends config.idle_prompt periodically when the workspace is idle.""" + # Wait for server + initial prompt to settle before the first idle check. + # Short wait (min of 60s or interval) so cold-start races don't fire instantly. + await asyncio.sleep(min(config.idle_interval_seconds, 60)) + + import json as _json + import urllib.request + + while True: + try: + await asyncio.sleep(config.idle_interval_seconds) + except asyncio.CancelledError: + return + + # Local idle check — no platform API call, no LLM call. + # heartbeat.active_tasks == 0 means no in-flight work. + if heartbeat.active_tasks > 0: + continue + + # Self-post the idle prompt via the platform A2A proxy (same + # path as initial_prompt). The agent's own concurrency control + # rejects if the workspace becomes busy between this check and + # the post — that's the expected safety valve. 
+ payload = _json.dumps({ + "method": "message/send", + "params": { + "message": { + "role": "user", + "messageId": f"idle-{_uuid.uuid4().hex[:8]}", + "parts": [{"kind": "text", "text": config.idle_prompt}], + }, + }, + }).encode() + + def _post_sync(): + try: + req = urllib.request.Request( + f"{platform_url}/workspaces/{workspace_id}/a2a", + data=payload, + headers={"Content-Type": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=600) as resp: + resp.read() + except Exception as e: + print(f"Idle loop: post failed — {e}", flush=True) + + print(f"Idle loop: firing (active_tasks=0, interval={config.idle_interval_seconds}s)", flush=True) + loop_ref = asyncio.get_event_loop() + loop_ref.run_in_executor(None, _post_sync) + + idle_loop_task = asyncio.create_task(_run_idle_loop()) + try: await server.serve() finally: # Cancel initial prompt if still running if initial_prompt_task and not initial_prompt_task.done(): initial_prompt_task.cancel() + # Cancel idle loop if running + if idle_loop_task and not idle_loop_task.done(): + idle_loop_task.cancel() # Gracefully stop the Temporal worker background task on shutdown await temporal_wrapper.stop()