From 4dfb7a42b7d0558c6269a3845858eafd7501cc22 Mon Sep 17 00:00:00 2001 From: rabbitblood Date: Wed, 15 Apr 2026 11:09:43 -0700 Subject: [PATCH] feat(workspace): add idle-loop reflection pattern (Hermes/Letta shape) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today's multi-framework research (Hermes, Letta, Trigger.dev, Inngest, AG2, Rivet, n8n, Composio, SWE-agent — see docs/ecosystem-watch.md) confirmed that nobody runs while(true) per agent. The working patterns are: (a) event-driven + hibernation (Hermes, Letta, Trigger.dev, Inngest) (b) cron/user-triggered ephemeral runs (AG2, Rivet, n8n, SWE-agent) Molecule AI is currently 100% in category (b). Observed team utilization: ~0.5% — agents idle 99.5% of the time because cron fires and CEO-typed A2A are the only initiating signals. CEO's north-star is 24/7 iteration, current cadence falls short. This PR closes the gap by adding an in-workspace idle loop that wakes the agent periodically ONLY when it has no active task. The shape is the Hermes reflection-on-completion pattern combined with the Letta backlog-pull pattern, collapsed into a ~60 LOC change in the workspace-template. Zero new Go code. Zero new DB tables. Zero new API endpoints. ## How it works 1. `config.py` gets two new fields on WorkspaceConfig: - `idle_prompt: str = ""` — the prompt to self-send when idle - `idle_interval_seconds: int = 600` — how often to check (default 10 min) Both support inline or file ref (matching the initial_prompt pattern). 2. `main.py` spawns an `_run_idle_loop()` asyncio task alongside the existing initial_prompt task (same lifecycle hooks — cancelled in the `finally:` of the server.serve() block). 3. The loop body: a. Sleep interval b. Check `heartbeat.active_tasks == 0` LOCALLY (no LLM call, no HTTP) c. If idle → self-POST the idle_prompt via the existing /workspaces/{id}/a2a proxy d. 
Loop. The agent's own concurrency control rejects the post if it becomes busy between the check and the POST — that's the safety valve. 4. Gated on `config.idle_prompt` being non-empty. Default = "" = no loop. Existing workspaces upgrade silently as no-ops until someone explicitly opts in by setting idle_prompt in org.yaml (either defaults: or per-workspace:). ## Cost analysis (from the research report) - while(true) pattern: ~$93/day/org (12 agents × 12 thinks/hour × $0.027). Unshippable. - Hermes reflection-on-completion: ~$0.45/day/org. Cost ∝ useful work. - This PR's idle loop at 10-min cadence: upper bound 12 × 6/hour × 24h × ~3k tokens × Sonnet rate ≈ $5/day/org PER ROLE, only if they're genuinely idle every check. In practice far less because busy periods skip the LLM call entirely (the active_tasks check is local). ## Rollout plan The research report recommended rolling to ONE workspace first (Technical Researcher) and measuring 24h of activity_logs before enabling for all 12. This PR enables the mechanism; it does NOT add any default idle_prompt to org-templates/molecule-dev/org.yaml. That's a follow-up PR after this one lands and one workspace has been manually opted in for measurement. 
## Not touched in this PR - No Go code (no new platform endpoint, no new DB columns) - No org.yaml changes (zero-impact until someone opts in) - No scheduler changes (the idle loop is a workspace concern, not a scheduler concern — matches the research report's layering) ## Test plan - [x] Python syntax check (ast.parse) on main.py + config.py - [ ] Unit test: WorkspaceConfig parses idle_prompt / idle_interval_seconds from yaml - [ ] Integration test: set idle_prompt on Technical Researcher, measure that an A2A message is received every ~10 min while idle, and NOT received while busy with a delegation - [ ] Dogfood: enable on Technical Researcher for 24h, count activity_logs delta vs baseline, confirm cost stays within model ## Related - Today's research report (conversation output, summarized in commit trailer) - docs/ecosystem-watch.md → `### Hermes Agent` (the canonical reflection-on-completion example) - #159 orchestrator/worker split — complementary: leaders pulse for dispatch, workers idle-loop for pull. Together: leaders push work, workers pull work, no role ever sits idle with a cold queue. --- workspace-template/config.py | 22 ++++++++++++ workspace-template/main.py | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/workspace-template/config.py b/workspace-template/config.py index 6a8648a2..19f34d62 100644 --- a/workspace-template/config.py +++ b/workspace-template/config.py @@ -198,6 +198,17 @@ class WorkspaceConfig: initial_prompt: str = "" """Auto-sent as the first A2A message after startup. Default empty = no auto-message. Can be an inline string or a file reference (initial_prompt_file in yaml).""" + idle_prompt: str = "" + """Auto-sent every `idle_interval_seconds` while the workspace has no active + task (heartbeat.active_tasks == 0). Default empty = no idle loop. 
This is + the reflection-on-completion / backlog-pull pattern from the Hermes/Letta + playbook: the workspace self-wakes when idle, runs a lightweight reflection + prompt, and either picks up queued work or stops. Cost scales with useful + activity (the prompt returns quickly if there's nothing to do). Can be + inline or a file reference via `idle_prompt_file`.""" + idle_interval_seconds: int = 600 + """How often the idle loop checks in (seconds). Default 600 (10 min). + Ignored when idle_prompt is empty.""" skills: list[str] = field(default_factory=list) plugins: list[str] = field(default_factory=list) # installed plugin names tools: list[str] = field(default_factory=list) @@ -251,6 +262,15 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: if prompt_path.exists(): initial_prompt = prompt_path.read_text().strip() + # Resolve idle_prompt: same pattern as initial_prompt + idle_prompt = raw.get("idle_prompt", "") + idle_prompt_file = raw.get("idle_prompt_file", "") + if not idle_prompt and idle_prompt_file: + idle_path = Path(config_path) / idle_prompt_file + if idle_path.exists(): + idle_prompt = idle_path.read_text().strip() + idle_interval_seconds = int(raw.get("idle_interval_seconds", 600)) + return WorkspaceConfig( name=raw.get("name", "Workspace"), description=raw.get("description", ""), @@ -259,6 +279,8 @@ def load_config(config_path: Optional[str] = None) -> WorkspaceConfig: model=model, runtime=runtime, initial_prompt=initial_prompt, + idle_prompt=idle_prompt, + idle_interval_seconds=idle_interval_seconds, runtime_config=RuntimeConfig( command=runtime_raw.get("command", ""), args=runtime_raw.get("args", []), diff --git a/workspace-template/main.py b/workspace-template/main.py index d54e7bb3..77894997 100644 --- a/workspace-template/main.py +++ b/workspace-template/main.py @@ -368,12 +368,80 @@ async def main(): # pragma: no cover initial_prompt_task = asyncio.create_task(_send_initial_prompt()) + # 10c. 
Idle loop — reflection-on-completion / backlog-pull pattern. + # Fires config.idle_prompt every config.idle_interval_seconds while the + # workspace has no active task. This turns every role from "waits for cron" + # into "self-wakes when idle" — the Hermes/Letta shape from today's + # multi-framework survey (see docs/ecosystem-watch.md). Cost collapses to + # event-driven in practice: the idle check is local (no LLM call, just + # heartbeat.active_tasks==0), and the prompt only fires when there's + # actually nothing to do. Gated on idle_prompt being non-empty so existing + # workspaces upgrade opt-in — set idle_prompt in org.yaml defaults or + # per-workspace to enable. + idle_loop_task = None + if config.idle_prompt: + async def _run_idle_loop(): + """Self-sends config.idle_prompt periodically when the workspace is idle.""" + # Wait for server + initial prompt to settle before the first idle check. + # Short wait (min of 60s or interval) so cold-start races don't fire instantly. + await asyncio.sleep(min(config.idle_interval_seconds, 60)) + + import json as _json + import urllib.request + + while True: + try: + await asyncio.sleep(config.idle_interval_seconds) + except asyncio.CancelledError: + return + + # Local idle check — no platform API call, no LLM call. + # heartbeat.active_tasks == 0 means no in-flight work. + if heartbeat.active_tasks > 0: + continue + + # Self-post the idle prompt via the platform A2A proxy (same + # path as initial_prompt). The agent's own concurrency control + # rejects if the workspace becomes busy between this check and + # the post — that's the expected safety valve. 
+ payload = _json.dumps({ + "method": "message/send", + "params": { + "message": { + "role": "user", + "messageId": f"idle-{_uuid.uuid4().hex[:8]}", + "parts": [{"kind": "text", "text": config.idle_prompt}], + }, + }, + }).encode() + + def _post_sync(): + try: + req = urllib.request.Request( + f"{platform_url}/workspaces/{workspace_id}/a2a", + data=payload, + headers={"Content-Type": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=600) as resp: + resp.read() + except Exception as e: + print(f"Idle loop: post failed — {e}", flush=True) + + print(f"Idle loop: firing (active_tasks=0, interval={config.idle_interval_seconds}s)", flush=True) + loop_ref = asyncio.get_event_loop() + loop_ref.run_in_executor(None, _post_sync) + + idle_loop_task = asyncio.create_task(_run_idle_loop()) + try: await server.serve() finally: # Cancel initial prompt if still running if initial_prompt_task and not initial_prompt_task.done(): initial_prompt_task.cancel() + # Cancel idle loop if running + if idle_loop_task and not idle_loop_task.done(): + idle_loop_task.cancel() # Gracefully stop the Temporal worker background task on shutdown await temporal_wrapper.stop()