Some checks failed
CI / Python Lint & Test (push) Has been cancelled
Block internal-flavored paths / Block forbidden paths (push) Successful in 7s
Secret scan / Scan diff for credential-shaped strings (push) Successful in 6s
CI / Detect changes (push) Successful in 19s
E2E API Smoke Test / detect-changes (push) Successful in 21s
E2E Staging Canvas (Playwright) / detect-changes (push) Successful in 21s
Handlers Postgres Integration / detect-changes (push) Successful in 20s
Runtime PR-Built Compatibility / detect-changes (push) Successful in 20s
CI / Shellcheck (E2E scripts) (push) Successful in 4s
CI / Platform (Go) (push) Successful in 5s
CI / Canvas (Next.js) (push) Successful in 5s
CI / Canvas Deploy Reminder (push) Has been skipped
E2E API Smoke Test / E2E API Smoke Test (push) Successful in 7s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (push) Successful in 7s
Handlers Postgres Integration / Handlers Postgres Integration (push) Successful in 5s
publish-runtime-autobump / autobump-and-tag (push) Failing after 41s
Runtime PR-Built Compatibility / PR-built wheel + import smoke (push) Successful in 1m47s
Co-authored-by: Molecule AI · core-be <core-be@agents.moleculesai.app> Co-committed-by: Molecule AI · core-be <core-be@agents.moleculesai.app>
707 lines
32 KiB
Python
707 lines
32 KiB
Python
"""Heartbeat loop — alive signal + delegation status checker.
|
|
|
|
Every 30 seconds:
|
|
1. Send heartbeat to platform (alive signal with current_task, error_rate)
|
|
2. Check pending delegations — any results back?
|
|
3. Store completed delegation results for the agent to pick up
|
|
|
|
Resilient: recreates HTTP client on failure, auto-restarts on crash.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import httpx
|
|
|
|
from platform_auth import auth_headers, refresh_cache, self_source_headers
|
|
|
|
|
|
def _runtime_state_payload() -> dict:
|
|
"""Build the {runtime_state, sample_error} portion of the heartbeat
|
|
body when SOME adapter executor has marked itself wedged. Returns
|
|
an empty dict when the runtime is healthy so the heartbeat payload
|
|
doesn't grow fields the platform doesn't need.
|
|
|
|
Source of truth is runtime_wedge (lives in molecule-runtime,
|
|
independent of any specific adapter). Pre task #87 this imported
|
|
from claude_sdk_executor — that worked because the executor was
|
|
bundled into molecule-runtime, but blocked moving it to the
|
|
claude-code template repo. The runtime_wedge module is now the
|
|
cross-cutting wedge-state holder; adapters mark/clear via it,
|
|
heartbeat reads it.
|
|
|
|
Imported lazily so a workspace whose runtime image somehow ships
|
|
without runtime_wedge (corrupt install, mid-rolling-deploy state)
|
|
keeps heartbeating — a missing import means "no wedge info; assume
|
|
healthy."
|
|
"""
|
|
try:
|
|
from runtime_wedge import is_wedged, wedge_reason
|
|
except Exception:
|
|
return {}
|
|
if not is_wedged():
|
|
return {}
|
|
return {
|
|
"runtime_state": "wedged",
|
|
# sample_error doubles as the human-readable banner text on the
|
|
# canvas's degraded card — keep it short and actionable.
|
|
"sample_error": wedge_reason(),
|
|
}
|
|
|
|
|
|
def _runtime_metadata_payload() -> dict:
    """Return the {runtime_metadata} heartbeat fields, or {} on any failure.

    Reports adapter-declared capabilities plus per-capability override
    values (idle timeout, etc.). The platform uses this to route each
    capability to the right owner: native (adapter) vs fallback (platform).

    The heartbeat must NEVER fail because of capability discovery —
    observability beats capability accuracy, and the platform falls
    through to its own defaults when fields are missing.

    See project memory `project_runtime_native_pluggable.md` and
    workspace/adapter_base.py:RuntimeCapabilities.
    """
    try:
        from adapters import get_adapter

        # ADAPTER_MODULE wins over the runtime arg in get_adapter — an
        # empty string forces the env-var path.
        adapter = get_adapter("")()
        metadata: dict = {"capabilities": adapter.capabilities().to_dict()}
        override = adapter.idle_timeout_override()
        # Only include the override when it's a positive integer. None /
        # zero / negative falls through to the platform's global default
        # (env A2A_IDLE_TIMEOUT_SECONDS, default 5min) — that "absent
        # field = use default" contract is what keeps the wire small.
        if isinstance(override, int) and override > 0:
            metadata["idle_timeout_seconds"] = override
        return {"runtime_metadata": metadata}
    except Exception as exc:
        # debug-level: missing ADAPTER_MODULE in dev / test envs is normal
        logger.debug("runtime_metadata: failed to read adapter caps: %s", exc)
        return {}
|
|
|
|
|
|
# Module-level logger. NOTE: defined after the helper functions above that
# reference it — safe, because they only look up `logger` at call time.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _persist_inbound_secret_from_heartbeat(resp) -> None:
|
|
"""Persist ``platform_inbound_secret`` from a heartbeat response, if any.
|
|
|
|
The platform's heartbeat handler (workspace-server PR #2421) returns
|
|
the secret on every beat — mirrors /registry/register so a workspace
|
|
whose secret was lazy-healed on the platform side picks it up within
|
|
one heartbeat tick instead of requiring a runtime restart.
|
|
|
|
Without this delivery path the chat-upload code path's "secret was
|
|
just minted, will pick up on next heartbeat" 503 message is a lie
|
|
and the workspace stays 401-forever until the operator restarts the
|
|
runtime. Caught 2026-04-30 on the hongmingwang tenant — the
|
|
standalone wrapper (mcp_cli.py) got the same change in #2421 but
|
|
the in-container heartbeat (this file) was missed in the first
|
|
pass.
|
|
|
|
Failure is non-fatal: if the body isn't JSON, doesn't carry the
|
|
field, or the disk write fails, the next heartbeat retries. This
|
|
matches the cold-start register flow in main.py:319-323.
|
|
"""
|
|
try:
|
|
body = resp.json()
|
|
except Exception:
|
|
return
|
|
if not isinstance(body, dict):
|
|
return
|
|
secret = body.get("platform_inbound_secret")
|
|
if not secret:
|
|
return
|
|
try:
|
|
from platform_inbound_auth import save_inbound_secret
|
|
|
|
save_inbound_secret(secret)
|
|
except Exception as exc:
|
|
logger.warning(
|
|
"heartbeat: persist inbound secret failed: %s", exc
|
|
)
|
|
|
|
|
|
HEARTBEAT_INTERVAL = 30  # seconds — fallback default when no per-instance value is passed
MAX_CONSECUTIVE_FAILURES = 10  # recreate the HTTP client after this many failed beats in a row
MAX_SEEN_DELEGATION_IDS = 200  # cap on the in-memory dedupe set of processed delegation IDs
SELF_MESSAGE_COOLDOWN = 60  # seconds — minimum between self-messages to prevent loops
# Shared path — adapter executors (in their template repos) read this
# same file via executor_helpers.read_delegation_results so heartbeat-
# delivered async delegation results land in the next agent turn.
DELEGATION_RESULTS_FILE = os.environ.get("DELEGATION_RESULTS_FILE", "/tmp/delegation_results.jsonl")
# Cursor file for tracking activity_log IDs processed from the a2a_receive path
# (delegations fired via tool_delegate_task → POST /workspaces/:id/a2a proxy, not
# POST /workspaces/:id/delegate). Persisted to disk so heartbeat restarts
# don't re-process the same rows.
_ACTIVITY_DELEGATION_CURSOR_FILE = os.environ.get(
    "DELEGATION_ACTIVITY_CURSOR_FILE",
    "/tmp/delegation_activity_cursor",
)
|
|
|
|
|
|
class HeartbeatLoop:
    """Background heartbeat: alive signal + delegation-result polling."""

    def __init__(
        self,
        platform_url: str,
        workspace_id: str,
        interval_seconds: int = HEARTBEAT_INTERVAL,
    ):
        # -- identity / target ------------------------------------------------
        self.platform_url = platform_url
        self.workspace_id = workspace_id
        # Per-instance interval — main.py threads ObservabilityConfig.
        # heartbeat_interval_seconds (clamped to [5, 300] at parse time)
        # through here so operators can tune cadence per-workspace via the
        # `observability:` block in config.yaml. The default is the legacy
        # module constant, so callers that still use the 2-arg signature
        # (including tests constructing HeartbeatLoop directly) keep the
        # existing 30s behavior.
        self._interval_seconds = interval_seconds

        # -- rolling health counters reported in each beat --------------------
        self.start_time = time.time()
        self.error_count = 0
        self.request_count = 0
        self.active_tasks = 0
        self.current_task = ""
        self.sample_error = ""

        # -- loop / delegation bookkeeping ------------------------------------
        self._task = None
        self._consecutive_failures = 0
        self._seen_delegation_ids: set[str] = set()
        self._last_self_message_time = 0.0
        self._parent_name: str | None = None  # cached after first lookup
        # Seen activity IDs for a2a_receive polling (delegations via the
        # POST /a2a proxy path). Loaded lazily from the cursor file on the
        # first poll to avoid blocking startup.
        self._seen_activity_ids: set[str] = set()
        self._activity_cursor_loaded = False
|
|
|
|
@property
|
|
def error_rate(self) -> float:
|
|
if self.request_count == 0:
|
|
return 0.0
|
|
return self.error_count / self.request_count
|
|
|
|
def record_error(self, error: str):
|
|
self.error_count += 1
|
|
self.request_count += 1
|
|
self.sample_error = error
|
|
|
|
def record_success(self):
|
|
self.request_count += 1
|
|
|
|
def start(self):
|
|
self._task = asyncio.create_task(self._loop())
|
|
self._task.add_done_callback(self._on_done)
|
|
|
|
def _on_done(self, task):
|
|
if not task.cancelled() and task.exception():
|
|
logger.error("Heartbeat loop died: %s — restarting", task.exception())
|
|
self._task = asyncio.create_task(self._loop())
|
|
self._task.add_done_callback(self._on_done)
|
|
|
|
async def stop(self):
|
|
if self._task:
|
|
self._task.cancel()
|
|
try:
|
|
await self._task
|
|
except asyncio.CancelledError:
|
|
pass
|
|
|
|
    async def _loop(self):
        """Run the heartbeat cycle forever: beat → check delegations → sleep.

        Two nested loops: the inner one is the steady-state cycle; the
        outer one recreates the httpx client whenever the inner loop
        breaks out (after MAX_CONSECUTIVE_FAILURES) or raises.
        CancelledError propagates (stop() relies on it); every other
        exception is logged and retried after one interval.
        """
        while True:
            client = None
            try:
                client = httpx.AsyncClient(timeout=10.0)
                while True:
                    # 1. Send heartbeat (Phase 30.1: include auth header if token known)
                    try:
                        body = {
                            "workspace_id": self.workspace_id,
                            "error_rate": self.error_rate,
                            "sample_error": self.sample_error,
                            "active_tasks": self.active_tasks,
                            "current_task": self.current_task,
                            "uptime_seconds": int(time.time() - self.start_time),
                        }
                        # Layer the runtime-wedge fields on top so a
                        # non-empty sample_error from the wedge wins
                        # over the (typically empty) heartbeat
                        # sample_error field. The platform reads
                        # runtime_state to flip status → degraded.
                        body.update(_runtime_state_payload())
                        body.update(_runtime_metadata_payload())
                        resp = await client.post(
                            f"{self.platform_url}/registry/heartbeat",
                            json=body,
                            headers=auth_headers(),
                        )
                        # Beat delivered — reset the rolling counters.
                        # NOTE(review): httpx post() does not raise on HTTP
                        # error statuses by itself, so a non-2xx response
                        # still lands here and resets the counters — confirm
                        # whether an event hook calls raise_for_status()
                        # elsewhere; otherwise the HTTPStatusError 401 branch
                        # below looks unreachable.
                        self.error_count = 0
                        self.request_count = 0
                        self._consecutive_failures = 0
                        # 2026-04-30: persist the platform_inbound_secret
                        # if the heartbeat response carries one. Mirrors
                        # the cold-start register flow in main.py:319-323
                        # and closes the recovery path for workspaces
                        # whose secret was lazy-healed on the platform
                        # side after register-time. Without this, the
                        # workspace stays 401-forever on chat upload
                        # until restart. See workspace-server PR #2421
                        # for the server-side delivery change.
                        _persist_inbound_secret_from_heartbeat(resp)
                    except Exception as e:
                        self._consecutive_failures += 1
                        # Issue #1877: if heartbeat 401'd, re-read the token from disk
                        # and retry once. This handles the platform's token-rotation race
                        # where WriteFilesToContainer hasn't finished writing the new
                        # token before the runtime boots and caches the old value.
                        is_401 = False
                        if isinstance(e, httpx.HTTPStatusError) and e.response.status_code == 401:
                            is_401 = True
                        if is_401:
                            logger.warning("Heartbeat 401 for %s — refreshing token cache and retrying once", self.workspace_id)
                            refresh_cache()
                            try:
                                # Rebuild the body so uptime/error fields are fresh
                                # for the retry beat.
                                retry_body = {
                                    "workspace_id": self.workspace_id,
                                    "error_rate": self.error_rate,
                                    "sample_error": self.sample_error,
                                    "active_tasks": self.active_tasks,
                                    "current_task": self.current_task,
                                    "uptime_seconds": int(time.time() - self.start_time),
                                }
                                retry_body.update(_runtime_state_payload())
                                retry_resp = await client.post(
                                    f"{self.platform_url}/registry/heartbeat",
                                    json=retry_body,
                                    headers=auth_headers(),
                                )
                                self._consecutive_failures = 0
                                self.request_count += 1
                                _persist_inbound_secret_from_heartbeat(retry_resp)
                            except Exception:
                                # Retry also failed — fall through to the normal
                                # failure tracking below.
                                pass
                        # Log the first few failures, then only every 10th,
                        # to keep a long outage from flooding the log.
                        if self._consecutive_failures <= 3 or self._consecutive_failures % MAX_CONSECUTIVE_FAILURES == 0:
                            logger.warning("Heartbeat failed (%d consecutive): %s", self._consecutive_failures, e)
                        if self._consecutive_failures >= MAX_CONSECUTIVE_FAILURES:
                            # Too many failures in a row — assume the client is
                            # wedged; break to the outer loop to rebuild it.
                            logger.info("Heartbeat: recreating HTTP client after %d failures", self._consecutive_failures)
                            try:
                                await client.aclose()
                            except Exception:
                                pass
                            break

                    # 2. Check delegation status
                    try:
                        await self._check_delegations(client)
                    except Exception as e:
                        logger.debug("Delegation check failed: %s", e)

                    # 3. Check activity_logs for delegation results that arrived via
                    # the POST /a2a proxy path (tool_delegate_task → send_a2a_message).
                    # These are NOT written to the delegations table, so
                    # _check_delegations misses them. See issue #354.
                    try:
                        await self._check_activity_delegations(client)
                    except Exception as e:
                        logger.debug("Activity delegation check failed: %s", e)

                    await asyncio.sleep(self._interval_seconds)

            except asyncio.CancelledError:
                raise
            except Exception as e:
                logger.error(
                    "Heartbeat loop error: %s — retrying in %ds", e, self._interval_seconds
                )
                await asyncio.sleep(self._interval_seconds)
            finally:
                # Always close the client — both the break path and the
                # crash path fall through here before the outer loop
                # creates a fresh one.
                if client:
                    try:
                        await client.aclose()
                    except Exception:
                        pass
|
|
|
|
    async def _check_delegations(self, client: httpx.AsyncClient):
        """Check for completed delegations and store results for the agent.

        Flow: GET this workspace's delegations; filter to newly
        completed/failed rows that this workspace originated; append them
        to DELEGATION_RESULTS_FILE; then wake the agent with a
        rate-limited A2A self-message and push a per-result canvas
        notification. Every failure is swallowed (debug log) — this runs
        inside the heartbeat loop and must never kill it.
        """
        try:
            resp = await client.get(
                f"{self.platform_url}/workspaces/{self.workspace_id}/delegations",
                headers=auth_headers(),
            )
            if resp.status_code != 200:
                return

            delegations = resp.json()
            if not isinstance(delegations, list):
                return

            new_results = []
            for d in delegations:
                did = d.get("delegation_id", "")
                status = d.get("status", "")

                # Skip rows without an ID and rows already processed.
                if not did or did in self._seen_delegation_ids:
                    continue

                if status in ("completed", "failed"):
                    # Fix B (Cycle 5): validate source_id before accepting delegation
                    # results. Only process delegations that THIS workspace created
                    # (source_id == self.workspace_id). Attacker-crafted delegation
                    # records with a foreign source_id cannot inject instructions.
                    source_id = d.get("source_id", "")
                    if source_id != self.workspace_id:
                        logger.warning(
                            "Heartbeat: skipping delegation %s — source_id %r does not "
                            "match this workspace %r; possible injection attempt",
                            did, source_id, self.workspace_id,
                        )
                        self._seen_delegation_ids.add(did)  # mark seen so we don't warn again
                        continue

                    self._seen_delegation_ids.add(did)
                    new_results.append({
                        "delegation_id": did,
                        "target_id": d.get("target_id", ""),
                        "source_id": source_id,
                        "status": status,
                        "summary": d.get("summary", ""),
                        "response_preview": d.get("response_preview", ""),
                        "error": d.get("error", ""),
                        "timestamp": time.time(),
                    })

            # Evict old seen IDs if over limit
            if len(self._seen_delegation_ids) > MAX_SEEN_DELEGATION_IDS:
                # Keep most recent half
                # NOTE(review): set iteration order is arbitrary, so this
                # actually keeps an arbitrary half, not the most recent —
                # harmless (worst case a duplicate result/warning), but the
                # comment above overstates the guarantee.
                self._seen_delegation_ids = set(list(self._seen_delegation_ids)[MAX_SEEN_DELEGATION_IDS // 2:])

            if new_results:
                # Append to results file for context injection on next message
                with open(DELEGATION_RESULTS_FILE, "a") as f:
                    for r in new_results:
                        f.write(json.dumps(r) + "\n")
                logger.info("Heartbeat: %d new delegation results — triggering self-message", len(new_results))

                # Build a summary message for the agent.
                # Fix B (Cycle 5): do NOT embed raw response_preview text in
                # user-role A2A messages — that is the prompt-injection vector.
                # Instead reference only the delegation ID and status; the agent
                # reads full content from DELEGATION_RESULTS_FILE which was
                # written above from trusted platform data.
                summary_lines = []
                for r in new_results:
                    line = f"- [{r['status']}] Delegation {r['delegation_id'][:8]}: {r['summary'][:80]}"
                    if r.get("error"):
                        line += f"\n Error: {r['error'][:100]}"
                    summary_lines.append(line)

                # Look up parent workspace (cached after first call)
                if self._parent_name is None:
                    try:
                        parent_resp = await client.get(
                            f"{self.platform_url}/workspaces/{self.workspace_id}",
                            headers=auth_headers(),
                        )
                        if parent_resp.status_code == 200:
                            parent_id = parent_resp.json().get("parent_id", "")
                            if parent_id:
                                parent_info = await client.get(
                                    f"{self.platform_url}/workspaces/{parent_id}",
                                    headers=auth_headers(),
                                )
                                if parent_info.status_code == 200:
                                    self._parent_name = parent_info.json().get("name", "")
                        if self._parent_name is None:
                            self._parent_name = ""  # No parent — cache empty
                    except Exception:
                        pass  # Will retry next cycle
                parent_name = self._parent_name or ""

                report_instruction = ""
                if parent_name:
                    report_instruction = (
                        f"\n\nIMPORTANT: Report these results back to your parent '{parent_name}' "
                        f"by delegating a summary to them. Use delegate_task or delegate_task_async "
                        f"with a concise status report. Also use send_message_to_user to notify the user."
                    )
                else:
                    report_instruction = (
                        "\n\nReport results using send_message_to_user to notify the user."
                    )

                trigger_msg = (
                    "Delegation results are ready. Review them and take appropriate action:\n"
                    + "\n".join(summary_lines)
                    + report_instruction
                )

                # Send A2A self-message to wake the agent.
                # Minimum 60s between self-messages to avoid spam, but always send
                # when there are genuinely NEW results to process.
                now = time.time()
                if now - self._last_self_message_time < SELF_MESSAGE_COOLDOWN:
                    logger.debug("Heartbeat: self-message cooldown (60s), will retry next cycle")
                else:
                    self._last_self_message_time = now
                    try:
                        # self_source_headers() adds X-Workspace-ID so the
                        # platform tags this row source=agent, not canvas
                        # — see platform_auth.py for the full rationale.
                        await client.post(
                            f"{self.platform_url}/workspaces/{self.workspace_id}/a2a",
                            json={
                                "method": "message/send",
                                "params": {
                                    "message": {
                                        "role": "user",
                                        "parts": [{"type": "text", "text": trigger_msg}],
                                    },
                                },
                            },
                            headers=self_source_headers(self.workspace_id),
                            timeout=120.0,
                        )
                        logger.info("Heartbeat: self-message sent to process delegation results")
                    except Exception as e:
                        logger.warning("Heartbeat: failed to send self-message: %s", e)

                # Also push notification to user via canvas
                for r in new_results:
                    try:
                        msg = f"Delegation {r['status']}: {r['summary'][:100]}"
                        if r.get("response_preview"):
                            msg += f"\nResult: {r['response_preview'][:200]}"
                        await client.post(
                            f"{self.platform_url}/workspaces/{self.workspace_id}/notify",
                            json={"message": msg, "type": "delegation_result"},
                            headers=auth_headers(),
                        )
                    except Exception:
                        pass

        except Exception as e:
            logger.debug("Delegation check error: %s", e)
|
|
|
|
async def _check_activity_delegations(self, client: httpx.AsyncClient):
|
|
"""Poll activity_logs for delegation results that arrived via the POST /a2a proxy path.
|
|
|
|
tool_delegate_task → send_a2a_message → POST /workspaces/:id/a2a (proxy)
|
|
logs to activity_logs but NOT the delegations table. _check_delegations
|
|
only checks the delegations table, so these results are invisible to the
|
|
heartbeat — the agent never wakes up to consume them (issue #354).
|
|
|
|
This method closes that gap: polls GET /workspaces/:id/activity?type=a2a_receive,
|
|
filters for rows from peer workspaces (source_id != "" and != self.workspace_id),
|
|
tracks seen IDs with a cursor file, and sends a self-message to wake the agent.
|
|
"""
|
|
try:
|
|
# Load cursor lazily on first call so startup is not blocked by disk I/O.
|
|
if not self._activity_cursor_loaded:
|
|
self._activity_cursor_loaded = True
|
|
try:
|
|
if os.path.exists(_ACTIVITY_DELEGATION_CURSOR_FILE):
|
|
cursor = open(_ACTIVITY_DELEGATION_CURSOR_FILE).read().strip()
|
|
if cursor:
|
|
self._seen_activity_ids = set(cursor.split(","))
|
|
except Exception:
|
|
pass # Corrupt cursor — start fresh
|
|
|
|
params: dict[str, str] = {"type": "a2a_receive"}
|
|
resp = await client.get(
|
|
f"{self.platform_url}/workspaces/{self.workspace_id}/activity",
|
|
params=params,
|
|
headers=auth_headers(),
|
|
)
|
|
if resp.status_code != 200:
|
|
return
|
|
|
|
rows = resp.json()
|
|
if not isinstance(rows, list):
|
|
return
|
|
|
|
# Activity API returns newest-first; process in reverse order so
|
|
# we advance the cursor monotonically (oldest → newest).
|
|
rows = list(reversed(rows))
|
|
|
|
new_results: list[dict] = []
|
|
last_id: str | None = None
|
|
for row in rows:
|
|
if not isinstance(row, dict):
|
|
continue
|
|
activity_id = str(row.get("id", ""))
|
|
if not activity_id:
|
|
continue
|
|
last_id = activity_id
|
|
|
|
if activity_id in self._seen_activity_ids:
|
|
continue
|
|
|
|
# Filter: must have a non-empty source_id that is NOT this workspace
|
|
# (peer agent messages only; skip canvas-user messages and self-notify).
|
|
source_id = row.get("source_id") or ""
|
|
if not source_id or source_id == self.workspace_id:
|
|
continue
|
|
|
|
self._seen_activity_ids.add(activity_id)
|
|
summary = row.get("summary") or ""
|
|
# Extract response text from request_body if available.
|
|
# Shape mirrors inbox._extract_text: walk parts for "text" field.
|
|
response_text = summary
|
|
request_body = row.get("request_body")
|
|
if isinstance(request_body, dict):
|
|
params_obj = request_body.get("params")
|
|
if isinstance(params_obj, dict):
|
|
msg = params_obj.get("message")
|
|
if isinstance(msg, dict):
|
|
parts = msg.get("parts") or []
|
|
texts = []
|
|
for p in (parts if isinstance(parts, list) else []):
|
|
if isinstance(p, dict) and p.get("kind") == "text" or p.get("type") == "text":
|
|
t = p.get("text", "")
|
|
if t:
|
|
texts.append(t)
|
|
if texts:
|
|
response_text = " ".join(texts)
|
|
|
|
new_results.append({
|
|
"delegation_id": activity_id, # Use activity ID as pseudo-delegation ID
|
|
"target_id": source_id,
|
|
"source_id": self.workspace_id,
|
|
"status": "completed",
|
|
"summary": summary,
|
|
"response_preview": response_text[:4096],
|
|
"error": "",
|
|
"timestamp": time.time(),
|
|
})
|
|
|
|
if not new_results:
|
|
return
|
|
|
|
# Persist cursor so restarts don't re-process these rows.
|
|
if last_id:
|
|
try:
|
|
with open(_ACTIVITY_DELEGATION_CURSOR_FILE, "w") as f:
|
|
# Keep cursor as comma-joined IDs; truncate if over 100KB.
|
|
cursor_str = ",".join(sorted(self._seen_activity_ids))
|
|
if len(cursor_str) > 102_400:
|
|
# Evict oldest half when cursor file grows too large.
|
|
sorted_ids = sorted(self._seen_activity_ids)
|
|
self._seen_activity_ids = set(sorted_ids[len(sorted_ids) // 2:])
|
|
cursor_str = ",".join(sorted(self._seen_activity_ids))
|
|
f.write(cursor_str)
|
|
except Exception:
|
|
pass # Non-fatal; next cycle will retry
|
|
|
|
# Append to results file and trigger self-message (mirrors _check_delegations).
|
|
with open(DELEGATION_RESULTS_FILE, "a") as f:
|
|
for r in new_results:
|
|
f.write(json.dumps(r) + "\n")
|
|
logger.info(
|
|
"Heartbeat: %d new a2a_receive delegation results from activity_logs — "
|
|
"triggering self-message",
|
|
len(new_results),
|
|
)
|
|
|
|
# Build and send self-message to wake the agent.
|
|
summary_lines = []
|
|
for r in new_results:
|
|
line = f"- [completed] Peer response from {r['target_id'][:8]}: {r['summary'][:80] or '(no summary)'}"
|
|
if r.get("error"):
|
|
line += f"\n Error: {r['error'][:100]}"
|
|
summary_lines.append(line)
|
|
|
|
# Look up parent name (reuse cached value from _check_delegations if set).
|
|
if self._parent_name is None:
|
|
try:
|
|
parent_resp = await client.get(
|
|
f"{self.platform_url}/workspaces/{self.workspace_id}",
|
|
headers=auth_headers(),
|
|
)
|
|
if parent_resp.status_code == 200:
|
|
parent_id = parent_resp.json().get("parent_id", "")
|
|
if parent_id:
|
|
parent_info = await client.get(
|
|
f"{self.platform_url}/workspaces/{parent_id}",
|
|
headers=auth_headers(),
|
|
)
|
|
if parent_info.status_code == 200:
|
|
self._parent_name = parent_info.json().get("name", "")
|
|
if self._parent_name is None:
|
|
self._parent_name = ""
|
|
except Exception:
|
|
self._parent_name = ""
|
|
parent_name = self._parent_name or ""
|
|
|
|
report_instruction = ""
|
|
if parent_name:
|
|
report_instruction = (
|
|
f"\n\nIMPORTANT: Delegate a summary of these results to your parent "
|
|
f"'{parent_name}' using delegate_task. Also use send_message_to_user "
|
|
f"to notify the user."
|
|
)
|
|
else:
|
|
report_instruction = (
|
|
"\n\nReport results using send_message_to_user to notify the user."
|
|
)
|
|
|
|
trigger_msg = (
|
|
"Delegation results are ready (from a2a_receive via activity_logs). "
|
|
"Review them and take appropriate action:\n"
|
|
+ "\n".join(summary_lines)
|
|
+ report_instruction
|
|
)
|
|
|
|
now = time.time()
|
|
if now - self._last_self_message_time < SELF_MESSAGE_COOLDOWN:
|
|
logger.debug(
|
|
"Heartbeat: self-message cooldown active; "
|
|
"a2a_receive results will be retried next cycle"
|
|
)
|
|
else:
|
|
self._last_self_message_time = now
|
|
try:
|
|
await client.post(
|
|
f"{self.platform_url}/workspaces/{self.workspace_id}/a2a",
|
|
json={
|
|
"method": "message/send",
|
|
"params": {
|
|
"message": {
|
|
"role": "user",
|
|
"parts": [{"type": "text", "text": trigger_msg}],
|
|
},
|
|
},
|
|
},
|
|
headers=self_source_headers(self.workspace_id),
|
|
timeout=120.0,
|
|
)
|
|
logger.info("Heartbeat: a2a_receive self-message sent")
|
|
except Exception as e:
|
|
logger.warning("Heartbeat: failed to send a2a_receive self-message: %s", e)
|
|
|
|
# Also notify the user via canvas.
|
|
for r in new_results:
|
|
try:
|
|
msg = f"Delegation completed: {r['summary'][:100] or '(no summary)'}"
|
|
preview = r.get("response_preview", "")
|
|
if preview:
|
|
msg += f"\nResult: {preview[:200]}"
|
|
await client.post(
|
|
f"{self.platform_url}/workspaces/{self.workspace_id}/notify",
|
|
json={"message": msg, "type": "delegation_result"},
|
|
headers=auth_headers(),
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
except Exception as e:
|
|
logger.debug("Activity delegation check error: %s", e)
|